alloc 0.0.12.tar.gz → 0.0.14.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {alloc-0.0.12 → alloc-0.0.14}/PKG-INFO +1 -1
- {alloc-0.0.12 → alloc-0.0.14}/pyproject.toml +1 -1
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/__init__.py +1 -1
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/callbacks.py +26 -1
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/cli.py +28 -7
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/extractor_runner.py +7 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/probe.py +2 -2
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/upload.py +6 -3
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/PKG-INFO +1 -1
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_callbacks.py +118 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_extractor_activation.py +72 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_upload.py +81 -0
- {alloc-0.0.12 → alloc-0.0.14}/README.md +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/setup.cfg +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/artifact_loader.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/artifact_writer.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/browser_auth.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/catalog/__init__.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/catalog/default_rate_card.json +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/catalog/gpus.v1.json +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/code_analyzer.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/config.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/context.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/diagnosis_display.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/diagnosis_engine.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/diagnosis_rules.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/display.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/ghost.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/model_extractor.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/model_registry.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/stability.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc/yaml_config.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/SOURCES.txt +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/dependency_links.txt +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/entry_points.txt +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/requires.txt +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/top_level.txt +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_artifact.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_artifact_loader.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_auth.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_catalog.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_cli.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_code_analyzer.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_context.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_diagnose_cli.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_diagnosis_engine.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_diagnosis_rules.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_ghost.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_ghost_degradation.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_init_from_org.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_interconnect.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_model_extractor.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_probe_hw.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_probe_multi.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_scan_auth.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_stability.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_topology_strategy.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_verdict.py +0 -0
- {alloc-0.0.12 → alloc-0.0.14}/tests/test_yaml_config.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alloc
-Version: 0.0.12
+Version: 0.0.14
 Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
 Author-email: Alloc Labs <hello@alloclabs.com>
 License-Expression: Apache-2.0

pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "alloc"
-version = "0.0.12"
+version = "0.0.14"
 description = "Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints."
 readme = "README.md"
 license = "Apache-2.0"

src/alloc/__init__.py
@@ -9,7 +9,7 @@ _warnings.filterwarnings("ignore", category=FutureWarning, module=r"torch\.cuda"
 _warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"torch\.cuda")
 del _warnings
 
-__version__ = "0.0.12"
+__version__ = "0.0.14"
 
 from alloc.ghost import ghost, GhostReport
 from alloc.callbacks import AllocCallback as HuggingFaceCallback

src/alloc/callbacks.py
@@ -152,6 +152,28 @@ def _detect_architecture(model, optimizer=None, training_args=None):
     if hasattr(config, "num_attention_heads") or hasattr(config, "num_heads"):
         info["architecture_type"] = "transformer"
 
+    # --- Architecture dimensions ---
+    num_heads = (
+        getattr(config, "num_attention_heads", None)
+        or getattr(config, "num_heads", None)
+        or getattr(config, "n_head", None)
+    )
+    if num_heads is not None:
+        info["num_heads"] = num_heads
+    vocab_size = getattr(config, "vocab_size", None)
+    if vocab_size is not None:
+        info["vocab_size"] = vocab_size
+    hidden_dim = getattr(config, "hidden_size", None)
+    if hidden_dim is not None:
+        info["hidden_dim"] = hidden_dim
+    num_layers = (
+        getattr(config, "num_hidden_layers", None)
+        or getattr(config, "num_layers", None)
+        or getattr(config, "n_layer", None)
+    )
+    if num_layers is not None:
+        info["num_layers"] = num_layers
+
     # --- Gradient checkpointing ---
     gc = getattr(model, "is_gradient_checkpointing", None)
     if gc is not None:

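The `or`-chained `getattr` lookups above exist because Hugging Face configs name the same dimensions differently across model families. A minimal, self-contained sketch of that fallback pattern; the config classes below are hypothetical stand-ins, not part of alloc:

```python
# Hypothetical config shapes; real HF configs expose one of these naming schemes.
class LlamaStyleConfig:
    num_attention_heads = 32
    num_hidden_layers = 32
    hidden_size = 4096
    vocab_size = 32000


class Gpt2StyleConfig:
    n_head = 12        # older naming convention
    n_layer = 12
    hidden_size = 768
    vocab_size = 50257


def read_dims(config):
    """Resolve dimensions regardless of which attribute names the config uses."""
    # Note: `or` treats 0 as "missing", which is acceptable here because these
    # dimensions are always positive when present.
    return {
        "num_heads": (
            getattr(config, "num_attention_heads", None)
            or getattr(config, "num_heads", None)
            or getattr(config, "n_head", None)
        ),
        "num_layers": (
            getattr(config, "num_hidden_layers", None)
            or getattr(config, "num_layers", None)
            or getattr(config, "n_layer", None)
        ),
        "hidden_dim": getattr(config, "hidden_size", None),
        "vocab_size": getattr(config, "vocab_size", None),
    }


print(read_dims(LlamaStyleConfig()))  # num_heads=32, num_layers=32, ...
print(read_dims(Gpt2StyleConfig()))   # num_heads=12, num_layers=12, ...
```
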
src/alloc/callbacks.py
@@ -305,7 +327,8 @@ def _build_sidecar(
     if architecture_info:
         for key in ("architecture_type", "optimizer_type", "fine_tuning_method",
                     "gradient_checkpointing", "model_type", "attention_type",
-                    "param_count", "trainable_param_count"):
+                    "param_count", "trainable_param_count",
+                    "num_heads", "vocab_size", "hidden_dim", "num_layers"):
             val = architecture_info.get(key)
             if val is not None:
                 data[key] = val

src/alloc/callbacks.py
@@ -681,6 +704,8 @@ def _write_full_artifact(monitor, sidecar_data, step_times_raw=None):
         "architecture_type", "optimizer_type", "fine_tuning_method",
         "gradient_checkpointing", "model_type", "attention_type",
         "param_count", "trainable_param_count",
+        # Architecture dimensions (extracted from model config)
+        "num_heads", "vocab_size", "hidden_dim", "num_layers",
         # Distributed signals (P3.5-C)
         "comm_overhead_pct",
     ]

src/alloc/cli.py
@@ -493,10 +493,20 @@ def run(
         "baseline_run_id": after,
         "experiment_id": experiment,
     }
-    # Merge timing fields from callback sidecar
+    # Merge timing + architecture fields from callback sidecar
     if callback_data:
         for key in ("step_time_ms_p50", "step_time_ms_p90", "samples_per_sec",
-                    "step_time_ms_mean", "step_time_ms_std"):
+                    "step_time_ms_mean", "step_time_ms_std",
+                    "architecture_type", "optimizer_type", "fine_tuning_method",
+                    "gradient_checkpointing", "model_type", "attention_type",
+                    "param_count", "trainable_param_count",
+                    "num_heads", "vocab_size", "hidden_dim", "num_layers",
+                    "phase_forward_ms_p50", "phase_forward_ms_p90",
+                    "phase_backward_ms_p50", "phase_backward_ms_p90",
+                    "phase_optimizer_ms_p50", "phase_optimizer_ms_p90",
+                    "phase_dataloader_ms_p50", "phase_dataloader_ms_p90",
+                    "has_phase_timing", "comm_overhead_pct",
+                    "dataloader_wait_pct", "batch_size"):
             val = callback_data.get(key)
             if val is not None:
                 probe_dict[key] = val

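The merge loop above (and the similar allowlists in callbacks.py) follows one pattern: copy only known keys, and never let a missing field overwrite the probe dict with None. A small sketch of that pattern under those assumptions; the names are illustrative, not the actual CLI variables:

```python
ALLOWED_KEYS = ("step_time_ms_p50", "num_heads", "vocab_size", "hidden_dim")


def merge_sidecar(probe_dict: dict, callback_data: dict) -> dict:
    """Copy allowlisted, non-None fields from the callback sidecar into the probe dict."""
    for key in ALLOWED_KEYS:
        val = callback_data.get(key)
        if val is not None:  # skip missing fields instead of writing None
            probe_dict[key] = val
    return probe_dict


probe = {"peak_vram_mb": 5000}
sidecar = {"step_time_ms_p50": 100.0, "num_heads": 32, "vocab_size": None}
print(merge_sidecar(probe, sidecar))
# {'peak_vram_mb': 5000, 'step_time_ms_p50': 100.0, 'num_heads': 32}
```
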
src/alloc/cli.py
@@ -554,8 +564,11 @@ def run(
     else:
         console.print("[dim]Tip: alloc login --browser to connect your dashboard[/dim]")
 
+    # Propagate non-zero exit code — but NOT when calibrate mode
+    # intentionally killed the process (torchrun exits non-zero on SIGTERM)
     if result.exit_code and result.exit_code != 0:
-        raise typer.Exit(result.exit_code)
+        if result.stop_reason not in ("stable", "timeout"):
+            raise typer.Exit(result.exit_code)
 
 
 @app.command()

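The guarded re-raise above distinguishes a real training failure from calibrate mode stopping the run on purpose (torchrun exits non-zero after SIGTERM). A sketch of that decision, assuming a result object with `exit_code` and `stop_reason` fields as in the diff; the helper itself is hypothetical:

```python
from typing import Optional


def should_propagate(exit_code: int, stop_reason: Optional[str]) -> bool:
    """Return True only when a non-zero exit code signals an actual failure."""
    if not exit_code:
        return False
    # "stable" / "timeout" mean calibrate mode terminated the run deliberately.
    return stop_reason not in ("stable", "timeout")


assert should_propagate(1, "error") is True
assert should_propagate(143, "stable") is False   # SIGTERM after calibration settled
assert should_propagate(0, None) is False
```
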
src/alloc/cli.py
@@ -2105,6 +2118,13 @@ def scan(
     """Remote ghost scan via Alloc API — no GPU needed."""
     import httpx
 
+    # When --json, redirect console to stderr so nothing contaminates stdout.
+    from rich.console import Console as _Console
+    if json_output:
+        console = _Console(stderr=True)
+    else:
+        console = _Console()
+
     # Resolve param count from model name or explicit flag
     resolved_param_count = param_count_b or _model_to_params(model)
     if resolved_param_count is None:

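Routing Rich output to stderr keeps stdout reserved for machine-readable JSON, so piping the `--json` output to another tool stays parseable. A stand-alone sketch of the pattern, not the actual `scan()` body:

```python
import json
import sys

from rich.console import Console


def emit(result: dict, json_output: bool) -> None:
    # Human-readable chatter goes to stderr when --json is requested.
    console = Console(stderr=True) if json_output else Console()
    console.print("[dim]scan complete[/dim]")
    if json_output:
        sys.stdout.write(json.dumps(result) + "\n")  # clean, parseable stdout


emit({"fit": "ok", "est_vram_gb": 41.2}, json_output=True)
```
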
src/alloc/cli.py
@@ -2164,10 +2184,11 @@ def scan(
             resp = client.post(f"{api_url}/scans", json=payload, headers=headers)
         else:
             # Token refresh failed — fall back to unauthenticated scan
-            console.print(
-                "[yellow]Session expired — falling back to public scan "
-                "(org fleet context unavailable). Run `alloc login` to restore.[/yellow]",
-            )
+            if not json_output:
+                console.print(
+                    "[yellow]Session expired — falling back to public scan "
+                    "(org fleet context unavailable). Run `alloc login` to restore.[/yellow]",
+                )
             del headers["Authorization"]
             resp = client.post(f"{api_url}/scans/cli", json=payload, headers=headers)
 

src/alloc/extractor_runner.py
@@ -71,6 +71,11 @@ def _extract_architecture(model):
         or getattr(config, "n_positions", None)
         or getattr(config, "max_seq_len", None)
     )
+    info["num_heads"] = (
+        getattr(config, "num_attention_heads", None)
+        or getattr(config, "num_heads", None)
+        or getattr(config, "n_head", None)
+    )
     info["model_type"] = getattr(config, "model_type", None)
     info["vocab_size"] = getattr(config, "vocab_size", None)
     info["image_size"] = getattr(config, "image_size", None)

src/alloc/extractor_runner.py
@@ -277,6 +282,8 @@ def main():
         "num_layers": arch_info.get("num_layers"),
         "seq_length": arch_info.get("seq_length"),
         "model_type": arch_info.get("model_type"),
+        "vocab_size": arch_info.get("vocab_size"),
+        "num_heads": arch_info.get("num_heads"),
         "activation_memory_bytes": activation_result.get("activation_memory_bytes"),
         "activation_method": activation_result.get("activation_method"),
     }

src/alloc/probe.py
@@ -699,8 +699,8 @@ def probe_command(
         num_gpus_detected=num_gpus_ref[0],
         process_map=process_map_ref[0],
         per_gpu_peak_vram_mb=(
-            [round(per_gpu_peaks_ref[0]
-            if
+            [round(per_gpu_peaks_ref[0][i], 1) for i in sorted(per_gpu_peaks_ref[0])]
+            if per_gpu_peaks_ref[0] else None
         ),
         detected_interconnect=detected_ic_ref[0],
     )

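The corrected expression builds the per-GPU list in GPU-index order, rounding each peak, and falls back to None when nothing was recorded. A small illustration, with a literal dict standing in for `per_gpu_peaks_ref[0]`:

```python
per_gpu_peaks = {1: 41211.7261, 0: 39872.3349}  # GPU index -> peak VRAM in MB

per_gpu_peak_vram_mb = (
    [round(per_gpu_peaks[i], 1) for i in sorted(per_gpu_peaks)]
    if per_gpu_peaks else None
)
print(per_gpu_peak_vram_mb)  # [39872.3, 41211.7] (index 0 first, then index 1)

empty: dict = {}
print([round(empty[i], 1) for i in sorted(empty)] if empty else None)  # None
```
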
src/alloc/upload.py
@@ -126,9 +126,12 @@ def upload_artifact(artifact_path: str, api_url: str, token: str) -> dict:
         "process_map": probe.get("process_map"),
         # Architecture fields: probe (callbacks) takes priority over ghost defaults
         "batch_size": probe.get("batch_size") or (ghost.get("batch_size") if ghost else None),
-        "seq_length": ghost.get("seq_length") if ghost else None,
-        "hidden_dim": ghost.get("hidden_dim") if ghost else None,
-        "num_layers": ghost.get("num_layers") if ghost else None,
+        "seq_length": probe.get("seq_length") or (ghost.get("seq_length") if ghost else None),
+        "hidden_dim": probe.get("hidden_dim") or (ghost.get("hidden_dim") if ghost else None),
+        "num_layers": probe.get("num_layers") or (ghost.get("num_layers") if ghost else None),
+        "vocab_size": probe.get("vocab_size") or (ghost.get("vocab_size") if ghost else None),
+        "num_heads": probe.get("num_heads") or (ghost.get("num_heads") if ghost else None),
+        "model_type": probe.get("model_type") or (ghost.get("model_type") if ghost else None),
         "ghost_report": ghost if ghost else None,
         "source": probe.get("source") or "cli",
         "command": probe.get("command"),

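The upload payload now resolves every architecture field with the same precedence: a value measured by the probe callbacks wins, and the ghost (static estimate) value only fills gaps. A sketch of that precedence rule; the `pick` helper is illustrative, not part of upload.py:

```python
from typing import Any, Optional


def pick(field: str, probe: dict, ghost: Optional[dict]) -> Any:
    """Prefer the measured probe value, fall back to the ghost estimate, else None."""
    return probe.get(field) or (ghost.get(field) if ghost else None)


probe = {"hidden_dim": 4096, "num_layers": 32}                          # measured at runtime
ghost = {"hidden_dim": 2048, "vocab_size": 32000, "seq_length": 2048}   # static estimate

print(pick("hidden_dim", probe, ghost))   # 4096: probe wins over ghost
print(pick("vocab_size", probe, ghost))   # 32000: falls back to ghost
print(pick("seq_length", probe, None))    # None when neither side has it
```
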
src/alloc.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alloc
-Version: 0.0.12
+Version: 0.0.14
 Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
 Author-email: Alloc Labs <hello@alloclabs.com>
 License-Expression: Apache-2.0

tests/test_callbacks.py
@@ -870,6 +870,33 @@ class TestWriteFullArtifactWithPhases:
         assert call_kwargs.kwargs.get("output_path") is None  # default = alloc_artifact.json.gz
 
 
+    def test_dimension_fields_in_full_artifact(self, tmp_path):
+        """Architecture dimension fields from sidecar must flow to full artifact."""
+        monitor = _NvmlMonitor()
+        sidecar = {
+            "framework": "test",
+            "step_count": 10,
+            "step_time_ms_p50": 100.0,
+            "num_heads": 32,
+            "vocab_size": 32000,
+            "hidden_dim": 4096,
+            "num_layers": 32,
+            "model_type": "llama",
+            "architecture_type": "transformer",
+        }
+
+        with patch("alloc.artifact_writer.write_report") as mock_write:
+            _write_full_artifact(monitor, sidecar)
+        assert mock_write.called
+        probe = mock_write.call_args.kwargs.get("probe_result")
+        assert probe["num_heads"] == 32
+        assert probe["vocab_size"] == 32000
+        assert probe["hidden_dim"] == 4096
+        assert probe["num_layers"] == 32
+        assert probe["model_type"] == "llama"
+        assert probe["architecture_type"] == "transformer"
+
+
 # ── Architecture Detection (P3-A) ──────────────────────────────────────
 
 class TestDetectArchitecture:

tests/test_callbacks.py
@@ -1041,6 +1068,97 @@ class TestBuildSidecarArchitecture:
         assert "gradient_checkpointing" not in data
         assert "architecture_type" not in data
 
+    def test_dimension_fields_included(self):
+        """New dimension fields (num_heads, vocab_size, hidden_dim, num_layers) flow through sidecar."""
+        arch_info = {
+            "architecture_type": "transformer",
+            "num_heads": 32,
+            "vocab_size": 32000,
+            "hidden_dim": 4096,
+            "num_layers": 32,
+            "model_type": "llama",
+        }
+        data = _build_sidecar(
+            framework="huggingface",
+            step_count=10,
+            step_times_ms=[100.0],
+            batch_size=8,
+            architecture_info=arch_info,
+        )
+        assert data["num_heads"] == 32
+        assert data["vocab_size"] == 32000
+        assert data["hidden_dim"] == 4096
+        assert data["num_layers"] == 32
+        assert data["model_type"] == "llama"
+
+
+class TestDetectArchitectureDimensions:
+    """Test that _detect_architecture extracts dimension values from config."""
+
+    def test_extracts_all_dimensions(self):
+        from alloc.callbacks import _detect_architecture
+
+        model = MagicMock()
+        model.config.model_type = "llama"
+        model.config.num_attention_heads = 32
+        model.config.vocab_size = 32000
+        model.config.hidden_size = 4096
+        model.config.num_hidden_layers = 32
+        model.is_gradient_checkpointing = False
+        del model.peft_config
+        del model.config._attn_implementation
+        del model.config.attn_implementation
+        del model.config.num_heads
+        del model.config.n_head
+        del model.config.num_layers
+        del model.config.n_layer
+
+        result = _detect_architecture(model)
+        assert result["num_heads"] == 32
+        assert result["vocab_size"] == 32000
+        assert result["hidden_dim"] == 4096
+        assert result["num_layers"] == 32
+
+    def test_fallback_attr_names(self):
+        """Falls back to alternate attribute names (n_head, n_layer)."""
+        from alloc.callbacks import _detect_architecture
+
+        model = MagicMock()
+        model.config.model_type = "gpt2"
+        model.config.n_head = 12
+        model.config.vocab_size = 50257
+        model.config.hidden_size = 768
+        model.config.n_layer = 12
+        model.is_gradient_checkpointing = False
+        del model.peft_config
+        del model.config._attn_implementation
+        del model.config.attn_implementation
+        del model.config.num_attention_heads
+        del model.config.num_heads
+        del model.config.num_hidden_layers
+        del model.config.num_layers
+
+        result = _detect_architecture(model)
+        assert result["num_heads"] == 12
+        assert result["vocab_size"] == 50257
+        assert result["hidden_dim"] == 768
+        assert result["num_layers"] == 12
+
+    def test_no_config_no_dimensions(self):
+        """Model without config should not have dimension fields."""
+        from alloc.callbacks import _detect_architecture
+
+        model = MagicMock()
+        del model.config
+        del model.peft_config
+        del model.is_gradient_checkpointing
+
+        result = _detect_architecture(model)
+        assert "num_heads" not in result
+        assert "vocab_size" not in result
+        assert "hidden_dim" not in result
+        assert "num_layers" not in result
+
 
 # ── CUDA Event Non-Blocking Sync (C2 Fix) ─────────────────────────────
 

tests/test_extractor_activation.py
@@ -176,3 +176,75 @@ def test_count_params_dedup_shared():
     count, dtype_str = _count_params(model)
     # embed.weight = 100*64 = 6400 params (shared with proj.weight)
     assert count == 6400
+
+
+# ---------------------------------------------------------------------------
+# _extract_architecture — vocab_size, num_heads
+# ---------------------------------------------------------------------------
+
+
+def test_extract_architecture_includes_vocab_size_and_num_heads():
+    """Result dict should include vocab_size and num_heads when config has them."""
+    from alloc.extractor_runner import _extract_architecture
+
+    class FakeConfig:
+        hidden_size = 4096
+        num_hidden_layers = 32
+        max_position_embeddings = 2048
+        model_type = "llama"
+        vocab_size = 32000
+        num_attention_heads = 32
+
+    class FakeModel:
+        config = FakeConfig()
+
+    info = _extract_architecture(FakeModel())
+    assert info["vocab_size"] == 32000
+    assert info["num_heads"] == 32
+    assert info["hidden_dim"] == 4096
+    assert info["num_layers"] == 32
+    assert info["model_type"] == "llama"
+
+
+def test_extract_architecture_num_heads_fallback_attrs():
+    """num_heads extraction falls back to num_heads and n_head attrs."""
+    from alloc.extractor_runner import _extract_architecture
+
+    # Test num_heads attr
+    class ConfigWithNumHeads:
+        hidden_size = 768
+        num_hidden_layers = 12
+        num_heads = 12
+        model_type = "custom"
+        vocab_size = 50000
+
+    class FakeModel1:
+        config = ConfigWithNumHeads()
+
+    info = _extract_architecture(FakeModel1())
+    assert info["num_heads"] == 12
+
+    # Test n_head attr
+    class ConfigWithNHead:
+        hidden_size = 768
+        n_layer = 12
+        n_head = 12
+        model_type = "gpt2"
+        vocab_size = 50257
+
+    class FakeModel2:
+        config = ConfigWithNHead()
+
+    info2 = _extract_architecture(FakeModel2())
+    assert info2["num_heads"] == 12
+
+
+def test_extract_architecture_no_config():
+    """Model without config returns empty dict."""
+    from alloc.extractor_runner import _extract_architecture
+
+    class BareModel:
+        pass
+
+    info = _extract_architecture(BareModel())
+    assert info == {}

tests/test_upload.py
@@ -223,3 +223,84 @@ def test_upload_enriches_payload_from_probe_and_hardware(tmp_path):
     assert payload["step_time_ms_p90"] == 161.8
     assert payload["samples_per_sec"] == 22.9
     assert payload["dataloader_wait_pct"] == 11.3
+
+
+def test_upload_sends_architecture_dimensions(tmp_path):
+    """Upload should send vocab_size, num_heads, model_type from probe and ghost."""
+    import gzip
+
+    artifact = tmp_path / "arch_dims.json.gz"
+    data = {
+        "probe": {
+            "peak_vram_mb": 5000,
+            "vocab_size": 32000,
+            "num_heads": 32,
+            "model_type": "llama",
+            "hidden_dim": 4096,
+            "num_layers": 32,
+        },
+        "hardware": {"gpu_name": "NVIDIA A100-SXM4-80GB"},
+        "ghost": {
+            "seq_length": 2048,
+        },
+    }
+    with gzip.open(str(artifact), "wt", encoding="utf-8") as f:
+        json.dump(data, f)
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"run_id": "run-dims", "status": "completed"}
+
+    mock_client = MagicMock()
+    mock_client.__enter__ = MagicMock(return_value=mock_client)
+    mock_client.__exit__ = MagicMock(return_value=False)
+    mock_client.post.return_value = mock_response
+
+    with patch("httpx.Client", return_value=mock_client):
+        upload_artifact(str(artifact), "http://localhost:8000", "test-token")
+
+    payload = mock_client.post.call_args.kwargs["json"]
+    assert payload["vocab_size"] == 32000
+    assert payload["num_heads"] == 32
+    assert payload["model_type"] == "llama"
+    assert payload["hidden_dim"] == 4096
+    assert payload["num_layers"] == 32
+    assert payload["seq_length"] == 2048
+
+
+def test_upload_ghost_fallback_for_dimensions(tmp_path):
+    """Ghost fields should be used as fallback when probe doesn't have dimensions."""
+    import gzip
+
+    artifact = tmp_path / "ghost_fallback.json.gz"
+    data = {
+        "probe": {"peak_vram_mb": 5000},
+        "hardware": {"gpu_name": "NVIDIA A100-SXM4-80GB"},
+        "ghost": {
+            "vocab_size": 50257,
+            "num_heads": 12,
+            "model_type": "gpt2",
+            "hidden_dim": 768,
+            "num_layers": 12,
+            "seq_length": 1024,
+        },
+    }
+    with gzip.open(str(artifact), "wt", encoding="utf-8") as f:
+        json.dump(data, f)
+
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"run_id": "run-ghost", "status": "completed"}
+
+    mock_client = MagicMock()
+    mock_client.__enter__ = MagicMock(return_value=mock_client)
+    mock_client.__exit__ = MagicMock(return_value=False)
+    mock_client.post.return_value = mock_response
+
+    with patch("httpx.Client", return_value=mock_client):
+        upload_artifact(str(artifact), "http://localhost:8000", "test-token")
+
+    payload = mock_client.post.call_args.kwargs["json"]
+    assert payload["vocab_size"] == 50257
+    assert payload["num_heads"] == 12
+    assert payload["model_type"] == "gpt2"