alloc 0.0.12__tar.gz → 0.0.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {alloc-0.0.12 → alloc-0.0.14}/PKG-INFO +1 -1
  2. {alloc-0.0.12 → alloc-0.0.14}/pyproject.toml +1 -1
  3. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/__init__.py +1 -1
  4. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/callbacks.py +26 -1
  5. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/cli.py +28 -7
  6. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/extractor_runner.py +7 -0
  7. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/probe.py +2 -2
  8. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/upload.py +6 -3
  9. {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/PKG-INFO +1 -1
  10. {alloc-0.0.12 → alloc-0.0.14}/tests/test_callbacks.py +118 -0
  11. {alloc-0.0.12 → alloc-0.0.14}/tests/test_extractor_activation.py +72 -0
  12. {alloc-0.0.12 → alloc-0.0.14}/tests/test_upload.py +81 -0
  13. {alloc-0.0.12 → alloc-0.0.14}/README.md +0 -0
  14. {alloc-0.0.12 → alloc-0.0.14}/setup.cfg +0 -0
  15. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/artifact_loader.py +0 -0
  16. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/artifact_writer.py +0 -0
  17. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/browser_auth.py +0 -0
  18. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/catalog/__init__.py +0 -0
  19. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/catalog/default_rate_card.json +0 -0
  20. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/catalog/gpus.v1.json +0 -0
  21. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/code_analyzer.py +0 -0
  22. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/config.py +0 -0
  23. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/context.py +0 -0
  24. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/diagnosis_display.py +0 -0
  25. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/diagnosis_engine.py +0 -0
  26. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/diagnosis_rules.py +0 -0
  27. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/display.py +0 -0
  28. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/ghost.py +0 -0
  29. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/model_extractor.py +0 -0
  30. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/model_registry.py +0 -0
  31. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/stability.py +0 -0
  32. {alloc-0.0.12 → alloc-0.0.14}/src/alloc/yaml_config.py +0 -0
  33. {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/SOURCES.txt +0 -0
  34. {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/dependency_links.txt +0 -0
  35. {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/entry_points.txt +0 -0
  36. {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/requires.txt +0 -0
  37. {alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/top_level.txt +0 -0
  38. {alloc-0.0.12 → alloc-0.0.14}/tests/test_artifact.py +0 -0
  39. {alloc-0.0.12 → alloc-0.0.14}/tests/test_artifact_loader.py +0 -0
  40. {alloc-0.0.12 → alloc-0.0.14}/tests/test_auth.py +0 -0
  41. {alloc-0.0.12 → alloc-0.0.14}/tests/test_catalog.py +0 -0
  42. {alloc-0.0.12 → alloc-0.0.14}/tests/test_cli.py +0 -0
  43. {alloc-0.0.12 → alloc-0.0.14}/tests/test_code_analyzer.py +0 -0
  44. {alloc-0.0.12 → alloc-0.0.14}/tests/test_context.py +0 -0
  45. {alloc-0.0.12 → alloc-0.0.14}/tests/test_diagnose_cli.py +0 -0
  46. {alloc-0.0.12 → alloc-0.0.14}/tests/test_diagnosis_engine.py +0 -0
  47. {alloc-0.0.12 → alloc-0.0.14}/tests/test_diagnosis_rules.py +0 -0
  48. {alloc-0.0.12 → alloc-0.0.14}/tests/test_ghost.py +0 -0
  49. {alloc-0.0.12 → alloc-0.0.14}/tests/test_ghost_degradation.py +0 -0
  50. {alloc-0.0.12 → alloc-0.0.14}/tests/test_init_from_org.py +0 -0
  51. {alloc-0.0.12 → alloc-0.0.14}/tests/test_interconnect.py +0 -0
  52. {alloc-0.0.12 → alloc-0.0.14}/tests/test_model_extractor.py +0 -0
  53. {alloc-0.0.12 → alloc-0.0.14}/tests/test_probe_hw.py +0 -0
  54. {alloc-0.0.12 → alloc-0.0.14}/tests/test_probe_multi.py +0 -0
  55. {alloc-0.0.12 → alloc-0.0.14}/tests/test_scan_auth.py +0 -0
  56. {alloc-0.0.12 → alloc-0.0.14}/tests/test_stability.py +0 -0
  57. {alloc-0.0.12 → alloc-0.0.14}/tests/test_topology_strategy.py +0 -0
  58. {alloc-0.0.12 → alloc-0.0.14}/tests/test_verdict.py +0 -0
  59. {alloc-0.0.12 → alloc-0.0.14}/tests/test_yaml_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.0.12
3
+ Version: 0.0.14
4
4
  Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License-Expression: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alloc"
7
- version = "0.0.12"
7
+ version = "0.0.14"
8
8
  description = "Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -9,7 +9,7 @@ _warnings.filterwarnings("ignore", category=FutureWarning, module=r"torch\.cuda"
9
9
  _warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"torch\.cuda")
10
10
  del _warnings
11
11
 
12
- __version__ = "0.0.12"
12
+ __version__ = "0.0.14"
13
13
 
14
14
  from alloc.ghost import ghost, GhostReport
15
15
  from alloc.callbacks import AllocCallback as HuggingFaceCallback
@@ -152,6 +152,28 @@ def _detect_architecture(model, optimizer=None, training_args=None):
152
152
  if hasattr(config, "num_attention_heads") or hasattr(config, "num_heads"):
153
153
  info["architecture_type"] = "transformer"
154
154
 
155
+ # --- Architecture dimensions ---
156
+ num_heads = (
157
+ getattr(config, "num_attention_heads", None)
158
+ or getattr(config, "num_heads", None)
159
+ or getattr(config, "n_head", None)
160
+ )
161
+ if num_heads is not None:
162
+ info["num_heads"] = num_heads
163
+ vocab_size = getattr(config, "vocab_size", None)
164
+ if vocab_size is not None:
165
+ info["vocab_size"] = vocab_size
166
+ hidden_dim = getattr(config, "hidden_size", None)
167
+ if hidden_dim is not None:
168
+ info["hidden_dim"] = hidden_dim
169
+ num_layers = (
170
+ getattr(config, "num_hidden_layers", None)
171
+ or getattr(config, "num_layers", None)
172
+ or getattr(config, "n_layer", None)
173
+ )
174
+ if num_layers is not None:
175
+ info["num_layers"] = num_layers
176
+
155
177
  # --- Gradient checkpointing ---
156
178
  gc = getattr(model, "is_gradient_checkpointing", None)
157
179
  if gc is not None:
@@ -305,7 +327,8 @@ def _build_sidecar(
305
327
  if architecture_info:
306
328
  for key in ("architecture_type", "optimizer_type", "fine_tuning_method",
307
329
  "gradient_checkpointing", "model_type", "attention_type",
308
- "param_count", "trainable_param_count"):
330
+ "param_count", "trainable_param_count",
331
+ "num_heads", "vocab_size", "hidden_dim", "num_layers"):
309
332
  val = architecture_info.get(key)
310
333
  if val is not None:
311
334
  data[key] = val
@@ -681,6 +704,8 @@ def _write_full_artifact(monitor, sidecar_data, step_times_raw=None):
681
704
  "architecture_type", "optimizer_type", "fine_tuning_method",
682
705
  "gradient_checkpointing", "model_type", "attention_type",
683
706
  "param_count", "trainable_param_count",
707
+ # Architecture dimensions (extracted from model config)
708
+ "num_heads", "vocab_size", "hidden_dim", "num_layers",
684
709
  # Distributed signals (P3.5-C)
685
710
  "comm_overhead_pct",
686
711
  ]
@@ -493,10 +493,20 @@ def run(
493
493
  "baseline_run_id": after,
494
494
  "experiment_id": experiment,
495
495
  }
496
- # Merge timing fields from callback sidecar
496
+ # Merge timing + architecture fields from callback sidecar
497
497
  if callback_data:
498
498
  for key in ("step_time_ms_p50", "step_time_ms_p90", "samples_per_sec",
499
- "step_time_ms_mean", "step_time_ms_std"):
499
+ "step_time_ms_mean", "step_time_ms_std",
500
+ "architecture_type", "optimizer_type", "fine_tuning_method",
501
+ "gradient_checkpointing", "model_type", "attention_type",
502
+ "param_count", "trainable_param_count",
503
+ "num_heads", "vocab_size", "hidden_dim", "num_layers",
504
+ "phase_forward_ms_p50", "phase_forward_ms_p90",
505
+ "phase_backward_ms_p50", "phase_backward_ms_p90",
506
+ "phase_optimizer_ms_p50", "phase_optimizer_ms_p90",
507
+ "phase_dataloader_ms_p50", "phase_dataloader_ms_p90",
508
+ "has_phase_timing", "comm_overhead_pct",
509
+ "dataloader_wait_pct", "batch_size"):
500
510
  val = callback_data.get(key)
501
511
  if val is not None:
502
512
  probe_dict[key] = val
@@ -554,8 +564,11 @@ def run(
554
564
  else:
555
565
  console.print("[dim]Tip: alloc login --browser to connect your dashboard[/dim]")
556
566
 
567
+ # Propagate non-zero exit code — but NOT when calibrate mode
568
+ # intentionally killed the process (torchrun exits non-zero on SIGTERM)
557
569
  if result.exit_code and result.exit_code != 0:
558
- raise typer.Exit(result.exit_code)
570
+ if result.stop_reason not in ("stable", "timeout"):
571
+ raise typer.Exit(result.exit_code)
559
572
 
560
573
 
561
574
  @app.command()
@@ -2105,6 +2118,13 @@ def scan(
2105
2118
  """Remote ghost scan via Alloc API — no GPU needed."""
2106
2119
  import httpx
2107
2120
 
2121
+ # When --json, redirect console to stderr so nothing contaminates stdout.
2122
+ from rich.console import Console as _Console
2123
+ if json_output:
2124
+ console = _Console(stderr=True)
2125
+ else:
2126
+ console = _Console()
2127
+
2108
2128
  # Resolve param count from model name or explicit flag
2109
2129
  resolved_param_count = param_count_b or _model_to_params(model)
2110
2130
  if resolved_param_count is None:
@@ -2164,10 +2184,11 @@ def scan(
2164
2184
  resp = client.post(f"{api_url}/scans", json=payload, headers=headers)
2165
2185
  else:
2166
2186
  # Token refresh failed — fall back to unauthenticated scan
2167
- console.print(
2168
- "[yellow]Session expired — falling back to public scan "
2169
- "(org fleet context unavailable). Run `alloc login` to restore.[/yellow]",
2170
- )
2187
+ if not json_output:
2188
+ console.print(
2189
+ "[yellow]Session expired falling back to public scan "
2190
+ "(org fleet context unavailable). Run `alloc login` to restore.[/yellow]",
2191
+ )
2171
2192
  del headers["Authorization"]
2172
2193
  resp = client.post(f"{api_url}/scans/cli", json=payload, headers=headers)
2173
2194
 
@@ -71,6 +71,11 @@ def _extract_architecture(model):
71
71
  or getattr(config, "n_positions", None)
72
72
  or getattr(config, "max_seq_len", None)
73
73
  )
74
+ info["num_heads"] = (
75
+ getattr(config, "num_attention_heads", None)
76
+ or getattr(config, "num_heads", None)
77
+ or getattr(config, "n_head", None)
78
+ )
74
79
  info["model_type"] = getattr(config, "model_type", None)
75
80
  info["vocab_size"] = getattr(config, "vocab_size", None)
76
81
  info["image_size"] = getattr(config, "image_size", None)
@@ -277,6 +282,8 @@ def main():
277
282
  "num_layers": arch_info.get("num_layers"),
278
283
  "seq_length": arch_info.get("seq_length"),
279
284
  "model_type": arch_info.get("model_type"),
285
+ "vocab_size": arch_info.get("vocab_size"),
286
+ "num_heads": arch_info.get("num_heads"),
280
287
  "activation_memory_bytes": activation_result.get("activation_memory_bytes"),
281
288
  "activation_method": activation_result.get("activation_method"),
282
289
  }
@@ -699,8 +699,8 @@ def probe_command(
699
699
  num_gpus_detected=num_gpus_ref[0],
700
700
  process_map=process_map_ref[0],
701
701
  per_gpu_peak_vram_mb=(
702
- [round(per_gpu_peaks_ref[0].get(i, 0), 1) for i in range(num_gpus_ref[0])]
703
- if num_gpus_ref[0] > 1 and per_gpu_peaks_ref[0] else None
702
+ [round(per_gpu_peaks_ref[0][i], 1) for i in sorted(per_gpu_peaks_ref[0])]
703
+ if per_gpu_peaks_ref[0] else None
704
704
  ),
705
705
  detected_interconnect=detected_ic_ref[0],
706
706
  )
@@ -126,9 +126,12 @@ def upload_artifact(artifact_path: str, api_url: str, token: str) -> dict:
126
126
  "process_map": probe.get("process_map"),
127
127
  # Architecture fields: probe (callbacks) takes priority over ghost defaults
128
128
  "batch_size": probe.get("batch_size") or (ghost.get("batch_size") if ghost else None),
129
- "seq_length": ghost.get("seq_length") if ghost else None,
130
- "hidden_dim": ghost.get("hidden_dim") if ghost else None,
131
- "num_layers": ghost.get("num_layers") if ghost else None,
129
+ "seq_length": probe.get("seq_length") or (ghost.get("seq_length") if ghost else None),
130
+ "hidden_dim": probe.get("hidden_dim") or (ghost.get("hidden_dim") if ghost else None),
131
+ "num_layers": probe.get("num_layers") or (ghost.get("num_layers") if ghost else None),
132
+ "vocab_size": probe.get("vocab_size") or (ghost.get("vocab_size") if ghost else None),
133
+ "num_heads": probe.get("num_heads") or (ghost.get("num_heads") if ghost else None),
134
+ "model_type": probe.get("model_type") or (ghost.get("model_type") if ghost else None),
132
135
  "ghost_report": ghost if ghost else None,
133
136
  "source": probe.get("source") or "cli",
134
137
  "command": probe.get("command"),
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.0.12
3
+ Version: 0.0.14
4
4
  Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License-Expression: Apache-2.0
@@ -870,6 +870,33 @@ class TestWriteFullArtifactWithPhases:
870
870
  assert call_kwargs.kwargs.get("output_path") is None # default = alloc_artifact.json.gz
871
871
 
872
872
 
873
+ def test_dimension_fields_in_full_artifact(self, tmp_path):
874
+ """Architecture dimension fields from sidecar must flow to full artifact."""
875
+ monitor = _NvmlMonitor()
876
+ sidecar = {
877
+ "framework": "test",
878
+ "step_count": 10,
879
+ "step_time_ms_p50": 100.0,
880
+ "num_heads": 32,
881
+ "vocab_size": 32000,
882
+ "hidden_dim": 4096,
883
+ "num_layers": 32,
884
+ "model_type": "llama",
885
+ "architecture_type": "transformer",
886
+ }
887
+
888
+ with patch("alloc.artifact_writer.write_report") as mock_write:
889
+ _write_full_artifact(monitor, sidecar)
890
+ assert mock_write.called
891
+ probe = mock_write.call_args.kwargs.get("probe_result")
892
+ assert probe["num_heads"] == 32
893
+ assert probe["vocab_size"] == 32000
894
+ assert probe["hidden_dim"] == 4096
895
+ assert probe["num_layers"] == 32
896
+ assert probe["model_type"] == "llama"
897
+ assert probe["architecture_type"] == "transformer"
898
+
899
+
873
900
  # ── Architecture Detection (P3-A) ──────────────────────────────────────
874
901
 
875
902
  class TestDetectArchitecture:
@@ -1041,6 +1068,97 @@ class TestBuildSidecarArchitecture:
1041
1068
  assert "gradient_checkpointing" not in data
1042
1069
  assert "architecture_type" not in data
1043
1070
 
1071
+ def test_dimension_fields_included(self):
1072
+ """New dimension fields (num_heads, vocab_size, hidden_dim, num_layers) flow through sidecar."""
1073
+ arch_info = {
1074
+ "architecture_type": "transformer",
1075
+ "num_heads": 32,
1076
+ "vocab_size": 32000,
1077
+ "hidden_dim": 4096,
1078
+ "num_layers": 32,
1079
+ "model_type": "llama",
1080
+ }
1081
+ data = _build_sidecar(
1082
+ framework="huggingface",
1083
+ step_count=10,
1084
+ step_times_ms=[100.0],
1085
+ batch_size=8,
1086
+ architecture_info=arch_info,
1087
+ )
1088
+ assert data["num_heads"] == 32
1089
+ assert data["vocab_size"] == 32000
1090
+ assert data["hidden_dim"] == 4096
1091
+ assert data["num_layers"] == 32
1092
+ assert data["model_type"] == "llama"
1093
+
1094
+
1095
+ class TestDetectArchitectureDimensions:
1096
+ """Test that _detect_architecture extracts dimension values from config."""
1097
+
1098
+ def test_extracts_all_dimensions(self):
1099
+ from alloc.callbacks import _detect_architecture
1100
+
1101
+ model = MagicMock()
1102
+ model.config.model_type = "llama"
1103
+ model.config.num_attention_heads = 32
1104
+ model.config.vocab_size = 32000
1105
+ model.config.hidden_size = 4096
1106
+ model.config.num_hidden_layers = 32
1107
+ model.is_gradient_checkpointing = False
1108
+ del model.peft_config
1109
+ del model.config._attn_implementation
1110
+ del model.config.attn_implementation
1111
+ del model.config.num_heads
1112
+ del model.config.n_head
1113
+ del model.config.num_layers
1114
+ del model.config.n_layer
1115
+
1116
+ result = _detect_architecture(model)
1117
+ assert result["num_heads"] == 32
1118
+ assert result["vocab_size"] == 32000
1119
+ assert result["hidden_dim"] == 4096
1120
+ assert result["num_layers"] == 32
1121
+
1122
+ def test_fallback_attr_names(self):
1123
+ """Falls back to alternate attribute names (n_head, n_layer)."""
1124
+ from alloc.callbacks import _detect_architecture
1125
+
1126
+ model = MagicMock()
1127
+ model.config.model_type = "gpt2"
1128
+ model.config.n_head = 12
1129
+ model.config.vocab_size = 50257
1130
+ model.config.hidden_size = 768
1131
+ model.config.n_layer = 12
1132
+ model.is_gradient_checkpointing = False
1133
+ del model.peft_config
1134
+ del model.config._attn_implementation
1135
+ del model.config.attn_implementation
1136
+ del model.config.num_attention_heads
1137
+ del model.config.num_heads
1138
+ del model.config.num_hidden_layers
1139
+ del model.config.num_layers
1140
+
1141
+ result = _detect_architecture(model)
1142
+ assert result["num_heads"] == 12
1143
+ assert result["vocab_size"] == 50257
1144
+ assert result["hidden_dim"] == 768
1145
+ assert result["num_layers"] == 12
1146
+
1147
+ def test_no_config_no_dimensions(self):
1148
+ """Model without config should not have dimension fields."""
1149
+ from alloc.callbacks import _detect_architecture
1150
+
1151
+ model = MagicMock()
1152
+ del model.config
1153
+ del model.peft_config
1154
+ del model.is_gradient_checkpointing
1155
+
1156
+ result = _detect_architecture(model)
1157
+ assert "num_heads" not in result
1158
+ assert "vocab_size" not in result
1159
+ assert "hidden_dim" not in result
1160
+ assert "num_layers" not in result
1161
+
1044
1162
 
1045
1163
  # ── CUDA Event Non-Blocking Sync (C2 Fix) ─────────────────────────────
1046
1164
 
@@ -176,3 +176,75 @@ def test_count_params_dedup_shared():
176
176
  count, dtype_str = _count_params(model)
177
177
  # embed.weight = 100*64 = 6400 params (shared with proj.weight)
178
178
  assert count == 6400
179
+
180
+
181
+ # ---------------------------------------------------------------------------
182
+ # _extract_architecture — vocab_size, num_heads
183
+ # ---------------------------------------------------------------------------
184
+
185
+
186
+ def test_extract_architecture_includes_vocab_size_and_num_heads():
187
+ """Result dict should include vocab_size and num_heads when config has them."""
188
+ from alloc.extractor_runner import _extract_architecture
189
+
190
+ class FakeConfig:
191
+ hidden_size = 4096
192
+ num_hidden_layers = 32
193
+ max_position_embeddings = 2048
194
+ model_type = "llama"
195
+ vocab_size = 32000
196
+ num_attention_heads = 32
197
+
198
+ class FakeModel:
199
+ config = FakeConfig()
200
+
201
+ info = _extract_architecture(FakeModel())
202
+ assert info["vocab_size"] == 32000
203
+ assert info["num_heads"] == 32
204
+ assert info["hidden_dim"] == 4096
205
+ assert info["num_layers"] == 32
206
+ assert info["model_type"] == "llama"
207
+
208
+
209
+ def test_extract_architecture_num_heads_fallback_attrs():
210
+ """num_heads extraction falls back to num_heads and n_head attrs."""
211
+ from alloc.extractor_runner import _extract_architecture
212
+
213
+ # Test num_heads attr
214
+ class ConfigWithNumHeads:
215
+ hidden_size = 768
216
+ num_hidden_layers = 12
217
+ num_heads = 12
218
+ model_type = "custom"
219
+ vocab_size = 50000
220
+
221
+ class FakeModel1:
222
+ config = ConfigWithNumHeads()
223
+
224
+ info = _extract_architecture(FakeModel1())
225
+ assert info["num_heads"] == 12
226
+
227
+ # Test n_head attr
228
+ class ConfigWithNHead:
229
+ hidden_size = 768
230
+ n_layer = 12
231
+ n_head = 12
232
+ model_type = "gpt2"
233
+ vocab_size = 50257
234
+
235
+ class FakeModel2:
236
+ config = ConfigWithNHead()
237
+
238
+ info2 = _extract_architecture(FakeModel2())
239
+ assert info2["num_heads"] == 12
240
+
241
+
242
+ def test_extract_architecture_no_config():
243
+ """Model without config returns empty dict."""
244
+ from alloc.extractor_runner import _extract_architecture
245
+
246
+ class BareModel:
247
+ pass
248
+
249
+ info = _extract_architecture(BareModel())
250
+ assert info == {}
@@ -223,3 +223,84 @@ def test_upload_enriches_payload_from_probe_and_hardware(tmp_path):
223
223
  assert payload["step_time_ms_p90"] == 161.8
224
224
  assert payload["samples_per_sec"] == 22.9
225
225
  assert payload["dataloader_wait_pct"] == 11.3
226
+
227
+
228
+ def test_upload_sends_architecture_dimensions(tmp_path):
229
+ """Upload should send vocab_size, num_heads, model_type from probe and ghost."""
230
+ import gzip
231
+
232
+ artifact = tmp_path / "arch_dims.json.gz"
233
+ data = {
234
+ "probe": {
235
+ "peak_vram_mb": 5000,
236
+ "vocab_size": 32000,
237
+ "num_heads": 32,
238
+ "model_type": "llama",
239
+ "hidden_dim": 4096,
240
+ "num_layers": 32,
241
+ },
242
+ "hardware": {"gpu_name": "NVIDIA A100-SXM4-80GB"},
243
+ "ghost": {
244
+ "seq_length": 2048,
245
+ },
246
+ }
247
+ with gzip.open(str(artifact), "wt", encoding="utf-8") as f:
248
+ json.dump(data, f)
249
+
250
+ mock_response = MagicMock()
251
+ mock_response.status_code = 200
252
+ mock_response.json.return_value = {"run_id": "run-dims", "status": "completed"}
253
+
254
+ mock_client = MagicMock()
255
+ mock_client.__enter__ = MagicMock(return_value=mock_client)
256
+ mock_client.__exit__ = MagicMock(return_value=False)
257
+ mock_client.post.return_value = mock_response
258
+
259
+ with patch("httpx.Client", return_value=mock_client):
260
+ upload_artifact(str(artifact), "http://localhost:8000", "test-token")
261
+
262
+ payload = mock_client.post.call_args.kwargs["json"]
263
+ assert payload["vocab_size"] == 32000
264
+ assert payload["num_heads"] == 32
265
+ assert payload["model_type"] == "llama"
266
+ assert payload["hidden_dim"] == 4096
267
+ assert payload["num_layers"] == 32
268
+ assert payload["seq_length"] == 2048
269
+
270
+
271
+ def test_upload_ghost_fallback_for_dimensions(tmp_path):
272
+ """Ghost fields should be used as fallback when probe doesn't have dimensions."""
273
+ import gzip
274
+
275
+ artifact = tmp_path / "ghost_fallback.json.gz"
276
+ data = {
277
+ "probe": {"peak_vram_mb": 5000},
278
+ "hardware": {"gpu_name": "NVIDIA A100-SXM4-80GB"},
279
+ "ghost": {
280
+ "vocab_size": 50257,
281
+ "num_heads": 12,
282
+ "model_type": "gpt2",
283
+ "hidden_dim": 768,
284
+ "num_layers": 12,
285
+ "seq_length": 1024,
286
+ },
287
+ }
288
+ with gzip.open(str(artifact), "wt", encoding="utf-8") as f:
289
+ json.dump(data, f)
290
+
291
+ mock_response = MagicMock()
292
+ mock_response.status_code = 200
293
+ mock_response.json.return_value = {"run_id": "run-ghost", "status": "completed"}
294
+
295
+ mock_client = MagicMock()
296
+ mock_client.__enter__ = MagicMock(return_value=mock_client)
297
+ mock_client.__exit__ = MagicMock(return_value=False)
298
+ mock_client.post.return_value = mock_response
299
+
300
+ with patch("httpx.Client", return_value=mock_client):
301
+ upload_artifact(str(artifact), "http://localhost:8000", "test-token")
302
+
303
+ payload = mock_client.post.call_args.kwargs["json"]
304
+ assert payload["vocab_size"] == 50257
305
+ assert payload["num_heads"] == 12
306
+ assert payload["model_type"] == "gpt2"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes