PyPI - alloc - Versions diffs - 0.0.12__tar.gz → 0.0.14__tar.gz - Mend

alloc 0.0.12.tar.gz → 0.0.14.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

{alloc-0.0.12 → alloc-0.0.14}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alloc
-Version: 0.0.12
+Version: 0.0.14
 Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
 Author-email: Alloc Labs <hello@alloclabs.com>
 License-Expression: Apache-2.0

{alloc-0.0.12 → alloc-0.0.14}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "alloc"
-version = "0.0.12"
+version = "0.0.14"
 description = "Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints."
 readme = "README.md"
 license = "Apache-2.0"

{alloc-0.0.12 → alloc-0.0.14}/src/alloc/__init__.py RENAMED Viewed

@@ -9,7 +9,7 @@ _warnings.filterwarnings("ignore", category=FutureWarning, module=r"torch\.cuda"
 _warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"torch\.cuda")
 del _warnings
-__version__ = "0.0.12"
+__version__ = "0.0.14"
 from alloc.ghost import ghost, GhostReport
 from alloc.callbacks import AllocCallback as HuggingFaceCallback

{alloc-0.0.12 → alloc-0.0.14}/src/alloc/callbacks.py RENAMED Viewed

@@ -152,6 +152,28 @@ def _detect_architecture(model, optimizer=None, training_args=None):
                 if hasattr(config, "num_attention_heads") or hasattr(config, "num_heads"):
                     info["architecture_type"] = "transformer"
+            # --- Architecture dimensions ---
+            num_heads = (
+                getattr(config, "num_attention_heads", None)
+                or getattr(config, "num_heads", None)
+                or getattr(config, "n_head", None)
+            )
+            if num_heads is not None:
+                info["num_heads"] = num_heads
+            vocab_size = getattr(config, "vocab_size", None)
+            if vocab_size is not None:
+                info["vocab_size"] = vocab_size
+            hidden_dim = getattr(config, "hidden_size", None)
+            if hidden_dim is not None:
+                info["hidden_dim"] = hidden_dim
+            num_layers = (
+                getattr(config, "num_hidden_layers", None)
+                or getattr(config, "num_layers", None)
+                or getattr(config, "n_layer", None)
+            )
+            if num_layers is not None:
+                info["num_layers"] = num_layers
         # --- Gradient checkpointing ---
         gc = getattr(model, "is_gradient_checkpointing", None)
         if gc is not None:
@@ -305,7 +327,8 @@ def _build_sidecar(
     if architecture_info:
         for key in ("architecture_type", "optimizer_type", "fine_tuning_method",
                      "gradient_checkpointing", "model_type", "attention_type",
-                     "param_count", "trainable_param_count"):
+                     "param_count", "trainable_param_count",
+                     "num_heads", "vocab_size", "hidden_dim", "num_layers"):
             val = architecture_info.get(key)
             if val is not None:
                 data[key] = val
@@ -681,6 +704,8 @@ def _write_full_artifact(monitor, sidecar_data, step_times_raw=None):
             "architecture_type", "optimizer_type", "fine_tuning_method",
             "gradient_checkpointing", "model_type", "attention_type",
             "param_count", "trainable_param_count",
+            # Architecture dimensions (extracted from model config)
+            "num_heads", "vocab_size", "hidden_dim", "num_layers",
             # Distributed signals (P3.5-C)
             "comm_overhead_pct",
         ]

{alloc-0.0.12 → alloc-0.0.14}/src/alloc/cli.py RENAMED Viewed

@@ -493,10 +493,20 @@ def run(
             "baseline_run_id": after,
             "experiment_id": experiment,
         }
-        # Merge timing fields from callback sidecar
+        # Merge timing + architecture fields from callback sidecar
         if callback_data:
             for key in ("step_time_ms_p50", "step_time_ms_p90", "samples_per_sec",
-                         "step_time_ms_mean", "step_time_ms_std"):
+                         "step_time_ms_mean", "step_time_ms_std",
+                         "architecture_type", "optimizer_type", "fine_tuning_method",
+                         "gradient_checkpointing", "model_type", "attention_type",
+                         "param_count", "trainable_param_count",
+                         "num_heads", "vocab_size", "hidden_dim", "num_layers",
+                         "phase_forward_ms_p50", "phase_forward_ms_p90",
+                         "phase_backward_ms_p50", "phase_backward_ms_p90",
+                         "phase_optimizer_ms_p50", "phase_optimizer_ms_p90",
+                         "phase_dataloader_ms_p50", "phase_dataloader_ms_p90",
+                         "has_phase_timing", "comm_overhead_pct",
+                         "dataloader_wait_pct", "batch_size"):
                 val = callback_data.get(key)
                 if val is not None:
                     probe_dict[key] = val
@@ -554,8 +564,11 @@ def run(
             else:
                 console.print("[dim]Tip: alloc login --browser to connect your dashboard[/dim]")
+    # Propagate non-zero exit code — but NOT when calibrate mode
+    # intentionally killed the process (torchrun exits non-zero on SIGTERM)
     if result.exit_code and result.exit_code != 0:
-        raise typer.Exit(result.exit_code)
+        if result.stop_reason not in ("stable", "timeout"):
+            raise typer.Exit(result.exit_code)
 @app.command()
@@ -2105,6 +2118,13 @@ def scan(
     """Remote ghost scan via Alloc API — no GPU needed."""
     import httpx
+    # When --json, redirect console to stderr so nothing contaminates stdout.
+    from rich.console import Console as _Console
+    if json_output:
+        console = _Console(stderr=True)
+    else:
+        console = _Console()
     # Resolve param count from model name or explicit flag
     resolved_param_count = param_count_b or _model_to_params(model)
     if resolved_param_count is None:
@@ -2164,10 +2184,11 @@ def scan(
                     resp = client.post(f"{api_url}/scans", json=payload, headers=headers)
                 else:
                     # Token refresh failed — fall back to unauthenticated scan
-                    console.print(
-                        "[yellow]Session expired — falling back to public scan "
-                        "(org fleet context unavailable). Run `alloc login` to restore.[/yellow]",
-                    )
+                    if not json_output:
+                        console.print(
+                            "[yellow]Session expired — falling back to public scan "
+                            "(org fleet context unavailable). Run `alloc login` to restore.[/yellow]",
+                        )
                     del headers["Authorization"]
                     resp = client.post(f"{api_url}/scans/cli", json=payload, headers=headers)

{alloc-0.0.12 → alloc-0.0.14}/src/alloc/extractor_runner.py RENAMED Viewed

@@ -71,6 +71,11 @@ def _extract_architecture(model):
             or getattr(config, "n_positions", None)
             or getattr(config, "max_seq_len", None)
         )
+        info["num_heads"] = (
+            getattr(config, "num_attention_heads", None)
+            or getattr(config, "num_heads", None)
+            or getattr(config, "n_head", None)
+        )
         info["model_type"] = getattr(config, "model_type", None)
         info["vocab_size"] = getattr(config, "vocab_size", None)
         info["image_size"] = getattr(config, "image_size", None)
@@ -277,6 +282,8 @@ def main():
             "num_layers": arch_info.get("num_layers"),
             "seq_length": arch_info.get("seq_length"),
             "model_type": arch_info.get("model_type"),
+            "vocab_size": arch_info.get("vocab_size"),
+            "num_heads": arch_info.get("num_heads"),
             "activation_memory_bytes": activation_result.get("activation_memory_bytes"),
             "activation_method": activation_result.get("activation_method"),
         }

{alloc-0.0.12 → alloc-0.0.14}/src/alloc/probe.py RENAMED Viewed

@@ -699,8 +699,8 @@ def probe_command(
         num_gpus_detected=num_gpus_ref[0],
         process_map=process_map_ref[0],
         per_gpu_peak_vram_mb=(
-            [round(per_gpu_peaks_ref[0].get(i, 0), 1) for i in range(num_gpus_ref[0])]
-            if num_gpus_ref[0] > 1 and per_gpu_peaks_ref[0] else None
+            [round(per_gpu_peaks_ref[0][i], 1) for i in sorted(per_gpu_peaks_ref[0])]
+            if per_gpu_peaks_ref[0] else None
         ),
         detected_interconnect=detected_ic_ref[0],
     )

{alloc-0.0.12 → alloc-0.0.14}/src/alloc/upload.py RENAMED Viewed

@@ -126,9 +126,12 @@ def upload_artifact(artifact_path: str, api_url: str, token: str) -> dict:
         "process_map": probe.get("process_map"),
         # Architecture fields: probe (callbacks) takes priority over ghost defaults
         "batch_size": probe.get("batch_size") or (ghost.get("batch_size") if ghost else None),
-        "seq_length": ghost.get("seq_length") if ghost else None,
-        "hidden_dim": ghost.get("hidden_dim") if ghost else None,
-        "num_layers": ghost.get("num_layers") if ghost else None,
+        "seq_length": probe.get("seq_length") or (ghost.get("seq_length") if ghost else None),
+        "hidden_dim": probe.get("hidden_dim") or (ghost.get("hidden_dim") if ghost else None),
+        "num_layers": probe.get("num_layers") or (ghost.get("num_layers") if ghost else None),
+        "vocab_size": probe.get("vocab_size") or (ghost.get("vocab_size") if ghost else None),
+        "num_heads": probe.get("num_heads") or (ghost.get("num_heads") if ghost else None),
+        "model_type": probe.get("model_type") or (ghost.get("model_type") if ghost else None),
         "ghost_report": ghost if ghost else None,
         "source": probe.get("source") or "cli",
         "command": probe.get("command"),

{alloc-0.0.12 → alloc-0.0.14}/src/alloc.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: alloc
-Version: 0.0.12
+Version: 0.0.14
 Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
 Author-email: Alloc Labs <hello@alloclabs.com>
 License-Expression: Apache-2.0

{alloc-0.0.12 → alloc-0.0.14}/tests/test_callbacks.py RENAMED Viewed

@@ -870,6 +870,33 @@ class TestWriteFullArtifactWithPhases:
             assert call_kwargs.kwargs.get("output_path") is None  # default = alloc_artifact.json.gz
+    def test_dimension_fields_in_full_artifact(self, tmp_path):
+        """Architecture dimension fields from sidecar must flow to full artifact."""
+        monitor = _NvmlMonitor()
+        sidecar = {
+            "framework": "test",
+            "step_count": 10,
+            "step_time_ms_p50": 100.0,
+            "num_heads": 32,
+            "vocab_size": 32000,
+            "hidden_dim": 4096,
+            "num_layers": 32,
+            "model_type": "llama",
+            "architecture_type": "transformer",
+        }
+        with patch("alloc.artifact_writer.write_report") as mock_write:
+            _write_full_artifact(monitor, sidecar)
+            assert mock_write.called
+            probe = mock_write.call_args.kwargs.get("probe_result")
+            assert probe["num_heads"] == 32
+            assert probe["vocab_size"] == 32000
+            assert probe["hidden_dim"] == 4096
+            assert probe["num_layers"] == 32
+            assert probe["model_type"] == "llama"
+            assert probe["architecture_type"] == "transformer"
 # ── Architecture Detection (P3-A) ──────────────────────────────────────
 class TestDetectArchitecture:
@@ -1041,6 +1068,97 @@ class TestBuildSidecarArchitecture:
         assert "gradient_checkpointing" not in data
         assert "architecture_type" not in data
+    def test_dimension_fields_included(self):
+        """New dimension fields (num_heads, vocab_size, hidden_dim, num_layers) flow through sidecar."""
+        arch_info = {
+            "architecture_type": "transformer",
+            "num_heads": 32,
+            "vocab_size": 32000,
+            "hidden_dim": 4096,
+            "num_layers": 32,
+            "model_type": "llama",
+        }
+        data = _build_sidecar(
+            framework="huggingface",
+            step_count=10,
+            step_times_ms=[100.0],
+            batch_size=8,
+            architecture_info=arch_info,
+        )
+        assert data["num_heads"] == 32
+        assert data["vocab_size"] == 32000
+        assert data["hidden_dim"] == 4096
+        assert data["num_layers"] == 32
+        assert data["model_type"] == "llama"
+class TestDetectArchitectureDimensions:
+    """Test that _detect_architecture extracts dimension values from config."""
+    def test_extracts_all_dimensions(self):
+        from alloc.callbacks import _detect_architecture
+        model = MagicMock()
+        model.config.model_type = "llama"
+        model.config.num_attention_heads = 32
+        model.config.vocab_size = 32000
+        model.config.hidden_size = 4096
+        model.config.num_hidden_layers = 32
+        model.is_gradient_checkpointing = False
+        del model.peft_config
+        del model.config._attn_implementation
+        del model.config.attn_implementation
+        del model.config.num_heads
+        del model.config.n_head
+        del model.config.num_layers
+        del model.config.n_layer
+        result = _detect_architecture(model)
+        assert result["num_heads"] == 32
+        assert result["vocab_size"] == 32000
+        assert result["hidden_dim"] == 4096
+        assert result["num_layers"] == 32
+    def test_fallback_attr_names(self):
+        """Falls back to alternate attribute names (n_head, n_layer)."""
+        from alloc.callbacks import _detect_architecture
+        model = MagicMock()
+        model.config.model_type = "gpt2"
+        model.config.n_head = 12
+        model.config.vocab_size = 50257
+        model.config.hidden_size = 768
+        model.config.n_layer = 12
+        model.is_gradient_checkpointing = False
+        del model.peft_config
+        del model.config._attn_implementation
+        del model.config.attn_implementation
+        del model.config.num_attention_heads
+        del model.config.num_heads
+        del model.config.num_hidden_layers
+        del model.config.num_layers
+        result = _detect_architecture(model)
+        assert result["num_heads"] == 12
+        assert result["vocab_size"] == 50257
+        assert result["hidden_dim"] == 768
+        assert result["num_layers"] == 12
+    def test_no_config_no_dimensions(self):
+        """Model without config should not have dimension fields."""
+        from alloc.callbacks import _detect_architecture
+        model = MagicMock()
+        del model.config
+        del model.peft_config
+        del model.is_gradient_checkpointing
+        result = _detect_architecture(model)
+        assert "num_heads" not in result
+        assert "vocab_size" not in result
+        assert "hidden_dim" not in result
+        assert "num_layers" not in result
 # ── CUDA Event Non-Blocking Sync (C2 Fix) ─────────────────────────────

{alloc-0.0.12 → alloc-0.0.14}/tests/test_extractor_activation.py RENAMED Viewed

@@ -176,3 +176,75 @@ def test_count_params_dedup_shared():
     count, dtype_str = _count_params(model)
     # embed.weight = 100*64 = 6400 params (shared with proj.weight)
     assert count == 6400
+# ---------------------------------------------------------------------------
+# _extract_architecture — vocab_size, num_heads
+# ---------------------------------------------------------------------------
+def test_extract_architecture_includes_vocab_size_and_num_heads():
+    """Result dict should include vocab_size and num_heads when config has them."""
+    from alloc.extractor_runner import _extract_architecture
+    class FakeConfig:
+        hidden_size = 4096
+        num_hidden_layers = 32
+        max_position_embeddings = 2048
+        model_type = "llama"
+        vocab_size = 32000
+        num_attention_heads = 32
+    class FakeModel:
+        config = FakeConfig()
+    info = _extract_architecture(FakeModel())
+    assert info["vocab_size"] == 32000
+    assert info["num_heads"] == 32
+    assert info["hidden_dim"] == 4096
+    assert info["num_layers"] == 32
+    assert info["model_type"] == "llama"
+def test_extract_architecture_num_heads_fallback_attrs():
+    """num_heads extraction falls back to num_heads and n_head attrs."""
+    from alloc.extractor_runner import _extract_architecture
+    # Test num_heads attr
+    class ConfigWithNumHeads:
+        hidden_size = 768
+        num_hidden_layers = 12
+        num_heads = 12
+        model_type = "custom"
+        vocab_size = 50000
+    class FakeModel1:
+        config = ConfigWithNumHeads()
+    info = _extract_architecture(FakeModel1())
+    assert info["num_heads"] == 12
+    # Test n_head attr
+    class ConfigWithNHead:
+        hidden_size = 768
+        n_layer = 12
+        n_head = 12
+        model_type = "gpt2"
+        vocab_size = 50257
+    class FakeModel2:
+        config = ConfigWithNHead()
+    info2 = _extract_architecture(FakeModel2())
+    assert info2["num_heads"] == 12
+def test_extract_architecture_no_config():
+    """Model without config returns empty dict."""
+    from alloc.extractor_runner import _extract_architecture
+    class BareModel:
+        pass
+    info = _extract_architecture(BareModel())
+    assert info == {}

{alloc-0.0.12 → alloc-0.0.14}/tests/test_upload.py RENAMED Viewed

@@ -223,3 +223,84 @@ def test_upload_enriches_payload_from_probe_and_hardware(tmp_path):
     assert payload["step_time_ms_p90"] == 161.8
     assert payload["samples_per_sec"] == 22.9
     assert payload["dataloader_wait_pct"] == 11.3
+def test_upload_sends_architecture_dimensions(tmp_path):
+    """Upload should send vocab_size, num_heads, model_type from probe and ghost."""
+    import gzip
+    artifact = tmp_path / "arch_dims.json.gz"
+    data = {
+        "probe": {
+            "peak_vram_mb": 5000,
+            "vocab_size": 32000,
+            "num_heads": 32,
+            "model_type": "llama",
+            "hidden_dim": 4096,
+            "num_layers": 32,
+        },
+        "hardware": {"gpu_name": "NVIDIA A100-SXM4-80GB"},
+        "ghost": {
+            "seq_length": 2048,
+        },
+    }
+    with gzip.open(str(artifact), "wt", encoding="utf-8") as f:
+        json.dump(data, f)
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"run_id": "run-dims", "status": "completed"}
+    mock_client = MagicMock()
+    mock_client.__enter__ = MagicMock(return_value=mock_client)
+    mock_client.__exit__ = MagicMock(return_value=False)
+    mock_client.post.return_value = mock_response
+    with patch("httpx.Client", return_value=mock_client):
+        upload_artifact(str(artifact), "http://localhost:8000", "test-token")
+    payload = mock_client.post.call_args.kwargs["json"]
+    assert payload["vocab_size"] == 32000
+    assert payload["num_heads"] == 32
+    assert payload["model_type"] == "llama"
+    assert payload["hidden_dim"] == 4096
+    assert payload["num_layers"] == 32
+    assert payload["seq_length"] == 2048
+def test_upload_ghost_fallback_for_dimensions(tmp_path):
+    """Ghost fields should be used as fallback when probe doesn't have dimensions."""
+    import gzip
+    artifact = tmp_path / "ghost_fallback.json.gz"
+    data = {
+        "probe": {"peak_vram_mb": 5000},
+        "hardware": {"gpu_name": "NVIDIA A100-SXM4-80GB"},
+        "ghost": {
+            "vocab_size": 50257,
+            "num_heads": 12,
+            "model_type": "gpt2",
+            "hidden_dim": 768,
+            "num_layers": 12,
+            "seq_length": 1024,
+        },
+    }
+    with gzip.open(str(artifact), "wt", encoding="utf-8") as f:
+        json.dump(data, f)
+    mock_response = MagicMock()
+    mock_response.status_code = 200
+    mock_response.json.return_value = {"run_id": "run-ghost", "status": "completed"}
+    mock_client = MagicMock()
+    mock_client.__enter__ = MagicMock(return_value=mock_client)
+    mock_client.__exit__ = MagicMock(return_value=False)
+    mock_client.post.return_value = mock_response
+    with patch("httpx.Client", return_value=mock_client):
+        upload_artifact(str(artifact), "http://localhost:8000", "test-token")
+    payload = mock_client.post.call_args.kwargs["json"]
+    assert payload["vocab_size"] == 50257
+    assert payload["num_heads"] == 12
+    assert payload["model_type"] == "gpt2"