alloc 0.0.9__tar.gz → 0.0.10__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {alloc-0.0.9 → alloc-0.0.10}/PKG-INFO +2 -2
  2. {alloc-0.0.9 → alloc-0.0.10}/README.md +1 -1
  3. {alloc-0.0.9 → alloc-0.0.10}/pyproject.toml +1 -1
  4. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/__init__.py +1 -1
  5. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/browser_auth.py +3 -2
  6. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/callbacks.py +4 -1
  7. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/diagnosis_engine.py +1 -1
  8. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/probe.py +13 -2
  9. {alloc-0.0.9 → alloc-0.0.10}/src/alloc.egg-info/PKG-INFO +2 -2
  10. {alloc-0.0.9 → alloc-0.0.10}/tests/test_callbacks.py +68 -0
  11. {alloc-0.0.9 → alloc-0.0.10}/tests/test_diagnosis_engine.py +12 -0
  12. {alloc-0.0.9 → alloc-0.0.10}/tests/test_probe_multi.py +55 -0
  13. {alloc-0.0.9 → alloc-0.0.10}/setup.cfg +0 -0
  14. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/artifact_loader.py +0 -0
  15. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/artifact_writer.py +0 -0
  16. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/catalog/__init__.py +0 -0
  17. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/catalog/default_rate_card.json +0 -0
  18. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/catalog/gpus.v1.json +0 -0
  19. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/cli.py +0 -0
  20. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/code_analyzer.py +0 -0
  21. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/config.py +0 -0
  22. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/context.py +0 -0
  23. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/diagnosis_display.py +0 -0
  24. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/diagnosis_rules.py +0 -0
  25. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/display.py +0 -0
  26. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/extractor_runner.py +0 -0
  27. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/ghost.py +0 -0
  28. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/model_extractor.py +0 -0
  29. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/model_registry.py +0 -0
  30. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/stability.py +0 -0
  31. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/upload.py +0 -0
  32. {alloc-0.0.9 → alloc-0.0.10}/src/alloc/yaml_config.py +0 -0
  33. {alloc-0.0.9 → alloc-0.0.10}/src/alloc.egg-info/SOURCES.txt +0 -0
  34. {alloc-0.0.9 → alloc-0.0.10}/src/alloc.egg-info/dependency_links.txt +0 -0
  35. {alloc-0.0.9 → alloc-0.0.10}/src/alloc.egg-info/entry_points.txt +0 -0
  36. {alloc-0.0.9 → alloc-0.0.10}/src/alloc.egg-info/requires.txt +0 -0
  37. {alloc-0.0.9 → alloc-0.0.10}/src/alloc.egg-info/top_level.txt +0 -0
  38. {alloc-0.0.9 → alloc-0.0.10}/tests/test_artifact.py +0 -0
  39. {alloc-0.0.9 → alloc-0.0.10}/tests/test_artifact_loader.py +0 -0
  40. {alloc-0.0.9 → alloc-0.0.10}/tests/test_auth.py +0 -0
  41. {alloc-0.0.9 → alloc-0.0.10}/tests/test_catalog.py +0 -0
  42. {alloc-0.0.9 → alloc-0.0.10}/tests/test_cli.py +0 -0
  43. {alloc-0.0.9 → alloc-0.0.10}/tests/test_code_analyzer.py +0 -0
  44. {alloc-0.0.9 → alloc-0.0.10}/tests/test_context.py +0 -0
  45. {alloc-0.0.9 → alloc-0.0.10}/tests/test_diagnose_cli.py +0 -0
  46. {alloc-0.0.9 → alloc-0.0.10}/tests/test_diagnosis_rules.py +0 -0
  47. {alloc-0.0.9 → alloc-0.0.10}/tests/test_extractor_activation.py +0 -0
  48. {alloc-0.0.9 → alloc-0.0.10}/tests/test_ghost.py +0 -0
  49. {alloc-0.0.9 → alloc-0.0.10}/tests/test_ghost_degradation.py +0 -0
  50. {alloc-0.0.9 → alloc-0.0.10}/tests/test_init_from_org.py +0 -0
  51. {alloc-0.0.9 → alloc-0.0.10}/tests/test_interconnect.py +0 -0
  52. {alloc-0.0.9 → alloc-0.0.10}/tests/test_model_extractor.py +0 -0
  53. {alloc-0.0.9 → alloc-0.0.10}/tests/test_probe_hw.py +0 -0
  54. {alloc-0.0.9 → alloc-0.0.10}/tests/test_scan_auth.py +0 -0
  55. {alloc-0.0.9 → alloc-0.0.10}/tests/test_stability.py +0 -0
  56. {alloc-0.0.9 → alloc-0.0.10}/tests/test_topology_strategy.py +0 -0
  57. {alloc-0.0.9 → alloc-0.0.10}/tests/test_upload.py +0 -0
  58. {alloc-0.0.9 → alloc-0.0.10}/tests/test_verdict.py +0 -0
  59. {alloc-0.0.9 → alloc-0.0.10}/tests/test_yaml_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License-Expression: Apache-2.0
@@ -40,7 +40,7 @@ alloc run python train.py
40
40
  ```
41
41
 
42
42
  ```
43
- alloc v0.0.8 — Calibrate
43
+ alloc v0.0.9 — Calibrate
44
44
 
45
45
  Run Summary
46
46
  Peak VRAM 31.2 GB / 40.0 GB (A100)
@@ -12,7 +12,7 @@ alloc run python train.py
12
12
  ```
13
13
 
14
14
  ```
15
- alloc v0.0.8 — Calibrate
15
+ alloc v0.0.9 — Calibrate
16
16
 
17
17
  Run Summary
18
18
  Peak VRAM 31.2 GB / 40.0 GB (A100)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alloc"
7
- version = "0.0.9"
7
+ version = "0.0.10"
8
8
  description = "Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -9,7 +9,7 @@ _warnings.filterwarnings("ignore", category=FutureWarning, module=r"torch\.cuda"
9
9
  _warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"torch\.cuda")
10
10
  del _warnings
11
11
 
12
- __version__ = "0.0.9"
12
+ __version__ = "0.0.10"
13
13
 
14
14
  from alloc.ghost import ghost, GhostReport
15
15
  from alloc.callbacks import AllocCallback as HuggingFaceCallback
@@ -121,8 +121,9 @@ def browser_login(
121
121
  })
122
122
  authorize_url = f"{supabase_url}/auth/v1/authorize?{authorize_params}"
123
123
 
124
- # Bind to 0.0.0.0 so both localhost and 127.0.0.1 reach the server.
125
- server = HTTPServer(("0.0.0.0", port), _CallbackHandler)
124
+ # Bind to 127.0.0.1 only — the auth callback server should never be
125
+ # reachable from the network.
126
+ server = HTTPServer(("127.0.0.1", port), _CallbackHandler)
126
127
  server.auth_code = None # type: ignore[attr-defined]
127
128
  server.auth_error = None # type: ignore[attr-defined]
128
129
  server.timeout = 1 # poll interval for handle_request()
@@ -501,7 +501,10 @@ class _NvmlMonitor:
501
501
 
502
502
  self._hw_context["nvlink_active_links"] = active_links
503
503
  except Exception:
504
- pass
504
+ # NVLink detection code failed after entering the try block.
505
+ # We know NVML is functional (handles exist), so fall back to
506
+ # generic "nvlink" rather than leaving interconnect_type unset.
507
+ self._hw_context["interconnect_type"] = "nvlink"
505
508
 
506
509
  self._thread = threading.Thread(target=self._sample_loop, daemon=True)
507
510
  self._thread.start()
@@ -403,7 +403,7 @@ def _estimate_model_params(model_name: str) -> Optional[float]:
403
403
  "whisper-large": 1.55,
404
404
  }
405
405
 
406
- for key, params in estimates.items():
406
+ for key, params in sorted(estimates.items(), key=lambda x: len(x[0]), reverse=True):
407
407
  if key in name:
408
408
  return params
409
409
 
@@ -215,8 +215,19 @@ def _discover_gpu_indices(proc_pid, pynvml, fallback_index=0, expected_gpus=None
215
215
  if 0 <= idx < device_count:
216
216
  visible_physical.append(idx)
217
217
  except ValueError:
218
- visible_physical = list(range(device_count))
219
- break
218
+ # UUID-style device identifiers — try NVML UUID matching
219
+ try:
220
+ for phys_idx in range(device_count):
221
+ handle = pynvml.nvmlDeviceGetHandleByIndex(phys_idx)
222
+ uuid = pynvml.nvmlDeviceGetUUID(handle)
223
+ if isinstance(uuid, bytes):
224
+ uuid = uuid.decode("utf-8", errors="replace")
225
+ if d in uuid:
226
+ visible_physical.append(phys_idx)
227
+ break
228
+ except Exception:
229
+ visible_physical = list(range(device_count))
230
+ break
220
231
  search_indices = visible_physical if visible_physical else list(range(device_count))
221
232
  else:
222
233
  search_indices = list(range(device_count))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.0.9
3
+ Version: 0.0.10
4
4
  Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License-Expression: Apache-2.0
@@ -40,7 +40,7 @@ alloc run python train.py
40
40
  ```
41
41
 
42
42
  ```
43
- alloc v0.0.8 — Calibrate
43
+ alloc v0.0.9 — Calibrate
44
44
 
45
45
  Run Summary
46
46
  Peak VRAM 31.2 GB / 40.0 GB (A100)
@@ -1189,3 +1189,71 @@ class TestNvmlMonitorThreadSafety:
1189
1189
  assert len(probe["per_rank_peak_vram_mb"]) == 2
1190
1190
  for peak in probe["per_rank_peak_vram_mb"]:
1191
1191
  assert peak > 0
1192
+
1193
+
1194
+ class TestNvmlMonitorNvlinkFallback:
1195
+ def test_nvlink_detection_failure_sets_nvlink_fallback(self):
1196
+ """When the outer NVLink detection block raises, fall back to 'nvlink'.
1197
+
1198
+ We trigger this by making nvmlDeviceGetNvLinkState raise on the first
1199
+ call (inner except breaks the loop → active_links=0 → 'pcie'), and
1200
+ then making the active_links comparison itself blow up. The simplest
1201
+ trigger is having _gpu_handles[0] raise IndexError (empty list after
1202
+ the early-return guard).
1203
+ """
1204
+ mock_pynvml = MagicMock()
1205
+ mock_pynvml.nvmlInit.return_value = None
1206
+ mock_pynvml.nvmlShutdown.return_value = None
1207
+ mock_pynvml.nvmlDeviceGetCount.return_value = 2
1208
+ mock_pynvml.nvmlDeviceGetName.return_value = "NVIDIA A100-SXM4-80GB"
1209
+ mem = SimpleNamespace(total=80 * 1024**3, used=1 * 1024**3)
1210
+ mock_pynvml.nvmlDeviceGetMemoryInfo.return_value = mem
1211
+ mock_pynvml.nvmlSystemGetDriverVersion.return_value = "535"
1212
+ mock_pynvml.nvmlSystemGetCudaDriverVersion.return_value = 12000
1213
+ mock_pynvml.nvmlDeviceGetCudaComputeCapability.return_value = (8, 0)
1214
+ util = SimpleNamespace(gpu=75, memory=60)
1215
+ mock_pynvml.nvmlDeviceGetUtilizationRates.return_value = util
1216
+ mock_pynvml.nvmlDeviceGetPowerUsage.return_value = 300000
1217
+
1218
+ # Use a handle list that passes the `if not self._gpu_handles` guard
1219
+ # (it's truthy) but raises IndexError on `self._gpu_handles[0]`.
1220
+ class BadHandleList:
1221
+ """Truthy but raises on index access."""
1222
+ def __bool__(self):
1223
+ return True
1224
+ def __len__(self):
1225
+ return 2
1226
+ def __iter__(self):
1227
+ return iter([])
1228
+ def __getitem__(self, idx):
1229
+ raise IndexError("corrupted handle list")
1230
+
1231
+ with patch("alloc.callbacks._try_import_pynvml", return_value=mock_pynvml):
1232
+ monitor = _NvmlMonitor()
1233
+
1234
+ # Replace handles after __init__ but before start().
1235
+ # start() will re-populate from nvmlDeviceGetCount, so we also need to
1236
+ # make the handle-building loop produce our bad list. We do this by
1237
+ # patching nvmlDeviceGetHandleByIndex to raise, so _gpu_handles stays
1238
+ # empty after the try/except in handle building. But that triggers
1239
+ # the early return. Instead, we patch _gpu_handles AFTER start()
1240
+ # builds them but BEFORE NVLink detection runs. We achieve this by
1241
+ # having nvmlDeviceGetCudaComputeCapability (the last hw-context call
1242
+ # before NVLink detection) swap in the bad handles as a side effect.
1243
+ original_sm = mock_pynvml.nvmlDeviceGetCudaComputeCapability
1244
+
1245
+ def swap_handles_then_return_sm(handle):
1246
+ monitor._gpu_handles = BadHandleList()
1247
+ return (8, 0)
1248
+
1249
+ mock_pynvml.nvmlDeviceGetCudaComputeCapability = MagicMock(
1250
+ side_effect=swap_handles_then_return_sm
1251
+ )
1252
+
1253
+ monitor.start()
1254
+ import time
1255
+ time.sleep(0.02)
1256
+ monitor.stop()
1257
+
1258
+ hw, _ = monitor.get_results()
1259
+ assert hw.get("interconnect_type") == "nvlink"
@@ -359,3 +359,15 @@ def test_estimate_model_params_known_vision_model():
359
359
 
360
360
  result = _estimate_model_params("stable-diffusion")
361
361
  assert result == 0.865
362
+
363
+
364
+ def test_estimate_model_params_gpt2_medium_prefix_match():
365
+ """gpt2-medium-finetuned should match gpt2-medium (0.355), not gpt2 (0.124)."""
366
+ result = _estimate_model_params("gpt2-medium-finetuned")
367
+ assert result == 0.355
368
+
369
+
370
+ def test_estimate_model_params_gpt2_alone():
371
+ """Plain gpt2 should still match 0.124."""
372
+ result = _estimate_model_params("gpt2")
373
+ assert result == 0.124
@@ -158,6 +158,61 @@ def test_parse_plain_python():
158
158
  assert _parse_launcher_gpu_count(["python", "train.py"]) is None
159
159
 
160
160
 
161
+ # ── CVD UUID resolution ──
162
+
163
+
164
+ def test_cvd_uuid_resolves_to_correct_index():
165
+ """UUID-style CUDA_VISIBLE_DEVICES should resolve to the matching physical GPU index."""
166
+ mock = _mock_pynvml_multi_gpu(
167
+ proc_pid=1000,
168
+ gpu_process_map={0: [1000], 1: [], 2: []},
169
+ )
170
+ mock.nvmlDeviceGetCount.return_value = 3
171
+
172
+ # Set up UUID resolution: GPU 0 → UUID-A, GPU 1 → UUID-B, GPU 2 → UUID-C
173
+ uuid_map = {0: "GPU-aaaa-1111", 1: "GPU-bbbb-2222", 2: "GPU-cccc-3333"}
174
+ handles = {}
175
+ for idx in range(3):
176
+ handles[idx] = MagicMock(name=f"handle_{idx}")
177
+
178
+ def get_handle(idx):
179
+ return handles[idx]
180
+
181
+ def get_uuid(handle):
182
+ for idx, h in handles.items():
183
+ if handle == h:
184
+ return uuid_map[idx]
185
+ return "GPU-unknown"
186
+
187
+ mock.nvmlDeviceGetHandleByIndex = MagicMock(side_effect=get_handle)
188
+ mock.nvmlDeviceGetUUID = MagicMock(side_effect=get_uuid)
189
+
190
+ # CVD set to GPU 2's UUID
191
+ with patch("alloc.probe._get_child_pids", return_value=[]):
192
+ with patch.dict("os.environ", {"CUDA_VISIBLE_DEVICES": "GPU-cccc-3333"}):
193
+ result = _discover_gpu_indices(1000, mock, fallback_index=0)
194
+ # Should only search GPU index 2
195
+ assert 2 in result or result == [0] # either found on idx 2, or fallback if no PID match
196
+
197
+
198
+ def test_cvd_invalid_uuid_falls_back_to_all_gpus():
199
+ """Invalid UUID that doesn't match any device should fall back to all GPUs."""
200
+ mock = _mock_pynvml_multi_gpu(
201
+ proc_pid=1000,
202
+ gpu_process_map={0: [1000], 1: []},
203
+ )
204
+ mock.nvmlDeviceGetCount.return_value = 2
205
+
206
+ # UUID lookup raises for all devices
207
+ mock.nvmlDeviceGetUUID = MagicMock(side_effect=RuntimeError("no UUID support"))
208
+
209
+ with patch("alloc.probe._get_child_pids", return_value=[]):
210
+ with patch.dict("os.environ", {"CUDA_VISIBLE_DEVICES": "GPU-nonexistent"}):
211
+ result = _discover_gpu_indices(1000, mock, fallback_index=0)
212
+ # Should fall back to searching all GPUs and find PID 1000 on GPU 0
213
+ assert 0 in result
214
+
215
+
161
216
  def test_parse_torch_distributed_launch():
162
217
  assert _parse_launcher_gpu_count([
163
218
  "python", "-m", "torch.distributed.launch", "--nproc_per_node=2", "train.py"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes