alloc 0.0.8__tar.gz → 0.0.9__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {alloc-0.0.8 → alloc-0.0.9}/PKG-INFO +2 -2
  2. {alloc-0.0.8 → alloc-0.0.9}/README.md +1 -1
  3. {alloc-0.0.8 → alloc-0.0.9}/pyproject.toml +1 -1
  4. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/__init__.py +1 -1
  5. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/cli.py +23 -6
  6. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/extractor_runner.py +24 -1
  7. {alloc-0.0.8 → alloc-0.0.9}/src/alloc.egg-info/PKG-INFO +2 -2
  8. {alloc-0.0.8 → alloc-0.0.9}/tests/test_auth.py +29 -0
  9. {alloc-0.0.8 → alloc-0.0.9}/tests/test_topology_strategy.py +9 -3
  10. {alloc-0.0.8 → alloc-0.0.9}/setup.cfg +0 -0
  11. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/artifact_loader.py +0 -0
  12. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/artifact_writer.py +0 -0
  13. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/browser_auth.py +0 -0
  14. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/callbacks.py +0 -0
  15. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/catalog/__init__.py +0 -0
  16. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/catalog/default_rate_card.json +0 -0
  17. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/catalog/gpus.v1.json +0 -0
  18. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/code_analyzer.py +0 -0
  19. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/config.py +0 -0
  20. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/context.py +0 -0
  21. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/diagnosis_display.py +0 -0
  22. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/diagnosis_engine.py +0 -0
  23. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/diagnosis_rules.py +0 -0
  24. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/display.py +0 -0
  25. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/ghost.py +0 -0
  26. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/model_extractor.py +0 -0
  27. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/model_registry.py +0 -0
  28. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/probe.py +0 -0
  29. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/stability.py +0 -0
  30. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/upload.py +0 -0
  31. {alloc-0.0.8 → alloc-0.0.9}/src/alloc/yaml_config.py +0 -0
  32. {alloc-0.0.8 → alloc-0.0.9}/src/alloc.egg-info/SOURCES.txt +0 -0
  33. {alloc-0.0.8 → alloc-0.0.9}/src/alloc.egg-info/dependency_links.txt +0 -0
  34. {alloc-0.0.8 → alloc-0.0.9}/src/alloc.egg-info/entry_points.txt +0 -0
  35. {alloc-0.0.8 → alloc-0.0.9}/src/alloc.egg-info/requires.txt +0 -0
  36. {alloc-0.0.8 → alloc-0.0.9}/src/alloc.egg-info/top_level.txt +0 -0
  37. {alloc-0.0.8 → alloc-0.0.9}/tests/test_artifact.py +0 -0
  38. {alloc-0.0.8 → alloc-0.0.9}/tests/test_artifact_loader.py +0 -0
  39. {alloc-0.0.8 → alloc-0.0.9}/tests/test_callbacks.py +0 -0
  40. {alloc-0.0.8 → alloc-0.0.9}/tests/test_catalog.py +0 -0
  41. {alloc-0.0.8 → alloc-0.0.9}/tests/test_cli.py +0 -0
  42. {alloc-0.0.8 → alloc-0.0.9}/tests/test_code_analyzer.py +0 -0
  43. {alloc-0.0.8 → alloc-0.0.9}/tests/test_context.py +0 -0
  44. {alloc-0.0.8 → alloc-0.0.9}/tests/test_diagnose_cli.py +0 -0
  45. {alloc-0.0.8 → alloc-0.0.9}/tests/test_diagnosis_engine.py +0 -0
  46. {alloc-0.0.8 → alloc-0.0.9}/tests/test_diagnosis_rules.py +0 -0
  47. {alloc-0.0.8 → alloc-0.0.9}/tests/test_extractor_activation.py +0 -0
  48. {alloc-0.0.8 → alloc-0.0.9}/tests/test_ghost.py +0 -0
  49. {alloc-0.0.8 → alloc-0.0.9}/tests/test_ghost_degradation.py +0 -0
  50. {alloc-0.0.8 → alloc-0.0.9}/tests/test_init_from_org.py +0 -0
  51. {alloc-0.0.8 → alloc-0.0.9}/tests/test_interconnect.py +0 -0
  52. {alloc-0.0.8 → alloc-0.0.9}/tests/test_model_extractor.py +0 -0
  53. {alloc-0.0.8 → alloc-0.0.9}/tests/test_probe_hw.py +0 -0
  54. {alloc-0.0.8 → alloc-0.0.9}/tests/test_probe_multi.py +0 -0
  55. {alloc-0.0.8 → alloc-0.0.9}/tests/test_scan_auth.py +0 -0
  56. {alloc-0.0.8 → alloc-0.0.9}/tests/test_stability.py +0 -0
  57. {alloc-0.0.8 → alloc-0.0.9}/tests/test_upload.py +0 -0
  58. {alloc-0.0.8 → alloc-0.0.9}/tests/test_verdict.py +0 -0
  59. {alloc-0.0.8 → alloc-0.0.9}/tests/test_yaml_config.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License-Expression: Apache-2.0
@@ -40,7 +40,7 @@ alloc run python train.py
40
40
  ```
41
41
 
42
42
  ```
43
- alloc v0.0.2 — Calibrate
43
+ alloc v0.0.8 — Calibrate
44
44
 
45
45
  Run Summary
46
46
  Peak VRAM 31.2 GB / 40.0 GB (A100)
@@ -12,7 +12,7 @@ alloc run python train.py
12
12
  ```
13
13
 
14
14
  ```
15
- alloc v0.0.2 — Calibrate
15
+ alloc v0.0.8 — Calibrate
16
16
 
17
17
  Run Summary
18
18
  Peak VRAM 31.2 GB / 40.0 GB (A100)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "alloc"
7
- version = "0.0.8"
7
+ version = "0.0.9"
8
8
  description = "Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -9,7 +9,7 @@ _warnings.filterwarnings("ignore", category=FutureWarning, module=r"torch\.cuda"
9
9
  _warnings.filterwarnings("ignore", category=DeprecationWarning, module=r"torch\.cuda")
10
10
  del _warnings
11
11
 
12
- __version__ = "0.0.8"
12
+ __version__ = "0.0.9"
13
13
 
14
14
  from alloc.ghost import ghost, GhostReport
15
15
  from alloc.callbacks import AllocCallback as HuggingFaceCallback
@@ -2400,23 +2400,33 @@ def whoami(
2400
2400
  profile = _get("/profile")
2401
2401
  fleet = _get("/gpu-fleet")
2402
2402
  else:
2403
- if json_output:
2403
+ # whoami is a status command — report structured result, exit 0
2404
+ if e.response.status_code == 401:
2405
+ out["token_status"] = "expired"
2406
+ else:
2407
+ out["token_status"] = "error"
2404
2408
  out["error"] = f"API error {e.response.status_code}"
2409
+ if json_output:
2405
2410
  _print_json(out)
2406
2411
  else:
2407
- console.print(f"[red]API error {e.response.status_code}[/red]")
2412
+ if e.response.status_code == 401:
2413
+ console.print("[yellow]Token expired.[/yellow]")
2414
+ else:
2415
+ console.print(f"[red]API error {e.response.status_code}[/red]")
2408
2416
  console.print("[dim]Run: alloc login[/dim]")
2409
- raise typer.Exit(1)
2417
+ return
2410
2418
  except httpx.ConnectError:
2419
+ out["token_status"] = "unreachable"
2420
+ out["error"] = f"Cannot connect to {api_url}"
2411
2421
  if json_output:
2412
- out["error"] = f"Cannot connect to {api_url}"
2413
2422
  _print_json(out)
2414
2423
  else:
2415
2424
  console.print(f"[red]Cannot connect to {api_url}[/red]")
2416
- raise typer.Exit(1)
2425
+ return
2417
2426
 
2418
2427
  # API validated the token — now we know login is real
2419
2428
  out["logged_in"] = True
2429
+ out["token_status"] = "valid"
2420
2430
 
2421
2431
  gpus = fleet.get("gpus") or []
2422
2432
  fleet_count = len([g for g in gpus if g.get("fleet_status") == "in_fleet"])
@@ -3565,7 +3575,14 @@ def _infer_parallel_topology_from_env(*, num_gpus_detected: int, config_intercon
3565
3575
  strategy = "pp+dp" if (dp is not None and dp > 1) else "pp"
3566
3576
  elif dp is not None and dp > 1:
3567
3577
  strategy = "ddp"
3568
- # If none of the above matched, strategy stays None (unknown)
3578
+ elif strategy is None and num_gpus_detected > 1 and not has_tp and not has_pp:
3579
+ # Multiple GPUs detected via NVML with no TP/PP env vars →
3580
+ # DDP is PyTorch's default and the only realistic inference.
3581
+ # This is NOT the old `or "ddp"` — it only fires when probe
3582
+ # actually observed multiple GPU processes.
3583
+ strategy = "ddp"
3584
+ if dp is None:
3585
+ dp = num_gpus_detected
3569
3586
 
3570
3587
  return {
3571
3588
  "num_nodes": nnodes or 1,
@@ -281,7 +281,30 @@ def main():
281
281
  "activation_method": activation_result.get("activation_method"),
282
282
  }
283
283
  else:
284
- result = {"status": "no_model"}
284
+ # No model found — check if this is a distributed training script
285
+ # that hides the model inside __main__ guard or main()
286
+ _is_dist = False
287
+ try:
288
+ import torch.distributed as _dist_mod
289
+ if _dist_mod.is_initialized():
290
+ _is_dist = True
291
+ except Exception:
292
+ pass
293
+ if not _is_dist:
294
+ # Check if module imported distributed primitives
295
+ for attr_name in dir(module):
296
+ try:
297
+ obj = getattr(module, attr_name)
298
+ mod_name = getattr(obj, "__module__", "") or ""
299
+ if "torch.distributed" in mod_name or "torch.nn.parallel" in mod_name:
300
+ _is_dist = True
301
+ break
302
+ except Exception:
303
+ continue
304
+ if _is_dist:
305
+ result = {"status": "error_distributed", "error": "no model found — script uses distributed training"}
306
+ else:
307
+ result = {"status": "no_model"}
285
308
 
286
309
  with open(sidecar_path, "w") as f:
287
310
  json.dump(result, f)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: alloc
3
- Version: 0.0.8
3
+ Version: 0.0.9
4
4
  Summary: Engineer-first training calibration: estimate VRAM fit, profile short runs, and pick GPU configs under real budget constraints.
5
5
  Author-email: Alloc Labs <hello@alloclabs.com>
6
6
  License-Expression: Apache-2.0
@@ -40,7 +40,7 @@ alloc run python train.py
40
40
  ```
41
41
 
42
42
  ```
43
- alloc v0.0.2 — Calibrate
43
+ alloc v0.0.8 — Calibrate
44
44
 
45
45
  Run Summary
46
46
  Peak VRAM 31.2 GB / 40.0 GB (A100)
@@ -68,6 +68,34 @@ def test_whoami_not_logged_in_json(tmp_path: Path):
68
68
  assert data["api_url"] == "https://api.example.com"
69
69
 
70
70
 
71
+ def test_whoami_stale_token_json(tmp_path: Path):
72
+ """Stale token should exit 0 with token_status: expired."""
73
+ mock_resp = MagicMock()
74
+ mock_resp.status_code = 401
75
+ mock_resp.raise_for_status.side_effect = httpx.HTTPStatusError(
76
+ "Unauthorized", request=MagicMock(), response=mock_resp,
77
+ )
78
+ mock_client = MagicMock()
79
+ mock_client.__enter__.return_value = mock_client
80
+ mock_client.__exit__.return_value = False
81
+ mock_client.get.return_value = mock_resp
82
+
83
+ env = {
84
+ "HOME": str(tmp_path),
85
+ "ALLOC_API_URL": "https://api.example.com",
86
+ "ALLOC_TOKEN": "stale-token",
87
+ }
88
+
89
+ with patch("httpx.Client", return_value=mock_client), \
90
+ patch("alloc.cli.try_refresh_access_token", return_value=None):
91
+ result = runner.invoke(app, ["whoami", "--json"], env=env)
92
+
93
+ assert result.exit_code == 0
94
+ data = json.loads(result.output)
95
+ assert data["logged_in"] is False
96
+ assert data["token_status"] == "expired"
97
+
98
+
71
99
  def test_whoami_logged_in_json(tmp_path: Path):
72
100
  profile_resp = MagicMock()
73
101
  profile_resp.raise_for_status.return_value = None
@@ -110,6 +138,7 @@ def test_whoami_logged_in_json(tmp_path: Path):
110
138
  assert result.exit_code == 0
111
139
  data = json.loads(result.output)
112
140
  assert data["logged_in"] is True
141
+ assert data["token_status"] == "valid"
113
142
  assert data["token_source"] == "env"
114
143
  assert data["email"] == "user@example.com"
115
144
  assert data["fleet_count"] == 1
@@ -18,9 +18,15 @@ class TestStrategyInference:
18
18
  num_gpus_detected=num_gpus,
19
19
  )
20
20
 
21
- def test_no_degrees_strategy_none(self):
22
- """When no degree env vars set, strategy should be None."""
23
- result = self._topo({})
21
+ def test_no_degrees_multi_gpu_infers_ddp(self):
22
+ """When no degree env vars but multiple GPUs detected, infer DDP."""
23
+ result = self._topo({}, num_gpus=4)
24
+ assert result["strategy"] == "ddp"
25
+ assert result["dp_degree"] == 4
26
+
27
+ def test_single_gpu_no_degrees_strategy_none(self):
28
+ """Single GPU with no degrees → strategy stays None."""
29
+ result = self._topo({}, num_gpus=1)
24
30
  assert result["strategy"] is None
25
31
 
26
32
  def test_dp_only_is_ddp(self):
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes