alloc 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
alloc-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,127 @@
1
+ Metadata-Version: 2.4
2
+ Name: alloc
3
+ Version: 0.1.0
4
+ Summary: GPU intelligence for ML training — right-size before you launch.
5
+ Author-email: Alloc Labs <hello@alloclabs.com>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://alloclabs.com
8
+ Project-URL: Repository, https://github.com/alloc-labs/alloc
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.8
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Requires-Python: >=3.8
19
+ Description-Content-Type: text/markdown
20
+ Requires-Dist: typer>=0.9.0
21
+ Requires-Dist: rich>=13.0.0
22
+ Requires-Dist: httpx>=0.24.0
23
+ Requires-Dist: pydantic>=2.0.0
24
+ Provides-Extra: gpu
25
+ Requires-Dist: pynvml>=11.5.0; extra == "gpu"
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
28
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
29
+
30
+ # Alloc CLI
31
+
32
+ GPU intelligence for ML training. Right-size before you launch.
33
+
34
+ ## Install
35
+
36
+ ```bash
37
+ pip install alloc
38
+
39
+ # With GPU monitoring support
40
+ pip install "alloc[gpu]"
41
+ ```
42
+
43
+ ## Commands
44
+
45
+ ### `alloc scan` — Remote Ghost Scan (no GPU needed)
46
+
47
+ ```bash
48
+ alloc scan --model llama-3-70b --gpu A100-80GB
49
+ alloc scan --model mistral-7b --gpu A10G --strategy fsdp --num-gpus 4
50
+ alloc scan --param-count-b 13.0 --gpu H100-80GB --dtype bf16
51
+ ```
52
+
53
+ ### `alloc ghost` — Local static analysis
54
+
55
+ ```bash
56
+ alloc ghost train_7b.py --dtype bf16 --batch-size 32
57
+ ```
58
+
59
+ Analyzes model parameters from the script filename and computes VRAM breakdown.
60
+
61
+ ### `alloc run` — Training with GPU monitoring
62
+
63
+ ```bash
64
+ alloc run python train.py
65
+ alloc run torchrun --nproc_per_node=4 train.py
66
+ alloc run -- python train.py --epochs 10
67
+ ```
68
+
69
+ Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and outputs a profile report.
70
+
71
+ ### `alloc version`
72
+
73
+ ```bash
74
+ alloc version
75
+ ```
76
+
77
+ ## Python API
78
+
79
+ ```python
80
+ import alloc
81
+
82
+ # Static VRAM analysis (never crashes your training)
83
+ report = alloc.ghost(model)
84
+ print(report.total_gb) # e.g., 115.42
85
+
86
+ # Or from param count (no torch needed)
87
+ report = alloc.ghost(param_count_b=7.0, dtype="bf16")
88
+ ```
89
+
90
+ ## Configuration
91
+
92
+ All config via environment variables. Zero config files required.
93
+
94
+ | Variable | Default | Description |
95
+ |----------|---------|-------------|
96
+ | `ALLOC_API_URL` | `https://alloc-production-ffc2.up.railway.app` | API endpoint for remote scans |
97
+ | `ALLOC_TOKEN` | (empty) | Auth token for API calls |
98
+ | `ALLOC_UPLOAD` | `false` | Upload results to dashboard |
99
+
100
+ ## Architecture
101
+
102
+ | Module | Purpose |
103
+ |--------|---------|
104
+ | `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
105
+ | `probe.py` | External GPU monitoring via `pynvml`. Runs user script unmodified as subprocess. |
106
+ | `offline.py` | Flight Recorder: writes `alloc_profile_report.json.gz`, optional W&B upload. |
107
+ | `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `version` commands. |
108
+ | `display.py` | Rich terminal formatting for reports. |
109
+ | `config.py` | Env-var-only configuration. |
110
+
111
+ ## Design Principles
112
+
113
+ 1. **Zero config** — `alloc run python train.py` works out of the box
114
+ 2. **No monkey-patching** — External monitoring only, explicit opt-in API
115
+ 3. **Never crash user's training** — All Alloc failures are caught and silenced
116
+ 4. **Progressive disclosure** — Individual use first, team governance later
117
+
118
+ ## Deep GPU Metrics (via Probe)
119
+
120
+ | Metric | Why It Matters |
121
+ |--------|---------------|
122
+ | Memory bandwidth utilization | Identifies memory-bandwidth-bound workloads |
123
+ | Tensor core vs CUDA core utilization | Reveals if workload uses tensor cores (FP16/BF16) |
124
+ | SM occupancy | Low occupancy = kernel launch overhead or small batches |
125
+ | PCIe/NVLink transfer rates | Communication bottlenecks in multi-GPU setups |
126
+ | Compute throughput (TFLOPS) | Actual vs theoretical — feeds cost-efficiency analysis |
127
+ | Power draw | Thermal throttling detection |
alloc-0.1.0/README.md ADDED
@@ -0,0 +1,98 @@
1
+ # Alloc CLI
2
+
3
+ GPU intelligence for ML training. Right-size before you launch.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ pip install alloc
9
+
10
+ # With GPU monitoring support
11
+ pip install "alloc[gpu]"
12
+ ```
13
+
14
+ ## Commands
15
+
16
+ ### `alloc scan` — Remote Ghost Scan (no GPU needed)
17
+
18
+ ```bash
19
+ alloc scan --model llama-3-70b --gpu A100-80GB
20
+ alloc scan --model mistral-7b --gpu A10G --strategy fsdp --num-gpus 4
21
+ alloc scan --param-count-b 13.0 --gpu H100-80GB --dtype bf16
22
+ ```
23
+
24
+ ### `alloc ghost` — Local static analysis
25
+
26
+ ```bash
27
+ alloc ghost train_7b.py --dtype bf16 --batch-size 32
28
+ ```
29
+
30
+ Analyzes model parameters from the script filename and computes VRAM breakdown.
31
+
32
+ ### `alloc run` — Training with GPU monitoring
33
+
34
+ ```bash
35
+ alloc run python train.py
36
+ alloc run torchrun --nproc_per_node=4 train.py
37
+ alloc run -- python train.py --epochs 10
38
+ ```
39
+
40
+ Wraps your command, monitors GPU memory/utilization/power via `pynvml`, and outputs a profile report.
41
+
42
+ ### `alloc version`
43
+
44
+ ```bash
45
+ alloc version
46
+ ```
47
+
48
+ ## Python API
49
+
50
+ ```python
51
+ import alloc
52
+
53
+ # Static VRAM analysis (never crashes your training)
54
+ report = alloc.ghost(model)
55
+ print(report.total_gb) # e.g., 115.42
56
+
57
+ # Or from param count (no torch needed)
58
+ report = alloc.ghost(param_count_b=7.0, dtype="bf16")
59
+ ```
60
+
61
+ ## Configuration
62
+
63
+ All config via environment variables. Zero config files required.
64
+
65
+ | Variable | Default | Description |
66
+ |----------|---------|-------------|
67
+ | `ALLOC_API_URL` | `https://alloc-production-ffc2.up.railway.app` | API endpoint for remote scans |
68
+ | `ALLOC_TOKEN` | (empty) | Auth token for API calls |
69
+ | `ALLOC_UPLOAD` | `false` | Upload results to dashboard |
70
+
71
+ ## Architecture
72
+
73
+ | Module | Purpose |
74
+ |--------|---------|
75
+ | `ghost.py` | Static VRAM analysis via parameter walking. With torch: `model.named_parameters()`. Without: pure math from param count. |
76
+ | `probe.py` | External GPU monitoring via `pynvml`. Runs user script unmodified as subprocess. |
77
+ | `offline.py` | Flight Recorder: writes `alloc_profile_report.json.gz`, optional W&B upload. |
78
+ | `cli.py` | Typer CLI with `ghost`, `run`, `scan`, `version` commands. |
79
+ | `display.py` | Rich terminal formatting for reports. |
80
+ | `config.py` | Env-var-only configuration. |
81
+
82
+ ## Design Principles
83
+
84
+ 1. **Zero config** — `alloc run python train.py` works out of the box
85
+ 2. **No monkey-patching** — External monitoring only, explicit opt-in API
86
+ 3. **Never crash user's training** — All Alloc failures are caught and silenced
87
+ 4. **Progressive disclosure** — Individual use first, team governance later
88
+
89
+ ## Deep GPU Metrics (via Probe)
90
+
91
+ | Metric | Why It Matters |
92
+ |--------|---------------|
93
+ | Memory bandwidth utilization | Identifies memory-bandwidth-bound workloads |
94
+ | Tensor core vs CUDA core utilization | Reveals if workload uses tensor cores (FP16/BF16) |
95
+ | SM occupancy | Low occupancy = kernel launch overhead or small batches |
96
+ | PCIe/NVLink transfer rates | Communication bottlenecks in multi-GPU setups |
97
+ | Compute throughput (TFLOPS) | Actual vs theoretical — feeds cost-efficiency analysis |
98
+ | Power draw | Thermal throttling detection |
alloc-0.1.0/pyproject.toml ADDED
@@ -0,0 +1,43 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68.0", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "alloc"
7
+ version = "0.1.0"
8
+ description = "GPU intelligence for ML training — right-size before you launch."
9
+ readme = "README.md"
10
+ license = {text = "Apache-2.0"}
11
+ requires-python = ">=3.8"
12
+ authors = [{name = "Alloc Labs", email = "hello@alloclabs.com"}]
13
+ classifiers = [
14
+ "Development Status :: 3 - Alpha",
15
+ "Intended Audience :: Developers",
16
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
17
+ "Programming Language :: Python :: 3",
18
+ "Programming Language :: Python :: 3.8",
19
+ "Programming Language :: Python :: 3.9",
20
+ "Programming Language :: Python :: 3.10",
21
+ "Programming Language :: Python :: 3.11",
22
+ "Programming Language :: Python :: 3.12",
23
+ ]
24
+ dependencies = [
25
+ "typer>=0.9.0",
26
+ "rich>=13.0.0",
27
+ "httpx>=0.24.0",
28
+ "pydantic>=2.0.0",
29
+ ]
30
+
31
+ [project.optional-dependencies]
32
+ gpu = ["pynvml>=11.5.0"]
33
+ dev = ["pytest>=7.0.0", "pytest-cov>=4.0.0"]
34
+
35
+ [project.scripts]
36
+ alloc = "alloc.cli:app"
37
+
38
+ [project.urls]
39
+ Homepage = "https://alloclabs.com"
40
+ Repository = "https://github.com/alloc-labs/alloc"
41
+
42
+ [tool.setuptools.packages.find]
43
+ where = ["src"]
alloc-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
alloc-0.1.0/src/alloc/__init__.py ADDED
@@ -0,0 +1,9 @@
1
"""Alloc — GPU intelligence for ML training."""

from __future__ import annotations

# Single source of truth for the package version (mirrored in pyproject.toml
# and PKG-INFO — keep all three in sync when releasing).
__version__ = "0.1.0"

# Re-export the static-analysis entry points at package level so callers can
# write `alloc.ghost(...)` without importing submodules.
from alloc.ghost import ghost, GhostReport

__all__ = ["ghost", "GhostReport", "__version__"]
alloc-0.1.0/src/alloc/cli.py ADDED
@@ -0,0 +1,393 @@
1
"""Alloc CLI — GPU intelligence for ML training.

Commands:
    alloc ghost <script.py>     Static VRAM scan (no GPU, no execution)
    alloc run <command...>      Wrap training with probe monitoring
    alloc scan --model <name>   Remote scan via API
    alloc login                 Authenticate with Alloc dashboard
    alloc upload <artifact>     Upload a profile report to Alloc
    alloc version               Show version
"""

from __future__ import annotations

import os
import sys
from typing import Optional

import typer
from rich.console import Console

from alloc import __version__
from alloc.config import get_api_url, get_token, should_upload

# Root Typer application; commands below register themselves via @app.command().
app = typer.Typer(
    name="alloc",
    help="GPU intelligence for ML training. Right-size before you launch.",
    no_args_is_help=True,   # bare `alloc` prints help instead of an error
    add_completion=False,   # hide shell-completion options from --help
)
# Shared Rich console used by every command for styled terminal output.
console = Console()
31
+
32
+
33
@app.command()
def ghost(
    script: str = typer.Argument(..., help="Python script to analyze (e.g. train.py)"),
    dtype: str = typer.Option("fp16", help="Data type: fp16, bf16, fp32"),
    batch_size: int = typer.Option(32, help="Training batch size"),
    seq_length: int = typer.Option(2048, help="Sequence length"),
    hidden_dim: int = typer.Option(4096, help="Hidden dimension"),
):
    """Static VRAM scan — analyze a model without executing it."""
    from alloc.display import print_ghost_report
    from alloc.ghost import ghost as ghost_fn

    # The script is never executed; the parameter count is inferred from
    # size tokens in its filename.
    params = _extract_param_count(script)
    if params is None:
        console.print(f"[yellow]Could not extract model from {script}.[/yellow]")
        console.print("[dim]Tip: Use 'alloc scan --param-count-b 7.0' for direct param count input.[/dim]")
        raise typer.Exit(1)

    print_ghost_report(
        ghost_fn(
            param_count=params,
            dtype=dtype,
            batch_size=batch_size,
            seq_length=seq_length,
            hidden_dim=hidden_dim,
        )
    )
61
+
62
+
63
@app.command()
def run(
    command: list[str] = typer.Argument(..., help="Command to run (e.g. python train.py)"),
    timeout: int = typer.Option(300, help="Max monitoring time in seconds"),
    gpu: int = typer.Option(0, help="GPU index to monitor"),
    save: bool = typer.Option(True, help="Save profile report to disk"),
    out: Optional[str] = typer.Option(None, "--out", help="Output path for profile report"),
    upload: bool = typer.Option(False, "--upload", help="Upload report to Alloc dashboard after run"),
):
    """Run a training command with GPU monitoring."""
    from alloc.display import print_probe_result
    from alloc.offline import write_report
    from alloc.probe import probe_command

    # Defensive guard: typer normally requires the argument already.
    if not command:
        console.print("[red]No command provided.[/red]")
        console.print("Usage: alloc run python train.py")
        raise typer.Exit(1)

    console.print(f"[green]alloc[/green] [dim]v{__version__}[/dim] — Probe monitoring")
    console.print(f"[dim]Command: {' '.join(command)}[/dim]")
    console.print()

    result = probe_command(command, timeout_seconds=timeout, gpu_index=gpu)

    # pynvml-related failures are soft: the wrapped process still ran,
    # we just could not monitor the GPU. Anything else is a real error.
    if result.error:
        if "pynvml" in result.error:
            console.print(f"[yellow]{result.error}[/yellow]")
            console.print("[dim]Process ran without GPU monitoring.[/dim]")
        else:
            console.print(f"[red]Error: {result.error}[/red]")

    if result.peak_vram_mb > 0:
        print_probe_result(result)

    artifact_path = ""
    if save:
        artifact_path = write_report(
            probe_result={
                "peak_vram_mb": result.peak_vram_mb,
                "avg_gpu_util": result.avg_gpu_util,
                "avg_power_watts": result.avg_power_watts,
                "duration_seconds": result.duration_seconds,
                "samples": result.samples,
                "exit_code": result.exit_code,
            },
            output_path=out,
        )
        if artifact_path:
            console.print(f"[dim]Report saved: {artifact_path}[/dim]")

    # Upload when explicitly requested (--upload) or when ALLOC_UPLOAD is set.
    if artifact_path and (upload or should_upload()):
        _try_upload(artifact_path)

    # Mirror the wrapped process's failure status to our caller.
    if result.exit_code and result.exit_code != 0:
        raise typer.Exit(result.exit_code)
121
+
122
+
123
@app.command()
def scan(
    model: str = typer.Option(..., "--model", "-m", help="Model name (e.g. llama-3-70b)"),
    gpu: str = typer.Option("A100-80GB", "--gpu", "-g", help="Target GPU type"),
    dtype: str = typer.Option("fp16", help="Data type: fp16, bf16, fp32"),
    strategy: str = typer.Option("ddp", help="Strategy: ddp, fsdp, deepspeed"),
    num_gpus: int = typer.Option(4, help="Number of GPUs"),
    param_count_b: Optional[float] = typer.Option(None, "--param-count-b", "-p", help="Param count in billions (overrides model lookup)"),
    batch_size: int = typer.Option(32, help="Batch size"),
    seq_length: int = typer.Option(2048, help="Sequence length"),
    hidden_dim: int = typer.Option(4096, help="Hidden dimension"),
):
    """Remote scan via Alloc API — no GPU needed."""
    import httpx

    # An explicit --param-count-b wins; otherwise look the model up by name.
    params_b = param_count_b or _model_to_params(model)
    if params_b is None:
        console.print(f"[yellow]Unknown model: {model}[/yellow]")
        console.print("[dim]Use --param-count-b to specify directly.[/dim]")
        raise typer.Exit(1)

    api_url = get_api_url()
    token = get_token()

    payload = {
        "entrypoint": f"{model}.py",
        "param_count_b": params_b,
        "dtype": dtype,
        "strategy": strategy,
        "gpu_type": gpu,
        "num_gpus": num_gpus,
        "batch_size": batch_size,
        "seq_length": seq_length,
        "hidden_dim": hidden_dim,
    }

    console.print(f"[green]alloc[/green] [dim]v{__version__}[/dim] — Remote Ghost Scan")
    console.print(f"[dim]Model: {model} ({params_b}B) → {gpu} x{num_gpus}[/dim]")
    console.print()

    # Auth header is optional — anonymous scans are allowed.
    headers = {"Content-Type": "application/json"}
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        with httpx.Client(timeout=30) as client:
            resp = client.post(f"{api_url}/scans/cli", json=payload, headers=headers)
            resp.raise_for_status()
        _print_scan_result(resp.json(), gpu, strategy)
    except httpx.HTTPStatusError as e:
        console.print(f"[red]API error {e.response.status_code}[/red]")
        console.print(f"[dim]{e.response.text[:200]}[/dim]")
        raise typer.Exit(1)
    except httpx.ConnectError:
        console.print(f"[red]Cannot connect to {api_url}[/red]")
        console.print("[dim]Check ALLOC_API_URL or try: alloc ghost <script.py> for local scan[/dim]")
        raise typer.Exit(1)
183
+
184
+
185
@app.command()
def login():
    """Authenticate with Alloc dashboard.

    Prompts for email/password, exchanges them for a Supabase access token
    (password grant), and persists the tokens plus the API URL in the local
    config via ``save_config``. Exits with status 1 on any failure.
    """
    import httpx
    from alloc.config import get_supabase_url, get_supabase_anon_key, load_config, save_config

    email = typer.prompt("Email")
    password = typer.prompt("Password", hide_input=True)

    supabase_url = get_supabase_url()
    anon_key = get_supabase_anon_key()

    try:
        with httpx.Client(timeout=15) as client:
            resp = client.post(
                f"{supabase_url}/auth/v1/token?grant_type=password",
                json={"email": email, "password": password},
                headers={
                    "apikey": anon_key,
                    "Content-Type": "application/json",
                },
            )
            resp.raise_for_status()
            data = resp.json()

        token = data.get("access_token", "")
        refresh = data.get("refresh_token", "")
        if not token:
            console.print("[red]Login failed: no access token received.[/red]")
            raise typer.Exit(1)

        cfg = load_config()
        cfg["token"] = token
        cfg["refresh_token"] = refresh
        cfg["email"] = email
        cfg["api_url"] = get_api_url()
        save_config(cfg)

        console.print(f"[green]Logged in as {email}[/green]")
    except typer.Exit:
        # Bug fix: typer.Exit inherits RuntimeError (via click.exceptions.Exit),
        # so without this re-raise the broad `except Exception` below caught our
        # own exit from the no-token branch and printed a second, misleading
        # "Login failed: 1" message.
        raise
    except httpx.HTTPStatusError as e:
        # Prefer Supabase's structured error message when the body is JSON.
        detail = ""
        try:
            detail = e.response.json().get("error_description", e.response.text[:200])
        except Exception:
            detail = e.response.text[:200]
        console.print(f"[red]Login failed: {detail}[/red]")
        raise typer.Exit(1)
    except httpx.ConnectError:
        console.print(f"[red]Cannot connect to {supabase_url}[/red]")
        raise typer.Exit(1)
    except Exception as e:
        # Last-resort boundary: report and exit instead of a raw traceback.
        console.print(f"[red]Login failed: {e}[/red]")
        raise typer.Exit(1)
238
+
239
+
240
@app.command()
def upload(
    artifact: str = typer.Argument(..., help="Path to alloc_profile_report.json.gz"),
):
    """Upload a profile report to the Alloc dashboard."""
    # Validate the artifact locally before touching the network.
    if not os.path.isfile(artifact):
        console.print(f"[red]File not found: {artifact}[/red]")
        raise typer.Exit(1)
    if not artifact.endswith(".json.gz"):
        console.print("[red]Expected a .json.gz artifact file.[/red]")
        raise typer.Exit(1)

    # Best-effort upload; _try_upload reports status and never raises.
    _try_upload(artifact)
254
+
255
+
256
@app.command()
def version():
    """Show alloc version."""
    # Version string comes from the package's __version__ (see __init__.py).
    banner = f"alloc v{__version__}"
    console.print(banner)
260
+
261
+
262
def _try_upload(artifact_path: str) -> None:
    """Attempt to upload an artifact. Prints status, never raises."""
    try:
        from alloc.upload import upload_artifact

        # Uploading requires an auth token from a prior `alloc login`.
        token = get_token()
        if not token:
            console.print("[yellow]Not logged in. Run `alloc login` first.[/yellow]")
            return

        api_url = get_api_url()
        console.print(f"[dim]Uploading to {api_url}...[/dim]")
        outcome = upload_artifact(artifact_path, api_url, token)
        run_id = outcome.get("run_id", "unknown")
        console.print(f"[green]Uploaded.[/green] Run ID: {run_id}")
    except Exception as e:
        # Best-effort by design: an upload failure must never break the CLI.
        console.print(f"[yellow]Upload failed: {e}[/yellow]")
        console.print(f"[dim]You can retry later: alloc upload {artifact_path}[/dim]")
280
+
281
+
282
def _print_scan_result(result: dict, gpu: str, strategy: str) -> None:
    """Print remote scan result: VRAM table, strategy verdict, cost, Euler notes."""
    from rich.panel import Panel
    from rich.table import Table

    vram = result.get("vram_breakdown", {})
    verdict = result.get("strategy_verdict", {})

    table = Table(show_header=True, header_style="bold cyan", box=None, padding=(0, 2))
    table.add_column("Component", style="dim")
    table.add_column("Size", justify="right", style="bold")

    # Per-component rows share one format; the total row is styled separately.
    for label, key in (
        ("Model weights", "weights_gb"),
        ("Optimizer (Adam)", "optimizer_gb"),
        ("Activations (est.)", "activations_gb"),
        ("Buffer (10%)", "buffer_gb"),
    ):
        table.add_row(label, f"{vram.get(key, 0):.2f} GB")
    table.add_row("", "")
    table.add_row("[bold]Total VRAM[/bold]", f"[bold]{vram.get('total_gb', 0):.2f} GB[/bold]")

    console.print(Panel(table, title="VRAM Breakdown", border_style="green", padding=(1, 2)))

    feasible = verdict.get("feasible", False)
    status = "[green]FEASIBLE[/green]" if feasible else "[red]INFEASIBLE[/red]"
    console.print(f" Strategy: {strategy.upper()} on {gpu} — {status}")

    if not feasible and verdict.get("recommendation"):
        console.print(f" [yellow]Suggestion: switch to {verdict['recommendation'].upper()}[/yellow]")

    if verdict.get("reason"):
        console.print(f" [dim]{verdict['reason']}[/dim]")

    # Cost estimate if present
    cost = result.get("est_cost_per_hour")
    if cost is not None:
        console.print(f" [dim]Est. cost: ~${cost:.2f}/hr[/dim]")

    # Euler analysis if present
    euler = result.get("euler_analysis")
    if euler and euler.get("summary"):
        console.print()
        console.print(" [bold cyan]Euler Analysis[/bold cyan]")
        console.print(f" {euler['summary']}")
        for rec in euler.get("recommendations", []):
            console.print(f" [dim]• {rec}[/dim]")

    console.print()
329
+
330
+
331
+ def _extract_param_count(script: str) -> Optional[int]:
332
+ """Try to extract param count from a Python script. Returns None if can't."""
333
+ # For now, don't execute the script — just check common model names in filename
334
+ import os
335
+ basename = os.path.basename(script).lower()
336
+
337
+ # Common model size patterns
338
+ patterns = {
339
+ "70b": int(70e9), "65b": int(65e9), "40b": int(40e9),
340
+ "33b": int(33e9), "30b": int(30e9), "13b": int(13e9),
341
+ "8b": int(8e9), "7b": int(7e9), "3b": int(3e9),
342
+ "1.5b": int(1.5e9), "1b": int(1e9),
343
+ "350m": int(350e6), "125m": int(125e6),
344
+ }
345
+ for pattern, count in patterns.items():
346
+ if pattern in basename:
347
+ return count
348
+
349
+ return None
350
+
351
+
352
+ # Well-known model names → param count in billions
353
+ _MODEL_PARAMS = {
354
+ "llama-3-70b": 70.0,
355
+ "llama-3-8b": 8.03,
356
+ "llama-2-70b": 70.0,
357
+ "llama-2-13b": 13.0,
358
+ "llama-2-7b": 7.0,
359
+ "mistral-7b": 7.24,
360
+ "mixtral-8x7b": 46.7,
361
+ "gpt2": 0.124,
362
+ "gpt2-medium": 0.355,
363
+ "gpt2-large": 0.774,
364
+ "gpt2-xl": 1.5,
365
+ "bert-base": 0.110,
366
+ "bert-large": 0.340,
367
+ "t5-small": 0.060,
368
+ "t5-base": 0.220,
369
+ "t5-large": 0.770,
370
+ "t5-xl": 3.0,
371
+ "t5-xxl": 11.0,
372
+ "falcon-7b": 7.0,
373
+ "falcon-40b": 40.0,
374
+ "phi-2": 2.78,
375
+ "gemma-2b": 2.51,
376
+ "gemma-7b": 8.54,
377
+ "qwen-7b": 7.72,
378
+ "qwen-14b": 14.2,
379
+ "qwen-72b": 72.7,
380
+ "deepseek-7b": 6.9,
381
+ "deepseek-67b": 67.0,
382
+ "vit-base": 0.086,
383
+ "vit-large": 0.307,
384
+ "whisper-small": 0.244,
385
+ "whisper-medium": 0.769,
386
+ "whisper-large": 1.55,
387
+ }
388
+
389
+
390
+ def _model_to_params(model: str) -> Optional[float]:
391
+ """Look up model param count by name."""
392
+ normalized = model.lower().strip()
393
+ return _MODEL_PARAMS.get(normalized)