alloc-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
alloc/display.py ADDED
@@ -0,0 +1,580 @@
"""Rich terminal formatting for Alloc reports."""

from __future__ import annotations

from typing import Optional, TYPE_CHECKING

if TYPE_CHECKING:
    from alloc.ghost import GhostReport
    from alloc.probe import ProbeResult


def print_ghost_report(report: GhostReport) -> None:
    """Print a GhostReport to terminal with Rich formatting."""
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.panel import Panel

        console = Console()

        table = Table(show_header=True, header_style="bold cyan", box=None, padding=(0, 2))
        table.add_column("Component", style="dim")
        table.add_column("Size", justify="right", style="bold")

        table.add_row("Model weights", f"{report.weights_gb:.2f} GB")
        table.add_row("Gradients", f"{report.gradients_gb:.2f} GB")
        table.add_row("Optimizer (Adam)", f"{report.optimizer_gb:.2f} GB")
        table.add_row("Activations (est.)", f"{report.activations_gb:.2f} GB")
        table.add_row("Buffer (10%)", f"{report.buffer_gb:.2f} GB")
        table.add_row("", "")
        table.add_row("[bold]Total VRAM[/bold]", f"[bold green]{report.total_gb:.2f} GB[/bold green]")

        header = f"Ghost Scan — {report.param_count_b:.1f}B params ({report.dtype})"
        confidence_label = _ghost_confidence_label(getattr(report, "extraction_method", None))
        console.print()
        console.print(Panel(table, title=header, border_style="green", padding=(1, 2)))
        console.print(f" [dim]Confidence: {confidence_label}[/dim]")
        console.print()
    except ImportError:
        # Fallback without rich
        _print_ghost_plain(report)


def _print_ghost_plain(report: GhostReport) -> None:
    """Plain-text fallback when Rich is not available."""
    confidence_label = _ghost_confidence_label(getattr(report, "extraction_method", None))
    print(f"\n Ghost Scan — {report.param_count_b:.1f}B params ({report.dtype})")
    print(f" {'─' * 40}")
    print(f" Model weights: {report.weights_gb:>8.2f} GB")
    print(f" Gradients: {report.gradients_gb:>8.2f} GB")
    print(f" Optimizer (Adam): {report.optimizer_gb:>8.2f} GB")
    print(f" Activations (est.): {report.activations_gb:>8.2f} GB")
    print(f" Buffer (10%): {report.buffer_gb:>8.2f} GB")
    print(f" {'─' * 40}")
    print(f" Total VRAM: {report.total_gb:>8.2f} GB")
    print(f"\n Confidence: {confidence_label}\n")


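Both printers read only a handful of attributes on the report object, so a lightweight stand-in is enough to smoke-test the rendering (a sketch; the real GhostReport lives in alloc.ghost, and the 7B bf16 numbers below are illustrative):

from types import SimpleNamespace

fake_report = SimpleNamespace(
    param_count_b=7.0, dtype="bf16",
    weights_gb=14.0, gradients_gb=14.0, optimizer_gb=84.0,  # 2 + 2 + 12 bytes/param
    activations_gb=6.5, buffer_gb=11.9, total_gb=130.4,     # buffer = 10% of the rest
    extraction_method="execution",
)
print_ghost_report(fake_report)  # falls back to _print_ghost_plain without rich
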
def print_probe_result(result: ProbeResult) -> None:
    """Print a ProbeResult to terminal."""
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.panel import Panel

        console = Console()

        table = Table(show_header=True, header_style="bold cyan", box=None, padding=(0, 2))
        table.add_column("Metric", style="dim")
        table.add_column("Value", justify="right", style="bold")

        table.add_row("Peak VRAM", f"{result.peak_vram_mb:.0f} MB ({result.peak_vram_gb:.2f} GB)")
        table.add_row("Avg GPU Utilization", f"{result.avg_gpu_util:.1f}%")
        table.add_row("Avg Power Draw", f"{result.avg_power_watts:.0f} W")
        table.add_row("Duration", f"{result.duration_seconds:.1f}s")
        table.add_row("Samples", f"{len(result.samples)}")

        if result.exit_code is not None:
            status = "[green]success[/green]" if result.exit_code == 0 else f"[red]exit {result.exit_code}[/red]"
            table.add_row("Process", status)

        console.print()
        console.print(Panel(table, title="Alloc Probe Results", border_style="blue", padding=(1, 2)))
        console.print()
    except ImportError:
        print("\n Alloc Probe Results")
        print(f" Peak VRAM: {result.peak_vram_mb:.0f} MB")
        print(f" Avg GPU Util: {result.avg_gpu_util:.1f}%")
        print(f" Duration: {result.duration_seconds:.1f}s\n")


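print_probe_result touches a similarly small surface (peak_vram_mb/_gb, avg_gpu_util, avg_power_watts, duration_seconds, samples, exit_code), so the same stand-in trick works (illustrative values):

from types import SimpleNamespace

fake_probe = SimpleNamespace(
    peak_vram_mb=18432.0, peak_vram_gb=18.0,   # 18432 MB / 1024
    avg_gpu_util=76.4, avg_power_watts=295.0,
    duration_seconds=92.3, samples=[{}] * 46,  # only len() is used here
    exit_code=0,
)
print_probe_result(fake_probe)
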
# --- Verdict display for calibrate-and-exit mode ---


def print_verdict(result, artifact_path="", step_count=None, callback_data=None, budget_context=None):
    # type: (ProbeResult, str, Optional[int], Optional[dict], Optional[dict]) -> None
    """Print a verdict summary panel after calibration."""
    vram_util_pct = result.vram_utilization_pct
    bottleneck = _classify_bottleneck_local(
        result.peak_vram_mb,
        result.gpu_total_vram_mb,
        result.avg_gpu_util,
    )
    confidence = _compute_confidence_local(
        len(result.samples),
        result.duration_seconds,
        step_count,
        callback_data=callback_data,
    )
    recommendation = _qualitative_recommendation(bottleneck, vram_util_pct, result.avg_gpu_util)
    duration_label = _stop_reason_label(result.stop_reason, result.calibration_duration_s)

    # GPU label: shorten "NVIDIA A100-SXM4-80GB" → "NVIDIA A100-SXM4"
    gpu_short = result.gpu_name or "Unknown GPU"
    if gpu_short and "-" in gpu_short:
        parts = gpu_short.split("-")
        if parts[-1].upper().endswith("GB"):
            gpu_short = "-".join(parts[:-1])

    # Process status
    if result.exit_code is None:
        proc_status = "unknown"
    elif result.exit_code == 0:
        proc_status = "success"
    else:
        proc_status = f"exit {result.exit_code}"

    try:
        from rich.console import Console
        from rich.panel import Panel

        console = Console()

        color = _bottleneck_color(bottleneck)
        bottleneck_display = bottleneck.replace("_", " ").title()
        title = f"Verdict: {bottleneck_display} (confidence: {confidence:.2f})"

        lines = []

        # Peak VRAM line
        peak_gb = result.peak_vram_mb / 1024
        if result.gpu_total_vram_mb:
            total_gb = result.gpu_total_vram_mb / 1024
            lines.append(f" Peak VRAM {peak_gb:.1f} GB / {total_gb:.1f} GB ({gpu_short})")
        else:
            lines.append(f" Peak VRAM {peak_gb:.1f} GB ({gpu_short})")

        if vram_util_pct is not None:
            lines.append(f" VRAM used {vram_util_pct:.1f}%")

        lines.append(f" Avg GPU util {result.avg_gpu_util:.1f}%")
        lines.append(f" Avg power {result.avg_power_watts:.0f} W")
        lines.append(f" Duration {result.duration_seconds:.1f}s ({duration_label})")
        lines.append(f" Samples {len(result.samples)}")
        lines.append(f" Process {proc_status}")

        # Timing from framework callbacks
        if callback_data:
            p50 = callback_data.get("step_time_ms_p50")
            p90 = callback_data.get("step_time_ms_p90")
            sps = callback_data.get("samples_per_sec")
            dl_wait = callback_data.get("dataloader_wait_pct")
            if p50 is not None and p90 is not None:
                lines.append(f" Step time {p50:.1f} ms (p50) / {p90:.1f} ms (p90)")
            if sps is not None:
                lines.append(f" Throughput {sps:.1f} samples/sec")
            if dl_wait is not None and dl_wait > 15:
                lines.append(f" Dataloader ~{dl_wait:.0f}% wait (consider more workers)")

        # Budget projection from .alloc.yaml context
        if budget_context:
            cph = budget_context.get("cost_per_hour")
            budget_mo = budget_context.get("budget_monthly")
            if cph is not None and cph > 0:
                # Assume 8 hrs/day * 22 working days/month = 176 hrs
                monthly_est = cph * 176
                lines.append(f" Est. monthly ~${monthly_est:,.0f}/mo at current rate (8h/day)")
                if budget_mo is not None and budget_mo > 0:
                    pct = (monthly_est / budget_mo) * 100
                    cap_note = ""
                    if budget_context.get("budget_cap_applied"):
                        cap_note = " (org cap applied)"
                    lines.append(f" Budget {pct:.0f}% of ${budget_mo:,.0f}/mo{cap_note}")

        if recommendation:
            lines.append("")
            lines.append(f" Suggestion: {recommendation}")

        content = "\n".join(lines)
        console.print()
        console.print(Panel(content, title=title, border_style=color, padding=(1, 0)))

        if artifact_path:
            console.print(f" [dim]Artifact: {artifact_path}[/dim]")
            console.print(f" [dim]Next: alloc upload {artifact_path}[/dim]")
        console.print()

    except ImportError:
        _print_verdict_plain(
            result, bottleneck, confidence, vram_util_pct,
            gpu_short, duration_label, proc_status,
            recommendation, artifact_path,
        )


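The budget projection in print_verdict is straight arithmetic on the .alloc.yaml context: the hourly rate times 176 hours (8 h/day for 22 working days). A worked example with assumed numbers:

budget_context = {"cost_per_hour": 4.10, "budget_monthly": 1000.0}  # illustrative rates
monthly_est = budget_context["cost_per_hour"] * 176
assert round(monthly_est, 2) == 721.60
# The panel lines would then read roughly:
#   Est. monthly ~$722/mo at current rate (8h/day)
#   Budget 72% of $1,000/mo
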
def _print_verdict_plain(
    result, bottleneck, confidence, vram_util_pct,
    gpu_short, duration_label, proc_status,
    recommendation, artifact_path,
):
    # type: (...) -> None
    """Plain-text fallback for verdict."""
    bottleneck_display = bottleneck.replace("_", " ").title()
    print(f"\n Verdict: {bottleneck_display} (confidence: {confidence:.2f})")
    print(f" {'─' * 50}")

    peak_gb = result.peak_vram_mb / 1024
    if result.gpu_total_vram_mb:
        total_gb = result.gpu_total_vram_mb / 1024
        print(f" Peak VRAM {peak_gb:.1f} GB / {total_gb:.1f} GB ({gpu_short})")
    else:
        print(f" Peak VRAM {peak_gb:.1f} GB ({gpu_short})")

    if vram_util_pct is not None:
        print(f" VRAM used {vram_util_pct:.1f}%")

    print(f" Avg GPU util {result.avg_gpu_util:.1f}%")
    print(f" Avg power {result.avg_power_watts:.0f} W")
    print(f" Duration {result.duration_seconds:.1f}s ({duration_label})")
    print(f" Samples {len(result.samples)}")
    print(f" Process {proc_status}")

    if recommendation:
        print(f"\n Suggestion: {recommendation}")

    print(f" {'─' * 50}")
    if artifact_path:
        print(f" Artifact: {artifact_path}")
        print(f" Next: alloc upload {artifact_path}")
    print()


def _classify_bottleneck_local(peak_vram_mb, gpu_total_vram_mb, avg_gpu_util):
    # type: (float, Optional[float], float) -> str
    """Classify workload bottleneck. Matches analyzer.py:116-132 thresholds."""
    compute_util = avg_gpu_util

    if gpu_total_vram_mb is None or gpu_total_vram_mb <= 0:
        # Degrade to util-only classification
        if compute_util > 80:
            return "compute_bound"
        if compute_util < 40:
            return "underutilized"
        return "unknown"

    vram_util = (peak_vram_mb / gpu_total_vram_mb) * 100

    if compute_util > 80:
        return "compute_bound"
    if compute_util < 40:
        if vram_util < 60:
            return "underutilized"
        if vram_util > 80:
            return "memory_bound"
    return "balanced"


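Concretely, against an assumed 80 GB card (81,920 MB total) the thresholds partition like this:

# Illustrative inputs: (peak_vram_mb, gpu_total_vram_mb, avg_gpu_util)
assert _classify_bottleneck_local(70000, 81920, 92.0) == "compute_bound"  # util > 80%
assert _classify_bottleneck_local(30000, 81920, 25.0) == "underutilized"  # util < 40%, VRAM ~37%
assert _classify_bottleneck_local(75000, 81920, 25.0) == "memory_bound"   # util < 40%, VRAM ~92%
assert _classify_bottleneck_local(55000, 81920, 60.0) == "balanced"       # nothing extreme
assert _classify_bottleneck_local(55000, None, 60.0) == "unknown"         # no VRAM total known
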
def _compute_confidence_local(sample_count, duration_s, step_count=None, callback_data=None):
    # type: (int, float, Optional[int], Optional[dict]) -> float
    """Estimate analysis confidence. Matches analyzer.py confidence formula.

    Cap depends on signal level:
        NVML_ONLY (no callback timing) → max 0.6
        FRAMEWORK_TIMING (callback with step timing) → max 0.85
    """
    score = 0.3  # Baseline: we have probe data

    if sample_count >= 100:
        score += 0.3
    elif sample_count >= 20:
        score += 0.2
    elif sample_count >= 5:
        score += 0.1

    if duration_s >= 300:
        score += 0.2
    elif duration_s >= 60:
        score += 0.1

    if step_count and step_count > 0:
        score += 0.2

    # Determine signal level cap
    has_timing = (
        callback_data is not None
        and callback_data.get("step_time_ms_p50") is not None
    )
    cap = 0.85 if has_timing else 0.6
    return min(score, cap)


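Worked through: a five-minute run with 150 NVML samples earns 0.3 + 0.3 + 0.2 = 0.8 but is capped at 0.6 without callback timing; a step count plus p50 timing pushes the sum to 1.0 and the cap to 0.85 (a sketch against the function above):

# NVML only: capped at 0.6 despite a 0.8 raw score.
assert _compute_confidence_local(150, 300.0) == 0.6
# With framework callback timing and a step count: capped at 0.85.
assert _compute_confidence_local(
    150, 300.0, step_count=500, callback_data={"step_time_ms_p50": 41.2}
) == 0.85
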
def _qualitative_recommendation(bottleneck, vram_utilization_pct, avg_gpu_util):
    # type: (str, Optional[float], float) -> Optional[str]
    """Return a human-readable suggestion based on bottleneck classification."""
    if bottleneck == "underutilized":
        if vram_utilization_pct is not None and vram_utilization_pct < 30:
            return "GPU is significantly oversized. Consider a smaller GPU."
        return "GPU is underutilized. Consider a smaller GPU or fewer GPUs."

    if bottleneck == "memory_bound":
        return "Workload is memory-bound. Consider a higher-bandwidth GPU or FSDP."

    if bottleneck == "compute_bound":
        if vram_utilization_pct is not None and vram_utilization_pct < 70:
            return "Compute-bound with VRAM headroom. Try increasing batch size."
        return None

    # balanced / unknown
    return None


def _stop_reason_label(stop_reason, calibration_duration_s=None):
    # type: (Optional[str], Optional[float]) -> str
    """Human-readable label for stop reason."""
    if stop_reason == "stable":
        if calibration_duration_s is not None:
            return f"auto-stopped: metrics stable at {calibration_duration_s:.1f}s"
        return "auto-stopped: metrics stable"

    if stop_reason == "timeout":
        return "reached timeout — increase --timeout for more data"

    if stop_reason == "process_exit":
        return "training process exited"

    return "unknown"


def build_verdict_dict(result, artifact_path="", step_count=None, callback_data=None, budget_context=None):
    # type: (ProbeResult, str, Optional[int], Optional[dict], Optional[dict]) -> dict
    """Build a verdict dict with the same data as print_verdict, for JSON output."""
    vram_util_pct = result.vram_utilization_pct
    bottleneck = _classify_bottleneck_local(
        result.peak_vram_mb,
        result.gpu_total_vram_mb,
        result.avg_gpu_util,
    )
    confidence = _compute_confidence_local(
        len(result.samples),
        result.duration_seconds,
        step_count,
        callback_data=callback_data,
    )
    recommendation = _qualitative_recommendation(bottleneck, vram_util_pct, result.avg_gpu_util)
    duration_label = _stop_reason_label(result.stop_reason, result.calibration_duration_s)

    if result.exit_code is None:
        proc_status = "unknown"
    elif result.exit_code == 0:
        proc_status = "success"
    else:
        proc_status = f"exit {result.exit_code}"

    d = {
        "bottleneck": bottleneck,
        "confidence": round(confidence, 2),
        "peak_vram_mb": round(result.peak_vram_mb, 1),
        "peak_vram_gb": round(result.peak_vram_mb / 1024, 2),
        "gpu_name": result.gpu_name,
        "gpu_total_vram_mb": result.gpu_total_vram_mb,
        "vram_utilization_pct": round(vram_util_pct, 1) if vram_util_pct is not None else None,
        "avg_gpu_util": round(result.avg_gpu_util, 1),
        "avg_power_watts": round(result.avg_power_watts, 0),
        "duration_seconds": round(result.duration_seconds, 1),
        "duration_label": duration_label,
        "sample_count": len(result.samples),
        "process_status": proc_status,
        "recommendation": recommendation,
        "artifact_path": artifact_path or None,
    }
    # Include timing fields from callback data
    if callback_data:
        for key in ("step_time_ms_p50", "step_time_ms_p90", "samples_per_sec", "dataloader_wait_pct"):
            val = callback_data.get(key)
            if val is not None:
                d[key] = val
    # Include budget projection
    if budget_context:
        cph = budget_context.get("cost_per_hour")
        budget_mo = budget_context.get("budget_monthly")
        if cph is not None and cph > 0:
            monthly_est = cph * 176
            d["budget_projection"] = {
                "cost_per_hour": cph,
                "est_monthly": round(monthly_est, 2),
                "budget_monthly": budget_mo,
                "budget_pct": round((monthly_est / budget_mo) * 100, 1) if budget_mo and budget_mo > 0 else None,
                "budget_cap_applied": budget_context.get("budget_cap_applied", False),
                "org_budget_monthly": budget_context.get("org_budget_monthly"),
            }
    return d


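Because build_verdict_dict carries the same fields as the panel, a JSON output path reduces to serializing it (a sketch; assumes result is a ProbeResult from a finished probe run, and the artifact path is illustrative):

import json

verdict = build_verdict_dict(result, artifact_path=".alloc/run.json")
print(json.dumps(verdict, indent=2))
# e.g. {"bottleneck": "balanced", "confidence": 0.6, "peak_vram_gb": 18.0, ...}
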
def print_verbose_run(result, step_count=None):
    # type: (ProbeResult, Optional[int]) -> None
    """Print detailed probe data: hardware context, sample dump, recommendation reasoning."""
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.panel import Panel

        console = Console()

        # --- Hardware Context ---
        hw_lines = []
        if result.gpu_name:
            hw_lines.append(f" GPU {result.gpu_name}")
        if result.gpu_total_vram_mb:
            hw_lines.append(f" Total VRAM {result.gpu_total_vram_mb:.0f} MB ({result.gpu_total_vram_mb / 1024:.1f} GB)")
        if result.driver_version:
            hw_lines.append(f" Driver {result.driver_version}")
        if result.cuda_version:
            hw_lines.append(f" CUDA {result.cuda_version}")
        if result.sm_version:
            hw_lines.append(f" SM Compute {result.sm_version}")
        hw_lines.append(f" GPUs detected {result.num_gpus_detected}")
        if result.probe_mode:
            hw_lines.append(f" Probe mode {result.probe_mode}")
        if result.stop_reason:
            hw_lines.append(f" Stop reason {result.stop_reason}")
        if step_count:
            hw_lines.append(f" Step count {step_count} (from framework callback)")

        if hw_lines:
            console.print(Panel("\n".join(hw_lines), title="Hardware Context", border_style="cyan", padding=(1, 0)))

        # --- Recommendation Reasoning ---
        vram_util_pct = result.vram_utilization_pct
        bottleneck = _classify_bottleneck_local(
            result.peak_vram_mb, result.gpu_total_vram_mb, result.avg_gpu_util,
        )
        confidence = _compute_confidence_local(
            len(result.samples), result.duration_seconds, step_count,
        )
        recommendation = _qualitative_recommendation(bottleneck, vram_util_pct, result.avg_gpu_util)

        reason_lines = []
        reason_lines.append(f" Bottleneck: {bottleneck}")

        # Explain classification thresholds
        if result.gpu_total_vram_mb and result.gpu_total_vram_mb > 0:
            vram_util = (result.peak_vram_mb / result.gpu_total_vram_mb) * 100
            reason_lines.append(f" VRAM util = {vram_util:.1f}% (thresholds: <60% low, >80% high)")
        reason_lines.append(f" GPU util = {result.avg_gpu_util:.1f}% (thresholds: <40% low, >80% high)")

        if bottleneck == "underutilized":
            reason_lines.append(" -> GPU util < 40% AND VRAM util < 60% => underutilized")
        elif bottleneck == "memory_bound":
            reason_lines.append(" -> GPU util < 40% AND VRAM util > 80% => memory_bound")
        elif bottleneck == "compute_bound":
            reason_lines.append(" -> GPU util > 80% => compute_bound")
        elif bottleneck == "balanced":
            reason_lines.append(" -> No extreme thresholds hit => balanced")

        reason_lines.append("")
        reason_lines.append(f" Confidence: {confidence:.2f}")
        reason_lines.append(" Base score +0.30 (have probe data)")

        sample_count = len(result.samples)
        if sample_count >= 100:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.30 (>=100)")
        elif sample_count >= 20:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.20 (>=20)")
        elif sample_count >= 5:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.10 (>=5)")
        else:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.00 (<5)")

        dur = result.duration_seconds
        if dur >= 300:
            reason_lines.append(f" Duration ({dur:>5.0f}s) +0.20 (>=300s)")
        elif dur >= 60:
            reason_lines.append(f" Duration ({dur:>5.0f}s) +0.10 (>=60s)")
        else:
            reason_lines.append(f" Duration ({dur:>5.0f}s) +0.00 (<60s)")

        if step_count and step_count > 0:
            reason_lines.append(" Step count +0.20 (framework callback)")
        else:
            reason_lines.append(" Step count +0.00 (no callback)")

        reason_lines.append(" Signal cap max 0.60 (NVML_ONLY)")

        if recommendation:
            reason_lines.append("")
            reason_lines.append(f" Recommendation: {recommendation}")

        console.print(Panel("\n".join(reason_lines), title="Recommendation Reasoning", border_style="cyan", padding=(1, 0)))

        # --- Probe Samples ---
        if result.samples:
            table = Table(
                show_header=True, header_style="bold cyan", box=None,
                padding=(0, 1), title="Probe Samples",
            )
            table.add_column("#", style="dim", justify="right")
            table.add_column("Time (s)", justify="right")
            table.add_column("VRAM (MB)", justify="right")
            table.add_column("GPU Util %", justify="right")
            table.add_column("Power (W)", justify="right")

            for i, s in enumerate(result.samples):
                table.add_row(
                    str(i),
                    f"{s.get('t', 0):.1f}",
                    f"{s.get('vram_mb', 0):.0f}",
                    f"{s.get('gpu_util_pct', 0):.1f}",
                    f"{s.get('power_w', 0):.0f}",
                )

            console.print()
            console.print(table)
            console.print()
    except ImportError:
        pass


def print_verbose_ghost(report):
    # type: (GhostReport) -> None
    """Print detailed VRAM formula breakdown."""
    try:
        from rich.console import Console
        from rich.panel import Panel

        console = Console()

        bytes_map = {"fp32": 4, "float32": 4, "fp16": 2, "float16": 2, "bf16": 2, "bfloat16": 2, "int8": 1}
        bpp = bytes_map.get(report.dtype, 2)

        lines = []
        lines.append(f" Parameters {report.param_count:,} ({report.param_count_b:.3f}B)")
        lines.append(f" Dtype {report.dtype} ({bpp} bytes/param)")
        lines.append("")
        lines.append(f" Weights {report.param_count_b:.3f}B x {bpp} bytes = {report.weights_gb:.2f} GB")
        lines.append(f" Gradients {report.param_count_b:.3f}B x {bpp} bytes = {report.gradients_gb:.2f} GB")
        lines.append(f" Optimizer {report.param_count_b:.3f}B x 12 bytes (Adam: fp32 params + momentum + variance) = {report.optimizer_gb:.2f} GB")
        lines.append(f" Activations batch_size x seq_len x hidden_dim x {bpp} bytes = {report.activations_gb:.2f} GB")
        subtotal = report.weights_gb + report.gradients_gb + report.optimizer_gb + report.activations_gb
        lines.append(f" Buffer 10% x ({subtotal:.2f} GB) = {report.buffer_gb:.2f} GB")
        lines.append("")
        lines.append(f" Total {report.total_gb:.2f} GB")
        confidence_label = _ghost_confidence_label(getattr(report, "extraction_method", None))
        lines.append(f" Confidence {confidence_label}")

        console.print(Panel("\n".join(lines), title="VRAM Formula Breakdown", border_style="cyan", padding=(1, 0)))
    except ImportError:
        pass


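The 12 bytes/param on the Adam line is just the sum named in the string, easy to sanity-check for a 7B model (decimal GB assumed, matching the breakdown above):

ADAM_BYTES_PER_PARAM = 4 + 4 + 4  # fp32 master copy + momentum + variance
optimizer_gb = 7.0e9 * ADAM_BYTES_PER_PARAM / 1e9
assert optimizer_gb == 84.0       # matches "7.000B x 12 bytes = 84.00 GB"
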
def _ghost_confidence_label(extraction_method):
    # type: (Optional[str]) -> str
    """Return confidence label based on how model params were extracted."""
    if extraction_method == "execution":
        return "85% (exact param count)"
    if extraction_method == "ast":
        return "75% (inferred from model name)"
    return "80% (static estimate)"


def _bottleneck_color(bottleneck):
    # type: (str) -> str
    """Rich color for bottleneck classification."""
    if bottleneck == "underutilized":
        return "yellow"
    if bottleneck == "memory_bound":
        return "red"
    if bottleneck in ("compute_bound", "balanced"):
        return "green"
    return "dim"