alloc 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alloc/__init__.py +11 -0
- alloc/artifact_writer.py +67 -0
- alloc/callbacks.py +342 -0
- alloc/catalog/__init__.py +138 -0
- alloc/catalog/default_rate_card.json +18 -0
- alloc/catalog/gpus.v1.json +174 -0
- alloc/cli.py +1341 -0
- alloc/config.py +124 -0
- alloc/context.py +191 -0
- alloc/display.py +580 -0
- alloc/extractor_runner.py +141 -0
- alloc/ghost.py +167 -0
- alloc/model_extractor.py +170 -0
- alloc/model_registry.py +138 -0
- alloc/probe.py +461 -0
- alloc/stability.py +144 -0
- alloc/upload.py +138 -0
- alloc/yaml_config.py +287 -0
- alloc-0.0.1.dist-info/METADATA +256 -0
- alloc-0.0.1.dist-info/RECORD +23 -0
- alloc-0.0.1.dist-info/WHEEL +5 -0
- alloc-0.0.1.dist-info/entry_points.txt +2 -0
- alloc-0.0.1.dist-info/top_level.txt +1 -0
alloc/display.py
ADDED
|
@@ -0,0 +1,580 @@
|
|
|
1
|
+
"""Rich terminal formatting for Alloc reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Optional, TYPE_CHECKING
|
|
6
|
+
|
|
7
|
+
if TYPE_CHECKING:
|
|
8
|
+
from alloc.ghost import GhostReport
|
|
9
|
+
from alloc.probe import ProbeResult
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def print_ghost_report(report: GhostReport) -> None:
    """Render a GhostReport as a Rich panel, with a plain-text fallback.

    The per-component VRAM breakdown (weights, gradients, optimizer,
    activations, buffer) is shown as a two-column table inside a panel.
    When Rich is not importable, delegates to ``_print_ghost_plain``.
    """
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.panel import Panel

        out = Console()

        breakdown = Table(show_header=True, header_style="bold cyan", box=None, padding=(0, 2))
        breakdown.add_column("Component", style="dim")
        breakdown.add_column("Size", justify="right", style="bold")

        # Component rows share one format, so drive them from a table.
        component_rows = (
            ("Model weights", report.weights_gb),
            ("Gradients", report.gradients_gb),
            ("Optimizer (Adam)", report.optimizer_gb),
            ("Activations (est.)", report.activations_gb),
            ("Buffer (10%)", report.buffer_gb),
        )
        for label, gigabytes in component_rows:
            breakdown.add_row(label, f"{gigabytes:.2f} GB")
        breakdown.add_row("", "")  # spacer before the total
        breakdown.add_row("[bold]Total VRAM[/bold]", f"[bold green]{report.total_gb:.2f} GB[/bold green]")

        header = f"Ghost Scan — {report.param_count_b:.1f}B params ({report.dtype})"
        # extraction_method may be absent on older reports; getattr keeps this tolerant.
        confidence_label = _ghost_confidence_label(getattr(report, "extraction_method", None))
        out.print()
        out.print(Panel(breakdown, title=header, border_style="green", padding=(1, 2)))
        out.print(f" [dim]Confidence: {confidence_label}[/dim]")
        out.print()
    except ImportError:
        # Rich is optional — degrade to the plain-text renderer.
        _print_ghost_plain(report)
|
44
|
+
def _print_ghost_plain(report: GhostReport) -> None:
    """Plain-text fallback when Rich is not available."""
    confidence_label = _ghost_confidence_label(getattr(report, "extraction_method", None))
    rule = f" {'─' * 40}"

    print(f"\n Ghost Scan — {report.param_count_b:.1f}B params ({report.dtype})")
    print(rule)
    # Every component line shares the same right-aligned layout.
    for label, gigabytes in (
        ("Model weights:", report.weights_gb),
        ("Gradients:", report.gradients_gb),
        ("Optimizer (Adam):", report.optimizer_gb),
        ("Activations (est.):", report.activations_gb),
        ("Buffer (10%):", report.buffer_gb),
    ):
        print(f" {label} {gigabytes:>8.2f} GB")
    print(rule)
    print(f" Total VRAM: {report.total_gb:>8.2f} GB")
    print(f"\n Confidence: {confidence_label}\n")
|
+
def print_probe_result(result: ProbeResult) -> None:
    """Print a ProbeResult to terminal.

    Renders a Rich panel with peak VRAM, average GPU utilization, power
    draw, duration, sample count, and (when the probed process has
    finished) its exit status. Falls back to a short plain-text summary
    when Rich is not importable.

    Args:
        result: Probe measurements to display.
    """
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.panel import Panel

        console = Console()

        table = Table(show_header=True, header_style="bold cyan", box=None, padding=(0, 2))
        table.add_column("Metric", style="dim")
        table.add_column("Value", justify="right", style="bold")

        table.add_row("Peak VRAM", f"{result.peak_vram_mb:.0f} MB ({result.peak_vram_gb:.2f} GB)")
        table.add_row("Avg GPU Utilization", f"{result.avg_gpu_util:.1f}%")
        table.add_row("Avg Power Draw", f"{result.avg_power_watts:.0f} W")
        table.add_row("Duration", f"{result.duration_seconds:.1f}s")
        table.add_row("Samples", f"{len(result.samples)}")

        # exit_code is None when the process outcome is unknown — omit the row.
        if result.exit_code is not None:
            status = "[green]success[/green]" if result.exit_code == 0 else f"[red]exit {result.exit_code}[/red]"
            table.add_row("Process", status)

        console.print()
        console.print(Panel(table, title="Alloc Probe Results", border_style="blue", padding=(1, 2)))
        console.print()
    except ImportError:
        # Plain-text fallback when Rich is unavailable.
        # Fix: the title was an f-string with no placeholders (ruff F541).
        print("\n Alloc Probe Results")
        print(f" Peak VRAM: {result.peak_vram_mb:.0f} MB")
        print(f" Avg GPU Util: {result.avg_gpu_util:.1f}%")
        print(f" Duration: {result.duration_seconds:.1f}s\n")
+
# --- Verdict display for calibrate-and-exit mode ---
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def print_verdict(result, artifact_path="", step_count=None, callback_data=None, budget_context=None):
    # type: (ProbeResult, str, Optional[int], Optional[dict], Optional[dict]) -> None
    """Print a verdict summary panel after calibration.

    Classifies the workload bottleneck, scores confidence, and renders a
    Rich panel (plain-text fallback when Rich is missing) with VRAM and
    utilization stats, optional framework-callback timing, and an
    optional budget projection.

    Args:
        result: Probe measurements (peak VRAM, GPU utilization, samples, ...).
        artifact_path: Path of the written artifact; shown with an upload hint.
        step_count: Training step count from a framework callback, if known.
        callback_data: Timing metrics dict from a framework callback, if any.
        budget_context: Cost/budget settings (e.g. from .alloc.yaml), if any.
    """
    vram_util_pct = result.vram_utilization_pct
    bottleneck = _classify_bottleneck_local(
        result.peak_vram_mb,
        result.gpu_total_vram_mb,
        result.avg_gpu_util,
    )
    confidence = _compute_confidence_local(
        len(result.samples),
        result.duration_seconds,
        step_count,
        callback_data=callback_data,
    )
    recommendation = _qualitative_recommendation(bottleneck, vram_util_pct, result.avg_gpu_util)
    duration_label = _stop_reason_label(result.stop_reason, result.calibration_duration_s)

    # GPU label: shorten "NVIDIA A100-SXM4-80GB" → "NVIDIA A100-SXM4"
    gpu_short = result.gpu_name or "Unknown GPU"
    if gpu_short and "-" in gpu_short:
        parts = gpu_short.split("-")
        if parts[-1].upper().endswith("GB"):
            gpu_short = "-".join(parts[:-1])

    # Process status (None exit code means the outcome is unknown)
    if result.exit_code is None:
        proc_status = "unknown"
    elif result.exit_code == 0:
        proc_status = "success"
    else:
        proc_status = f"exit {result.exit_code}"

    try:
        from rich.console import Console
        from rich.panel import Panel
        # Fix: removed unused `from rich.text import Text` (ruff F401).

        console = Console()

        color = _bottleneck_color(bottleneck)
        bottleneck_display = bottleneck.replace("_", " ").title()
        title = f"Verdict: {bottleneck_display} (confidence: {confidence:.2f})"

        lines = []

        # Peak VRAM line
        peak_gb = result.peak_vram_mb / 1024
        if result.gpu_total_vram_mb:
            total_gb = result.gpu_total_vram_mb / 1024
            lines.append(f" Peak VRAM {peak_gb:.1f} GB / {total_gb:.1f} GB ({gpu_short})")
        else:
            lines.append(f" Peak VRAM {peak_gb:.1f} GB ({gpu_short})")

        if vram_util_pct is not None:
            lines.append(f" VRAM used {vram_util_pct:.1f}%")

        lines.append(f" Avg GPU util {result.avg_gpu_util:.1f}%")
        lines.append(f" Avg power {result.avg_power_watts:.0f} W")
        lines.append(f" Duration {result.duration_seconds:.1f}s ({duration_label})")
        lines.append(f" Samples {len(result.samples)}")
        lines.append(f" Process {proc_status}")

        # Timing from framework callbacks
        if callback_data:
            p50 = callback_data.get("step_time_ms_p50")
            p90 = callback_data.get("step_time_ms_p90")
            sps = callback_data.get("samples_per_sec")
            dl_wait = callback_data.get("dataloader_wait_pct")
            if p50 is not None and p90 is not None:
                lines.append(f" Step time {p50:.1f} ms (p50) / {p90:.1f} ms (p90)")
            if sps is not None:
                lines.append(f" Throughput {sps:.1f} samples/sec")
            # Only surface dataloader wait when it is high enough to matter.
            if dl_wait is not None and dl_wait > 15:
                lines.append(f" Dataloader ~{dl_wait:.0f}% wait (consider more workers)")

        # Budget projection from .alloc.yaml context
        if budget_context:
            cph = budget_context.get("cost_per_hour")
            budget_mo = budget_context.get("budget_monthly")
            if cph is not None and cph > 0:
                # Assume 8 hrs/day * 22 working days/month = 176 hrs
                monthly_est = cph * 176
                lines.append(f" Est. monthly ~${monthly_est:,.0f}/mo at current rate (8h/day)")
                if budget_mo is not None and budget_mo > 0:
                    pct = (monthly_est / budget_mo) * 100
                    cap_note = ""
                    if budget_context.get("budget_cap_applied"):
                        cap_note = " (org cap applied)"
                    lines.append(f" Budget {pct:.0f}% of ${budget_mo:,.0f}/mo{cap_note}")

        if recommendation:
            lines.append("")
            lines.append(f" Suggestion: {recommendation}")

        content = "\n".join(lines)
        console.print()
        console.print(Panel(content, title=title, border_style=color, padding=(1, 0)))

        if artifact_path:
            console.print(f" [dim]Artifact: {artifact_path}[/dim]")
            console.print(f" [dim]Next: alloc upload {artifact_path}[/dim]")
        console.print()

    except ImportError:
        _print_verdict_plain(
            result, bottleneck, confidence, vram_util_pct,
            gpu_short, duration_label, proc_status,
            recommendation, artifact_path,
        )
|
+
def _print_verdict_plain(
|
|
208
|
+
result, bottleneck, confidence, vram_util_pct,
|
|
209
|
+
gpu_short, duration_label, proc_status,
|
|
210
|
+
recommendation, artifact_path,
|
|
211
|
+
):
|
|
212
|
+
# type: (...) -> None
|
|
213
|
+
"""Plain-text fallback for verdict."""
|
|
214
|
+
bottleneck_display = bottleneck.replace("_", " ").title()
|
|
215
|
+
print(f"\n Verdict: {bottleneck_display} (confidence: {confidence:.2f})")
|
|
216
|
+
print(f" {'─' * 50}")
|
|
217
|
+
|
|
218
|
+
peak_gb = result.peak_vram_mb / 1024
|
|
219
|
+
if result.gpu_total_vram_mb:
|
|
220
|
+
total_gb = result.gpu_total_vram_mb / 1024
|
|
221
|
+
print(f" Peak VRAM {peak_gb:.1f} GB / {total_gb:.1f} GB ({gpu_short})")
|
|
222
|
+
else:
|
|
223
|
+
print(f" Peak VRAM {peak_gb:.1f} GB ({gpu_short})")
|
|
224
|
+
|
|
225
|
+
if vram_util_pct is not None:
|
|
226
|
+
print(f" VRAM used {vram_util_pct:.1f}%")
|
|
227
|
+
|
|
228
|
+
print(f" Avg GPU util {result.avg_gpu_util:.1f}%")
|
|
229
|
+
print(f" Avg power {result.avg_power_watts:.0f} W")
|
|
230
|
+
print(f" Duration {result.duration_seconds:.1f}s ({duration_label})")
|
|
231
|
+
print(f" Samples {len(result.samples)}")
|
|
232
|
+
print(f" Process {proc_status}")
|
|
233
|
+
|
|
234
|
+
if recommendation:
|
|
235
|
+
print(f"\n Suggestion: {recommendation}")
|
|
236
|
+
|
|
237
|
+
print(f" {'─' * 50}")
|
|
238
|
+
if artifact_path:
|
|
239
|
+
print(f" Artifact: {artifact_path}")
|
|
240
|
+
print(f" Next: alloc upload {artifact_path}")
|
|
241
|
+
print()
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _classify_bottleneck_local(peak_vram_mb, gpu_total_vram_mb, avg_gpu_util):
|
|
245
|
+
# type: (float, Optional[float], float) -> str
|
|
246
|
+
"""Classify workload bottleneck. Matches analyzer.py:116-132 thresholds."""
|
|
247
|
+
compute_util = avg_gpu_util
|
|
248
|
+
|
|
249
|
+
if gpu_total_vram_mb is None or gpu_total_vram_mb <= 0:
|
|
250
|
+
# Degrade to util-only classification
|
|
251
|
+
if compute_util > 80:
|
|
252
|
+
return "compute_bound"
|
|
253
|
+
if compute_util < 40:
|
|
254
|
+
return "underutilized"
|
|
255
|
+
return "unknown"
|
|
256
|
+
|
|
257
|
+
vram_util = (peak_vram_mb / gpu_total_vram_mb) * 100
|
|
258
|
+
|
|
259
|
+
if compute_util > 80:
|
|
260
|
+
return "compute_bound"
|
|
261
|
+
if compute_util < 40:
|
|
262
|
+
if vram_util < 60:
|
|
263
|
+
return "underutilized"
|
|
264
|
+
if vram_util > 80:
|
|
265
|
+
return "memory_bound"
|
|
266
|
+
return "balanced"
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
def _compute_confidence_local(sample_count, duration_s, step_count=None, callback_data=None):
|
|
270
|
+
# type: (int, float, Optional[int], Optional[dict]) -> float
|
|
271
|
+
"""Estimate analysis confidence. Matches analyzer.py confidence formula.
|
|
272
|
+
|
|
273
|
+
Cap depends on signal level:
|
|
274
|
+
NVML_ONLY (no callback timing) → max 0.6
|
|
275
|
+
FRAMEWORK_TIMING (callback with step timing) → max 0.85
|
|
276
|
+
"""
|
|
277
|
+
score = 0.3 # Baseline: we have probe data
|
|
278
|
+
|
|
279
|
+
if sample_count >= 100:
|
|
280
|
+
score += 0.3
|
|
281
|
+
elif sample_count >= 20:
|
|
282
|
+
score += 0.2
|
|
283
|
+
elif sample_count >= 5:
|
|
284
|
+
score += 0.1
|
|
285
|
+
|
|
286
|
+
if duration_s >= 300:
|
|
287
|
+
score += 0.2
|
|
288
|
+
elif duration_s >= 60:
|
|
289
|
+
score += 0.1
|
|
290
|
+
|
|
291
|
+
if step_count and step_count > 0:
|
|
292
|
+
score += 0.2
|
|
293
|
+
|
|
294
|
+
# Determine signal level cap
|
|
295
|
+
has_timing = (
|
|
296
|
+
callback_data is not None
|
|
297
|
+
and callback_data.get("step_time_ms_p50") is not None
|
|
298
|
+
)
|
|
299
|
+
cap = 0.85 if has_timing else 0.6
|
|
300
|
+
return min(score, cap)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def _qualitative_recommendation(bottleneck, vram_utilization_pct, avg_gpu_util):
|
|
304
|
+
# type: (str, Optional[float], float) -> Optional[str]
|
|
305
|
+
"""Return a human-readable suggestion based on bottleneck classification."""
|
|
306
|
+
if bottleneck == "underutilized":
|
|
307
|
+
if vram_utilization_pct is not None and vram_utilization_pct < 30:
|
|
308
|
+
return "GPU is significantly oversized. Consider a smaller GPU."
|
|
309
|
+
return "GPU is underutilized. Consider a smaller or fewer GPUs."
|
|
310
|
+
|
|
311
|
+
if bottleneck == "memory_bound":
|
|
312
|
+
return "Workload is memory-bound. Consider higher-bandwidth GPU or FSDP."
|
|
313
|
+
|
|
314
|
+
if bottleneck == "compute_bound":
|
|
315
|
+
if vram_utilization_pct is not None and vram_utilization_pct < 70:
|
|
316
|
+
return "Compute-bound with VRAM headroom. Try increasing batch size."
|
|
317
|
+
return None
|
|
318
|
+
|
|
319
|
+
# balanced / unknown
|
|
320
|
+
return None
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def _stop_reason_label(stop_reason, calibration_duration_s=None):
|
|
324
|
+
# type: (Optional[str], Optional[float]) -> str
|
|
325
|
+
"""Human-readable label for stop reason."""
|
|
326
|
+
if stop_reason == "stable":
|
|
327
|
+
if calibration_duration_s is not None:
|
|
328
|
+
return f"auto-stopped: metrics stable at {calibration_duration_s:.1f}s"
|
|
329
|
+
return "auto-stopped: metrics stable"
|
|
330
|
+
|
|
331
|
+
if stop_reason == "timeout":
|
|
332
|
+
return "reached timeout — increase --timeout for more data"
|
|
333
|
+
|
|
334
|
+
if stop_reason == "process_exit":
|
|
335
|
+
return "training process exited"
|
|
336
|
+
|
|
337
|
+
return "unknown"
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
def build_verdict_dict(result, artifact_path="", step_count=None, callback_data=None, budget_context=None):
    # type: (ProbeResult, str, Optional[int], Optional[dict], Optional[dict]) -> dict
    """Build a verdict dict with the same data as print_verdict, for JSON output.

    Runs the same classification, confidence, and labeling helpers as the
    terminal verdict panel so JSON consumers and the on-screen verdict
    always agree.

    Args:
        result: Probe measurements (peak VRAM, GPU utilization, samples, ...).
        artifact_path: Path of the written artifact, or "" if none.
        step_count: Training step count from a framework callback, if known.
        callback_data: Timing metrics dict from a framework callback, if any.
        budget_context: Cost/budget settings (e.g. from .alloc.yaml), if any.

    Returns:
        A JSON-serializable dict; timing and budget sections are included
        only when their inputs are present.
    """
    vram_util_pct = result.vram_utilization_pct
    bottleneck = _classify_bottleneck_local(
        result.peak_vram_mb,
        result.gpu_total_vram_mb,
        result.avg_gpu_util,
    )
    confidence = _compute_confidence_local(
        len(result.samples),
        result.duration_seconds,
        step_count,
        callback_data=callback_data,
    )
    recommendation = _qualitative_recommendation(bottleneck, vram_util_pct, result.avg_gpu_util)
    duration_label = _stop_reason_label(result.stop_reason, result.calibration_duration_s)

    # Human-readable process status; a None exit code means still unknown.
    if result.exit_code is None:
        proc_status = "unknown"
    elif result.exit_code == 0:
        proc_status = "success"
    else:
        proc_status = f"exit {result.exit_code}"

    d = {
        "bottleneck": bottleneck,
        "confidence": round(confidence, 2),
        "peak_vram_mb": round(result.peak_vram_mb, 1),
        "peak_vram_gb": round(result.peak_vram_mb / 1024, 2),
        "gpu_name": result.gpu_name,
        "gpu_total_vram_mb": result.gpu_total_vram_mb,
        "vram_utilization_pct": round(vram_util_pct, 1) if vram_util_pct is not None else None,
        "avg_gpu_util": round(result.avg_gpu_util, 1),
        "avg_power_watts": round(result.avg_power_watts, 0),
        "duration_seconds": round(result.duration_seconds, 1),
        "duration_label": duration_label,
        "sample_count": len(result.samples),
        "process_status": proc_status,
        "recommendation": recommendation,
        # Empty string normalizes to None for JSON consumers.
        "artifact_path": artifact_path or None,
    }
    # Include timing fields from callback data
    if callback_data:
        for key in ("step_time_ms_p50", "step_time_ms_p90", "samples_per_sec", "dataloader_wait_pct"):
            val = callback_data.get(key)
            if val is not None:
                d[key] = val
    # Include budget projection
    if budget_context:
        cph = budget_context.get("cost_per_hour")
        budget_mo = budget_context.get("budget_monthly")
        if cph is not None and cph > 0:
            # 176 hrs ≈ 8 hrs/day * 22 working days/month (same as print_verdict).
            monthly_est = cph * 176
            d["budget_projection"] = {
                "cost_per_hour": cph,
                "est_monthly": round(monthly_est, 2),
                "budget_monthly": budget_mo,
                "budget_pct": round((monthly_est / budget_mo) * 100, 1) if budget_mo and budget_mo > 0 else None,
                "budget_cap_applied": budget_context.get("budget_cap_applied", False),
                "org_budget_monthly": budget_context.get("org_budget_monthly"),
            }
    return d
|
+
def print_verbose_run(result, step_count=None):
    # type: (ProbeResult, Optional[int]) -> None
    """Print detailed probe data: hardware context, sample dump, recommendation reasoning.

    Emits three Rich sections: a hardware-context panel, a panel that
    spells out how the bottleneck/confidence verdict was derived, and a
    table of raw probe samples. Produces no output when Rich is not
    importable.

    NOTE(review): confidence here is computed WITHOUT callback_data, so the
    hard-coded "Signal cap max 0.60 (NVML_ONLY)" line matches this local
    computation — but it can differ from print_verdict's confidence when a
    framework callback supplied step timing; confirm this is intended.
    """
    try:
        from rich.console import Console
        from rich.table import Table
        from rich.panel import Panel

        console = Console()

        # --- Hardware Context ---
        # Only fields the probe actually captured are shown.
        hw_lines = []
        if result.gpu_name:
            hw_lines.append(f" GPU {result.gpu_name}")
        if result.gpu_total_vram_mb:
            hw_lines.append(f" Total VRAM {result.gpu_total_vram_mb:.0f} MB ({result.gpu_total_vram_mb / 1024:.1f} GB)")
        if result.driver_version:
            hw_lines.append(f" Driver {result.driver_version}")
        if result.cuda_version:
            hw_lines.append(f" CUDA {result.cuda_version}")
        if result.sm_version:
            hw_lines.append(f" SM Compute {result.sm_version}")
        hw_lines.append(f" GPUs detected {result.num_gpus_detected}")
        if result.probe_mode:
            hw_lines.append(f" Probe mode {result.probe_mode}")
        if result.stop_reason:
            hw_lines.append(f" Stop reason {result.stop_reason}")
        if step_count:
            hw_lines.append(f" Step count {step_count} (from framework callback)")

        if hw_lines:
            console.print(Panel("\n".join(hw_lines), title="Hardware Context", border_style="cyan", padding=(1, 0)))

        # --- Recommendation Reasoning ---
        # Re-runs the same helpers as print_verdict (minus callback_data; see NOTE above).
        vram_util_pct = result.vram_utilization_pct
        bottleneck = _classify_bottleneck_local(
            result.peak_vram_mb, result.gpu_total_vram_mb, result.avg_gpu_util,
        )
        confidence = _compute_confidence_local(
            len(result.samples), result.duration_seconds, step_count,
        )
        recommendation = _qualitative_recommendation(bottleneck, vram_util_pct, result.avg_gpu_util)

        reason_lines = []
        reason_lines.append(f" Bottleneck: {bottleneck}")

        # Explain classification thresholds
        if result.gpu_total_vram_mb and result.gpu_total_vram_mb > 0:
            vram_util = (result.peak_vram_mb / result.gpu_total_vram_mb) * 100
            reason_lines.append(f" VRAM util = {vram_util:.1f}% (thresholds: <60% low, >80% high)")
        reason_lines.append(f" GPU util = {result.avg_gpu_util:.1f}% (thresholds: <40% low, >80% high)")

        # Spell out which threshold combination produced the classification.
        if bottleneck == "underutilized":
            reason_lines.append(f" -> GPU util < 40% AND VRAM util < 60% => underutilized")
        elif bottleneck == "memory_bound":
            reason_lines.append(f" -> GPU util < 40% AND VRAM util > 80% => memory_bound")
        elif bottleneck == "compute_bound":
            reason_lines.append(f" -> GPU util > 80% => compute_bound")
        elif bottleneck == "balanced":
            reason_lines.append(f" -> No extreme thresholds hit => balanced")

        reason_lines.append("")
        reason_lines.append(f" Confidence: {confidence:.2f}")
        reason_lines.append(f" Base score +0.30 (have probe data)")

        # Itemize confidence bonuses (mirrors _compute_confidence_local).
        sample_count = len(result.samples)
        if sample_count >= 100:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.30 (>=100)")
        elif sample_count >= 20:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.20 (>=20)")
        elif sample_count >= 5:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.10 (>=5)")
        else:
            reason_lines.append(f" Samples ({sample_count:>4}) +0.00 (<5)")

        dur = result.duration_seconds
        if dur >= 300:
            reason_lines.append(f" Duration ({dur:>5.0f}s) +0.20 (>=300s)")
        elif dur >= 60:
            reason_lines.append(f" Duration ({dur:>5.0f}s) +0.10 (>=60s)")
        else:
            reason_lines.append(f" Duration ({dur:>5.0f}s) +0.00 (<60s)")

        if step_count and step_count > 0:
            reason_lines.append(f" Step count +0.20 (framework callback)")
        else:
            reason_lines.append(f" Step count +0.00 (no callback)")

        reason_lines.append(f" Signal cap max 0.60 (NVML_ONLY)")

        if recommendation:
            reason_lines.append("")
            reason_lines.append(f" Recommendation: {recommendation}")

        console.print(Panel("\n".join(reason_lines), title="Recommendation Reasoning", border_style="cyan", padding=(1, 0)))

        # --- Probe Samples ---
        if result.samples:
            table = Table(
                show_header=True, header_style="bold cyan", box=None,
                padding=(0, 1), title="Probe Samples",
            )
            table.add_column("#", style="dim", justify="right")
            table.add_column("Time (s)", justify="right")
            table.add_column("VRAM (MB)", justify="right")
            table.add_column("GPU Util %", justify="right")
            table.add_column("Power (W)", justify="right")

            # Samples appear to be dicts from the probe loop; missing keys default to 0.
            for i, s in enumerate(result.samples):
                table.add_row(
                    str(i),
                    f"{s.get('t', 0):.1f}",
                    f"{s.get('vram_mb', 0):.0f}",
                    f"{s.get('gpu_util_pct', 0):.1f}",
                    f"{s.get('power_w', 0):.0f}",
                )

            console.print()
            console.print(table)
            console.print()
    except ImportError:
        # Verbose output is Rich-only; silently skip when Rich is unavailable.
        pass
|
+
def print_verbose_ghost(report):
    # type: (GhostReport) -> None
    """Print detailed VRAM formula breakdown."""
    try:
        from rich.console import Console
        from rich.panel import Panel
    except ImportError:
        # The breakdown panel is Rich-only; emit nothing without it.
        return

    # Bytes per parameter for the report's dtype (unknown dtypes assume 2).
    dtype_bytes = {"fp32": 4, "float32": 4, "fp16": 2, "float16": 2, "bf16": 2, "bfloat16": 2, "int8": 1}
    bpp = dtype_bytes.get(report.dtype, 2)

    subtotal = report.weights_gb + report.gradients_gb + report.optimizer_gb + report.activations_gb
    confidence_label = _ghost_confidence_label(getattr(report, "extraction_method", None))

    body = [
        f" Parameters {report.param_count:,} ({report.param_count_b:.3f}B)",
        f" Dtype {report.dtype} ({bpp} bytes/param)",
        "",
        f" Weights {report.param_count_b:.3f}B x {bpp} bytes = {report.weights_gb:.2f} GB",
        f" Gradients {report.param_count_b:.3f}B x {bpp} bytes = {report.gradients_gb:.2f} GB",
        f" Optimizer {report.param_count_b:.3f}B x 12 bytes (Adam: fp32 params + momentum + variance) = {report.optimizer_gb:.2f} GB",
        f" Activations batch_size x seq_len x hidden_dim x {bpp} bytes = {report.activations_gb:.2f} GB",
        f" Buffer 10% x ({subtotal:.2f} GB) = {report.buffer_gb:.2f} GB",
        "",
        f" Total {report.total_gb:.2f} GB",
        f" Confidence {confidence_label}",
    ]

    Console().print(Panel("\n".join(body), title="VRAM Formula Breakdown", border_style="cyan", padding=(1, 0)))
|
+
def _ghost_confidence_label(extraction_method):
|
|
562
|
+
# type: (Optional[str]) -> str
|
|
563
|
+
"""Return confidence label based on how model params were extracted."""
|
|
564
|
+
if extraction_method == "execution":
|
|
565
|
+
return "85% (exact param count)"
|
|
566
|
+
if extraction_method == "ast":
|
|
567
|
+
return "75% (inferred from model name)"
|
|
568
|
+
return "80% (static estimate)"
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
def _bottleneck_color(bottleneck):
|
|
572
|
+
# type: (str) -> str
|
|
573
|
+
"""Rich color for bottleneck classification."""
|
|
574
|
+
if bottleneck == "underutilized":
|
|
575
|
+
return "yellow"
|
|
576
|
+
if bottleneck == "memory_bound":
|
|
577
|
+
return "red"
|
|
578
|
+
if bottleneck in ("compute_bound", "balanced"):
|
|
579
|
+
return "green"
|
|
580
|
+
return "dim"
|