claude-turing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/.claude-plugin/plugin.json +34 -0
  2. package/LICENSE +21 -0
  3. package/README.md +457 -0
  4. package/agents/ml-evaluator.md +43 -0
  5. package/agents/ml-researcher.md +74 -0
  6. package/bin/cli.js +46 -0
  7. package/bin/turing-init.sh +57 -0
  8. package/commands/brief.md +83 -0
  9. package/commands/compare.md +24 -0
  10. package/commands/design.md +97 -0
  11. package/commands/init.md +123 -0
  12. package/commands/logbook.md +51 -0
  13. package/commands/mode.md +43 -0
  14. package/commands/poster.md +89 -0
  15. package/commands/preflight.md +75 -0
  16. package/commands/report.md +97 -0
  17. package/commands/rules/loop-protocol.md +91 -0
  18. package/commands/status.md +24 -0
  19. package/commands/suggest.md +95 -0
  20. package/commands/sweep.md +45 -0
  21. package/commands/train.md +66 -0
  22. package/commands/try.md +63 -0
  23. package/commands/turing.md +54 -0
  24. package/commands/validate.md +34 -0
  25. package/config/defaults.yaml +45 -0
  26. package/config/experiment_archetypes.yaml +127 -0
  27. package/config/lifecycle.toml +31 -0
  28. package/config/novelty_aliases.yaml +107 -0
  29. package/config/relationships.toml +125 -0
  30. package/config/state.toml +24 -0
  31. package/config/task_taxonomy.yaml +110 -0
  32. package/config/taxonomy.toml +37 -0
  33. package/package.json +54 -0
  34. package/src/claude-md.js +55 -0
  35. package/src/install.js +107 -0
  36. package/src/paths.js +20 -0
  37. package/src/postinstall.js +22 -0
  38. package/src/verify.js +109 -0
  39. package/templates/MEMORY.md +36 -0
  40. package/templates/README.md +93 -0
  41. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  42. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  43. package/templates/config.yaml +48 -0
  44. package/templates/evaluate.py +237 -0
  45. package/templates/features/__init__.py +0 -0
  46. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  47. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  48. package/templates/features/featurizers.py +138 -0
  49. package/templates/prepare.py +171 -0
  50. package/templates/program.md +216 -0
  51. package/templates/pyproject.toml +8 -0
  52. package/templates/requirements.txt +8 -0
  53. package/templates/scripts/__init__.py +0 -0
  54. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  56. package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
  57. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  58. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  59. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  60. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  61. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  62. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  63. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  64. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  65. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  66. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  67. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  68. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  69. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  70. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  71. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  72. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  73. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  74. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  75. package/templates/scripts/check_convergence.py +230 -0
  76. package/templates/scripts/compare_runs.py +124 -0
  77. package/templates/scripts/critique_hypothesis.py +350 -0
  78. package/templates/scripts/experiment_index.py +288 -0
  79. package/templates/scripts/generate_brief.py +389 -0
  80. package/templates/scripts/generate_logbook.py +423 -0
  81. package/templates/scripts/log_experiment.py +243 -0
  82. package/templates/scripts/manage_hypotheses.py +543 -0
  83. package/templates/scripts/novelty_guard.py +343 -0
  84. package/templates/scripts/parse_metrics.py +139 -0
  85. package/templates/scripts/post-train-hook.sh +74 -0
  86. package/templates/scripts/preflight.py +549 -0
  87. package/templates/scripts/scaffold.py +409 -0
  88. package/templates/scripts/show_environment.py +92 -0
  89. package/templates/scripts/show_experiment_tree.py +144 -0
  90. package/templates/scripts/show_families.py +133 -0
  91. package/templates/scripts/show_metrics.py +157 -0
  92. package/templates/scripts/statistical_compare.py +259 -0
  93. package/templates/scripts/stop-hook.sh +34 -0
  94. package/templates/scripts/suggest_next.py +301 -0
  95. package/templates/scripts/sweep.py +276 -0
  96. package/templates/scripts/synthesize_decision.py +300 -0
  97. package/templates/scripts/turing_io.py +76 -0
  98. package/templates/scripts/update_state.py +296 -0
  99. package/templates/scripts/validate_stability.py +167 -0
  100. package/templates/scripts/verify_placeholders.py +119 -0
  101. package/templates/sweep_config.yaml +14 -0
  102. package/templates/tests/__init__.py +0 -0
  103. package/templates/tests/conftest.py +91 -0
  104. package/templates/train.py +240 -0
package/templates/scripts/preflight.py
@@ -0,0 +1,549 @@
+"""Pre-flight resource estimator for ML experiments.
+
+Estimates VRAM, RAM, and disk requirements before running a training script.
+Compares against available system resources and issues warnings or blocks
+if the experiment is likely to fail due to resource constraints.
+
+Works with any ML project — not Turing-specific. Analyzes:
+- Dataset size and shape (from CSV/parquet/splits)
+- Model type and architecture (from config or CLI)
+- Batch size, precision, and expected memory multipliers
+- Available system resources (RAM, VRAM, disk)
+
+Usage:
+    python scripts/preflight.py                          # Auto-detect from config.yaml
+    python scripts/preflight.py --config config.yaml     # Explicit config
+    python scripts/preflight.py --model-type xgboost --dataset data.csv
+    python scripts/preflight.py --model-type torch --params 10M --batch-size 32
+    python scripts/preflight.py --json                   # Machine-readable output
+"""
+
+from __future__ import annotations
+
+import argparse
+import json
+import os
+import shutil
+import sys
+from pathlib import Path
+
+try:
+    import yaml
+    HAS_YAML = True
+except ImportError:
+    HAS_YAML = False
+
+
+# ---------------------------------------------------------------------------
+# System resource detection
+# ---------------------------------------------------------------------------
+
+def get_system_ram_gb() -> float:
+    """Get total system RAM in GB."""
+    try:
+        import psutil
+        return round(psutil.virtual_memory().total / (1024 ** 3), 1)
+    except ImportError:
+        pass
+    # Fallback: read /proc/meminfo on Linux
+    meminfo = Path("/proc/meminfo")
+    if meminfo.exists():
+        for line in meminfo.read_text().splitlines():
+            if line.startswith("MemTotal:"):
+                kb = int(line.split()[1])
+                return round(kb / (1024 ** 2), 1)
+    # macOS fallback
+    try:
+        import subprocess
+        result = subprocess.run(
+            ["sysctl", "-n", "hw.memsize"], capture_output=True, text=True, timeout=5,
+        )
+        if result.returncode == 0:
+            return round(int(result.stdout.strip()) / (1024 ** 3), 1)
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        pass
+    return 0.0
+
+
+def get_available_ram_gb() -> float:
+    """Get available (free + cached) RAM in GB."""
+    try:
+        import psutil
+        return round(psutil.virtual_memory().available / (1024 ** 3), 1)
+    except ImportError:
+        pass
+    meminfo = Path("/proc/meminfo")
+    if meminfo.exists():
+        available = 0
+        for line in meminfo.read_text().splitlines():
+            if line.startswith("MemAvailable:"):
+                available = int(line.split()[1])
+                break
+        if available:
+            return round(available / (1024 ** 2), 1)
+    return 0.0
+
+
+def get_gpu_info() -> list[dict]:
+    """Detect GPUs and their VRAM."""
+    gpus = []
+
+    # Try torch first
+    try:
+        import torch
+        if torch.cuda.is_available():
+            for i in range(torch.cuda.device_count()):
+                props = torch.cuda.get_device_properties(i)
+                gpus.append({
+                    "index": i,
+                    "name": props.name,
+                    "vram_gb": round(props.total_memory / (1024 ** 3), 1),
+                    "vram_free_gb": round(
+                        (props.total_memory - torch.cuda.memory_reserved(i)) / (1024 ** 3), 1,
+                    ),
+                    "source": "torch",
+                })
+            return gpus
+    except ImportError:
+        pass
+
+    # Fallback: nvidia-smi
+    try:
+        import subprocess
+        result = subprocess.run(
+            ["nvidia-smi", "--query-gpu=index,name,memory.total,memory.free",
+             "--format=csv,nounits,noheader"],
+            capture_output=True, text=True, timeout=10,
+        )
+        if result.returncode == 0:
+            for line in result.stdout.strip().splitlines():
+                parts = [p.strip() for p in line.split(",")]
+                if len(parts) >= 4:
+                    gpus.append({
+                        "index": int(parts[0]),
+                        "name": parts[1],
+                        "vram_gb": round(int(parts[2]) / 1024, 1),
+                        "vram_free_gb": round(int(parts[3]) / 1024, 1),
+                        "source": "nvidia-smi",
+                    })
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        pass
+
+    return gpus
+
+
+def get_disk_free_gb(path: str = ".") -> float:
+    """Get free disk space at path in GB."""
+    usage = shutil.disk_usage(path)
+    return round(usage.free / (1024 ** 3), 1)
+
+
+# ---------------------------------------------------------------------------
+# Dataset analysis
+# ---------------------------------------------------------------------------
+
+def estimate_dataset_memory(path: str | None = None, splits_dir: str | None = None) -> dict:
+    """Estimate memory needed to load a dataset.
+
+    Returns dict with: file_size_mb, estimated_ram_gb, rows, columns, source.
+    """
+    result = {
+        "file_size_mb": 0.0,
+        "estimated_ram_gb": 0.0,
+        "rows": 0,
+        "columns": 0,
+        "source": None,
+    }
+
+    # Try splits directory first (Turing convention)
+    if splits_dir:
+        splits_path = Path(splits_dir)
+        if splits_path.exists():
+            total_size = sum(f.stat().st_size for f in splits_path.glob("*") if f.is_file())
+            result["file_size_mb"] = round(total_size / (1024 ** 2), 1)
+            # CSV in RAM is typically 3-5x the file size
+            result["estimated_ram_gb"] = round(total_size * 4 / (1024 ** 3), 2)
+            result["source"] = str(splits_path)
+
+            # Try to count rows/columns from the split files
+            for f in sorted(splits_path.glob("*.csv")):
+                try:
+                    with open(f) as fh:
+                        header = fh.readline()
+                        result["columns"] = len(header.split(","))
+                        lines = sum(1 for _ in fh)
+                        result["rows"] += lines
+                except OSError:
+                    pass
+            return result
+
+    # Try single file
+    if path:
+        p = Path(path)
+        if p.exists():
+            result["file_size_mb"] = round(p.stat().st_size / (1024 ** 2), 1)
+            result["estimated_ram_gb"] = round(p.stat().st_size * 4 / (1024 ** 3), 2)
+            result["source"] = str(p)
+
+            if p.suffix == ".csv":
+                try:
+                    with open(p) as fh:
+                        header = fh.readline()
+                        result["columns"] = len(header.split(","))
+                        result["rows"] = sum(1 for _ in fh)
+                except OSError:
+                    pass
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Model resource estimation
+# ---------------------------------------------------------------------------
+
+def parse_param_count(s: str) -> int:
+    """Parse parameter count strings like '10M', '1.5B', '350K'."""
+    s = s.strip().upper()
+    multipliers = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000, "T": 1_000_000_000_000}
+    for suffix, mult in multipliers.items():
+        if s.endswith(suffix):
+            return int(float(s[:-1]) * mult)
+    return int(float(s))
+
+
+def estimate_model_resources(
+    model_type: str,
+    n_estimators: int = 100,
+    max_depth: int = 6,
+    n_features: int = 50,
+    n_samples: int = 10000,
+    param_count: int | None = None,
+    batch_size: int = 32,
+    precision: str = "fp32",
+    sequence_length: int = 512,
+) -> dict:
+    """Estimate training resource requirements for a model.
+
+    Returns dict with: ram_gb, vram_gb, disk_gb, notes.
+    """
+    bytes_per_param = {"fp32": 4, "fp16": 2, "bf16": 2, "int8": 1, "int4": 0.5}
+    bpp = bytes_per_param.get(precision, 4)
+
+    result = {
+        "model_type": model_type,
+        "ram_gb": 0.0,
+        "vram_gb": 0.0,
+        "disk_gb": 0.0,
+        "requires_gpu": False,
+        "notes": [],
+    }
+
+    mt = model_type.lower()
+
+    if mt in ("xgboost", "lightgbm", "catboost"):
+        # Tree-based: RAM-bound, no GPU required (unless GPU training explicitly used)
+        # Rule of thumb: ~1KB per tree node, depth d → 2^d nodes per tree
+        nodes_per_tree = min(2 ** max_depth, 2 ** 10)  # Cap at 1024
+        tree_memory_gb = (n_estimators * nodes_per_tree * 8 * n_features) / (1024 ** 3)
+        data_memory_gb = (n_samples * n_features * 8) / (1024 ** 3)  # float64
+        result["ram_gb"] = round(tree_memory_gb + data_memory_gb * 2, 2)  # 2x for train + working
+        result["disk_gb"] = round(tree_memory_gb * 2, 2)  # Model artifact
+        result["notes"].append(f"{n_estimators} trees, depth {max_depth}")
+        if n_samples > 1_000_000:
+            result["notes"].append("large dataset — consider subsample parameter")
+
+    elif mt in ("randomforest", "random_forest", "sklearn_rf", "extra_trees"):
+        nodes_per_tree = min(2 ** max_depth, 2 ** 16)
+        tree_memory_gb = (n_estimators * nodes_per_tree * 8 * n_features) / (1024 ** 3)
+        data_memory_gb = (n_samples * n_features * 8) / (1024 ** 3)
+        result["ram_gb"] = round(tree_memory_gb + data_memory_gb * 2, 2)
+        result["disk_gb"] = round(tree_memory_gb * 3, 2)  # sklearn RF models are large
+        result["notes"].append(f"{n_estimators} trees, unlimited depth" if max_depth > 20 else f"{n_estimators} trees, depth {max_depth}")
+
+    elif mt in ("mlp", "neural_network", "nn"):
+        if param_count is None:
+            # Estimate from features: simple 2-layer MLP
+            param_count = n_features * 256 + 256 * 128 + 128 * 1  # ~50K for typical tabular
+        model_gb = (param_count * bpp) / (1024 ** 3)
+        # Training: model + gradients + optimizer state (Adam = 2x) + activations
+        train_multiplier = 4  # model + grad + 2x optimizer state
+        activation_gb = (batch_size * param_count * bpp * 0.5) / (1024 ** 3)
+        result["vram_gb"] = round(model_gb * train_multiplier + activation_gb, 2)
+        result["ram_gb"] = round(model_gb * 2 + (n_samples * n_features * 8) / (1024 ** 3), 2)
+        result["disk_gb"] = round(model_gb * 2, 2)
+        result["requires_gpu"] = result["vram_gb"] > 1.0
+        result["notes"].append(f"{param_count:,} parameters ({precision})")
+        if result["vram_gb"] > 0.5:
+            result["notes"].append("GPU recommended for reasonable training speed")
+
+    elif mt in ("transformer", "llm", "bert", "gpt"):
+        if param_count is None:
+            param_count = 100_000_000  # Default 100M
+        model_gb = (param_count * bpp) / (1024 ** 3)
+        # Transformers: activations scale with batch_size * seq_len * hidden_dim
+        hidden_dim = int((param_count / 12) ** 0.5)  # Rough estimate
+        activation_gb = (batch_size * sequence_length * hidden_dim * bpp * 12) / (1024 ** 3)
+        optimizer_gb = model_gb * 2  # Adam
+        result["vram_gb"] = round(model_gb + optimizer_gb + activation_gb, 2)
+        result["ram_gb"] = round(model_gb + 2, 2)  # Model + data loading overhead
+        result["disk_gb"] = round(model_gb * 3, 2)  # Checkpoints
+        result["requires_gpu"] = True
+        result["notes"].append(f"{param_count:,} parameters ({precision})")
+        result["notes"].append(f"batch_size={batch_size}, seq_len={sequence_length}")
+        if result["vram_gb"] > 24:
+            result["notes"].append("likely needs multi-GPU or gradient checkpointing")
+        elif result["vram_gb"] > 12:
+            result["notes"].append("needs >=16GB VRAM GPU (A100/A6000/RTX 4090)")
+        elif result["vram_gb"] > 6:
+            result["notes"].append("needs >=8GB VRAM GPU")
+
+    elif mt in ("linear", "logistic", "ridge", "lasso", "elastic_net"):
+        # Linear models: very lightweight
+        result["ram_gb"] = round((n_samples * n_features * 8 * 2) / (1024 ** 3), 2)
+        result["disk_gb"] = 0.01
+        result["notes"].append("lightweight — no resource concerns")
+
+    else:
+        result["notes"].append(f"unknown model type '{model_type}' — using conservative estimates")
+        result["ram_gb"] = round((n_samples * n_features * 8 * 3) / (1024 ** 3), 2)
+        result["vram_gb"] = 0.0
+        result["disk_gb"] = 0.5
+
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Preflight check
+# ---------------------------------------------------------------------------
+
+def run_preflight(
+    model_type: str | None = None,
+    config_path: str = "config.yaml",
+    dataset_path: str | None = None,
+    param_count: str | None = None,
+    batch_size: int = 32,
+    precision: str = "fp32",
+    sequence_length: int = 512,
+) -> dict:
+    """Run a complete preflight check.
+
+    Returns dict with: system, dataset, model, verdict, warnings.
+    """
+    # Load config if available
+    config = {}
+    if HAS_YAML and Path(config_path).exists():
+        with open(config_path) as f:
+            config = yaml.safe_load(f) or {}
+
+    # Auto-detect from config
+    if not model_type:
+        model_type = config.get("model", {}).get("type", "xgboost")
+    hyperparams = config.get("model", {}).get("hyperparams", {})
+    n_estimators = hyperparams.get("n_estimators", 100)
+    max_depth = hyperparams.get("max_depth", 6)
+
+    # System resources
+    system = {
+        "ram_total_gb": get_system_ram_gb(),
+        "ram_available_gb": get_available_ram_gb(),
+        "disk_free_gb": get_disk_free_gb(),
+        "gpus": get_gpu_info(),
+    }
+
+    # Dataset analysis
+    splits_dir = config.get("data", {}).get("splits_dir")
+    data_source = dataset_path or config.get("data", {}).get("source")
+    dataset = estimate_dataset_memory(path=data_source, splits_dir=splits_dir)
+
+    # Model estimation
+    params = parse_param_count(param_count) if param_count else None
+    model = estimate_model_resources(
+        model_type=model_type,
+        n_estimators=n_estimators,
+        max_depth=max_depth,
+        n_features=max(dataset.get("columns") or 50, 1),
+        n_samples=max(dataset.get("rows") or 10000, 1),
+        param_count=params,
+        batch_size=batch_size,
+        precision=precision,
+        sequence_length=sequence_length,
+    )
+
+    # Total requirements
+    total_ram = model["ram_gb"] + dataset["estimated_ram_gb"]
+    total_vram = model["vram_gb"]
+    total_disk = model["disk_gb"] + dataset["file_size_mb"] / 1024
+
+    # Verdict
+    warnings = []
+    verdict = "PASS"
+
+    if system["ram_available_gb"] > 0 and total_ram > system["ram_available_gb"] * 0.9:
+        warnings.append(
+            f"RAM: need ~{total_ram:.1f}GB but only {system['ram_available_gb']:.1f}GB available"
+        )
+        verdict = "WARN"
+
+    if system["ram_total_gb"] > 0 and total_ram > system["ram_total_gb"] * 0.8:
+        warnings.append(
+            f"RAM: need ~{total_ram:.1f}GB, system has {system['ram_total_gb']:.1f}GB total — may cause swapping"
+        )
+        verdict = "FAIL"
+
+    if model["requires_gpu"]:
+        if not system["gpus"]:
+            warnings.append(f"VRAM: model needs ~{total_vram:.1f}GB VRAM but no GPU detected")
+            verdict = "FAIL"
+        else:
+            max_vram = max(g["vram_gb"] for g in system["gpus"])
+            if total_vram > max_vram * 0.95:
+                warnings.append(
+                    f"VRAM: need ~{total_vram:.1f}GB but largest GPU has {max_vram:.1f}GB"
+                )
+                verdict = "FAIL"
+            elif total_vram > max_vram * 0.8:
+                warnings.append(
+                    f"VRAM: need ~{total_vram:.1f}GB, GPU has {max_vram:.1f}GB — tight fit"
+                )
+                if verdict != "FAIL":
+                    verdict = "WARN"
+
+    if total_disk > system["disk_free_gb"] * 0.5:
+        warnings.append(
+            f"Disk: model + data need ~{total_disk:.1f}GB, only {system['disk_free_gb']:.1f}GB free"
+        )
+        if verdict != "FAIL":
+            verdict = "WARN"
+
+    return {
+        "verdict": verdict,
+        "warnings": warnings,
+        "requirements": {
+            "ram_gb": round(total_ram, 2),
+            "vram_gb": round(total_vram, 2),
+            "disk_gb": round(total_disk, 2),
+            "requires_gpu": model["requires_gpu"],
+        },
+        "system": system,
+        "dataset": dataset,
+        "model": model,
+    }
+
+
+
+def format_preflight(result: dict) -> str:
+    """Format preflight results for display."""
+    v = result["verdict"]
+    icon = {"PASS": "✓", "WARN": "!", "FAIL": "✗"}.get(v, "?")
+
+    lines = [
+        f"Preflight Check: {icon} {v}",
+        "=" * 50,
+    ]
+
+    # Requirements
+    req = result["requirements"]
+    lines.extend([
+        "",
+        "Requirements",
+        "-" * 50,
+        f" RAM: ~{req['ram_gb']:.1f} GB",
+        f" VRAM: ~{req['vram_gb']:.1f} GB" + (" (GPU required)" if req["requires_gpu"] else " (no GPU needed)"),
+        f" Disk: ~{req['disk_gb']:.1f} GB",
+    ])
+
+    # System
+    sys_info = result["system"]
+    lines.extend([
+        "",
+        "System",
+        "-" * 50,
+        f" RAM: {sys_info['ram_total_gb']:.1f} GB total, {sys_info['ram_available_gb']:.1f} GB available",
+        f" Disk: {sys_info['disk_free_gb']:.1f} GB free",
+    ])
+    if sys_info["gpus"]:
+        for gpu in sys_info["gpus"]:
+            lines.append(f" GPU {gpu['index']}: {gpu['name']} ({gpu['vram_gb']:.1f} GB VRAM)")
+    else:
+        lines.append(" GPU: none detected")
+
+    # Dataset
+    ds = result["dataset"]
+    if ds["source"]:
+        lines.extend([
+            "",
+            "Dataset",
+            "-" * 50,
+            f" Source: {ds['source']}",
+            f" Size: {ds['file_size_mb']:.1f} MB on disk",
+            f" In RAM: ~{ds['estimated_ram_gb']:.2f} GB estimated",
+        ])
+        if ds["rows"]:
+            lines.append(f" Shape: {ds['rows']:,} rows x {ds['columns']} columns")
+
+    # Model
+    model = result["model"]
+    lines.extend([
+        "",
+        "Model",
+        "-" * 50,
+        f" Type: {model['model_type']}",
+        f" RAM: ~{model['ram_gb']:.2f} GB",
+        f" VRAM: ~{model['vram_gb']:.2f} GB",
+    ])
+    for note in model["notes"]:
+        lines.append(f" Note: {note}")
+
+    # Warnings
+    if result["warnings"]:
+        lines.extend(["", "Warnings", "-" * 50])
+        for w in result["warnings"]:
+            lines.append(f" {w}")
+
+    # Verdict
+    lines.extend(["", "=" * 50])
+    if v == "PASS":
+        lines.append(" System has sufficient resources. Proceed with training.")
+    elif v == "WARN":
+        lines.append(" Training may succeed but resources are tight. Monitor memory usage.")
+    else:
+        lines.append(" Training will likely fail. Address warnings before proceeding.")
+
+    return "\n".join(lines)
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        description="Pre-flight resource check for ML training",
+    )
+    parser.add_argument("--config", default="config.yaml", help="Config file path")
+    parser.add_argument("--model-type", default=None, help="Model type (xgboost, lightgbm, torch, transformer, ...)")
+    parser.add_argument("--dataset", default=None, help="Path to dataset file")
+    parser.add_argument("--params", default=None, help="Parameter count (e.g., 10M, 1.5B)")
+    parser.add_argument("--batch-size", type=int, default=32, help="Training batch size")
+    parser.add_argument("--precision", default="fp32", choices=["fp32", "fp16", "bf16", "int8", "int4"])
+    parser.add_argument("--seq-len", type=int, default=512, help="Sequence length (transformers)")
+    parser.add_argument("--json", action="store_true", help="Machine-readable JSON output")
+
+    args = parser.parse_args()
+
+    result = run_preflight(
+        model_type=args.model_type,
+        config_path=args.config,
+        dataset_path=args.dataset,
+        param_count=args.params,
+        batch_size=args.batch_size,
+        precision=args.precision,
+        sequence_length=args.seq_len,
+    )
+
+    if args.json:
+        print(json.dumps(result, indent=2))
+    else:
+        print(format_preflight(result))
+
+    if result["verdict"] == "FAIL":
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
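
Note on the config auto-detection described in the module docstring: run_preflight() reads only model.type, model.hyperparams.n_estimators, model.hyperparams.max_depth, data.splits_dir, and data.source from config.yaml. A minimal sketch of a config with those keys follows; the values and paths are illustrative assumptions, and the shipped package/templates/config.yaml (not reproduced in this hunk) may be laid out differently.

    model:
      type: xgboost          # picked up when --model-type is not passed
      hyperparams:
        n_estimators: 100
        max_depth: 6
    data:
      splits_dir: data/splits   # hypothetical path; preferred over 'source' if present
      source: data/train.csv    # hypothetical single-file fallback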
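The script can also be used as a library rather than through main(): run_preflight() returns the same dict that --json prints (top-level keys verdict, warnings, requirements, system, dataset, model), and the CLI exits with status 1 on a FAIL verdict. A minimal sketch, assuming the scripts directory is importable as a package (it ships an __init__.py):

    from scripts.preflight import run_preflight, format_preflight

    # Same estimate the CLI would produce for an explicit model type and dataset
    result = run_preflight(model_type="xgboost", dataset_path="data/train.csv")
    print(format_preflight(result))      # human-readable report
    if result["verdict"] == "FAIL":      # mirror the CLI's exit-code behavior
        raise SystemExit(1)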