claude-turing 2.4.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,419 @@
1
+ #!/usr/bin/env python3
2
+ """Compute budget manager for the autoresearch pipeline.
3
+
4
+ Sets a total compute budget (experiments, hours, or cost) and allocates
5
+ across exploration vs exploitation. Auto-shifts to exploit mode when
6
+ budget runs low. Prevents runaway compute spend.
7
+
8
+ Usage:
9
+ python scripts/budget_manager.py set --experiments 50 --hours 8
10
+ python scripts/budget_manager.py status
11
+ python scripts/budget_manager.py reset
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import json
18
+ import sys
19
+ from datetime import datetime, timezone
20
+ from pathlib import Path
21
+
22
+ import yaml
23
+
24
+ from scripts.turing_io import load_config, load_experiments
25
+
26
# Default file locations used when the caller does not supply a path.
DEFAULT_LOG_PATH = "experiments/log.jsonl"
DEFAULT_STATE_PATH = "experiment_state.yaml"

# Phase boundaries, expressed as the fraction of the budget already used.
#   below EXPLORE_PHASE_END            -> explore
#   EXPLORE_PHASE_END..MIXED_PHASE_END -> mixed
#   MIXED_PHASE_END and above          -> exploit only
EXPLORE_PHASE_END = 0.50
MIXED_PHASE_END = 0.80
33
+
34
+
35
+ # --- Budget State ---
36
+
37
+
38
def load_budget(state_path: str = DEFAULT_STATE_PATH) -> dict | None:
    """Return the ``budget`` entry stored in the experiment state file.

    Args:
        state_path: Path to the YAML experiment state file.

    Returns:
        The value under the ``budget`` key, or ``None`` when the file does
        not exist or holds no such key.
    """
    state_file = Path(state_path)
    if state_file.exists():
        with open(state_file) as handle:
            # An empty YAML document loads as None; treat it as an empty state.
            contents = yaml.safe_load(handle) or {}
        return contents.get("budget")
    return None
48
+
49
+
50
def save_budget(budget: dict, state_path: str = DEFAULT_STATE_PATH) -> None:
    """Store ``budget`` under the ``budget`` key of the experiment state file.

    Other keys already present in the file are carried over unchanged, and
    the parent directory is created when it does not exist yet.

    Args:
        budget: Budget dict to persist.
        state_path: Path to the YAML experiment state file.
    """
    target = Path(state_path)

    if target.exists():
        with open(target) as src:
            # An empty YAML document loads as None; start from an empty state.
            current = yaml.safe_load(src) or {}
    else:
        current = {}

    current["budget"] = budget

    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, "w") as dst:
        yaml.dump(current, dst, default_flow_style=False, sort_keys=False)
64
+
65
+
66
+ # --- Budget Operations ---
67
+
68
+
69
def set_budget(
    max_experiments: int | None = None,
    max_hours: float | None = None,
    state_path: str = DEFAULT_STATE_PATH,
) -> dict:
    """Set a compute budget.

    Args:
        max_experiments: Maximum number of experiments (must be positive).
        max_hours: Maximum wall-clock hours (must be positive).
        state_path: Path to experiment state file.

    Returns:
        Dict with ``action``, ``budget`` and ``message`` on success, or a
        dict with a single ``error`` key when no constraint was given or a
        constraint is not positive. Nothing is written in the error case.
    """
    if max_experiments is None and max_hours is None:
        return {"error": "Specify at least one constraint: --experiments or --hours"}

    # A zero or negative limit would be stored but then skipped by the
    # truthiness / "> 0" checks used when the budget is evaluated, leaving
    # the budget silently unenforced. Reject it up front instead.
    # ``not (x > 0)`` also rejects NaN.
    if max_experiments is not None and not (max_experiments > 0):
        return {"error": "--experiments must be a positive integer"}
    if max_hours is not None and not (max_hours > 0):
        return {"error": "--hours must be a positive number"}

    budget = {
        "set_at": datetime.now(timezone.utc).isoformat(),
        "max_experiments": max_experiments,
        "max_hours": max_hours,
        "active": True,
    }

    save_budget(budget, state_path)

    return {
        "action": "set",
        "budget": budget,
        "message": _format_budget_constraints(budget),
    }
101
+
102
+
103
def get_budget_status(
    state_path: str = DEFAULT_STATE_PATH,
    log_path: str = DEFAULT_LOG_PATH,
) -> dict:
    """Get current budget status with usage and projections.

    Only experiments whose ``timestamp`` is at or after the budget's
    ``set_at`` are counted. The budget fraction is the larger of the
    experiment-count fraction and the training-time fraction.

    Args:
        state_path: Path to experiment state file.
        log_path: Path to experiment log.

    Returns:
        Status dict with ``usage``, ``phase``, ``recommended_mode``,
        ``allocation``, ``burn_rate``, ``projected_exhaustion_hours`` and
        ``exhausted`` (plus ``warning`` when exhausted), or a dict with an
        ``error`` key when no active budget is stored.
    """
    budget = load_budget(state_path)
    if not budget or not budget.get("active"):
        return {"error": "No active budget. Use `/turing:budget set` first."}

    experiments = load_experiments(log_path)

    # Count experiments since budget was set.
    # NOTE(review): this compares timestamp strings lexicographically, which
    # is chronological only if log entries use the same ISO-8601/UTC format
    # as ``set_at`` -- confirm against the log writer.
    # ``or ""`` keeps entries with a null timestamp from raising TypeError.
    budget_set_at = budget.get("set_at") or ""
    experiments_since = [
        e for e in experiments
        if (e.get("timestamp") or "") >= budget_set_at
    ]

    used_experiments = len(experiments_since)
    max_experiments = budget.get("max_experiments")

    # Compute time usage; entries with missing, null or non-numeric
    # training time contribute nothing.
    total_seconds = 0
    for e in experiments_since:
        metrics = e.get("metrics")
        seconds = metrics.get("train_seconds") if isinstance(metrics, dict) else None
        if isinstance(seconds, (int, float)):
            total_seconds += seconds
    used_hours = total_seconds / 3600
    max_hours = budget.get("max_hours")

    # Compute budget fraction used: the tightest constraint wins.
    fractions = []
    if max_experiments and max_experiments > 0:
        fractions.append(used_experiments / max_experiments)
    if max_hours and max_hours > 0:
        fractions.append(used_hours / max_hours)

    budget_used = max(fractions) if fractions else 0.0

    # Determine current phase and recommended mode
    phase = determine_phase(budget_used)
    recommended_mode = phase_to_mode(phase)

    # Burn rate (experiments per training hour)
    burn_rate = None
    if used_hours > 0 and used_experiments > 0:
        burn_rate = used_experiments / used_hours

    # Projection. Remaining amounts are clamped at zero so an overshot
    # budget does not report negative experiments or hours.
    remaining_experiments = (
        max(max_experiments - used_experiments, 0) if max_experiments else None
    )
    remaining_hours = max(max_hours - used_hours, 0.0) if max_hours else None
    projected_exhaustion_hours = None
    if burn_rate and burn_rate > 0 and remaining_experiments:
        projected_exhaustion_hours = remaining_experiments / burn_rate

    # Check if exhausted
    exhausted = budget_used >= 1.0

    # Allocation breakdown
    explore_count = sum(1 for e in experiments_since if _is_explore(e))
    exploit_count = used_experiments - explore_count

    status = {
        "action": "status",
        "budget": budget,
        "usage": {
            "experiments_used": used_experiments,
            "experiments_max": max_experiments,
            "experiments_remaining": remaining_experiments,
            "hours_used": round(used_hours, 2),
            "hours_max": max_hours,
            "hours_remaining": round(remaining_hours, 2) if remaining_hours is not None else None,
            "budget_fraction": round(budget_used, 4),
        },
        "phase": phase,
        "recommended_mode": recommended_mode,
        "allocation": {
            "explore": explore_count,
            "exploit": exploit_count,
        },
        "burn_rate": round(burn_rate, 2) if burn_rate else None,
        "projected_exhaustion_hours": round(projected_exhaustion_hours, 2) if projected_exhaustion_hours else None,
        "exhausted": exhausted,
    }

    if exhausted:
        status["warning"] = "Budget exhausted. /turing:train will refuse to start new experiments."

    return status
200
+
201
+
202
def reset_budget(state_path: str = DEFAULT_STATE_PATH) -> dict:
    """Deactivate the stored budget and record when that happened.

    Args:
        state_path: Path to experiment state file.

    Returns:
        Dict with ``action`` set to ``"reset"`` and a ``message`` saying
        whether a budget was deactivated or none was found.
    """
    current = load_budget(state_path)
    if current:
        current["active"] = False
        current["reset_at"] = datetime.now(timezone.utc).isoformat()
        save_budget(current, state_path)
        outcome = "Budget deactivated."
    else:
        outcome = "No budget to reset."
    return {"action": "reset", "message": outcome}
213
+
214
+
215
def check_budget_allows(state_path: str = DEFAULT_STATE_PATH, log_path: str = DEFAULT_LOG_PATH) -> dict:
    """Decide whether another experiment may start under the current budget.

    A missing or inactive budget, or an unavailable status, allows the run.

    Args:
        state_path: Path to experiment state file.
        log_path: Path to experiment log.

    Returns:
        Dict with ``allowed`` (bool) and ``reason``; when a run is allowed
        under an active budget it also carries ``recommended_mode``.
    """
    stored = load_budget(state_path)
    if not (stored and stored.get("active")):
        return {"allowed": True, "reason": "No active budget"}

    status = get_budget_status(state_path, log_path)
    if "error" in status:
        return {"allowed": True, "reason": "Budget status unavailable"}

    used = status["usage"]["budget_fraction"]
    if status.get("exhausted"):
        return {
            "allowed": False,
            "reason": f"Budget exhausted ({used:.0%} used)",
        }

    return {
        "allowed": True,
        "reason": f"Budget at {used:.0%}",
        "recommended_mode": status.get("recommended_mode"),
    }
239
+
240
+
241
+ # --- Phase Logic ---
242
+
243
+
244
def determine_phase(budget_fraction: float) -> str:
    """Map the fraction of budget used to a phase name.

    Args:
        budget_fraction: Fraction of the budget already consumed.

    Returns:
        ``'explore'`` below ``EXPLORE_PHASE_END``, ``'mixed'`` below
        ``MIXED_PHASE_END``, otherwise ``'exploit'``.
    """
    if budget_fraction < EXPLORE_PHASE_END:
        return "explore"
    return "mixed" if budget_fraction < MIXED_PHASE_END else "exploit"
255
+
256
+
257
def phase_to_mode(phase: str) -> str:
    """Return the research mode recommended for a budget phase.

    Only the ``'exploit'`` phase recommends exploiting. ``'explore'``,
    ``'mixed'`` and any unrecognised phase all recommend ``'explore'``.
    """
    return "exploit" if phase == "exploit" else "explore"
264
+
265
+
266
+ def _is_explore(experiment: dict) -> bool:
267
+ """Heuristic: classify experiment as explore vs exploit."""
268
+ config = experiment.get("config", {})
269
+ # New model types or significantly different configs = explore
270
+ # Similar to prior experiments = exploit
271
+ # Simple heuristic: if experiment has a novel model_type, it's exploration
272
+ return config.get("model_type", "") != config.get("base_model_type", config.get("model_type", ""))
273
+
274
+
275
+ def _format_budget_constraints(budget: dict) -> str:
276
+ """Format budget constraints as human-readable string."""
277
+ parts = []
278
+ if budget.get("max_experiments"):
279
+ parts.append(f"{budget['max_experiments']} experiments")
280
+ if budget.get("max_hours"):
281
+ parts.append(f"{budget['max_hours']} hours")
282
+ return "Budget set: " + ", ".join(parts) if parts else "Budget set (no constraints)"
283
+
284
+
285
+ # --- Report Formatting ---
286
+
287
+
288
def format_budget_report(report: dict) -> str:
    """Format budget report as markdown.

    Handles the dicts produced by the ``set``, ``status``, ``reset`` and
    ``check`` actions, as well as error dicts.

    Args:
        report: Result dict from one of the budget operations.

    Returns:
        Markdown text for the report. An ``error`` dict renders as
        ``ERROR: ...`` and an unrecognised action as ``Unknown action: ...``.
    """
    if "error" in report:
        return f"ERROR: {report['error']}"

    # ``check`` results carry an "allowed" flag and no "action" key; without
    # this branch they would fall through to "Unknown action: ?".
    if "action" not in report and "allowed" in report:
        verdict = "ALLOWED" if report["allowed"] else "BLOCKED"
        lines = ["# Budget Check", "", f"**{verdict}:** {report.get('reason', '')}"]
        if report.get("recommended_mode"):
            lines.append(f"**Recommended mode:** {report['recommended_mode']}")
        return "\n".join(lines)

    action = report.get("action", "?")

    if action == "set":
        return f"# Budget Set\n\n{report.get('message', '')}"

    if action == "reset":
        return f"# Budget Reset\n\n{report.get('message', '')}"

    if action != "status":
        return f"Unknown action: {action}"

    usage = report.get("usage", {})
    phase = report.get("phase", "?")

    lines = [
        "# Budget Status",
        "",
    ]

    # Experiments (skipped for an hours-only budget, where the max is None)
    experiments_max = usage.get("experiments_max")
    if experiments_max:
        pct = usage["experiments_used"] / experiments_max * 100
        lines.append(
            f"**Experiments:** {usage['experiments_used']}/{experiments_max} "
            f"used ({pct:.0f}%), {usage.get('experiments_remaining', 0)} remaining"
        )

    # Time (skipped for an experiments-only budget)
    hours_max = usage.get("hours_max")
    if hours_max:
        pct = usage["hours_used"] / hours_max * 100
        # ``or 0`` guards the float format against an explicit None value.
        hours_remaining = usage.get("hours_remaining") or 0
        lines.append(
            f"**Time:** {usage['hours_used']:.1f}/{hours_max:.1f}h "
            f"used ({pct:.0f}%), {hours_remaining:.1f}h remaining"
        )

    # Burn rate
    burn = report.get("burn_rate")
    if burn:
        lines.append(f"**Burn rate:** {burn:.1f} experiments/hour")

    proj = report.get("projected_exhaustion_hours")
    if proj:
        lines.append(f"**Projected exhaustion:** ~{proj:.1f} hours")

    lines.append("")

    # Phase
    phase_labels = {
        "explore": "EXPLORE (try diverse hypotheses)",
        "mixed": "MIXED (explore promising, exploit best)",
        "exploit": "EXPLOIT ONLY (refine the winner)",
    }
    lines.append(f"**Phase:** {phase_labels.get(phase, phase)}")
    lines.append(f"**Recommended mode:** {report.get('recommended_mode', '?')}")

    # Allocation
    alloc = report.get("allocation", {})
    if alloc:
        lines.extend([
            "",
            "**Allocation:**",
            f"- Explore: {alloc.get('explore', 0)} experiments",
            f"- Exploit: {alloc.get('exploit', 0)} experiments",
        ])

    # Warning
    if report.get("warning"):
        lines.extend(["", f"**WARNING:** {report['warning']}"])

    # Auto-mode shift info. The explore phase ends, and mixed mode begins,
    # at EXPLORE_PHASE_END of the budget. The hint is expressed in
    # experiments, so it is only shown when an experiment limit exists.
    if phase == "explore" and experiments_max:
        shift_at = experiments_max * EXPLORE_PHASE_END
        lines.extend(["", f"*Auto-shift to mixed mode at experiment {shift_at:.0f}*"])

    return "\n".join(lines)
368
+
369
+
370
def main() -> None:
    """Parse the command line, run the requested budget action, print it.

    The result is printed as a formatted report, or as JSON when ``--json``
    is given.
    """
    cli = argparse.ArgumentParser(description="Compute budget manager")
    cli.add_argument(
        "action",
        choices=["set", "status", "reset", "check"],
        help="Budget action",
    )
    cli.add_argument("--experiments", type=int, help="Max experiments budget")
    cli.add_argument("--hours", type=float, help="Max hours budget")
    cli.add_argument(
        "--state",
        default=DEFAULT_STATE_PATH,
        help=f"Path to experiment state (default: {DEFAULT_STATE_PATH})",
    )
    cli.add_argument("--log", default=DEFAULT_LOG_PATH, help="Path to experiment log")
    cli.add_argument(
        "--json",
        action="store_true",
        help="Output raw JSON instead of formatted report",
    )
    opts = cli.parse_args()

    # One zero-argument callable per action; only the selected one runs.
    handlers = {
        "set": lambda: set_budget(opts.experiments, opts.hours, opts.state),
        "status": lambda: get_budget_status(opts.state, opts.log),
        "reset": lambda: reset_budget(opts.state),
        "check": lambda: check_budget_allows(opts.state, opts.log),
    }
    handler = handlers.get(opts.action)
    if handler is None:
        report = {"error": f"Unknown action: {opts.action}"}
    else:
        report = handler()

    if opts.json:
        rendered = json.dumps(report, indent=2, default=str)
    else:
        rendered = format_budget_report(report)
    print(rendered)
416
+
417
+
418
+ if __name__ == "__main__":
419
+ main()
@@ -326,6 +326,35 @@ def load_ensemble_results(ensemble_dir: str = "experiments/ensembles") -> list[d
326
326
  return reports
327
327
 
328
328
 
329
+ def load_budget_status(state_path: str = "experiment_state.yaml", log_path: str = "experiments/log.jsonl") -> dict | None:
330
+ """Load budget status if active."""
331
+ try:
332
+ from scripts.budget_manager import get_budget_status
333
+ result = get_budget_status(state_path, log_path)
334
+ if "error" not in result:
335
+ return result
336
+ except (ImportError, Exception):
337
+ pass
338
+ return None
339
+
340
+
341
def load_scaling_results(scaling_dir: str = "experiments/scaling") -> list[dict]:
    """Collect scaling study reports from ``scale-*.yaml`` files.

    Files are visited in sorted order. A file is kept only if it parses to
    a non-empty dict that has a ``verdict`` key; files that cannot be read
    or parsed are skipped.

    Args:
        scaling_dir: Directory holding the scaling study YAML files.

    Returns:
        List of parsed report dicts; empty when the directory is missing.
    """
    root = Path(scaling_dir)
    collected = []
    if not root.exists():
        return collected

    for report_file in sorted(root.glob("scale-*.yaml")):
        try:
            with open(report_file) as stream:
                parsed = yaml.safe_load(stream)
        except (yaml.YAMLError, OSError):
            continue
        if parsed and isinstance(parsed, dict) and "verdict" in parsed:
            collected.append(parsed)
    return collected
356
+
357
+
329
358
  def format_brief(
330
359
  campaign: dict,
331
360
  best: dict | None,
@@ -345,6 +374,8 @@ def format_brief(
345
374
  queue_summary: dict | None = None,
346
375
  regression_checks: list[dict] | None = None,
347
376
  ensemble_results: list[dict] | None = None,
377
+ budget_status: dict | None = None,
378
+ scaling_results: list[dict] | None = None,
348
379
  ) -> str:
349
380
  """Format the research briefing as markdown."""
350
381
  direction = "lower" if lower_is_better else "higher"
@@ -579,6 +610,31 @@ def format_brief(
579
610
  else:
580
611
  lines.append(f"- {n_models}-model ensemble: no improvement over best single")
581
612
 
613
+ # Budget status
614
+ if budget_status and budget_status.get("usage"):
615
+ usage = budget_status["usage"]
616
+ phase = budget_status.get("phase", "?")
617
+ lines.extend(["", "## Budget", ""])
618
+ if usage.get("experiments_max"):
619
+ lines.append(
620
+ f"- **Experiments:** {usage['experiments_used']}/{usage['experiments_max']} "
621
+ f"({usage['budget_fraction']:.0%} used)"
622
+ )
623
+ if usage.get("hours_max"):
624
+ lines.append(f"- **Time:** {usage['hours_used']:.1f}/{usage['hours_max']:.1f}h")
625
+ lines.append(f"- **Phase:** {phase}")
626
+ if budget_status.get("exhausted"):
627
+ lines.append("- **STATUS: EXHAUSTED** — no more experiments will run")
628
+
629
+ # Scaling predictions
630
+ if scaling_results:
631
+ lines.extend(["", "## Scaling Predictions", ""])
632
+ for study in scaling_results:
633
+ verdict = study.get("verdict", {})
634
+ v = verdict.get("verdict", "?")
635
+ reason = verdict.get("reason", "")
636
+ lines.append(f"- **{v.upper()}**: {reason}")
637
+
582
638
  # Regression check history (stability)
583
639
  if regression_checks:
584
640
  lines.extend(["", "## Stability", ""])
@@ -670,6 +726,8 @@ def generate_brief(
670
726
  queue_summary = load_queue_summary()
671
727
  regression_checks = load_regression_checks()
672
728
  ensemble_results = load_ensemble_results()
729
+ budget_status = load_budget_status(log_path=log_path)
730
+ scaling_results = load_scaling_results()
673
731
 
674
732
  return format_brief(
675
733
  campaign, best, trajectory, model_types, hypotheses,
@@ -683,6 +741,8 @@ def generate_brief(
683
741
  queue_summary=queue_summary,
684
742
  regression_checks=regression_checks if regression_checks else None,
685
743
  ensemble_results=ensemble_results if ensemble_results else None,
744
+ budget_status=budget_status,
745
+ scaling_results=scaling_results if scaling_results else None,
686
746
  )
687
747
 
688
748