claude-turing 2.1.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +5 -2
  3. package/commands/fork.md +40 -0
  4. package/commands/queue.md +48 -0
  5. package/commands/retry.md +41 -0
  6. package/commands/turing.md +6 -0
  7. package/config/failure_modes.yaml +74 -0
  8. package/package.json +1 -1
  9. package/src/install.js +2 -1
  10. package/src/verify.js +4 -0
  11. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  12. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  13. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  14. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  15. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  16. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  17. package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
  18. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  19. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  20. package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
  21. package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
  22. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  23. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  24. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  25. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  26. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  27. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  28. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  29. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  30. package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
  31. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  32. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  33. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  34. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  35. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  36. package/templates/scripts/experiment_queue.py +441 -0
  37. package/templates/scripts/fork_experiment.py +286 -0
  38. package/templates/scripts/generate_brief.py +25 -0
  39. package/templates/scripts/scaffold.py +6 -0
  40. package/templates/scripts/smart_retry.py +398 -0
  41. package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
  42. package/templates/tests/__pycache__/__init__.cpython-314.pyc +0 -0
  43. package/templates/tests/__pycache__/conftest.cpython-314-pytest-9.0.2.pyc +0 -0
  44. package/templates/tests/__pycache__/test_cost_frontier.cpython-314-pytest-9.0.2.pyc +0 -0
@@ -0,0 +1,441 @@
1
+ #!/usr/bin/env python3
2
+ """Batch experiment scheduler with priority ordering and dependencies.
3
+
4
+ Queue multiple experiments for unattended execution. The researcher
5
+ loads the queue Friday afternoon, reads /turing:brief Monday morning.
6
+
7
+ Usage:
8
+ python scripts/experiment_queue.py add "try LightGBM" --priority high
9
+ python scripts/experiment_queue.py add "deeper trees" --after q-001
10
+ python scripts/experiment_queue.py list
11
+ python scripts/experiment_queue.py run [--halt-on-error]
12
+ python scripts/experiment_queue.py pause
13
+ python scripts/experiment_queue.py clear
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import argparse
19
+ import json
20
+ import subprocess
21
+ import sys
22
+ from datetime import datetime, timezone
23
+ from pathlib import Path
24
+
25
+ import yaml
26
+
27
+ from scripts.turing_io import load_config, load_experiments
28
+
29
+
30
+ DEFAULT_QUEUE_PATH = "experiments/queue.yaml"
31
+ PRIORITY_ORDER = {"critical": 0, "high": 1, "medium": 2, "low": 3}
32
+
33
+
34
def load_queue(queue_path: str = DEFAULT_QUEUE_PATH) -> list[dict]:
    """Read the experiment queue from its YAML file.

    A missing file, an empty file, or a file whose top-level document is
    not a list all yield an empty queue, so callers never special-case a
    fresh project.
    """
    queue_file = Path(queue_path)
    if not queue_file.exists():
        return []
    if queue_file.stat().st_size == 0:
        return []
    with queue_file.open() as handle:
        parsed = yaml.safe_load(handle)
    if isinstance(parsed, list):
        return parsed
    return []
42
+
43
+
44
def save_queue(queue: list[dict], queue_path: str = DEFAULT_QUEUE_PATH) -> None:
    """Persist the queue to YAML, creating parent directories as needed."""
    target = Path(queue_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # Preserve insertion order of keys so the file stays human-readable.
    with target.open("w") as handle:
        yaml.dump(queue, handle, default_flow_style=False, sort_keys=False)
50
+
51
+
52
def get_next_queue_id(queue: list[dict]) -> str:
    """Return the next sequential queue ID ("q-001", "q-002", ...).

    Scans existing IDs of the form ``q-<number>``; malformed or foreign
    IDs are ignored rather than raising.
    """
    highest = 0
    for entry in queue:
        identifier = entry.get("id", "")
        if not identifier.startswith("q-"):
            continue
        try:
            highest = max(highest, int(identifier.split("-")[1]))
        except (ValueError, IndexError):
            # Not a numeric q-NNN id — skip it.
            continue
    return f"q-{highest + 1:03d}"
64
+
65
+
66
def add_to_queue(
    description: str,
    priority: str = "medium",
    after: str | None = None,
    hypothesis_id: str | None = None,
    queue_path: str = DEFAULT_QUEUE_PATH,
) -> dict:
    """Append a new experiment to the queue and persist it.

    Args:
        description: What to try.
        priority: One of critical/high/medium/low.
        after: Queue ID this item depends on (runs after it completes).
        hypothesis_id: Optional link to a hypothesis queue entry.
        queue_path: Path to queue YAML.

    Returns:
        The newly created queue item dict.
    """
    existing = load_queue(queue_path)

    new_item = dict(
        id=get_next_queue_id(existing),
        description=description,
        priority=priority,
        status="queued",
        depends_on=after,
        hypothesis_id=hypothesis_id,
        created_at=datetime.now(timezone.utc).isoformat(),
        # Lifecycle fields, filled in by run_queue_item later.
        started_at=None,
        completed_at=None,
        result_experiment=None,
        error=None,
        retries=0,
    )

    existing.append(new_item)
    save_queue(existing, queue_path)
    return new_item
106
+
107
+
108
+ def sort_queue(queue: list[dict]) -> list[dict]:
109
+ """Sort queue by priority then creation time, respecting dependencies.
110
+
111
+ Returns items in execution order: dependencies first, then by priority.
112
+ Uses topological sort — within each "ready" batch, items are sorted
113
+ by priority so that critical items run before low-priority ones
114
+ as long as dependency constraints are satisfied.
115
+ """
116
+ queued = [q for q in queue if q.get("status") == "queued"]
117
+
118
+ # Topological sort: process items whose dependencies are resolved,
119
+ # picking highest-priority items first within each batch
120
+ resolved = []
121
+ remaining = list(queued)
122
+ resolved_ids = set()
123
+
124
+ max_iterations = len(remaining) * 2
125
+ iteration = 0
126
+ while remaining and iteration < max_iterations:
127
+ iteration += 1
128
+ # Find all items whose deps are satisfied
129
+ ready = [
130
+ item for item in remaining
131
+ if item.get("depends_on") is None or item["depends_on"] in resolved_ids
132
+ ]
133
+ if not ready:
134
+ # Circular dependency — add remaining in priority order
135
+ remaining.sort(key=lambda x: PRIORITY_ORDER.get(x.get("priority", "medium"), 2))
136
+ resolved.extend(remaining)
137
+ break
138
+ # Sort ready batch by priority
139
+ ready.sort(key=lambda x: PRIORITY_ORDER.get(x.get("priority", "medium"), 2))
140
+ for item in ready:
141
+ resolved.append(item)
142
+ resolved_ids.add(item["id"])
143
+ remaining.remove(item)
144
+
145
+ return resolved
146
+
147
+
148
def estimate_runtime(queue: list[dict], profile_dir: str = "experiments/profiles") -> float:
    """Estimate total seconds for queued items from historical profiles.

    Averages ``profile.total_time_sec`` across every ``*-profile.yaml``
    found in profile_dir, then multiplies by the number of queued items.
    Returns 0.0 when no usable profile data exists.
    """
    profiles = Path(profile_dir)
    if not profiles.exists():
        return 0.0

    observed = []
    for profile_file in profiles.glob("*-profile.yaml"):
        try:
            with open(profile_file) as fh:
                doc = yaml.safe_load(fh)
            if doc and isinstance(doc, dict):
                seconds = doc.get("profile", {}).get("total_time_sec", 0)
                if seconds > 0:
                    observed.append(seconds)
        except (yaml.YAMLError, OSError):
            # Unreadable profile — ignore and keep scanning.
            continue

    if not observed:
        return 0.0

    mean_seconds = sum(observed) / len(observed)
    pending = sum(1 for q in queue if q.get("status") == "queued")
    return mean_seconds * pending
174
+
175
+
176
+ def run_queue_item(item: dict, timeout: int = 600) -> dict:
177
+ """Execute a single queue item.
178
+
179
+ Returns updated item dict with status, result, timing.
180
+ """
181
+ item["status"] = "running"
182
+ item["started_at"] = datetime.now(timezone.utc).isoformat()
183
+
184
+ cmd = ["python", "train.py", "--seed", "42"]
185
+ try:
186
+ proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
187
+ except subprocess.TimeoutExpired:
188
+ item["status"] = "failed"
189
+ item["error"] = "timeout"
190
+ item["completed_at"] = datetime.now(timezone.utc).isoformat()
191
+ return item
192
+
193
+ if proc.returncode != 0:
194
+ item["status"] = "failed"
195
+ item["error"] = _classify_error(proc.stderr + proc.stdout)
196
+ item["completed_at"] = datetime.now(timezone.utc).isoformat()
197
+ return item
198
+
199
+ # Parse metrics
200
+ metrics = {}
201
+ in_block = False
202
+ for line in proc.stdout.splitlines():
203
+ line = line.strip()
204
+ if line == "---":
205
+ if in_block:
206
+ break
207
+ in_block = True
208
+ continue
209
+ if in_block and ":" in line:
210
+ key, value = line.split(":", 1)
211
+ try:
212
+ metrics[key.strip()] = float(value.strip())
213
+ except ValueError:
214
+ metrics[key.strip()] = value.strip()
215
+
216
+ item["status"] = "completed"
217
+ item["completed_at"] = datetime.now(timezone.utc).isoformat()
218
+ item["result_metrics"] = metrics
219
+ return item
220
+
221
+
222
+ def _classify_error(output: str) -> str:
223
+ """Classify error from output text."""
224
+ output_lower = output.lower()
225
+ if "cuda out of memory" in output_lower or "memoryerror" in output_lower:
226
+ return "oom"
227
+ if "nan" in output_lower and ("loss" in output_lower or "nan" in output):
228
+ return "nan_loss"
229
+ if "timeouterror" in output_lower:
230
+ return "timeout"
231
+ if "modulenotfounderror" in output_lower or "importerror" in output_lower:
232
+ return "import_error"
233
+ return "unknown"
234
+
235
+
236
def run_queue(
    queue_path: str = DEFAULT_QUEUE_PATH,
    halt_on_error: bool = False,
    timeout: int = 600,
) -> dict:
    """Execute all queued experiments in order.

    Args:
        queue_path: Path to queue YAML.
        halt_on_error: Stop on first failure.
        timeout: Per-experiment timeout in seconds.

    Returns:
        Batch summary dict (also written to queue-summary.yaml next to
        the queue file).
    """
    queue = load_queue(queue_path)
    execution_order = sort_queue(queue)

    if not execution_order:
        return {"status": "empty", "message": "No queued experiments."}

    summary = {
        "started_at": datetime.now(timezone.utc).isoformat(),
        "total": len(execution_order),
        "completed": 0,
        "failed": 0,
        "skipped": 0,
        "results": [],
    }

    print(f"Running {len(execution_order)} queued experiments...", file=sys.stderr)

    for i, item in enumerate(execution_order):
        # Re-read the queue so a concurrently-issued `pause` takes effect
        # between experiments (the in-flight one always finishes).
        current_queue = load_queue(queue_path)
        if any(q.get("_paused") for q in current_queue):
            summary["status"] = "paused"
            summary["skipped"] = len(execution_order) - i
            break

        print(f"\n [{i+1}/{len(execution_order)}] {item['id']}: {item['description']}", file=sys.stderr)

        result = run_queue_item(item, timeout=timeout)

        # Persist the outcome immediately so progress survives a crash.
        for q in queue:
            if q["id"] == item["id"]:
                q.update(result)
        save_queue(queue, queue_path)

        # Bug fix: record the result BEFORE any halt-on-error break — the
        # original broke out of the loop first, so the very failure that
        # halted the batch was missing from summary["results"].
        summary["results"].append({
            "id": item["id"],
            "description": item["description"],
            "status": result["status"],
            "error": result.get("error"),
        })

        if result["status"] == "completed":
            summary["completed"] += 1
            print(f" ✓ Completed", file=sys.stderr)
        else:
            summary["failed"] += 1
            print(f" ✗ Failed: {result.get('error', 'unknown')}", file=sys.stderr)
            if halt_on_error:
                summary["status"] = "halted"
                summary["skipped"] = len(execution_order) - i - 1
                break

    # "status" is only set early by pause/halt; a clean run completes.
    if "status" not in summary:
        summary["status"] = "completed"
    summary["completed_at"] = datetime.now(timezone.utc).isoformat()

    # Save summary next to the queue for the Monday-morning brief.
    summary_path = Path(queue_path).parent / "queue-summary.yaml"
    with open(summary_path, "w") as f:
        yaml.dump(summary, f, default_flow_style=False, sort_keys=False)

    return summary
315
+
316
+
317
def pause_queue(queue_path: str = DEFAULT_QUEUE_PATH) -> None:
    """Append a pause marker so run_queue stops after the current item."""
    entries = load_queue(queue_path)
    marker = {"_paused": True, "paused_at": datetime.now(timezone.utc).isoformat()}
    entries.append(marker)
    save_queue(entries, queue_path)
322
+
323
+
324
def clear_queue(queue_path: str = DEFAULT_QUEUE_PATH) -> int:
    """Drop every still-queued item, keeping completed/failed history.

    Returns:
        Number of items removed.
    """
    entries = load_queue(queue_path)
    kept = [e for e in entries if e.get("status") != "queued"]
    removed = len(entries) - len(kept)
    save_queue(kept, queue_path)
    return removed
331
+
332
+
333
def format_queue_list(queue: list[dict]) -> str:
    """Render the queue as a Markdown table plus status counts.

    Pause markers (items carrying ``_paused``) are hidden from the listing.
    """
    visible = [entry for entry in queue if not entry.get("_paused")]
    if not visible:
        return "Queue is empty."

    rows = [
        "# Experiment Queue",
        "",
        "| ID | Priority | Status | Description | Depends On |",
        "|----|----------|--------|-------------|------------|",
    ]
    for entry in visible:
        dependency = entry.get("depends_on") or "—"
        # Descriptions are truncated to 50 chars to keep the table tidy.
        rows.append(
            "| {id} | {priority} | {status} | {desc} | {dep} |".format(
                id=entry["id"],
                priority=entry.get("priority", "medium"),
                status=entry.get("status", "queued"),
                desc=entry["description"][:50],
                dep=dependency,
            )
        )

    tally = {"queued": 0, "completed": 0, "failed": 0}
    for entry in visible:
        state = entry.get("status")
        if state in tally:
            tally[state] += 1

    rows.append("")
    rows.append(
        f"**Queued:** {tally['queued']} | **Completed:** {tally['completed']} "
        f"| **Failed:** {tally['failed']}"
    )
    return "\n".join(rows)
364
+
365
+
366
def format_batch_summary(summary: dict) -> str:
    """Render a batch execution summary as Markdown."""
    out = ["# Queue Execution Summary", ""]

    # Headline counters, in fixed display order.
    for label, key in (
        ("Status", "status"),
        ("Total", "total"),
        ("Completed", "completed"),
        ("Failed", "failed"),
        ("Skipped", "skipped"),
    ):
        fallback = "unknown" if key == "status" else 0
        out.append(f"- **{label}:** {summary.get(key, fallback)}")

    entries = summary.get("results", [])
    if entries:
        out += ["", "## Results", ""]
        for entry in entries:
            mark = "✓" if entry["status"] == "completed" else "✗"
            suffix = f" ({entry['error']})" if entry.get("error") else ""
            out.append(f"- {mark} {entry['id']}: {entry['description']}{suffix}")

    return "\n".join(out)
387
+
388
+
389
def main() -> None:
    """CLI entry point: parse arguments and dispatch on the action verb."""

    def build_parser() -> argparse.ArgumentParser:
        # One flat parser; the positional `action` selects the sub-command.
        p = argparse.ArgumentParser(description="Experiment queue scheduler")
        p.add_argument("action", choices=["add", "list", "run", "pause", "clear"],
                       help="Queue action")
        p.add_argument("description", nargs="?", default=None,
                       help="Experiment description (for add)")
        p.add_argument("--priority", default="medium",
                       choices=["critical", "high", "medium", "low"])
        p.add_argument("--after", default=None, help="Queue ID dependency")
        p.add_argument("--hypothesis", default=None, help="Hypothesis ID link")
        p.add_argument("--queue", default=DEFAULT_QUEUE_PATH, help="Queue file path")
        p.add_argument("--halt-on-error", action="store_true")
        p.add_argument("--timeout", type=int, default=600)
        p.add_argument("--json", action="store_true")
        return p

    args = build_parser().parse_args()

    if args.action == "add":
        if not args.description:
            print("Usage: queue add 'description' [--priority high]", file=sys.stderr)
            sys.exit(1)
        created = add_to_queue(args.description, args.priority, args.after,
                               args.hypothesis, args.queue)
        if args.json:
            print(json.dumps(created, indent=2, default=str))
        else:
            print(f"Added {created['id']}: {created['description']} [{created['priority']}]")

    elif args.action == "list":
        entries = load_queue(args.queue)
        if args.json:
            print(json.dumps(entries, indent=2, default=str))
        else:
            print(format_queue_list(entries))

    elif args.action == "run":
        outcome = run_queue(args.queue, args.halt_on_error, args.timeout)
        if args.json:
            print(json.dumps(outcome, indent=2, default=str))
        else:
            print(format_batch_summary(outcome))

    elif args.action == "pause":
        pause_queue(args.queue)
        print("Queue paused. Current experiment will finish, then stop.")

    elif args.action == "clear":
        print(f"Cleared {clear_queue(args.queue)} queued experiments.")


if __name__ == "__main__":
    main()