claude-turing 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +7 -2
- package/commands/fork.md +40 -0
- package/commands/lit.md +47 -0
- package/commands/paper.md +44 -0
- package/commands/queue.md +48 -0
- package/commands/retry.md +41 -0
- package/commands/turing.md +10 -0
- package/config/failure_modes.yaml +74 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +6 -0
- package/templates/scripts/__pycache__/draft_paper_sections.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/literature_search.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
- package/templates/scripts/draft_paper_sections.py +498 -0
- package/templates/scripts/experiment_queue.py +441 -0
- package/templates/scripts/fork_experiment.py +286 -0
- package/templates/scripts/generate_brief.py +25 -0
- package/templates/scripts/literature_search.py +421 -0
- package/templates/scripts/scaffold.py +10 -0
- package/templates/scripts/smart_retry.py +398 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Experiment branching — run parallel tracks from a common parent.
|
|
3
|
+
|
|
4
|
+
"Try both A and B from this point" — creates child experiments,
|
|
5
|
+
runs both, reports which branch wins.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python scripts/fork_experiment.py exp-042 --branches "LightGBM dart" "XGBoost deeper"
|
|
9
|
+
python scripts/fork_experiment.py exp-042 --branches "A" "B" --auto-promote
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import argparse
|
|
15
|
+
import json
|
|
16
|
+
import subprocess
|
|
17
|
+
import sys
|
|
18
|
+
from datetime import datetime, timezone
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
|
|
21
|
+
import yaml
|
|
22
|
+
|
|
23
|
+
from scripts.turing_io import load_config, load_experiments
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def find_experiment(experiments: list[dict], exp_id: str) -> dict | None:
|
|
27
|
+
"""Find experiment by ID."""
|
|
28
|
+
for exp in experiments:
|
|
29
|
+
if exp.get("experiment_id") == exp_id:
|
|
30
|
+
return exp
|
|
31
|
+
return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def create_branch(
    parent: dict,
    branch_description: str,
    branch_index: int,
) -> dict:
    """Build the metadata record for one fork branch (not yet executed).

    The branch ID combines the parent's experiment ID with the 1-based
    branch position, e.g. ``fork-exp-042-1``.

    Args:
        parent: The parent experiment record.
        branch_description: Human-readable description of this branch.
        branch_index: 0-based position among the requested branches.

    Returns:
        A pending branch descriptor with empty metrics.
    """
    parent_exp_id = parent.get("experiment_id", "unknown")
    descriptor: dict = {}
    descriptor["branch_id"] = f"fork-{parent_exp_id}-{branch_index + 1}"
    descriptor["parent_id"] = parent_exp_id
    descriptor["description"] = branch_description
    descriptor["status"] = "pending"
    descriptor["created_at"] = datetime.now(timezone.utc).isoformat()
    descriptor["result_experiment"] = None
    descriptor["metrics"] = {}
    return descriptor
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _parse_metric_block(stdout: str) -> dict:
    """Parse ``key: value`` metrics from the first ``---``-delimited block.

    Only the text between the first pair of ``---`` lines is scanned.
    Values that parse as floats are stored as floats; anything else is
    kept as a stripped string.
    """
    metrics: dict = {}
    in_block = False
    for line in stdout.splitlines():
        line = line.strip()
        if line == "---":
            if in_block:
                break  # closing delimiter: stop at the end of the first block
            in_block = True
            continue
        if in_block and ":" in line:
            key, value = line.split(":", 1)
            try:
                metrics[key.strip()] = float(value.strip())
            except ValueError:
                metrics[key.strip()] = value.strip()
    return metrics


def run_branch(branch: dict, seed: int = 42, timeout: int = 600) -> dict:
    """Execute a single branch experiment by invoking ``train.py``.

    Mutates *branch* in place (status, timestamps, metrics/error) and also
    returns it for convenience.

    Args:
        branch: Branch descriptor created by :func:`create_branch`.
        seed: Random seed forwarded to the training script.
        timeout: Maximum runtime in seconds before the branch is failed.

    Returns:
        The same branch dict, with status ``completed`` or ``failed``.
    """
    branch["status"] = "running"
    branch["started_at"] = datetime.now(timezone.utc).isoformat()

    # sys.executable guarantees we use the interpreter (and virtualenv)
    # running this script; a bare "python" may be missing from PATH or
    # point at a different environment.
    cmd = [sys.executable, "train.py", "--seed", str(seed)]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)
    except subprocess.TimeoutExpired:
        branch["status"] = "failed"
        branch["error"] = "timeout"
        return branch

    if proc.returncode != 0:
        branch["status"] = "failed"
        # Keep only the tail of stderr so reports stay readable.
        branch["error"] = proc.stderr[-300:] if proc.stderr else "unknown error"
        return branch

    branch["status"] = "completed"
    branch["completed_at"] = datetime.now(timezone.utc).isoformat()
    branch["metrics"] = _parse_metric_block(proc.stdout)
    return branch
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def determine_winner(
|
|
100
|
+
branches: list[dict],
|
|
101
|
+
metric: str,
|
|
102
|
+
lower_is_better: bool,
|
|
103
|
+
) -> dict | None:
|
|
104
|
+
"""Determine the winning branch by primary metric.
|
|
105
|
+
|
|
106
|
+
Returns the winning branch dict, or None if no branches completed.
|
|
107
|
+
"""
|
|
108
|
+
completed = [b for b in branches if b.get("status") == "completed" and metric in b.get("metrics", {})]
|
|
109
|
+
if not completed:
|
|
110
|
+
return None
|
|
111
|
+
|
|
112
|
+
if lower_is_better:
|
|
113
|
+
return min(completed, key=lambda b: b["metrics"][metric])
|
|
114
|
+
else:
|
|
115
|
+
return max(completed, key=lambda b: b["metrics"][metric])
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def format_fork_report(
|
|
119
|
+
parent_id: str,
|
|
120
|
+
branches: list[dict],
|
|
121
|
+
winner: dict | None,
|
|
122
|
+
metric: str,
|
|
123
|
+
) -> str:
|
|
124
|
+
"""Format fork results as a comparison tree."""
|
|
125
|
+
lines = [
|
|
126
|
+
f"# Fork from {parent_id}",
|
|
127
|
+
"",
|
|
128
|
+
]
|
|
129
|
+
|
|
130
|
+
if not branches:
|
|
131
|
+
lines.append("No branches executed.")
|
|
132
|
+
return "\n".join(lines)
|
|
133
|
+
|
|
134
|
+
winner_id = winner["branch_id"] if winner else None
|
|
135
|
+
|
|
136
|
+
for branch in branches:
|
|
137
|
+
status = branch.get("status", "?")
|
|
138
|
+
desc = branch.get("description", "?")
|
|
139
|
+
bid = branch.get("branch_id", "?")
|
|
140
|
+
|
|
141
|
+
if status == "completed":
|
|
142
|
+
metric_val = branch.get("metrics", {}).get(metric, "N/A")
|
|
143
|
+
is_winner = bid == winner_id
|
|
144
|
+
marker = "WINNER" if is_winner else ""
|
|
145
|
+
if isinstance(metric_val, float):
|
|
146
|
+
lines.append(f"├── {bid}: {desc} → {metric}={metric_val:.4f} {marker}")
|
|
147
|
+
else:
|
|
148
|
+
lines.append(f"├── {bid}: {desc} → {metric}={metric_val} {marker}")
|
|
149
|
+
elif status == "failed":
|
|
150
|
+
error = branch.get("error", "unknown")
|
|
151
|
+
lines.append(f"├── {bid}: {desc} → FAILED ({error})")
|
|
152
|
+
else:
|
|
153
|
+
lines.append(f"├── {bid}: {desc} → {status}")
|
|
154
|
+
|
|
155
|
+
if winner:
|
|
156
|
+
lines.extend([
|
|
157
|
+
"",
|
|
158
|
+
f"**Recommendation:** promote {winner['branch_id']}, abandon the rest.",
|
|
159
|
+
])
|
|
160
|
+
|
|
161
|
+
return "\n".join(lines)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def save_fork_report(report: dict, output_dir: str = "experiments/forks") -> Path:
    """Persist a fork report as YAML under *output_dir*.

    The file is named ``<parent_id>-fork.yaml``; the directory is created
    if needed.

    Args:
        report: The fork result dict from ``run_fork``.
        output_dir: Destination directory for the YAML file.

    Returns:
        Path of the written file.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)
    target = target_dir / f"{report.get('parent_id', 'unknown')}-fork.yaml"
    with target.open("w") as fh:
        yaml.dump(report, fh, default_flow_style=False, sort_keys=False)
    return target
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def run_fork(
    exp_id: str,
    branch_descriptions: list[str],
    auto_promote: bool = False,
    config_path: str = "config.yaml",
    log_path: str = "experiments/log.jsonl",
    timeout: int = 600,
) -> dict:
    """Fork an experiment into multiple branches, run them all, pick a winner.

    Args:
        exp_id: Parent experiment ID.
        branch_descriptions: One description per branch to create.
        auto_promote: Recorded in the result; marks the winner for auto-keep.
        config_path: Path to config.yaml (supplies the primary metric).
        log_path: Path to the experiment log.
        timeout: Per-branch timeout in seconds.

    Returns:
        Fork result dict with branches, winner, and summary counts, or a
        dict containing only an ``error`` key on bad input.
    """
    cfg = load_config(config_path)
    evaluation = cfg.get("evaluation", {})
    primary_metric = evaluation.get("primary_metric", "accuracy")
    lower_is_better = evaluation.get("lower_is_better", False)

    parent = find_experiment(load_experiments(log_path), exp_id)

    if not parent:
        return {"error": f"Experiment {exp_id} not found"}

    if not branch_descriptions:
        return {"error": "No branches specified. Use --branches 'A' 'B'"}

    # Create one pending branch per requested description.
    branches = [
        create_branch(parent, desc, idx)
        for idx, desc in enumerate(branch_descriptions)
    ]

    print(f"Forking {exp_id} into {len(branches)} branches:", file=sys.stderr)
    for b in branches:
        print(f"  {b['branch_id']}: {b['description']}", file=sys.stderr)
    print(file=sys.stderr)

    # Run each branch with a distinct seed so branches aren't identical runs.
    for idx, branch in enumerate(branches):
        print(f"  [{idx+1}/{len(branches)}] Running {branch['branch_id']}...",
              end=" ", flush=True, file=sys.stderr)
        run_branch(branch, seed=42 + idx, timeout=timeout)
        if branch["status"] != "completed":
            print("FAILED", file=sys.stderr)
        else:
            shown = branch.get("metrics", {}).get(primary_metric, "N/A")
            print(f"{primary_metric}={shown}", file=sys.stderr)

    winner = determine_winner(branches, primary_metric, lower_is_better)

    statuses = [b["status"] for b in branches]
    return {
        "parent_id": exp_id,
        "timestamp": datetime.now(timezone.utc).isoformat(),
        "metric": primary_metric,
        "lower_is_better": lower_is_better,
        "branches": branches,
        "winner": winner["branch_id"] if winner else None,
        "winner_metric": winner["metrics"].get(primary_metric) if winner else None,
        "auto_promote": auto_promote,
        "total_branches": len(branches),
        "completed": statuses.count("completed"),
        "failed": statuses.count("failed"),
    }
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def main() -> None:
    """CLI entry point: parse arguments, run the fork, report results."""
    ap = argparse.ArgumentParser(description="Fork experiment into parallel branches")
    ap.add_argument("exp_id", help="Parent experiment ID")
    ap.add_argument("--branches", nargs="+", required=True, help="Branch descriptions")
    ap.add_argument("--auto-promote", action="store_true", help="Auto-keep winner")
    ap.add_argument("--config", default="config.yaml")
    ap.add_argument("--log", default="experiments/log.jsonl")
    ap.add_argument("--timeout", type=int, default=600)
    ap.add_argument("--json", action="store_true")
    args = ap.parse_args()

    result = run_fork(
        args.exp_id,
        args.branches,
        args.auto_promote,
        args.config,
        args.log,
        args.timeout,
    )

    # Persist the report only for successful runs.
    if "error" not in result:
        saved = save_fork_report(result)
        print(f"\nSaved to {saved}", file=sys.stderr)

    if args.json:
        print(json.dumps(result, indent=2, default=str))
        return

    if "error" in result:
        print(f"ERROR: {result['error']}")
        return

    winning = next(
        (b for b in result["branches"] if b["branch_id"] == result.get("winner")),
        None,
    )
    print(format_fork_report(
        result["parent_id"], result["branches"], winning, result["metric"],
    ))
|
|
283
|
+
|
|
284
|
+
|
|
285
|
+
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|
@@ -212,6 +212,18 @@ def detect_environment_drift(experiments: list[dict]) -> list[str]:
|
|
|
212
212
|
return warnings
|
|
213
213
|
|
|
214
214
|
|
|
215
|
+
def load_queue_summary(queue_path: str = "experiments/queue-summary.yaml") -> dict | None:
|
|
216
|
+
"""Load the most recent queue execution summary."""
|
|
217
|
+
path = Path(queue_path)
|
|
218
|
+
if not path.exists():
|
|
219
|
+
return None
|
|
220
|
+
try:
|
|
221
|
+
with open(path) as f:
|
|
222
|
+
return yaml.safe_load(f)
|
|
223
|
+
except (yaml.YAMLError, OSError):
|
|
224
|
+
return None
|
|
225
|
+
|
|
226
|
+
|
|
215
227
|
def load_profiles(profile_dir: str = "experiments/profiles") -> list[dict]:
|
|
216
228
|
"""Load all profiling results from YAML files."""
|
|
217
229
|
path = Path(profile_dir)
|
|
@@ -296,6 +308,7 @@ def format_brief(
|
|
|
296
308
|
reproductions: list[dict] | None = None,
|
|
297
309
|
diagnoses: list[dict] | None = None,
|
|
298
310
|
profiles: list[dict] | None = None,
|
|
311
|
+
queue_summary: dict | None = None,
|
|
299
312
|
) -> str:
|
|
300
313
|
"""Format the research briefing as markdown."""
|
|
301
314
|
direction = "lower" if lower_is_better else "higher"
|
|
@@ -472,6 +485,16 @@ def format_brief(
|
|
|
472
485
|
if failed:
|
|
473
486
|
lines.extend(["", f"*{len(failed)} experiment(s) failed reproducibility checks.*"])
|
|
474
487
|
|
|
488
|
+
# Queue report
|
|
489
|
+
if queue_summary and queue_summary.get("total"):
|
|
490
|
+
qs = queue_summary
|
|
491
|
+
lines.extend(["", "## Queue Report", ""])
|
|
492
|
+
lines.append(
|
|
493
|
+
f"**{qs.get('status', '?')}** — {qs.get('completed', 0)} completed, "
|
|
494
|
+
f"{qs.get('failed', 0)} failed, {qs.get('skipped', 0)} skipped "
|
|
495
|
+
f"of {qs.get('total', 0)} queued"
|
|
496
|
+
)
|
|
497
|
+
|
|
475
498
|
# Profiles
|
|
476
499
|
if profiles:
|
|
477
500
|
lines.extend(["", "## Performance Profile", ""])
|
|
@@ -569,6 +592,7 @@ def generate_brief(
|
|
|
569
592
|
reproductions = load_reproductions()
|
|
570
593
|
diagnoses = load_diagnoses()
|
|
571
594
|
profiles = load_profiles()
|
|
595
|
+
queue_summary = load_queue_summary()
|
|
572
596
|
|
|
573
597
|
return format_brief(
|
|
574
598
|
campaign, best, trajectory, model_types, hypotheses,
|
|
@@ -579,6 +603,7 @@ def generate_brief(
|
|
|
579
603
|
reproductions=reproductions if reproductions else None,
|
|
580
604
|
diagnoses=diagnoses if diagnoses else None,
|
|
581
605
|
profiles=profiles if profiles else None,
|
|
606
|
+
queue_summary=queue_summary,
|
|
582
607
|
)
|
|
583
608
|
|
|
584
609
|
|