claude-turing 2.1.0 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +5 -2
- package/commands/fork.md +40 -0
- package/commands/queue.md +48 -0
- package/commands/retry.md +41 -0
- package/commands/turing.md +6 -0
- package/config/failure_modes.yaml +74 -0
- package/package.json +1 -1
- package/src/install.js +2 -1
- package/src/verify.js +4 -0
- package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
- package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
- package/templates/scripts/experiment_queue.py +441 -0
- package/templates/scripts/fork_experiment.py +286 -0
- package/templates/scripts/generate_brief.py +25 -0
- package/templates/scripts/scaffold.py +6 -0
- package/templates/scripts/smart_retry.py +398 -0
- package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
- package/templates/tests/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/tests/__pycache__/conftest.cpython-314-pytest-9.0.2.pyc +0 -0
- package/templates/tests/__pycache__/test_cost_frontier.cpython-314-pytest-9.0.2.pyc +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "turing",
|
|
3
|
-
"version": "2.1.0",
|
|
4
|
-
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol.
|
|
3
|
+
"version": "2.2.1",
|
|
4
|
+
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 30 commands, 2 specialized agents, experiment orchestration (batch queue + smart retry + branching), literature integration + paper drafting, production model export, performance profiling, smart checkpoints, experiment intelligence, statistical rigor, tree-search hypothesis exploration, cost-performance frontier, model cards, model registry, hypothesis database with novelty guard, anti-cheating guardrails, and the taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "pragnition"
|
|
7
7
|
},
|
package/README.md
CHANGED
|
@@ -332,6 +332,9 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
|
|
|
332
332
|
| `/turing:checkpoint <action>` | Smart checkpoint management — list, prune (Pareto), average, resume, stats |
|
|
333
333
|
| `/turing:lit <query>` | Literature search — papers, SOTA baselines, related work |
|
|
334
334
|
| `/turing:paper [--sections] [--format]` | Draft paper sections from experiment logs (setup, results, ablation, hyperparams) |
|
|
335
|
+
| `/turing:queue <action>` | Batch experiment scheduler — add, list, run, pause, clear |
|
|
336
|
+
| `/turing:retry <exp-id>` | Smart failure recovery — auto-diagnose crash, apply fix, re-run |
|
|
337
|
+
| `/turing:fork <exp-id>` | Experiment branching — run parallel tracks, report winner |
|
|
335
338
|
| `/turing:export [--format]` | Export model to production format with equivalence check + latency benchmark |
|
|
336
339
|
| `/turing:card` | Generate a model card — performance, limitations, intended use, artifact contract |
|
|
337
340
|
| `/turing:logbook` | Generate HTML experiment logbook |
|
|
@@ -522,11 +525,11 @@ Each project gets independent config, data, experiments, models, and agent memor
|
|
|
522
525
|
|
|
523
526
|
## Architecture of Turing Itself
|
|
524
527
|
|
|
525
|
-
|
|
528
|
+
30 commands, 2 agents, 9 config files, 49 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), 778 tests, 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
|
|
526
529
|
|
|
527
530
|
```
|
|
528
531
|
turing/
|
|
529
|
-
├── commands/
|
|
532
|
+
├── commands/ 29 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration)
|
|
530
533
|
├── agents/ 2 agents (researcher: read/write, evaluator: read-only)
|
|
531
534
|
├── config/ 8 files (lifecycle, taxonomy, archetypes, novelty aliases)
|
|
532
535
|
├── templates/ Scaffolded into user projects by /turing:init
|
package/commands/fork.md
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: fork
|
|
3
|
+
description: Branch an experiment into parallel tracks — run both A and B, report the winner.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> --branches \"approach A\" \"approach B\" [--auto-promote]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Fork an experiment into parallel branches and compare results.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- First argument is the parent experiment ID
|
|
20
|
+
- `--branches "A" "B" "C"` — branch descriptions (2+ required)
|
|
21
|
+
- `--auto-promote` — automatically keep the winning branch
|
|
22
|
+
|
|
23
|
+
3. **Run fork:**
|
|
24
|
+
```bash
|
|
25
|
+
python scripts/fork_experiment.py $ARGUMENTS
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
4. **Report results:**
|
|
29
|
+
- Comparison tree showing each branch's metric
|
|
30
|
+
- Winner identified and marked
|
|
31
|
+
- Recommendation: promote winner, abandon rest
|
|
32
|
+
|
|
33
|
+
5. **Saved output:** report written to `experiments/forks/exp-NNN-fork.yaml`
|
|
34
|
+
|
|
35
|
+
## Examples
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
/turing:fork exp-042 --branches "LightGBM with dart" "XGBoost deeper trees"
|
|
39
|
+
/turing:fork exp-042 --branches "A" "B" "C" --auto-promote
|
|
40
|
+
```
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: queue
|
|
3
|
+
description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<add|list|run|pause|clear> [description] [--priority high] [--after q-001]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Manage the experiment queue for unattended batch execution.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- **add** `"description"` `--priority high` `--after q-001` — queue an experiment
|
|
20
|
+
- **list** — show queue with status, priority, dependencies
|
|
21
|
+
- **run** `--halt-on-error` — execute all queued experiments
|
|
22
|
+
- **pause** — stop after current experiment finishes
|
|
23
|
+
- **clear** — discard all queued items
|
|
24
|
+
|
|
25
|
+
3. **Run queue manager:**
|
|
26
|
+
```bash
|
|
27
|
+
python scripts/experiment_queue.py $ARGUMENTS
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
4. **Report results by action:**
|
|
31
|
+
- **add:** confirms ID and priority
|
|
32
|
+
- **list:** table of queued/completed/failed items
|
|
33
|
+
- **run:** batch summary with per-experiment status
|
|
34
|
+
- **pause/clear:** confirmation message
|
|
35
|
+
|
|
36
|
+
5. **Queue persists in** `experiments/queue.yaml`
|
|
37
|
+
|
|
38
|
+
## Examples
|
|
39
|
+
|
|
40
|
+
```
|
|
41
|
+
/turing:queue add "try LightGBM" --priority high
|
|
42
|
+
/turing:queue add "deeper trees" --after q-001
|
|
43
|
+
/turing:queue list
|
|
44
|
+
/turing:queue run
|
|
45
|
+
/turing:queue run --halt-on-error
|
|
46
|
+
/turing:queue pause
|
|
47
|
+
/turing:queue clear
|
|
48
|
+
```
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: retry
|
|
3
|
+
description: Smart failure recovery — auto-diagnose crash type and retry with targeted fix. OOM → halve batch. NaN → add clipping.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> [--max-attempts 3]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Auto-diagnose and recover from experiment failures.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- First argument is the experiment ID (required)
|
|
20
|
+
- `--max-attempts 3` limits retry count
|
|
21
|
+
- `--classify "error text"` just classifies without retrying
|
|
22
|
+
|
|
23
|
+
3. **Run smart retry:**
|
|
24
|
+
```bash
|
|
25
|
+
python scripts/smart_retry.py $ARGUMENTS
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
4. **Report results:**
|
|
29
|
+
- **RECOVERED:** fix applied, retry succeeded
|
|
30
|
+
- **FAILED:** all retry attempts exhausted
|
|
31
|
+
- **MANUAL FIX NEEDED:** failure type requires human intervention
|
|
32
|
+
- Shows failure classification, fix applied, and attempt history
|
|
33
|
+
|
|
34
|
+
5. **Saved output:** report written to `experiments/retries/exp-NNN-retry.yaml`
|
|
35
|
+
|
|
36
|
+
## Examples
|
|
37
|
+
|
|
38
|
+
```
|
|
39
|
+
/turing:retry exp-042 # Auto-diagnose and retry
|
|
40
|
+
/turing:retry exp-042 --max-attempts 5 # More retries
|
|
41
|
+
```
|
package/commands/turing.md
CHANGED
|
@@ -34,6 +34,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
34
34
|
| "lit", "literature", "papers", "SOTA", "baseline", "related work", "citations" | `/turing:lit` | Research |
|
|
35
35
|
| "paper", "draft paper", "write paper", "results table", "latex", "experimental setup" | `/turing:paper` | Document |
|
|
36
36
|
| "export", "deploy", "production", "onnx", "torchscript", "tflite", "ship model" | `/turing:export` | Deploy |
|
|
37
|
+
| "queue", "batch", "overnight", "schedule experiments", "run queue" | `/turing:queue` | Orchestrate |
|
|
38
|
+
| "retry", "retry experiment", "crashed", "OOM", "fix and rerun" | `/turing:retry` | Orchestrate |
|
|
39
|
+
| "fork", "branch", "try both", "parallel experiments", "A or B" | `/turing:fork` | Orchestrate |
|
|
37
40
|
| "profile", "profiling", "bottleneck", "slow training", "why is it slow", "timing" | `/turing:profile` | Check |
|
|
38
41
|
| "checkpoint", "checkpoints", "prune checkpoints", "disk space", "resume training" | `/turing:checkpoint` | Check |
|
|
39
42
|
|
|
@@ -66,6 +69,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
66
69
|
| `/turing:lit <query>` | Literature search: papers, SOTA baselines, related work | (inline, uses WebSearch) |
|
|
67
70
|
| `/turing:paper [--sections] [--format]` | Draft paper sections from experiment logs (setup, results, ablation, hyperparams) | (inline) |
|
|
68
71
|
| `/turing:export [exp-id] [--format]` | Export model to production format with equivalence check + latency benchmark | (inline) |
|
|
72
|
+
| `/turing:queue <action>` | Batch experiment scheduler: add, list, run, pause, clear | (inline) |
|
|
73
|
+
| `/turing:retry <exp-id>` | Smart failure recovery: auto-diagnose crash, apply fix, re-run | (inline) |
|
|
74
|
+
| `/turing:fork <exp-id> --branches` | Experiment branching: run parallel tracks, report winner | (inline) |
|
|
69
75
|
| `/turing:profile [exp-id]` | Computational profiling: timing, memory, throughput, bottleneck detection | (inline) |
|
|
70
76
|
| `/turing:checkpoint <action>` | Smart checkpoint management: list, prune (Pareto), average, resume, stats | (inline) |
|
|
71
77
|
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Configurable failure taxonomy for /turing:retry
|
|
2
|
+
# Add project-specific patterns to extend the built-in taxonomy.
|
|
3
|
+
# Each failure mode has:
|
|
4
|
+
# patterns: list of strings to match in stdout/stderr (case-insensitive)
|
|
5
|
+
# fix: human-readable description of the fix
|
|
6
|
+
# config_changes: dict of hyperparameter changes (//N = divide, *N = multiply)
|
|
7
|
+
# severity: recoverable | requires_intervention
|
|
8
|
+
|
|
9
|
+
oom:
|
|
10
|
+
patterns:
|
|
11
|
+
- "CUDA out of memory"
|
|
12
|
+
- "MemoryError"
|
|
13
|
+
- "RuntimeError: out of memory"
|
|
14
|
+
- "std::bad_alloc"
|
|
15
|
+
- "OutOfMemoryError"
|
|
16
|
+
fix: "Reduce batch_size by 50%"
|
|
17
|
+
config_changes:
|
|
18
|
+
batch_size: "//2"
|
|
19
|
+
severity: recoverable
|
|
20
|
+
|
|
21
|
+
nan_loss:
|
|
22
|
+
patterns:
|
|
23
|
+
- "loss is NaN"
|
|
24
|
+
- "loss is nan"
|
|
25
|
+
- "RuntimeWarning: invalid value"
|
|
26
|
+
- "loss: nan"
|
|
27
|
+
- "NaN loss"
|
|
28
|
+
fix: "Add gradient clipping at 1.0, reduce learning_rate by 10x"
|
|
29
|
+
config_changes:
|
|
30
|
+
gradient_clip: 1.0
|
|
31
|
+
learning_rate: "//10"
|
|
32
|
+
severity: recoverable
|
|
33
|
+
|
|
34
|
+
timeout:
|
|
35
|
+
patterns:
|
|
36
|
+
- "TimeoutError"
|
|
37
|
+
- "exceeded time limit"
|
|
38
|
+
- "timed out"
|
|
39
|
+
- "TimeoutExpired"
|
|
40
|
+
fix: "Double max_epochs or training timeout"
|
|
41
|
+
config_changes:
|
|
42
|
+
max_epochs: "*2"
|
|
43
|
+
severity: recoverable
|
|
44
|
+
|
|
45
|
+
import_error:
|
|
46
|
+
patterns:
|
|
47
|
+
- "ModuleNotFoundError"
|
|
48
|
+
- "ImportError"
|
|
49
|
+
- "No module named"
|
|
50
|
+
fix: "Install missing dependency"
|
|
51
|
+
config_changes: {}
|
|
52
|
+
severity: requires_intervention
|
|
53
|
+
|
|
54
|
+
convergence_failure:
|
|
55
|
+
patterns:
|
|
56
|
+
- "loss did not decrease"
|
|
57
|
+
- "no improvement"
|
|
58
|
+
- "early stopping"
|
|
59
|
+
- "convergence warning"
|
|
60
|
+
fix: "Increase learning_rate by 3x for warm-up"
|
|
61
|
+
config_changes:
|
|
62
|
+
learning_rate: "*3"
|
|
63
|
+
severity: recoverable
|
|
64
|
+
|
|
65
|
+
data_error:
|
|
66
|
+
patterns:
|
|
67
|
+
- "FileNotFoundError"
|
|
68
|
+
- "No such file"
|
|
69
|
+
- "empty dataset"
|
|
70
|
+
- "zero samples"
|
|
71
|
+
- "KeyError"
|
|
72
|
+
fix: "Check data path and preprocessing"
|
|
73
|
+
config_changes: {}
|
|
74
|
+
severity: requires_intervention
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "2.1.0",
|
|
3
|
+
"version": "2.2.1",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
package/src/install.js
CHANGED
|
@@ -24,7 +24,7 @@ const SUB_COMMANDS = [
|
|
|
24
24
|
"try", "brief", "suggest", "explore", "design", "logbook", "poster",
|
|
25
25
|
"report", "mode", "preflight", "card", "seed", "reproduce",
|
|
26
26
|
"diagnose", "ablate", "frontier", "profile", "checkpoint", "export",
|
|
27
|
-
"lit", "paper",
|
|
27
|
+
"lit", "paper", "queue", "retry", "fork",
|
|
28
28
|
];
|
|
29
29
|
|
|
30
30
|
export async function install(opts = {}) {
|
|
@@ -78,6 +78,7 @@ export async function install(opts = {}) {
|
|
|
78
78
|
"defaults.yaml", "lifecycle.toml", "taxonomy.toml",
|
|
79
79
|
"experiment_archetypes.yaml", "novelty_aliases.yaml",
|
|
80
80
|
"relationships.toml", "state.toml", "task_taxonomy.yaml",
|
|
81
|
+
"failure_modes.yaml",
|
|
81
82
|
];
|
|
82
83
|
for (const file of CONFIG_FILES) {
|
|
83
84
|
await copyFile(
|
package/src/verify.js
CHANGED
|
@@ -41,6 +41,9 @@ const EXPECTED_COMMANDS = [
|
|
|
41
41
|
"export/SKILL.md",
|
|
42
42
|
"lit/SKILL.md",
|
|
43
43
|
"paper/SKILL.md",
|
|
44
|
+
"queue/SKILL.md",
|
|
45
|
+
"retry/SKILL.md",
|
|
46
|
+
"fork/SKILL.md",
|
|
44
47
|
];
|
|
45
48
|
|
|
46
49
|
const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
|
|
@@ -49,6 +52,7 @@ const EXPECTED_CONFIG = [
|
|
|
49
52
|
"defaults.yaml", "lifecycle.toml", "taxonomy.toml",
|
|
50
53
|
"experiment_archetypes.yaml", "novelty_aliases.yaml",
|
|
51
54
|
"relationships.toml", "state.toml", "task_taxonomy.yaml",
|
|
55
|
+
"failure_modes.yaml",
|
|
52
56
|
];
|
|
53
57
|
|
|
54
58
|
async function fileExists(path) {
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|