claude-turing 3.3.0 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +13 -2
- package/commands/annotate.md +23 -0
- package/commands/archive.md +23 -0
- package/commands/cite.md +23 -0
- package/commands/flashback.md +22 -0
- package/commands/merge.md +24 -0
- package/commands/present.md +23 -0
- package/commands/prune.md +26 -0
- package/commands/quantize.md +24 -0
- package/commands/replay.md +23 -0
- package/commands/search.md +22 -0
- package/commands/surgery.md +27 -0
- package/commands/template.md +22 -0
- package/commands/trend.md +21 -0
- package/commands/turing.md +22 -0
- package/package.json +1 -1
- package/src/install.js +2 -0
- package/src/verify.js +11 -0
- package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
- package/templates/scripts/architecture_surgery.py +238 -0
- package/templates/scripts/citation_manager.py +436 -0
- package/templates/scripts/experiment_annotations.py +392 -0
- package/templates/scripts/experiment_archive.py +534 -0
- package/templates/scripts/experiment_replay.py +592 -0
- package/templates/scripts/experiment_search.py +451 -0
- package/templates/scripts/experiment_templates.py +501 -0
- package/templates/scripts/generate_changelog.py +464 -0
- package/templates/scripts/generate_figures.py +597 -0
- package/templates/scripts/model_merger.py +277 -0
- package/templates/scripts/model_pruning.py +182 -0
- package/templates/scripts/model_quantization.py +177 -0
- package/templates/scripts/scaffold.py +20 -0
- package/templates/scripts/session_flashback.py +461 -0
- package/templates/scripts/trend_analysis.py +503 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "turing",
|
|
3
|
-
"version": "3.3.0",
|
|
4
|
-
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol.
|
|
3
|
+
"version": "3.5.0",
|
|
4
|
+
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 60 commands, 2 specialized agents, experiment archaeology (trend + flashback + archive + annotate + search + template + replay), model surgery (prune + quantize + merge + surgery), feature & training intelligence, model debugging, pre-training intelligence, meta-intelligence, scaling & efficiency, model composition, deep analysis, experiment orchestration, literature + paper, model export, profiling, checkpoints, experiment intelligence, statistical rigor, tree-search, cost-performance, model cards, hypothesis database, novelty guard, anti-cheating, taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "pragnition"
|
|
7
7
|
},
|
package/README.md
CHANGED
|
@@ -360,6 +360,17 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
|
|
|
360
360
|
| `/turing:calibrate [exp-id]` | Probability calibration — ECE/MCE, reliability diagrams, Platt/isotonic/temperature scaling |
|
|
361
361
|
| `/turing:feature [--method]` | Automated feature selection — multi-method consensus ranking, redundancy, interactions |
|
|
362
362
|
| `/turing:curriculum [exp-id]` | Training curriculum optimization — difficulty scoring, strategy comparison, mislabeled sample detection |
|
|
363
|
+
| `/turing:prune <exp-id>` | Weight pruning — magnitude/structured/lottery, sparsity sweep, knee point detection |
|
|
364
|
+
| `/turing:quantize <exp-id>` | Post-training quantization — FP16/INT8, accuracy-latency comparison |
|
|
365
|
+
| `/turing:merge <exp-ids...>` | Model merging — uniform/greedy soup, TIES, DARE, zero latency cost |
|
|
366
|
+
| `/turing:surgery <exp-id>` | Architecture modification — add/remove layer, widen/narrow, swap activation |
|
|
367
|
+
| `/turing:trend` | Long-term trend analysis — improvement velocity, family ROI, diminishing returns |
|
|
368
|
+
| `/turing:flashback` | Session context restoration — "where was I?" after days away |
|
|
369
|
+
| `/turing:archive` | Experiment lifecycle cleanup — compress old artifacts, summary index |
|
|
370
|
+
| `/turing:annotate <exp-id>` | Retrospective annotations — human notes and tags on experiments |
|
|
371
|
+
| `/turing:search <query>` | Natural language experiment search — text + structured filters |
|
|
372
|
+
| `/turing:template <action>` | Experiment template library — save/list/apply reusable configs |
|
|
373
|
+
| `/turing:replay <exp-id>` | Experiment replay — re-run old approach with current infrastructure |
|
|
363
374
|
|
|
364
375
|
And for fully hands-off operation:
|
|
365
376
|
|
|
@@ -544,11 +555,11 @@ Each project gets independent config, data, experiments, models, and agent memor
|
|
|
544
555
|
|
|
545
556
|
## Architecture of Turing Itself
|
|
546
557
|
|
|
547
|
-
|
|
558
|
+
60 commands, 2 agents, 10 config files, 79 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), deep analysis (diff + watch + regress), model composition (ensemble + stitch + warm), scaling & efficiency (scale + budget + distill), meta-intelligence (transfer + audit), pre-training intelligence (sanity + baseline + leak), model debugging (xray + sensitivity + calibrate), feature & training intelligence (feature + curriculum), model surgery (prune + quantize + merge + surgery), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
|
|
548
559
|
|
|
549
560
|
```
|
|
550
561
|
turing/
|
|
551
|
-
├── commands/
|
|
562
|
+
├── commands/ 59 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration + deep analysis + model composition + scaling & efficiency + meta-intelligence + pre-training intelligence + model debugging + feature & training intelligence + model surgery + experiment archaeology)
|
|
552
563
|
├── agents/ 2 agents (researcher: read/write, evaluator: read-only)
|
|
553
564
|
├── config/ 8 files (lifecycle, taxonomy, archetypes, novelty aliases)
|
|
554
565
|
├── templates/ Scaffolded into user projects by /turing:init
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: annotate
|
|
3
|
+
description: Retrospective experiment annotations — add human notes, tags, and context that automated metrics can't capture.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> \"note\" [--tag fragile] | --list | --search \"keyword\""
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Add context that experiment logs can't capture. "This only worked because the data was pre-sorted."
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/experiment_annotations.py $ARGUMENTS`
|
|
14
|
+
3. **Operations:** add (text + tags), list (per-experiment or all), search (keyword or tag)
|
|
15
|
+
4. **Stored in:** `experiments/annotations.yaml`
|
|
16
|
+
|
|
17
|
+
## Examples
|
|
18
|
+
```
|
|
19
|
+
/turing:annotate exp-042 "Fragile — only works with specific preprocessing"
|
|
20
|
+
/turing:annotate exp-042 "Reviewer 2 requested this" --tag reviewer-requested
|
|
21
|
+
/turing:annotate --list
|
|
22
|
+
/turing:annotate --search "fragile"
|
|
23
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: archive
|
|
3
|
+
description: Experiment lifecycle cleanup — compress old artifacts, prune checkpoints, create queryable summary index. Reclaim disk space.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--older-than 30d] [--keep-best 10] [--dry-run]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Keep your project directory manageable after 200+ experiments.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/experiment_archive.py $ARGUMENTS`
|
|
14
|
+
3. **Protected experiments:** Pareto-optimal, current best, recent, top-N by metric
|
|
15
|
+
4. **Report:** archived count, preserved count, space reclaimed
|
|
16
|
+
5. **Saved output:** `experiments/archive/index.yaml`
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
```
|
|
20
|
+
/turing:archive --dry-run # Preview what would be archived
|
|
21
|
+
/turing:archive --older-than 30 --keep-best 10 # Archive old, keep top 10
|
|
22
|
+
/turing:archive # Default: 30 days, keep 10
|
|
23
|
+
```
|
package/commands/cite.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: cite
|
|
3
|
+
description: Citation & attribution manager — track papers, datasets, methods. Audit for missing citations, generate BibTeX.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<add|list|check|bib> [--key Chen2016 --title XGBoost --url ...]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Track which papers and methods influenced each experiment. Catch missing citations before submission.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/citation_manager.py $ARGUMENTS`
|
|
14
|
+
3. **Operations:** add (associate citation with experiment), list (group by type), check (audit missing), bib (BibTeX)
|
|
15
|
+
4. **Stored in:** `experiments/citations.yaml`
|
|
16
|
+
|
|
17
|
+
## Examples
|
|
18
|
+
```
|
|
19
|
+
/turing:cite add exp-042 --key Chen2016 --title "XGBoost" --type method --url "https://arxiv.org/abs/1603.02754"
|
|
20
|
+
/turing:cite list
|
|
21
|
+
/turing:cite check # Audit for missing citations
|
|
22
|
+
/turing:cite bib # Generate BibTeX
|
|
23
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: flashback
|
|
3
|
+
description: Session context restoration — "where was I?" summary after days away. Current best, pending hypotheses, last session, annotations.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--days 7] [--last 10]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Come back to a project after a week and start working in 10 seconds instead of 30 minutes.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/session_flashback.py $ARGUMENTS`
|
|
14
|
+
3. **Report:** current best, last session experiments, pending hypotheses, annotations, budget, suggested next action
|
|
15
|
+
4. **Saved output:** `experiments/flashbacks/flashback-*.yaml`
|
|
16
|
+
|
|
17
|
+
## Examples
|
|
18
|
+
```
|
|
19
|
+
/turing:flashback # Default: last 7 days
|
|
20
|
+
/turing:flashback --days 14 # 2-week lookback
|
|
21
|
+
/turing:flashback --last 5 # Last 5 experiments
|
|
22
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: merge
|
|
3
|
+
description: Model merging — average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-ids...> [--method uniform|greedy|ties|dare]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Combine model weights (not predictions) into a single, better model with no latency overhead.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/model_merger.py $ARGUMENTS`
|
|
15
|
+
3. **Methods:** uniform soup (simple average), greedy soup (include only if improves), TIES (trim+elect+merge), DARE (drop+rescale)
|
|
16
|
+
4. **Report:** compatibility check, per-model metrics, method comparison, improvement delta
|
|
17
|
+
5. **Saved output:** `experiments/merges/merge-*.yaml`
|
|
18
|
+
|
|
19
|
+
## Examples
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
/turing:merge exp-042 exp-053 exp-067 # All methods
|
|
23
|
+
/turing:merge exp-042 exp-053 --method greedy # Greedy soup only
|
|
24
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: present
|
|
3
|
+
description: Presentation figure generation — training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--figures training,comparison] [--style light|dark|poster]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Generate presentation-ready figure specifications from experiment data in seconds.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/generate_figures.py $ARGUMENTS`
|
|
14
|
+
3. **Figure types:** training, comparison, ablation, pareto, sensitivity
|
|
15
|
+
4. **Styles:** light (papers), dark (demos), poster (large fonts)
|
|
16
|
+
5. **Saved output:** `paper/figures/`
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
```
|
|
20
|
+
/turing:present # All figures
|
|
21
|
+
/turing:present --figures training,comparison # Specific figures
|
|
22
|
+
/turing:present --style dark # Dark theme
|
|
23
|
+
```
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: prune
|
|
3
|
+
description: Weight pruning — measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> [--sparsity 0.5,0.75,0.9] [--method magnitude|structured|lottery]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Remove redundant weights for faster inference and smaller models.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/model_pruning.py $ARGUMENTS`
|
|
15
|
+
3. **Methods:** magnitude (zero small weights), structured (remove neurons), lottery (iterative with rewind)
|
|
16
|
+
4. **For tree models:** progressively reduces n_estimators
|
|
17
|
+
5. **Report:** sparsity sweep table, knee point, recommended sparsity
|
|
18
|
+
6. **Saved output:** `experiments/pruning/<exp-id>-pruning.yaml`
|
|
19
|
+
|
|
20
|
+
## Examples
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
/turing:prune exp-042 # Default: magnitude, 5 levels
|
|
24
|
+
/turing:prune exp-042 --method structured # Remove entire neurons
|
|
25
|
+
/turing:prune exp-042 --sparsity 0.5,0.75,0.9 # Custom levels
|
|
26
|
+
```
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: quantize
|
|
3
|
+
description: Post-training quantization — FP32→INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> [--precision int8|fp16|dynamic]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Quantize for production. Lowest-effort optimization: 2-4x speedup, 2-4x memory reduction.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/model_quantization.py $ARGUMENTS`
|
|
15
|
+
3. **Precision levels:** FP32 (baseline), FP16 (GPU), INT8 dynamic (simplest), INT8 static (best accuracy)
|
|
16
|
+
4. **Report:** precision comparison table, recommended level, QAT suggestion if needed
|
|
17
|
+
5. **Saved output:** `experiments/quantization/<exp-id>-quantization.yaml`
|
|
18
|
+
|
|
19
|
+
## Examples
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
/turing:quantize exp-042 # Compare all precision levels
|
|
23
|
+
/turing:quantize exp-042 --precision int8 # INT8 specifically
|
|
24
|
+
```
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: replay
|
|
3
|
+
description: Experiment replay — re-run a historical experiment with current infrastructure to test if old approaches do better now.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> [--with-current-data] [--with-current-preprocessing]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Should you revisit old ideas? Infrastructure changes may make failed approaches work now.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/experiment_replay.py $ARGUMENTS`
|
|
14
|
+
3. **Modes:** default (current code+data), --with-current-data, --with-current-preprocessing
|
|
15
|
+
4. **Report:** original vs replayed metrics, delta, verdict
|
|
16
|
+
5. **Saved output:** `experiments/replays/`
|
|
17
|
+
|
|
18
|
+
## Examples
|
|
19
|
+
```
|
|
20
|
+
/turing:replay exp-023 # Replay with current infrastructure
|
|
21
|
+
/turing:replay exp-023 --with-current-data # Current data, old code
|
|
22
|
+
/turing:replay --list # List replayable experiments
|
|
23
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: search
|
|
3
|
+
description: Natural language experiment search — query with text + structured filters over 200+ experiments.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<query> [--filter \"accuracy>0.85\"] [--limit 10]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Find specific experiments in a large history with natural language and structured filters.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/experiment_search.py $ARGUMENTS`
|
|
14
|
+
3. **Filters:** `accuracy>0.85`, `status:kept`, `family:baseline`, `date:last-week`
|
|
15
|
+
4. **Report:** ranked table of matching experiments
|
|
16
|
+
|
|
17
|
+
## Examples
|
|
18
|
+
```
|
|
19
|
+
/turing:search "LightGBM high accuracy" --filter "accuracy>0.85"
|
|
20
|
+
/turing:search "failed neural net" --filter "status:discarded"
|
|
21
|
+
/turing:search "last week" --limit 5
|
|
22
|
+
```
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: surgery
|
|
3
|
+
description: Architecture modification — add/remove layers, widen/narrow, swap activations, inject skip connections. Specify what to change, system handles how.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<exp-id> --op <operation> [args...]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Programmatic architecture changes with auto warm-start from existing weights.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
14
|
+
2. **Run:** `python scripts/architecture_surgery.py $ARGUMENTS`
|
|
15
|
+
3. **Operations:** add-layer, remove-layer, widen, narrow, swap-activation, add-skip, add-norm, deepen, swap-objective
|
|
16
|
+
4. **For tree models:** deepen (increase max_depth), widen (more estimators), swap-objective
|
|
17
|
+
5. **Report:** operation details, config changes, parameter count delta, warm-start source
|
|
18
|
+
6. **Saved output:** `experiments/surgery/<exp-id>-<op>.yaml`
|
|
19
|
+
|
|
20
|
+
## Examples
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
/turing:surgery exp-042 --op widen 2 # 2x wider hidden layers
|
|
24
|
+
/turing:surgery exp-042 --op add-layer # Insert a layer
|
|
25
|
+
/turing:surgery exp-042 --op swap-activation relu gelu # ReLU → GELU
|
|
26
|
+
/turing:surgery exp-042 --op deepen # Deeper trees
|
|
27
|
+
```
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: template
|
|
3
|
+
description: Experiment template library — save winning configs as reusable templates, apply to new projects.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<save|list|apply|share> [--name name] [--from exp-id]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Turn your best experiment configs into reusable recipes that persist across projects.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/experiment_templates.py $ARGUMENTS`
|
|
14
|
+
3. **Operations:** save (from experiment), list (all templates), apply (to current project), share (export)
|
|
15
|
+
4. **Stored at:** `~/.turing/templates/` (cross-project)
|
|
16
|
+
|
|
17
|
+
## Examples
|
|
18
|
+
```
|
|
19
|
+
/turing:template save --from exp-042 --name "tabular-xgboost-v2"
|
|
20
|
+
/turing:template list
|
|
21
|
+
/turing:template apply tabular-xgboost-v2
|
|
22
|
+
```
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: trend
|
|
3
|
+
description: Long-term trend analysis — improvement velocity, family ROI, diminishing returns detection, strategic research direction.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--window 30d] [--metric accuracy]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
See the arc of your research, not just the latest results. Strategic view over 100+ experiments.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. **Activate environment:** `source .venv/bin/activate`
|
|
13
|
+
2. **Run:** `python scripts/trend_analysis.py $ARGUMENTS`
|
|
14
|
+
3. **Report:** improvement velocity over time windows, family ROI ranking, diminishing returns prediction, phase transitions
|
|
15
|
+
4. **Saved output:** `experiments/trends/trend-*.yaml`
|
|
16
|
+
|
|
17
|
+
## Examples
|
|
18
|
+
```
|
|
19
|
+
/turing:trend # Full trend analysis
|
|
20
|
+
/turing:trend --window 14d # Last 2 weeks
|
|
21
|
+
```
|
package/commands/turing.md
CHANGED
|
@@ -58,6 +58,17 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
58
58
|
| "calibrate", "calibration", "ECE", "reliability diagram", "overconfident", "probability calibration" | `/turing:calibrate` | Analyze |
|
|
59
59
|
| "feature", "features", "feature selection", "feature importance", "which features matter", "redundant features" | `/turing:feature` | Analyze |
|
|
60
60
|
| "curriculum", "training order", "easy to hard", "data ordering", "curriculum learning" | `/turing:curriculum` | Optimize |
|
|
61
|
+
| "prune", "pruning", "sparsity", "remove weights", "smaller model", "weight pruning" | `/turing:prune` | Optimize |
|
|
62
|
+
| "quantize", "quantization", "int8", "fp16", "reduce precision", "faster inference" | `/turing:quantize` | Optimize |
|
|
63
|
+
| "merge", "model soup", "merge weights", "average models", "TIES", "DARE" | `/turing:merge` | Compose |
|
|
64
|
+
| "surgery", "architecture", "add layer", "widen", "modify model", "swap activation" | `/turing:surgery` | Modify |
|
|
65
|
+
| "trend", "trends", "research direction", "improvement rate", "diminishing returns", "what's working" | `/turing:trend` | Analyze |
|
|
66
|
+
| "flashback", "where was I", "context", "resume", "catch up", "what happened" | `/turing:flashback` | Recall |
|
|
67
|
+
| "archive", "cleanup", "compress old", "disk space", "archive experiments" | `/turing:archive` | Manage |
|
|
68
|
+
| "annotate", "note", "tag experiment", "add note", "experiment note" | `/turing:annotate` | Record |
|
|
69
|
+
| "search", "find experiment", "query experiments", "which experiments" | `/turing:search` | Query |
|
|
70
|
+
| "template", "recipe", "save config", "reusable config", "starting point" | `/turing:template` | Manage |
|
|
71
|
+
| "replay", "re-run", "revisit", "retry old", "would it work now" | `/turing:replay` | Validate |
|
|
61
72
|
|
|
62
73
|
## Sub-commands
|
|
63
74
|
|
|
@@ -112,6 +123,17 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
112
123
|
| `/turing:calibrate [exp-id]` | Probability calibration: ECE/MCE, reliability diagrams, Platt/isotonic/temperature scaling | (inline) |
|
|
113
124
|
| `/turing:feature [--method]` | Automated feature selection: multi-method consensus ranking, redundancy, interaction generation | (inline) |
|
|
114
125
|
| `/turing:curriculum [exp-id]` | Training curriculum optimization: difficulty scoring, strategy comparison, impossible sample detection | (inline) |
|
|
126
|
+
| `/turing:prune <exp-id>` | Weight pruning: magnitude/structured/lottery, sparsity sweep, knee point detection | (inline) |
|
|
127
|
+
| `/turing:quantize <exp-id>` | Post-training quantization: FP16/INT8, accuracy-latency comparison, QAT suggestion | (inline) |
|
|
128
|
+
| `/turing:merge <exp-ids...>` | Model merging: uniform/greedy soup, TIES, DARE — free accuracy, zero latency cost | (inline) |
|
|
129
|
+
| `/turing:surgery <exp-id>` | Architecture modification: add/remove layer, widen/narrow, swap activation, skip connections | (inline) |
|
|
130
|
+
| `/turing:trend` | Long-term trend analysis: improvement velocity, family ROI, diminishing returns detection | (inline) |
|
|
131
|
+
| `/turing:flashback` | Session context restoration: "where was I?" after days away from the project | (inline) |
|
|
132
|
+
| `/turing:archive` | Experiment lifecycle cleanup: compress old artifacts, prune checkpoints, summary index | (inline) |
|
|
133
|
+
| `/turing:annotate <exp-id>` | Retrospective annotations: add human notes, tags, search by content | (inline) |
|
|
134
|
+
| `/turing:search <query>` | Natural language experiment search with structured filters | (inline) |
|
|
135
|
+
| `/turing:template <action>` | Experiment template library: save/list/apply reusable configs across projects | (inline) |
|
|
136
|
+
| `/turing:replay <exp-id>` | Experiment replay: re-run old experiment with current infrastructure | (inline) |
|
|
115
137
|
|
|
116
138
|
## Proactive Detection
|
|
117
139
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "3.3.0",
|
|
3
|
+
"version": "3.5.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
package/src/install.js
CHANGED
|
@@ -32,6 +32,8 @@ const SUB_COMMANDS = [
|
|
|
32
32
|
"sanity", "baseline", "leak",
|
|
33
33
|
"xray", "sensitivity", "calibrate",
|
|
34
34
|
"feature", "curriculum",
|
|
35
|
+
"prune", "quantize", "merge", "surgery",
|
|
36
|
+
"trend", "flashback", "archive", "annotate", "search", "template", "replay",
|
|
35
37
|
];
|
|
36
38
|
|
|
37
39
|
export async function install(opts = {}) {
|
package/src/verify.js
CHANGED
|
@@ -63,6 +63,17 @@ const EXPECTED_COMMANDS = [
|
|
|
63
63
|
"calibrate/SKILL.md",
|
|
64
64
|
"feature/SKILL.md",
|
|
65
65
|
"curriculum/SKILL.md",
|
|
66
|
+
"prune/SKILL.md",
|
|
67
|
+
"quantize/SKILL.md",
|
|
68
|
+
"merge/SKILL.md",
|
|
69
|
+
"surgery/SKILL.md",
|
|
70
|
+
"trend/SKILL.md",
|
|
71
|
+
"flashback/SKILL.md",
|
|
72
|
+
"archive/SKILL.md",
|
|
73
|
+
"annotate/SKILL.md",
|
|
74
|
+
"search/SKILL.md",
|
|
75
|
+
"template/SKILL.md",
|
|
76
|
+
"replay/SKILL.md",
|
|
66
77
|
];
|
|
67
78
|
|
|
68
79
|
const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|