claude-turing 1.5.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +5 -2
- package/commands/export.md +48 -0
- package/commands/lit.md +47 -0
- package/commands/paper.md +44 -0
- package/commands/turing.md +6 -0
- package/package.json +1 -1
- package/src/install.js +2 -1
- package/src/verify.js +3 -0
- package/templates/scripts/__pycache__/draft_paper_sections.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/equivalence_checker.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/export_card.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/export_formats.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/latency_benchmark.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/literature_search.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/draft_paper_sections.py +498 -0
- package/templates/scripts/equivalence_checker.py +158 -0
- package/templates/scripts/export_card.py +183 -0
- package/templates/scripts/export_formats.py +385 -0
- package/templates/scripts/export_model.py +324 -0
- package/templates/scripts/latency_benchmark.py +167 -0
- package/templates/scripts/literature_search.py +421 -0
- package/templates/scripts/scaffold.py +10 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "turing",
|
|
3
|
-
"version": "1.5.0",
|
|
4
|
-
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol.
|
|
3
|
+
"version": "2.1.0",
|
|
4
|
+
"description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 27 commands, 2 specialized agents, literature integration + paper section drafting, production model export (6 formats, equivalence, latency), performance profiling, smart Pareto-based checkpoint management, experiment intelligence (error analysis, ablation, Pareto frontiers), statistical rigor (seed studies, reproducibility), tree-search hypothesis exploration (TreeQuest AB-MCTS), cost-performance frontier, model cards, model registry, hypothesis database with novelty guard, anti-cheating guardrails, and the taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "pragnition"
|
|
7
7
|
},
|
package/README.md
CHANGED
|
@@ -330,6 +330,9 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
|
|
|
330
330
|
| `/turing:frontier [--metrics]` | Pareto frontier — multi-objective tradeoff visualization |
|
|
331
331
|
| `/turing:profile [exp-id]` | Computational profiling — timing, memory, throughput, bottleneck detection |
|
|
332
332
|
| `/turing:checkpoint <action>` | Smart checkpoint management — list, prune (Pareto), average, resume, stats |
|
|
333
|
+
| `/turing:lit <query>` | Literature search — papers, SOTA baselines, related work |
|
|
334
|
+
| `/turing:paper [--sections] [--format]` | Draft paper sections from experiment logs (setup, results, ablation, hyperparams) |
|
|
335
|
+
| `/turing:export [exp-id] [--format]` | Export model to production format with equivalence check + latency benchmark |
|
|
333
336
|
| `/turing:card` | Generate a model card — performance, limitations, intended use, artifact contract |
|
|
334
337
|
| `/turing:logbook` | Generate HTML experiment logbook |
|
|
335
338
|
| `/turing:report` | Generate research report |
|
|
@@ -519,11 +522,11 @@ Each project gets independent config, data, experiments, models, and agent memor
|
|
|
519
522
|
|
|
520
523
|
## Architecture of Turing Itself
|
|
521
524
|
|
|
522
|
-
|
|
525
|
+
27 commands, 2 agents, 8 config files, 46 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, 664 tests, 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
|
|
523
526
|
|
|
524
527
|
```
|
|
525
528
|
turing/
|
|
526
|
-
├── commands/
|
|
529
|
+
├── commands/ 26 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow)
|
|
527
530
|
├── agents/ 2 agents (researcher: read/write, evaluator: read-only)
|
|
528
531
|
├── config/ 8 files (lifecycle, taxonomy, archetypes, novelty aliases)
|
|
529
532
|
├── templates/ Scaffolded into user projects by /turing:init
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: export
|
|
3
|
+
description: Export model to production format with equivalence verification, latency benchmarking, and deployment model card.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[exp-id] [--format joblib|xgboost_json|onnx|torchscript|tflite]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Export a trained model to a production-ready format.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- First argument can be an experiment ID (e.g., `exp-042`); defaults to best
|
|
20
|
+
- `--format joblib|xgboost_json|onnx|torchscript|tflite` specifies export format (auto-detected if omitted)
|
|
21
|
+
- `--skip-equivalence` skips inference equivalence check
|
|
22
|
+
- `--skip-latency` skips latency benchmark
|
|
23
|
+
- `--samples 100` sets test sample count
|
|
24
|
+
|
|
25
|
+
3. **Run export pipeline:**
|
|
26
|
+
```bash
|
|
27
|
+
python scripts/export_model.py $ARGUMENTS
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
4. **Report results:**
|
|
31
|
+
- **Export:** format, file size, output path, dependencies
|
|
32
|
+
- **Equivalence:** verdict (equivalent/approximately_equivalent/divergent), max delta
|
|
33
|
+
- **Latency:** p50/p95/p99 ms, speedup vs original
|
|
34
|
+
- **Model Card:** metrics, seed study, equivalence, latency, dependencies
|
|
35
|
+
|
|
36
|
+
5. **Output:** exported model + model_card.yaml written to `exports/exp-NNN/`
|
|
37
|
+
|
|
38
|
+
6. **If model file not found:** suggest checking the `models/best/` directory.
|
|
39
|
+
|
|
40
|
+
## Examples
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
/turing:export # Best experiment, default format
|
|
44
|
+
/turing:export exp-042 # Specific experiment
|
|
45
|
+
/turing:export --format xgboost_json # Native XGBoost JSON
|
|
46
|
+
/turing:export --format onnx # ONNX format
|
|
47
|
+
/turing:export --skip-equivalence --skip-latency # Fast export
|
|
48
|
+
```
|
package/commands/lit.md
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lit
|
|
3
|
+
description: Literature search scoped to the current experiment domain — find papers, SOTA baselines, and related work without leaving the terminal.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "<query> | --baseline | --related <exp-id>"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob, WebSearch
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Search the literature for papers, baselines, and related work.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- **Free query:** `"gradient boosting for tabular data"` — searches Semantic Scholar
|
|
20
|
+
- **Baseline:** `--baseline` — finds SOTA results for the current task, compares against your best
|
|
21
|
+
- **Related:** `--related exp-042` — finds papers using similar methods to a specific experiment
|
|
22
|
+
- `--auto-queue` — auto-queues hypotheses from literature with `source: "literature"`
|
|
23
|
+
- `--limit 10` — max number of results
|
|
24
|
+
|
|
25
|
+
3. **Run literature search:**
|
|
26
|
+
```bash
|
|
27
|
+
python scripts/literature_search.py $ARGUMENTS
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
4. **Report results:**
|
|
31
|
+
- **Papers:** title, authors, year, venue, citations, abstract snippet, URL
|
|
32
|
+
- **Baseline mode:** SOTA comparison with gap analysis against current best
|
|
33
|
+
- **Related mode:** methodological differences worth investigating
|
|
34
|
+
- **Hypotheses:** if `--auto-queue`, shows queued experiments from findings
|
|
35
|
+
|
|
36
|
+
5. **Saved output:** results written to `experiments/literature/query-YYYY-MM-DD-HHMMSS.md`
|
|
37
|
+
|
|
38
|
+
6. **If API unavailable:** reports error and suggests manual search.
|
|
39
|
+
|
|
40
|
+
## Examples
|
|
41
|
+
|
|
42
|
+
```
|
|
43
|
+
/turing:lit "gradient boosting missing values" # Free query
|
|
44
|
+
/turing:lit --baseline # SOTA comparison
|
|
45
|
+
/turing:lit --related exp-042 # Related work
|
|
46
|
+
/turing:lit --auto-queue "ensemble methods" # Queue hypotheses
|
|
47
|
+
```
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: paper
|
|
3
|
+
description: Draft mechanical paper sections (setup, results, ablation, hyperparameters) from experiment logs. LaTeX and markdown output.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--sections setup,results,ablation,hyperparameters] [--format latex|markdown]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Draft paper sections directly from experiment data.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
|
|
13
|
+
1. **Activate environment:**
|
|
14
|
+
```bash
|
|
15
|
+
source .venv/bin/activate
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
2. **Parse arguments from `$ARGUMENTS`:**
|
|
19
|
+
- `--sections setup,results,ablation,hyperparameters` — which sections to draft (default: all)
|
|
20
|
+
- `--format latex|markdown` — output format (default: latex)
|
|
21
|
+
|
|
22
|
+
3. **Run paper drafting:**
|
|
23
|
+
```bash
|
|
24
|
+
python scripts/draft_paper_sections.py $ARGUMENTS
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
4. **Report results:**
|
|
28
|
+
- **setup:** Experimental setup prose (dataset, metrics, split, seed methodology)
|
|
29
|
+
- **results:** Comparison table with all model types, best bolded, seed study stats
|
|
30
|
+
- **ablation:** Ablation table from `/turing:ablate` results
|
|
31
|
+
- **hyperparameters:** Appendix-style parameter table per model
|
|
32
|
+
|
|
33
|
+
5. **Output:** Each section saved to `paper/sections/` as `.tex` or `.md`
|
|
34
|
+
|
|
35
|
+
6. **Numbers are pulled directly from experiment logs** — no manual transcription needed.
|
|
36
|
+
|
|
37
|
+
## Examples
|
|
38
|
+
|
|
39
|
+
```
|
|
40
|
+
/turing:paper # All sections, LaTeX
|
|
41
|
+
/turing:paper --format markdown # All sections, markdown
|
|
42
|
+
/turing:paper --sections setup,results # Just setup + results
|
|
43
|
+
/turing:paper --sections ablation --format latex # Just ablation table
|
|
44
|
+
```
|
package/commands/turing.md
CHANGED
|
@@ -31,6 +31,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
31
31
|
| "diagnose", "error analysis", "failure modes", "where does it fail", "confusion matrix" | `/turing:diagnose` | Analyze |
|
|
32
32
|
| "ablate", "ablation", "remove component", "which features matter", "component impact" | `/turing:ablate` | Analyze |
|
|
33
33
|
| "frontier", "pareto", "tradeoff", "tradeoffs", "multi-objective", "which model is best" | `/turing:frontier` | Analyze |
|
|
34
|
+
| "lit", "literature", "papers", "SOTA", "baseline", "related work", "citations" | `/turing:lit` | Research |
|
|
35
|
+
| "paper", "draft paper", "write paper", "results table", "latex", "experimental setup" | `/turing:paper` | Document |
|
|
36
|
+
| "export", "deploy", "production", "onnx", "torchscript", "tflite", "ship model" | `/turing:export` | Deploy |
|
|
34
37
|
| "profile", "profiling", "bottleneck", "slow training", "why is it slow", "timing" | `/turing:profile` | Check |
|
|
35
38
|
| "checkpoint", "checkpoints", "prune checkpoints", "disk space", "resume training" | `/turing:checkpoint` | Check |
|
|
36
39
|
|
|
@@ -60,6 +63,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
60
63
|
| `/turing:diagnose [exp-id]` | Error analysis: failure modes, confused pairs, feature-range bias | (inline) |
|
|
61
64
|
| `/turing:ablate [--components]` | Ablation study: remove components, measure impact, flag dead weight | (inline) |
|
|
62
65
|
| `/turing:frontier [--metrics]` | Pareto frontier: multi-objective tradeoff visualization | (inline) |
|
|
66
|
+
| `/turing:lit <query>` | Literature search: papers, SOTA baselines, related work | (inline, uses WebSearch) |
|
|
67
|
+
| `/turing:paper [--sections] [--format]` | Draft paper sections from experiment logs (setup, results, ablation, hyperparams) | (inline) |
|
|
68
|
+
| `/turing:export [exp-id] [--format]` | Export model to production format with equivalence check + latency benchmark | (inline) |
|
|
63
69
|
| `/turing:profile [exp-id]` | Computational profiling: timing, memory, throughput, bottleneck detection | (inline) |
|
|
64
70
|
| `/turing:checkpoint <action>` | Smart checkpoint management: list, prune (Pareto), average, resume, stats | (inline) |
|
|
65
71
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "1.5.0",
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
package/src/install.js
CHANGED
|
@@ -23,7 +23,8 @@ const SUB_COMMANDS = [
|
|
|
23
23
|
"init", "train", "status", "compare", "sweep", "validate",
|
|
24
24
|
"try", "brief", "suggest", "explore", "design", "logbook", "poster",
|
|
25
25
|
"report", "mode", "preflight", "card", "seed", "reproduce",
|
|
26
|
-
"diagnose", "ablate", "frontier", "profile", "checkpoint",
|
|
26
|
+
"diagnose", "ablate", "frontier", "profile", "checkpoint", "export",
|
|
27
|
+
"lit", "paper",
|
|
27
28
|
];
|
|
28
29
|
|
|
29
30
|
export async function install(opts = {}) {
|
package/src/verify.js
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|