claude-turing 1.0.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +66 -3
  3. package/commands/card.md +36 -0
  4. package/commands/explore.md +107 -0
  5. package/commands/suggest.md +68 -4
  6. package/commands/turing.md +4 -0
  7. package/package.json +1 -1
  8. package/src/claude-md.js +1 -0
  9. package/src/install.js +2 -2
  10. package/src/verify.js +2 -0
  11. package/templates/requirements.txt +4 -0
  12. package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
  13. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  14. package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
  15. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  16. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  17. package/templates/scripts/__pycache__/treequest_suggest.cpython-314.pyc +0 -0
  18. package/templates/scripts/cleanup.py +599 -0
  19. package/templates/scripts/cost_frontier.py +292 -0
  20. package/templates/scripts/diff_configs.py +534 -0
  21. package/templates/scripts/export_results.py +457 -0
  22. package/templates/scripts/generate_brief.py +58 -3
  23. package/templates/scripts/generate_model_card.py +342 -0
  24. package/templates/scripts/leaderboard.py +508 -0
  25. package/templates/scripts/manage_hypotheses.py +2 -2
  26. package/templates/scripts/plot_trajectory.py +611 -0
  27. package/templates/scripts/scaffold.py +8 -0
  28. package/templates/scripts/show_metrics.py +23 -2
  29. package/templates/scripts/treequest_suggest.py +520 -0
  30. package/templates/tests/__pycache__/__init__.cpython-314.pyc +0 -0
  31. package/templates/tests/__pycache__/conftest.cpython-314-pytest-9.0.2.pyc +0 -0
  32. package/templates/tests/__pycache__/test_cost_frontier.cpython-314-pytest-9.0.2.pyc +0 -0
  33. package/templates/tests/test_cost_frontier.py +222 -0
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "turing",
3
- "version": "1.0.1",
4
- "description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 14 commands, 2 specialized agents, structured experiment lifecycle with convergence detection, immutable evaluation infrastructure, novelty guard, decision synthesis, hypothesis database, and safety guardrails that separate the hypothesis space from the measurement apparatus. Inspired by Karpathy's autoresearch and the scientific method itself.",
3
+ "version": "1.2.0",
4
+ "description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 17 commands, 2 specialized agents, tree-search hypothesis exploration (TreeQuest AB-MCTS), cost-performance frontier analysis, model cards, model registry, hypothesis database with novelty guard, anti-cheating guardrails, and the taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
5
5
  "author": {
6
6
  "name": "pragnition"
7
7
  },
package/README.md CHANGED
@@ -313,6 +313,8 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
313
313
  | `/turing:try <hypothesis>` | Inject a hypothesis — free text or `archetype:model_comparison` |
314
314
  | `/turing:brief [--deep]` | Research briefing — campaign summary, failure patterns, literature-grounded suggestions |
315
315
  | `/turing:suggest` | Literature-grounded model architecture suggestions with citations |
316
+ | `/turing:suggest --strategy treequest` | Tree-search hypothesis exploration (alias for `/turing:explore`) |
317
+ | `/turing:explore` | AB-MCTS tree search over critique-scored hypothesis space |
316
318
  | `/turing:design <hyp-id>` | Generate structured experiment design from a hypothesis |
317
319
  | `/turing:mode <explore\|exploit\|replicate>` | Set research strategy — drives novelty guard policy |
318
320
 
@@ -321,6 +323,7 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
321
323
  | Command | What it does |
322
324
  |---------|-------------|
323
325
  | `/turing:validate [--auto]` | Check metric stability — auto-configure multi-run if noisy |
326
+ | `/turing:card` | Generate a model card — performance, limitations, intended use, artifact contract |
324
327
  | `/turing:logbook` | Generate HTML experiment logbook |
325
328
  | `/turing:report` | Generate research report |
326
329
  | `/turing:poster` | Generate research poster |
@@ -389,6 +392,65 @@ After N experiments with no meaningful improvement, the agent stops and reports
389
392
 
390
393
  For noisy metrics, `/turing:validate` runs the pipeline multiple times and measures variance. If the coefficient of variation exceeds 5%, it auto-configures multi-run evaluation so the agent can't be rewarded for lucky single runs.
391
394
 
395
+ ## Tree-Search Hypothesis Exploration
396
+
397
+ > *"The learned coin-flipper weaves through the quadrillion-coin room with a preternatural air."*
398
+
399
+ Sometimes the best experiment to try next isn't obvious from the literature or the agent's memory. `/turing:explore` uses [TreeQuest](https://github.com/SakanaAI/treequest)'s AB-MCTS (Adaptive Branching Monte Carlo Tree Search) to search the space of experiment *ideas* as a tree, scored by the critique engine (novelty x feasibility x impact).
400
+
401
+ ```
402
+ /turing:explore # Run MCTS over hypothesis space
403
+ /turing:explore --strategy greedy # Greedy fallback (no TreeQuest needed)
404
+ /turing:explore --iterations 50 --top 8 # Deeper search, more results
405
+ /turing:suggest --strategy treequest # Same thing via suggest
406
+ ```
407
+
408
+ How it works:
409
+
410
+ ```
411
+ Seeds MCTS expands best-scoring branches
412
+
413
+ ┌──────┼──────┐ Each node is a hypothesis scored by:
414
+ ▼ ▼ ▼ - Novelty (vs experiment history)
415
+ LightGBM Reg Features - Feasibility (hardware, deps)
416
+ │ │ │ - Expected impact (type success rate)
417
+ ▼ ▼ ▼
418
+ +dart +L1 +poly Top-K results queued as hypotheses
419
+ │ │ for the next /turing:train run
420
+ ▼ ▼
421
+ +subsamp +target-enc
422
+ ```
423
+
424
+ Unlike `/turing:suggest` (which searches the web for papers), `/turing:explore` searches the space of *refinement chains* — combinations and sequences of modifications that score well together. It discovers non-obvious experiment strategies that independent suggestions cannot find.
425
+
426
+ Falls back to greedy best-first search when TreeQuest is not installed.
427
+
428
+ ## Cost-Performance Frontier
429
+
430
+ > *"This model is 2% better but takes 10x longer to train. Is that worth it?"*
431
+
432
+ The briefing now surfaces [Pareto-optimal](https://en.wikipedia.org/wiki/Pareto_efficiency) experiments — the efficient set where no other experiment is both faster AND has a better metric. The cost report tells you the tradeoff in plain language:
433
+
434
+ ```
435
+ Best metric: exp-012 (accuracy=0.893, 2400s)
436
+ Best efficiency: exp-003 (accuracy=0.871, 3s)
437
+ The 2.5% improvement costs 800x more compute.
438
+ ```
439
+
440
+ Run `python scripts/cost_frontier.py` directly, or read the "Cost-Performance Analysis" section in `/turing:brief`.
441
+
442
+ ## Model Cards
443
+
444
+ When it's time to ship, `/turing:card` generates a standardized model card documenting:
445
+ - Model type, framework, training time
446
+ - Performance metrics (all configured metrics)
447
+ - Training data source and split ratios
448
+ - Limitations (including overfit detection)
449
+ - Intended use and ethical considerations (user fills these in)
450
+ - Artifact contract version for production consumers
451
+
452
+ Inspired by [Google's Model Cards](https://arxiv.org/abs/1810.03993) and [Hugging Face model cards](https://huggingface.co/docs/hub/model-cards).
453
+
392
454
  ## Installation
393
455
 
394
456
  ```bash
@@ -424,11 +486,11 @@ Each project gets independent config, data, experiments, models, and agent memor
424
486
 
425
487
  ## Architecture of Turing Itself
426
488
 
427
- 15 commands, 2 agents, 8 config files, 25 template scripts, model registry, artifact contract, 338 tests, 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
489
+ 17 commands, 2 agents, 8 config files, 31 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, 379 tests, 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
428
490
 
429
491
  ```
430
492
  turing/
431
- ├── commands/ 15 skill files (core + taste-leverage + reporting)
493
+ ├── commands/ 17 skill files (core + taste-leverage + reporting + exploration)
432
494
  ├── agents/ 2 agents (researcher: read/write, evaluator: read-only)
433
495
  ├── config/ 8 files (lifecycle, taxonomy, archetypes, novelty aliases)
434
496
  ├── templates/ Scaffolded into user projects by /turing:init
@@ -437,7 +499,7 @@ turing/
437
499
  │ ├── train.py Training code (AGENT-EDITABLE)
438
500
  │ ├── model_contract.md Artifact schema for production consumers
439
501
  │ ├── model_registry.yaml Available model architectures + hyperparams
440
- │ └── scripts/ 25 Python scripts (core loop + analysis + infra)
502
+ │ └── scripts/ 31 Python scripts (core loop + analysis + infra + tree search)
441
503
  ├── tests/ 379 tests (unit + integration + anti-pattern + manifest)
442
504
  ├── src/ 5 JS installer files (npm deployment)
443
505
  ├── bin/ CLI entry points
@@ -455,6 +517,7 @@ turing/
455
517
  - **[Principle of Least Privilege](https://en.wikipedia.org/wiki/Principle_of_least_privilege)** (Saltzer & Schroeder, 1975) — each agent has exactly the capabilities needed for its role
456
518
  - **[Early Stopping](https://en.wikipedia.org/wiki/Early_stopping)** (Prechelt, 1998) — convergence detection as discrete early stopping
457
519
  - **[Multi-Armed Bandits](https://en.wikipedia.org/wiki/Multi-armed_bandit)** — the explore-exploit tradeoff
520
+ - **[TreeQuest](https://github.com/SakanaAI/treequest)** (Sakana AI, 2025) — AB-MCTS for inference-time scaling; repurposed here for hypothesis-space exploration
458
521
  - **[Version Control as Lab Notebook](https://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1004668)** (Ram, 2013) — git as a scientific record-keeping system
459
522
  - **[Reproducibility Crisis](https://en.wikipedia.org/wiki/Replication_crisis)** — if the measurement can change between experiments, results are not reproducible
460
523
 
@@ -0,0 +1,36 @@
1
+ ---
2
+ name: card
3
+ description: Generate a standardized model card documenting the trained model — type, performance, training data, limitations, intended use, and artifact contract.
4
+ disable-model-invocation: true
5
+ allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob
6
+ ---
7
+
8
+ You generate a standardized model card from the experiment log, model contract, and config.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate the virtual environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Run the model card generator:**
18
+ ```bash
19
+ python scripts/generate_model_card.py --config config.yaml --log experiments/log.jsonl --contract model_contract.md --output MODEL_CARD.md
20
+ ```
21
+
22
+ 3. **Read and present the generated card:**
23
+ - Read `MODEL_CARD.md` and display it to the user.
24
+ - If no experiments exist yet, inform the user and show the skeleton card.
25
+
26
+ 4. **Suggest next steps:**
27
+ - Review the **Ethical Considerations** section and fill in bias, fairness, and impact notes.
28
+ - Review the **Intended Use** section and document what the model is NOT intended for.
29
+ - If limitations mention overfitting, suggest running `/turing:validate` for stability checks.
30
+ - If the card looks complete, suggest committing it to version control.
31
+
32
+ ## Error Handling
33
+
34
+ - If `config.yaml` is missing, tell the user to run `/turing:init` first.
35
+ - If `experiments/log.jsonl` is missing or empty, generate a skeleton card and note that training is needed.
36
+ - If `.venv` doesn't exist, try `python3 scripts/generate_model_card.py` directly.
@@ -0,0 +1,107 @@
1
+ ---
2
+ name: explore
3
+ description: Tree-search-guided hypothesis exploration using AB-MCTS. Explores the space of experiment ideas as a search tree, scored by the critique engine. Discovers non-obvious refinement chains that linear suggestion cannot find.
4
+ disable-model-invocation: true
5
+ argument-hint: "[ml/project] [--iterations N] [--top N] [--strategy abmcts-a|abmcts-m|greedy]"
6
+ allowed-tools: Read, Write, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob
7
+ ---
8
+
9
+ Explore the hypothesis space using tree search. Instead of suggesting independent ideas, this builds and searches a tree of refinement chains — each node is a hypothesis scored by novelty, feasibility, and expected impact.
10
+
11
+ ## Project Detection
12
+
13
+ 0. **Detect project directory:**
14
+ - If `$ARGUMENTS` contains a path (e.g., `ml/coding`), use that as the project directory
15
+ - Else if cwd contains `config.yaml` and `train.py`, use cwd
16
+ - Else search for `ml/*/` subdirectories containing `config.yaml`
17
+ - If exactly one found, use it
18
+ - If multiple found, list them and ask the user which to target
19
+ - All subsequent commands run from the detected project directory
20
+
21
+ ## Parse Options
22
+
23
+ Extract from `$ARGUMENTS`:
24
+ - `--iterations N` — search depth (default: 30)
25
+ - `--top N` — number of results to return (default: 5)
26
+ - `--strategy` — algorithm choice: `abmcts-a` (default), `abmcts-m` (Bayesian), or `greedy` (no TreeQuest needed)
27
+ - `--seeds-only` — just show generated seeds without running search
28
+ - `--json` — output as JSON for programmatic use
29
+
30
+ ## Steps
31
+
32
+ ### 1. Assess Current State
33
+
34
+ ```bash
35
+ source .venv/bin/activate && python scripts/show_metrics.py --last 10 2>/dev/null || echo "No experiments yet"
36
+ ```
37
+
38
+ Read `config.yaml` to understand the current model and metric.
39
+
40
+ ### 2. Run Tree Search
41
+
42
+ ```bash
43
+ source .venv/bin/activate && python scripts/treequest_suggest.py \
44
+ --log experiments/log.jsonl \
45
+ --config config.yaml \
46
+ --top <N> \
47
+ --iterations <N> \
48
+ --strategy <strategy>
49
+ ```
50
+
51
+ The script will:
52
+ - Generate seed hypotheses from config and experiment history
53
+ - Run AB-MCTS (or greedy fallback) over the hypothesis tree
54
+ - Score each node using the critique engine
55
+ - Return top-K ranked, deduplicated hypotheses
56
+
57
+ ### 3. Queue Best Hypotheses
58
+
59
+ For each result, add to the hypothesis queue:
60
+
61
+ ```bash
62
+ source .venv/bin/activate && python scripts/manage_hypotheses.py add "<description>" \
63
+ --priority medium --source treequest
64
+ ```
65
+
66
+ ### 4. Show Results
67
+
68
+ Display the search output and confirm queuing:
69
+
70
+ ```
71
+ TreeQuest Hypothesis Exploration (AB-MCTS-A)
72
+ ============================================
73
+ Nodes explored: 35
74
+ Top 5 hypotheses by critique score:
75
+
76
+ 1. [PROCEED] (score: 7.8/10)
77
+ Switch to LightGBM with dart boosting; additionally add polynomial features
78
+ Novelty: 8 Feasibility: 9 Impact: 7
79
+ -> Queued as hyp-NNN
80
+
81
+ 2. [PROCEED] (score: 7.2/10)
82
+ Use low learning rate (0.01) with 2000 estimators; additionally add L2 regularization
83
+ Novelty: 7 Feasibility: 8 Impact: 7
84
+ Depth: 1 (refined from parent)
85
+ -> Queued as hyp-NNN
86
+
87
+ ...
88
+
89
+ Queued N hypotheses. Run /turing:train to test them.
90
+ ```
91
+
92
+ ## How It Differs From /turing:suggest
93
+
94
+ | | `/turing:suggest` | `/turing:explore` |
95
+ |---|---|---|
96
+ | **Source** | Web literature search | Tree search over critique scores |
97
+ | **Strategy** | Independent suggestions | Refinement chains (parent -> child) |
98
+ | **Requires internet** | Yes | No |
99
+ | **Discovers** | What papers recommend | What combinations score well |
100
+ | **Best for** | Early-stage exploration | Mid-experiment optimization |
101
+
102
+ ## Integration
103
+
104
+ - Results feed into `hypotheses.yaml` — the next `/turing:train` picks them up
105
+ - `/turing:brief` shows queued treequest-sourced hypotheses
106
+ - `/turing:suggest --strategy treequest` is an alias for this command
107
+ - Human can override priority: `/turing:try` always takes precedence
@@ -6,9 +6,16 @@ argument-hint: "[task description override]"
6
6
  allowed-tools: Read, Write, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob, WebSearch, WebFetch
7
7
  ---
8
8
 
9
- Suggest model architectures for the current ML task, grounded in recent literature. Hypotheses backed by papers, not vibes.
9
+ Suggest model architectures for the current ML task. Supports two strategies:
10
10
 
11
- ## Steps
11
+ - **literature** (default): Web search for recent papers, synthesize grounded suggestions with citations.
12
+ - **treequest**: Tree-search-guided hypothesis exploration using AB-MCTS over the critique scoring function. Explores refinement chains that literature search cannot find.
13
+
14
+ ## Strategy Detection
15
+
16
+ If `$ARGUMENTS` contains `--strategy treequest` or `treequest`, use the TreeQuest strategy below. Otherwise use the default literature strategy.
17
+
18
+ ## Steps (Literature Strategy — default)
12
19
 
13
20
  ### 1. Understand the Task
14
21
 
@@ -84,12 +91,69 @@ Sources consulted: <N papers/articles>
84
91
  Queued N hypotheses. Run /turing:train to test them.
85
92
  ```
86
93
 
87
- ## Fallback
94
+ ## Fallback (Literature Strategy)
88
95
 
89
96
  If web search returns insufficient results, suggest model families from `config/taxonomy.toml` based on what hasn't been tried yet. Note that suggestions are taxonomy-based, not literature-backed, and queue with `--source taxonomy`.
90
97
 
98
+ ## Steps (TreeQuest Strategy)
99
+
100
+ When using `--strategy treequest`:
101
+
102
+ ### 1. Detect Project Directory
103
+
104
+ Same detection logic as the literature strategy — find `config.yaml` + `train.py`.
105
+
106
+ ### 2. Run Tree Search
107
+
108
+ ```bash
109
+ source .venv/bin/activate && python scripts/treequest_suggest.py \
110
+ --log experiments/log.jsonl \
111
+ --config config.yaml \
112
+ --top 5 \
113
+ --iterations 30 \
114
+ --strategy abmcts-a
115
+ ```
116
+
117
+ If TreeQuest is not installed, the script automatically falls back to greedy best-first search.
118
+
119
+ ### 3. Queue Results
120
+
121
+ For each result from the tree search, queue as a hypothesis:
122
+
123
+ ```bash
124
+ source .venv/bin/activate && python scripts/manage_hypotheses.py add "<description>" --priority medium --source treequest
125
+ ```
126
+
127
+ ### 4. Show Results
128
+
129
+ Display the tree search output and confirm hypotheses were queued:
130
+
131
+ ```
132
+ TreeQuest Hypothesis Exploration (AB-MCTS-A)
133
+ ============================================
134
+ Nodes explored: 35
135
+ Top 5 hypotheses by critique score:
136
+
137
+ 1. [PROCEED] (score: 7.8/10)
138
+ Switch to LightGBM with dart boosting; additionally add polynomial features
139
+ Novelty: 8 Feasibility: 9 Impact: 7
140
+
141
+ ...
142
+
143
+ Queued N hypotheses. Run /turing:train to test them.
144
+ ```
145
+
146
+ ### TreeQuest Options
147
+
148
+ Pass additional flags via `$ARGUMENTS`:
149
+ - `--iterations N` — search depth (default: 30)
150
+ - `--top N` — number of results (default: 5)
151
+ - `--strategy abmcts-m` — use Bayesian mixed model variant (requires PyMC)
152
+ - `--greedy` — force greedy fallback without TreeQuest
153
+
91
154
  ## Integration
92
155
 
93
156
  - Suggestions feed into `hypotheses.yaml` — the next `/turing:train` picks them up
94
- - `/turing:brief` shows queued literature-sourced hypotheses
157
+ - `/turing:brief` shows queued literature-sourced and treequest-sourced hypotheses
158
+ - `/turing:explore` runs the TreeQuest search as a standalone command
95
159
  - Human can override priority: `/turing:try` always takes precedence
@@ -21,9 +21,11 @@ You are the Turing ML research router. Detect the user's intent and route to the
21
21
  | "report", "write-up", "findings", "document results" | `/turing:report` | Document |
22
22
  | "validate", "stability", "check variance", "noisy" | `/turing:validate` | Validate |
23
23
  | "suggest", "what model", "recommend", "which architecture", "literature" | `/turing:suggest` | Research |
24
+ | "explore hypotheses", "tree search", "treequest", "search hypothesis space", "MCTS" | `/turing:explore` | Research |
24
25
  | "design", "plan experiment", "how should I test", "experiment design" | `/turing:design` | Design |
25
26
  | "mode", "explore", "exploit", "replicate", "strategy" | `/turing:mode` | Strategy |
26
27
  | "preflight", "resources", "VRAM", "memory", "can I run", "OOM", "GPU" | `/turing:preflight` | Check |
28
+ | "card", "model card", "document model", "model documentation" | `/turing:card` | Document |
27
29
 
28
30
  ## Sub-commands
29
31
 
@@ -38,12 +40,14 @@ You are the Turing ML research router. Detect the user's intent and route to the
38
40
  | `/turing:init` | Scaffold a new ML project | (inline) |
39
41
  | `/turing:validate` | Check metric stability, auto-fix if noisy | (inline) |
40
42
  | `/turing:suggest` | Literature-grounded model architecture suggestions | (inline, uses WebSearch) |
43
+ | `/turing:explore` | Tree-search hypothesis exploration via AB-MCTS | (inline) |
41
44
  | `/turing:design <hyp-id>` | Generate structured experiment design from hypothesis | (inline, uses WebSearch) |
42
45
  | `/turing:logbook` | HTML/markdown logbook with trajectory chart | (inline) |
43
46
  | `/turing:poster` | Single-page HTML research poster | (inline) |
44
47
  | `/turing:report` | Structured markdown research report | (inline) |
45
48
  | `/turing:mode <mode>` | Set research strategy (explore/exploit/replicate) | (inline) |
46
49
  | `/turing:preflight` | Pre-flight resource check (VRAM/RAM/disk) | (inline) |
50
+ | `/turing:card` | Generate standardized model card (type, performance, data, limitations, contract) | (inline) |
47
51
 
48
52
  ## Proactive Detection
49
53
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-turing",
3
- "version": "1.0.1",
3
+ "version": "1.2.0",
4
4
  "type": "module",
5
5
  "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
6
6
  "bin": {
package/src/claude-md.js CHANGED
@@ -21,6 +21,7 @@ Autonomous ML research harness. The autoresearch loop as a formal protocol.
21
21
  | \`/turing:validate\` | Check metric stability, auto-fix if noisy |
22
22
  | \`/turing:try <hypothesis>\` | Inject a hypothesis into the experiment queue |
23
23
  | \`/turing:brief\` | Generate research intelligence report |
24
+ | \`/turing:explore\` | Tree-search hypothesis exploration (AB-MCTS) |
24
25
  | \`/turing:preflight\` | Pre-flight resource check (VRAM/RAM/disk) |
25
26
 
26
27
  ### Agents
package/src/install.js CHANGED
@@ -21,8 +21,8 @@ const PLUGIN_ROOT = join(__dirname, "..");
21
21
  // Single source of truth for sub-commands (DRY — used for dirs and file copy)
22
22
  const SUB_COMMANDS = [
23
23
  "init", "train", "status", "compare", "sweep", "validate",
24
- "try", "brief", "suggest", "design", "logbook", "poster",
25
- "report", "mode", "preflight",
24
+ "try", "brief", "suggest", "explore", "design", "logbook", "poster",
25
+ "report", "mode", "preflight", "card",
26
26
  ];
27
27
 
28
28
  export async function install(opts = {}) {
package/src/verify.js CHANGED
@@ -23,12 +23,14 @@ const EXPECTED_COMMANDS = [
23
23
  "try/SKILL.md",
24
24
  "brief/SKILL.md",
25
25
  "suggest/SKILL.md",
26
+ "explore/SKILL.md",
26
27
  "design/SKILL.md",
27
28
  "logbook/SKILL.md",
28
29
  "poster/SKILL.md",
29
30
  "report/SKILL.md",
30
31
  "mode/SKILL.md",
31
32
  "preflight/SKILL.md",
33
+ "card/SKILL.md",
32
34
  ];
33
35
 
34
36
  const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
@@ -6,3 +6,7 @@ numpy>=2.0
6
6
  joblib>=1.4
7
7
  pyyaml>=6.0
8
8
  pytest>=8.0
9
+
10
+ # Optional: tree-search-guided hypothesis exploration
11
+ # Install with: pip install "treequest[all]"
12
+ # treequest>=0.1