claude-turing 4.3.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +5 -5
- package/LICENSE +1 -1
- package/README.md +78 -552
- package/bin/cli.js +23 -4
- package/commands/doctor.md +31 -0
- package/commands/init.md +21 -3
- package/commands/plan.md +27 -0
- package/commands/postmortem.md +28 -0
- package/commands/turing.md +6 -0
- package/config/defaults.yaml +2 -0
- package/package.json +5 -5
- package/src/install.js +18 -2
- package/src/verify.js +45 -2
- package/templates/README.md +1 -1
- package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
- package/templates/config.yaml +1 -1
- package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
- package/templates/program.md +1 -1
- package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/failure_postmortem.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/research_planner.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
- package/templates/scripts/failure_postmortem.py +510 -0
- package/templates/scripts/generate_brief.py +61 -0
- package/templates/scripts/harness_doctor.py +610 -0
- package/templates/scripts/research_planner.py +470 -0
- package/templates/scripts/scaffold.py +56 -28
package/bin/cli.js
CHANGED
|
@@ -1,9 +1,25 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { createRequire } from "module";
|
|
3
|
+
import { realpathSync } from "fs";
|
|
4
|
+
import { fileURLToPath } from "url";
|
|
3
5
|
const require = createRequire(import.meta.url);
|
|
4
6
|
const { Command } = require("commander");
|
|
5
7
|
const pkg = require("../package.json");
|
|
6
8
|
|
|
9
|
+
export function buildInitArgs(name, dir) {
|
|
10
|
+
return [name, dir].filter(Boolean);
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
function isDirectRun() {
|
|
14
|
+
if (!process.argv[1]) return false;
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(process.argv[1]);
|
|
18
|
+
} catch {
|
|
19
|
+
return false;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
7
23
|
const program = new Command();
|
|
8
24
|
|
|
9
25
|
program
|
|
@@ -34,13 +50,16 @@ program
|
|
|
34
50
|
.command("init [name] [dir]")
|
|
35
51
|
.description("Scaffold ML project (CLI mode, non-Claude-Code usage)")
|
|
36
52
|
.action(async (name, dir) => {
|
|
37
|
-
const {
|
|
53
|
+
const { spawnSync } = await import("child_process");
|
|
38
54
|
const { dirname, join } = await import("path");
|
|
39
55
|
const { fileURLToPath } = await import("url");
|
|
40
56
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
41
57
|
const script = join(__dirname, "turing-init.sh");
|
|
42
|
-
const args =
|
|
43
|
-
|
|
58
|
+
const args = buildInitArgs(name, dir);
|
|
59
|
+
const result = spawnSync("bash", [script, ...args], { stdio: "inherit" });
|
|
60
|
+
process.exit(result.status ?? 1);
|
|
44
61
|
});
|
|
45
62
|
|
|
46
|
-
|
|
63
|
+
if (isDirectRun()) {
|
|
64
|
+
program.parse();
|
|
65
|
+
}
|
package/commands/doctor.md
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: doctor
|
|
3
|
+
description: Harness self-diagnosis — check environment, project, resources, and git state. Auto-fix common issues.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--fix] [--verbose]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Is Turing healthy? Check everything and get a score.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. `source .venv/bin/activate`
|
|
13
|
+
2. `python scripts/harness_doctor.py $ARGUMENTS`
|
|
14
|
+
3. **Saved:** `experiments/doctor/`
|
|
15
|
+
|
|
16
|
+
## Checks
|
|
17
|
+
- **Environment:** Python version, venv status
|
|
18
|
+
- **Dependencies:** all required packages importable
|
|
19
|
+
- **Config:** config.yaml valid with required fields
|
|
20
|
+
- **Experiment log:** JSONL integrity, corrupt line detection
|
|
21
|
+
- **Scripts:** train.py, prepare.py, evaluate.py exist and parse
|
|
22
|
+
- **Disk space:** warn if <1GB free
|
|
23
|
+
- **Git state:** uncommitted changes to critical files
|
|
24
|
+
- **Claude hooks:** `.claude/settings.local.json` hook group schema; `--fix` migrates legacy bare command hooks
|
|
25
|
+
|
|
26
|
+
## Examples
|
|
27
|
+
```
|
|
28
|
+
/turing:doctor
|
|
29
|
+
/turing:doctor --fix
|
|
30
|
+
/turing:doctor --verbose --json
|
|
31
|
+
```
|
package/commands/init.md
CHANGED
|
@@ -37,18 +37,36 @@ python3 <templates_dir>/scripts/scaffold.py \
|
|
|
37
37
|
The scaffold script handles everything in a single atomic operation:
|
|
38
38
|
- Copies all template files with placeholder substitution
|
|
39
39
|
- Creates data/, experiments/, models/ directories
|
|
40
|
-
- Sets up agent memory at `.claude/agent-memory/ml-researcher/MEMORY.md`
|
|
40
|
+
- Sets up agent memory at `.claude/agent-memory/ml-researcher-{project_name}/MEMORY.md`
|
|
41
41
|
- Configures Claude Code hooks in `.claude/settings.local.json`
|
|
42
42
|
- Creates Python virtual environment and installs requirements
|
|
43
43
|
- Verifies all placeholders were replaced (fails loudly if any remain)
|
|
44
44
|
|
|
45
45
|
## Locating Templates
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
Use the installed command-pack templates directory first:
|
|
48
|
+
```
|
|
49
|
+
.claude/commands/turing/templates/
|
|
50
|
+
~/.claude/commands/turing/templates/
|
|
51
|
+
```
|
|
52
|
+
Then fall back to plugin or npm locations:
|
|
48
53
|
```
|
|
49
54
|
~/.claude/plugins/*/templates/
|
|
55
|
+
node_modules/claude-turing/templates/
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Example command:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
python3 ~/.claude/commands/turing/templates/scripts/scaffold.py \
|
|
62
|
+
--project-name "<project_name>" \
|
|
63
|
+
--target-metric "<target_metric>" \
|
|
64
|
+
--metric-direction "<metric_direction>" \
|
|
65
|
+
--task-description "<task_description>" \
|
|
66
|
+
--ml-dir "<ml_dir>" \
|
|
67
|
+
--data-source "<data_source>" \
|
|
68
|
+
--templates-dir ~/.claude/commands/turing/templates
|
|
50
69
|
```
|
|
51
|
-
Or check if installed via npm by looking for `node_modules/claude-turing/templates/`.
|
|
52
70
|
|
|
53
71
|
## After Scaffolding
|
|
54
72
|
|
package/commands/plan.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: plan
|
|
3
|
+
description: Research planning assistant — design a strategic experiment campaign with budget-aware ROI allocation.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--budget 20] [--goal \"maximize F1 for production\"]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
Design the next N experiments strategically, not randomly. Allocates budget by expected ROI.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. `source .venv/bin/activate`
|
|
13
|
+
2. `python scripts/research_planner.py $ARGUMENTS`
|
|
14
|
+
3. **Saved:** `experiments/plans/`
|
|
15
|
+
|
|
16
|
+
## How it works
|
|
17
|
+
- Analyzes experiment history to compute per-family ROI
|
|
18
|
+
- Adjusts strategy priorities based on project state and goal
|
|
19
|
+
- Allocates budget across: feature engineering, model search, ensemble, calibration, verification
|
|
20
|
+
- Generates phased plan with specific experiment descriptions
|
|
21
|
+
|
|
22
|
+
## Examples
|
|
23
|
+
```
|
|
24
|
+
/turing:plan --budget 20
|
|
25
|
+
/turing:plan --budget 10 --goal "maximize F1 for production deployment"
|
|
26
|
+
/turing:plan --budget 30 --json
|
|
27
|
+
```
|
package/commands/postmortem.md
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: postmortem
|
|
3
|
+
description: Failure postmortem — diagnose why experiments stopped improving and get actionable next steps.
|
|
4
|
+
disable-model-invocation: true
|
|
5
|
+
argument-hint: "[--window 10] [--auto-trigger 5]"
|
|
6
|
+
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
When experiments stop improving, find out why. Diagnoses search space exhaustion, config errors, data issues, metric ceilings, and noise floors.
|
|
10
|
+
|
|
11
|
+
## Steps
|
|
12
|
+
1. `source .venv/bin/activate`
|
|
13
|
+
2. `python scripts/failure_postmortem.py $ARGUMENTS`
|
|
14
|
+
3. **Saved:** `experiments/postmortems/`
|
|
15
|
+
|
|
16
|
+
## Diagnosis categories
|
|
17
|
+
- **Search space exhaustion:** micro-tuning params that don't matter
|
|
18
|
+
- **Systematic config error:** all experiments share a bad common config
|
|
19
|
+
- **Data issue:** all model types fail similarly
|
|
20
|
+
- **Metric ceiling:** near theoretical maximum
|
|
21
|
+
- **Noise floor:** improvements within seed variance
|
|
22
|
+
|
|
23
|
+
## Examples
|
|
24
|
+
```
|
|
25
|
+
/turing:postmortem
|
|
26
|
+
/turing:postmortem --window 15
|
|
27
|
+
/turing:postmortem --json
|
|
28
|
+
```
|
package/commands/turing.md
CHANGED
|
@@ -80,6 +80,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
80
80
|
| "simulate", "predict outcome", "pre-filter", "which configs will work", "forecast" | `/turing:simulate` | Predict |
|
|
81
81
|
| "update", "incremental", "new data", "add data", "fine-tune existing", "partial update" | `/turing:update` | Update |
|
|
82
82
|
| "registry", "promote", "demote", "staging", "production", "which model is deployed", "model lifecycle" | `/turing:registry` | Govern |
|
|
83
|
+
| "postmortem", "why failing", "failure streak", "why no improvement", "what went wrong" | `/turing:postmortem` | Diagnose |
|
|
84
|
+
| "doctor", "health check", "is it broken", "diagnose harness", "self-check" | `/turing:doctor` | Check |
|
|
85
|
+
| "plan", "research plan", "campaign", "what next", "allocate budget", "strategic plan" | `/turing:plan` | Plan |
|
|
83
86
|
|
|
84
87
|
## Sub-commands
|
|
85
88
|
|
|
@@ -156,6 +159,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
156
159
|
| `/turing:simulate [--configs] [--top-k]` | Experiment outcome prediction: pre-filter configs using surrogate model, save budget | (inline) |
|
|
157
160
|
| `/turing:update <exp-id> --new-data <path>` | Incremental model update: add new data without full retraining, forgetting detection | (inline) |
|
|
158
161
|
| `/turing:registry [list\|register\|promote\|demote\|history]` | Model registry: stage lifecycle (candidate → staging → production) with promotion gates | (inline) |
|
|
162
|
+
| `/turing:postmortem [--window N]` | Failure postmortem: diagnose why experiments stopped improving (exhaustion, config error, data issue, ceiling, noise) | (inline) |
|
|
163
|
+
| `/turing:doctor [--fix]` | Harness self-diagnosis: environment, dependencies, config, log integrity, scripts, disk, git state, Claude hooks | (inline) |
|
|
164
|
+
| `/turing:plan [--budget N] [--goal]` | Research planning assistant: strategic campaign design with budget-aware ROI allocation | (inline) |
|
|
159
165
|
|
|
160
166
|
## Proactive Detection
|
|
161
167
|
|
package/config/defaults.yaml
CHANGED
|
@@ -35,6 +35,8 @@ placeholders:
|
|
|
35
35
|
ML_DIR: "Directory for ML files relative to project root"
|
|
36
36
|
DATA_SOURCE: "Path to training data file"
|
|
37
37
|
METRIC_DIRECTION: "lower or higher — which direction is better"
|
|
38
|
+
LOWER_IS_BETTER: "Boolean derived from METRIC_DIRECTION for generated config.yaml"
|
|
39
|
+
MEMORY_DIR_NAME: "Filesystem-safe project name used for scoped agent memory"
|
|
38
40
|
|
|
39
41
|
# Agent configuration
|
|
40
42
|
agents:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "4.3.0",
|
|
3
|
+
"version": "4.5.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
|
@@ -34,15 +34,15 @@
|
|
|
34
34
|
"ai-agents"
|
|
35
35
|
],
|
|
36
36
|
"author": {
|
|
37
|
-
"name": "
|
|
37
|
+
"name": "Prannaya Gupta"
|
|
38
38
|
},
|
|
39
|
-
"homepage": "https://github.com/
|
|
39
|
+
"homepage": "https://github.com/ThePyProgrammer/turing",
|
|
40
40
|
"repository": {
|
|
41
41
|
"type": "git",
|
|
42
|
-
"url": "git+https://github.com/
|
|
42
|
+
"url": "git+https://github.com/ThePyProgrammer/turing.git"
|
|
43
43
|
},
|
|
44
44
|
"bugs": {
|
|
45
|
-
"url": "https://github.com/
|
|
45
|
+
"url": "https://github.com/ThePyProgrammer/turing/issues"
|
|
46
46
|
},
|
|
47
47
|
"license": "MIT",
|
|
48
48
|
"engines": {
|
package/src/install.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* node src/install.js [--global] [--project]
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import { readdir, copyFile, mkdir } from "fs/promises";
|
|
12
|
+
import { readdir, copyFile, mkdir, cp } from "fs/promises";
|
|
13
13
|
import { join, dirname } from "path";
|
|
14
14
|
import { fileURLToPath } from "url";
|
|
15
15
|
import { getTargetPaths } from "./paths.js";
|
|
@@ -38,6 +38,7 @@ const SUB_COMMANDS = [
|
|
|
38
38
|
"onboard", "share", "review",
|
|
39
39
|
"whatif", "counterfactual", "simulate",
|
|
40
40
|
"update", "registry",
|
|
41
|
+
"postmortem", "doctor", "plan",
|
|
41
42
|
];
|
|
42
43
|
|
|
43
44
|
export async function install(opts = {}) {
|
|
@@ -49,7 +50,7 @@ export async function install(opts = {}) {
|
|
|
49
50
|
console.log("");
|
|
50
51
|
|
|
51
52
|
// Create directories for each sub-command + agents + config
|
|
52
|
-
for (const subDir of ["", "agents", "config", "rules", ...SUB_COMMANDS]) {
|
|
53
|
+
for (const subDir of ["", "agents", "config", "rules", "templates", ...SUB_COMMANDS]) {
|
|
53
54
|
await mkdir(join(paths.commands, subDir), { recursive: true });
|
|
54
55
|
}
|
|
55
56
|
|
|
@@ -102,6 +103,21 @@ export async function install(opts = {}) {
|
|
|
102
103
|
}
|
|
103
104
|
console.log(` ${CONFIG_FILES.length} config files installed`);
|
|
104
105
|
|
|
106
|
+
// Copy templates used by /turing:init
|
|
107
|
+
await cp(
|
|
108
|
+
join(PLUGIN_ROOT, "templates"),
|
|
109
|
+
join(paths.commands, "templates"),
|
|
110
|
+
{
|
|
111
|
+
recursive: true,
|
|
112
|
+
force: true,
|
|
113
|
+
filter: (src) =>
|
|
114
|
+
!src.includes("__pycache__") &&
|
|
115
|
+
!src.includes(".pytest_cache") &&
|
|
116
|
+
!src.endsWith(".pyc"),
|
|
117
|
+
},
|
|
118
|
+
);
|
|
119
|
+
console.log(" Templates installed");
|
|
120
|
+
|
|
105
121
|
// Update CLAUDE.md
|
|
106
122
|
await updateClaudeMd(paths.claudeMd);
|
|
107
123
|
console.log(" CLAUDE.md updated");
|
package/src/verify.js
CHANGED
|
@@ -8,10 +8,14 @@
|
|
|
8
8
|
* node src/verify.js [--scope global|project]
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import { access } from "fs/promises";
|
|
12
|
-
import { join } from "path";
|
|
11
|
+
import { access, readdir } from "fs/promises";
|
|
12
|
+
import { dirname, join } from "path";
|
|
13
|
+
import { fileURLToPath } from "url";
|
|
13
14
|
import { getTargetPaths } from "./paths.js";
|
|
14
15
|
|
|
16
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
const PLUGIN_ROOT = join(__dirname, "..");
|
|
18
|
+
|
|
15
19
|
const EXPECTED_COMMANDS = [
|
|
16
20
|
"SKILL.md",
|
|
17
21
|
"init/SKILL.md",
|
|
@@ -85,6 +89,9 @@ const EXPECTED_COMMANDS = [
|
|
|
85
89
|
"simulate/SKILL.md",
|
|
86
90
|
"update/SKILL.md",
|
|
87
91
|
"registry/SKILL.md",
|
|
92
|
+
"postmortem/SKILL.md",
|
|
93
|
+
"doctor/SKILL.md",
|
|
94
|
+
"plan/SKILL.md",
|
|
88
95
|
];
|
|
89
96
|
|
|
90
97
|
const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
|
|
@@ -97,6 +104,27 @@ const EXPECTED_CONFIG = [
|
|
|
97
104
|
"watch_alerts.yaml",
|
|
98
105
|
];
|
|
99
106
|
|
|
107
|
+
async function templateFiles(root, relativeDir = "templates") {
|
|
108
|
+
const dir = join(root, relativeDir);
|
|
109
|
+
const entries = await readdir(dir, { withFileTypes: true });
|
|
110
|
+
const files = [];
|
|
111
|
+
|
|
112
|
+
for (const entry of entries) {
|
|
113
|
+
if (entry.name === "__pycache__" || entry.name === ".pytest_cache") {
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const relativePath = `${relativeDir}/${entry.name}`;
|
|
118
|
+
if (entry.isDirectory()) {
|
|
119
|
+
files.push(...await templateFiles(root, relativePath));
|
|
120
|
+
} else if (!entry.name.endsWith(".pyc")) {
|
|
121
|
+
files.push(relativePath);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return files;
|
|
126
|
+
}
|
|
127
|
+
|
|
100
128
|
async function fileExists(path) {
|
|
101
129
|
try {
|
|
102
130
|
await access(path);
|
|
@@ -108,7 +136,9 @@ async function fileExists(path) {
|
|
|
108
136
|
|
|
109
137
|
export async function verify(opts = {}) {
|
|
110
138
|
const scopes = opts.scope ? [opts.scope] : ["global", "project"];
|
|
139
|
+
const expectedTemplates = await templateFiles(PLUGIN_ROOT);
|
|
111
140
|
let found = false;
|
|
141
|
+
let totalMissing = 0;
|
|
112
142
|
|
|
113
143
|
for (const scope of scopes) {
|
|
114
144
|
const paths = getTargetPaths(scope);
|
|
@@ -141,10 +171,18 @@ export async function verify(opts = {}) {
|
|
|
141
171
|
if (!ok) missing++;
|
|
142
172
|
}
|
|
143
173
|
|
|
174
|
+
console.log("\nTemplates:");
|
|
175
|
+
for (const template of expectedTemplates) {
|
|
176
|
+
const ok = await fileExists(join(paths.commands, template));
|
|
177
|
+
console.log(` ${ok ? "✓" : "✗"} commands/${template}`);
|
|
178
|
+
if (!ok) missing++;
|
|
179
|
+
}
|
|
180
|
+
|
|
144
181
|
// Check CLAUDE.md
|
|
145
182
|
const claudeOk = await fileExists(paths.claudeMd);
|
|
146
183
|
console.log(`\n ${claudeOk ? "✓" : "✗"} CLAUDE.md`);
|
|
147
184
|
|
|
185
|
+
totalMissing += missing;
|
|
148
186
|
console.log(
|
|
149
187
|
`\n ${missing === 0 ? "✓ Installation complete" : `✗ ${missing} files missing — run claude-turing install`}\n`,
|
|
150
188
|
);
|
|
@@ -152,6 +190,11 @@ export async function verify(opts = {}) {
|
|
|
152
190
|
|
|
153
191
|
if (!found) {
|
|
154
192
|
console.log("\n✗ turing not found. Run: claude-turing install\n");
|
|
193
|
+
totalMissing++;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
if (totalMissing > 0) {
|
|
197
|
+
process.exitCode = 1;
|
|
155
198
|
}
|
|
156
199
|
}
|
|
157
200
|
|
package/templates/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
## Overview
|
|
6
6
|
|
|
7
|
-
This pipeline uses the [Turing](https://github.com/
|
|
7
|
+
This pipeline uses the [Turing](https://github.com/ThePyProgrammer/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
|
|
8
8
|
|
|
9
9
|
**Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
|
|
10
10
|
|
|
Binary file
|
|
Binary file
|
package/templates/config.yaml
CHANGED
|
@@ -17,7 +17,7 @@ evaluation:
|
|
|
17
17
|
metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
|
|
18
18
|
# Set to true for metrics where lower is better (mae, mse, rmse, loss)
|
|
19
19
|
# Set to false for metrics where higher is better (accuracy, f1, auc)
|
|
20
|
-
lower_is_better:
|
|
20
|
+
lower_is_better: {{LOWER_IS_BETTER}}
|
|
21
21
|
|
|
22
22
|
# Multi-seed configuration (Phase 10.1: /turing:seed)
|
|
23
23
|
# Seeds used for seed studies — diverse values for good coverage
|
|
Binary file
|
|
Binary file
|
package/templates/program.md
CHANGED
|
@@ -43,7 +43,7 @@ git checkout -b exp/NNN-description
|
|
|
43
43
|
|
|
44
44
|
## Memory
|
|
45
45
|
|
|
46
|
-
Read `.claude/agent-memory/ml-researcher/MEMORY.md` at the start of each session.
|
|
46
|
+
Read `.claude/agent-memory/ml-researcher-{{MEMORY_DIR_NAME}}/MEMORY.md` at the start of each session.
|
|
47
47
|
Update it after each experiment with:
|
|
48
48
|
- Best result (if improved)
|
|
49
49
|
- What was tried and why
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|