claude-turing 4.3.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. package/.claude-plugin/plugin.json +5 -5
  2. package/LICENSE +1 -1
  3. package/README.md +78 -552
  4. package/bin/cli.js +23 -4
  5. package/commands/doctor.md +31 -0
  6. package/commands/init.md +21 -3
  7. package/commands/plan.md +27 -0
  8. package/commands/postmortem.md +28 -0
  9. package/commands/turing.md +6 -0
  10. package/config/defaults.yaml +2 -0
  11. package/package.json +5 -5
  12. package/src/install.js +18 -2
  13. package/src/verify.js +45 -2
  14. package/templates/README.md +1 -1
  15. package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
  16. package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
  17. package/templates/config.yaml +1 -1
  18. package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
  19. package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
  20. package/templates/program.md +1 -1
  21. package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  22. package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
  23. package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
  24. package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
  25. package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
  26. package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
  27. package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
  28. package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
  29. package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
  30. package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
  31. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
  32. package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
  33. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
  34. package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
  35. package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
  36. package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
  37. package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
  38. package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
  39. package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
  40. package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
  41. package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
  42. package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
  43. package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
  44. package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
  45. package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
  46. package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
  47. package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
  48. package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
  49. package/templates/scripts/__pycache__/failure_postmortem.cpython-314.pyc +0 -0
  50. package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
  51. package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
  52. package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
  53. package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
  54. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  55. package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
  56. package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
  57. package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
  58. package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
  59. package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
  60. package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
  61. package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
  62. package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
  63. package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
  64. package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
  65. package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
  66. package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
  67. package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
  68. package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
  69. package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
  70. package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
  71. package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
  72. package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
  73. package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
  74. package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
  75. package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
  76. package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
  77. package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
  78. package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
  79. package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
  80. package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
  81. package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
  82. package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
  83. package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
  84. package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
  85. package/templates/scripts/__pycache__/research_planner.cpython-314.pyc +0 -0
  86. package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
  87. package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
  88. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  89. package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
  90. package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
  91. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
  92. package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
  93. package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
  94. package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
  95. package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
  96. package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
  97. package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
  98. package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
  99. package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
  100. package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
  101. package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
  102. package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
  103. package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
  104. package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
  105. package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
  106. package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
  107. package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
  108. package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
  109. package/templates/scripts/failure_postmortem.py +510 -0
  110. package/templates/scripts/generate_brief.py +61 -0
  111. package/templates/scripts/harness_doctor.py +610 -0
  112. package/templates/scripts/research_planner.py +470 -0
  113. package/templates/scripts/scaffold.py +56 -28
package/bin/cli.js CHANGED
@@ -1,9 +1,25 @@
1
1
  #!/usr/bin/env node
2
2
  import { createRequire } from "module";
3
+ import { realpathSync } from "fs";
4
+ import { fileURLToPath } from "url";
3
5
  const require = createRequire(import.meta.url);
4
6
  const { Command } = require("commander");
5
7
  const pkg = require("../package.json");
6
8
 
9
+ export function buildInitArgs(name, dir) {
10
+ return [name, dir].filter(Boolean);
11
+ }
12
+
13
+ function isDirectRun() {
14
+ if (!process.argv[1]) return false;
15
+
16
+ try {
17
+ return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(process.argv[1]);
18
+ } catch {
19
+ return false;
20
+ }
21
+ }
22
+
7
23
  const program = new Command();
8
24
 
9
25
  program
@@ -34,13 +50,16 @@ program
34
50
  .command("init [name] [dir]")
35
51
  .description("Scaffold ML project (CLI mode, non-Claude-Code usage)")
36
52
  .action(async (name, dir) => {
37
- const { execSync } = await import("child_process");
53
+ const { spawnSync } = await import("child_process");
38
54
  const { dirname, join } = await import("path");
39
55
  const { fileURLToPath } = await import("url");
40
56
  const __dirname = dirname(fileURLToPath(import.meta.url));
41
57
  const script = join(__dirname, "turing-init.sh");
42
- const args = [name, dir].filter(Boolean).join(" ");
43
- execSync(`bash "${script}" ${args}`, { stdio: "inherit" });
58
+ const args = buildInitArgs(name, dir);
59
+ const result = spawnSync("bash", [script, ...args], { stdio: "inherit" });
60
+ process.exit(result.status ?? 1);
44
61
  });
45
62
 
46
- program.parse();
63
+ if (isDirectRun()) {
64
+ program.parse();
65
+ }
@@ -0,0 +1,31 @@
1
+ ---
2
+ name: doctor
3
+ description: Harness self-diagnosis — check environment, project, resources, and git state. Auto-fix common issues.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--fix] [--verbose]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Is Turing healthy? Check everything and get a score.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/harness_doctor.py $ARGUMENTS`
14
+ 3. **Saved:** `experiments/doctor/`
15
+
16
+ ## Checks
17
+ - **Environment:** Python version, venv status
18
+ - **Dependencies:** all required packages importable
19
+ - **Config:** config.yaml valid with required fields
20
+ - **Experiment log:** JSONL integrity, corrupt line detection
21
+ - **Scripts:** train.py, prepare.py, evaluate.py exist and parse
22
+ - **Disk space:** warn if <1GB free
23
+ - **Git state:** uncommitted changes to critical files
24
+ - **Claude hooks:** `.claude/settings.local.json` hook group schema; `--fix` migrates legacy bare command hooks
25
+
26
+ ## Examples
27
+ ```
28
+ /turing:doctor
29
+ /turing:doctor --fix
30
+ /turing:doctor --verbose --json
31
+ ```
package/commands/init.md CHANGED
@@ -37,18 +37,36 @@ python3 <templates_dir>/scripts/scaffold.py \
37
37
  The scaffold script handles everything in a single atomic operation:
38
38
  - Copies all template files with placeholder substitution
39
39
  - Creates data/, experiments/, models/ directories
40
- - Sets up agent memory at `.claude/agent-memory/ml-researcher/MEMORY.md`
40
+ - Sets up agent memory at `.claude/agent-memory/ml-researcher-{project_name}/MEMORY.md` (the directory suffix is a filesystem-safe form of the project name)
41
41
  - Configures Claude Code hooks in `.claude/settings.local.json`
42
42
  - Creates Python virtual environment and installs requirements
43
43
  - Verifies all placeholders were replaced (fails loudly if any remain)
44
44
 
45
45
  ## Locating Templates
46
46
 
47
- Find the templates directory using Glob:
47
+ Use the installed command-pack templates directory first:
48
+ ```
49
+ .claude/commands/turing/templates/
50
+ ~/.claude/commands/turing/templates/
51
+ ```
52
+ Then fall back to plugin or npm locations:
48
53
  ```
49
54
  ~/.claude/plugins/*/templates/
55
+ node_modules/claude-turing/templates/
56
+ ```
57
+
58
+ Example command:
59
+
60
+ ```bash
61
+ python3 ~/.claude/commands/turing/templates/scripts/scaffold.py \
62
+ --project-name "<project_name>" \
63
+ --target-metric "<target_metric>" \
64
+ --metric-direction "<metric_direction>" \
65
+ --task-description "<task_description>" \
66
+ --ml-dir "<ml_dir>" \
67
+ --data-source "<data_source>" \
68
+ --templates-dir ~/.claude/commands/turing/templates
50
69
  ```
51
- Or check if installed via npm by looking for `node_modules/claude-turing/templates/`.
52
70
 
53
71
  ## After Scaffolding
54
72
 
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: plan
3
+ description: Research planning assistant — design a strategic experiment campaign with budget-aware ROI allocation.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--budget 20] [--goal \"maximize F1 for production\"]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Design the next N experiments strategically, not randomly. Allocates budget by expected ROI.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/research_planner.py $ARGUMENTS`
14
+ 3. **Saved:** `experiments/plans/`
15
+
16
+ ## How it works
17
+ - Analyzes experiment history to compute per-family ROI
18
+ - Adjusts strategy priorities based on project state and goal
19
+ - Allocates budget across: feature engineering, model search, ensemble, calibration, verification
20
+ - Generates phased plan with specific experiment descriptions
21
+
22
+ ## Examples
23
+ ```
24
+ /turing:plan --budget 20
25
+ /turing:plan --budget 10 --goal "maximize F1 for production deployment"
26
+ /turing:plan --budget 30 --json
27
+ ```
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: postmortem
3
+ description: Failure postmortem — diagnose why experiments stopped improving and get actionable next steps.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--window 10] [--auto-trigger 5]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ When experiments stop improving, find out why. Diagnoses search space exhaustion, config errors, data issues, metric ceilings, and noise floors.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/failure_postmortem.py $ARGUMENTS`
14
+ 3. **Saved:** `experiments/postmortems/`
15
+
16
+ ## Diagnosis categories
17
+ - **Search space exhaustion:** micro-tuning params that don't matter
18
+ - **Systematic config error:** all experiments share a bad common config
19
+ - **Data issue:** all model types fail similarly
20
+ - **Metric ceiling:** near theoretical maximum
21
+ - **Noise floor:** improvements within seed variance
22
+
23
+ ## Examples
24
+ ```
25
+ /turing:postmortem
26
+ /turing:postmortem --window 15
27
+ /turing:postmortem --json
28
+ ```
@@ -80,6 +80,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
80
80
  | "simulate", "predict outcome", "pre-filter", "which configs will work", "forecast" | `/turing:simulate` | Predict |
81
81
  | "update", "incremental", "new data", "add data", "fine-tune existing", "partial update" | `/turing:update` | Update |
82
82
  | "registry", "promote", "demote", "staging", "production", "which model is deployed", "model lifecycle" | `/turing:registry` | Govern |
83
+ | "postmortem", "why failing", "failure streak", "why no improvement", "what went wrong" | `/turing:postmortem` | Diagnose |
84
+ | "doctor", "health check", "is it broken", "diagnose harness", "self-check" | `/turing:doctor` | Check |
85
+ | "plan", "research plan", "campaign", "what next", "allocate budget", "strategic plan" | `/turing:plan` | Plan |
83
86
 
84
87
  ## Sub-commands
85
88
 
@@ -156,6 +159,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
156
159
  | `/turing:simulate [--configs] [--top-k]` | Experiment outcome prediction: pre-filter configs using surrogate model, save budget | (inline) |
157
160
  | `/turing:update <exp-id> --new-data <path>` | Incremental model update: add new data without full retraining, forgetting detection | (inline) |
158
161
  | `/turing:registry [list\|register\|promote\|demote\|history]` | Model registry: stage lifecycle (candidate → staging → production) with promotion gates | (inline) |
162
+ | `/turing:postmortem [--window N]` | Failure postmortem: diagnose why experiments stopped improving (exhaustion, config error, data issue, ceiling, noise) | (inline) |
163
+ | `/turing:doctor [--fix]` | Harness self-diagnosis: environment, dependencies, config, log integrity, scripts, disk, git state, Claude hooks | (inline) |
164
+ | `/turing:plan [--budget N] [--goal]` | Research planning assistant: strategic campaign design with budget-aware ROI allocation | (inline) |
159
165
 
160
166
  ## Proactive Detection
161
167
 
@@ -35,6 +35,8 @@ placeholders:
35
35
  ML_DIR: "Directory for ML files relative to project root"
36
36
  DATA_SOURCE: "Path to training data file"
37
37
  METRIC_DIRECTION: "lower or higher — which direction is better"
38
+ LOWER_IS_BETTER: "Boolean derived from METRIC_DIRECTION for generated config.yaml"
39
+ MEMORY_DIR_NAME: "Filesystem-safe project name used for scoped agent memory"
38
40
 
39
41
  # Agent configuration
40
42
  agents:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-turing",
3
- "version": "4.3.0",
3
+ "version": "4.5.0",
4
4
  "type": "module",
5
5
  "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
6
6
  "bin": {
@@ -34,15 +34,15 @@
34
34
  "ai-agents"
35
35
  ],
36
36
  "author": {
37
- "name": "pragnition"
37
+ "name": "Prannaya Gupta"
38
38
  },
39
- "homepage": "https://github.com/pragnition/turing",
39
+ "homepage": "https://github.com/ThePyProgrammer/turing",
40
40
  "repository": {
41
41
  "type": "git",
42
- "url": "git+https://github.com/pragnition/turing.git"
42
+ "url": "git+https://github.com/ThePyProgrammer/turing.git"
43
43
  },
44
44
  "bugs": {
45
- "url": "https://github.com/pragnition/turing/issues"
45
+ "url": "https://github.com/ThePyProgrammer/turing/issues"
46
46
  },
47
47
  "license": "MIT",
48
48
  "engines": {
package/src/install.js CHANGED
@@ -9,7 +9,7 @@
9
9
  * node src/install.js [--global] [--project]
10
10
  */
11
11
 
12
- import { readdir, copyFile, mkdir } from "fs/promises";
12
+ import { readdir, copyFile, mkdir, cp } from "fs/promises";
13
13
  import { join, dirname } from "path";
14
14
  import { fileURLToPath } from "url";
15
15
  import { getTargetPaths } from "./paths.js";
@@ -38,6 +38,7 @@ const SUB_COMMANDS = [
38
38
  "onboard", "share", "review",
39
39
  "whatif", "counterfactual", "simulate",
40
40
  "update", "registry",
41
+ "postmortem", "doctor", "plan",
41
42
  ];
42
43
 
43
44
  export async function install(opts = {}) {
@@ -49,7 +50,7 @@ export async function install(opts = {}) {
49
50
  console.log("");
50
51
 
51
52
  // Create directories for each sub-command + agents + config
52
- for (const subDir of ["", "agents", "config", "rules", ...SUB_COMMANDS]) {
53
+ for (const subDir of ["", "agents", "config", "rules", "templates", ...SUB_COMMANDS]) {
53
54
  await mkdir(join(paths.commands, subDir), { recursive: true });
54
55
  }
55
56
 
@@ -102,6 +103,21 @@ export async function install(opts = {}) {
102
103
  }
103
104
  console.log(` ${CONFIG_FILES.length} config files installed`);
104
105
 
106
+ // Copy templates used by /turing:init
107
+ await cp(
108
+ join(PLUGIN_ROOT, "templates"),
109
+ join(paths.commands, "templates"),
110
+ {
111
+ recursive: true,
112
+ force: true,
113
+ filter: (src) =>
114
+ !src.includes("__pycache__") &&
115
+ !src.includes(".pytest_cache") &&
116
+ !src.endsWith(".pyc"),
117
+ },
118
+ );
119
+ console.log(" Templates installed");
120
+
105
121
  // Update CLAUDE.md
106
122
  await updateClaudeMd(paths.claudeMd);
107
123
  console.log(" CLAUDE.md updated");
package/src/verify.js CHANGED
@@ -8,10 +8,14 @@
8
8
  * node src/verify.js [--scope global|project]
9
9
  */
10
10
 
11
- import { access } from "fs/promises";
12
- import { join } from "path";
11
+ import { access, readdir } from "fs/promises";
12
+ import { dirname, join } from "path";
13
+ import { fileURLToPath } from "url";
13
14
  import { getTargetPaths } from "./paths.js";
14
15
 
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const PLUGIN_ROOT = join(__dirname, "..");
18
+
15
19
  const EXPECTED_COMMANDS = [
16
20
  "SKILL.md",
17
21
  "init/SKILL.md",
@@ -85,6 +89,9 @@ const EXPECTED_COMMANDS = [
85
89
  "simulate/SKILL.md",
86
90
  "update/SKILL.md",
87
91
  "registry/SKILL.md",
92
+ "postmortem/SKILL.md",
93
+ "doctor/SKILL.md",
94
+ "plan/SKILL.md",
88
95
  ];
89
96
 
90
97
  const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
@@ -97,6 +104,27 @@ const EXPECTED_CONFIG = [
97
104
  "watch_alerts.yaml",
98
105
  ];
99
106
 
107
+ async function templateFiles(root, relativeDir = "templates") {
108
+ const dir = join(root, relativeDir);
109
+ const entries = await readdir(dir, { withFileTypes: true });
110
+ const files = [];
111
+
112
+ for (const entry of entries) {
113
+ if (entry.name === "__pycache__" || entry.name === ".pytest_cache") {
114
+ continue;
115
+ }
116
+
117
+ const relativePath = `${relativeDir}/${entry.name}`;
118
+ if (entry.isDirectory()) {
119
+ files.push(...await templateFiles(root, relativePath));
120
+ } else if (!entry.name.endsWith(".pyc")) {
121
+ files.push(relativePath);
122
+ }
123
+ }
124
+
125
+ return files;
126
+ }
127
+
100
128
  async function fileExists(path) {
101
129
  try {
102
130
  await access(path);
@@ -108,7 +136,9 @@ async function fileExists(path) {
108
136
 
109
137
  export async function verify(opts = {}) {
110
138
  const scopes = opts.scope ? [opts.scope] : ["global", "project"];
139
+ const expectedTemplates = await templateFiles(PLUGIN_ROOT);
111
140
  let found = false;
141
+ let totalMissing = 0;
112
142
 
113
143
  for (const scope of scopes) {
114
144
  const paths = getTargetPaths(scope);
@@ -141,10 +171,18 @@ export async function verify(opts = {}) {
141
171
  if (!ok) missing++;
142
172
  }
143
173
 
174
+ console.log("\nTemplates:");
175
+ for (const template of expectedTemplates) {
176
+ const ok = await fileExists(join(paths.commands, template));
177
+ console.log(` ${ok ? "✓" : "✗"} commands/${template}`);
178
+ if (!ok) missing++;
179
+ }
180
+
144
181
  // Check CLAUDE.md
145
182
  const claudeOk = await fileExists(paths.claudeMd);
146
183
  console.log(`\n ${claudeOk ? "✓" : "✗"} CLAUDE.md`);
147
184
 
185
+ totalMissing += missing;
148
186
  console.log(
149
187
  `\n ${missing === 0 ? "✓ Installation complete" : `✗ ${missing} files missing — run claude-turing install`}\n`,
150
188
  );
@@ -152,6 +190,11 @@ export async function verify(opts = {}) {
152
190
 
153
191
  if (!found) {
154
192
  console.log("\n✗ turing not found. Run: claude-turing install\n");
193
+ totalMissing++;
194
+ }
195
+
196
+ if (totalMissing > 0) {
197
+ process.exitCode = 1;
155
198
  }
156
199
  }
157
200
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  ## Overview
6
6
 
7
- This pipeline uses the [Turing](https://github.com/pragnition/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
7
+ This pipeline uses the [Turing](https://github.com/ThePyProgrammer/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
8
8
 
9
9
  **Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
10
10
 
@@ -17,7 +17,7 @@ evaluation:
17
17
  metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
18
18
  # Set to true for metrics where lower is better (mae, mse, rmse, loss)
19
19
  # Set to false for metrics where higher is better (accuracy, f1, auc)
20
- lower_is_better: false # {{METRIC_DIRECTION}} -- change to true if lower is better
20
+ lower_is_better: {{LOWER_IS_BETTER}}
21
21
 
22
22
  # Multi-seed configuration (Phase 10.1: /turing:seed)
23
23
  # Seeds used for seed studies — diverse values for good coverage
@@ -43,7 +43,7 @@ git checkout -b exp/NNN-description
43
43
 
44
44
  ## Memory
45
45
 
46
- Read `.claude/agent-memory/ml-researcher/MEMORY.md` at the start of each session.
46
+ Read `.claude/agent-memory/ml-researcher-{{MEMORY_DIR_NAME}}/MEMORY.md` at the start of each session.
47
47
  Update it after each experiment with:
48
48
  - Best result (if improved)
49
49
  - What was tried and why