claude-turing 4.4.0 → 4.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. package/.claude-plugin/plugin.json +4 -4
  2. package/LICENSE +1 -1
  3. package/README.md +78 -555
  4. package/bin/cli.js +23 -4
  5. package/commands/doctor.md +1 -0
  6. package/commands/init.md +21 -3
  7. package/commands/turing.md +1 -1
  8. package/config/defaults.yaml +2 -0
  9. package/package.json +5 -5
  10. package/src/install.js +17 -2
  11. package/src/verify.js +42 -2
  12. package/templates/README.md +1 -1
  13. package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
  14. package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
  15. package/templates/config.yaml +1 -1
  16. package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
  17. package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
  18. package/templates/program.md +1 -1
  19. package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  20. package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
  21. package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
  22. package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
  23. package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
  24. package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
  25. package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
  26. package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
  27. package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
  28. package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
  29. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
  30. package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
  31. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
  32. package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
  33. package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
  34. package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
  35. package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
  36. package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
  37. package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
  38. package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
  39. package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
  40. package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
  41. package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
  42. package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
  43. package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
  44. package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
  45. package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
  46. package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
  47. package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
  48. package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
  49. package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
  50. package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
  51. package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
  52. package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
  53. package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
  54. package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
  55. package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
  56. package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
  57. package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
  58. package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
  59. package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
  60. package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
  61. package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
  62. package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
  63. package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
  64. package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
  65. package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
  66. package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
  67. package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
  68. package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
  69. package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
  70. package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
  71. package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
  72. package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
  73. package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
  74. package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
  75. package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
  76. package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
  77. package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
  78. package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
  79. package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
  80. package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
  81. package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
  82. package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
  83. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  84. package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
  85. package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
  86. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
  87. package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
  88. package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
  89. package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
  90. package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
  91. package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
  92. package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
  93. package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
  94. package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
  95. package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
  96. package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
  97. package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
  98. package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
  99. package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
  100. package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
  101. package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
  102. package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
  103. package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
  104. package/templates/scripts/harness_doctor.py +145 -1
  105. package/templates/scripts/scaffold.py +50 -28
package/bin/cli.js CHANGED
@@ -1,9 +1,25 @@
1
1
  #!/usr/bin/env node
2
2
  import { createRequire } from "module";
3
+ import { realpathSync } from "fs";
4
+ import { fileURLToPath } from "url";
3
5
  const require = createRequire(import.meta.url);
4
6
  const { Command } = require("commander");
5
7
  const pkg = require("../package.json");
6
8
 
9
+ export function buildInitArgs(name, dir) {
10
+ return [name, dir].filter(Boolean);
11
+ }
12
+
13
+ function isDirectRun() {
14
+ if (!process.argv[1]) return false;
15
+
16
+ try {
17
+ return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(process.argv[1]);
18
+ } catch {
19
+ return false;
20
+ }
21
+ }
22
+
7
23
  const program = new Command();
8
24
 
9
25
  program
@@ -34,13 +50,16 @@ program
34
50
  .command("init [name] [dir]")
35
51
  .description("Scaffold ML project (CLI mode, non-Claude-Code usage)")
36
52
  .action(async (name, dir) => {
37
- const { execSync } = await import("child_process");
53
+ const { spawnSync } = await import("child_process");
38
54
  const { dirname, join } = await import("path");
39
55
  const { fileURLToPath } = await import("url");
40
56
  const __dirname = dirname(fileURLToPath(import.meta.url));
41
57
  const script = join(__dirname, "turing-init.sh");
42
- const args = [name, dir].filter(Boolean).join(" ");
43
- execSync(`bash "${script}" ${args}`, { stdio: "inherit" });
58
+ const args = buildInitArgs(name, dir);
59
+ const result = spawnSync("bash", [script, ...args], { stdio: "inherit" });
60
+ process.exit(result.status ?? 1);
44
61
  });
45
62
 
46
- program.parse();
63
+ if (isDirectRun()) {
64
+ program.parse();
65
+ }
package/commands/doctor.md CHANGED
@@ -21,6 +21,7 @@ Is Turing healthy? Check everything and get a score.
21
21
  - **Scripts:** train.py, prepare.py, evaluate.py exist and parse
22
22
  - **Disk space:** warn if <1GB free
23
23
  - **Git state:** uncommitted changes to critical files
24
+ - **Claude hooks:** `.claude/settings.local.json` hook group schema; `--fix` migrates legacy bare command hooks
24
25
 
25
26
  ## Examples
26
27
  ```
package/commands/init.md CHANGED
@@ -37,18 +37,36 @@ python3 <templates_dir>/scripts/scaffold.py \
37
37
  The scaffold script handles everything in a single atomic operation:
38
38
  - Copies all template files with placeholder substitution
39
39
  - Creates data/, experiments/, models/ directories
40
- - Sets up agent memory at `.claude/agent-memory/ml-researcher/MEMORY.md`
40
+ - Sets up agent memory at `.claude/agent-memory/ml-researcher-{project_name}/MEMORY.md`
41
41
  - Configures Claude Code hooks in `.claude/settings.local.json`
42
42
  - Creates Python virtual environment and installs requirements
43
43
  - Verifies all placeholders were replaced (fails loudly if any remain)
44
44
 
45
45
  ## Locating Templates
46
46
 
47
- Find the templates directory using Glob:
47
+ Use the installed command-pack templates directory first:
48
+ ```
49
+ .claude/commands/turing/templates/
50
+ ~/.claude/commands/turing/templates/
51
+ ```
52
+ Then fall back to plugin or npm locations:
48
53
  ```
49
54
  ~/.claude/plugins/*/templates/
55
+ node_modules/claude-turing/templates/
56
+ ```
57
+
58
+ Example command:
59
+
60
+ ```bash
61
+ python3 ~/.claude/commands/turing/templates/scripts/scaffold.py \
62
+ --project-name "<project_name>" \
63
+ --target-metric "<target_metric>" \
64
+ --metric-direction "<metric_direction>" \
65
+ --task-description "<task_description>" \
66
+ --ml-dir "<ml_dir>" \
67
+ --data-source "<data_source>" \
68
+ --templates-dir ~/.claude/commands/turing/templates
50
69
  ```
51
- Or check if installed via npm by looking for `node_modules/claude-turing/templates/`.
52
70
 
53
71
  ## After Scaffolding
54
72
 
package/commands/turing.md CHANGED
@@ -160,7 +160,7 @@ You are the Turing ML research router. Detect the user's intent and route to the
160
160
  | `/turing:update <exp-id> --new-data <path>` | Incremental model update: add new data without full retraining, forgetting detection | (inline) |
161
161
  | `/turing:registry [list\|register\|promote\|demote\|history]` | Model registry: stage lifecycle (candidate → staging → production) with promotion gates | (inline) |
162
162
  | `/turing:postmortem [--window N]` | Failure postmortem: diagnose why experiments stopped improving (exhaustion, config error, data issue, ceiling, noise) | (inline) |
163
- | `/turing:doctor [--fix]` | Harness self-diagnosis: environment, dependencies, config, log integrity, scripts, disk, git state | (inline) |
163
+ | `/turing:doctor [--fix]` | Harness self-diagnosis: environment, dependencies, config, log integrity, scripts, disk, git state, Claude hooks | (inline) |
164
164
  | `/turing:plan [--budget N] [--goal]` | Research planning assistant: strategic campaign design with budget-aware ROI allocation | (inline) |
165
165
 
166
166
  ## Proactive Detection
package/config/defaults.yaml CHANGED
@@ -35,6 +35,8 @@ placeholders:
35
35
  ML_DIR: "Directory for ML files relative to project root"
36
36
  DATA_SOURCE: "Path to training data file"
37
37
  METRIC_DIRECTION: "lower or higher — which direction is better"
38
+ LOWER_IS_BETTER: "Boolean derived from METRIC_DIRECTION for generated config.yaml"
39
+ MEMORY_DIR_NAME: "Filesystem-safe project name used for scoped agent memory"
38
40
 
39
41
  # Agent configuration
40
42
  agents:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-turing",
3
- "version": "4.4.0",
3
+ "version": "4.5.0",
4
4
  "type": "module",
5
5
  "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
6
6
  "bin": {
@@ -34,15 +34,15 @@
34
34
  "ai-agents"
35
35
  ],
36
36
  "author": {
37
- "name": "pragnition"
37
+ "name": "Prannaya Gupta"
38
38
  },
39
- "homepage": "https://github.com/pragnition/turing",
39
+ "homepage": "https://github.com/ThePyProgrammer/turing",
40
40
  "repository": {
41
41
  "type": "git",
42
- "url": "git+https://github.com/pragnition/turing.git"
42
+ "url": "git+https://github.com/ThePyProgrammer/turing.git"
43
43
  },
44
44
  "bugs": {
45
- "url": "https://github.com/pragnition/turing/issues"
45
+ "url": "https://github.com/ThePyProgrammer/turing/issues"
46
46
  },
47
47
  "license": "MIT",
48
48
  "engines": {
package/src/install.js CHANGED
@@ -9,7 +9,7 @@
9
9
  * node src/install.js [--global] [--project]
10
10
  */
11
11
 
12
- import { readdir, copyFile, mkdir } from "fs/promises";
12
+ import { readdir, copyFile, mkdir, cp } from "fs/promises";
13
13
  import { join, dirname } from "path";
14
14
  import { fileURLToPath } from "url";
15
15
  import { getTargetPaths } from "./paths.js";
@@ -50,7 +50,7 @@ export async function install(opts = {}) {
50
50
  console.log("");
51
51
 
52
52
  // Create directories for each sub-command + agents + config
53
- for (const subDir of ["", "agents", "config", "rules", ...SUB_COMMANDS]) {
53
+ for (const subDir of ["", "agents", "config", "rules", "templates", ...SUB_COMMANDS]) {
54
54
  await mkdir(join(paths.commands, subDir), { recursive: true });
55
55
  }
56
56
 
@@ -103,6 +103,21 @@ export async function install(opts = {}) {
103
103
  }
104
104
  console.log(` ${CONFIG_FILES.length} config files installed`);
105
105
 
106
+ // Copy templates used by /turing:init
107
+ await cp(
108
+ join(PLUGIN_ROOT, "templates"),
109
+ join(paths.commands, "templates"),
110
+ {
111
+ recursive: true,
112
+ force: true,
113
+ filter: (src) =>
114
+ !src.includes("__pycache__") &&
115
+ !src.includes(".pytest_cache") &&
116
+ !src.endsWith(".pyc"),
117
+ },
118
+ );
119
+ console.log(" Templates installed");
120
+
106
121
  // Update CLAUDE.md
107
122
  await updateClaudeMd(paths.claudeMd);
108
123
  console.log(" CLAUDE.md updated");
package/src/verify.js CHANGED
@@ -8,10 +8,14 @@
8
8
  * node src/verify.js [--scope global|project]
9
9
  */
10
10
 
11
- import { access } from "fs/promises";
12
- import { join } from "path";
11
+ import { access, readdir } from "fs/promises";
12
+ import { dirname, join } from "path";
13
+ import { fileURLToPath } from "url";
13
14
  import { getTargetPaths } from "./paths.js";
14
15
 
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ const PLUGIN_ROOT = join(__dirname, "..");
18
+
15
19
  const EXPECTED_COMMANDS = [
16
20
  "SKILL.md",
17
21
  "init/SKILL.md",
@@ -100,6 +104,27 @@ const EXPECTED_CONFIG = [
100
104
  "watch_alerts.yaml",
101
105
  ];
102
106
 
107
+ async function templateFiles(root, relativeDir = "templates") {
108
+ const dir = join(root, relativeDir);
109
+ const entries = await readdir(dir, { withFileTypes: true });
110
+ const files = [];
111
+
112
+ for (const entry of entries) {
113
+ if (entry.name === "__pycache__" || entry.name === ".pytest_cache") {
114
+ continue;
115
+ }
116
+
117
+ const relativePath = `${relativeDir}/${entry.name}`;
118
+ if (entry.isDirectory()) {
119
+ files.push(...await templateFiles(root, relativePath));
120
+ } else if (!entry.name.endsWith(".pyc")) {
121
+ files.push(relativePath);
122
+ }
123
+ }
124
+
125
+ return files;
126
+ }
127
+
103
128
  async function fileExists(path) {
104
129
  try {
105
130
  await access(path);
@@ -111,7 +136,9 @@ async function fileExists(path) {
111
136
 
112
137
  export async function verify(opts = {}) {
113
138
  const scopes = opts.scope ? [opts.scope] : ["global", "project"];
139
+ const expectedTemplates = await templateFiles(PLUGIN_ROOT);
114
140
  let found = false;
141
+ let totalMissing = 0;
115
142
 
116
143
  for (const scope of scopes) {
117
144
  const paths = getTargetPaths(scope);
@@ -144,10 +171,18 @@ export async function verify(opts = {}) {
144
171
  if (!ok) missing++;
145
172
  }
146
173
 
174
+ console.log("\nTemplates:");
175
+ for (const template of expectedTemplates) {
176
+ const ok = await fileExists(join(paths.commands, template));
177
+ console.log(` ${ok ? "✓" : "✗"} commands/${template}`);
178
+ if (!ok) missing++;
179
+ }
180
+
147
181
  // Check CLAUDE.md
148
182
  const claudeOk = await fileExists(paths.claudeMd);
149
183
  console.log(`\n ${claudeOk ? "✓" : "✗"} CLAUDE.md`);
150
184
 
185
+ totalMissing += missing;
151
186
  console.log(
152
187
  `\n ${missing === 0 ? "✓ Installation complete" : `✗ ${missing} files missing — run claude-turing install`}\n`,
153
188
  );
@@ -155,6 +190,11 @@ export async function verify(opts = {}) {
155
190
 
156
191
  if (!found) {
157
192
  console.log("\n✗ turing not found. Run: claude-turing install\n");
193
+ totalMissing++;
194
+ }
195
+
196
+ if (totalMissing > 0) {
197
+ process.exitCode = 1;
158
198
  }
159
199
  }
160
200
 
package/templates/README.md CHANGED
@@ -4,7 +4,7 @@
4
4
 
5
5
  ## Overview
6
6
 
7
- This pipeline uses the [Turing](https://github.com/pragnition/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
7
+ This pipeline uses the [Turing](https://github.com/ThePyProgrammer/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
8
8
 
9
9
  **Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
10
10
 
package/templates/config.yaml CHANGED
@@ -17,7 +17,7 @@ evaluation:
17
17
  metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
18
18
  # Set to true for metrics where lower is better (mae, mse, rmse, loss)
19
19
  # Set to false for metrics where higher is better (accuracy, f1, auc)
20
- lower_is_better: false # {{METRIC_DIRECTION}} -- change to true if lower is better
20
+ lower_is_better: {{LOWER_IS_BETTER}}
21
21
 
22
22
  # Multi-seed configuration (Phase 10.1: /turing:seed)
23
23
  # Seeds used for seed studies — diverse values for good coverage
package/templates/program.md CHANGED
@@ -43,7 +43,7 @@ git checkout -b exp/NNN-description
43
43
 
44
44
  ## Memory
45
45
 
46
- Read `.claude/agent-memory/ml-researcher/MEMORY.md` at the start of each session.
46
+ Read `.claude/agent-memory/ml-researcher-{{MEMORY_DIR_NAME}}/MEMORY.md` at the start of each session.
47
47
  Update it after each experiment with:
48
48
  - Best result (if improved)
49
49
  - What was tried and why