claude-turing 4.4.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/.claude-plugin/marketplace.json +18 -0
  2. package/.claude-plugin/plugin.json +4 -4
  3. package/LICENSE +1 -1
  4. package/README.md +78 -555
  5. package/bin/cli.js +23 -4
  6. package/commands/doctor.md +1 -0
  7. package/commands/init.md +21 -3
  8. package/commands/turing.md +85 -77
  9. package/config/commands.yaml +928 -0
  10. package/config/defaults.yaml +2 -0
  11. package/package.json +7 -6
  12. package/src/command-registry.js +151 -0
  13. package/src/install.js +24 -35
  14. package/src/verify.js +45 -88
  15. package/templates/README.md +1 -1
  16. package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
  17. package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
  18. package/templates/config.yaml +1 -1
  19. package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
  20. package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
  21. package/templates/program.md +1 -1
  22. package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  23. package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
  24. package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
  25. package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
  26. package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
  27. package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
  28. package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
  29. package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
  30. package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
  31. package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
  32. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
  33. package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
  34. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
  35. package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
  36. package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
  37. package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
  38. package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
  39. package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
  40. package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
  41. package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
  42. package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
  43. package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
  44. package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
  45. package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
  46. package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
  47. package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
  48. package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
  49. package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
  50. package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
  51. package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
  52. package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
  53. package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
  54. package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
  55. package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
  56. package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
  57. package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
  58. package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
  59. package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
  60. package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
  61. package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
  62. package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
  63. package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
  64. package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
  65. package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
  66. package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
  67. package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
  68. package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
  69. package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
  70. package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
  71. package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
  72. package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
  73. package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
  74. package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
  75. package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
  76. package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
  77. package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
  78. package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
  79. package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
  80. package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
  81. package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
  82. package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
  83. package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
  84. package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
  85. package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
  86. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  87. package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
  88. package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
  89. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
  90. package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
  91. package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
  92. package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
  93. package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
  94. package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
  95. package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
  96. package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
  97. package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
  98. package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
  99. package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
  100. package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
  101. package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
  102. package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
  103. package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
  104. package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
  105. package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
  106. package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
  107. package/templates/scripts/harness_doctor.py +145 -1
  108. package/templates/scripts/scaffold.py +50 -28
@@ -35,6 +35,8 @@ placeholders:
35
35
  ML_DIR: "Directory for ML files relative to project root"
36
36
  DATA_SOURCE: "Path to training data file"
37
37
  METRIC_DIRECTION: "lower or higher — which direction is better"
38
+ LOWER_IS_BETTER: "Boolean derived from METRIC_DIRECTION for generated config.yaml"
39
+ MEMORY_DIR_NAME: "Filesystem-safe project name used for scoped agent memory"
38
40
 
39
41
  # Agent configuration
40
42
  agents:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-turing",
3
- "version": "4.4.0",
3
+ "version": "4.6.0",
4
4
  "type": "module",
5
5
  "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
6
6
  "bin": {
@@ -34,21 +34,22 @@
34
34
  "ai-agents"
35
35
  ],
36
36
  "author": {
37
- "name": "pragnition"
37
+ "name": "Prannaya Gupta"
38
38
  },
39
- "homepage": "https://github.com/pragnition/turing",
39
+ "homepage": "https://github.com/ThePyProgrammer/turing",
40
40
  "repository": {
41
41
  "type": "git",
42
- "url": "git+https://github.com/pragnition/turing.git"
42
+ "url": "git+https://github.com/ThePyProgrammer/turing.git"
43
43
  },
44
44
  "bugs": {
45
- "url": "https://github.com/pragnition/turing/issues"
45
+ "url": "https://github.com/ThePyProgrammer/turing/issues"
46
46
  },
47
47
  "license": "MIT",
48
48
  "engines": {
49
49
  "node": ">=18.0.0"
50
50
  },
51
51
  "dependencies": {
52
- "commander": "^13.0.0"
52
+ "commander": "^13.0.0",
53
+ "yaml": "^2.6.1"
53
54
  }
54
55
  }
@@ -0,0 +1,151 @@
1
+ import { readFile } from 'fs/promises';
2
+ import { dirname, join } from 'path';
3
+ import { fileURLToPath } from 'url';
4
+ import YAML from 'yaml';
5
+
6
+ const __dirname = dirname(fileURLToPath(import.meta.url));
7
+ const PLUGIN_ROOT = dirname(__dirname);
8
+ const REGISTRY_PATH = join(PLUGIN_ROOT, 'config', 'commands.yaml');
9
+
10
+ const COMMAND_NAME_PATTERN = /^[a-z][a-z0-9-]*$/;
11
+ const INVOCATION_MODES = new Set(['slash_only']);
12
+ const MODEL_INVOCATIONS = new Set(['disabled', 'enabled']);
13
+ const SCRIPT_LOCATIONS = new Set(['repo', 'scaffold']);
14
+
15
/**
 * Tell whether a parsed value is a mapping-like object.
 *
 * @param {unknown} value - Any value.
 * @returns {boolean} `true` for non-null objects that are not arrays.
 */
function isRecord(value) {
  // `typeof null` and `typeof []` are both 'object', so rule those out first.
  if (value === null || Array.isArray(value)) {
    return false;
  }
  return typeof value === 'object';
}
18
+
19
/**
 * Assert that `value` is a mapping and hand it back.
 *
 * @param {unknown} value - Value to check.
 * @param {string} label - Name of the field, used in the error message.
 * @returns {object} The same `value`, unchanged.
 * @throws {Error} When `value` is null, an array, or not an object.
 */
function requireRecord(value, label) {
  if (isRecord(value)) {
    return value;
  }
  throw new Error(`${label} must be a mapping`);
}
25
+
26
/**
 * Assert that `value` is a string with visible content and hand it back.
 *
 * @param {unknown} value - Value to check.
 * @param {string} label - Name of the field, used in the error message.
 * @returns {string} The same `value`, unchanged (it is not trimmed).
 * @throws {Error} When `value` is not a string, is empty, or contains only
 *   whitespace.
 */
function requireNonEmptyString(value, label) {
  // A whitespace-only string (e.g. `description: "   "`) carries no content,
  // so treat it the same as an empty string instead of letting it through.
  if (typeof value !== 'string' || value.trim().length === 0) {
    throw new Error(`${label} must be a non-empty string`);
  }
  return value;
}
32
+
33
/**
 * Assert that `value` is an array holding at least one entry, every entry
 * being accepted by `requireNonEmptyString`.
 *
 * @param {unknown} value - Value to check.
 * @param {string} label - Name of the field, used in error messages; failing
 *   items are reported as `label[index]`.
 * @returns {string[]} The same array, unchanged.
 * @throws {Error} When `value` is not an array, is empty, or an item fails.
 */
function requireNonEmptyStringList(value, label) {
  const isPopulatedArray = Array.isArray(value) && value.length > 0;
  if (!isPopulatedArray) {
    throw new Error(`${label} must be a non-empty string list`);
  }

  // Index loop so every position is checked, in order.
  for (let position = 0; position < value.length; position += 1) {
    requireNonEmptyString(value[position], `${label}[${position}]`);
  }

  return value;
}
44
+
45
/**
 * Assert that `value` is a non-empty string contained in `allowed`.
 *
 * @param {unknown} value - Value to check.
 * @param {Set<string>} allowed - Accepted values.
 * @param {string} label - Name of the field, used in error messages.
 * @returns {string} The same `value`, unchanged.
 * @throws {Error} When `value` is not a non-empty string or is not in `allowed`.
 */
function requireEnum(value, allowed, label) {
  requireNonEmptyString(value, label);

  if (allowed.has(value)) {
    return value;
  }

  const choices = [...allowed].join(', ');
  throw new Error(`${label} must be one of: ${choices}`);
}
52
+
53
/**
 * Validate the optional `equivalent_script` entry of a command.
 *
 * The entry must be a mapping whose keys are exactly `location` and `path`;
 * `path` must be a non-empty string and `location` one of SCRIPT_LOCATIONS.
 *
 * @param {unknown} value - The `equivalent_script` value from the registry.
 * @param {string} commandName - Owning command, used to build error labels.
 * @returns {void}
 * @throws {Error} When the shape or any field is invalid.
 */
function validateEquivalentScript(value, commandName) {
  const label = `commands.${commandName}.equivalent_script`;
  const script = requireRecord(value, label);

  const expectedKeys = ['location', 'path'];
  const presentKeys = Object.keys(script);
  // Object keys are unique, so "same count and every expected key present"
  // means the key set is exactly the expected one.
  const hasExactKeys =
    presentKeys.length === expectedKeys.length &&
    expectedKeys.every((key) => presentKeys.includes(key));
  if (!hasExactKeys) {
    throw new Error(`${label} must contain exactly: ${expectedKeys.join(', ')}`);
  }

  requireNonEmptyString(script.path, `${label}.path`);
  requireEnum(script.location, SCRIPT_LOCATIONS, `${label}.location`);
}
69
+
70
/**
 * Validate one entry of the registry's `commands` mapping.
 *
 * Checks run in a fixed order and stop at the first failure: the command
 * name against COMMAND_NAME_PATTERN, then `description`, `lifecycle`,
 * `invocation_mode`, `model_invocation`, `mutates_project`, `tools`, and
 * finally the optional `argument_hint` and `equivalent_script`.
 *
 * @param {string} commandName - Key of the command in the registry.
 * @param {unknown} value - The command definition.
 * @returns {void}
 * @throws {Error} Describing the first invalid field.
 */
function validateCommand(commandName, value) {
  if (!COMMAND_NAME_PATTERN.test(commandName)) {
    throw new Error(`Invalid command name: ${commandName}`);
  }

  const prefix = `commands.${commandName}`;
  const command = requireRecord(value, prefix);

  // Required scalar fields.
  requireNonEmptyString(command.description, `${prefix}.description`);
  requireNonEmptyString(command.lifecycle, `${prefix}.lifecycle`);
  requireEnum(command.invocation_mode, INVOCATION_MODES, `${prefix}.invocation_mode`);
  requireEnum(command.model_invocation, MODEL_INVOCATIONS, `${prefix}.model_invocation`);

  const mutatesIsBoolean = typeof command.mutates_project === 'boolean';
  if (!mutatesIsBoolean) {
    throw new Error(`${prefix}.mutates_project must be a boolean`);
  }

  requireNonEmptyStringList(command.tools, `${prefix}.tools`);

  // Optional fields are validated only when the key is present.
  if ('argument_hint' in command) {
    requireNonEmptyString(command.argument_hint, `${prefix}.argument_hint`);
  }
  if ('equivalent_script' in command) {
    validateEquivalentScript(command.equivalent_script, commandName);
  }
}
99
+
100
/**
 * Validate the parsed registry document and build the normalized view.
 *
 * `config_files` is checked before `commands`, and each command is validated
 * in the order it appears in the document.
 *
 * @param {unknown} value - Result of parsing the registry YAML.
 * @returns {{commands: object, commandNames: string[], configFiles: string[]}}
 *   The raw `commands` mapping, its keys sorted, and a sorted copy of
 *   `config_files`.
 * @throws {Error} When the root, `config_files`, `commands`, or any command
 *   is invalid.
 */
function validateRegistry(value) {
  const root = requireRecord(value, 'Command registry root');
  const configFiles = requireNonEmptyStringList(root.config_files, 'config_files');
  const commands = requireRecord(root.commands, 'commands');

  const commandNames = Object.keys(commands);
  for (const name of commandNames) {
    validateCommand(name, commands[name]);
  }
  // Sort only after validation so errors follow document order.
  commandNames.sort();

  // Sort a copy so the list held by the parsed document is left untouched.
  const sortedConfigFiles = Array.from(configFiles).sort();

  return {
    commands,
    commandNames,
    configFiles: sortedConfigFiles,
  };
}
115
+
116
/**
 * Read, parse and validate the command registry YAML file.
 *
 * Each stage (read, parse, validate) rethrows failures with a message that
 * names the stage and the registry path. The underlying error is attached as
 * `cause`, so callers can still inspect details such as the filesystem error
 * code or the original stack trace.
 *
 * @param {string} [registryPath=REGISTRY_PATH] - Path of the registry file.
 * @returns {Promise<{commands: object, commandNames: string[], configFiles: string[]}>}
 *   The validated registry as produced by `validateRegistry`.
 * @throws {Error} When the file cannot be read, is not valid YAML, or fails
 *   validation.
 */
export async function loadCommandRegistry(registryPath = REGISTRY_PATH) {
  let source;
  try {
    source = await readFile(registryPath, 'utf8');
  } catch (error) {
    throw new Error(
      `Failed to read command registry at ${registryPath}: ${error.message}`,
      { cause: error },
    );
  }

  let parsed;
  try {
    parsed = YAML.parse(source);
  } catch (error) {
    throw new Error(
      `Failed to parse command registry at ${registryPath}: ${error.message}`,
      { cause: error },
    );
  }

  try {
    return validateRegistry(parsed);
  } catch (error) {
    throw new Error(
      `Invalid command registry at ${registryPath}: ${error.message}`,
      { cause: error },
    );
  }
}
137
+
138
/**
 * Load the registry and return its sorted command names.
 *
 * @param {string} [registryPath] - Registry file path; when omitted,
 *   `loadCommandRegistry` applies its own default.
 * @returns {Promise<string[]>} Sorted command names.
 */
export async function getCommandNames(registryPath) {
  const { commandNames } = await loadCommandRegistry(registryPath);
  return commandNames;
}
142
+
143
/**
 * List the skill files expected for the registry: the top-level `SKILL.md`
 * followed by `<name>/SKILL.md` for each command name.
 *
 * @param {string} [registryPath] - Registry file path, forwarded to
 *   `getCommandNames`.
 * @returns {Promise<string[]>} Relative paths, root file first.
 */
export async function getExpectedCommandPaths(registryPath) {
  const expected = ['SKILL.md'];
  for (const name of await getCommandNames(registryPath)) {
    expected.push(`${name}/SKILL.md`);
  }
  return expected;
}
147
+
148
/**
 * Load the registry and return its sorted list of config file names.
 *
 * @param {string} [registryPath] - Registry file path; when omitted,
 *   `loadCommandRegistry` applies its own default.
 * @returns {Promise<string[]>} Sorted config file names.
 */
export async function getConfigFiles(registryPath) {
  const { configFiles } = await loadCommandRegistry(registryPath);
  return configFiles;
}
package/src/install.js CHANGED
@@ -9,48 +9,29 @@
9
9
  * node src/install.js [--global] [--project]
10
10
  */
11
11
 
12
- import { readdir, copyFile, mkdir } from "fs/promises";
12
+ import { readdir, copyFile, mkdir, cp } from "fs/promises";
13
13
  import { join, dirname } from "path";
14
14
  import { fileURLToPath } from "url";
15
15
  import { getTargetPaths } from "./paths.js";
16
16
  import { updateClaudeMd } from "./claude-md.js";
17
+ import { getCommandNames, getConfigFiles } from "./command-registry.js";
17
18
 
18
19
  const __dirname = dirname(fileURLToPath(import.meta.url));
19
20
  const PLUGIN_ROOT = join(__dirname, "..");
20
21
 
21
- // Single source of truth for sub-commands (DRY — used for dirs and file copy)
22
- const SUB_COMMANDS = [
23
- "init", "train", "status", "compare", "sweep", "validate",
24
- "try", "brief", "suggest", "explore", "design", "logbook", "poster",
25
- "report", "mode", "preflight", "card", "seed", "reproduce",
26
- "diagnose", "ablate", "frontier", "profile", "checkpoint", "export",
27
- "lit", "paper", "queue", "retry", "fork",
28
- "diff", "watch", "regress",
29
- "ensemble", "stitch", "warm",
30
- "scale", "budget", "distill",
31
- "transfer", "audit",
32
- "sanity", "baseline", "leak",
33
- "xray", "sensitivity", "calibrate",
34
- "feature", "curriculum",
35
- "prune", "quantize", "merge", "surgery",
36
- "trend", "flashback", "archive", "annotate", "search", "template", "replay",
37
- "cite", "present", "changelog",
38
- "onboard", "share", "review",
39
- "whatif", "counterfactual", "simulate",
40
- "update", "registry",
41
- "postmortem", "doctor", "plan",
42
- ];
43
22
 
44
23
  export async function install(opts = {}) {
45
24
  const scope = opts.global ? "global" : opts.project ? "project" : "global";
46
25
  const paths = getTargetPaths(scope);
26
+ const subCommands = await getCommandNames();
27
+ const configFiles = await getConfigFiles();
47
28
 
48
29
  console.log("Turing ML Research Harness — Installer");
49
30
  console.log(`Target: ${paths.commands} (${scope})`);
50
31
  console.log("");
51
32
 
52
33
  // Create directories for each sub-command + agents + config
53
- for (const subDir of ["", "agents", "config", "rules", ...SUB_COMMANDS]) {
34
+ for (const subDir of ["", "agents", "config", "rules", "templates", ...subCommands]) {
54
35
  await mkdir(join(paths.commands, subDir), { recursive: true });
55
36
  }
56
37
 
@@ -62,13 +43,13 @@ export async function install(opts = {}) {
62
43
  console.log(" Router -> SKILL.md");
63
44
 
64
45
  // Copy sub-commands as <name>/SKILL.md
65
- for (const cmd of SUB_COMMANDS) {
46
+ for (const cmd of subCommands) {
66
47
  await copyFile(
67
48
  join(PLUGIN_ROOT, "commands", `${cmd}.md`),
68
49
  join(paths.commands, cmd, "SKILL.md"),
69
50
  );
70
51
  }
71
- console.log(` ${SUB_COMMANDS.length} commands installed`);
52
+ console.log(` ${subCommands.length} commands installed`);
72
53
 
73
54
  // Copy rules
74
55
  await copyFile(
@@ -88,20 +69,28 @@ export async function install(opts = {}) {
88
69
  console.log(` ${agentFiles.length} agents installed`);
89
70
 
90
71
  // Copy config (static schema files only)
91
- const CONFIG_FILES = [
92
- "defaults.yaml", "lifecycle.toml", "taxonomy.toml",
93
- "experiment_archetypes.yaml", "novelty_aliases.yaml",
94
- "relationships.toml", "state.toml", "task_taxonomy.yaml",
95
- "failure_modes.yaml",
96
- "watch_alerts.yaml",
97
- ];
98
- for (const file of CONFIG_FILES) {
72
+ for (const file of configFiles) {
99
73
  await copyFile(
100
74
  join(PLUGIN_ROOT, "config", file),
101
75
  join(paths.config, file),
102
76
  );
103
77
  }
104
- console.log(` ${CONFIG_FILES.length} config files installed`);
78
+ console.log(` ${configFiles.length} config files installed`);
79
+
80
+ // Copy templates used by /turing:init
81
+ await cp(
82
+ join(PLUGIN_ROOT, "templates"),
83
+ join(paths.commands, "templates"),
84
+ {
85
+ recursive: true,
86
+ force: true,
87
+ filter: (src) =>
88
+ !src.includes("__pycache__") &&
89
+ !src.includes(".pytest_cache") &&
90
+ !src.endsWith(".pyc"),
91
+ },
92
+ );
93
+ console.log(" Templates installed");
105
94
 
106
95
  // Update CLAUDE.md
107
96
  await updateClaudeMd(paths.claudeMd);
package/src/verify.js CHANGED
@@ -8,97 +8,37 @@
8
8
  * node src/verify.js [--scope global|project]
9
9
  */
10
10
 
11
- import { access } from "fs/promises";
12
- import { join } from "path";
11
+ import { access, readdir } from "fs/promises";
12
+ import { dirname, join } from "path";
13
+ import { fileURLToPath } from "url";
14
+ import { getConfigFiles, getExpectedCommandPaths } from "./command-registry.js";
13
15
  import { getTargetPaths } from "./paths.js";
14
16
 
15
- const EXPECTED_COMMANDS = [
16
- "SKILL.md",
17
- "init/SKILL.md",
18
- "train/SKILL.md",
19
- "status/SKILL.md",
20
- "compare/SKILL.md",
21
- "sweep/SKILL.md",
22
- "validate/SKILL.md",
23
- "try/SKILL.md",
24
- "brief/SKILL.md",
25
- "suggest/SKILL.md",
26
- "explore/SKILL.md",
27
- "design/SKILL.md",
28
- "logbook/SKILL.md",
29
- "poster/SKILL.md",
30
- "report/SKILL.md",
31
- "mode/SKILL.md",
32
- "preflight/SKILL.md",
33
- "card/SKILL.md",
34
- "seed/SKILL.md",
35
- "reproduce/SKILL.md",
36
- "diagnose/SKILL.md",
37
- "ablate/SKILL.md",
38
- "frontier/SKILL.md",
39
- "profile/SKILL.md",
40
- "checkpoint/SKILL.md",
41
- "export/SKILL.md",
42
- "lit/SKILL.md",
43
- "paper/SKILL.md",
44
- "queue/SKILL.md",
45
- "retry/SKILL.md",
46
- "fork/SKILL.md",
47
- "diff/SKILL.md",
48
- "watch/SKILL.md",
49
- "regress/SKILL.md",
50
- "ensemble/SKILL.md",
51
- "stitch/SKILL.md",
52
- "warm/SKILL.md",
53
- "scale/SKILL.md",
54
- "budget/SKILL.md",
55
- "distill/SKILL.md",
56
- "transfer/SKILL.md",
57
- "audit/SKILL.md",
58
- "sanity/SKILL.md",
59
- "baseline/SKILL.md",
60
- "leak/SKILL.md",
61
- "xray/SKILL.md",
62
- "sensitivity/SKILL.md",
63
- "calibrate/SKILL.md",
64
- "feature/SKILL.md",
65
- "curriculum/SKILL.md",
66
- "prune/SKILL.md",
67
- "quantize/SKILL.md",
68
- "merge/SKILL.md",
69
- "surgery/SKILL.md",
70
- "trend/SKILL.md",
71
- "flashback/SKILL.md",
72
- "archive/SKILL.md",
73
- "annotate/SKILL.md",
74
- "search/SKILL.md",
75
- "template/SKILL.md",
76
- "replay/SKILL.md",
77
- "cite/SKILL.md",
78
- "present/SKILL.md",
79
- "changelog/SKILL.md",
80
- "onboard/SKILL.md",
81
- "share/SKILL.md",
82
- "review/SKILL.md",
83
- "whatif/SKILL.md",
84
- "counterfactual/SKILL.md",
85
- "simulate/SKILL.md",
86
- "update/SKILL.md",
87
- "registry/SKILL.md",
88
- "postmortem/SKILL.md",
89
- "doctor/SKILL.md",
90
- "plan/SKILL.md",
91
- ];
17
+ const __dirname = dirname(fileURLToPath(import.meta.url));
18
+ const PLUGIN_ROOT = join(__dirname, "..");
92
19
 
93
20
  const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
94
21
 
95
- const EXPECTED_CONFIG = [
96
- "defaults.yaml", "lifecycle.toml", "taxonomy.toml",
97
- "experiment_archetypes.yaml", "novelty_aliases.yaml",
98
- "relationships.toml", "state.toml", "task_taxonomy.yaml",
99
- "failure_modes.yaml",
100
- "watch_alerts.yaml",
101
- ];
22
/**
 * Recursively list the files under `root/relativeDir`.
 *
 * Entries named `__pycache__` or `.pytest_cache` are skipped entirely, and
 * files ending in `.pyc` are left out.
 *
 * @param {string} root - Directory that `relativeDir` is resolved against.
 * @param {string} [relativeDir="templates"] - Directory to walk, relative to
 *   `root`; it also prefixes every returned path.
 * @returns {Promise<string[]>} Paths of the form `relativeDir/...`, joined
 *   with `/`, in directory-listing order.
 */
async function templateFiles(root, relativeDir = "templates") {
  const skippedNames = new Set(["__pycache__", ".pytest_cache"]);
  const collected = [];

  const listing = await readdir(join(root, relativeDir), { withFileTypes: true });
  for (const item of listing) {
    if (skippedNames.has(item.name)) {
      continue;
    }

    const childPath = `${relativeDir}/${item.name}`;
    if (item.isDirectory()) {
      // Descend and append the nested files in place.
      const nested = await templateFiles(root, childPath);
      collected.push(...nested);
      continue;
    }

    if (item.name.endsWith(".pyc")) {
      continue;
    }
    collected.push(childPath);
  }

  return collected;
}
102
42
 
103
43
  async function fileExists(path) {
104
44
  try {
@@ -110,8 +50,12 @@ async function fileExists(path) {
110
50
  }
111
51
 
112
52
  export async function verify(opts = {}) {
53
+ const expectedCommands = await getExpectedCommandPaths();
54
+ const expectedConfig = await getConfigFiles();
113
55
  const scopes = opts.scope ? [opts.scope] : ["global", "project"];
56
+ const expectedTemplates = await templateFiles(PLUGIN_ROOT);
114
57
  let found = false;
58
+ let totalMissing = 0;
115
59
 
116
60
  for (const scope of scopes) {
117
61
  const paths = getTargetPaths(scope);
@@ -124,7 +68,7 @@ export async function verify(opts = {}) {
124
68
  let missing = 0;
125
69
 
126
70
  console.log("Commands:");
127
- for (const cmd of EXPECTED_COMMANDS) {
71
+ for (const cmd of expectedCommands) {
128
72
  const ok = await fileExists(join(paths.commands, cmd));
129
73
  console.log(` ${ok ? "✓" : "✗"} commands/${cmd}`);
130
74
  if (!ok) missing++;
@@ -138,16 +82,24 @@ export async function verify(opts = {}) {
138
82
  }
139
83
 
140
84
  console.log("\nConfig:");
141
- for (const cfg of EXPECTED_CONFIG) {
85
+ for (const cfg of expectedConfig) {
142
86
  const ok = await fileExists(join(paths.config, cfg));
143
87
  console.log(` ${ok ? "✓" : "✗"} config/${cfg}`);
144
88
  if (!ok) missing++;
145
89
  }
146
90
 
91
+ console.log("\nTemplates:");
92
+ for (const template of expectedTemplates) {
93
+ const ok = await fileExists(join(paths.commands, template));
94
+ console.log(` ${ok ? "✓" : "✗"} commands/${template}`);
95
+ if (!ok) missing++;
96
+ }
97
+
147
98
  // Check CLAUDE.md
148
99
  const claudeOk = await fileExists(paths.claudeMd);
149
100
  console.log(`\n ${claudeOk ? "✓" : "✗"} CLAUDE.md`);
150
101
 
102
+ totalMissing += missing;
151
103
  console.log(
152
104
  `\n ${missing === 0 ? "✓ Installation complete" : `✗ ${missing} files missing — run claude-turing install`}\n`,
153
105
  );
@@ -155,6 +107,11 @@ export async function verify(opts = {}) {
155
107
 
156
108
  if (!found) {
157
109
  console.log("\n✗ turing not found. Run: claude-turing install\n");
110
+ totalMissing++;
111
+ }
112
+
113
+ if (totalMissing > 0) {
114
+ process.exitCode = 1;
158
115
  }
159
116
  }
160
117
 
@@ -4,7 +4,7 @@
4
4
 
5
5
  ## Overview
6
6
 
7
- This pipeline uses the [Turing](https://github.com/pragnition/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
7
+ This pipeline uses the [Turing](https://github.com/ThePyProgrammer/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
8
8
 
9
9
  **Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
10
10
 
@@ -17,7 +17,7 @@ evaluation:
17
17
  metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
18
18
  # Set to true for metrics where lower is better (mae, mse, rmse, loss)
19
19
  # Set to false for metrics where higher is better (accuracy, f1, auc)
20
- lower_is_better: false # {{METRIC_DIRECTION}} -- change to true if lower is better
20
+ lower_is_better: {{LOWER_IS_BETTER}}
21
21
 
22
22
  # Multi-seed configuration (Phase 10.1: /turing:seed)
23
23
  # Seeds used for seed studies — diverse values for good coverage
@@ -43,7 +43,7 @@ git checkout -b exp/NNN-description
43
43
 
44
44
  ## Memory
45
45
 
46
- Read `.claude/agent-memory/ml-researcher/MEMORY.md` at the start of each session.
46
+ Read `.claude/agent-memory/ml-researcher-{{MEMORY_DIR_NAME}}/MEMORY.md` at the start of each session.
47
47
  Update it after each experiment with:
48
48
  - Best result (if improved)
49
49
  - What was tried and why