claude-turing 4.7.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172)
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +1 -1
  3. package/agents/ml-evaluator.md +4 -4
  4. package/agents/ml-researcher.md +2 -2
  5. package/bin/turing-init.sh +2 -2
  6. package/commands/ablate.md +3 -4
  7. package/commands/annotate.md +2 -3
  8. package/commands/archive.md +2 -3
  9. package/commands/audit.md +3 -4
  10. package/commands/baseline.md +3 -4
  11. package/commands/brief.md +5 -6
  12. package/commands/budget.md +3 -4
  13. package/commands/calibrate.md +3 -4
  14. package/commands/card.md +3 -4
  15. package/commands/changelog.md +2 -3
  16. package/commands/checkpoint.md +3 -4
  17. package/commands/cite.md +2 -3
  18. package/commands/compare.md +1 -2
  19. package/commands/counterfactual.md +2 -3
  20. package/commands/curriculum.md +3 -4
  21. package/commands/design.md +3 -4
  22. package/commands/diagnose.md +4 -5
  23. package/commands/diff.md +3 -4
  24. package/commands/distill.md +3 -4
  25. package/commands/doctor.md +2 -3
  26. package/commands/ensemble.md +3 -4
  27. package/commands/explore.md +4 -5
  28. package/commands/export.md +3 -4
  29. package/commands/feature.md +3 -4
  30. package/commands/flashback.md +2 -3
  31. package/commands/fork.md +3 -4
  32. package/commands/frontier.md +3 -4
  33. package/commands/init.md +5 -6
  34. package/commands/leak.md +3 -4
  35. package/commands/lit.md +3 -4
  36. package/commands/logbook.md +5 -6
  37. package/commands/merge.md +2 -3
  38. package/commands/mode.md +1 -2
  39. package/commands/onboard.md +2 -3
  40. package/commands/paper.md +3 -4
  41. package/commands/plan.md +2 -3
  42. package/commands/poster.md +3 -4
  43. package/commands/postmortem.md +2 -3
  44. package/commands/preflight.md +5 -6
  45. package/commands/present.md +2 -3
  46. package/commands/profile.md +3 -4
  47. package/commands/prune.md +2 -3
  48. package/commands/quantize.md +2 -3
  49. package/commands/queue.md +3 -4
  50. package/commands/registry.md +2 -3
  51. package/commands/regress.md +3 -4
  52. package/commands/replay.md +2 -3
  53. package/commands/report.md +3 -4
  54. package/commands/reproduce.md +3 -4
  55. package/commands/retry.md +3 -4
  56. package/commands/review.md +2 -3
  57. package/commands/rules/loop-protocol.md +11 -11
  58. package/commands/sanity.md +3 -4
  59. package/commands/scale.md +4 -5
  60. package/commands/search.md +2 -3
  61. package/commands/seed.md +3 -4
  62. package/commands/sensitivity.md +3 -4
  63. package/commands/share.md +2 -3
  64. package/commands/simulate.md +2 -3
  65. package/commands/status.md +1 -2
  66. package/commands/stitch.md +3 -4
  67. package/commands/suggest.md +5 -6
  68. package/commands/surgery.md +2 -3
  69. package/commands/sweep.md +8 -9
  70. package/commands/template.md +2 -3
  71. package/commands/train.md +5 -6
  72. package/commands/transfer.md +3 -4
  73. package/commands/trend.md +2 -3
  74. package/commands/try.md +4 -5
  75. package/commands/turing.md +3 -3
  76. package/commands/update.md +2 -3
  77. package/commands/validate.md +4 -5
  78. package/commands/warm.md +3 -4
  79. package/commands/watch.md +4 -5
  80. package/commands/whatif.md +2 -3
  81. package/commands/xray.md +3 -4
  82. package/config/commands.yaml +75 -75
  83. package/package.json +3 -2
  84. package/skills/turing/SKILL.md +3 -3
  85. package/skills/turing/ablate/SKILL.md +3 -4
  86. package/skills/turing/annotate/SKILL.md +2 -3
  87. package/skills/turing/archive/SKILL.md +2 -3
  88. package/skills/turing/audit/SKILL.md +3 -4
  89. package/skills/turing/baseline/SKILL.md +3 -4
  90. package/skills/turing/brief/SKILL.md +5 -6
  91. package/skills/turing/budget/SKILL.md +3 -4
  92. package/skills/turing/calibrate/SKILL.md +3 -4
  93. package/skills/turing/card/SKILL.md +3 -4
  94. package/skills/turing/changelog/SKILL.md +2 -3
  95. package/skills/turing/checkpoint/SKILL.md +3 -4
  96. package/skills/turing/cite/SKILL.md +2 -3
  97. package/skills/turing/compare/SKILL.md +1 -2
  98. package/skills/turing/counterfactual/SKILL.md +2 -3
  99. package/skills/turing/curriculum/SKILL.md +3 -4
  100. package/skills/turing/design/SKILL.md +3 -4
  101. package/skills/turing/diagnose/SKILL.md +4 -5
  102. package/skills/turing/diff/SKILL.md +3 -4
  103. package/skills/turing/distill/SKILL.md +3 -4
  104. package/skills/turing/doctor/SKILL.md +2 -3
  105. package/skills/turing/ensemble/SKILL.md +3 -4
  106. package/skills/turing/explore/SKILL.md +4 -5
  107. package/skills/turing/export/SKILL.md +3 -4
  108. package/skills/turing/feature/SKILL.md +3 -4
  109. package/skills/turing/flashback/SKILL.md +2 -3
  110. package/skills/turing/fork/SKILL.md +3 -4
  111. package/skills/turing/frontier/SKILL.md +3 -4
  112. package/skills/turing/init/SKILL.md +5 -6
  113. package/skills/turing/leak/SKILL.md +3 -4
  114. package/skills/turing/lit/SKILL.md +3 -4
  115. package/skills/turing/logbook/SKILL.md +5 -6
  116. package/skills/turing/merge/SKILL.md +2 -3
  117. package/skills/turing/mode/SKILL.md +1 -2
  118. package/skills/turing/onboard/SKILL.md +2 -3
  119. package/skills/turing/paper/SKILL.md +3 -4
  120. package/skills/turing/plan/SKILL.md +2 -3
  121. package/skills/turing/poster/SKILL.md +3 -4
  122. package/skills/turing/postmortem/SKILL.md +2 -3
  123. package/skills/turing/preflight/SKILL.md +5 -6
  124. package/skills/turing/present/SKILL.md +2 -3
  125. package/skills/turing/profile/SKILL.md +3 -4
  126. package/skills/turing/prune/SKILL.md +2 -3
  127. package/skills/turing/quantize/SKILL.md +2 -3
  128. package/skills/turing/queue/SKILL.md +3 -4
  129. package/skills/turing/registry/SKILL.md +2 -3
  130. package/skills/turing/regress/SKILL.md +3 -4
  131. package/skills/turing/replay/SKILL.md +2 -3
  132. package/skills/turing/report/SKILL.md +3 -4
  133. package/skills/turing/reproduce/SKILL.md +3 -4
  134. package/skills/turing/retry/SKILL.md +3 -4
  135. package/skills/turing/review/SKILL.md +2 -3
  136. package/skills/turing/rules/loop-protocol.md +11 -11
  137. package/skills/turing/sanity/SKILL.md +3 -4
  138. package/skills/turing/scale/SKILL.md +4 -5
  139. package/skills/turing/search/SKILL.md +2 -3
  140. package/skills/turing/seed/SKILL.md +3 -4
  141. package/skills/turing/sensitivity/SKILL.md +3 -4
  142. package/skills/turing/share/SKILL.md +2 -3
  143. package/skills/turing/simulate/SKILL.md +2 -3
  144. package/skills/turing/status/SKILL.md +1 -2
  145. package/skills/turing/stitch/SKILL.md +3 -4
  146. package/skills/turing/suggest/SKILL.md +5 -6
  147. package/skills/turing/surgery/SKILL.md +2 -3
  148. package/skills/turing/sweep/SKILL.md +8 -9
  149. package/skills/turing/template/SKILL.md +2 -3
  150. package/skills/turing/train/SKILL.md +5 -6
  151. package/skills/turing/transfer/SKILL.md +3 -4
  152. package/skills/turing/trend/SKILL.md +2 -3
  153. package/skills/turing/try/SKILL.md +4 -5
  154. package/skills/turing/update/SKILL.md +2 -3
  155. package/skills/turing/validate/SKILL.md +4 -5
  156. package/skills/turing/warm/SKILL.md +3 -4
  157. package/skills/turing/watch/SKILL.md +4 -5
  158. package/skills/turing/whatif/SKILL.md +2 -3
  159. package/skills/turing/xray/SKILL.md +3 -4
  160. package/src/command-registry.js +12 -0
  161. package/src/install.js +4 -3
  162. package/src/sync-commands-layout.js +149 -0
  163. package/src/sync-skills-layout.js +4 -133
  164. package/templates/README.md +5 -8
  165. package/templates/program.md +18 -18
  166. package/templates/pyproject.toml +10 -0
  167. package/templates/requirements.txt +4 -1
  168. package/templates/scripts/generate_onboarding.py +1 -1
  169. package/templates/scripts/post-train-hook.sh +7 -8
  170. package/templates/scripts/scaffold.py +24 -26
  171. package/templates/scripts/stop-hook.sh +2 -3
  172. package/templates/scripts/turing-run-python.sh +9 -0
package/skills/turing/watch/SKILL.md CHANGED
@@ -1,7 +1,6 @@
1
1
  ---
2
2
  name: watch
3
3
  description: Live training monitor with early-warning alerts for loss spikes, NaN, overfitting, and metric plateaus.
4
- disable-model-invocation: true
5
4
  argument-hint: "[--alerts] [--interval 10] [--analyze run.log]"
6
5
  allowed-tools: Read, Bash(*), Grep, Glob
7
6
  ---
@@ -10,9 +9,9 @@ Stream metrics during training with early-warning alerts. Catches problems mid-r
10
9
 
11
10
  ## Steps
12
11
 
13
- 1. **Activate environment:**
12
+ 1. **Sync environment:**
14
13
  ```bash
15
- source .venv/bin/activate
14
+ uv sync
16
15
  ```
17
16
 
18
17
  2. **Parse arguments from `$ARGUMENTS`:**
@@ -24,13 +23,13 @@ Stream metrics during training with early-warning alerts. Catches problems mid-r
24
23
 
25
24
  3. **For post-hoc analysis:**
26
25
  ```bash
27
- python scripts/training_monitor.py --analyze run.log
26
+ uv run python scripts/training_monitor.py --analyze run.log
28
27
  ```
29
28
 
30
29
  4. **For live monitoring (inform user):**
31
30
  Live monitoring requires a running training process. Suggest the user run in a separate terminal:
32
31
  ```bash
33
- python scripts/training_monitor.py --log run.log --interval 10
32
+ uv run python scripts/training_monitor.py --log run.log --interval 10
34
33
  ```
35
34
 
36
35
  5. **Alert types:**
package/skills/turing/whatif/SKILL.md CHANGED
@@ -1,7 +1,6 @@
1
1
  ---
2
2
  name: whatif
3
3
  description: What-if analysis — answer hypotheticals from existing experiment data without running new experiments.
4
- disable-model-invocation: true
5
4
  argument-hint: "\"<question>\" [--json]"
6
5
  allowed-tools: Read, Bash(*), Grep, Glob
7
6
  ---
@@ -9,8 +8,8 @@ allowed-tools: Read, Bash(*), Grep, Glob
9
8
  Answer "what if?" questions using existing experiment data. Routes to the right estimator automatically.
10
9
 
11
10
  ## Steps
12
- 1. `source .venv/bin/activate`
13
- 2. `python scripts/whatif_engine.py $ARGUMENTS`
11
+ 1. `uv sync`
12
+ 2. `uv run python scripts/whatif_engine.py $ARGUMENTS`
14
13
  3. **Saved:** `experiments/whatif/`
15
14
 
16
15
  ## Supported question types
package/skills/turing/xray/SKILL.md CHANGED
@@ -1,7 +1,6 @@
1
1
  ---
2
2
  name: xray
3
3
  description: Internal model diagnostics — gradient flow, dead neurons, activation stats, weight distributions, tree depth analysis.
4
- disable-model-invocation: true
5
4
  argument-hint: "[exp-id] [--layer encoder.layer.2] [--compare exp-a exp-b]"
6
5
  allowed-tools: Read, Bash(*), Grep, Glob
7
6
  ---
@@ -10,9 +9,9 @@ See inside the model. When it underperforms, the fix depends on *why*.
10
9
 
11
10
  ## Steps
12
11
 
13
- 1. **Activate environment:**
12
+ 1. **Sync environment:**
14
13
  ```bash
15
- source .venv/bin/activate
14
+ uv sync
16
15
  ```
17
16
 
18
17
  2. **Parse arguments from `$ARGUMENTS`:**
@@ -23,7 +22,7 @@ See inside the model. When it underperforms, the fix depends on *why*.
23
22
 
24
23
  3. **Run model diagnostics:**
25
24
  ```bash
26
- python scripts/model_xray.py $ARGUMENTS
25
+ uv run python scripts/model_xray.py $ARGUMENTS
27
26
  ```
28
27
 
29
28
  4. **Diagnostics by model type:**
package/src/command-registry.js CHANGED
@@ -140,11 +140,13 @@ export async function getCommandNames(registryPath) {
140
140
  return registry.commandNames;
141
141
  }
142
142
 
143
+ // Installed public layout under .claude/commands/turing.
143
144
  export async function getExpectedCommandPaths(registryPath) {
144
145
  const names = await getCommandNames(registryPath);
145
146
  return ['SKILL.md', ...names.map((name) => `${name}/SKILL.md`)];
146
147
  }
147
148
 
149
+ // Editable repository source layout.
148
150
  export async function getExpectedSkillSourcePaths(registryPath) {
149
151
  const names = await getCommandNames(registryPath);
150
152
  return [
@@ -154,6 +156,16 @@ export async function getExpectedSkillSourcePaths(registryPath) {
154
156
  ];
155
157
  }
156
158
 
159
+ // Generated repository compatibility layout.
160
+ export async function getExpectedLegacyCommandCompatPaths(registryPath) {
161
+ const names = await getCommandNames(registryPath);
162
+ return [
163
+ 'commands/turing.md',
164
+ ...names.map((name) => `commands/${name}.md`),
165
+ 'commands/rules/loop-protocol.md',
166
+ ];
167
+ }
168
+
157
169
  export async function getConfigFiles(registryPath) {
158
170
  const registry = await loadCommandRegistry(registryPath);
159
171
  return registry.configFiles;
package/src/install.js CHANGED
@@ -18,6 +18,7 @@ import { getCommandNames, getConfigFiles } from "./command-registry.js";
18
18
 
19
19
  const __dirname = dirname(fileURLToPath(import.meta.url));
20
20
  const PLUGIN_ROOT = join(__dirname, "..");
21
+ const SKILL_SOURCE_ROOT = join(PLUGIN_ROOT, "skills", "turing");
21
22
 
22
23
 
23
24
  export async function install(opts = {}) {
@@ -37,7 +38,7 @@ export async function install(opts = {}) {
37
38
 
38
39
  // Copy root command (router) as SKILL.md
39
40
  await copyFile(
40
- join(PLUGIN_ROOT, "commands", "turing.md"),
41
+ join(SKILL_SOURCE_ROOT, "SKILL.md"),
41
42
  join(paths.commands, "SKILL.md"),
42
43
  );
43
44
  console.log(" Router -> SKILL.md");
@@ -45,7 +46,7 @@ export async function install(opts = {}) {
45
46
  // Copy sub-commands as <name>/SKILL.md
46
47
  for (const cmd of subCommands) {
47
48
  await copyFile(
48
- join(PLUGIN_ROOT, "commands", `${cmd}.md`),
49
+ join(SKILL_SOURCE_ROOT, cmd, "SKILL.md"),
49
50
  join(paths.commands, cmd, "SKILL.md"),
50
51
  );
51
52
  }
@@ -53,7 +54,7 @@ export async function install(opts = {}) {
53
54
 
54
55
  // Copy rules
55
56
  await copyFile(
56
- join(PLUGIN_ROOT, "commands", "rules", "loop-protocol.md"),
57
+ join(SKILL_SOURCE_ROOT, "rules", "loop-protocol.md"),
57
58
  join(paths.commands, "rules", "loop-protocol.md"),
58
59
  );
59
60
  console.log(" Rules installed");
package/src/sync-commands-layout.js ADDED
@@ -0,0 +1,149 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Synchronize the legacy commands/ compatibility tree from skills/turing/.
4
+ *
5
+ * Usage:
6
+ * node src/sync-commands-layout.js [--check]
7
+ */
8
+
9
+ import { mkdir, readdir, readFile, rm, writeFile } from "fs/promises";
10
+ import { dirname, join, relative } from "path";
11
+ import { fileURLToPath } from "url";
12
+ import { getCommandNames } from "./command-registry.js";
13
+
14
+ const __dirname = dirname(fileURLToPath(import.meta.url));
15
+ const PLUGIN_ROOT = join(__dirname, "..");
16
+ const SKILLS_DIR = join(PLUGIN_ROOT, "skills", "turing");
17
+ const COMMANDS_DIR = join(PLUGIN_ROOT, "commands");
18
+
19
+ async function readUtf8(path) {
20
+ return readFile(path, "utf8");
21
+ }
22
+
23
+ async function copyTextFile(source, target) {
24
+ await mkdir(dirname(target), { recursive: true });
25
+ await writeFile(target, await readUtf8(source));
26
+ }
27
+
28
+ async function compatibilityEntries() {
29
+ const names = await getCommandNames();
30
+ return [
31
+ {
32
+ source: join(SKILLS_DIR, "SKILL.md"),
33
+ target: join(COMMANDS_DIR, "turing.md"),
34
+ },
35
+ ...names.map((name) => ({
36
+ source: join(SKILLS_DIR, name, "SKILL.md"),
37
+ target: join(COMMANDS_DIR, `${name}.md`),
38
+ })),
39
+ {
40
+ source: join(SKILLS_DIR, "rules", "loop-protocol.md"),
41
+ target: join(COMMANDS_DIR, "rules", "loop-protocol.md"),
42
+ },
43
+ ];
44
+ }
45
+
46
+ async function existingCompatibilityEntries(dir = COMMANDS_DIR) {
47
+ let entries;
48
+ try {
49
+ entries = await readdir(dir, { withFileTypes: true });
50
+ } catch (error) {
51
+ if (error.code === "ENOENT") {
52
+ return [];
53
+ }
54
+ throw error;
55
+ }
56
+
57
+ const paths = [];
58
+ for (const entry of entries) {
59
+ const path = join(dir, entry.name);
60
+ paths.push(path);
61
+ if (entry.isDirectory()) {
62
+ paths.push(...await existingCompatibilityEntries(path));
63
+ }
64
+ }
65
+ return paths;
66
+ }
67
+
68
+ async function findDrift() {
69
+ const entries = await compatibilityEntries();
70
+ const expectedTargets = new Set(entries.map(({ target }) => target));
71
+ const expectedPaths = new Set([COMMANDS_DIR]);
72
+ for (const target of expectedTargets) {
73
+ let current = target;
74
+ while (current.startsWith(COMMANDS_DIR)) {
75
+ expectedPaths.add(current);
76
+ if (current === COMMANDS_DIR) {
77
+ break;
78
+ }
79
+ current = dirname(current);
80
+ }
81
+ }
82
+ const issues = [];
83
+
84
+ for (const { source, target } of entries) {
85
+ let sourceText;
86
+ try {
87
+ sourceText = await readUtf8(source);
88
+ } catch (error) {
89
+ issues.push(`missing source ${relative(PLUGIN_ROOT, source)}: ${error.message}`);
90
+ continue;
91
+ }
92
+
93
+ let targetText;
94
+ try {
95
+ targetText = await readUtf8(target);
96
+ } catch (error) {
97
+ if (error.code === "ENOENT") {
98
+ issues.push(`missing compatibility file ${relative(PLUGIN_ROOT, target)}`);
99
+ } else {
100
+ issues.push(`cannot read compatibility file ${relative(PLUGIN_ROOT, target)}: ${error.message}`);
101
+ }
102
+ continue;
103
+ }
104
+
105
+ if (targetText !== sourceText) {
106
+ issues.push(`diverged compatibility file ${relative(PLUGIN_ROOT, target)}`);
107
+ }
108
+ }
109
+
110
+ for (const path of await existingCompatibilityEntries()) {
111
+ if (!expectedPaths.has(path)) {
112
+ issues.push(`stale compatibility path ${relative(PLUGIN_ROOT, path)}`);
113
+ }
114
+ }
115
+
116
+ return issues;
117
+ }
118
+
119
+ export async function syncCommandsLayout({ check = false } = {}) {
120
+ if (check) {
121
+ const issues = await findDrift();
122
+ if (issues.length > 0) {
123
+ for (const issue of issues) {
124
+ console.error(issue);
125
+ }
126
+ process.exitCode = 1;
127
+ return;
128
+ }
129
+ console.log("commands compatibility tree is in sync");
130
+ return;
131
+ }
132
+
133
+ await rm(COMMANDS_DIR, { recursive: true, force: true });
134
+ for (const { source, target } of await compatibilityEntries()) {
135
+ await copyTextFile(source, target);
136
+ }
137
+ console.log("commands compatibility tree synchronized");
138
+ }
139
+
140
+ const isDirectRun =
141
+ process.argv[1] &&
142
+ fileURLToPath(import.meta.url).endsWith(process.argv[1].replace(/^.*\//, ""));
143
+
144
+ if (isDirectRun) {
145
+ syncCommandsLayout({ check: process.argv.includes("--check") }).catch((error) => {
146
+ console.error(error.message);
147
+ process.exitCode = 1;
148
+ });
149
+ }
package/src/sync-skills-layout.js CHANGED
@@ -1,148 +1,19 @@
1
1
  #!/usr/bin/env node
2
2
  /**
3
- * Synchronize the modern skills/turing package mirror from commands/.
3
+ * Backward-compatible wrapper for the flipped source layout.
4
4
  *
5
- * Usage:
6
- * node src/sync-skills-layout.js [--check]
5
+ * The editable source is now skills/turing/, and sync generates commands/.
7
6
  */
8
7
 
9
- import { mkdir, readdir, readFile, rm, writeFile } from "fs/promises";
10
- import { dirname, join, relative } from "path";
11
8
  import { fileURLToPath } from "url";
12
- import { getCommandNames } from "./command-registry.js";
13
-
14
- const __dirname = dirname(fileURLToPath(import.meta.url));
15
- const PLUGIN_ROOT = join(__dirname, "..");
16
- const COMMANDS_DIR = join(PLUGIN_ROOT, "commands");
17
- const SKILLS_DIR = join(PLUGIN_ROOT, "skills", "turing");
18
-
19
- async function readUtf8(path) {
20
- return readFile(path, "utf8");
21
- }
22
-
23
- async function copyTextFile(source, target) {
24
- await mkdir(dirname(target), { recursive: true });
25
- await writeFile(target, await readUtf8(source));
26
- }
27
-
28
- async function mirrorEntries() {
29
- const names = await getCommandNames();
30
- return [
31
- {
32
- source: join(COMMANDS_DIR, "turing.md"),
33
- target: join(SKILLS_DIR, "SKILL.md"),
34
- },
35
- ...names.map((name) => ({
36
- source: join(COMMANDS_DIR, `${name}.md`),
37
- target: join(SKILLS_DIR, name, "SKILL.md"),
38
- })),
39
- {
40
- source: join(COMMANDS_DIR, "rules", "loop-protocol.md"),
41
- target: join(SKILLS_DIR, "rules", "loop-protocol.md"),
42
- },
43
- ];
44
- }
45
-
46
- async function existingMirrorEntries(dir = SKILLS_DIR) {
47
- let entries;
48
- try {
49
- entries = await readdir(dir, { withFileTypes: true });
50
- } catch (error) {
51
- if (error.code === "ENOENT") {
52
- return [];
53
- }
54
- throw error;
55
- }
56
-
57
- const paths = [];
58
- for (const entry of entries) {
59
- const path = join(dir, entry.name);
60
- paths.push(path);
61
- if (entry.isDirectory()) {
62
- paths.push(...await existingMirrorEntries(path));
63
- }
64
- }
65
- return paths;
66
- }
67
-
68
- async function findDrift() {
69
- const entries = await mirrorEntries();
70
- const expectedTargets = new Set(entries.map(({ target }) => target));
71
- const expectedPaths = new Set([SKILLS_DIR]);
72
- for (const target of expectedTargets) {
73
- let current = target;
74
- while (current.startsWith(SKILLS_DIR)) {
75
- expectedPaths.add(current);
76
- if (current === SKILLS_DIR) {
77
- break;
78
- }
79
- current = dirname(current);
80
- }
81
- }
82
- const issues = [];
83
-
84
- for (const { source, target } of entries) {
85
- let sourceText;
86
- try {
87
- sourceText = await readUtf8(source);
88
- } catch (error) {
89
- issues.push(`missing source ${relative(PLUGIN_ROOT, source)}: ${error.message}`);
90
- continue;
91
- }
92
-
93
- let targetText;
94
- try {
95
- targetText = await readUtf8(target);
96
- } catch (error) {
97
- if (error.code === "ENOENT") {
98
- issues.push(`missing mirror ${relative(PLUGIN_ROOT, target)}`);
99
- } else {
100
- issues.push(`cannot read mirror ${relative(PLUGIN_ROOT, target)}: ${error.message}`);
101
- }
102
- continue;
103
- }
104
-
105
- if (targetText !== sourceText) {
106
- issues.push(`diverged mirror ${relative(PLUGIN_ROOT, target)}`);
107
- }
108
- }
109
-
110
- for (const path of await existingMirrorEntries()) {
111
- if (!expectedPaths.has(path)) {
112
- issues.push(`stale mirror ${relative(PLUGIN_ROOT, path)}`);
113
- }
114
- }
115
-
116
- return issues;
117
- }
118
-
119
- export async function syncSkillsLayout({ check = false } = {}) {
120
- if (check) {
121
- const issues = await findDrift();
122
- if (issues.length > 0) {
123
- for (const issue of issues) {
124
- console.error(issue);
125
- }
126
- process.exitCode = 1;
127
- return;
128
- }
129
- console.log("skills/turing mirror is in sync");
130
- return;
131
- }
132
-
133
- await rm(SKILLS_DIR, { recursive: true, force: true });
134
- for (const { source, target } of await mirrorEntries()) {
135
- await copyTextFile(source, target);
136
- }
137
- console.log("skills/turing mirror synchronized");
138
- }
9
+ import { syncCommandsLayout } from "./sync-commands-layout.js";
139
10
 
140
11
  const isDirectRun =
141
12
  process.argv[1] &&
142
13
  fileURLToPath(import.meta.url).endsWith(process.argv[1].replace(/^.*\//, ""));
143
14
 
144
15
  if (isDirectRun) {
145
- syncSkillsLayout({ check: process.argv.includes("--check") }).catch((error) => {
16
+ syncCommandsLayout({ check: process.argv.includes("--check") }).catch((error) => {
146
17
  console.error(error.message);
147
18
  process.exitCode = 1;
148
19
  });
package/templates/README.md CHANGED
@@ -21,23 +21,21 @@ This separation is the invariant that makes experiment comparisons valid.
21
21
 
22
22
  ```bash
23
23
  # 1. Set up the environment
24
- python -m venv .venv
25
- source .venv/bin/activate
26
- pip install -r requirements.txt
24
+ uv sync
27
25
 
28
26
  # 2. Add your training data to {{DATA_SOURCE}}
29
27
 
30
28
  # 3. Create train/val/test splits
31
- python prepare.py
29
+ uv run python prepare.py
32
30
 
33
31
  # 4. Run training
34
- python train.py > run.log 2>&1
32
+ uv run python train.py > run.log 2>&1
35
33
 
36
34
  # 5. Check results
37
35
  grep -A 10 "^---" run.log
38
36
 
39
37
  # 6. View experiment history
40
- python scripts/show_metrics.py
38
+ uv run python scripts/show_metrics.py
41
39
  ```
42
40
 
43
41
  ## Using the Autoresearch Agent
@@ -88,6 +86,5 @@ For hands-off mode: `/loop 5m /turing:train`
88
86
  ## Running Tests
89
87
 
90
88
  ```bash
91
- source .venv/bin/activate
92
- python -m pytest tests/ -v
89
+ uv run pytest tests/ -v
93
90
  ```
package/templates/program.md CHANGED
@@ -54,11 +54,11 @@ Update it after each experiment with:
54
54
 
55
55
  For systematic hyperparameter search:
56
56
  1. Edit `sweep_config.yaml` with parameter ranges
57
- 2. Generate queue: `python scripts/sweep.py`
58
- 3. Check status: `python scripts/sweep.py --status`
59
- 4. Get next: `python scripts/sweep.py --next`
57
+ 2. Generate queue: `uv run python scripts/sweep.py`
58
+ 3. Check status: `uv run python scripts/sweep.py --status`
59
+ 4. Get next: `uv run python scripts/sweep.py --next`
60
60
  5. Apply overrides, create branch, run training
61
- 6. Mark done: `python scripts/sweep.py --mark <name> complete|failed`
61
+ 6. Mark done: `uv run python scripts/sweep.py --mark <name> complete|failed`
62
62
 
63
63
  ## THE LOOP
64
64
 
@@ -66,8 +66,8 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
66
66
 
67
67
  1. **OBSERVE** — Read recent results, check hypothesis queue, research plan, and review failed diffs:
68
68
  ```bash
69
- python scripts/show_metrics.py --last 5
70
- python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
69
+ uv run python scripts/show_metrics.py --last 5
70
+ uv run python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
71
71
  cat RESEARCH_PLAN.md 2>/dev/null || true
72
72
  ```
73
73
 
@@ -88,12 +88,12 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
88
88
 
89
89
  **If using a queued hypothesis:**
90
90
  ```bash
91
- python scripts/manage_hypotheses.py mark hyp-NNN in-progress
91
+ uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
92
92
  ```
93
93
 
94
94
  **If generating your own hypothesis**, register it with structured detail:
95
95
  ```bash
96
- python scripts/manage_hypotheses.py add "your hypothesis description" \
96
+ uv run python scripts/manage_hypotheses.py add "your hypothesis description" \
97
97
  --priority medium --source agent \
98
98
  --model-type xgboost \
99
99
  --hyperparams '{"max_depth": 8, "n_estimators": 200}' \
@@ -101,7 +101,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
101
101
  --tags "depth,estimators" \
102
102
  --parent exp-NNN \
103
103
  --expected "deeper trees should capture feature interactions"
104
- python scripts/manage_hypotheses.py mark hyp-NNN in-progress
104
+ uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
105
105
  ```
106
106
 
107
107
  This creates both an index entry in `hypotheses.yaml` and a detailed file at `hypotheses/hyp-NNN.yaml` with full architecture, hyperparameters, expected outcome, and lineage.
@@ -110,7 +110,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
110
110
 
111
111
  To read a hypothesis's full detail:
112
112
  ```bash
113
- python scripts/manage_hypotheses.py show hyp-NNN
113
+ uv run python scripts/manage_hypotheses.py show hyp-NNN
114
114
  ```
115
115
 
116
116
  3. **PREPARE** — Modify `config.yaml` for hyperparameter changes. Only modify `train.py` for structural code changes.
@@ -122,7 +122,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
122
122
 
123
123
  5. **EXECUTE** training:
124
124
  ```bash
125
- source .venv/bin/activate && python train.py > run.log 2>&1
125
+ uv run python train.py > run.log 2>&1
126
126
  ```
127
127
 
128
128
  6. **MEASURE** — Parse metrics from run.log:
@@ -144,7 +144,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
144
144
 
145
145
  8. **RECORD** — Log the experiment (kept or discarded):
146
146
  ```bash
147
- python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
147
+ uv run python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
148
148
  '{"{{TARGET_METRIC}}": X.XX, ...}' \
149
149
  '{"model_type": "xgboost", "hyperparams": {...}}' \
150
150
  models/model.joblib "Description of hypothesis and outcome"
@@ -152,7 +152,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
152
152
 
153
153
  Update the hypothesis status with result metrics:
154
154
  ```bash
155
- python scripts/manage_hypotheses.py mark hyp-NNN tested \
155
+ uv run python scripts/manage_hypotheses.py mark hyp-NNN tested \
156
156
  --result exp-NNN \
157
157
  --metrics '{"{{TARGET_METRIC}}": X.XX, ...}' \
158
158
  --notes "Brief explanation of what happened and why"
@@ -162,7 +162,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
162
162
 
163
163
  Then synthesize a decision packet and auto-queue follow-ups:
164
164
  ```bash
165
- python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
165
+ uv run python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
166
166
  ```
167
167
  This produces a verdict (promote/branch_followup/abandon/fix_and_retry) and automatically queues follow-up hypotheses for `branch_followup` and `fix_and_retry` outcomes.
168
168
 
@@ -172,7 +172,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
172
172
  - Report final best model and recommend next steps
173
173
  - **Before declaring final results**, run a seed study to verify robustness:
174
174
  ```bash
175
- python scripts/seed_runner.py --quick
175
+ uv run python scripts/seed_runner.py --quick
176
176
  ```
177
177
  If CV > 5%, the result is seed-sensitive — report mean ± std, not a single-seed number.
178
178
 
@@ -180,9 +180,9 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
180
180
 
181
181
  ## Execution Rules
182
182
 
183
- - **ALWAYS redirect output:** `python train.py > run.log 2>&1`
183
+ - **ALWAYS redirect output:** `uv run python train.py > run.log 2>&1`
184
184
  - **ALWAYS parse with grep:** `grep -A 10 "^---" run.log | head -10`
185
- - **ALWAYS activate venv:** `source .venv/bin/activate`
185
+ - **ALWAYS run Python through uv:** `uv run python ...`
186
186
  - **NEVER install packages** without human approval
187
187
 
188
188
  ## Strategy Escalation Protocol
@@ -219,5 +219,5 @@ Starting suggestions (ordered by expected impact):
219
219
  ## Comparing Runs
220
220
 
221
221
  ```bash
222
- python scripts/compare_runs.py exp-001 exp-002
222
+ uv run python scripts/compare_runs.py exp-001 exp-002
223
223
  ```
package/templates/pyproject.toml CHANGED
@@ -2,6 +2,16 @@
2
2
  name = "{{PROJECT_NAME}}-ml"
3
3
  version = "0.1.0"
4
4
  requires-python = ">=3.12"
5
+ dependencies = [
6
+ "scikit-learn>=1.6",
7
+ "xgboost>=3.2",
8
+ "lightgbm>=4.6",
9
+ "pandas>=2.2",
10
+ "numpy>=2.0",
11
+ "joblib>=1.4",
12
+ "pyyaml>=6.0",
13
+ "pytest>=8.0",
14
+ ]
5
15
 
6
16
  [tool.pytest.ini_options]
7
17
  testpaths = ["tests"]
package/templates/requirements.txt CHANGED
@@ -1,3 +1,6 @@
1
+ # Compatibility export only. pyproject.toml is canonical for dependencies.
2
+ # Prefer: uv sync
3
+
1
4
  scikit-learn>=1.6
2
5
  xgboost>=3.2
3
6
  lightgbm>=4.6
@@ -8,5 +11,5 @@ pyyaml>=6.0
8
11
  pytest>=8.0
9
12
 
10
13
  # Optional: tree-search-guided hypothesis exploration
11
- # Install with: pip install "treequest[all]"
14
+ # Install with: uv add "treequest[all]"
12
15
  # treequest>=0.1
package/templates/scripts/generate_onboarding.py CHANGED
@@ -210,7 +210,7 @@ def format_onboarding_report(config, experiments, families, best, decisions,
210
210
  "5. `/turing:try \"your hypothesis\"` — inject ideas",
211
211
  "6. `/turing:train` — run next experiment",
212
212
  ], "engineer": [
213
- "1. `pip install -r requirements.txt`",
213
+ "1. `uv sync`",
214
214
  "2. Review `config.yaml` for data paths",
215
215
  "3. `/turing:status` — where things stand",
216
216
  "4. Check `train.py` for current model",