claude-turing 4.7.0 → 4.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +1 -1
- package/agents/ml-evaluator.md +4 -4
- package/agents/ml-researcher.md +2 -2
- package/bin/turing-init.sh +2 -2
- package/commands/ablate.md +3 -4
- package/commands/annotate.md +2 -3
- package/commands/archive.md +2 -3
- package/commands/audit.md +3 -4
- package/commands/baseline.md +3 -4
- package/commands/brief.md +5 -6
- package/commands/budget.md +3 -4
- package/commands/calibrate.md +3 -4
- package/commands/card.md +3 -4
- package/commands/changelog.md +2 -3
- package/commands/checkpoint.md +3 -4
- package/commands/cite.md +2 -3
- package/commands/compare.md +1 -2
- package/commands/counterfactual.md +2 -3
- package/commands/curriculum.md +3 -4
- package/commands/design.md +3 -4
- package/commands/diagnose.md +4 -5
- package/commands/diff.md +3 -4
- package/commands/distill.md +3 -4
- package/commands/doctor.md +2 -3
- package/commands/ensemble.md +3 -4
- package/commands/explore.md +4 -5
- package/commands/export.md +3 -4
- package/commands/feature.md +3 -4
- package/commands/flashback.md +2 -3
- package/commands/fork.md +3 -4
- package/commands/frontier.md +3 -4
- package/commands/init.md +5 -6
- package/commands/leak.md +3 -4
- package/commands/lit.md +3 -4
- package/commands/logbook.md +5 -6
- package/commands/merge.md +2 -3
- package/commands/mode.md +1 -2
- package/commands/onboard.md +2 -3
- package/commands/paper.md +3 -4
- package/commands/plan.md +2 -3
- package/commands/poster.md +3 -4
- package/commands/postmortem.md +2 -3
- package/commands/preflight.md +5 -6
- package/commands/present.md +2 -3
- package/commands/profile.md +3 -4
- package/commands/prune.md +2 -3
- package/commands/quantize.md +2 -3
- package/commands/queue.md +3 -4
- package/commands/registry.md +2 -3
- package/commands/regress.md +3 -4
- package/commands/replay.md +2 -3
- package/commands/report.md +3 -4
- package/commands/reproduce.md +3 -4
- package/commands/retry.md +3 -4
- package/commands/review.md +2 -3
- package/commands/rules/loop-protocol.md +11 -11
- package/commands/sanity.md +3 -4
- package/commands/scale.md +4 -5
- package/commands/search.md +2 -3
- package/commands/seed.md +3 -4
- package/commands/sensitivity.md +3 -4
- package/commands/share.md +2 -3
- package/commands/simulate.md +2 -3
- package/commands/status.md +1 -2
- package/commands/stitch.md +3 -4
- package/commands/suggest.md +5 -6
- package/commands/surgery.md +2 -3
- package/commands/sweep.md +8 -9
- package/commands/template.md +2 -3
- package/commands/train.md +5 -6
- package/commands/transfer.md +3 -4
- package/commands/trend.md +2 -3
- package/commands/try.md +4 -5
- package/commands/turing.md +3 -3
- package/commands/update.md +2 -3
- package/commands/validate.md +4 -5
- package/commands/warm.md +3 -4
- package/commands/watch.md +4 -5
- package/commands/whatif.md +2 -3
- package/commands/xray.md +3 -4
- package/config/commands.yaml +75 -75
- package/package.json +3 -2
- package/skills/turing/SKILL.md +3 -3
- package/skills/turing/ablate/SKILL.md +3 -4
- package/skills/turing/annotate/SKILL.md +2 -3
- package/skills/turing/archive/SKILL.md +2 -3
- package/skills/turing/audit/SKILL.md +3 -4
- package/skills/turing/baseline/SKILL.md +3 -4
- package/skills/turing/brief/SKILL.md +5 -6
- package/skills/turing/budget/SKILL.md +3 -4
- package/skills/turing/calibrate/SKILL.md +3 -4
- package/skills/turing/card/SKILL.md +3 -4
- package/skills/turing/changelog/SKILL.md +2 -3
- package/skills/turing/checkpoint/SKILL.md +3 -4
- package/skills/turing/cite/SKILL.md +2 -3
- package/skills/turing/compare/SKILL.md +1 -2
- package/skills/turing/counterfactual/SKILL.md +2 -3
- package/skills/turing/curriculum/SKILL.md +3 -4
- package/skills/turing/design/SKILL.md +3 -4
- package/skills/turing/diagnose/SKILL.md +4 -5
- package/skills/turing/diff/SKILL.md +3 -4
- package/skills/turing/distill/SKILL.md +3 -4
- package/skills/turing/doctor/SKILL.md +2 -3
- package/skills/turing/ensemble/SKILL.md +3 -4
- package/skills/turing/explore/SKILL.md +4 -5
- package/skills/turing/export/SKILL.md +3 -4
- package/skills/turing/feature/SKILL.md +3 -4
- package/skills/turing/flashback/SKILL.md +2 -3
- package/skills/turing/fork/SKILL.md +3 -4
- package/skills/turing/frontier/SKILL.md +3 -4
- package/skills/turing/init/SKILL.md +5 -6
- package/skills/turing/leak/SKILL.md +3 -4
- package/skills/turing/lit/SKILL.md +3 -4
- package/skills/turing/logbook/SKILL.md +5 -6
- package/skills/turing/merge/SKILL.md +2 -3
- package/skills/turing/mode/SKILL.md +1 -2
- package/skills/turing/onboard/SKILL.md +2 -3
- package/skills/turing/paper/SKILL.md +3 -4
- package/skills/turing/plan/SKILL.md +2 -3
- package/skills/turing/poster/SKILL.md +3 -4
- package/skills/turing/postmortem/SKILL.md +2 -3
- package/skills/turing/preflight/SKILL.md +5 -6
- package/skills/turing/present/SKILL.md +2 -3
- package/skills/turing/profile/SKILL.md +3 -4
- package/skills/turing/prune/SKILL.md +2 -3
- package/skills/turing/quantize/SKILL.md +2 -3
- package/skills/turing/queue/SKILL.md +3 -4
- package/skills/turing/registry/SKILL.md +2 -3
- package/skills/turing/regress/SKILL.md +3 -4
- package/skills/turing/replay/SKILL.md +2 -3
- package/skills/turing/report/SKILL.md +3 -4
- package/skills/turing/reproduce/SKILL.md +3 -4
- package/skills/turing/retry/SKILL.md +3 -4
- package/skills/turing/review/SKILL.md +2 -3
- package/skills/turing/rules/loop-protocol.md +11 -11
- package/skills/turing/sanity/SKILL.md +3 -4
- package/skills/turing/scale/SKILL.md +4 -5
- package/skills/turing/search/SKILL.md +2 -3
- package/skills/turing/seed/SKILL.md +3 -4
- package/skills/turing/sensitivity/SKILL.md +3 -4
- package/skills/turing/share/SKILL.md +2 -3
- package/skills/turing/simulate/SKILL.md +2 -3
- package/skills/turing/status/SKILL.md +1 -2
- package/skills/turing/stitch/SKILL.md +3 -4
- package/skills/turing/suggest/SKILL.md +5 -6
- package/skills/turing/surgery/SKILL.md +2 -3
- package/skills/turing/sweep/SKILL.md +8 -9
- package/skills/turing/template/SKILL.md +2 -3
- package/skills/turing/train/SKILL.md +5 -6
- package/skills/turing/transfer/SKILL.md +3 -4
- package/skills/turing/trend/SKILL.md +2 -3
- package/skills/turing/try/SKILL.md +4 -5
- package/skills/turing/update/SKILL.md +2 -3
- package/skills/turing/validate/SKILL.md +4 -5
- package/skills/turing/warm/SKILL.md +3 -4
- package/skills/turing/watch/SKILL.md +4 -5
- package/skills/turing/whatif/SKILL.md +2 -3
- package/skills/turing/xray/SKILL.md +3 -4
- package/src/command-registry.js +12 -0
- package/src/install.js +4 -3
- package/src/sync-commands-layout.js +149 -0
- package/src/sync-skills-layout.js +4 -133
- package/templates/README.md +5 -8
- package/templates/program.md +18 -18
- package/templates/pyproject.toml +10 -0
- package/templates/requirements.txt +4 -1
- package/templates/scripts/generate_onboarding.py +1 -1
- package/templates/scripts/post-train-hook.sh +7 -8
- package/templates/scripts/scaffold.py +24 -26
- package/templates/scripts/stop-hook.sh +2 -3
- package/templates/scripts/turing-run-python.sh +9 -0
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: watch
|
|
3
3
|
description: Live training monitor with early-warning alerts for loss spikes, NaN, overfitting, and metric plateaus.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--alerts] [--interval 10] [--analyze run.log]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -10,9 +9,9 @@ Stream metrics during training with early-warning alerts. Catches problems mid-r
|
|
|
10
9
|
|
|
11
10
|
## Steps
|
|
12
11
|
|
|
13
|
-
1. **
|
|
12
|
+
1. **Sync environment:**
|
|
14
13
|
```bash
|
|
15
|
-
|
|
14
|
+
uv sync
|
|
16
15
|
```
|
|
17
16
|
|
|
18
17
|
2. **Parse arguments from `$ARGUMENTS`:**
|
|
@@ -24,13 +23,13 @@ Stream metrics during training with early-warning alerts. Catches problems mid-r
|
|
|
24
23
|
|
|
25
24
|
3. **For post-hoc analysis:**
|
|
26
25
|
```bash
|
|
27
|
-
python scripts/training_monitor.py --analyze run.log
|
|
26
|
+
uv run python scripts/training_monitor.py --analyze run.log
|
|
28
27
|
```
|
|
29
28
|
|
|
30
29
|
4. **For live monitoring (inform user):**
|
|
31
30
|
Live monitoring requires a running training process. Suggest the user run in a separate terminal:
|
|
32
31
|
```bash
|
|
33
|
-
python scripts/training_monitor.py --log run.log --interval 10
|
|
32
|
+
uv run python scripts/training_monitor.py --log run.log --interval 10
|
|
34
33
|
```
|
|
35
34
|
|
|
36
35
|
5. **Alert types:**
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: whatif
|
|
3
3
|
description: What-if analysis — answer hypotheticals from existing experiment data without running new experiments.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "\"<question>\" [--json]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -9,8 +8,8 @@ allowed-tools: Read, Bash(*), Grep, Glob
|
|
|
9
8
|
Answer "what if?" questions using existing experiment data. Routes to the right estimator automatically.
|
|
10
9
|
|
|
11
10
|
## Steps
|
|
12
|
-
1. `
|
|
13
|
-
2. `python scripts/whatif_engine.py $ARGUMENTS`
|
|
11
|
+
1. `uv sync`
|
|
12
|
+
2. `uv run python scripts/whatif_engine.py $ARGUMENTS`
|
|
14
13
|
3. **Saved:** `experiments/whatif/`
|
|
15
14
|
|
|
16
15
|
## Supported question types
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: xray
|
|
3
3
|
description: Internal model diagnostics — gradient flow, dead neurons, activation stats, weight distributions, tree depth analysis.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--layer encoder.layer.2] [--compare exp-a exp-b]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -10,9 +9,9 @@ See inside the model. When it underperforms, the fix depends on *why*.
|
|
|
10
9
|
|
|
11
10
|
## Steps
|
|
12
11
|
|
|
13
|
-
1. **
|
|
12
|
+
1. **Sync environment:**
|
|
14
13
|
```bash
|
|
15
|
-
|
|
14
|
+
uv sync
|
|
16
15
|
```
|
|
17
16
|
|
|
18
17
|
2. **Parse arguments from `$ARGUMENTS`:**
|
|
@@ -23,7 +22,7 @@ See inside the model. When it underperforms, the fix depends on *why*.
|
|
|
23
22
|
|
|
24
23
|
3. **Run model diagnostics:**
|
|
25
24
|
```bash
|
|
26
|
-
python scripts/model_xray.py $ARGUMENTS
|
|
25
|
+
uv run python scripts/model_xray.py $ARGUMENTS
|
|
27
26
|
```
|
|
28
27
|
|
|
29
28
|
4. **Diagnostics by model type:**
|
package/src/command-registry.js
CHANGED
|
@@ -140,11 +140,13 @@ export async function getCommandNames(registryPath) {
|
|
|
140
140
|
return registry.commandNames;
|
|
141
141
|
}
|
|
142
142
|
|
|
143
|
+
// Installed public layout under .claude/commands/turing.
|
|
143
144
|
export async function getExpectedCommandPaths(registryPath) {
|
|
144
145
|
const names = await getCommandNames(registryPath);
|
|
145
146
|
return ['SKILL.md', ...names.map((name) => `${name}/SKILL.md`)];
|
|
146
147
|
}
|
|
147
148
|
|
|
149
|
+
// Editable repository source layout.
|
|
148
150
|
export async function getExpectedSkillSourcePaths(registryPath) {
|
|
149
151
|
const names = await getCommandNames(registryPath);
|
|
150
152
|
return [
|
|
@@ -154,6 +156,16 @@ export async function getExpectedSkillSourcePaths(registryPath) {
|
|
|
154
156
|
];
|
|
155
157
|
}
|
|
156
158
|
|
|
159
|
+
// Generated repository compatibility layout.
|
|
160
|
+
export async function getExpectedLegacyCommandCompatPaths(registryPath) {
|
|
161
|
+
const names = await getCommandNames(registryPath);
|
|
162
|
+
return [
|
|
163
|
+
'commands/turing.md',
|
|
164
|
+
...names.map((name) => `commands/${name}.md`),
|
|
165
|
+
'commands/rules/loop-protocol.md',
|
|
166
|
+
];
|
|
167
|
+
}
|
|
168
|
+
|
|
157
169
|
export async function getConfigFiles(registryPath) {
|
|
158
170
|
const registry = await loadCommandRegistry(registryPath);
|
|
159
171
|
return registry.configFiles;
|
package/src/install.js
CHANGED
|
@@ -18,6 +18,7 @@ import { getCommandNames, getConfigFiles } from "./command-registry.js";
|
|
|
18
18
|
|
|
19
19
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
20
20
|
const PLUGIN_ROOT = join(__dirname, "..");
|
|
21
|
+
const SKILL_SOURCE_ROOT = join(PLUGIN_ROOT, "skills", "turing");
|
|
21
22
|
|
|
22
23
|
|
|
23
24
|
export async function install(opts = {}) {
|
|
@@ -37,7 +38,7 @@ export async function install(opts = {}) {
|
|
|
37
38
|
|
|
38
39
|
// Copy root command (router) as SKILL.md
|
|
39
40
|
await copyFile(
|
|
40
|
-
join(PLUGIN_ROOT, "commands", "turing.md"),
|
|
41
|
+
join(SKILL_SOURCE_ROOT, "SKILL.md"),
|
|
41
42
|
join(paths.commands, "SKILL.md"),
|
|
42
43
|
);
|
|
43
44
|
console.log(" Router -> SKILL.md");
|
|
@@ -45,7 +46,7 @@ export async function install(opts = {}) {
|
|
|
45
46
|
// Copy sub-commands as <name>/SKILL.md
|
|
46
47
|
for (const cmd of subCommands) {
|
|
47
48
|
await copyFile(
|
|
48
|
-
join(PLUGIN_ROOT, "commands", `${cmd}.md`),
|
|
49
|
+
join(SKILL_SOURCE_ROOT, cmd, "SKILL.md"),
|
|
49
50
|
join(paths.commands, cmd, "SKILL.md"),
|
|
50
51
|
);
|
|
51
52
|
}
|
|
@@ -53,7 +54,7 @@ export async function install(opts = {}) {
|
|
|
53
54
|
|
|
54
55
|
// Copy rules
|
|
55
56
|
await copyFile(
|
|
56
|
-
join(PLUGIN_ROOT, "commands", "rules", "loop-protocol.md"),
|
|
57
|
+
join(SKILL_SOURCE_ROOT, "rules", "loop-protocol.md"),
|
|
57
58
|
join(paths.commands, "rules", "loop-protocol.md"),
|
|
58
59
|
);
|
|
59
60
|
console.log(" Rules installed");
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Synchronize the legacy commands/ compatibility tree from skills/turing/.
|
|
4
|
+
*
|
|
5
|
+
* Usage:
|
|
6
|
+
* node src/sync-commands-layout.js [--check]
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import { mkdir, readdir, readFile, rm, writeFile } from "fs/promises";
|
|
10
|
+
import { dirname, join, relative } from "path";
|
|
11
|
+
import { fileURLToPath } from "url";
|
|
12
|
+
import { getCommandNames } from "./command-registry.js";
|
|
13
|
+
|
|
14
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
|
+
const PLUGIN_ROOT = join(__dirname, "..");
|
|
16
|
+
const SKILLS_DIR = join(PLUGIN_ROOT, "skills", "turing");
|
|
17
|
+
const COMMANDS_DIR = join(PLUGIN_ROOT, "commands");
|
|
18
|
+
|
|
19
|
+
async function readUtf8(path) {
|
|
20
|
+
return readFile(path, "utf8");
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
async function copyTextFile(source, target) {
|
|
24
|
+
await mkdir(dirname(target), { recursive: true });
|
|
25
|
+
await writeFile(target, await readUtf8(source));
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
async function compatibilityEntries() {
|
|
29
|
+
const names = await getCommandNames();
|
|
30
|
+
return [
|
|
31
|
+
{
|
|
32
|
+
source: join(SKILLS_DIR, "SKILL.md"),
|
|
33
|
+
target: join(COMMANDS_DIR, "turing.md"),
|
|
34
|
+
},
|
|
35
|
+
...names.map((name) => ({
|
|
36
|
+
source: join(SKILLS_DIR, name, "SKILL.md"),
|
|
37
|
+
target: join(COMMANDS_DIR, `${name}.md`),
|
|
38
|
+
})),
|
|
39
|
+
{
|
|
40
|
+
source: join(SKILLS_DIR, "rules", "loop-protocol.md"),
|
|
41
|
+
target: join(COMMANDS_DIR, "rules", "loop-protocol.md"),
|
|
42
|
+
},
|
|
43
|
+
];
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
async function existingCompatibilityEntries(dir = COMMANDS_DIR) {
|
|
47
|
+
let entries;
|
|
48
|
+
try {
|
|
49
|
+
entries = await readdir(dir, { withFileTypes: true });
|
|
50
|
+
} catch (error) {
|
|
51
|
+
if (error.code === "ENOENT") {
|
|
52
|
+
return [];
|
|
53
|
+
}
|
|
54
|
+
throw error;
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
const paths = [];
|
|
58
|
+
for (const entry of entries) {
|
|
59
|
+
const path = join(dir, entry.name);
|
|
60
|
+
paths.push(path);
|
|
61
|
+
if (entry.isDirectory()) {
|
|
62
|
+
paths.push(...await existingCompatibilityEntries(path));
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
return paths;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
async function findDrift() {
|
|
69
|
+
const entries = await compatibilityEntries();
|
|
70
|
+
const expectedTargets = new Set(entries.map(({ target }) => target));
|
|
71
|
+
const expectedPaths = new Set([COMMANDS_DIR]);
|
|
72
|
+
for (const target of expectedTargets) {
|
|
73
|
+
let current = target;
|
|
74
|
+
while (current.startsWith(COMMANDS_DIR)) {
|
|
75
|
+
expectedPaths.add(current);
|
|
76
|
+
if (current === COMMANDS_DIR) {
|
|
77
|
+
break;
|
|
78
|
+
}
|
|
79
|
+
current = dirname(current);
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
const issues = [];
|
|
83
|
+
|
|
84
|
+
for (const { source, target } of entries) {
|
|
85
|
+
let sourceText;
|
|
86
|
+
try {
|
|
87
|
+
sourceText = await readUtf8(source);
|
|
88
|
+
} catch (error) {
|
|
89
|
+
issues.push(`missing source ${relative(PLUGIN_ROOT, source)}: ${error.message}`);
|
|
90
|
+
continue;
|
|
91
|
+
}
|
|
92
|
+
|
|
93
|
+
let targetText;
|
|
94
|
+
try {
|
|
95
|
+
targetText = await readUtf8(target);
|
|
96
|
+
} catch (error) {
|
|
97
|
+
if (error.code === "ENOENT") {
|
|
98
|
+
issues.push(`missing compatibility file ${relative(PLUGIN_ROOT, target)}`);
|
|
99
|
+
} else {
|
|
100
|
+
issues.push(`cannot read compatibility file ${relative(PLUGIN_ROOT, target)}: ${error.message}`);
|
|
101
|
+
}
|
|
102
|
+
continue;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
if (targetText !== sourceText) {
|
|
106
|
+
issues.push(`diverged compatibility file ${relative(PLUGIN_ROOT, target)}`);
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
for (const path of await existingCompatibilityEntries()) {
|
|
111
|
+
if (!expectedPaths.has(path)) {
|
|
112
|
+
issues.push(`stale compatibility path ${relative(PLUGIN_ROOT, path)}`);
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
return issues;
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
export async function syncCommandsLayout({ check = false } = {}) {
|
|
120
|
+
if (check) {
|
|
121
|
+
const issues = await findDrift();
|
|
122
|
+
if (issues.length > 0) {
|
|
123
|
+
for (const issue of issues) {
|
|
124
|
+
console.error(issue);
|
|
125
|
+
}
|
|
126
|
+
process.exitCode = 1;
|
|
127
|
+
return;
|
|
128
|
+
}
|
|
129
|
+
console.log("commands compatibility tree is in sync");
|
|
130
|
+
return;
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
await rm(COMMANDS_DIR, { recursive: true, force: true });
|
|
134
|
+
for (const { source, target } of await compatibilityEntries()) {
|
|
135
|
+
await copyTextFile(source, target);
|
|
136
|
+
}
|
|
137
|
+
console.log("commands compatibility tree synchronized");
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
const isDirectRun =
|
|
141
|
+
process.argv[1] &&
|
|
142
|
+
fileURLToPath(import.meta.url).endsWith(process.argv[1].replace(/^.*\//, ""));
|
|
143
|
+
|
|
144
|
+
if (isDirectRun) {
|
|
145
|
+
syncCommandsLayout({ check: process.argv.includes("--check") }).catch((error) => {
|
|
146
|
+
console.error(error.message);
|
|
147
|
+
process.exitCode = 1;
|
|
148
|
+
});
|
|
149
|
+
}
|
|
@@ -1,148 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
/**
|
|
3
|
-
*
|
|
3
|
+
* Backward-compatible wrapper for the flipped source layout.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
* node src/sync-skills-layout.js [--check]
|
|
5
|
+
* The editable source is now skills/turing/, and sync generates commands/.
|
|
7
6
|
*/
|
|
8
7
|
|
|
9
|
-
import { mkdir, readdir, readFile, rm, writeFile } from "fs/promises";
|
|
10
|
-
import { dirname, join, relative } from "path";
|
|
11
8
|
import { fileURLToPath } from "url";
|
|
12
|
-
import { getCommandNames } from "./command-registry.js";
|
|
13
|
-
|
|
14
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
15
|
-
const PLUGIN_ROOT = join(__dirname, "..");
|
|
16
|
-
const COMMANDS_DIR = join(PLUGIN_ROOT, "commands");
|
|
17
|
-
const SKILLS_DIR = join(PLUGIN_ROOT, "skills", "turing");
|
|
18
|
-
|
|
19
|
-
async function readUtf8(path) {
|
|
20
|
-
return readFile(path, "utf8");
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
async function copyTextFile(source, target) {
|
|
24
|
-
await mkdir(dirname(target), { recursive: true });
|
|
25
|
-
await writeFile(target, await readUtf8(source));
|
|
26
|
-
}
|
|
27
|
-
|
|
28
|
-
async function mirrorEntries() {
|
|
29
|
-
const names = await getCommandNames();
|
|
30
|
-
return [
|
|
31
|
-
{
|
|
32
|
-
source: join(COMMANDS_DIR, "turing.md"),
|
|
33
|
-
target: join(SKILLS_DIR, "SKILL.md"),
|
|
34
|
-
},
|
|
35
|
-
...names.map((name) => ({
|
|
36
|
-
source: join(COMMANDS_DIR, `${name}.md`),
|
|
37
|
-
target: join(SKILLS_DIR, name, "SKILL.md"),
|
|
38
|
-
})),
|
|
39
|
-
{
|
|
40
|
-
source: join(COMMANDS_DIR, "rules", "loop-protocol.md"),
|
|
41
|
-
target: join(SKILLS_DIR, "rules", "loop-protocol.md"),
|
|
42
|
-
},
|
|
43
|
-
];
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
async function existingMirrorEntries(dir = SKILLS_DIR) {
|
|
47
|
-
let entries;
|
|
48
|
-
try {
|
|
49
|
-
entries = await readdir(dir, { withFileTypes: true });
|
|
50
|
-
} catch (error) {
|
|
51
|
-
if (error.code === "ENOENT") {
|
|
52
|
-
return [];
|
|
53
|
-
}
|
|
54
|
-
throw error;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
const paths = [];
|
|
58
|
-
for (const entry of entries) {
|
|
59
|
-
const path = join(dir, entry.name);
|
|
60
|
-
paths.push(path);
|
|
61
|
-
if (entry.isDirectory()) {
|
|
62
|
-
paths.push(...await existingMirrorEntries(path));
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
|
-
return paths;
|
|
66
|
-
}
|
|
67
|
-
|
|
68
|
-
async function findDrift() {
|
|
69
|
-
const entries = await mirrorEntries();
|
|
70
|
-
const expectedTargets = new Set(entries.map(({ target }) => target));
|
|
71
|
-
const expectedPaths = new Set([SKILLS_DIR]);
|
|
72
|
-
for (const target of expectedTargets) {
|
|
73
|
-
let current = target;
|
|
74
|
-
while (current.startsWith(SKILLS_DIR)) {
|
|
75
|
-
expectedPaths.add(current);
|
|
76
|
-
if (current === SKILLS_DIR) {
|
|
77
|
-
break;
|
|
78
|
-
}
|
|
79
|
-
current = dirname(current);
|
|
80
|
-
}
|
|
81
|
-
}
|
|
82
|
-
const issues = [];
|
|
83
|
-
|
|
84
|
-
for (const { source, target } of entries) {
|
|
85
|
-
let sourceText;
|
|
86
|
-
try {
|
|
87
|
-
sourceText = await readUtf8(source);
|
|
88
|
-
} catch (error) {
|
|
89
|
-
issues.push(`missing source ${relative(PLUGIN_ROOT, source)}: ${error.message}`);
|
|
90
|
-
continue;
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
let targetText;
|
|
94
|
-
try {
|
|
95
|
-
targetText = await readUtf8(target);
|
|
96
|
-
} catch (error) {
|
|
97
|
-
if (error.code === "ENOENT") {
|
|
98
|
-
issues.push(`missing mirror ${relative(PLUGIN_ROOT, target)}`);
|
|
99
|
-
} else {
|
|
100
|
-
issues.push(`cannot read mirror ${relative(PLUGIN_ROOT, target)}: ${error.message}`);
|
|
101
|
-
}
|
|
102
|
-
continue;
|
|
103
|
-
}
|
|
104
|
-
|
|
105
|
-
if (targetText !== sourceText) {
|
|
106
|
-
issues.push(`diverged mirror ${relative(PLUGIN_ROOT, target)}`);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
for (const path of await existingMirrorEntries()) {
|
|
111
|
-
if (!expectedPaths.has(path)) {
|
|
112
|
-
issues.push(`stale mirror ${relative(PLUGIN_ROOT, path)}`);
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
|
|
116
|
-
return issues;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
export async function syncSkillsLayout({ check = false } = {}) {
|
|
120
|
-
if (check) {
|
|
121
|
-
const issues = await findDrift();
|
|
122
|
-
if (issues.length > 0) {
|
|
123
|
-
for (const issue of issues) {
|
|
124
|
-
console.error(issue);
|
|
125
|
-
}
|
|
126
|
-
process.exitCode = 1;
|
|
127
|
-
return;
|
|
128
|
-
}
|
|
129
|
-
console.log("skills/turing mirror is in sync");
|
|
130
|
-
return;
|
|
131
|
-
}
|
|
132
|
-
|
|
133
|
-
await rm(SKILLS_DIR, { recursive: true, force: true });
|
|
134
|
-
for (const { source, target } of await mirrorEntries()) {
|
|
135
|
-
await copyTextFile(source, target);
|
|
136
|
-
}
|
|
137
|
-
console.log("skills/turing mirror synchronized");
|
|
138
|
-
}
|
|
9
|
+
import { syncCommandsLayout } from "./sync-commands-layout.js";
|
|
139
10
|
|
|
140
11
|
const isDirectRun =
|
|
141
12
|
process.argv[1] &&
|
|
142
13
|
fileURLToPath(import.meta.url).endsWith(process.argv[1].replace(/^.*\//, ""));
|
|
143
14
|
|
|
144
15
|
if (isDirectRun) {
|
|
145
|
-
syncSkillsLayout({ check: process.argv.includes("--check") }).catch((error) => {
|
16
|
+
syncCommandsLayout({ check: process.argv.includes("--check") }).catch((error) => {
|
|
146
17
|
console.error(error.message);
|
|
147
18
|
process.exitCode = 1;
|
|
148
19
|
});
|
package/templates/README.md
CHANGED
|
@@ -21,23 +21,21 @@ This separation is the invariant that makes experiment comparisons valid.
|
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
23
|
# 1. Set up the environment
|
|
24
|
-
|
|
25
|
-
source .venv/bin/activate
|
|
26
|
-
pip install -r requirements.txt
|
|
24
|
+
uv sync
|
|
27
25
|
|
|
28
26
|
# 2. Add your training data to {{DATA_SOURCE}}
|
|
29
27
|
|
|
30
28
|
# 3. Create train/val/test splits
|
|
31
|
-
python prepare.py
|
|
29
|
+
uv run python prepare.py
|
|
32
30
|
|
|
33
31
|
# 4. Run training
|
|
34
|
-
python train.py > run.log 2>&1
|
|
32
|
+
uv run python train.py > run.log 2>&1
|
|
35
33
|
|
|
36
34
|
# 5. Check results
|
|
37
35
|
grep -A 10 "^---" run.log
|
|
38
36
|
|
|
39
37
|
# 6. View experiment history
|
|
40
|
-
python scripts/show_metrics.py
|
|
38
|
+
uv run python scripts/show_metrics.py
|
|
41
39
|
```
|
|
42
40
|
|
|
43
41
|
## Using the Autoresearch Agent
|
|
@@ -88,6 +86,5 @@ For hands-off mode: `/loop 5m /turing:train`
|
|
|
88
86
|
## Running Tests
|
|
89
87
|
|
|
90
88
|
```bash
|
|
91
|
-
|
|
92
|
-
python -m pytest tests/ -v
|
|
89
|
+
uv run pytest tests/ -v
|
|
93
90
|
```
|
package/templates/program.md
CHANGED
|
@@ -54,11 +54,11 @@ Update it after each experiment with:
|
|
|
54
54
|
|
|
55
55
|
For systematic hyperparameter search:
|
|
56
56
|
1. Edit `sweep_config.yaml` with parameter ranges
|
|
57
|
-
2. Generate queue: `python scripts/sweep.py`
|
|
58
|
-
3. Check status: `python scripts/sweep.py --status`
|
|
59
|
-
4. Get next: `python scripts/sweep.py --next`
|
|
57
|
+
2. Generate queue: `uv run python scripts/sweep.py`
|
|
58
|
+
3. Check status: `uv run python scripts/sweep.py --status`
|
|
59
|
+
4. Get next: `uv run python scripts/sweep.py --next`
|
|
60
60
|
5. Apply overrides, create branch, run training
|
|
61
|
-
6. Mark done: `python scripts/sweep.py --mark <name> complete|failed`
|
|
61
|
+
6. Mark done: `uv run python scripts/sweep.py --mark <name> complete|failed`
|
|
62
62
|
|
|
63
63
|
## THE LOOP
|
|
64
64
|
|
|
@@ -66,8 +66,8 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
66
66
|
|
|
67
67
|
1. **OBSERVE** — Read recent results, check hypothesis queue, research plan, and review failed diffs:
|
|
68
68
|
```bash
|
|
69
|
-
python scripts/show_metrics.py --last 5
|
|
70
|
-
python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
|
|
69
|
+
uv run python scripts/show_metrics.py --last 5
|
|
70
|
+
uv run python scripts/manage_hypotheses.py next 2>/dev/null || echo "No queued hypotheses"
|
|
71
71
|
cat RESEARCH_PLAN.md 2>/dev/null || true
|
|
72
72
|
```
|
|
73
73
|
|
|
@@ -88,12 +88,12 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
88
88
|
|
|
89
89
|
**If using a queued hypothesis:**
|
|
90
90
|
```bash
|
|
91
|
-
python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
91
|
+
uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
92
92
|
```
|
|
93
93
|
|
|
94
94
|
**If generating your own hypothesis**, register it with structured detail:
|
|
95
95
|
```bash
|
|
96
|
-
python scripts/manage_hypotheses.py add "your hypothesis description" \
|
|
96
|
+
uv run python scripts/manage_hypotheses.py add "your hypothesis description" \
|
|
97
97
|
--priority medium --source agent \
|
|
98
98
|
--model-type xgboost \
|
|
99
99
|
--hyperparams '{"max_depth": 8, "n_estimators": 200}' \
|
|
@@ -101,7 +101,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
101
101
|
--tags "depth,estimators" \
|
|
102
102
|
--parent exp-NNN \
|
|
103
103
|
--expected "deeper trees should capture feature interactions"
|
|
104
|
-
python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
104
|
+
uv run python scripts/manage_hypotheses.py mark hyp-NNN in-progress
|
|
105
105
|
```
|
|
106
106
|
|
|
107
107
|
This creates both an index entry in `hypotheses.yaml` and a detailed file at `hypotheses/hyp-NNN.yaml` with full architecture, hyperparameters, expected outcome, and lineage.
|
|
@@ -110,7 +110,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
110
110
|
|
|
111
111
|
To read a hypothesis's full detail:
|
|
112
112
|
```bash
|
|
113
|
-
python scripts/manage_hypotheses.py show hyp-NNN
|
|
113
|
+
uv run python scripts/manage_hypotheses.py show hyp-NNN
|
|
114
114
|
```
|
|
115
115
|
|
|
116
116
|
3. **PREPARE** — Modify `config.yaml` for hyperparameter changes. Only modify `train.py` for structural code changes.
|
|
@@ -122,7 +122,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
122
122
|
|
|
123
123
|
5. **EXECUTE** training:
|
|
124
124
|
```bash
|
|
125
|
-
|
|
125
|
+
uv run python train.py > run.log 2>&1
|
|
126
126
|
```
|
|
127
127
|
|
|
128
128
|
6. **MEASURE** — Parse metrics from run.log:
|
|
@@ -144,7 +144,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
144
144
|
|
|
145
145
|
8. **RECORD** — Log the experiment (kept or discarded):
|
|
146
146
|
```bash
|
|
147
|
-
python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
|
|
147
|
+
uv run python scripts/log_experiment.py experiments/log.jsonl exp-NNN kept|discarded \
|
|
148
148
|
'{"{{TARGET_METRIC}}": X.XX, ...}' \
|
|
149
149
|
'{"model_type": "xgboost", "hyperparams": {...}}' \
|
|
150
150
|
models/model.joblib "Description of hypothesis and outcome"
|
|
@@ -152,7 +152,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
152
152
|
|
|
153
153
|
Update the hypothesis status with result metrics:
|
|
154
154
|
```bash
|
|
155
|
-
python scripts/manage_hypotheses.py mark hyp-NNN tested \
|
|
155
|
+
uv run python scripts/manage_hypotheses.py mark hyp-NNN tested \
|
|
156
156
|
--result exp-NNN \
|
|
157
157
|
--metrics '{"{{TARGET_METRIC}}": X.XX, ...}' \
|
|
158
158
|
--notes "Brief explanation of what happened and why"
|
|
@@ -162,7 +162,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
162
162
|
|
|
163
163
|
Then synthesize a decision packet and auto-queue follow-ups:
|
|
164
164
|
```bash
|
|
165
|
-
python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
|
|
165
|
+
uv run python scripts/synthesize_decision.py --experiment exp-NNN --auto-queue
|
|
166
166
|
```
|
|
167
167
|
This produces a verdict (promote/branch_followup/abandon/fix_and_retry) and automatically queues follow-up hypotheses for `branch_followup` and `fix_and_retry` outcomes.
|
|
168
168
|
|
|
@@ -172,7 +172,7 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
172
172
|
- Report final best model and recommend next steps
|
|
173
173
|
- **Before declaring final results**, run a seed study to verify robustness:
|
|
174
174
|
```bash
|
|
175
|
-
python scripts/seed_runner.py --quick
|
|
175
|
+
uv run python scripts/seed_runner.py --quick
|
|
176
176
|
```
|
|
177
177
|
If CV > 5%, the result is seed-sensitive — report mean ± std, not a single-seed number.
|
|
178
178
|
|
|
@@ -180,9 +180,9 @@ The autoresearch experiment loop. Each iteration is one experiment — one hypot
|
|
|
180
180
|
|
|
181
181
|
## Execution Rules
|
|
182
182
|
|
|
183
|
-
- **ALWAYS redirect output:** `python train.py > run.log 2>&1`
|
|
183
|
+
- **ALWAYS redirect output:** `uv run python train.py > run.log 2>&1`
|
|
184
184
|
- **ALWAYS parse with grep:** `grep -A 10 "^---" run.log | head -10`
|
|
185
|
-
- **ALWAYS
|
|
185
|
+
- **ALWAYS run Python through uv:** `uv run python ...`
|
|
186
186
|
- **NEVER install packages** without human approval
|
|
187
187
|
|
|
188
188
|
## Strategy Escalation Protocol
|
|
@@ -219,5 +219,5 @@ Starting suggestions (ordered by expected impact):
|
|
|
219
219
|
## Comparing Runs
|
|
220
220
|
|
|
221
221
|
```bash
|
|
222
|
-
python scripts/compare_runs.py exp-001 exp-002
|
|
222
|
+
uv run python scripts/compare_runs.py exp-001 exp-002
|
|
223
223
|
```
|
package/templates/pyproject.toml
CHANGED
|
@@ -2,6 +2,16 @@
|
|
|
2
2
|
name = "{{PROJECT_NAME}}-ml"
|
|
3
3
|
version = "0.1.0"
|
|
4
4
|
requires-python = ">=3.12"
|
|
5
|
+
dependencies = [
|
|
6
|
+
"scikit-learn>=1.6",
|
|
7
|
+
"xgboost>=3.2",
|
|
8
|
+
"lightgbm>=4.6",
|
|
9
|
+
"pandas>=2.2",
|
|
10
|
+
"numpy>=2.0",
|
|
11
|
+
"joblib>=1.4",
|
|
12
|
+
"pyyaml>=6.0",
|
|
13
|
+
"pytest>=8.0",
|
|
14
|
+
]
|
|
5
15
|
|
|
6
16
|
[tool.pytest.ini_options]
|
|
7
17
|
testpaths = ["tests"]
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
# Compatibility export only. pyproject.toml is canonical for dependencies.
|
|
2
|
+
# Prefer: uv sync
|
|
3
|
+
|
|
1
4
|
scikit-learn>=1.6
|
|
2
5
|
xgboost>=3.2
|
|
3
6
|
lightgbm>=4.6
|
|
@@ -8,5 +11,5 @@ pyyaml>=6.0
|
|
|
8
11
|
pytest>=8.0
|
|
9
12
|
|
|
10
13
|
# Optional: tree-search-guided hypothesis exploration
|
|
11
|
-
# Install with:
|
|
14
|
+
# Install with: uv add "treequest[all]"
|
|
12
15
|
# treequest>=0.1
|
|
@@ -210,7 +210,7 @@ def format_onboarding_report(config, experiments, families, best, decisions,
|
|
|
210
210
|
"5. `/turing:try \"your hypothesis\"` — inject ideas",
|
|
211
211
|
"6. `/turing:train` — run next experiment",
|
|
212
212
|
], "engineer": [
|
|
213
|
-
"1. `
|
|
213
|
+
"1. `uv sync`",
|
|
214
214
|
"2. Review `config.yaml` for data paths",
|
|
215
215
|
"3. `/turing:status` — where things stand",
|
|
216
216
|
"4. Check `train.py` for current model",
|