claude-turing 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +34 -0
- package/LICENSE +21 -0
- package/README.md +457 -0
- package/agents/ml-evaluator.md +43 -0
- package/agents/ml-researcher.md +74 -0
- package/bin/cli.js +46 -0
- package/bin/turing-init.sh +57 -0
- package/commands/brief.md +83 -0
- package/commands/compare.md +24 -0
- package/commands/design.md +97 -0
- package/commands/init.md +123 -0
- package/commands/logbook.md +51 -0
- package/commands/mode.md +43 -0
- package/commands/poster.md +89 -0
- package/commands/preflight.md +75 -0
- package/commands/report.md +97 -0
- package/commands/rules/loop-protocol.md +91 -0
- package/commands/status.md +24 -0
- package/commands/suggest.md +95 -0
- package/commands/sweep.md +45 -0
- package/commands/train.md +66 -0
- package/commands/try.md +63 -0
- package/commands/turing.md +54 -0
- package/commands/validate.md +34 -0
- package/config/defaults.yaml +45 -0
- package/config/experiment_archetypes.yaml +127 -0
- package/config/lifecycle.toml +31 -0
- package/config/novelty_aliases.yaml +107 -0
- package/config/relationships.toml +125 -0
- package/config/state.toml +24 -0
- package/config/task_taxonomy.yaml +110 -0
- package/config/taxonomy.toml +37 -0
- package/package.json +54 -0
- package/src/claude-md.js +55 -0
- package/src/install.js +107 -0
- package/src/paths.js +20 -0
- package/src/postinstall.js +22 -0
- package/src/verify.js +109 -0
- package/templates/MEMORY.md +36 -0
- package/templates/README.md +93 -0
- package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
- package/templates/config.yaml +48 -0
- package/templates/evaluate.py +237 -0
- package/templates/features/__init__.py +0 -0
- package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
- package/templates/features/featurizers.py +138 -0
- package/templates/prepare.py +171 -0
- package/templates/program.md +216 -0
- package/templates/pyproject.toml +8 -0
- package/templates/requirements.txt +8 -0
- package/templates/scripts/__init__.py +0 -0
- package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
- package/templates/scripts/check_convergence.py +230 -0
- package/templates/scripts/compare_runs.py +124 -0
- package/templates/scripts/critique_hypothesis.py +350 -0
- package/templates/scripts/experiment_index.py +288 -0
- package/templates/scripts/generate_brief.py +389 -0
- package/templates/scripts/generate_logbook.py +423 -0
- package/templates/scripts/log_experiment.py +243 -0
- package/templates/scripts/manage_hypotheses.py +543 -0
- package/templates/scripts/novelty_guard.py +343 -0
- package/templates/scripts/parse_metrics.py +139 -0
- package/templates/scripts/post-train-hook.sh +74 -0
- package/templates/scripts/preflight.py +549 -0
- package/templates/scripts/scaffold.py +409 -0
- package/templates/scripts/show_environment.py +92 -0
- package/templates/scripts/show_experiment_tree.py +144 -0
- package/templates/scripts/show_families.py +133 -0
- package/templates/scripts/show_metrics.py +157 -0
- package/templates/scripts/statistical_compare.py +259 -0
- package/templates/scripts/stop-hook.sh +34 -0
- package/templates/scripts/suggest_next.py +301 -0
- package/templates/scripts/sweep.py +276 -0
- package/templates/scripts/synthesize_decision.py +300 -0
- package/templates/scripts/turing_io.py +76 -0
- package/templates/scripts/update_state.py +296 -0
- package/templates/scripts/validate_stability.py +167 -0
- package/templates/scripts/verify_placeholders.py +119 -0
- package/templates/sweep_config.yaml +14 -0
- package/templates/tests/__init__.py +0 -0
- package/templates/tests/conftest.py +91 -0
- package/templates/train.py +240 -0
package/src/claude-md.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { readFile, writeFile } from "fs/promises";
|
|
2
|
+
|
|
3
|
+
/** Marker comments delimiting the installer-managed region of CLAUDE.md. */
const BEGIN = "<!-- BEGIN turing -->";
const END = "<!-- END turing -->";

/** The managed section (markers included) that is written verbatim. */
const SECTION = `${BEGIN}
## turing

Autonomous ML research harness. The autoresearch loop as a formal protocol.

### Commands

| Command | Purpose |
|---------|---------|
| \`/turing\` | Router — detects ML intent and routes to sub-commands |
| \`/turing:init\` | Scaffold a new ML project with autoresearch harness |
| \`/turing:train [N]\` | Run autonomous experiment loop (optional max iterations) |
| \`/turing:status\` | Show experiment status, best model, convergence state |
| \`/turing:compare <a> <b>\` | Side-by-side experiment comparison |
| \`/turing:sweep\` | Generate and run hyperparameter sweep |
| \`/turing:validate\` | Check metric stability, auto-fix if noisy |
| \`/turing:try <hypothesis>\` | Inject a hypothesis into the experiment queue |
| \`/turing:brief\` | Generate research intelligence report |
| \`/turing:preflight\` | Pre-flight resource check (VRAM/RAM/disk) |

### Agents

| Agent | Purpose |
|-------|---------|
| \`@ml-researcher\` | Autonomous training agent (Read/Write/Edit/Bash) |
| \`@ml-evaluator\` | Read-only analysis agent (Read/Bash only) |
${END}`;

/**
 * Insert or refresh the managed turing section in a CLAUDE.md file.
 *
 * - If the file contains a BEGIN marker followed by an END marker, everything
 *   from the first BEGIN through the first END after it is replaced.
 * - Otherwise the section is appended after the existing content, separated
 *   by one blank line.
 * - If the file does not exist (or holds only whitespace), the file becomes
 *   just the section.
 *
 * @param {string} claudeMdPath - Path of the CLAUDE.md file to update.
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws Any read error other than ENOENT, and any write error.
 */
export async function updateClaudeMd(claudeMdPath) {
  let content = "";
  try {
    content = await readFile(claudeMdPath, "utf-8");
  } catch (err) {
    // Only "file does not exist" means "start from empty". Any other failure
    // (permissions, path is a directory, ...) must not be mistaken for an
    // empty file, or the write below could clobber content we failed to read.
    if (!err || err.code !== "ENOENT") throw err;
  }

  // Plain index search instead of a RegExp: no escaping of the markers is
  // needed and the inserted text can never be interpreted as a `$&`-style
  // replacement pattern.
  const beginAt = content.indexOf(BEGIN);
  const endAt =
    beginAt === -1 ? -1 : content.indexOf(END, beginAt + BEGIN.length);

  let updated;
  if (endAt !== -1) {
    updated =
      content.slice(0, beginAt) + SECTION + content.slice(endAt + END.length);
  } else {
    // Trim first so a whitespace-only file is treated like an empty one
    // rather than producing a file that starts with blank lines.
    const existing = content.trimEnd();
    updated = existing ? `${existing}\n\n${SECTION}\n` : `${SECTION}\n`;
  }

  await writeFile(claudeMdPath, updated, "utf-8");
}
|
package/src/install.js
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Turing installer.
|
|
4
|
+
*
|
|
5
|
+
* Deploys commands, agents, and config to the Claude Code plugin directory.
|
|
6
|
+
 * Also inserts or refreshes a managed section in the scope's CLAUDE.md (~/.claude/CLAUDE.md for global, ./CLAUDE.md for project).
|
|
7
|
+
*
|
|
8
|
+
* Usage:
|
|
9
|
+
* node src/install.js [--global] [--project]
|
|
10
|
+
*/
|
|
11
|
+
|
|
12
|
+
import { readdir, copyFile, mkdir } from "fs/promises";
|
|
13
|
+
import { join, dirname } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
15
|
+
import { getTargetPaths } from "./paths.js";
|
|
16
|
+
import { updateClaudeMd } from "./claude-md.js";
|
|
17
|
+
|
|
18
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
19
|
+
const PLUGIN_ROOT = join(__dirname, "..");
|
|
20
|
+
|
|
21
|
+
// Single source of truth for sub-commands (DRY — used for dirs and file copy)
|
|
22
|
+
// Every sub-command shipped with the plugin. Each name maps to
// commands/<name>.md in the package and <name>/SKILL.md in the target.
const SUB_COMMANDS = [
  "init",
  "train",
  "status",
  "compare",
  "sweep",
  "validate",
  "try",
  "brief",
  "suggest",
  "design",
  "logbook",
  "poster",
  "report",
  "mode",
  "preflight",
];
|
|
27
|
+
|
|
28
|
+
/**
 * Deploy the plugin's commands, rules, agents and config files into the
 * Claude Code directory for the chosen scope, then refresh the managed
 * section of CLAUDE.md.
 *
 * @param {object} [opts]
 * @param {boolean} [opts.global] - Install into the global scope. Wins when
 *   both flags are set.
 * @param {boolean} [opts.project] - Install into the project scope.
 *   With neither flag set, the global scope is used.
 * @returns {Promise<void>} Resolves when all files are copied. Rejects with
 *   the first filesystem error encountered.
 */
export async function install(opts = {}) {
  const scope = !opts.global && opts.project ? "project" : "global";
  const paths = getTargetPaths(scope);

  console.log("Turing ML Research Harness — Installer");
  console.log(`Target: ${paths.commands} (${scope})`);
  console.log("");

  // Create every directory we are about to copy into. The agents and config
  // directories come from `paths` (the same values the copies below use), so
  // the two can never drift apart.
  const targetDirs = [
    paths.commands,
    paths.agents,
    paths.config,
    join(paths.commands, "rules"),
    ...SUB_COMMANDS.map((cmd) => join(paths.commands, cmd)),
  ];
  for (const dir of targetDirs) {
    await mkdir(dir, { recursive: true });
  }

  // Copy root command (router) as SKILL.md
  await copyFile(
    join(PLUGIN_ROOT, "commands", "turing.md"),
    join(paths.commands, "SKILL.md"),
  );
  console.log(" Router -> SKILL.md");

  // Copy sub-commands as <name>/SKILL.md
  for (const cmd of SUB_COMMANDS) {
    await copyFile(
      join(PLUGIN_ROOT, "commands", `${cmd}.md`),
      join(paths.commands, cmd, "SKILL.md"),
    );
  }
  console.log(` ${SUB_COMMANDS.length} commands installed`);

  // Copy rules
  await copyFile(
    join(PLUGIN_ROOT, "commands", "rules", "loop-protocol.md"),
    join(paths.commands, "rules", "loop-protocol.md"),
  );
  console.log(" Rules installed");

  // Copy agents. Only regular files are copied: copyFile() rejects on a
  // directory, so a stray sub-directory must not abort the installation.
  const agentEntries = await readdir(join(PLUGIN_ROOT, "agents"), {
    withFileTypes: true,
  });
  const agentFiles = agentEntries
    .filter((entry) => entry.isFile())
    .map((entry) => entry.name);
  for (const file of agentFiles) {
    await copyFile(join(PLUGIN_ROOT, "agents", file), join(paths.agents, file));
  }
  console.log(` ${agentFiles.length} agents installed`);

  // Copy config (static schema files only)
  const CONFIG_FILES = [
    "defaults.yaml", "lifecycle.toml", "taxonomy.toml",
    "experiment_archetypes.yaml", "novelty_aliases.yaml",
    "relationships.toml", "state.toml", "task_taxonomy.yaml",
  ];
  for (const file of CONFIG_FILES) {
    await copyFile(join(PLUGIN_ROOT, "config", file), join(paths.config, file));
  }
  console.log(` ${CONFIG_FILES.length} config files installed`);

  // Update CLAUDE.md
  await updateClaudeMd(paths.claudeMd);
  console.log(" CLAUDE.md updated");

  console.log("");
  console.log(
    `Installation complete. Run /turing:init to scaffold an ML project.`,
  );
}
|
|
97
|
+
|
|
98
|
+
// Direct execution
|
|
99
|
+
// Run the installer only when this file is the script node was started with,
// not when it is imported (e.g. by the CLI wrapper). The base names of both
// paths are compared for equality, stripping "/" as well as "\" separators,
// so a script whose name merely ends with the same characters is not
// mistaken for this file.
const isDirectRun =
  Boolean(process.argv[1]) &&
  fileURLToPath(import.meta.url).replace(/^.*[\\/]/, "") ===
    process.argv[1].replace(/^.*[\\/]/, "");
if (isDirectRun) {
  install({
    global: process.argv.includes("--global"),
    project: process.argv.includes("--project"),
  }).catch((err) => {
    // Report the failure and signal it through the exit code instead of
    // leaving the promise rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
}
|
package/src/paths.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { homedir } from "os";
|
|
2
|
+
import { join } from "path";
|
|
3
|
+
|
|
4
|
+
/**
 * Resolve the installation target paths for a scope.
 *
 * @param {string} scope - "global" targets the user's home directory; any
 *   other value targets the current working directory.
 * @returns {{commands: string, agents: string, config: string,
 *   claudeMd: string, scope: string}} Target directories, the CLAUDE.md
 *   path, and the scope value exactly as passed in.
 */
export function getTargetPaths(scope) {
  const isGlobal = scope === "global";
  const claudeDir = join(isGlobal ? homedir() : process.cwd(), ".claude");
  const pluginDir = join(claudeDir, "commands", "turing");

  // Global installs keep CLAUDE.md inside ~/.claude; project installs keep it
  // at the project root, next to (not inside) the .claude directory.
  const claudeMd = isGlobal
    ? join(claudeDir, "CLAUDE.md")
    : join(process.cwd(), "CLAUDE.md");

  return {
    commands: pluginDir,
    agents: join(pluginDir, "agents"),
    config: join(pluginDir, "config"),
    claudeMd,
    scope,
  };
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* npm postinstall hook.
|
|
4
|
+
*
|
|
5
|
+
* Prints setup instructions after `npm install claude-turing`.
|
|
6
|
+
* Does NOT auto-install — the user must explicitly run the installer.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
/**
 * Build the boxed setup banner shown after `npm install`.
 *
 * Rows are padded programmatically to one fixed inner width, so the right
 * border always lines up and changing the text cannot break the box.
 *
 * @returns {string} The banner as newline-joined lines, with no leading or
 *   trailing newline.
 */
function renderBanner() {
  const INNER_WIDTH = 54;
  const rows = [
    "  Turing ML Research Harness",
    "",
    "  To complete setup, run:",
    "    npx claude-turing install --global",
    "",
    "  Or within a project:",
    "    npx claude-turing install",
    "",
    "  Then in Claude Code:",
    "    /turing:init",
  ];
  const border = "═".repeat(INNER_WIDTH);
  return [
    `╔${border}╗`,
    ...rows.map((row) => `║${row.padEnd(INNER_WIDTH)}║`),
    `╚${border}╝`,
  ].join("\n");
}

console.log("");
console.log(renderBanner());
console.log("");
|
package/src/verify.js
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
/**
|
|
3
|
+
* Turing installation verifier.
|
|
4
|
+
*
|
|
5
|
+
* Checks that all expected files are in place and reports status.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* node src/verify.js [--scope global|project]
|
|
9
|
+
*/
|
|
10
|
+
|
|
11
|
+
import { access } from "fs/promises";
|
|
12
|
+
import { join } from "path";
|
|
13
|
+
import { getTargetPaths } from "./paths.js";
|
|
14
|
+
|
|
15
|
+
// Files expected under the commands directory: the router's SKILL.md at the
// root plus one <name>/SKILL.md per sub-command.
const EXPECTED_COMMANDS = [
  "SKILL.md",
  ...[
    "init",
    "train",
    "status",
    "compare",
    "sweep",
    "validate",
    "try",
    "brief",
    "suggest",
    "design",
    "logbook",
    "poster",
    "report",
    "mode",
    "preflight",
  ].map((name) => `${name}/SKILL.md`),
];

// Agent definition files expected in the agents directory.
const EXPECTED_AGENTS = [
  "ml-researcher.md",
  "ml-evaluator.md",
];

// Config files expected in the config directory.
const EXPECTED_CONFIG = [
  "defaults.yaml",
  "lifecycle.toml",
  "taxonomy.toml",
  "experiment_archetypes.yaml",
  "novelty_aliases.yaml",
  "relationships.toml",
  "state.toml",
  "task_taxonomy.yaml",
];
|
|
41
|
+
|
|
42
|
+
/**
 * Report whether a path can be accessed.
 *
 * @param {string} path - Path to probe.
 * @returns {Promise<boolean>} true when access() succeeds, false when it
 *   rejects for any reason.
 */
async function fileExists(path) {
  return access(path).then(
    () => true,
    () => false,
  );
}
|
|
50
|
+
|
|
51
|
+
/**
 * Check an installation and print a per-file report.
 *
 * Each candidate scope is probed for the router's SKILL.md. Scopes without
 * it are skipped silently. For each scope found, every expected command,
 * agent and config file is listed with a ✓/✗ mark, followed by the CLAUDE.md
 * status and a summary line. CLAUDE.md is reported but does not count
 * towards the missing-file total.
 *
 * @param {object} [opts]
 * @param {string} [opts.scope] - Check only this scope. When omitted, both
 *   "global" and "project" are checked.
 * @returns {Promise<void>}
 */
export async function verify(opts = {}) {
  const scopesToCheck = opts.scope ? [opts.scope] : ["global", "project"];
  let installFound = false;

  for (const scope of scopesToCheck) {
    const paths = getTargetPaths(scope);
    const routerPresent = await fileExists(join(paths.commands, "SKILL.md"));
    if (!routerPresent) {
      continue;
    }
    installFound = true;

    console.log(`\n✓ turing found (${scope}): ${paths.commands}\n`);

    // One entry per report section, printed in this order.
    const sections = [
      { heading: "Commands:", label: "commands", dir: paths.commands, names: EXPECTED_COMMANDS },
      { heading: "\nAgents:", label: "agents", dir: paths.agents, names: EXPECTED_AGENTS },
      { heading: "\nConfig:", label: "config", dir: paths.config, names: EXPECTED_CONFIG },
    ];

    let missingCount = 0;
    for (const { heading, label, dir, names } of sections) {
      console.log(heading);
      for (const name of names) {
        const present = await fileExists(join(dir, name));
        console.log(` ${present ? "✓" : "✗"} ${label}/${name}`);
        if (!present) {
          missingCount += 1;
        }
      }
    }

    // Check CLAUDE.md (reported only, not counted as missing)
    const claudeMdPresent = await fileExists(paths.claudeMd);
    console.log(`\n ${claudeMdPresent ? "✓" : "✗"} CLAUDE.md`);

    const summary =
      missingCount === 0
        ? "✓ Installation complete"
        : `✗ ${missingCount} files missing — run claude-turing install`;
    console.log(`\n ${summary}\n`);
  }

  if (!installFound) {
    console.log("\n✗ turing not found. Run: claude-turing install\n");
  }
}
|
|
99
|
+
|
|
100
|
+
// Direct execution
|
|
101
|
+
// Run the verifier only when this file is the script node was started with.
// `import.meta.url` is a percent-encoded file: URL that always uses "/",
// while process.argv[1] is a native path (with "\" on Windows). So the script
// name is stripped of directories using both separators, and the URL is
// decoded before comparing. The leading "/" in the suffix makes the whole
// file name match, not just its tail.
const invokedScript = process.argv[1]
  ? process.argv[1].replace(/^.*[\\/]/, "")
  : "";
const isDirectRun =
  invokedScript !== "" &&
  decodeURIComponent(import.meta.url).endsWith(`/${invokedScript}`);
if (isDirectRun) {
  const scopeIdx = process.argv.indexOf("--scope");
  verify({
    scope: scopeIdx !== -1 ? process.argv[scopeIdx + 1] : undefined,
  }).catch((err) => {
    // Report the failure and signal it through the exit code instead of
    // leaving the promise rejection unhandled.
    console.error(err);
    process.exitCode = 1;
  });
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# ML Researcher Memory
|
|
2
|
+
|
|
3
|
+
## Goal
|
|
4
|
+
|
|
5
|
+
{{TASK_DESCRIPTION}}
|
|
6
|
+
|
|
7
|
+
Primary metric: {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better).
|
|
8
|
+
|
|
9
|
+
## Best Result
|
|
10
|
+
|
|
11
|
+
No experiments completed yet. Run `/turing:train` to begin.
|
|
12
|
+
|
|
13
|
+
## Observations
|
|
14
|
+
|
|
15
|
+
- Initial model: XGBoost with default hyperparams (n_estimators=100, max_depth=4, lr=0.1)
|
|
16
|
+
- Config file: `config.yaml` controls all hyperparameters — do not hardcode in train.py
|
|
17
|
+
- Sweep tool: `python scripts/sweep.py` for systematic grid search
|
|
18
|
+
- Per-experiment branches: `exp/NNN-description` preserves all code variants
|
|
19
|
+
- Evaluation is immutable: `prepare.py` and `evaluate.py` are READ-ONLY
|
|
20
|
+
|
|
21
|
+
## Failed Approaches
|
|
22
|
+
|
|
23
|
+
(none yet)
|
|
24
|
+
|
|
25
|
+
## Promising Directions
|
|
26
|
+
|
|
27
|
+
- Hyperparameter sweep across n_estimators, max_depth, learning_rate
|
|
28
|
+
- LightGBM as alternative to XGBoost
|
|
29
|
+
- Feature engineering: add domain-specific features
|
|
30
|
+
- Try different model architectures (RandomForest, MLP)
|
|
31
|
+
|
|
32
|
+
## Session History
|
|
33
|
+
|
|
34
|
+
| Session | Experiments | Best Metric | Notes |
|
|
35
|
+
|---------|-------------|-------------|-------|
|
|
36
|
+
| (none) | 0 | N/A | Pipeline initialized, no experiments run |
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# {{PROJECT_NAME}} ML Pipeline
|
|
2
|
+
|
|
3
|
+
{{TASK_DESCRIPTION}}
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
This pipeline uses the [Turing](https://github.com/pragnition/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
|
|
8
|
+
|
|
9
|
+
**Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
|
|
10
|
+
|
|
11
|
+
## The Separation
|
|
12
|
+
|
|
13
|
+
| Layer | Files | Agent Access | Purpose |
|
|
14
|
+
|-------|-------|-------------|---------|
|
|
15
|
+
| Measurement | `prepare.py`, `evaluate.py` | READ-ONLY | Ensures all experiments are measured by the same yardstick |
|
|
16
|
+
| Hypothesis | `train.py`, `config.yaml` | READ-WRITE | All experimental changes go here |
|
|
17
|
+
|
|
18
|
+
This separation is the invariant that makes experiment comparisons valid.
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
# 1. Set up the environment
|
|
24
|
+
python -m venv .venv
|
|
25
|
+
source .venv/bin/activate
|
|
26
|
+
pip install -r requirements.txt
|
|
27
|
+
|
|
28
|
+
# 2. Add your training data to {{DATA_SOURCE}}
|
|
29
|
+
|
|
30
|
+
# 3. Create train/val/test splits
|
|
31
|
+
python prepare.py
|
|
32
|
+
|
|
33
|
+
# 4. Run training
|
|
34
|
+
python train.py > run.log 2>&1
|
|
35
|
+
|
|
36
|
+
# 5. Check results
|
|
37
|
+
grep -A 10 "^---" run.log
|
|
38
|
+
|
|
39
|
+
# 6. View experiment history
|
|
40
|
+
python scripts/show_metrics.py
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Using the Autoresearch Agent
|
|
44
|
+
|
|
45
|
+
The agent follows `program.md`. It:
|
|
46
|
+
|
|
47
|
+
1. Reads recent experiment results
|
|
48
|
+
2. Proposes a hypothesis
|
|
49
|
+
3. Modifies `train.py` or `config.yaml`
|
|
50
|
+
4. Runs training and evaluates
|
|
51
|
+
5. Keeps improvements, discards regressions
|
|
52
|
+
6. Repeats until convergence
|
|
53
|
+
|
|
54
|
+
To start: `/turing:train` in Claude Code.
|
|
55
|
+
For hands-off mode: `/loop 5m /turing:train`
|
|
56
|
+
|
|
57
|
+
## Directory Structure
|
|
58
|
+
|
|
59
|
+
```
|
|
60
|
+
{{ML_DIR}}/
|
|
61
|
+
prepare.py READ-ONLY: Data loading, splitting
|
|
62
|
+
evaluate.py READ-ONLY: Evaluation harness
|
|
63
|
+
train.py AGENT-EDITABLE: Training code
|
|
64
|
+
config.yaml Hyperparameters and settings
|
|
65
|
+
sweep_config.yaml Sweep parameter ranges
|
|
66
|
+
program.md Agent protocol instructions
|
|
67
|
+
features/
|
|
68
|
+
featurizers.py Feature engineering pipeline
|
|
69
|
+
scripts/
|
|
70
|
+
log_experiment.py Experiment JSONL logging
|
|
71
|
+
show_metrics.py Display experiment metrics
|
|
72
|
+
compare_runs.py Side-by-side comparison
|
|
73
|
+
sweep.py Hyperparameter sweep tool
|
|
74
|
+
post-train-hook.sh Auto-log after training
|
|
75
|
+
stop-hook.sh Convergence detection hook
|
|
76
|
+
experiments/
|
|
77
|
+
log.jsonl Structured experiment log
|
|
78
|
+
results.tsv Quick-reference summary
|
|
79
|
+
models/
|
|
80
|
+
best/ Current best model
|
|
81
|
+
archive/ Previous best models
|
|
82
|
+
data/
|
|
83
|
+
splits/ Train/val/test splits
|
|
84
|
+
tests/
|
|
85
|
+
conftest.py Shared test fixtures
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Running Tests
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
source .venv/bin/activate
|
|
92
|
+
python -m pytest tests/ -v
|
|
93
|
+
```
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# {{PROJECT_NAME}} ML Pipeline Configuration
|
|
2
|
+
# Edit this file to change hyperparameters. The autoresearch agent
|
|
3
|
+
# modifies this file for hyperparameter experiments.
|
|
4
|
+
|
|
5
|
+
data:
|
|
6
|
+
source: "{{DATA_SOURCE}}"
|
|
7
|
+
splits_dir: "data/splits"
|
|
8
|
+
target_column: "label" # Column name for the prediction target
|
|
9
|
+
split_ratios:
|
|
10
|
+
train: 0.70
|
|
11
|
+
val: 0.15
|
|
12
|
+
test: 0.15
|
|
13
|
+
random_state: 42
|
|
14
|
+
|
|
15
|
+
evaluation:
|
|
16
|
+
primary_metric: "{{TARGET_METRIC}}"
|
|
17
|
+
metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
|
|
18
|
+
# Set to true for metrics where lower is better (mae, mse, rmse, loss)
|
|
19
|
+
# Set to false for metrics where higher is better (accuracy, f1, auc)
|
|
20
|
+
lower_is_better: false # {{METRIC_DIRECTION}} -- change to true if lower is better
|
|
21
|
+
|
|
22
|
+
convergence:
|
|
23
|
+
patience: 3 # Consecutive non-improvements before stopping
|
|
24
|
+
improvement_threshold: 0.005 # 0.5% relative improvement required
|
|
25
|
+
|
|
26
|
+
model:
|
|
27
|
+
type: "xgboost"
|
|
28
|
+
hyperparams:
|
|
29
|
+
n_estimators: 100
|
|
30
|
+
max_depth: 4
|
|
31
|
+
learning_rate: 0.1
|
|
32
|
+
objective: "multi:softmax"
|
|
33
|
+
num_class: 2 # Adjust for your number of classes
|
|
34
|
+
eval_metric: "mlogloss"
|
|
35
|
+
verbosity: 0
|
|
36
|
+
|
|
37
|
+
output:
|
|
38
|
+
models_dir: "models"
|
|
39
|
+
best_model_dir: "models/best"
|
|
40
|
+
archive_dir: "models/archive"
|
|
41
|
+
experiment_log: "experiments/log.jsonl"
|
|
42
|
+
results_tsv: "experiments/results.tsv"
|
|
43
|
+
|
|
44
|
+
# Behavioral probe constraints (enforced by hidden evaluation harness)
|
|
45
|
+
# These prevent the agent from gaming the metric by skipping real work.
|
|
46
|
+
constraints:
|
|
47
|
+
min_train_time: 5 # Seconds — training under this is suspicious
|
|
48
|
+
min_model_size_bytes: 100 # Bytes — a real model has non-trivial size
|