claude-turing 4.4.0 → 4.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +4 -4
- package/LICENSE +1 -1
- package/README.md +78 -555
- package/bin/cli.js +23 -4
- package/commands/doctor.md +1 -0
- package/commands/init.md +21 -3
- package/commands/turing.md +1 -1
- package/config/defaults.yaml +2 -0
- package/package.json +5 -5
- package/src/install.js +17 -2
- package/src/verify.js +42 -2
- package/templates/README.md +1 -1
- package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
- package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
- package/templates/config.yaml +1 -1
- package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
- package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
- package/templates/program.md +1 -1
- package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
- package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
- package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
- package/templates/scripts/harness_doctor.py +145 -1
- package/templates/scripts/scaffold.py +50 -28
package/bin/cli.js
CHANGED
|
@@ -1,9 +1,25 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { createRequire } from "module";
|
|
3
|
+
import { realpathSync } from "fs";
|
|
4
|
+
import { fileURLToPath } from "url";
|
|
3
5
|
const require = createRequire(import.meta.url);
|
|
4
6
|
const { Command } = require("commander");
|
|
5
7
|
const pkg = require("../package.json");
|
|
6
8
|
|
|
9
|
+
export function buildInitArgs(name, dir) {
|
|
10
|
+
return [name, dir].filter(Boolean);
|
|
11
|
+
}
|
|
12
|
+
|
|
13
|
+
function isDirectRun() {
|
|
14
|
+
if (!process.argv[1]) return false;
|
|
15
|
+
|
|
16
|
+
try {
|
|
17
|
+
return realpathSync(fileURLToPath(import.meta.url)) === realpathSync(process.argv[1]);
|
|
18
|
+
} catch {
|
|
19
|
+
return false;
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
7
23
|
const program = new Command();
|
|
8
24
|
|
|
9
25
|
program
|
|
@@ -34,13 +50,16 @@ program
|
|
|
34
50
|
.command("init [name] [dir]")
|
|
35
51
|
.description("Scaffold ML project (CLI mode, non-Claude-Code usage)")
|
|
36
52
|
.action(async (name, dir) => {
|
|
37
|
-
const {
|
|
53
|
+
const { spawnSync } = await import("child_process");
|
|
38
54
|
const { dirname, join } = await import("path");
|
|
39
55
|
const { fileURLToPath } = await import("url");
|
|
40
56
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
41
57
|
const script = join(__dirname, "turing-init.sh");
|
|
42
|
-
const args =
|
|
43
|
-
|
|
58
|
+
const args = buildInitArgs(name, dir);
|
|
59
|
+
const result = spawnSync("bash", [script, ...args], { stdio: "inherit" });
|
|
60
|
+
process.exit(result.status ?? 1);
|
|
44
61
|
});
|
|
45
62
|
|
|
46
|
-
|
|
63
|
+
if (isDirectRun()) {
|
|
64
|
+
program.parse();
|
|
65
|
+
}
|
package/commands/doctor.md
CHANGED
|
@@ -21,6 +21,7 @@ Is Turing healthy? Check everything and get a score.
|
|
|
21
21
|
- **Scripts:** train.py, prepare.py, evaluate.py exist and parse
|
|
22
22
|
- **Disk space:** warn if <1GB free
|
|
23
23
|
- **Git state:** uncommitted changes to critical files
|
|
24
|
+
- **Claude hooks:** `.claude/settings.local.json` hook group schema; `--fix` migrates legacy bare command hooks
|
|
24
25
|
|
|
25
26
|
## Examples
|
|
26
27
|
```
|
package/commands/init.md
CHANGED
|
@@ -37,18 +37,36 @@ python3 <templates_dir>/scripts/scaffold.py \
|
|
|
37
37
|
The scaffold script handles everything in a single atomic operation:
|
|
38
38
|
- Copies all template files with placeholder substitution
|
|
39
39
|
- Creates data/, experiments/, models/ directories
|
|
40
|
-
- Sets up agent memory at `.claude/agent-memory/ml-researcher/MEMORY.md`
|
|
40
|
+
- Sets up agent memory at `.claude/agent-memory/ml-researcher-{project_name}/MEMORY.md`
|
|
41
41
|
- Configures Claude Code hooks in `.claude/settings.local.json`
|
|
42
42
|
- Creates Python virtual environment and installs requirements
|
|
43
43
|
- Verifies all placeholders were replaced (fails loudly if any remain)
|
|
44
44
|
|
|
45
45
|
## Locating Templates
|
|
46
46
|
|
|
47
|
-
|
|
47
|
+
Use the installed command-pack templates directory first:
|
|
48
|
+
```
|
|
49
|
+
.claude/commands/turing/templates/
|
|
50
|
+
~/.claude/commands/turing/templates/
|
|
51
|
+
```
|
|
52
|
+
Then fall back to plugin or npm locations:
|
|
48
53
|
```
|
|
49
54
|
~/.claude/plugins/*/templates/
|
|
55
|
+
node_modules/claude-turing/templates/
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Example command:
|
|
59
|
+
|
|
60
|
+
```bash
|
|
61
|
+
python3 ~/.claude/commands/turing/templates/scripts/scaffold.py \
|
|
62
|
+
--project-name "<project_name>" \
|
|
63
|
+
--target-metric "<target_metric>" \
|
|
64
|
+
--metric-direction "<metric_direction>" \
|
|
65
|
+
--task-description "<task_description>" \
|
|
66
|
+
--ml-dir "<ml_dir>" \
|
|
67
|
+
--data-source "<data_source>" \
|
|
68
|
+
--templates-dir ~/.claude/commands/turing/templates
|
|
50
69
|
```
|
|
51
|
-
Or check if installed via npm by looking for `node_modules/claude-turing/templates/`.
|
|
52
70
|
|
|
53
71
|
## After Scaffolding
|
|
54
72
|
|
package/commands/turing.md
CHANGED
|
@@ -160,7 +160,7 @@ You are the Turing ML research router. Detect the user's intent and route to the
|
|
|
160
160
|
| `/turing:update <exp-id> --new-data <path>` | Incremental model update: add new data without full retraining, forgetting detection | (inline) |
|
|
161
161
|
| `/turing:registry [list\|register\|promote\|demote\|history]` | Model registry: stage lifecycle (candidate → staging → production) with promotion gates | (inline) |
|
|
162
162
|
| `/turing:postmortem [--window N]` | Failure postmortem: diagnose why experiments stopped improving (exhaustion, config error, data issue, ceiling, noise) | (inline) |
|
|
163
|
-
| `/turing:doctor [--fix]` | Harness self-diagnosis: environment, dependencies, config, log integrity, scripts, disk, git state | (inline) |
|
|
163
|
+
| `/turing:doctor [--fix]` | Harness self-diagnosis: environment, dependencies, config, log integrity, scripts, disk, git state, Claude hooks | (inline) |
|
|
164
164
|
| `/turing:plan [--budget N] [--goal]` | Research planning assistant: strategic campaign design with budget-aware ROI allocation | (inline) |
|
|
165
165
|
|
|
166
166
|
## Proactive Detection
|
package/config/defaults.yaml
CHANGED
|
@@ -35,6 +35,8 @@ placeholders:
|
|
|
35
35
|
ML_DIR: "Directory for ML files relative to project root"
|
|
36
36
|
DATA_SOURCE: "Path to training data file"
|
|
37
37
|
METRIC_DIRECTION: "lower or higher — which direction is better"
|
|
38
|
+
LOWER_IS_BETTER: "Boolean derived from METRIC_DIRECTION for generated config.yaml"
|
|
39
|
+
MEMORY_DIR_NAME: "Filesystem-safe project name used for scoped agent memory"
|
|
38
40
|
|
|
39
41
|
# Agent configuration
|
|
40
42
|
agents:
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "4.4.0",
|
|
3
|
+
"version": "4.5.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
|
@@ -34,15 +34,15 @@
|
|
|
34
34
|
"ai-agents"
|
|
35
35
|
],
|
|
36
36
|
"author": {
|
|
37
|
-
"name": "
|
|
37
|
+
"name": "Prannaya Gupta"
|
|
38
38
|
},
|
|
39
|
-
"homepage": "https://github.com/
|
|
39
|
+
"homepage": "https://github.com/ThePyProgrammer/turing",
|
|
40
40
|
"repository": {
|
|
41
41
|
"type": "git",
|
|
42
|
-
"url": "git+https://github.com/
|
|
42
|
+
"url": "git+https://github.com/ThePyProgrammer/turing.git"
|
|
43
43
|
},
|
|
44
44
|
"bugs": {
|
|
45
|
-
"url": "https://github.com/
|
|
45
|
+
"url": "https://github.com/ThePyProgrammer/turing/issues"
|
|
46
46
|
},
|
|
47
47
|
"license": "MIT",
|
|
48
48
|
"engines": {
|
package/src/install.js
CHANGED
|
@@ -9,7 +9,7 @@
|
|
|
9
9
|
* node src/install.js [--global] [--project]
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
-
import { readdir, copyFile, mkdir } from "fs/promises";
|
|
12
|
+
import { readdir, copyFile, mkdir, cp } from "fs/promises";
|
|
13
13
|
import { join, dirname } from "path";
|
|
14
14
|
import { fileURLToPath } from "url";
|
|
15
15
|
import { getTargetPaths } from "./paths.js";
|
|
@@ -50,7 +50,7 @@ export async function install(opts = {}) {
|
|
|
50
50
|
console.log("");
|
|
51
51
|
|
|
52
52
|
// Create directories for each sub-command + agents + config
|
|
53
|
-
for (const subDir of ["", "agents", "config", "rules", ...SUB_COMMANDS]) {
|
|
53
|
+
for (const subDir of ["", "agents", "config", "rules", "templates", ...SUB_COMMANDS]) {
|
|
54
54
|
await mkdir(join(paths.commands, subDir), { recursive: true });
|
|
55
55
|
}
|
|
56
56
|
|
|
@@ -103,6 +103,21 @@ export async function install(opts = {}) {
|
|
|
103
103
|
}
|
|
104
104
|
console.log(` ${CONFIG_FILES.length} config files installed`);
|
|
105
105
|
|
|
106
|
+
// Copy templates used by /turing:init
|
|
107
|
+
await cp(
|
|
108
|
+
join(PLUGIN_ROOT, "templates"),
|
|
109
|
+
join(paths.commands, "templates"),
|
|
110
|
+
{
|
|
111
|
+
recursive: true,
|
|
112
|
+
force: true,
|
|
113
|
+
filter: (src) =>
|
|
114
|
+
!src.includes("__pycache__") &&
|
|
115
|
+
!src.includes(".pytest_cache") &&
|
|
116
|
+
!src.endsWith(".pyc"),
|
|
117
|
+
},
|
|
118
|
+
);
|
|
119
|
+
console.log(" Templates installed");
|
|
120
|
+
|
|
106
121
|
// Update CLAUDE.md
|
|
107
122
|
await updateClaudeMd(paths.claudeMd);
|
|
108
123
|
console.log(" CLAUDE.md updated");
|
package/src/verify.js
CHANGED
|
@@ -8,10 +8,14 @@
|
|
|
8
8
|
* node src/verify.js [--scope global|project]
|
|
9
9
|
*/
|
|
10
10
|
|
|
11
|
-
import { access } from "fs/promises";
|
|
12
|
-
import { join } from "path";
|
|
11
|
+
import { access, readdir } from "fs/promises";
|
|
12
|
+
import { dirname, join } from "path";
|
|
13
|
+
import { fileURLToPath } from "url";
|
|
13
14
|
import { getTargetPaths } from "./paths.js";
|
|
14
15
|
|
|
16
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
const PLUGIN_ROOT = join(__dirname, "..");
|
|
18
|
+
|
|
15
19
|
const EXPECTED_COMMANDS = [
|
|
16
20
|
"SKILL.md",
|
|
17
21
|
"init/SKILL.md",
|
|
@@ -100,6 +104,27 @@ const EXPECTED_CONFIG = [
|
|
|
100
104
|
"watch_alerts.yaml",
|
|
101
105
|
];
|
|
102
106
|
|
|
107
|
+
async function templateFiles(root, relativeDir = "templates") {
|
|
108
|
+
const dir = join(root, relativeDir);
|
|
109
|
+
const entries = await readdir(dir, { withFileTypes: true });
|
|
110
|
+
const files = [];
|
|
111
|
+
|
|
112
|
+
for (const entry of entries) {
|
|
113
|
+
if (entry.name === "__pycache__" || entry.name === ".pytest_cache") {
|
|
114
|
+
continue;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
const relativePath = `${relativeDir}/${entry.name}`;
|
|
118
|
+
if (entry.isDirectory()) {
|
|
119
|
+
files.push(...await templateFiles(root, relativePath));
|
|
120
|
+
} else if (!entry.name.endsWith(".pyc")) {
|
|
121
|
+
files.push(relativePath);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
return files;
|
|
126
|
+
}
|
|
127
|
+
|
|
103
128
|
async function fileExists(path) {
|
|
104
129
|
try {
|
|
105
130
|
await access(path);
|
|
@@ -111,7 +136,9 @@ async function fileExists(path) {
|
|
|
111
136
|
|
|
112
137
|
export async function verify(opts = {}) {
|
|
113
138
|
const scopes = opts.scope ? [opts.scope] : ["global", "project"];
|
|
139
|
+
const expectedTemplates = await templateFiles(PLUGIN_ROOT);
|
|
114
140
|
let found = false;
|
|
141
|
+
let totalMissing = 0;
|
|
115
142
|
|
|
116
143
|
for (const scope of scopes) {
|
|
117
144
|
const paths = getTargetPaths(scope);
|
|
@@ -144,10 +171,18 @@ export async function verify(opts = {}) {
|
|
|
144
171
|
if (!ok) missing++;
|
|
145
172
|
}
|
|
146
173
|
|
|
174
|
+
console.log("\nTemplates:");
|
|
175
|
+
for (const template of expectedTemplates) {
|
|
176
|
+
const ok = await fileExists(join(paths.commands, template));
|
|
177
|
+
console.log(` ${ok ? "✓" : "✗"} commands/${template}`);
|
|
178
|
+
if (!ok) missing++;
|
|
179
|
+
}
|
|
180
|
+
|
|
147
181
|
// Check CLAUDE.md
|
|
148
182
|
const claudeOk = await fileExists(paths.claudeMd);
|
|
149
183
|
console.log(`\n ${claudeOk ? "✓" : "✗"} CLAUDE.md`);
|
|
150
184
|
|
|
185
|
+
totalMissing += missing;
|
|
151
186
|
console.log(
|
|
152
187
|
`\n ${missing === 0 ? "✓ Installation complete" : `✗ ${missing} files missing — run claude-turing install`}\n`,
|
|
153
188
|
);
|
|
@@ -155,6 +190,11 @@ export async function verify(opts = {}) {
|
|
|
155
190
|
|
|
156
191
|
if (!found) {
|
|
157
192
|
console.log("\n✗ turing not found. Run: claude-turing install\n");
|
|
193
|
+
totalMissing++;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
if (totalMissing > 0) {
|
|
197
|
+
process.exitCode = 1;
|
|
158
198
|
}
|
|
159
199
|
}
|
|
160
200
|
|
package/templates/README.md
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
|
|
5
5
|
## Overview
|
|
6
6
|
|
|
7
|
-
This pipeline uses the [Turing](https://github.com/
|
|
7
|
+
This pipeline uses the [Turing](https://github.com/ThePyProgrammer/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
|
|
8
8
|
|
|
9
9
|
**Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
|
|
10
10
|
|
|
Binary file
|
|
Binary file
|
package/templates/config.yaml
CHANGED
|
@@ -17,7 +17,7 @@ evaluation:
|
|
|
17
17
|
metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
|
|
18
18
|
# Set to true for metrics where lower is better (mae, mse, rmse, loss)
|
|
19
19
|
# Set to false for metrics where higher is better (accuracy, f1, auc)
|
|
20
|
-
lower_is_better:
|
|
20
|
+
lower_is_better: {{LOWER_IS_BETTER}}
|
|
21
21
|
|
|
22
22
|
# Multi-seed configuration (Phase 10.1: /turing:seed)
|
|
23
23
|
# Seeds used for seed studies — diverse values for good coverage
|
|
Binary file
|
|
Binary file
|
package/templates/program.md
CHANGED
|
@@ -43,7 +43,7 @@ git checkout -b exp/NNN-description
|
|
|
43
43
|
|
|
44
44
|
## Memory
|
|
45
45
|
|
|
46
|
-
Read `.claude/agent-memory/ml-researcher/MEMORY.md` at the start of each session.
|
|
46
|
+
Read `.claude/agent-memory/ml-researcher-{{MEMORY_DIR_NAME}}/MEMORY.md` at the start of each session.
|
|
47
47
|
Update it after each experiment with:
|
|
48
48
|
- Best result (if improved)
|
|
49
49
|
- What was tried and why
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|