claude-turing 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. package/.claude-plugin/plugin.json +34 -0
  2. package/LICENSE +21 -0
  3. package/README.md +457 -0
  4. package/agents/ml-evaluator.md +43 -0
  5. package/agents/ml-researcher.md +74 -0
  6. package/bin/cli.js +46 -0
  7. package/bin/turing-init.sh +57 -0
  8. package/commands/brief.md +83 -0
  9. package/commands/compare.md +24 -0
  10. package/commands/design.md +97 -0
  11. package/commands/init.md +123 -0
  12. package/commands/logbook.md +51 -0
  13. package/commands/mode.md +43 -0
  14. package/commands/poster.md +89 -0
  15. package/commands/preflight.md +75 -0
  16. package/commands/report.md +97 -0
  17. package/commands/rules/loop-protocol.md +91 -0
  18. package/commands/status.md +24 -0
  19. package/commands/suggest.md +95 -0
  20. package/commands/sweep.md +45 -0
  21. package/commands/train.md +66 -0
  22. package/commands/try.md +63 -0
  23. package/commands/turing.md +54 -0
  24. package/commands/validate.md +34 -0
  25. package/config/defaults.yaml +45 -0
  26. package/config/experiment_archetypes.yaml +127 -0
  27. package/config/lifecycle.toml +31 -0
  28. package/config/novelty_aliases.yaml +107 -0
  29. package/config/relationships.toml +125 -0
  30. package/config/state.toml +24 -0
  31. package/config/task_taxonomy.yaml +110 -0
  32. package/config/taxonomy.toml +37 -0
  33. package/package.json +54 -0
  34. package/src/claude-md.js +55 -0
  35. package/src/install.js +107 -0
  36. package/src/paths.js +20 -0
  37. package/src/postinstall.js +22 -0
  38. package/src/verify.js +109 -0
  39. package/templates/MEMORY.md +36 -0
  40. package/templates/README.md +93 -0
  41. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  42. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  43. package/templates/config.yaml +48 -0
  44. package/templates/evaluate.py +237 -0
  45. package/templates/features/__init__.py +0 -0
  46. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  47. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  48. package/templates/features/featurizers.py +138 -0
  49. package/templates/prepare.py +171 -0
  50. package/templates/program.md +216 -0
  51. package/templates/pyproject.toml +8 -0
  52. package/templates/requirements.txt +8 -0
  53. package/templates/scripts/__init__.py +0 -0
  54. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  55. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  56. package/templates/scripts/__pycache__/classify_task.cpython-314.pyc +0 -0
  57. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  58. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  59. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  60. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  61. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  62. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  63. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  64. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  65. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  66. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  67. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  68. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  69. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  70. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  71. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  72. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  73. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  74. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  75. package/templates/scripts/check_convergence.py +230 -0
  76. package/templates/scripts/compare_runs.py +124 -0
  77. package/templates/scripts/critique_hypothesis.py +350 -0
  78. package/templates/scripts/experiment_index.py +288 -0
  79. package/templates/scripts/generate_brief.py +389 -0
  80. package/templates/scripts/generate_logbook.py +423 -0
  81. package/templates/scripts/log_experiment.py +243 -0
  82. package/templates/scripts/manage_hypotheses.py +543 -0
  83. package/templates/scripts/novelty_guard.py +343 -0
  84. package/templates/scripts/parse_metrics.py +139 -0
  85. package/templates/scripts/post-train-hook.sh +74 -0
  86. package/templates/scripts/preflight.py +549 -0
  87. package/templates/scripts/scaffold.py +409 -0
  88. package/templates/scripts/show_environment.py +92 -0
  89. package/templates/scripts/show_experiment_tree.py +144 -0
  90. package/templates/scripts/show_families.py +133 -0
  91. package/templates/scripts/show_metrics.py +157 -0
  92. package/templates/scripts/statistical_compare.py +259 -0
  93. package/templates/scripts/stop-hook.sh +34 -0
  94. package/templates/scripts/suggest_next.py +301 -0
  95. package/templates/scripts/sweep.py +276 -0
  96. package/templates/scripts/synthesize_decision.py +300 -0
  97. package/templates/scripts/turing_io.py +76 -0
  98. package/templates/scripts/update_state.py +296 -0
  99. package/templates/scripts/validate_stability.py +167 -0
  100. package/templates/scripts/verify_placeholders.py +119 -0
  101. package/templates/sweep_config.yaml +14 -0
  102. package/templates/tests/__init__.py +0 -0
  103. package/templates/tests/conftest.py +91 -0
  104. package/templates/train.py +240 -0
@@ -0,0 +1,55 @@
1
+ import { readFile, writeFile } from "fs/promises";
2
+
3
+ const BEGIN = "<!-- BEGIN turing -->";
4
+ const END = "<!-- END turing -->";
5
+
6
+ const SECTION = `${BEGIN}
7
+ ## turing
8
+
9
+ Autonomous ML research harness. The autoresearch loop as a formal protocol.
10
+
11
+ ### Commands
12
+
13
+ | Command | Purpose |
14
+ |---------|---------|
15
+ | \`/turing\` | Router — detects ML intent and routes to sub-commands |
16
+ | \`/turing:init\` | Scaffold a new ML project with autoresearch harness |
17
+ | \`/turing:train [N]\` | Run autonomous experiment loop (optional max iterations) |
18
+ | \`/turing:status\` | Show experiment status, best model, convergence state |
19
+ | \`/turing:compare <a> <b>\` | Side-by-side experiment comparison |
20
+ | \`/turing:sweep\` | Generate and run hyperparameter sweep |
21
+ | \`/turing:validate\` | Check metric stability, auto-fix if noisy |
22
+ | \`/turing:try <hypothesis>\` | Inject a hypothesis into the experiment queue |
23
+ | \`/turing:brief\` | Generate research intelligence report |
24
+ | \`/turing:preflight\` | Pre-flight resource check (VRAM/RAM/disk) |
25
+
26
+ ### Agents
27
+
28
+ | Agent | Purpose |
29
+ |-------|---------|
30
+ | \`@ml-researcher\` | Autonomous training agent (Read/Write/Edit/Bash) |
31
+ | \`@ml-evaluator\` | Read-only analysis agent (Read/Bash only) |
32
+ ${END}`;
33
+
34
/**
 * Insert or refresh the managed turing section in a CLAUDE.md file.
 *
 * The managed section is the text between the BEGIN and END marker comments.
 * If such a section already exists it is replaced in place; otherwise the
 * section is appended, separated from any existing content by a blank line.
 * A file that does not exist yet is treated as empty and gets created.
 *
 * @param {string} claudeMdPath - Path of the CLAUDE.md file to update.
 * @returns {Promise<void>} Resolves once the file has been written.
 * @throws Rethrows any read error other than ENOENT, and any write error.
 */
export async function updateClaudeMd(claudeMdPath) {
  let content = "";
  try {
    content = await readFile(claudeMdPath, "utf-8");
  } catch (err) {
    // Only "file does not exist" means "start from empty". Any other read
    // failure must abort here: continuing would write a file containing just
    // the managed section over content we were unable to read.
    if (!err || err.code !== "ENOENT") throw err;
  }

  // Escape regex metacharacters so the markers are matched literally.
  const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const managedBlock = new RegExp(
    `${escapeRegExp(BEGIN)}[\\s\\S]*?${escapeRegExp(END)}`,
  );

  if (managedBlock.test(content)) {
    // A function replacer inserts SECTION verbatim; with a string replacement
    // any "$&", "$1", "$$" ... inside SECTION would be expanded as a pattern.
    content = content.replace(managedBlock, () => SECTION);
  } else {
    content = content
      ? content.trimEnd() + "\n\n" + SECTION + "\n"
      : SECTION + "\n";
  }

  await writeFile(claudeMdPath, content, "utf-8");
}
package/src/install.js ADDED
@@ -0,0 +1,107 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Turing installer.
4
+ *
5
+ * Deploys commands, agents, and config to the Claude Code plugin directory.
6
+ * Also inserts or refreshes a managed section in CLAUDE.md (~/.claude/CLAUDE.md for a global install, CLAUDE.md in the current directory for a project install).
7
+ *
8
+ * Usage:
9
+ * node src/install.js [--global] [--project]
10
+ */
11
+
12
+ import { readdir, copyFile, mkdir } from "fs/promises";
13
+ import { join, dirname } from "path";
14
+ import { fileURLToPath } from "url";
15
+ import { getTargetPaths } from "./paths.js";
16
+ import { updateClaudeMd } from "./claude-md.js";
17
+
18
+ const __dirname = dirname(fileURLToPath(import.meta.url));
19
+ const PLUGIN_ROOT = join(__dirname, "..");
20
+
21
+ // Single source of truth for sub-commands (DRY — used for dirs and file copy)
22
+ const SUB_COMMANDS = [
23
+ "init", "train", "status", "compare", "sweep", "validate",
24
+ "try", "brief", "suggest", "design", "logbook", "poster",
25
+ "report", "mode", "preflight",
26
+ ];
27
+
28
/**
 * Copy the plugin's commands, rules, agents and config files into the target
 * directory tree for the chosen scope, then refresh the managed CLAUDE.md
 * section.
 *
 * @param {{global?: boolean, project?: boolean}} [opts] - Scope flags.
 *   `global` takes precedence over `project`; with neither set the scope is
 *   "global".
 * @returns {Promise<void>} Resolves when every file has been copied.
 */
export async function install(opts = {}) {
  // Same precedence as a nested ternary: global first, then project, and
  // global again as the fallback.
  const scope = !opts.global && opts.project ? "project" : "global";
  const target = getTargetPaths(scope);
  const fromPlugin = (...segments) => join(PLUGIN_ROOT, ...segments);

  console.log("Turing ML Research Harness — Installer");
  console.log(`Target: ${target.commands} (${scope})`);
  console.log("");

  // One directory per sub-command, plus the root ("") and the shared folders.
  const directories = ["", "agents", "config", "rules", ...SUB_COMMANDS];
  for (const name of directories) {
    await mkdir(join(target.commands, name), { recursive: true });
  }

  // The router command becomes the top-level SKILL.md.
  await copyFile(
    fromPlugin("commands", "turing.md"),
    join(target.commands, "SKILL.md"),
  );
  console.log(" Router -> SKILL.md");

  // Every sub-command is installed as <name>/SKILL.md.
  for (const name of SUB_COMMANDS) {
    await copyFile(
      fromPlugin("commands", `${name}.md`),
      join(target.commands, name, "SKILL.md"),
    );
  }
  console.log(` ${SUB_COMMANDS.length} commands installed`);

  // Rules
  await copyFile(
    fromPlugin("commands", "rules", "loop-protocol.md"),
    join(target.commands, "rules", "loop-protocol.md"),
  );
  console.log(" Rules installed");

  // Agents: everything found in the plugin's agents directory.
  const agentFiles = await readdir(fromPlugin("agents"));
  for (const agentFile of agentFiles) {
    await copyFile(
      fromPlugin("agents", agentFile),
      join(target.agents, agentFile),
    );
  }
  console.log(` ${agentFiles.length} agents installed`);

  // Config: an explicit list of static schema files.
  const CONFIG_FILES = [
    "defaults.yaml", "lifecycle.toml", "taxonomy.toml",
    "experiment_archetypes.yaml", "novelty_aliases.yaml",
    "relationships.toml", "state.toml", "task_taxonomy.yaml",
  ];
  for (const configFile of CONFIG_FILES) {
    await copyFile(
      fromPlugin("config", configFile),
      join(target.config, configFile),
    );
  }
  console.log(` ${CONFIG_FILES.length} config files installed`);

  // Managed section in CLAUDE.md
  await updateClaudeMd(target.claudeMd);
  console.log(" CLAUDE.md updated");

  console.log("");
  console.log(
    `Installation complete. Run /turing:init to scaffold an ML project.`,
  );
}
97
+
98
// Direct execution: run the installer only when this file is the entry script
// (e.g. `node src/install.js --global`), not when it is imported as a module.
const isDirectRun =
  process.argv[1] &&
  fileURLToPath(import.meta.url).endsWith(
    // Compare by the entry script's base name; strip everything up to the
    // last "/" or "\" so both path separator styles are handled.
    process.argv[1].replace(/^.*[\\/]/, ""),
  );
if (isDirectRun) {
  install({
    global: process.argv.includes("--global"),
    project: process.argv.includes("--project"),
  }).catch((err) => {
    // install() returns a promise; handle rejection explicitly so a failed
    // copy produces a readable message and a non-zero exit code.
    console.error(
      `Installation failed: ${err && err.message ? err.message : err}`,
    );
    process.exitCode = 1;
  });
}
package/src/paths.js ADDED
@@ -0,0 +1,20 @@
1
+ import { homedir } from "os";
2
+ import { join } from "path";
3
+
4
/**
 * Resolve the installation paths for a scope.
 *
 * "global" places everything under `<home>/.claude`; any other value uses
 * `<cwd>/.claude`. The CLAUDE.md path is `<home>/.claude/CLAUDE.md` for the
 * global scope and `<cwd>/CLAUDE.md` otherwise.
 *
 * @param {string} scope - "global" or "project".
 * @returns {{commands: string, agents: string, config: string,
 *   claudeMd: string, scope: string}} Target paths plus the scope passed in.
 */
export function getTargetPaths(scope) {
  const isGlobal = scope === "global";
  const claudeDir = join(isGlobal ? homedir() : process.cwd(), ".claude");
  const commandsDir = join(claudeDir, "commands", "turing");

  let claudeMd;
  if (isGlobal) {
    claudeMd = join(claudeDir, "CLAUDE.md");
  } else {
    // Project installs keep CLAUDE.md at the project root, not in .claude/.
    claudeMd = join(process.cwd(), "CLAUDE.md");
  }

  return {
    commands: commandsDir,
    agents: join(commandsDir, "agents"),
    config: join(commandsDir, "config"),
    claudeMd,
    scope,
  };
}
@@ -0,0 +1,22 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * npm postinstall hook.
4
+ *
5
+ * Prints setup instructions after `npm install claude-turing`.
6
+ * Does NOT auto-install — the user must explicitly run the installer.
7
+ */
8
+
9
+ console.log("");
10
+ console.log("╔══════════════════════════════════════════════════════╗");
11
+ console.log("║ Turing ML Research Harness ║");
12
+ console.log("║ ║");
13
+ console.log("║ To complete setup, run: ║");
14
+ console.log("║ npx claude-turing install --global ║");
15
+ console.log("║ ║");
16
+ console.log("║ Or within a project: ║");
17
+ console.log("║ npx claude-turing install ║");
18
+ console.log("║ ║");
19
+ console.log("║ Then in Claude Code: ║");
20
+ console.log("║ /turing:init ║");
21
+ console.log("╚══════════════════════════════════════════════════════╝");
22
+ console.log("");
package/src/verify.js ADDED
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env node
2
+ /**
3
+ * Turing installation verifier.
4
+ *
5
+ * Checks that all expected files are in place and reports status.
6
+ *
7
+ * Usage:
8
+ * node src/verify.js [--scope global|project]
9
+ */
10
+
11
+ import { access } from "fs/promises";
12
+ import { join } from "path";
13
+ import { getTargetPaths } from "./paths.js";
14
+
15
// Files expected under the commands directory, as paths relative to it:
// the router SKILL.md, one SKILL.md per sub-command, and the loop-protocol
// rules file that the installer copies alongside them.
const EXPECTED_COMMANDS = [
  "SKILL.md",
  "init/SKILL.md",
  "train/SKILL.md",
  "status/SKILL.md",
  "compare/SKILL.md",
  "sweep/SKILL.md",
  "validate/SKILL.md",
  "try/SKILL.md",
  "brief/SKILL.md",
  "suggest/SKILL.md",
  "design/SKILL.md",
  "logbook/SKILL.md",
  "poster/SKILL.md",
  "report/SKILL.md",
  "mode/SKILL.md",
  "preflight/SKILL.md",
  "rules/loop-protocol.md",
];
33
+
34
+ const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
35
+
36
+ const EXPECTED_CONFIG = [
37
+ "defaults.yaml", "lifecycle.toml", "taxonomy.toml",
38
+ "experiment_archetypes.yaml", "novelty_aliases.yaml",
39
+ "relationships.toml", "state.toml", "task_taxonomy.yaml",
40
+ ];
41
+
42
/**
 * Report whether a path can be accessed.
 *
 * @param {string} path - File or directory path to probe.
 * @returns {Promise<boolean>} true when `access` succeeds, false when it
 *   rejects for any reason.
 */
async function fileExists(path) {
  return access(path).then(
    () => true,
    () => false,
  );
}
50
+
51
/**
 * Check an installation and print a per-file report.
 *
 * For each scope to inspect, the presence of the router SKILL.md decides
 * whether an installation exists there. When it does, every expected
 * command, agent and config file is checked and printed with a ✓ or ✗,
 * followed by the CLAUDE.md status and a summary line. The CLAUDE.md check
 * is reported but does not count towards the number of missing files.
 *
 * @param {{scope?: string}} [opts] - Restrict the check to one scope; when
 *   omitted, both "global" and "project" are inspected.
 * @returns {Promise<void>}
 */
export async function verify(opts = {}) {
  const scopesToCheck = opts.scope ? [opts.scope] : ["global", "project"];
  let anyInstallFound = false;

  for (const scope of scopesToCheck) {
    const paths = getTargetPaths(scope);

    // No router file means nothing is installed for this scope.
    const routerPresent = await fileExists(join(paths.commands, "SKILL.md"));
    if (!routerPresent) continue;
    anyInstallFound = true;

    console.log(`\n✓ turing found (${scope}): ${paths.commands}\n`);

    // Each group: heading to print, label used in the report, base directory
    // and the file names expected inside it.
    const groups = [
      ["Commands:", "commands", paths.commands, EXPECTED_COMMANDS],
      ["\nAgents:", "agents", paths.agents, EXPECTED_AGENTS],
      ["\nConfig:", "config", paths.config, EXPECTED_CONFIG],
    ];

    let missing = 0;
    for (const [heading, label, baseDir, names] of groups) {
      console.log(heading);
      for (const name of names) {
        const present = await fileExists(join(baseDir, name));
        console.log(` ${present ? "✓" : "✗"} ${label}/${name}`);
        if (!present) missing += 1;
      }
    }

    // CLAUDE.md is reported only; it is not added to the missing count.
    const claudeOk = await fileExists(paths.claudeMd);
    console.log(`\n ${claudeOk ? "✓" : "✗"} CLAUDE.md`);

    const summary =
      missing === 0
        ? "✓ Installation complete"
        : `✗ ${missing} files missing — run claude-turing install`;
    console.log(`\n ${summary}\n`);
  }

  if (!anyInstallFound) {
    console.log("\n✗ turing not found. Run: claude-turing install\n");
  }
}
99
+
100
// Direct execution: run the verifier only when this file is the entry script
// (e.g. `node src/verify.js --scope global`), not when it is imported.
const isDirectRun =
  process.argv[1] &&
  // import.meta.url is a file: URL and always uses "/", while process.argv[1]
  // may use "\" as separator. Strip up to the last separator of either kind
  // so only the script's base name is compared.
  import.meta.url.endsWith(process.argv[1].replace(/^.*[\\/]/, ""));
if (isDirectRun) {
  const scopeIdx = process.argv.indexOf("--scope");
  verify({
    // The value following --scope, if the flag was given.
    scope: scopeIdx !== -1 ? process.argv[scopeIdx + 1] : undefined,
  }).catch((err) => {
    // verify() returns a promise; handle rejection explicitly so a failure
    // produces a readable message and a non-zero exit code.
    console.error(
      `Verification failed: ${err && err.message ? err.message : err}`,
    );
    process.exitCode = 1;
  });
}
@@ -0,0 +1,36 @@
1
+ # ML Researcher Memory
2
+
3
+ ## Goal
4
+
5
+ {{TASK_DESCRIPTION}}
6
+
7
+ Primary metric: {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better).
8
+
9
+ ## Best Result
10
+
11
+ No experiments completed yet. Run `/turing:train` to begin.
12
+
13
+ ## Observations
14
+
15
+ - Initial model: XGBoost with default hyperparams (n_estimators=100, max_depth=4, lr=0.1)
16
+ - Config file: `config.yaml` controls all hyperparameters — do not hardcode in train.py
17
+ - Sweep tool: `python scripts/sweep.py` for systematic grid search
18
+ - Per-experiment branches: `exp/NNN-description` preserves all code variants
19
+ - Evaluation is immutable: `prepare.py` and `evaluate.py` are READ-ONLY
20
+
21
+ ## Failed Approaches
22
+
23
+ (none yet)
24
+
25
+ ## Promising Directions
26
+
27
+ - Hyperparameter sweep across n_estimators, max_depth, learning_rate
28
+ - LightGBM as alternative to XGBoost
29
+ - Feature engineering: add domain-specific features
30
+ - Try different model architectures (RandomForest, MLP)
31
+
32
+ ## Session History
33
+
34
+ | Session | Experiments | Best Metric | Notes |
35
+ |---------|-------------|-------------|-------|
36
+ | (none) | 0 | N/A | Pipeline initialized, no experiments run |
@@ -0,0 +1,93 @@
1
+ # {{PROJECT_NAME}} ML Pipeline
2
+
3
+ {{TASK_DESCRIPTION}}
4
+
5
+ ## Overview
6
+
7
+ This pipeline uses the [Turing](https://github.com/pragnition/turing) autoresearch pattern — an AI agent iteratively trains, evaluates, and improves models by modifying `train.py` while the evaluation infrastructure (`prepare.py`, `evaluate.py`) remains immutable.
8
+
9
+ **Primary metric:** {{TARGET_METRIC}} ({{METRIC_DIRECTION}} is better)
10
+
11
+ ## The Separation
12
+
13
+ | Layer | Files | Agent Access | Purpose |
14
+ |-------|-------|-------------|---------|
15
+ | Measurement | `prepare.py`, `evaluate.py` | READ-ONLY | Ensures all experiments are measured by the same yardstick |
16
+ | Hypothesis | `train.py`, `config.yaml` | READ-WRITE | All experimental changes go here |
17
+
18
+ This separation is the invariant that makes experiment comparisons valid.
19
+
20
+ ## Quick Start
21
+
22
+ ```bash
23
+ # 1. Set up the environment
24
+ python -m venv .venv
25
+ source .venv/bin/activate
26
+ pip install -r requirements.txt
27
+
28
+ # 2. Add your training data to {{DATA_SOURCE}}
29
+
30
+ # 3. Create train/val/test splits
31
+ python prepare.py
32
+
33
+ # 4. Run training
34
+ python train.py > run.log 2>&1
35
+
36
+ # 5. Check results
37
+ grep -A 10 "^---" run.log
38
+
39
+ # 6. View experiment history
40
+ python scripts/show_metrics.py
41
+ ```
42
+
43
+ ## Using the Autoresearch Agent
44
+
45
+ The agent follows `program.md`. It:
46
+
47
+ 1. Reads recent experiment results
48
+ 2. Proposes a hypothesis
49
+ 3. Modifies `train.py` or `config.yaml`
50
+ 4. Runs training and evaluates
51
+ 5. Keeps improvements, discards regressions
52
+ 6. Repeats until convergence
53
+
54
+ To start: `/turing:train` in Claude Code.
55
+ For hands-off mode: `/loop 5m /turing:train`
56
+
57
+ ## Directory Structure
58
+
59
+ ```
60
+ {{ML_DIR}}/
61
+ prepare.py READ-ONLY: Data loading, splitting
62
+ evaluate.py READ-ONLY: Evaluation harness
63
+ train.py AGENT-EDITABLE: Training code
64
+ config.yaml Hyperparameters and settings
65
+ sweep_config.yaml Sweep parameter ranges
66
+ program.md Agent protocol instructions
67
+ features/
68
+ featurizers.py Feature engineering pipeline
69
+ scripts/
70
+ log_experiment.py Experiment JSONL logging
71
+ show_metrics.py Display experiment metrics
72
+ compare_runs.py Side-by-side comparison
73
+ sweep.py Hyperparameter sweep tool
74
+ post-train-hook.sh Auto-log after training
75
+ stop-hook.sh Convergence detection hook
76
+ experiments/
77
+ log.jsonl Structured experiment log
78
+ results.tsv Quick-reference summary
79
+ models/
80
+ best/ Current best model
81
+ archive/ Previous best models
82
+ data/
83
+ splits/ Train/val/test splits
84
+ tests/
85
+ conftest.py Shared test fixtures
86
+ ```
87
+
88
+ ## Running Tests
89
+
90
+ ```bash
91
+ source .venv/bin/activate
92
+ python -m pytest tests/ -v
93
+ ```
@@ -0,0 +1,48 @@
1
+ # {{PROJECT_NAME}} ML Pipeline Configuration
2
+ # Edit this file to change hyperparameters. The autoresearch agent
3
+ # modifies this file for hyperparameter experiments.
4
+
5
+ data:
6
+ source: "{{DATA_SOURCE}}"
7
+ splits_dir: "data/splits"
8
+ target_column: "label" # Column name for the prediction target
9
+ split_ratios:
10
+ train: 0.70
11
+ val: 0.15
12
+ test: 0.15
13
+ random_state: 42
14
+
15
+ evaluation:
16
+ primary_metric: "{{TARGET_METRIC}}"
17
+ metrics: ["{{TARGET_METRIC}}", "f1_weighted", "accuracy"]
18
+ # Set to true for metrics where lower is better (mae, mse, rmse, loss)
19
+ # Set to false for metrics where higher is better (accuracy, f1, auc)
20
+ lower_is_better: false # {{METRIC_DIRECTION}} -- change to true if lower is better
21
+
22
+ convergence:
23
+ patience: 3 # Consecutive non-improvements before stopping
24
+ improvement_threshold: 0.005 # 0.5% relative improvement required
25
+
26
+ model:
27
+ type: "xgboost"
28
+ hyperparams:
29
+ n_estimators: 100
30
+ max_depth: 4
31
+ learning_rate: 0.1
32
+ objective: "multi:softmax"
33
+ num_class: 2 # Adjust for your number of classes
34
+ eval_metric: "mlogloss"
35
+ verbosity: 0
36
+
37
+ output:
38
+ models_dir: "models"
39
+ best_model_dir: "models/best"
40
+ archive_dir: "models/archive"
41
+ experiment_log: "experiments/log.jsonl"
42
+ results_tsv: "experiments/results.tsv"
43
+
44
+ # Behavioral probe constraints (enforced by hidden evaluation harness)
45
+ # These prevent the agent from gaming the metric by skipping real work.
46
+ constraints:
47
+ min_train_time: 5 # Seconds — training under this is suspicious
48
+ min_model_size_bytes: 100 # Bytes — a real model has non-trivial size