claude-turing 4.5.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (260) hide show
  1. package/.claude-plugin/marketplace.json +18 -0
  2. package/.claude-plugin/plugin.json +2 -2
  3. package/README.md +1 -1
  4. package/commands/turing.md +85 -77
  5. package/config/commands.yaml +928 -0
  6. package/package.json +11 -4
  7. package/skills/turing/SKILL.md +180 -0
  8. package/skills/turing/ablate/SKILL.md +47 -0
  9. package/skills/turing/annotate/SKILL.md +23 -0
  10. package/skills/turing/archive/SKILL.md +23 -0
  11. package/skills/turing/audit/SKILL.md +56 -0
  12. package/skills/turing/baseline/SKILL.md +45 -0
  13. package/skills/turing/brief/SKILL.md +95 -0
  14. package/skills/turing/budget/SKILL.md +52 -0
  15. package/skills/turing/calibrate/SKILL.md +47 -0
  16. package/skills/turing/card/SKILL.md +36 -0
  17. package/skills/turing/changelog/SKILL.md +22 -0
  18. package/skills/turing/checkpoint/SKILL.md +47 -0
  19. package/skills/turing/cite/SKILL.md +23 -0
  20. package/skills/turing/compare/SKILL.md +24 -0
  21. package/skills/turing/counterfactual/SKILL.md +27 -0
  22. package/skills/turing/curriculum/SKILL.md +43 -0
  23. package/skills/turing/design/SKILL.md +97 -0
  24. package/skills/turing/diagnose/SKILL.md +52 -0
  25. package/skills/turing/diff/SKILL.md +48 -0
  26. package/skills/turing/distill/SKILL.md +56 -0
  27. package/skills/turing/doctor/SKILL.md +31 -0
  28. package/skills/turing/ensemble/SKILL.md +54 -0
  29. package/skills/turing/explore/SKILL.md +107 -0
  30. package/skills/turing/export/SKILL.md +48 -0
  31. package/skills/turing/feature/SKILL.md +42 -0
  32. package/skills/turing/flashback/SKILL.md +22 -0
  33. package/skills/turing/fork/SKILL.md +40 -0
  34. package/skills/turing/frontier/SKILL.md +45 -0
  35. package/skills/turing/init/SKILL.md +154 -0
  36. package/skills/turing/leak/SKILL.md +47 -0
  37. package/skills/turing/lit/SKILL.md +47 -0
  38. package/skills/turing/logbook/SKILL.md +51 -0
  39. package/skills/turing/merge/SKILL.md +24 -0
  40. package/skills/turing/mode/SKILL.md +43 -0
  41. package/skills/turing/onboard/SKILL.md +20 -0
  42. package/skills/turing/paper/SKILL.md +44 -0
  43. package/skills/turing/plan/SKILL.md +27 -0
  44. package/skills/turing/poster/SKILL.md +89 -0
  45. package/skills/turing/postmortem/SKILL.md +28 -0
  46. package/skills/turing/preflight/SKILL.md +75 -0
  47. package/skills/turing/present/SKILL.md +23 -0
  48. package/skills/turing/profile/SKILL.md +43 -0
  49. package/skills/turing/prune/SKILL.md +26 -0
  50. package/skills/turing/quantize/SKILL.md +24 -0
  51. package/skills/turing/queue/SKILL.md +48 -0
  52. package/skills/turing/registry/SKILL.md +31 -0
  53. package/skills/turing/regress/SKILL.md +53 -0
  54. package/skills/turing/replay/SKILL.md +23 -0
  55. package/skills/turing/report/SKILL.md +97 -0
  56. package/skills/turing/reproduce/SKILL.md +48 -0
  57. package/skills/turing/retry/SKILL.md +41 -0
  58. package/skills/turing/review/SKILL.md +20 -0
  59. package/skills/turing/rules/loop-protocol.md +91 -0
  60. package/skills/turing/sanity/SKILL.md +48 -0
  61. package/skills/turing/scale/SKILL.md +55 -0
  62. package/skills/turing/search/SKILL.md +22 -0
  63. package/skills/turing/seed/SKILL.md +47 -0
  64. package/skills/turing/sensitivity/SKILL.md +41 -0
  65. package/skills/turing/share/SKILL.md +20 -0
  66. package/skills/turing/simulate/SKILL.md +28 -0
  67. package/skills/turing/status/SKILL.md +24 -0
  68. package/skills/turing/stitch/SKILL.md +49 -0
  69. package/skills/turing/suggest/SKILL.md +159 -0
  70. package/skills/turing/surgery/SKILL.md +27 -0
  71. package/skills/turing/sweep/SKILL.md +45 -0
  72. package/skills/turing/template/SKILL.md +22 -0
  73. package/skills/turing/train/SKILL.md +75 -0
  74. package/skills/turing/transfer/SKILL.md +54 -0
  75. package/skills/turing/trend/SKILL.md +21 -0
  76. package/skills/turing/try/SKILL.md +63 -0
  77. package/skills/turing/update/SKILL.md +27 -0
  78. package/skills/turing/validate/SKILL.md +34 -0
  79. package/skills/turing/warm/SKILL.md +53 -0
  80. package/skills/turing/watch/SKILL.md +60 -0
  81. package/skills/turing/whatif/SKILL.md +31 -0
  82. package/skills/turing/xray/SKILL.md +43 -0
  83. package/src/command-registry.js +160 -0
  84. package/src/install.js +8 -34
  85. package/src/sync-skills-layout.js +149 -0
  86. package/src/verify.js +5 -88
  87. package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
  88. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  89. package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
  90. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  91. package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
  92. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  93. package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
  94. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  95. package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  96. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  97. package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
  98. package/templates/scripts/__pycache__/ablation_study.cpython-314.pyc +0 -0
  99. package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
  100. package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
  101. package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
  102. package/templates/scripts/__pycache__/budget_manager.cpython-314.pyc +0 -0
  103. package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
  104. package/templates/scripts/__pycache__/build_ensemble.cpython-314.pyc +0 -0
  105. package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
  106. package/templates/scripts/__pycache__/calibration.cpython-314.pyc +0 -0
  107. package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
  108. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  109. package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
  110. package/templates/scripts/__pycache__/checkpoint_manager.cpython-314.pyc +0 -0
  111. package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
  112. package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
  113. package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
  114. package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
  115. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
  116. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-314.pyc +0 -0
  117. package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
  118. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  119. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
  120. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-314.pyc +0 -0
  121. package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
  122. package/templates/scripts/__pycache__/diagnose_errors.cpython-314.pyc +0 -0
  123. package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
  124. package/templates/scripts/__pycache__/draft_paper_sections.cpython-314.pyc +0 -0
  125. package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
  126. package/templates/scripts/__pycache__/equivalence_checker.cpython-314.pyc +0 -0
  127. package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
  128. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  129. package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
  130. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  131. package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
  132. package/templates/scripts/__pycache__/experiment_diff.cpython-314.pyc +0 -0
  133. package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
  134. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  135. package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
  136. package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
  137. package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
  138. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  139. package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
  140. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  141. package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
  142. package/templates/scripts/__pycache__/experiment_simulator.cpython-314.pyc +0 -0
  143. package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
  144. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  145. package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
  146. package/templates/scripts/__pycache__/export_card.cpython-314.pyc +0 -0
  147. package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
  148. package/templates/scripts/__pycache__/export_formats.cpython-314.pyc +0 -0
  149. package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
  150. package/templates/scripts/__pycache__/failure_postmortem.cpython-314.pyc +0 -0
  151. package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
  152. package/templates/scripts/__pycache__/feature_intelligence.cpython-314.pyc +0 -0
  153. package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
  154. package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
  155. package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
  156. package/templates/scripts/__pycache__/generate_baselines.cpython-314.pyc +0 -0
  157. package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
  158. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  159. package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
  160. package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
  161. package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
  162. package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
  163. package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
  164. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  165. package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
  166. package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
  167. package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
  168. package/templates/scripts/__pycache__/generate_onboarding.cpython-314.pyc +0 -0
  169. package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
  170. package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
  171. package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
  172. package/templates/scripts/__pycache__/incremental_update.cpython-314.pyc +0 -0
  173. package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
  174. package/templates/scripts/__pycache__/knowledge_transfer.cpython-314.pyc +0 -0
  175. package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
  176. package/templates/scripts/__pycache__/latency_benchmark.cpython-314.pyc +0 -0
  177. package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
  178. package/templates/scripts/__pycache__/leakage_detector.cpython-314.pyc +0 -0
  179. package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
  180. package/templates/scripts/__pycache__/literature_search.cpython-314.pyc +0 -0
  181. package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
  182. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  183. package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
  184. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  185. package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
  186. package/templates/scripts/__pycache__/methodology_audit.cpython-314.pyc +0 -0
  187. package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
  188. package/templates/scripts/__pycache__/model_distiller.cpython-314.pyc +0 -0
  189. package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
  190. package/templates/scripts/__pycache__/model_lifecycle.cpython-314.pyc +0 -0
  191. package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
  192. package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
  193. package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
  194. package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
  195. package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
  196. package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
  197. package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
  198. package/templates/scripts/__pycache__/model_xray.cpython-314.pyc +0 -0
  199. package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
  200. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  201. package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
  202. package/templates/scripts/__pycache__/package_experiments.cpython-314.pyc +0 -0
  203. package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
  204. package/templates/scripts/__pycache__/pareto_frontier.cpython-314.pyc +0 -0
  205. package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
  206. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  207. package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
  208. package/templates/scripts/__pycache__/pipeline_manager.cpython-314.pyc +0 -0
  209. package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
  210. package/templates/scripts/__pycache__/profile_training.cpython-314.pyc +0 -0
  211. package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
  212. package/templates/scripts/__pycache__/regression_gate.cpython-314.pyc +0 -0
  213. package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
  214. package/templates/scripts/__pycache__/reproduce_experiment.cpython-314.pyc +0 -0
  215. package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
  216. package/templates/scripts/__pycache__/research_planner.cpython-314.pyc +0 -0
  217. package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
  218. package/templates/scripts/__pycache__/sanity_checks.cpython-314.pyc +0 -0
  219. package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
  220. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  221. package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
  222. package/templates/scripts/__pycache__/scaling_estimator.cpython-314.pyc +0 -0
  223. package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
  224. package/templates/scripts/__pycache__/seed_runner.cpython-314.pyc +0 -0
  225. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
  226. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-314.pyc +0 -0
  227. package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
  228. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  229. package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
  230. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  231. package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
  232. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  233. package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
  234. package/templates/scripts/__pycache__/simulate_review.cpython-314.pyc +0 -0
  235. package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
  236. package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
  237. package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
  238. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  239. package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
  240. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  241. package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
  242. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  243. package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
  244. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  245. package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
  246. package/templates/scripts/__pycache__/training_monitor.cpython-314.pyc +0 -0
  247. package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
  248. package/templates/scripts/__pycache__/treequest_suggest.cpython-314.pyc +0 -0
  249. package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
  250. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  251. package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
  252. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  253. package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
  254. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  255. package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
  256. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  257. package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
  258. package/templates/scripts/__pycache__/warm_start.cpython-314.pyc +0 -0
  259. package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
  260. package/templates/scripts/__pycache__/whatif_engine.cpython-314.pyc +0 -0
@@ -0,0 +1,48 @@
1
+ ---
2
+ name: export
3
+ description: Export model to production format with equivalence verification, latency benchmarking, and deployment model card.
4
+ disable-model-invocation: true
5
+ argument-hint: "[exp-id] [--format joblib|xgboost_json|onnx|torchscript|tflite]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Export a trained model to a production-ready format.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - First argument can be an experiment ID (e.g., `exp-042`); defaults to the best experiment if omitted
20
+ - `--format joblib|xgboost_json|onnx|torchscript|tflite` specifies export format (auto-detected if omitted)
21
+ - `--skip-equivalence` skips inference equivalence check
22
+ - `--skip-latency` skips latency benchmark
23
+ - `--samples 100` sets test sample count
24
+
25
+ 3. **Run export pipeline:**
26
+ ```bash
27
+ python scripts/export_model.py $ARGUMENTS
28
+ ```
29
+
30
+ 4. **Report results:**
31
+ - **Export:** format, file size, output path, dependencies
32
+ - **Equivalence:** verdict (equivalent/approximately_equivalent/divergent), max delta
33
+ - **Latency:** p50/p95/p99 ms, speedup vs original
34
+ - **Model Card:** metrics, seed study, equivalence, latency, dependencies
35
+
36
+ 5. **Output:** exported model + model_card.yaml written to `exports/exp-NNN/`
37
+
38
+ 6. **If the model file is not found:** suggest checking the `models/best/` directory.
39
+
40
+ ## Examples
41
+
42
+ ```
43
+ /turing:export # Best experiment, default format
44
+ /turing:export exp-042 # Specific experiment
45
+ /turing:export --format xgboost_json # Native XGBoost JSON
46
+ /turing:export --format onnx # ONNX format
47
+ /turing:export --skip-equivalence --skip-latency # Fast export
48
+ ```
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: feature
3
+ description: Automated feature selection — multi-method importance consensus, redundancy detection, and interaction feature generation.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--method all|importance] [--top-k 20]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Systematically evaluate which features matter and which are noise.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - `--method all|importance|selection|generation` — analysis type (default: all)
20
+ - `--top-k 20` — number of top features to consider
21
+ - `--json` — raw JSON output
22
+
23
+ 3. **Run feature analysis:**
24
+ ```bash
25
+ python scripts/feature_intelligence.py $ARGUMENTS
26
+ ```
27
+
28
+ 4. **Report includes:**
29
+ - Consensus ranking: features ranked by number of methods placing them in top-K
30
+ - Per-method ranks: mutual information, L1, tree-based
31
+ - Redundant pairs: features with |r| > 0.95
32
+ - Candidate interaction features from top consensus set
33
+ - Drop recommendation for zero-consensus features
34
+
35
+ 5. **Saved output:** report in `experiments/features/features-*.yaml`
36
+
37
+ ## Examples
38
+
39
+ ```
40
+ /turing:feature # Full analysis
41
+ /turing:feature --top-k 10 # Top-10 consensus
42
+ ```
@@ -0,0 +1,22 @@
1
+ ---
2
+ name: flashback
3
+ description: Session context restoration — "where was I?" summary after days away. Current best, pending hypotheses, last session, annotations.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--days 7] [--last 10]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Come back to a project after a week and start working in 10 seconds instead of 30 minutes.
10
+
11
+ ## Steps
12
+ 1. **Activate environment:** `source .venv/bin/activate`
13
+ 2. **Run:** `python scripts/session_flashback.py $ARGUMENTS`
14
+ 3. **Report:** current best, last session experiments, pending hypotheses, annotations, budget, suggested next action
15
+ 4. **Saved output:** `experiments/flashbacks/flashback-*.yaml`
16
+
17
+ ## Examples
18
+ ```
19
+ /turing:flashback # Default: last 7 days
20
+ /turing:flashback --days 14 # 2-week lookback
21
+ /turing:flashback --last 5 # Last 5 experiments
22
+ ```
@@ -0,0 +1,40 @@
1
+ ---
2
+ name: fork
3
+ description: Branch an experiment into parallel tracks — run both A and B, report the winner.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-id> --branches \"approach A\" \"approach B\" [--auto-promote]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Fork an experiment into parallel branches and compare results.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - First argument is the parent experiment ID
20
+ - `--branches "A" "B" "C"` — branch descriptions (2+ required)
21
+ - `--auto-promote` — automatically keep the winning branch
22
+
23
+ 3. **Run fork:**
24
+ ```bash
25
+ python scripts/fork_experiment.py $ARGUMENTS
26
+ ```
27
+
28
+ 4. **Report results:**
29
+ - Comparison tree showing each branch's metric
30
+ - Winner identified and marked
31
+ - Recommendation: promote winner, abandon rest
32
+
33
+ 5. **Saved output:** report written to `experiments/forks/exp-NNN-fork.yaml`
34
+
35
+ ## Examples
36
+
37
+ ```
38
+ /turing:fork exp-042 --branches "LightGBM with dart" "XGBoost deeper trees"
39
+ /turing:fork exp-042 --branches "A" "B" "C" --auto-promote
40
+ ```
@@ -0,0 +1,45 @@
1
+ ---
2
+ name: frontier
3
+ description: Visualize Pareto frontier across multiple objectives — answers "which model is actually best?" when there are tradeoffs.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--metrics \"accuracy,train_seconds,n_params\"] [--ascii]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Visualize the Pareto frontier across multiple objectives from experiment history.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - `--metrics "accuracy,train_seconds,n_params"` specifies metrics to analyze
20
+ - Without `--metrics`, uses primary metric + train_seconds from config
21
+ - `--ascii` generates an ASCII scatter plot (2D projection)
22
+
23
+ 3. **Run Pareto analysis:**
24
+ ```bash
25
+ python scripts/pareto_frontier.py $ARGUMENTS
26
+ ```
27
+
28
+ 4. **Report results:**
29
+ - **Pareto-optimal experiments:** table with all metrics and what each is best at
30
+ - **Dominated experiments:** with their nearest Pareto neighbor
31
+ - **ASCII scatter plot** (if `--ascii`): 2D projection with * for Pareto, · for dominated
32
+ - Summary: "N Pareto-optimal of M experiments across K metrics"
33
+
34
+ 5. **Saved output:** results written to `experiments/frontiers/frontier-YYYY-MM-DD.yaml`
35
+
36
+ 6. **If no experiments have all of the requested metrics:** suggest which metrics are available.
37
+
38
+ ## Examples
39
+
40
+ ```
41
+ /turing:frontier # Default: metric vs time
42
+ /turing:frontier --metrics "accuracy,train_seconds" # 2D frontier
43
+ /turing:frontier --metrics "accuracy,train_seconds,n_params" # 3D frontier
44
+ /turing:frontier --ascii # With scatter plot
45
+ ```
@@ -0,0 +1,154 @@
1
+ ---
2
+ name: init
3
+ description: Initialize a new ML project with the Turing autoresearch harness. Scaffolds the full experiment infrastructure — immutable evaluation pipeline, agent-editable training code, structured logging, convergence detection hooks, and a Python virtual environment. Use --plan to generate a research plan.
4
+ disable-model-invocation: true
5
+ argument-hint: "[project_name] [--plan]"
6
+ allowed-tools: Read, Write, Edit, Bash(*), Grep, Glob, WebSearch, WebFetch
7
+ ---
8
+
9
+ Scaffold a new ML project with the Turing autoresearch harness. This creates the separation between the measurement apparatus (READ-ONLY) and the hypothesis space (AGENT-EDITABLE) that makes autonomous experimentation trustworthy.
10
+
11
+ ## Interactive Setup
12
+
13
+ Ask the user for the following (or accept from `$ARGUMENTS` if provided as JSON):
14
+
15
+ 1. **Project name** (`{{PROJECT_NAME}}`): Name of the ML project (e.g., "sentiment", "churn", "fraud-detection")
16
+ 2. **Target metric** (`{{TARGET_METRIC}}`): Primary metric to optimize (e.g., "accuracy", "f1", "mae", "mse", "auc")
17
+ 3. **Metric direction**: Is lower better (mae, mse, loss) or higher better (accuracy, f1, auc)?
18
+ 4. **Task description** (`{{TASK_DESCRIPTION}}`): What the model does (e.g., "Predict customer churn from usage data")
19
+ 5. **ML directory** (`{{ML_DIR}}`): Where ML files go relative to project root (e.g., "ml/sentiment")
20
+ 6. **Data source** (`{{DATA_SOURCE}}`): Where training data comes from (e.g., "data/reviews.csv")
21
+
22
+ ## Scaffolding
23
+
24
+ Once you have all 6 values, delegate to the unified scaffolding script:
25
+
26
+ ```bash
27
+ python3 <templates_dir>/scripts/scaffold.py \
28
+ --project-name "<project_name>" \
29
+ --target-metric "<target_metric>" \
30
+ --metric-direction "<metric_direction>" \
31
+ --task-description "<task_description>" \
32
+ --ml-dir "<ml_dir>" \
33
+ --data-source "<data_source>" \
34
+ --templates-dir "<templates_dir>"
35
+ ```
36
+
37
+ The scaffold script handles everything in a single atomic operation:
38
+ - Copies all template files with placeholder substitution
39
+ - Creates data/, experiments/, models/ directories
40
+ - Sets up agent memory at `.claude/agent-memory/ml-researcher-{project_name}/MEMORY.md`
41
+ - Configures Claude Code hooks in `.claude/settings.local.json`
42
+ - Creates Python virtual environment and installs requirements
43
+ - Verifies all placeholders were replaced (fails loudly if any remain)
44
+
45
+ ## Locating Templates
46
+
47
+ Use the installed command-pack templates directory first:
48
+ ```
49
+ .claude/commands/turing/templates/
50
+ ~/.claude/commands/turing/templates/
51
+ ```
52
+ Then fall back to plugin or npm locations:
53
+ ```
54
+ ~/.claude/plugins/*/templates/
55
+ node_modules/claude-turing/templates/
56
+ ```
57
+
58
+ Example command:
59
+
60
+ ```bash
61
+ python3 ~/.claude/commands/turing/templates/scripts/scaffold.py \
62
+ --project-name "<project_name>" \
63
+ --target-metric "<target_metric>" \
64
+ --metric-direction "<metric_direction>" \
65
+ --task-description "<task_description>" \
66
+ --ml-dir "<ml_dir>" \
67
+ --data-source "<data_source>" \
68
+ --templates-dir ~/.claude/commands/turing/templates
69
+ ```
70
+
71
+ ## After Scaffolding
72
+
73
+ Report what was created:
74
+ - The separation: READ-ONLY (`prepare.py`, `evaluate.py`) vs AGENT-EDITABLE (`train.py`)
75
+ - Next steps: add data to the configured data source path, run `python prepare.py`, then `/turing:train`
76
+ - The taste-leverage loop: `/turing:try` to inject hypotheses, `/turing:brief` for intelligence reports
77
+
78
+ ## Research Plan Generation (--plan flag)
79
+
80
+ If `$ARGUMENTS` contains `--plan`, generate a research plan AFTER scaffolding. This gives the agent strategic direction for its first 5-10 experiments rather than ad-hoc exploration.
81
+
82
+ ### Steps:
83
+
84
+ 1. **Read the task context** from the just-created `config.yaml`: task description, model type, target metric, data source.
85
+
86
+ 2. **Search literature** with `WebSearch` for the task domain:
87
+ - "state of the art <task description> machine learning 2024 2025"
88
+ - "best model <target metric> <data type> benchmark"
89
+ - "<task description> common approaches survey"
90
+
91
+ Use `WebFetch` on top 2-3 results to extract: dominant model families, typical metric ranges, known challenges.
92
+
93
+ 3. **Generate `RESEARCH_PLAN.md`** in the ML project directory with this structure:
94
+
95
+ ```markdown
96
+ # Research Plan: <task description>
97
+
98
+ Generated: <date>
99
+
100
+ ## Task Summary
101
+ <one paragraph describing the task, data, and success criteria>
102
+
103
+ ## Model Families to Explore
104
+ Ordered by expected relevance based on literature:
105
+ 1. **<family 1>** — <why, with citation>
106
+ 2. **<family 2>** — <why, with citation>
107
+ 3. **<family 3>** — <why, with citation>
108
+
109
+ ## Evaluation Strategy
110
+ - Primary metric: <metric> (<higher/lower> is better)
111
+ - Multi-run recommendation: <yes/no, based on expected variance>
112
+ - Baseline target: <realistic first-pass metric from literature>
113
+
114
+ ## Search Budget
115
+ - <N> experiments per model family before moving on
116
+ - Total budget: <N> experiments before first convergence check
117
+
118
+ ## Success Criteria
119
+ - Target metric: <value from literature benchmarks>
120
+ - Convergence: <patience> consecutive non-improvements
121
+
122
+ ## Known Challenges
123
+ - <challenge 1 from literature, e.g., "class imbalance common in this domain">
124
+ - <challenge 2>
125
+
126
+ ## Sources
127
+ - <citation 1>
128
+ - <citation 2>
129
+ ```
130
+
131
+ 4. **Self-critique the plan** (one round):
132
+ - Are the model families ordered by evidence strength?
133
+ - Is the budget realistic?
134
+ - Are the success criteria grounded in benchmark data?
135
+ Revise if any section is vague or unsupported.
136
+
137
+ 5. **Report:** "Research plan generated at `<ml_dir>/RESEARCH_PLAN.md`. The agent will read this during `/turing:train` for strategic direction."
138
+
139
+ ### Integration
140
+
141
+ The agent's `program.md` OBSERVE step reads `RESEARCH_PLAN.md` (if it exists) for strategic direction. The plan is advisory — the agent can deviate but should note why in `experiment_state.yaml`.
142
+
143
+ ## Multiple Projects
144
+
145
+ You can scaffold multiple ML projects in the same repository:
146
+
147
+ ```bash
148
+ /turing:init # First project: prompts for ml_dir (e.g., ml/sentiment)
149
+ /turing:init # Second project: prompts for ml_dir (e.g., ml/churn)
150
+ ```
151
+
152
+ Each project gets its own directory with independent config, data, experiments, and models. `/turing:train ml/sentiment` or `/turing:train ml/churn` targets a specific project. If you `cd ml/sentiment` first, `/turing:train` auto-detects from cwd.
153
+
154
+ Agent memory is scoped per project: `.claude/agent-memory/ml-researcher-{project_name}/MEMORY.md`
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: leak
3
+ description: Targeted leakage detection — probe for data leakage with single-feature tests, correlation checks, and train/test overlap detection.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--deep] [--features feature_1,feature_2] [--json]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Actively probe for data leakage — the #1 cause of "too good to be true" results.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - `--deep` — run full single-feature analysis (slow but thorough)
20
+ - `--features "feat_1,feat_2"` — check specific features
21
+ - `--json` — raw JSON output
22
+
23
+ 3. **Run leakage scan:**
24
+ ```bash
25
+ python scripts/leakage_detector.py $ARGUMENTS
26
+ ```
27
+
28
+ 4. **Checks performed:**
29
+ - **Feature-target correlation:** flag features with >0.95 correlation to target
30
+ - **Single-feature predictiveness (--deep):** train on each feature alone, flag any that achieve >80% of full model performance
31
+ - **Train/test overlap:** hash-based deduplication across splits
32
+
33
+ 5. **Verdicts:**
34
+ - **CLEAN** — no leakage detected
35
+ - **SUSPICIOUS** — warnings to review
36
+ - **LEAKAGE DETECTED** — critical flags found
37
+
38
+ 6. **Integration:** satisfies the "data leakage" check in `/turing:audit`
39
+
40
+ 7. **Saved output:** report in `experiments/leakage/leak-*.yaml`
41
+
42
+ ## Examples
43
+
44
+ ```
45
+ /turing:leak # Standard correlation + overlap checks
46
+ /turing:leak --deep # Full single-feature analysis
47
+ ```
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: lit
3
+ description: Literature search scoped to the current experiment domain — find papers, SOTA baselines, and related work without leaving the terminal.
4
+ disable-model-invocation: true
5
+ argument-hint: "<query> | --baseline | --related <exp-id> [--auto-queue] [--limit N]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob, WebSearch
7
+ ---
8
+
9
+ Search the literature for papers, baselines, and related work.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - **Free query:** `"gradient boosting for tabular data"` — searches Semantic Scholar
20
+ - **Baseline:** `--baseline` — finds SOTA results for the current task, compares against your best
21
+ - **Related:** `--related exp-042` — finds papers using similar methods to a specific experiment
22
+ - `--auto-queue` — auto-queues hypotheses from literature with `source: "literature"`
23
+ - `--limit 10` — max number of results
24
+
25
+ 3. **Run literature search:**
26
+ ```bash
27
+ python scripts/literature_search.py $ARGUMENTS
28
+ ```
29
+
30
+ 4. **Report results:**
31
+ - **Papers:** title, authors, year, venue, citations, abstract snippet, URL
32
+ - **Baseline mode:** SOTA comparison with gap analysis against current best
33
+ - **Related mode:** methodological differences worth investigating
34
+ - **Hypotheses:** if `--auto-queue`, shows queued experiments from findings
35
+
36
+ 5. **Saved output:** results written to `experiments/literature/query-YYYY-MM-DD-HHMMSS.md`
37
+
38
+ 6. **If API unavailable:** reports error and suggests manual search.
39
+
40
+ ## Examples
41
+
42
+ ```
43
+ /turing:lit "gradient boosting missing values" # Free query
44
+ /turing:lit --baseline # SOTA comparison
45
+ /turing:lit --related exp-042 # Related work
46
+ /turing:lit --auto-queue "ensemble methods" # Queue hypotheses
47
+ ```
@@ -0,0 +1,51 @@
1
+ ---
2
+ name: logbook
3
+ description: Generate a research logbook showing the full experiment narrative — hypotheses proposed, experiments run, decisions made, and progress over time. Outputs HTML (with interactive chart) or markdown.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--since YYYY-MM-DD] [--format html|markdown] [--output path]"
6
+ allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*), Grep, Glob
7
+ ---
8
+
9
+ Generate a research logbook that captures the full narrative of the experiment campaign.
10
+
11
+ ## Steps
12
+
13
+ 1. **Generate the logbook:**
14
+ ```bash
15
+ source .venv/bin/activate && python scripts/generate_logbook.py
16
+ ```
17
+
18
+ **With options from `$ARGUMENTS`:**
19
+ - `--since 2026-03-15` — only include events after this date
20
+ - `--format markdown` — output as markdown instead of HTML
21
+ - `--output logbook.html` — write to file instead of stdout
22
+
23
+ **Common usage:**
24
+ ```bash
25
+ # HTML logbook with interactive trajectory chart
26
+ source .venv/bin/activate && python scripts/generate_logbook.py --output logbook.html
27
+
28
+ # Markdown for embedding in docs or READMEs
29
+ source .venv/bin/activate && python scripts/generate_logbook.py --format markdown --output logbook.md
30
+
31
+ # Last week's activity
32
+ source .venv/bin/activate && python scripts/generate_logbook.py --since 2026-03-24 --output logbook.html
33
+ ```
34
+
35
+ 2. **Present the result:**
36
+ - If HTML: tell the user to open the file in their browser. The logbook includes an interactive Chart.js trajectory visualization.
37
+ - If markdown: display inline or note the output file location.
38
+
39
+ ## What the Logbook Contains
40
+
41
+ - **Campaign summary:** total experiments, keep rate, best metric, hypothesis count
42
+ - **Improvement trajectory:** interactive line chart showing metric progression and best-so-far envelope
43
+ - **Experiment log:** every experiment with ID, description, metric value, status (kept/discarded), date
44
+ - **Hypothesis queue:** every hypothesis with source (human/agent/literature), status, priority
45
+
46
+ ## When to Use
47
+
48
+ - To share progress with collaborators
49
+ - Before and after meetings to show what was tried
50
+ - To archive a completed research campaign
51
+ - To track progress over a specific time period
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: merge
3
+ description: Model merging — average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-ids...> [--method uniform|greedy|ties|dare]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Combine model weights (not predictions) into a single, better model with no latency overhead.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:** `source .venv/bin/activate`
14
+ 2. **Run:** `python scripts/model_merger.py $ARGUMENTS`
15
+ 3. **Methods:** uniform soup (simple average), greedy soup (include only if improves), TIES (trim+elect+merge), DARE (drop+rescale)
16
+ 4. **Report:** compatibility check, per-model metrics, method comparison, improvement delta
17
+ 5. **Saved output:** `experiments/merges/merge-*.yaml`
18
+
19
+ ## Examples
20
+
21
+ ```
22
+ /turing:merge exp-042 exp-053 exp-067 # All methods
23
+ /turing:merge exp-042 exp-053 --method greedy # Greedy soup only
24
+ ```
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: mode
3
+ description: Set the research strategy mode — explore (try new things), exploit (refine what works), or replicate (verify results). Drives novelty guard policy and agent behavior.
4
+ disable-model-invocation: true
5
+ argument-hint: "<explore|exploit|replicate>"
6
+ ---
7
+
8
+ Set the research mode for the current project. The mode determines how the novelty guard filters proposed experiments and how the agent prioritizes its work.
9
+
10
+ ## Modes
11
+
12
+ | Mode | Novelty Guard Policy | Agent Behavior |
13
+ |------|---------------------|----------------|
14
+ | **explore** | Allow novel ideas, block repeats and follow-ups | Try fundamentally different approaches |
15
+ | **exploit** | Allow follow-ups and known successes, block repeats | Refine the current best configuration |
16
+ | **replicate** | Allow duplicate runs, block novel ideas | Re-run best experiments with different seeds |
17
+
18
+ ## Steps
19
+
20
+ 1. **Parse mode** from `$ARGUMENTS`. Must be one of: `explore`, `exploit`, `replicate`.
21
+
22
+ 2. **Update experiment state:**
23
+ ```bash
24
+ source .venv/bin/activate
25
+ python -c "
26
+ import yaml
27
+ from pathlib import Path
28
+ path = Path('experiment_state.yaml')
29
+ state = yaml.safe_load(path.read_text()) if path.exists() else {}
30
+ state['research_mode'] = '$ARGUMENTS'
31
+ path.write_text(yaml.dump(state, default_flow_style=False))
32
+ print(f'Research mode set to: $ARGUMENTS')
33
+ "
34
+ ```
35
+
36
+ 3. **Confirm** with guidance:
37
+ - `explore`: "The agent will prioritize novel ideas and avoid follow-ups. Best when the current approach feels exhausted."
38
+ - `exploit`: "The agent will refine the current best. Best when you have a promising direction."
39
+ - `replicate`: "The agent will re-run experiments for statistical verification. Best before declaring a winner."
40
+
41
+ ## Default
42
+
43
+ The default mode is `exploit` (refine what works). Change to `explore` when plateauing, `replicate` before final decisions.
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: onboard
3
+ description: Project onboarding — generate a walkthrough for new collaborators. Task, history, decisions, next steps.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--audience researcher|engineer|stakeholder] [--depth brief|full]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ 5-minute read that replaces a 1-hour onboarding meeting.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/generate_onboarding.py $ARGUMENTS`
14
+ 3. **Saved:** `ONBOARDING.md`
15
+
16
+ ## Examples
17
+ ```
18
+ /turing:onboard
19
+ /turing:onboard --audience engineer --depth brief
20
+ ```
@@ -0,0 +1,44 @@
1
+ ---
2
+ name: paper
3
+ description: Draft mechanical paper sections (setup, results, ablation, hyperparameters) from experiment logs. LaTeX and markdown output.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--sections setup,results,ablation] [--format latex|markdown]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Draft paper sections directly from experiment data.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - `--sections setup,results,ablation,hyperparameters` — which sections to draft (default: all)
20
+ - `--format latex|markdown` — output format (default: latex)
21
+
22
+ 3. **Run paper drafting:**
23
+ ```bash
24
+ python scripts/draft_paper_sections.py $ARGUMENTS
25
+ ```
26
+
27
+ 4. **Report results:**
28
+ - **setup:** Experimental setup prose (dataset, metrics, split, seed methodology)
29
+ - **results:** Comparison table with all model types, best bolded, seed study stats
30
+ - **ablation:** Ablation table from `/turing:ablate` results
31
+ - **hyperparameters:** Appendix-style parameter table per model
32
+
33
+ 5. **Output:** Each section saved to `paper/sections/` as `.tex` or `.md`
34
+
35
+ 6. **Numbers are pulled directly from experiment logs** — no manual transcription needed.
36
+
37
+ ## Examples
38
+
39
+ ```
40
+ /turing:paper # All sections, LaTeX
41
+ /turing:paper --format markdown # All sections, markdown
42
+ /turing:paper --sections setup,results # Just setup + results
43
+ /turing:paper --sections ablation --format latex # Just ablation table
44
+ ```
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: plan
3
+ description: Research planning assistant — design a strategic experiment campaign with budget-aware ROI allocation.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--budget 20] [--goal \"maximize F1 for production\"] [--json]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Design the next N experiments strategically, not randomly. The planner allocates budget by expected ROI.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/research_planner.py $ARGUMENTS`
14
+ 3. **Saved:** `experiments/plans/`
15
+
16
+ ## How it works
17
+ - Analyzes experiment history to compute per-family ROI
18
+ - Adjusts strategy priorities based on project state and goal
19
+ - Allocates budget across: feature engineering, model search, ensemble, calibration, verification
20
+ - Generates phased plan with specific experiment descriptions
21
+
22
+ ## Examples
23
+ ```
24
+ /turing:plan --budget 20
25
+ /turing:plan --budget 10 --goal "maximize F1 for production deployment"
26
+ /turing:plan --budget 30 --json
27
+ ```