claude-turing 4.6.0 → 4.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (333)
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +1 -1
  3. package/commands/ablate.md +0 -1
  4. package/commands/annotate.md +0 -1
  5. package/commands/archive.md +0 -1
  6. package/commands/audit.md +0 -1
  7. package/commands/baseline.md +0 -1
  8. package/commands/brief.md +0 -1
  9. package/commands/budget.md +0 -1
  10. package/commands/calibrate.md +0 -1
  11. package/commands/card.md +0 -1
  12. package/commands/changelog.md +0 -1
  13. package/commands/checkpoint.md +0 -1
  14. package/commands/cite.md +0 -1
  15. package/commands/compare.md +0 -1
  16. package/commands/counterfactual.md +0 -1
  17. package/commands/curriculum.md +0 -1
  18. package/commands/design.md +0 -1
  19. package/commands/diagnose.md +0 -1
  20. package/commands/diff.md +0 -1
  21. package/commands/distill.md +0 -1
  22. package/commands/doctor.md +0 -1
  23. package/commands/ensemble.md +0 -1
  24. package/commands/explore.md +0 -1
  25. package/commands/export.md +0 -1
  26. package/commands/feature.md +0 -1
  27. package/commands/flashback.md +0 -1
  28. package/commands/fork.md +0 -1
  29. package/commands/frontier.md +0 -1
  30. package/commands/init.md +0 -1
  31. package/commands/leak.md +0 -1
  32. package/commands/lit.md +0 -1
  33. package/commands/logbook.md +0 -1
  34. package/commands/merge.md +0 -1
  35. package/commands/mode.md +0 -1
  36. package/commands/onboard.md +0 -1
  37. package/commands/paper.md +0 -1
  38. package/commands/plan.md +0 -1
  39. package/commands/poster.md +0 -1
  40. package/commands/postmortem.md +0 -1
  41. package/commands/preflight.md +0 -1
  42. package/commands/present.md +0 -1
  43. package/commands/profile.md +0 -1
  44. package/commands/prune.md +0 -1
  45. package/commands/quantize.md +0 -1
  46. package/commands/queue.md +0 -1
  47. package/commands/registry.md +0 -1
  48. package/commands/regress.md +0 -1
  49. package/commands/replay.md +0 -1
  50. package/commands/report.md +0 -1
  51. package/commands/reproduce.md +0 -1
  52. package/commands/retry.md +0 -1
  53. package/commands/review.md +0 -1
  54. package/commands/sanity.md +0 -1
  55. package/commands/scale.md +0 -1
  56. package/commands/search.md +0 -1
  57. package/commands/seed.md +0 -1
  58. package/commands/sensitivity.md +0 -1
  59. package/commands/share.md +0 -1
  60. package/commands/simulate.md +0 -1
  61. package/commands/status.md +0 -1
  62. package/commands/stitch.md +0 -1
  63. package/commands/suggest.md +0 -1
  64. package/commands/surgery.md +0 -1
  65. package/commands/sweep.md +0 -1
  66. package/commands/template.md +0 -1
  67. package/commands/train.md +0 -1
  68. package/commands/transfer.md +0 -1
  69. package/commands/trend.md +0 -1
  70. package/commands/try.md +0 -1
  71. package/commands/turing.md +3 -3
  72. package/commands/update.md +0 -1
  73. package/commands/validate.md +0 -1
  74. package/commands/warm.md +0 -1
  75. package/commands/watch.md +0 -1
  76. package/commands/whatif.md +0 -1
  77. package/commands/xray.md +0 -1
  78. package/config/commands.yaml +74 -74
  79. package/package.json +10 -3
  80. package/skills/turing/SKILL.md +180 -0
  81. package/skills/turing/ablate/SKILL.md +46 -0
  82. package/skills/turing/annotate/SKILL.md +22 -0
  83. package/skills/turing/archive/SKILL.md +22 -0
  84. package/skills/turing/audit/SKILL.md +55 -0
  85. package/skills/turing/baseline/SKILL.md +44 -0
  86. package/skills/turing/brief/SKILL.md +94 -0
  87. package/skills/turing/budget/SKILL.md +51 -0
  88. package/skills/turing/calibrate/SKILL.md +46 -0
  89. package/skills/turing/card/SKILL.md +35 -0
  90. package/skills/turing/changelog/SKILL.md +21 -0
  91. package/skills/turing/checkpoint/SKILL.md +46 -0
  92. package/skills/turing/cite/SKILL.md +22 -0
  93. package/skills/turing/compare/SKILL.md +23 -0
  94. package/skills/turing/counterfactual/SKILL.md +26 -0
  95. package/skills/turing/curriculum/SKILL.md +42 -0
  96. package/skills/turing/design/SKILL.md +96 -0
  97. package/skills/turing/diagnose/SKILL.md +51 -0
  98. package/skills/turing/diff/SKILL.md +47 -0
  99. package/skills/turing/distill/SKILL.md +55 -0
  100. package/skills/turing/doctor/SKILL.md +30 -0
  101. package/skills/turing/ensemble/SKILL.md +53 -0
  102. package/skills/turing/explore/SKILL.md +106 -0
  103. package/skills/turing/export/SKILL.md +47 -0
  104. package/skills/turing/feature/SKILL.md +41 -0
  105. package/skills/turing/flashback/SKILL.md +21 -0
  106. package/skills/turing/fork/SKILL.md +39 -0
  107. package/skills/turing/frontier/SKILL.md +44 -0
  108. package/skills/turing/init/SKILL.md +153 -0
  109. package/skills/turing/leak/SKILL.md +46 -0
  110. package/skills/turing/lit/SKILL.md +46 -0
  111. package/skills/turing/logbook/SKILL.md +50 -0
  112. package/skills/turing/merge/SKILL.md +23 -0
  113. package/skills/turing/mode/SKILL.md +42 -0
  114. package/skills/turing/onboard/SKILL.md +19 -0
  115. package/skills/turing/paper/SKILL.md +43 -0
  116. package/skills/turing/plan/SKILL.md +26 -0
  117. package/skills/turing/poster/SKILL.md +88 -0
  118. package/skills/turing/postmortem/SKILL.md +27 -0
  119. package/skills/turing/preflight/SKILL.md +74 -0
  120. package/skills/turing/present/SKILL.md +22 -0
  121. package/skills/turing/profile/SKILL.md +42 -0
  122. package/skills/turing/prune/SKILL.md +25 -0
  123. package/skills/turing/quantize/SKILL.md +23 -0
  124. package/skills/turing/queue/SKILL.md +47 -0
  125. package/skills/turing/registry/SKILL.md +30 -0
  126. package/skills/turing/regress/SKILL.md +52 -0
  127. package/skills/turing/replay/SKILL.md +22 -0
  128. package/skills/turing/report/SKILL.md +96 -0
  129. package/skills/turing/reproduce/SKILL.md +47 -0
  130. package/skills/turing/retry/SKILL.md +40 -0
  131. package/skills/turing/review/SKILL.md +19 -0
  132. package/skills/turing/rules/loop-protocol.md +91 -0
  133. package/skills/turing/sanity/SKILL.md +47 -0
  134. package/skills/turing/scale/SKILL.md +54 -0
  135. package/skills/turing/search/SKILL.md +21 -0
  136. package/skills/turing/seed/SKILL.md +46 -0
  137. package/skills/turing/sensitivity/SKILL.md +40 -0
  138. package/skills/turing/share/SKILL.md +19 -0
  139. package/skills/turing/simulate/SKILL.md +27 -0
  140. package/skills/turing/status/SKILL.md +23 -0
  141. package/skills/turing/stitch/SKILL.md +48 -0
  142. package/skills/turing/suggest/SKILL.md +158 -0
  143. package/skills/turing/surgery/SKILL.md +26 -0
  144. package/skills/turing/sweep/SKILL.md +44 -0
  145. package/skills/turing/template/SKILL.md +21 -0
  146. package/skills/turing/train/SKILL.md +74 -0
  147. package/skills/turing/transfer/SKILL.md +53 -0
  148. package/skills/turing/trend/SKILL.md +20 -0
  149. package/skills/turing/try/SKILL.md +62 -0
  150. package/skills/turing/update/SKILL.md +26 -0
  151. package/skills/turing/validate/SKILL.md +33 -0
  152. package/skills/turing/warm/SKILL.md +52 -0
  153. package/skills/turing/watch/SKILL.md +59 -0
  154. package/skills/turing/whatif/SKILL.md +30 -0
  155. package/skills/turing/xray/SKILL.md +42 -0
  156. package/src/command-registry.js +21 -0
  157. package/src/install.js +4 -3
  158. package/src/sync-commands-layout.js +149 -0
  159. package/src/sync-skills-layout.js +20 -0
  160. package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
  161. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  162. package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
  163. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  164. package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
  165. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  166. package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
  167. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  168. package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  169. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  170. package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
  171. package/templates/scripts/__pycache__/ablation_study.cpython-314.pyc +0 -0
  172. package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
  173. package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
  174. package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
  175. package/templates/scripts/__pycache__/budget_manager.cpython-314.pyc +0 -0
  176. package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
  177. package/templates/scripts/__pycache__/build_ensemble.cpython-314.pyc +0 -0
  178. package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
  179. package/templates/scripts/__pycache__/calibration.cpython-314.pyc +0 -0
  180. package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
  181. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  182. package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
  183. package/templates/scripts/__pycache__/checkpoint_manager.cpython-314.pyc +0 -0
  184. package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
  185. package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
  186. package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
  187. package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
  188. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
  189. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-314.pyc +0 -0
  190. package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
  191. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  192. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
  193. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-314.pyc +0 -0
  194. package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
  195. package/templates/scripts/__pycache__/diagnose_errors.cpython-314.pyc +0 -0
  196. package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
  197. package/templates/scripts/__pycache__/draft_paper_sections.cpython-314.pyc +0 -0
  198. package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
  199. package/templates/scripts/__pycache__/equivalence_checker.cpython-314.pyc +0 -0
  200. package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
  201. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  202. package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
  203. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  204. package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
  205. package/templates/scripts/__pycache__/experiment_diff.cpython-314.pyc +0 -0
  206. package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
  207. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  208. package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
  209. package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
  210. package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
  211. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  212. package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
  213. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  214. package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
  215. package/templates/scripts/__pycache__/experiment_simulator.cpython-314.pyc +0 -0
  216. package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
  217. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  218. package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
  219. package/templates/scripts/__pycache__/export_card.cpython-314.pyc +0 -0
  220. package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
  221. package/templates/scripts/__pycache__/export_formats.cpython-314.pyc +0 -0
  222. package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
  223. package/templates/scripts/__pycache__/failure_postmortem.cpython-314.pyc +0 -0
  224. package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
  225. package/templates/scripts/__pycache__/feature_intelligence.cpython-314.pyc +0 -0
  226. package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
  227. package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
  228. package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
  229. package/templates/scripts/__pycache__/generate_baselines.cpython-314.pyc +0 -0
  230. package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
  231. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  232. package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
  233. package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
  234. package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
  235. package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
  236. package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
  237. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  238. package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
  239. package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
  240. package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
  241. package/templates/scripts/__pycache__/generate_onboarding.cpython-314.pyc +0 -0
  242. package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
  243. package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
  244. package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
  245. package/templates/scripts/__pycache__/incremental_update.cpython-314.pyc +0 -0
  246. package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
  247. package/templates/scripts/__pycache__/knowledge_transfer.cpython-314.pyc +0 -0
  248. package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
  249. package/templates/scripts/__pycache__/latency_benchmark.cpython-314.pyc +0 -0
  250. package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
  251. package/templates/scripts/__pycache__/leakage_detector.cpython-314.pyc +0 -0
  252. package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
  253. package/templates/scripts/__pycache__/literature_search.cpython-314.pyc +0 -0
  254. package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
  255. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  256. package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
  257. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  258. package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
  259. package/templates/scripts/__pycache__/methodology_audit.cpython-314.pyc +0 -0
  260. package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
  261. package/templates/scripts/__pycache__/model_distiller.cpython-314.pyc +0 -0
  262. package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
  263. package/templates/scripts/__pycache__/model_lifecycle.cpython-314.pyc +0 -0
  264. package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
  265. package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
  266. package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
  267. package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
  268. package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
  269. package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
  270. package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
  271. package/templates/scripts/__pycache__/model_xray.cpython-314.pyc +0 -0
  272. package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
  273. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  274. package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
  275. package/templates/scripts/__pycache__/package_experiments.cpython-314.pyc +0 -0
  276. package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
  277. package/templates/scripts/__pycache__/pareto_frontier.cpython-314.pyc +0 -0
  278. package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
  279. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  280. package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
  281. package/templates/scripts/__pycache__/pipeline_manager.cpython-314.pyc +0 -0
  282. package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
  283. package/templates/scripts/__pycache__/profile_training.cpython-314.pyc +0 -0
  284. package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
  285. package/templates/scripts/__pycache__/regression_gate.cpython-314.pyc +0 -0
  286. package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
  287. package/templates/scripts/__pycache__/reproduce_experiment.cpython-314.pyc +0 -0
  288. package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
  289. package/templates/scripts/__pycache__/research_planner.cpython-314.pyc +0 -0
  290. package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
  291. package/templates/scripts/__pycache__/sanity_checks.cpython-314.pyc +0 -0
  292. package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
  293. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  294. package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
  295. package/templates/scripts/__pycache__/scaling_estimator.cpython-314.pyc +0 -0
  296. package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
  297. package/templates/scripts/__pycache__/seed_runner.cpython-314.pyc +0 -0
  298. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
  299. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-314.pyc +0 -0
  300. package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
  301. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  302. package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
  303. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  304. package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
  305. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  306. package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
  307. package/templates/scripts/__pycache__/simulate_review.cpython-314.pyc +0 -0
  308. package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
  309. package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
  310. package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
  311. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  312. package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
  313. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  314. package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
  315. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  316. package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
  317. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  318. package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
  319. package/templates/scripts/__pycache__/training_monitor.cpython-314.pyc +0 -0
  320. package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
  321. package/templates/scripts/__pycache__/treequest_suggest.cpython-314.pyc +0 -0
  322. package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
  323. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  324. package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
  325. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  326. package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
  327. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  328. package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
  329. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  330. package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
  331. package/templates/scripts/__pycache__/warm_start.cpython-314.pyc +0 -0
  332. package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
  333. package/templates/scripts/__pycache__/whatif_engine.cpython-314.pyc +0 -0
@@ -0,0 +1,88 @@
1
+ ---
2
+ name: poster
3
+ description: Generate a single-page HTML research poster summarizing the experiment campaign — best result, trajectory, key findings, and methodology. Adapted from posterskill's self-contained HTML architecture.
4
+ argument-hint: "[title override]"
5
+ allowed-tools: Read, Write, Edit, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*, open:*), Grep, Glob
6
+ ---
7
+
8
+ Generate a research poster summarizing the experiment campaign as a single self-contained HTML file. Adapted from [posterskill](https://github.com/ethanweber/posterskill)'s architecture — no build step, works when opened as `file://`.
9
+
10
+ ## Steps
11
+
12
+ ### 1. Gather Data
13
+
14
+ Read the experiment history and project context:
15
+
16
+ ```bash
17
+ cat config.yaml
18
+ source .venv/bin/activate && python scripts/generate_brief.py
19
+ source .venv/bin/activate && python scripts/show_metrics.py --last 20
20
+ cat experiment_state.yaml 2>/dev/null || true
21
+ cat RESEARCH_PLAN.md 2>/dev/null || true
22
+ ```
23
+
24
+ From this, extract:
25
+ - **Title:** from config task description (or `$ARGUMENTS` override)
26
+ - **Best result:** metric name, value, experiment ID
27
+ - **Improvement trajectory:** metric values over experiments
28
+ - **Key findings:** what model families worked, what didn't, what was surprising
29
+ - **Methodology:** the experiment loop, evaluation strategy, convergence criteria
30
+ - **Campaign stats:** total experiments, keep rate, time span
31
+
32
+ ### 2. Generate the Poster HTML
33
+
34
+ Create `poster/index.html` — a self-contained HTML file with:
35
+
36
+ ```bash
37
+ mkdir -p poster
38
+ ```
39
+
40
+ **Structure the poster with these cards:**
41
+
42
+ | Card | Content |
43
+ |------|---------|
44
+ | **Header** | Title, "Autonomous ML Research Campaign", date range, best metric badge |
45
+ | **Objective** | Task description and success criteria from config |
46
+ | **Methodology** | The autoresearch loop: hypothesize → train → evaluate → decide. Mention immutable evaluation, git-disciplined rollback |
47
+ | **Trajectory** | Chart.js line chart of metric progression (embed data inline) |
48
+ | **Best Configuration** | Model type, hyperparameters, metric values from best experiment |
49
+ | **Key Findings** | 3-5 bullet points: what worked, what didn't, surprises |
50
+ | **Explored Approaches** | Table of model families tried with keep rates |
51
+ | **Campaign Stats** | Total experiments, keep rate, human vs agent hypotheses, convergence |
52
+
53
+ **Design principles (from posterskill):**
54
+ - Single self-contained HTML file, CDN dependencies only (Chart.js, Google Fonts)
55
+ - Print-optimized CSS (`@media print`, `@page` with poster dimensions)
56
+ - Card-based layout with colored top borders
57
+ - Clean typography (system fonts or Nunito from Google Fonts)
58
+ - Data embedded directly in the HTML as JSON — no external file dependencies
59
+
60
+ **Poster dimensions:** Default A1 landscape (841mm x 594mm). The user can print to PDF from their browser.
61
+
62
+ ### 3. Self-Critique
63
+
64
+ Review the generated poster:
65
+ - Does the trajectory chart render correctly with the embedded data?
66
+ - Are the key findings specific and data-grounded (not generic)?
67
+ - Is the best configuration complete (model type + all relevant hyperparameters)?
68
+ - Would a collaborator understand the campaign from this single page?
69
+
70
+ Fix any issues found.
71
+
72
+ ### 4. Present
73
+
74
+ ```
75
+ Research poster generated at poster/index.html
76
+
77
+ Open in your browser to view. Print to PDF for sharing.
78
+ Best result: <metric>=<value> (<experiment_id>)
79
+ Campaign: <N> experiments, <keep_rate>% keep rate
80
+ ```
81
+
82
+ Suggest: "Open `poster/index.html` in your browser. Use Ctrl+P / Cmd+P to save as PDF."
83
+
84
+ ## Integration
85
+
86
+ - The poster reads from the same data sources as `/turing:brief` and `/turing:logbook`
87
+ - For a more detailed view, use `/turing:logbook` (full experiment-by-experiment narrative)
88
+ - For a quick summary, use `/turing:brief` (text-only intelligence report)
@@ -0,0 +1,27 @@
1
+ ---
2
+ name: postmortem
3
+ description: Failure postmortem — diagnose why experiments stopped improving and get actionable next steps.
4
+ argument-hint: "[--window 10] [--auto-trigger 5]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ When experiments stop improving, find out why. Diagnoses search space exhaustion, config errors, data issues, metric ceilings, and noise floors.
9
+
10
+ ## Steps
11
+ 1. `source .venv/bin/activate`
12
+ 2. `python scripts/failure_postmortem.py $ARGUMENTS`
13
+ 3. **Saved:** `experiments/postmortems/`
14
+
15
+ ## Diagnosis categories
16
+ - **Search space exhaustion:** micro-tuning params that don't matter
17
+ - **Systematic config error:** all experiments share a bad common config
18
+ - **Data issue:** all model types fail similarly
19
+ - **Metric ceiling:** near theoretical maximum
20
+ - **Noise floor:** improvements within seed variance
21
+
22
+ ## Examples
23
+ ```
24
+ /turing:postmortem
25
+ /turing:postmortem --window 15
26
+ /turing:postmortem --json
27
+ ```
@@ -0,0 +1,74 @@
1
+ ---
2
+ name: preflight
3
+ description: Pre-flight resource check — estimates VRAM, RAM, and disk requirements before running ML training. Compares against available system resources and issues PASS/WARN/FAIL verdict. Use before training to catch OOM errors before they happen.
4
+ argument-hint: "[--model-type torch] [--params 10M] [--batch-size 32]"
5
+ allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, nvidia-smi:*), Grep, Glob
6
+ ---
7
+
8
+ Check whether the current system has enough resources to run the planned experiment.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Run preflight check:**
18
+
19
+ If `$ARGUMENTS` is empty (auto-detect from config.yaml):
20
+ ```bash
21
+ python scripts/preflight.py
22
+ ```
23
+
24
+ If `$ARGUMENTS` contains flags:
25
+ ```bash
26
+ python scripts/preflight.py $ARGUMENTS
27
+ ```
28
+
29
+ 3. **Interpret the verdict:**
30
+
31
+ - **PASS** — system has sufficient resources. Proceed with training.
32
+ - **WARN** — resources are tight. Training may succeed but could be slow or unstable. Present warnings to the user and ask whether to proceed.
33
+ - **FAIL** — training will likely fail (OOM, disk full, no GPU for GPU-required model). Present the specific resource gap and suggest mitigations:
34
+ - RAM too low: reduce dataset size, use chunked loading, or add swap
35
+ - VRAM too low: reduce batch size, use fp16/bf16, enable gradient checkpointing, or use a smaller model
36
+ - Disk too low: clean up old models/checkpoints
37
+ - No GPU: switch to a CPU-friendly model (XGBoost, LightGBM, sklearn)
38
+
39
+ 4. **If running before `/turing:train`:** report the verdict so the human can decide whether to proceed, adjust config, or choose a different model type.
40
+
41
+ ## Examples
42
+
43
+ ```bash
44
+ # Auto-detect from config.yaml (works for Turing projects)
45
+ /turing:preflight
46
+
47
+ # Check for a specific model type
48
+ /turing:preflight --model-type transformer --params 350M --batch-size 16 --precision fp16
49
+
50
+ # Check with a specific dataset
51
+ /turing:preflight --model-type xgboost --dataset data/train.csv
52
+
53
+ # JSON output for scripting
54
+ /turing:preflight --json
55
+ ```
56
+
57
+ ## What It Checks
58
+
59
+ | Resource | How estimated | Warning threshold |
60
+ |----------|--------------|-------------------|
61
+ | **RAM** | Dataset size (4x CSV on disk) + model memory (tree nodes or param count) | >90% of available |
62
+ | **VRAM** | Model params + gradients + optimizer state + activations | >80% of largest GPU |
63
+ | **Disk** | Model artifacts + dataset + checkpoints | >50% of free space |
64
+ | **GPU presence** | torch.cuda or nvidia-smi | Required for neural nets >1GB VRAM |
65
+
66
+ ## Model-Specific Estimates
67
+
68
+ | Model Type | RAM | VRAM | GPU Required? |
69
+ |-----------|-----|------|---------------|
70
+ | XGBoost/LightGBM | Trees + data (typically <4GB) | 0 | No |
71
+ | Random Forest | Trees + data (can be large) | 0 | No |
72
+ | Linear/Logistic | 2x data | 0 | No |
73
+ | MLP (small) | Data + params | Params x 4 (Adam) | If >1GB VRAM |
74
+ | Transformer | Data + params | Params x 4 + activations | Yes |
@@ -0,0 +1,22 @@
1
+ ---
2
+ name: present
3
+ description: Presentation figure generation — training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps.
4
+ argument-hint: "[--figures training,comparison] [--style light|dark|poster]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Generate presentation-ready figure specifications from experiment data in seconds.
9
+
10
+ ## Steps
11
+ 1. **Activate environment:** `source .venv/bin/activate`
12
+ 2. **Run:** `python scripts/generate_figures.py $ARGUMENTS`
13
+ 3. **Figure types:** training, comparison, ablation, pareto, sensitivity
14
+ 4. **Styles:** light (papers), dark (demos), poster (large fonts)
15
+ 5. **Saved output:** `paper/figures/`
16
+
17
+ ## Examples
18
+ ```
19
+ /turing:present # All figures
20
+ /turing:present --figures training,comparison # Specific figures
21
+ /turing:present --style dark # Dark theme
22
+ ```
@@ -0,0 +1,42 @@
1
+ ---
2
+ name: profile
3
+ description: Profile a training run — timing breakdown, memory usage, throughput, bottleneck detection with actionable recommendations.
4
+ argument-hint: "[exp-id] [--seed 42]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Profile a training run to identify performance bottlenecks.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Parse arguments from `$ARGUMENTS`:**
18
+ - First argument can be an experiment ID (e.g., `exp-042`); defaults to the best experiment
19
+ - `--seed 42` sets the random seed for the profiling run
20
+
21
+ 3. **Run profiling:**
22
+ ```bash
23
+ python scripts/profile_training.py $ARGUMENTS
24
+ ```
25
+
26
+ 4. **Report results:**
27
+ - **Timing:** total time, training time, overhead breakdown
28
+ - **Memory:** peak RSS, Python peak, GPU peak (if applicable)
29
+ - **Throughput:** samples/sec
30
+ - **Bottleneck:** identified bottleneck type and severity
31
+ - **Recommendations:** actionable fixes for the detected bottleneck
32
+
33
+ 5. **Saved output:** results written to `experiments/profiles/exp-NNN-profile.yaml`
34
+
35
+ 6. **If no training pipeline exists:** suggest `/turing:init` first.
36
+
37
+ ## Examples
38
+
39
+ ```
40
+ /turing:profile # Profile best experiment config
41
+ /turing:profile exp-042 # Profile specific experiment
42
+ ```
@@ -0,0 +1,25 @@
1
+ ---
2
+ name: prune
3
+ description: Weight pruning — measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model.
4
+ argument-hint: "<exp-id> [--sparsity 0.5,0.75,0.9] [--method magnitude|structured|lottery]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Remove redundant weights for faster inference and smaller models.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:** `source .venv/bin/activate`
13
+ 2. **Run:** `python scripts/model_pruning.py $ARGUMENTS`
14
+ 3. **Methods:** magnitude (zero small weights), structured (remove neurons), lottery (iterative with rewind)
15
+ 4. **For tree models:** progressively reduces n_estimators
16
+ 5. **Report:** sparsity sweep table, knee point, recommended sparsity
17
+ 6. **Saved output:** `experiments/pruning/<exp-id>-pruning.yaml`
18
+
19
+ ## Examples
20
+
21
+ ```
22
+ /turing:prune exp-042 # Default: magnitude, 5 levels
23
+ /turing:prune exp-042 --method structured # Remove entire neurons
24
+ /turing:prune exp-042 --sparsity 0.5,0.75,0.9 # Custom levels
25
+ ```
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: quantize
3
+ description: Post-training quantization — convert FP32 to INT8/FP16 and measure the resulting accuracy loss; typically 2-4x speedup with <0.5% accuracy loss.
4
+ argument-hint: "<exp-id> [--precision int8|fp16|dynamic]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Quantize for production. Lowest-effort optimization: 2-4x speedup, 2-4x memory reduction.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:** `source .venv/bin/activate`
13
+ 2. **Run:** `python scripts/model_quantization.py $ARGUMENTS`
14
+ 3. **Precision levels:** FP32 (baseline), FP16 (GPU), INT8 dynamic (simplest), INT8 static (best accuracy)
15
+ 4. **Report:** precision comparison table, recommended level, QAT suggestion if needed
16
+ 5. **Saved output:** `experiments/quantization/<exp-id>-quantization.yaml`
17
+
18
+ ## Examples
19
+
20
+ ```
21
+ /turing:quantize exp-042 # Compare all precision levels
22
+ /turing:quantize exp-042 --precision int8 # INT8 specifically
23
+ ```
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: queue
3
+ description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
4
+ argument-hint: "<add|list|run|pause|clear> [description] [--priority high] [--after q-001]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Manage the experiment queue for unattended batch execution.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Parse arguments from `$ARGUMENTS`:**
18
+ - **add** `"description"` `--priority high` `--after q-001` — queue an experiment
19
+ - **list** — show queue with status, priority, dependencies
20
+ - **run** `--halt-on-error` — execute all queued experiments
21
+ - **pause** — stop after current experiment finishes
22
+ - **clear** — discard all queued items
23
+
24
+ 3. **Run queue manager:**
25
+ ```bash
26
+ python scripts/experiment_queue.py $ARGUMENTS
27
+ ```
28
+
29
+ 4. **Report results by action:**
30
+ - **add:** confirms ID and priority
31
+ - **list:** table of queued/completed/failed items
32
+ - **run:** batch summary with per-experiment status
33
+ - **pause/clear:** confirmation message
34
+
35
+ 5. **Queue persists in** `experiments/queue.yaml`
36
+
37
+ ## Examples
38
+
39
+ ```
40
+ /turing:queue add "try LightGBM" --priority high
41
+ /turing:queue add "deeper trees" --after q-001
42
+ /turing:queue list
43
+ /turing:queue run
44
+ /turing:queue run --halt-on-error
45
+ /turing:queue pause
46
+ /turing:queue clear
47
+ ```
@@ -0,0 +1,30 @@
1
+ ---
2
+ name: registry
3
+ description: Model registry — track, promote, and govern the model lifecycle from candidate to production.
4
+ argument-hint: "[list|register|promote|demote|archive|history] [exp-id] [stage]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Track each model's lifecycle stage — production, staging, candidate, or archived. Promotion requires passing gates.
9
+
10
+ ## Steps
11
+ 1. `source .venv/bin/activate`
12
+ 2. `python scripts/model_lifecycle.py $ARGUMENTS`
13
+ 3. **Registry:** `experiments/registry.yaml`
14
+
15
+ ## Promotion gates
16
+ - **candidate → staging:** regression check + seed study must PASS
17
+ - **staging → production:** audit + calibration check must PASS
18
+ - Use `--force` to skip gate checks
19
+
20
+ ## Examples
21
+ ```
22
+ /turing:registry list
23
+ /turing:registry register exp-095 --version v4.1
24
+ /turing:registry promote exp-089 staging
25
+ /turing:registry promote exp-089 production --force
26
+ /turing:registry demote exp-078 staging --reason "latency regression"
27
+ /turing:registry archive exp-042 --reason "superseded by v4"
28
+ /turing:registry history
29
+ /turing:registry history exp-089
30
+ ```
@@ -0,0 +1,52 @@
1
+ ---
2
+ name: regress
3
+ description: Performance regression gate — re-run best experiment after code/dependency changes and verify metrics haven't degraded.
4
+ argument-hint: "[--tolerance 0.01] [--against exp-id] [--quick]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ CI for your model. After any change to code, dependencies, or data, verify metrics haven't silently regressed.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Parse arguments from `$ARGUMENTS`:**
18
+ - `--tolerance 0.01` sets the relative tolerance (default 1%)
19
+ - `--against exp-042` checks against a specific experiment (default: best)
20
+ - `--quick` runs 1 seed instead of 3 for fast checks
21
+ - `--runs 5` sets number of regression runs (default 3)
22
+ - `--json` outputs raw JSON
23
+
24
+ 3. **Run regression gate:**
25
+ ```bash
26
+ python scripts/regression_gate.py $ARGUMENTS
27
+ ```
28
+
29
+ 4. **Report results:**
30
+ - **PASS:** all metrics within tolerance — no regression
31
+ - **WARNING:** some metrics degraded within 2x tolerance — investigate
32
+ - **FAIL:** REGRESSION DETECTED — at least one metric degraded beyond tolerance
33
+ - Shows per-metric comparison with deltas and relative differences
34
+ - Shows environment diff if library versions changed (may explain regression)
35
+
36
+ 5. **Saved output:** report written to `experiments/regressions/check-YYYY-MM-DD.yaml`
37
+
38
+ 6. **If no experiments exist:** suggest running `/turing:train` first.
39
+
40
+ 7. **On FAIL verdict:** suggest investigating with:
41
+ - `/turing:diff <baseline> <latest>` to see what changed
42
+ - `pip freeze` comparison to identify library version changes
43
+ - `git diff` to review code changes
44
+
45
+ ## Examples
46
+
47
+ ```
48
+ /turing:regress # Default: check best, 1% tolerance, 3 runs
49
+ /turing:regress --quick # Fast check: 1 run
50
+ /turing:regress --against exp-042 # Check specific experiment
51
+ /turing:regress --tolerance 0.005 --runs 5 # Strict: 0.5% tolerance, 5 runs
52
+ ```
@@ -0,0 +1,22 @@
1
+ ---
2
+ name: replay
3
+ description: Experiment replay — re-run a historical experiment with current infrastructure to test if old approaches do better now.
4
+ argument-hint: "<exp-id> [--with-current-data] [--with-current-preprocessing]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Should you revisit old ideas? Infrastructure changes may make failed approaches work now.
9
+
10
+ ## Steps
11
+ 1. **Activate environment:** `source .venv/bin/activate`
12
+ 2. **Run:** `python scripts/experiment_replay.py $ARGUMENTS`
13
+ 3. **Modes:** default (current code+data), --with-current-data, --with-current-preprocessing
14
+ 4. **Report:** original vs replayed metrics, delta, verdict
15
+ 5. **Saved output:** `experiments/replays/`
16
+
17
+ ## Examples
18
+ ```
19
+ /turing:replay exp-023 # Replay with current infrastructure
20
+ /turing:replay exp-023 --with-current-data # Current data, old code
21
+ /turing:replay --list # List replayable experiments
22
+ ```
@@ -0,0 +1,96 @@
1
+ ---
2
+ name: report
3
+ description: Generate a markdown research report from experiment history — structured for sharing, archiving, or including in documentation. More detailed than a brief, less visual than a poster.
4
+ argument-hint: "[--since YYYY-MM-DD] [--output path]"
5
+ allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*), Grep, Glob
6
+ ---
7
+
8
+ Generate a structured markdown research report summarizing the experiment campaign.
9
+
10
+ ## Steps
11
+
12
+ ### 1. Generate the Report
13
+
14
+ Use the logbook generator in markdown mode as the data backbone:
15
+
16
+ ```bash
17
+ source .venv/bin/activate && python scripts/generate_logbook.py --format markdown
18
+ ```
19
+
20
+ Also gather supplementary data:
21
+ ```bash
22
+ source .venv/bin/activate && python scripts/generate_brief.py
23
+ cat experiment_state.yaml 2>/dev/null || true
24
+ cat RESEARCH_PLAN.md 2>/dev/null || true
25
+ ```
26
+
27
+ ### 2. Enhance with Analysis
28
+
29
+ The logbook generator produces raw data. Enhance it with your analysis to create a proper report. Add these sections that the script doesn't generate:
30
+
31
+ - **Executive Summary** (2-3 sentences): What was the task? What's the best result? Is it good enough?
32
+ - **Approach:** Describe the methodology — autoresearch loop, evaluation strategy, search strategy used
33
+ - **Key Findings:** Synthesize patterns from the experiment log:
34
+ - Which model families outperformed others?
35
+ - What hyperparameter ranges work vs don't?
36
+ - Were there surprising results?
37
+ - What failure patterns emerged?
38
+ - **Recommendations:** Based on the findings, what should be tried next? What should be avoided?
39
+ - **Limitations:** What wasn't explored? What constraints affected the results?
40
+
41
+ ### 3. Output
42
+
43
+ If `$ARGUMENTS` contains `--output <path>`:
44
+ ```bash
45
+ mkdir -p $(dirname <path>)
46
+ ```
47
+ Write the report to the specified path.
48
+
49
+ Otherwise, display the report directly.
50
+
51
+ **Common usage:**
52
+ ```
53
+ /turing:report --output reports/campaign-v1.md
54
+ /turing:report --since 2026-03-15 --output reports/week-12.md
55
+ ```
56
+
57
+ ## Report Structure
58
+
59
+ ```markdown
60
+ # Research Report: <task description>
61
+ Generated: <date>
62
+
63
+ ## Executive Summary
64
+ <2-3 sentences>
65
+
66
+ ## Methodology
67
+ <approach, evaluation strategy, convergence criteria>
68
+
69
+ ## Campaign Summary
70
+ <table: experiments, keep rate, best metric, timespan>
71
+
72
+ ## Improvement Trajectory
73
+ <table: experiment-by-experiment metric progression>
74
+
75
+ ## Key Findings
76
+ <synthesized patterns from experiment history>
77
+
78
+ ## Model Comparison
79
+ <table: model families, experiments per family, best metric, keep rate>
80
+
81
+ ## Hypothesis Analysis
82
+ <what was proposed, by whom, what worked>
83
+
84
+ ## Recommendations
85
+ <concrete next steps>
86
+
87
+ ## Limitations
88
+ <what wasn't tried, constraints>
89
+ ```
90
+
91
+ ## When to Use
92
+
93
+ - End of a research campaign for archiving
94
+ - Before a team review or status update
95
+ - To document findings for a paper or thesis
96
+ - To hand off a project to another researcher
@@ -0,0 +1,47 @@
1
+ ---
2
+ name: reproduce
3
+ description: Verify reproducibility of a specific experiment by re-running from logged config and checking metrics fall within tolerance.
4
+ argument-hint: "<exp-id> [--tolerance 0.02] [--strict] [--runs 3]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Verify that a logged experiment can be reproduced with consistent results.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Parse arguments from `$ARGUMENTS`:**
18
+ - First argument is the experiment ID (required), e.g. `exp-042`
19
+ - `--tolerance 0.02` sets the relative tolerance (default 2%)
20
+ - `--strict` requires exact float match (1e-6), overrides tolerance
21
+ - `--runs 3` sets number of reproduction runs (default 3, 1 for strict)
22
+
23
+ 3. **Run reproducibility verification:**
24
+ ```bash
25
+ python scripts/reproduce_experiment.py $ARGUMENTS
26
+ ```
27
+
28
+ 4. **Report results:**
29
+ - **reproducible:** metrics match exactly (deterministic algorithm)
30
+ - **approximately_reproducible:** metrics within tolerance or original falls in 95% CI
31
+ - **not_reproducible:** metrics outside tolerance and CI
32
+ - **environment_changed:** metrics diverge AND library versions differ
33
+ - Show environment diff if present (Python version, package versions)
34
+
35
+ 5. **Saved output:** report written to `experiments/reproductions/exp-NNN-repro.yaml`
36
+
37
+ 6. **If experiment ID not found:** list available experiment IDs from `experiments/log.jsonl`
38
+
39
+ 7. **If no training pipeline exists:** suggest `/turing:init` first.
40
+
41
+ ## Examples
42
+
43
+ ```
44
+ /turing:reproduce exp-042 # Default: 3 runs, 2% tolerance
45
+ /turing:reproduce exp-042 --strict # Exact match required
46
+ /turing:reproduce exp-042 --tolerance 0.05 --runs 5 # Lenient, more runs
47
+ ```
@@ -0,0 +1,40 @@
1
+ ---
2
+ name: retry
3
+ description: Smart failure recovery — auto-diagnose crash type and retry with targeted fix. OOM → halve batch. NaN → add clipping.
4
+ argument-hint: "<exp-id> [--max-attempts 3]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Auto-diagnose and recover from experiment failures.
9
+
10
+ ## Steps
11
+
12
+ 1. **Activate environment:**
13
+ ```bash
14
+ source .venv/bin/activate
15
+ ```
16
+
17
+ 2. **Parse arguments from `$ARGUMENTS`:**
18
+ - First argument is the experiment ID (required)
19
+ - `--max-attempts 3` limits retry count
20
+ - `--classify "error text"` just classifies without retrying
21
+
22
+ 3. **Run smart retry:**
23
+ ```bash
24
+ python scripts/smart_retry.py $ARGUMENTS
25
+ ```
26
+
27
+ 4. **Report results:**
28
+ - **RECOVERED:** fix applied, retry succeeded
29
+ - **FAILED:** all retry attempts exhausted
30
+ - **MANUAL FIX NEEDED:** failure type requires human intervention
31
+ - Shows failure classification, fix applied, and attempt history
32
+
33
+ 5. **Saved output:** report written to `experiments/retries/exp-NNN-retry.yaml`
34
+
35
+ ## Examples
36
+
37
+ ```
38
+ /turing:retry exp-042 # Auto-diagnose and retry
39
+ /turing:retry exp-042 --max-attempts 5 # More retries
40
+ ```
@@ -0,0 +1,19 @@
1
+ ---
2
+ name: review
3
+ description: Peer review simulation — generate likely reviewer objections with severity ratings and fix commands.
4
+ argument-hint: "[--venue neurips|icml|general] [--harsh]"
5
+ allowed-tools: Read, Bash(*), Grep, Glob
6
+ ---
7
+
8
+ Simulate a conference reviewer before you submit. Each weakness links to the command that fixes it.
9
+
10
+ ## Steps
11
+ 1. `source .venv/bin/activate`
12
+ 2. `python scripts/simulate_review.py $ARGUMENTS`
13
+ 3. **Saved:** `experiments/reviews/`
14
+
15
+ ## Examples
16
+ ```
17
+ /turing:review
18
+ /turing:review --venue neurips --harsh
19
+ ```