claude-turing 4.6.0 → 4.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +1 -1
  3. package/package.json +9 -3
  4. package/skills/turing/SKILL.md +180 -0
  5. package/skills/turing/ablate/SKILL.md +47 -0
  6. package/skills/turing/annotate/SKILL.md +23 -0
  7. package/skills/turing/archive/SKILL.md +23 -0
  8. package/skills/turing/audit/SKILL.md +56 -0
  9. package/skills/turing/baseline/SKILL.md +45 -0
  10. package/skills/turing/brief/SKILL.md +95 -0
  11. package/skills/turing/budget/SKILL.md +52 -0
  12. package/skills/turing/calibrate/SKILL.md +47 -0
  13. package/skills/turing/card/SKILL.md +36 -0
  14. package/skills/turing/changelog/SKILL.md +22 -0
  15. package/skills/turing/checkpoint/SKILL.md +47 -0
  16. package/skills/turing/cite/SKILL.md +23 -0
  17. package/skills/turing/compare/SKILL.md +24 -0
  18. package/skills/turing/counterfactual/SKILL.md +27 -0
  19. package/skills/turing/curriculum/SKILL.md +43 -0
  20. package/skills/turing/design/SKILL.md +97 -0
  21. package/skills/turing/diagnose/SKILL.md +52 -0
  22. package/skills/turing/diff/SKILL.md +48 -0
  23. package/skills/turing/distill/SKILL.md +56 -0
  24. package/skills/turing/doctor/SKILL.md +31 -0
  25. package/skills/turing/ensemble/SKILL.md +54 -0
  26. package/skills/turing/explore/SKILL.md +107 -0
  27. package/skills/turing/export/SKILL.md +48 -0
  28. package/skills/turing/feature/SKILL.md +42 -0
  29. package/skills/turing/flashback/SKILL.md +22 -0
  30. package/skills/turing/fork/SKILL.md +40 -0
  31. package/skills/turing/frontier/SKILL.md +45 -0
  32. package/skills/turing/init/SKILL.md +154 -0
  33. package/skills/turing/leak/SKILL.md +47 -0
  34. package/skills/turing/lit/SKILL.md +47 -0
  35. package/skills/turing/logbook/SKILL.md +51 -0
  36. package/skills/turing/merge/SKILL.md +24 -0
  37. package/skills/turing/mode/SKILL.md +43 -0
  38. package/skills/turing/onboard/SKILL.md +20 -0
  39. package/skills/turing/paper/SKILL.md +44 -0
  40. package/skills/turing/plan/SKILL.md +27 -0
  41. package/skills/turing/poster/SKILL.md +89 -0
  42. package/skills/turing/postmortem/SKILL.md +28 -0
  43. package/skills/turing/preflight/SKILL.md +75 -0
  44. package/skills/turing/present/SKILL.md +23 -0
  45. package/skills/turing/profile/SKILL.md +43 -0
  46. package/skills/turing/prune/SKILL.md +26 -0
  47. package/skills/turing/quantize/SKILL.md +24 -0
  48. package/skills/turing/queue/SKILL.md +48 -0
  49. package/skills/turing/registry/SKILL.md +31 -0
  50. package/skills/turing/regress/SKILL.md +53 -0
  51. package/skills/turing/replay/SKILL.md +23 -0
  52. package/skills/turing/report/SKILL.md +97 -0
  53. package/skills/turing/reproduce/SKILL.md +48 -0
  54. package/skills/turing/retry/SKILL.md +41 -0
  55. package/skills/turing/review/SKILL.md +20 -0
  56. package/skills/turing/rules/loop-protocol.md +91 -0
  57. package/skills/turing/sanity/SKILL.md +48 -0
  58. package/skills/turing/scale/SKILL.md +55 -0
  59. package/skills/turing/search/SKILL.md +22 -0
  60. package/skills/turing/seed/SKILL.md +47 -0
  61. package/skills/turing/sensitivity/SKILL.md +41 -0
  62. package/skills/turing/share/SKILL.md +20 -0
  63. package/skills/turing/simulate/SKILL.md +28 -0
  64. package/skills/turing/status/SKILL.md +24 -0
  65. package/skills/turing/stitch/SKILL.md +49 -0
  66. package/skills/turing/suggest/SKILL.md +159 -0
  67. package/skills/turing/surgery/SKILL.md +27 -0
  68. package/skills/turing/sweep/SKILL.md +45 -0
  69. package/skills/turing/template/SKILL.md +22 -0
  70. package/skills/turing/train/SKILL.md +75 -0
  71. package/skills/turing/transfer/SKILL.md +54 -0
  72. package/skills/turing/trend/SKILL.md +21 -0
  73. package/skills/turing/try/SKILL.md +63 -0
  74. package/skills/turing/update/SKILL.md +27 -0
  75. package/skills/turing/validate/SKILL.md +34 -0
  76. package/skills/turing/warm/SKILL.md +53 -0
  77. package/skills/turing/watch/SKILL.md +60 -0
  78. package/skills/turing/whatif/SKILL.md +31 -0
  79. package/skills/turing/xray/SKILL.md +43 -0
  80. package/src/command-registry.js +9 -0
  81. package/src/sync-skills-layout.js +149 -0
  82. package/templates/__pycache__/evaluate.cpython-312.pyc +0 -0
  83. package/templates/__pycache__/evaluate.cpython-314.pyc +0 -0
  84. package/templates/__pycache__/prepare.cpython-312.pyc +0 -0
  85. package/templates/__pycache__/prepare.cpython-314.pyc +0 -0
  86. package/templates/features/__pycache__/__init__.cpython-312.pyc +0 -0
  87. package/templates/features/__pycache__/__init__.cpython-314.pyc +0 -0
  88. package/templates/features/__pycache__/featurizers.cpython-312.pyc +0 -0
  89. package/templates/features/__pycache__/featurizers.cpython-314.pyc +0 -0
  90. package/templates/scripts/__pycache__/__init__.cpython-312.pyc +0 -0
  91. package/templates/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  92. package/templates/scripts/__pycache__/ablation_study.cpython-312.pyc +0 -0
  93. package/templates/scripts/__pycache__/ablation_study.cpython-314.pyc +0 -0
  94. package/templates/scripts/__pycache__/architecture_surgery.cpython-312.pyc +0 -0
  95. package/templates/scripts/__pycache__/architecture_surgery.cpython-314.pyc +0 -0
  96. package/templates/scripts/__pycache__/budget_manager.cpython-312.pyc +0 -0
  97. package/templates/scripts/__pycache__/budget_manager.cpython-314.pyc +0 -0
  98. package/templates/scripts/__pycache__/build_ensemble.cpython-312.pyc +0 -0
  99. package/templates/scripts/__pycache__/build_ensemble.cpython-314.pyc +0 -0
  100. package/templates/scripts/__pycache__/calibration.cpython-312.pyc +0 -0
  101. package/templates/scripts/__pycache__/calibration.cpython-314.pyc +0 -0
  102. package/templates/scripts/__pycache__/check_convergence.cpython-312.pyc +0 -0
  103. package/templates/scripts/__pycache__/check_convergence.cpython-314.pyc +0 -0
  104. package/templates/scripts/__pycache__/checkpoint_manager.cpython-312.pyc +0 -0
  105. package/templates/scripts/__pycache__/checkpoint_manager.cpython-314.pyc +0 -0
  106. package/templates/scripts/__pycache__/citation_manager.cpython-312.pyc +0 -0
  107. package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
  108. package/templates/scripts/__pycache__/cost_frontier.cpython-312.pyc +0 -0
  109. package/templates/scripts/__pycache__/cost_frontier.cpython-314.pyc +0 -0
  110. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-312.pyc +0 -0
  111. package/templates/scripts/__pycache__/counterfactual_explanation.cpython-314.pyc +0 -0
  112. package/templates/scripts/__pycache__/critique_hypothesis.cpython-312.pyc +0 -0
  113. package/templates/scripts/__pycache__/critique_hypothesis.cpython-314.pyc +0 -0
  114. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-312.pyc +0 -0
  115. package/templates/scripts/__pycache__/curriculum_optimizer.cpython-314.pyc +0 -0
  116. package/templates/scripts/__pycache__/diagnose_errors.cpython-312.pyc +0 -0
  117. package/templates/scripts/__pycache__/diagnose_errors.cpython-314.pyc +0 -0
  118. package/templates/scripts/__pycache__/draft_paper_sections.cpython-312.pyc +0 -0
  119. package/templates/scripts/__pycache__/draft_paper_sections.cpython-314.pyc +0 -0
  120. package/templates/scripts/__pycache__/equivalence_checker.cpython-312.pyc +0 -0
  121. package/templates/scripts/__pycache__/equivalence_checker.cpython-314.pyc +0 -0
  122. package/templates/scripts/__pycache__/experiment_annotations.cpython-312.pyc +0 -0
  123. package/templates/scripts/__pycache__/experiment_annotations.cpython-314.pyc +0 -0
  124. package/templates/scripts/__pycache__/experiment_archive.cpython-312.pyc +0 -0
  125. package/templates/scripts/__pycache__/experiment_archive.cpython-314.pyc +0 -0
  126. package/templates/scripts/__pycache__/experiment_diff.cpython-312.pyc +0 -0
  127. package/templates/scripts/__pycache__/experiment_diff.cpython-314.pyc +0 -0
  128. package/templates/scripts/__pycache__/experiment_index.cpython-312.pyc +0 -0
  129. package/templates/scripts/__pycache__/experiment_index.cpython-314.pyc +0 -0
  130. package/templates/scripts/__pycache__/experiment_queue.cpython-312.pyc +0 -0
  131. package/templates/scripts/__pycache__/experiment_queue.cpython-314.pyc +0 -0
  132. package/templates/scripts/__pycache__/experiment_replay.cpython-312.pyc +0 -0
  133. package/templates/scripts/__pycache__/experiment_replay.cpython-314.pyc +0 -0
  134. package/templates/scripts/__pycache__/experiment_search.cpython-312.pyc +0 -0
  135. package/templates/scripts/__pycache__/experiment_search.cpython-314.pyc +0 -0
  136. package/templates/scripts/__pycache__/experiment_simulator.cpython-312.pyc +0 -0
  137. package/templates/scripts/__pycache__/experiment_simulator.cpython-314.pyc +0 -0
  138. package/templates/scripts/__pycache__/experiment_templates.cpython-312.pyc +0 -0
  139. package/templates/scripts/__pycache__/experiment_templates.cpython-314.pyc +0 -0
  140. package/templates/scripts/__pycache__/export_card.cpython-312.pyc +0 -0
  141. package/templates/scripts/__pycache__/export_card.cpython-314.pyc +0 -0
  142. package/templates/scripts/__pycache__/export_formats.cpython-312.pyc +0 -0
  143. package/templates/scripts/__pycache__/export_formats.cpython-314.pyc +0 -0
  144. package/templates/scripts/__pycache__/failure_postmortem.cpython-312.pyc +0 -0
  145. package/templates/scripts/__pycache__/failure_postmortem.cpython-314.pyc +0 -0
  146. package/templates/scripts/__pycache__/feature_intelligence.cpython-312.pyc +0 -0
  147. package/templates/scripts/__pycache__/feature_intelligence.cpython-314.pyc +0 -0
  148. package/templates/scripts/__pycache__/fork_experiment.cpython-312.pyc +0 -0
  149. package/templates/scripts/__pycache__/fork_experiment.cpython-314.pyc +0 -0
  150. package/templates/scripts/__pycache__/generate_baselines.cpython-312.pyc +0 -0
  151. package/templates/scripts/__pycache__/generate_baselines.cpython-314.pyc +0 -0
  152. package/templates/scripts/__pycache__/generate_brief.cpython-312.pyc +0 -0
  153. package/templates/scripts/__pycache__/generate_brief.cpython-314.pyc +0 -0
  154. package/templates/scripts/__pycache__/generate_changelog.cpython-312.pyc +0 -0
  155. package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
  156. package/templates/scripts/__pycache__/generate_figures.cpython-312.pyc +0 -0
  157. package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
  158. package/templates/scripts/__pycache__/generate_logbook.cpython-312.pyc +0 -0
  159. package/templates/scripts/__pycache__/generate_logbook.cpython-314.pyc +0 -0
  160. package/templates/scripts/__pycache__/generate_model_card.cpython-312.pyc +0 -0
  161. package/templates/scripts/__pycache__/generate_model_card.cpython-314.pyc +0 -0
  162. package/templates/scripts/__pycache__/generate_onboarding.cpython-312.pyc +0 -0
  163. package/templates/scripts/__pycache__/generate_onboarding.cpython-314.pyc +0 -0
  164. package/templates/scripts/__pycache__/harness_doctor.cpython-312.pyc +0 -0
  165. package/templates/scripts/__pycache__/harness_doctor.cpython-314.pyc +0 -0
  166. package/templates/scripts/__pycache__/incremental_update.cpython-312.pyc +0 -0
  167. package/templates/scripts/__pycache__/incremental_update.cpython-314.pyc +0 -0
  168. package/templates/scripts/__pycache__/knowledge_transfer.cpython-312.pyc +0 -0
  169. package/templates/scripts/__pycache__/knowledge_transfer.cpython-314.pyc +0 -0
  170. package/templates/scripts/__pycache__/latency_benchmark.cpython-312.pyc +0 -0
  171. package/templates/scripts/__pycache__/latency_benchmark.cpython-314.pyc +0 -0
  172. package/templates/scripts/__pycache__/leakage_detector.cpython-312.pyc +0 -0
  173. package/templates/scripts/__pycache__/leakage_detector.cpython-314.pyc +0 -0
  174. package/templates/scripts/__pycache__/literature_search.cpython-312.pyc +0 -0
  175. package/templates/scripts/__pycache__/literature_search.cpython-314.pyc +0 -0
  176. package/templates/scripts/__pycache__/log_experiment.cpython-312.pyc +0 -0
  177. package/templates/scripts/__pycache__/log_experiment.cpython-314.pyc +0 -0
  178. package/templates/scripts/__pycache__/manage_hypotheses.cpython-312.pyc +0 -0
  179. package/templates/scripts/__pycache__/manage_hypotheses.cpython-314.pyc +0 -0
  180. package/templates/scripts/__pycache__/methodology_audit.cpython-312.pyc +0 -0
  181. package/templates/scripts/__pycache__/methodology_audit.cpython-314.pyc +0 -0
  182. package/templates/scripts/__pycache__/model_distiller.cpython-312.pyc +0 -0
  183. package/templates/scripts/__pycache__/model_distiller.cpython-314.pyc +0 -0
  184. package/templates/scripts/__pycache__/model_lifecycle.cpython-312.pyc +0 -0
  185. package/templates/scripts/__pycache__/model_lifecycle.cpython-314.pyc +0 -0
  186. package/templates/scripts/__pycache__/model_merger.cpython-312.pyc +0 -0
  187. package/templates/scripts/__pycache__/model_merger.cpython-314.pyc +0 -0
  188. package/templates/scripts/__pycache__/model_pruning.cpython-312.pyc +0 -0
  189. package/templates/scripts/__pycache__/model_pruning.cpython-314.pyc +0 -0
  190. package/templates/scripts/__pycache__/model_quantization.cpython-312.pyc +0 -0
  191. package/templates/scripts/__pycache__/model_quantization.cpython-314.pyc +0 -0
  192. package/templates/scripts/__pycache__/model_xray.cpython-312.pyc +0 -0
  193. package/templates/scripts/__pycache__/model_xray.cpython-314.pyc +0 -0
  194. package/templates/scripts/__pycache__/novelty_guard.cpython-312.pyc +0 -0
  195. package/templates/scripts/__pycache__/novelty_guard.cpython-314.pyc +0 -0
  196. package/templates/scripts/__pycache__/package_experiments.cpython-312.pyc +0 -0
  197. package/templates/scripts/__pycache__/package_experiments.cpython-314.pyc +0 -0
  198. package/templates/scripts/__pycache__/pareto_frontier.cpython-312.pyc +0 -0
  199. package/templates/scripts/__pycache__/pareto_frontier.cpython-314.pyc +0 -0
  200. package/templates/scripts/__pycache__/parse_metrics.cpython-312.pyc +0 -0
  201. package/templates/scripts/__pycache__/parse_metrics.cpython-314.pyc +0 -0
  202. package/templates/scripts/__pycache__/pipeline_manager.cpython-312.pyc +0 -0
  203. package/templates/scripts/__pycache__/pipeline_manager.cpython-314.pyc +0 -0
  204. package/templates/scripts/__pycache__/profile_training.cpython-312.pyc +0 -0
  205. package/templates/scripts/__pycache__/profile_training.cpython-314.pyc +0 -0
  206. package/templates/scripts/__pycache__/regression_gate.cpython-312.pyc +0 -0
  207. package/templates/scripts/__pycache__/regression_gate.cpython-314.pyc +0 -0
  208. package/templates/scripts/__pycache__/reproduce_experiment.cpython-312.pyc +0 -0
  209. package/templates/scripts/__pycache__/reproduce_experiment.cpython-314.pyc +0 -0
  210. package/templates/scripts/__pycache__/research_planner.cpython-312.pyc +0 -0
  211. package/templates/scripts/__pycache__/research_planner.cpython-314.pyc +0 -0
  212. package/templates/scripts/__pycache__/sanity_checks.cpython-312.pyc +0 -0
  213. package/templates/scripts/__pycache__/sanity_checks.cpython-314.pyc +0 -0
  214. package/templates/scripts/__pycache__/scaffold.cpython-312.pyc +0 -0
  215. package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  216. package/templates/scripts/__pycache__/scaling_estimator.cpython-312.pyc +0 -0
  217. package/templates/scripts/__pycache__/scaling_estimator.cpython-314.pyc +0 -0
  218. package/templates/scripts/__pycache__/seed_runner.cpython-312.pyc +0 -0
  219. package/templates/scripts/__pycache__/seed_runner.cpython-314.pyc +0 -0
  220. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-312.pyc +0 -0
  221. package/templates/scripts/__pycache__/sensitivity_analysis.cpython-314.pyc +0 -0
  222. package/templates/scripts/__pycache__/session_flashback.cpython-312.pyc +0 -0
  223. package/templates/scripts/__pycache__/session_flashback.cpython-314.pyc +0 -0
  224. package/templates/scripts/__pycache__/show_experiment_tree.cpython-312.pyc +0 -0
  225. package/templates/scripts/__pycache__/show_experiment_tree.cpython-314.pyc +0 -0
  226. package/templates/scripts/__pycache__/show_families.cpython-312.pyc +0 -0
  227. package/templates/scripts/__pycache__/show_families.cpython-314.pyc +0 -0
  228. package/templates/scripts/__pycache__/simulate_review.cpython-312.pyc +0 -0
  229. package/templates/scripts/__pycache__/simulate_review.cpython-314.pyc +0 -0
  230. package/templates/scripts/__pycache__/smart_retry.cpython-312.pyc +0 -0
  231. package/templates/scripts/__pycache__/smart_retry.cpython-314.pyc +0 -0
  232. package/templates/scripts/__pycache__/statistical_compare.cpython-312.pyc +0 -0
  233. package/templates/scripts/__pycache__/statistical_compare.cpython-314.pyc +0 -0
  234. package/templates/scripts/__pycache__/suggest_next.cpython-312.pyc +0 -0
  235. package/templates/scripts/__pycache__/suggest_next.cpython-314.pyc +0 -0
  236. package/templates/scripts/__pycache__/sweep.cpython-312.pyc +0 -0
  237. package/templates/scripts/__pycache__/sweep.cpython-314.pyc +0 -0
  238. package/templates/scripts/__pycache__/synthesize_decision.cpython-312.pyc +0 -0
  239. package/templates/scripts/__pycache__/synthesize_decision.cpython-314.pyc +0 -0
  240. package/templates/scripts/__pycache__/training_monitor.cpython-312.pyc +0 -0
  241. package/templates/scripts/__pycache__/training_monitor.cpython-314.pyc +0 -0
  242. package/templates/scripts/__pycache__/treequest_suggest.cpython-312.pyc +0 -0
  243. package/templates/scripts/__pycache__/treequest_suggest.cpython-314.pyc +0 -0
  244. package/templates/scripts/__pycache__/trend_analysis.cpython-312.pyc +0 -0
  245. package/templates/scripts/__pycache__/trend_analysis.cpython-314.pyc +0 -0
  246. package/templates/scripts/__pycache__/turing_io.cpython-312.pyc +0 -0
  247. package/templates/scripts/__pycache__/turing_io.cpython-314.pyc +0 -0
  248. package/templates/scripts/__pycache__/update_state.cpython-312.pyc +0 -0
  249. package/templates/scripts/__pycache__/update_state.cpython-314.pyc +0 -0
  250. package/templates/scripts/__pycache__/verify_placeholders.cpython-312.pyc +0 -0
  251. package/templates/scripts/__pycache__/verify_placeholders.cpython-314.pyc +0 -0
  252. package/templates/scripts/__pycache__/warm_start.cpython-312.pyc +0 -0
  253. package/templates/scripts/__pycache__/warm_start.cpython-314.pyc +0 -0
  254. package/templates/scripts/__pycache__/whatif_engine.cpython-312.pyc +0 -0
  255. package/templates/scripts/__pycache__/whatif_engine.cpython-314.pyc +0 -0
@@ -0,0 +1,89 @@
1
+ ---
2
+ name: poster
3
+ description: Generate a single-page HTML research poster summarizing the experiment campaign — best result, trajectory, key findings, and methodology. Adapted from posterskill's self-contained HTML architecture.
4
+ disable-model-invocation: true
5
+ argument-hint: "[title override]"
6
+ allowed-tools: Read, Write, Edit, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*, open:*), Grep, Glob
7
+ ---
8
+
9
+ Generate a research poster summarizing the experiment campaign as a single self-contained HTML file. Adapted from [posterskill](https://github.com/ethanweber/posterskill)'s architecture — no build step, works when opened as `file://`.
10
+
11
+ ## Steps
12
+
13
+ ### 1. Gather Data
14
+
15
+ Read the experiment history and project context:
16
+
17
+ ```bash
18
+ cat config.yaml
19
+ source .venv/bin/activate && python scripts/generate_brief.py
20
+ source .venv/bin/activate && python scripts/show_metrics.py --last 20
21
+ cat experiment_state.yaml 2>/dev/null || true
22
+ cat RESEARCH_PLAN.md 2>/dev/null || true
23
+ ```
24
+
25
+ From this, extract:
26
+ - **Title:** from config task description (or `$ARGUMENTS` override)
27
+ - **Best result:** metric name, value, experiment ID
28
+ - **Improvement trajectory:** metric values over experiments
29
+ - **Key findings:** what model families worked, what didn't, what was surprising
30
+ - **Methodology:** the experiment loop, evaluation strategy, convergence criteria
31
+ - **Campaign stats:** total experiments, keep rate, time span
32
+
33
+ ### 2. Generate the Poster HTML
34
+
35
+ Create `poster/index.html` — a self-contained HTML file with:
36
+
37
+ ```bash
38
+ mkdir -p poster
39
+ ```
40
+
41
+ **Structure the poster with these cards:**
42
+
43
+ | Card | Content |
44
+ |------|---------|
45
+ | **Header** | Title, "Autonomous ML Research Campaign", date range, best metric badge |
46
+ | **Objective** | Task description and success criteria from config |
47
+ | **Methodology** | The autoresearch loop: hypothesize → train → evaluate → decide. Mention immutable evaluation, git-disciplined rollback |
48
+ | **Trajectory** | Chart.js line chart of metric progression (embed data inline) |
49
+ | **Best Configuration** | Model type, hyperparameters, metric values from best experiment |
50
+ | **Key Findings** | 3-5 bullet points: what worked, what didn't, surprises |
51
+ | **Explored Approaches** | Table of model families tried with keep rates |
52
+ | **Campaign Stats** | Total experiments, keep rate, human vs agent hypotheses, convergence |
53
+
54
+ **Design principles (from posterskill):**
55
+ - Single self-contained HTML file, CDN dependencies only (Chart.js, Google Fonts)
56
+ - Print-optimized CSS (`@media print`, `@page` with poster dimensions)
57
+ - Card-based layout with colored top borders
58
+ - Clean typography (system fonts or Nunito from Google Fonts)
59
+ - Data embedded directly in the HTML as JSON — no external file dependencies
60
+
61
+ **Poster dimensions:** Default A1 landscape (841mm x 594mm). The user can print to PDF from their browser.
62
+
63
+ ### 3. Self-Critique
64
+
65
+ Review the generated poster:
66
+ - Does the trajectory chart render correctly with the embedded data?
67
+ - Are the key findings specific and data-grounded (not generic)?
68
+ - Is the best configuration complete (model type + all relevant hyperparameters)?
69
+ - Would a collaborator understand the campaign from this single page?
70
+
71
+ Fix any issues found.
72
+
73
+ ### 4. Present
74
+
75
+ ```
76
+ Research poster generated at poster/index.html
77
+
78
+ Open in your browser to view. Print to PDF for sharing.
79
+ Best result: <metric>=<value> (<experiment_id>)
80
+ Campaign: <N> experiments, <keep_rate>% keep rate
81
+ ```
82
+
83
+ Suggest: "Open `poster/index.html` in your browser. Use Ctrl+P / Cmd+P to save as PDF."
84
+
85
+ ## Integration
86
+
87
+ - The poster reads from the same data sources as `/turing:brief` and `/turing:logbook`
88
+ - For a more detailed view, use `/turing:logbook` (full experiment-by-experiment narrative)
89
+ - For a quick summary, use `/turing:brief` (text-only intelligence report)
@@ -0,0 +1,28 @@
1
+ ---
2
+ name: postmortem
3
+ description: Failure postmortem — diagnose why experiments stopped improving and get actionable next steps.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--window 10] [--auto-trigger 5]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ When experiments stop improving, find out why. Diagnoses search space exhaustion, config errors, data issues, metric ceilings, and noise floors.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/failure_postmortem.py $ARGUMENTS`
14
+ 3. **Saved:** `experiments/postmortems/`
15
+
16
+ ## Diagnosis categories
17
+ - **Search space exhaustion:** micro-tuning params that don't matter
18
+ - **Systematic config error:** all experiments share a bad common config
19
+ - **Data issue:** all model types fail similarly
20
+ - **Metric ceiling:** near theoretical maximum
21
+ - **Noise floor:** improvements within seed variance
22
+
23
+ ## Examples
24
+ ```
25
+ /turing:postmortem
26
+ /turing:postmortem --window 15
27
+ /turing:postmortem --json
28
+ ```
@@ -0,0 +1,75 @@
1
+ ---
2
+ name: preflight
3
+ description: Pre-flight resource check — estimates VRAM, RAM, and disk requirements before running ML training. Compares against available system resources and issues PASS/WARN/FAIL verdict. Use before training to catch OOM errors before they happen.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--model-type torch] [--params 10M] [--batch-size 32]"
6
+ allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, nvidia-smi:*), Grep, Glob
7
+ ---
8
+
9
+ Check whether the current system has enough resources to run the planned experiment.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Run preflight check:**
19
+
20
+ If `$ARGUMENTS` is empty (auto-detect from config.yaml):
21
+ ```bash
22
+ python scripts/preflight.py
23
+ ```
24
+
25
+ If `$ARGUMENTS` contains flags:
26
+ ```bash
27
+ python scripts/preflight.py $ARGUMENTS
28
+ ```
29
+
30
+ 3. **Interpret the verdict:**
31
+
32
+ - **PASS** — system has sufficient resources. Proceed with training.
33
+ - **WARN** — resources are tight. Training may succeed but could be slow or unstable. Present warnings to the user and ask whether to proceed.
34
+ - **FAIL** — training will likely fail (OOM, disk full, no GPU for GPU-required model). Present the specific resource gap and suggest mitigations:
35
+ - RAM too low: reduce dataset size, use chunked loading, or add swap
36
+ - VRAM too low: reduce batch size, use fp16/bf16, enable gradient checkpointing, or use a smaller model
37
+ - Disk too low: clean up old models/checkpoints
38
+ - No GPU: switch to a CPU-friendly model (XGBoost, LightGBM, sklearn)
39
+
40
+ 4. **If running before `/turing:train`:** report the verdict so the human can decide whether to proceed, adjust config, or choose a different model type.
41
+
42
+ ## Examples
43
+
44
+ ```bash
45
+ # Auto-detect from config.yaml (works for Turing projects)
46
+ /turing:preflight
47
+
48
+ # Check for a specific model type
49
+ /turing:preflight --model-type transformer --params 350M --batch-size 16 --precision fp16
50
+
51
+ # Check with a specific dataset
52
+ /turing:preflight --model-type xgboost --dataset data/train.csv
53
+
54
+ # JSON output for scripting
55
+ /turing:preflight --json
56
+ ```
57
+
58
+ ## What It Checks
59
+
60
+ | Resource | How estimated | Warning threshold |
61
+ |----------|--------------|-------------------|
62
+ | **RAM** | Dataset size (4x CSV on disk) + model memory (tree nodes or param count) | >90% of available |
63
+ | **VRAM** | Model params + gradients + optimizer state + activations | >80% of largest GPU |
64
+ | **Disk** | Model artifacts + dataset + checkpoints | >50% of free space |
65
+ | **GPU presence** | torch.cuda or nvidia-smi | Required for neural nets >1GB VRAM |
66
+
67
+ ## Model-Specific Estimates
68
+
69
+ | Model Type | RAM | VRAM | GPU Required? |
70
+ |-----------|-----|------|---------------|
71
+ | XGBoost/LightGBM | Trees + data (typically <4GB) | 0 | No |
72
+ | Random Forest | Trees + data (can be large) | 0 | No |
73
+ | Linear/Logistic | 2x data | 0 | No |
74
+ | MLP (small) | Data + params | Params x 4 (Adam) | If >1GB VRAM |
75
+ | Transformer | Data + params | Params x 4 + activations | Yes |
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: present
3
+ description: Presentation figure generation — training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--figures training,comparison] [--style light|dark|poster]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Generate presentation-ready figure specifications from experiment data in seconds.
10
+
11
+ ## Steps
12
+ 1. **Activate environment:** `source .venv/bin/activate`
13
+ 2. **Run:** `python scripts/generate_figures.py $ARGUMENTS`
14
+ 3. **Figure types:** training, comparison, ablation, pareto, sensitivity
15
+ 4. **Styles:** light (papers), dark (demos), poster (large fonts)
16
+ 5. **Saved output:** `paper/figures/`
17
+
18
+ ## Examples
19
+ ```
20
+ /turing:present # All figures
21
+ /turing:present --figures training,comparison # Specific figures
22
+ /turing:present --style dark # Dark theme
23
+ ```
@@ -0,0 +1,43 @@
1
+ ---
2
+ name: profile
3
+ description: Profile a training run — timing breakdown, memory usage, throughput, bottleneck detection with actionable recommendations.
4
+ disable-model-invocation: true
5
+ argument-hint: "[exp-id] [--seed 42]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Profile a training run to identify performance bottlenecks.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - First argument can be an experiment ID (e.g., `exp-042`); defaults to best
20
+ - `--seed 42` sets the random seed for the profiling run
21
+
22
+ 3. **Run profiling:**
23
+ ```bash
24
+ python scripts/profile_training.py $ARGUMENTS
25
+ ```
26
+
27
+ 4. **Report results:**
28
+ - **Timing:** total time, training time, overhead breakdown
29
+ - **Memory:** peak RSS, Python peak, GPU peak (if applicable)
30
+ - **Throughput:** samples/sec
31
+ - **Bottleneck:** identified bottleneck type and severity
32
+ - **Recommendations:** actionable fixes for the detected bottleneck
33
+
34
+ 5. **Saved output:** results written to `experiments/profiles/exp-NNN-profile.yaml`
35
+
36
+ 6. **If no training pipeline exists:** suggest `/turing:init` first.
37
+
38
+ ## Examples
39
+
40
+ ```
41
+ /turing:profile # Profile best experiment config
42
+ /turing:profile exp-042 # Profile specific experiment
43
+ ```
@@ -0,0 +1,26 @@
1
+ ---
2
+ name: prune
3
+ description: Weight pruning — measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-id> [--sparsity 0.5,0.75,0.9] [--method magnitude|structured|lottery]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Remove redundant weights for faster inference and smaller models.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:** `source .venv/bin/activate`
14
+ 2. **Run:** `python scripts/model_pruning.py $ARGUMENTS`
15
+ 3. **Methods:** magnitude (zero small weights), structured (remove neurons), lottery (iterative with rewind)
16
+ 4. **For tree models:** progressively reduces n_estimators
17
+ 5. **Report:** sparsity sweep table, knee point, recommended sparsity
18
+ 6. **Saved output:** `experiments/pruning/<exp-id>-pruning.yaml`
19
+
20
+ ## Examples
21
+
22
+ ```
23
+ /turing:prune exp-042 # Default: magnitude, 5 levels
24
+ /turing:prune exp-042 --method structured # Remove entire neurons
25
+ /turing:prune exp-042 --sparsity 0.5,0.75,0.9 # Custom levels
26
+ ```
@@ -0,0 +1,24 @@
1
+ ---
2
+ name: quantize
3
+ description: Post-training quantization — FP32→INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-id> [--precision int8|fp16|dynamic]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Quantize for production. Lowest-effort optimization: 2-4x speedup, 2-4x memory reduction.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:** `source .venv/bin/activate`
14
+ 2. **Run:** `python scripts/model_quantization.py $ARGUMENTS`
15
+ 3. **Precision levels:** FP32 (baseline), FP16 (GPU), INT8 dynamic (simplest), INT8 static (best accuracy)
16
+ 4. **Report:** precision comparison table, recommended level, QAT suggestion if needed
17
+ 5. **Saved output:** `experiments/quantization/<exp-id>-quantization.yaml`
18
+
19
+ ## Examples
20
+
21
+ ```
22
+ /turing:quantize exp-042 # Compare all precision levels
23
+ /turing:quantize exp-042 --precision int8 # INT8 specifically
24
+ ```
@@ -0,0 +1,48 @@
1
+ ---
2
+ name: queue
3
+ description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
4
+ disable-model-invocation: true
5
+ argument-hint: "<add|list|run|pause|clear> [description] [--priority high] [--after q-001]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Manage the experiment queue for unattended batch execution.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - **add** `"description"` `--priority high` `--after q-001` — queue an experiment
20
+ - **list** — show queue with status, priority, dependencies
21
+ - **run** `--halt-on-error` — execute all queued experiments
22
+ - **pause** — stop after current experiment finishes
23
+ - **clear** — discard all queued items
24
+
25
+ 3. **Run queue manager:**
26
+ ```bash
27
+ python scripts/experiment_queue.py $ARGUMENTS
28
+ ```
29
+
30
+ 4. **Report results by action:**
31
+ - **add:** confirms ID and priority
32
+ - **list:** table of queued/completed/failed items
33
+ - **run:** batch summary with per-experiment status
34
+ - **pause/clear:** confirmation message
35
+
36
+ 5. **Queue persists in** `experiments/queue.yaml`
37
+
38
+ ## Examples
39
+
40
+ ```
41
+ /turing:queue add "try LightGBM" --priority high
42
+ /turing:queue add "deeper trees" --after q-001
43
+ /turing:queue list
44
+ /turing:queue run
45
+ /turing:queue run --halt-on-error
46
+ /turing:queue pause
47
+ /turing:queue clear
48
+ ```
@@ -0,0 +1,31 @@
1
+ ---
2
+ name: registry
3
+ description: Model registry — track, promote, and govern the model lifecycle from candidate to production.
4
+ disable-model-invocation: true
5
+ argument-hint: "[list|register|promote|demote|archive|history] [exp-id] [stage]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Track which stage each model is in — candidate, staging, production, or archived. Promotion requires passing gates.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/model_lifecycle.py $ARGUMENTS`
14
+ 3. **Registry:** `experiments/registry.yaml`
15
+
16
+ ## Promotion gates
17
+ - **candidate → staging:** regression check + seed study must PASS
18
+ - **staging → production:** audit + calibration check must PASS
19
+ - Use `--force` to skip gate checks
20
+
21
+ ## Examples
22
+ ```
23
+ /turing:registry list
24
+ /turing:registry register exp-095 --version v4.1
25
+ /turing:registry promote exp-089 staging
26
+ /turing:registry promote exp-089 production --force
27
+ /turing:registry demote exp-078 staging --reason "latency regression"
28
+ /turing:registry archive exp-042 --reason "superseded by v4"
29
+ /turing:registry history
30
+ /turing:registry history exp-089
31
+ ```
@@ -0,0 +1,53 @@
1
+ ---
2
+ name: regress
3
+ description: Performance regression gate — re-run best experiment after code/dependency changes and verify metrics haven't degraded.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--tolerance 0.01] [--against exp-id] [--quick]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ CI for your model. After any change to code, dependencies, or data, verify metrics haven't silently regressed.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - `--tolerance 0.01` sets the relative tolerance (default 1%)
20
+ - `--against exp-042` checks against a specific experiment (default: best)
21
+ - `--quick` performs a single run instead of the default 3 for fast checks
22
+ - `--runs 5` sets number of regression runs (default 3)
23
+ - `--json` outputs raw JSON
24
+
25
+ 3. **Run regression gate:**
26
+ ```bash
27
+ python scripts/regression_gate.py $ARGUMENTS
28
+ ```
29
+
30
+ 4. **Report results:**
31
+ - **PASS:** all metrics within tolerance — no regression
32
+ - **WARNING:** some metrics degraded within 2x tolerance — investigate
33
+ - **FAIL:** REGRESSION DETECTED — at least one metric degraded beyond tolerance
34
+ - Shows per-metric comparison with deltas and relative differences
35
+ - Shows environment diff if library versions changed (may explain regression)
36
+
37
+ 5. **Saved output:** report written to `experiments/regressions/check-YYYY-MM-DD.yaml`
38
+
39
+ 6. **If no experiments exist:** suggest running `/turing:train` first.
40
+
41
+ 7. **On FAIL verdict:** suggest investigating with:
42
+ - `/turing:diff <baseline> <latest>` to see what changed
43
+ - `pip freeze` comparison to identify library version changes
44
+ - `git diff` to review code changes
45
+
46
+ ## Examples
47
+
48
+ ```
49
+ /turing:regress # Default: check best, 1% tolerance, 3 runs
50
+ /turing:regress --quick # Fast check: 1 run
51
+ /turing:regress --against exp-042 # Check specific experiment
52
+ /turing:regress --tolerance 0.005 --runs 5 # Strict: 0.5% tolerance, 5 runs
53
+ ```
@@ -0,0 +1,23 @@
1
+ ---
2
+ name: replay
3
+ description: Experiment replay — re-run a historical experiment with current infrastructure to test whether old approaches perform better now.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-id> [--with-current-data] [--with-current-preprocessing]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Should you revisit old ideas? Infrastructure changes may make failed approaches work now.
10
+
11
+ ## Steps
12
+ 1. **Activate environment:** `source .venv/bin/activate`
13
+ 2. **Run:** `python scripts/experiment_replay.py $ARGUMENTS`
14
+ 3. **Modes:** default (current code+data), --with-current-data, --with-current-preprocessing
15
+ 4. **Report:** original vs replayed metrics, delta, verdict
16
+ 5. **Saved output:** `experiments/replays/`
17
+
18
+ ## Examples
19
+ ```
20
+ /turing:replay exp-023 # Replay with current infrastructure
21
+ /turing:replay exp-023 --with-current-data # Current data, old code
22
+ /turing:replay --list # List replayable experiments
23
+ ```
@@ -0,0 +1,97 @@
1
+ ---
2
+ name: report
3
+ description: Generate a markdown research report from experiment history — structured for sharing, archiving, or including in documentation. More detailed than a brief, less visual than a poster.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--since YYYY-MM-DD] [--output path]"
6
+ allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*), Grep, Glob
7
+ ---
8
+
9
+ Generate a structured markdown research report summarizing the experiment campaign.
10
+
11
+ ## Steps
12
+
13
+ ### 1. Generate the Report
14
+
15
+ Use the logbook generator in markdown mode as the data backbone:
16
+
17
+ ```bash
18
+ source .venv/bin/activate && python scripts/generate_logbook.py --format markdown
19
+ ```
20
+
21
+ Also gather supplementary data:
22
+ ```bash
23
+ source .venv/bin/activate && python scripts/generate_brief.py
24
+ cat experiment_state.yaml 2>/dev/null || true
25
+ cat RESEARCH_PLAN.md 2>/dev/null || true
26
+ ```
27
+
28
+ ### 2. Enhance with Analysis
29
+
30
+ The logbook generator produces raw data. Enhance it with your analysis to create a proper report. Add these sections that the script doesn't generate:
31
+
32
+ - **Executive Summary** (2-3 sentences): What was the task? What's the best result? Is it good enough?
33
+ - **Approach:** Describe the methodology — autoresearch loop, evaluation strategy, search strategy used
34
+ - **Key Findings:** Synthesize patterns from the experiment log:
35
+ - Which model families outperformed others?
36
+ - What hyperparameter ranges work vs don't?
37
+ - Were there surprising results?
38
+ - What failure patterns emerged?
39
+ - **Recommendations:** Based on the findings, what should be tried next? What should be avoided?
40
+ - **Limitations:** What wasn't explored? What constraints affected the results?
41
+
42
+ ### 3. Output
43
+
44
+ If `$ARGUMENTS` contains `--output <path>`:
45
+ ```bash
46
+ mkdir -p "$(dirname <path>)"
47
+ ```
48
+ Write the report to the specified path.
49
+
50
+ Otherwise, display the report directly.
51
+
52
+ **Common usage:**
53
+ ```
54
+ /turing:report --output reports/campaign-v1.md
55
+ /turing:report --since 2026-03-15 --output reports/week-12.md
56
+ ```
57
+
58
+ ## Report Structure
59
+
60
+ ```markdown
61
+ # Research Report: <task description>
62
+ Generated: <date>
63
+
64
+ ## Executive Summary
65
+ <2-3 sentences>
66
+
67
+ ## Methodology
68
+ <approach, evaluation strategy, convergence criteria>
69
+
70
+ ## Campaign Summary
71
+ <table: experiments, keep rate, best metric, timespan>
72
+
73
+ ## Improvement Trajectory
74
+ <table: experiment-by-experiment metric progression>
75
+
76
+ ## Key Findings
77
+ <synthesized patterns from experiment history>
78
+
79
+ ## Model Comparison
80
+ <table: model families, experiments per family, best metric, keep rate>
81
+
82
+ ## Hypothesis Analysis
83
+ <what was proposed, by whom, what worked>
84
+
85
+ ## Recommendations
86
+ <concrete next steps>
87
+
88
+ ## Limitations
89
+ <what wasn't tried, constraints>
90
+ ```
91
+
92
+ ## When to Use
93
+
94
+ - End of a research campaign for archiving
95
+ - Before a team review or status update
96
+ - To document findings for a paper or thesis
97
+ - To hand off a project to another researcher
@@ -0,0 +1,48 @@
1
+ ---
2
+ name: reproduce
3
+ description: Verify reproducibility of a specific experiment by re-running from logged config and checking metrics fall within tolerance.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-id> [--tolerance 0.02] [--strict] [--runs 3]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Verify that a logged experiment can be reproduced with consistent results.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - First argument is the experiment ID (required), e.g. `exp-042`
20
+ - `--tolerance 0.02` sets the relative tolerance (default 2%)
21
+ - `--strict` requires exact float match (1e-6), overrides tolerance
22
+ - `--runs 3` sets number of reproduction runs (default 3, 1 for strict)
23
+
24
+ 3. **Run reproducibility verification:**
25
+ ```bash
26
+ python scripts/reproduce_experiment.py $ARGUMENTS
27
+ ```
28
+
29
+ 4. **Report results:**
30
+ - **reproducible:** metrics match exactly (deterministic algorithm)
31
+ - **approximately_reproducible:** metrics within tolerance or original falls in 95% CI
32
+ - **not_reproducible:** metrics outside tolerance and CI
33
+ - **environment_changed:** metrics diverge AND library versions differ
34
+ - Show environment diff if present (Python version, package versions)
35
+
36
+ 5. **Saved output:** report written to `experiments/reproductions/exp-NNN-repro.yaml`
37
+
38
+ 6. **If experiment ID not found:** list available experiment IDs from `experiments/log.jsonl`
39
+
40
+ 7. **If no training pipeline exists:** suggest `/turing:init` first.
41
+
42
+ ## Examples
43
+
44
+ ```
45
+ /turing:reproduce exp-042 # Default: 3 runs, 2% tolerance
46
+ /turing:reproduce exp-042 --strict # Exact match required
47
+ /turing:reproduce exp-042 --tolerance 0.05 --runs 5 # Lenient, more runs
48
+ ```
@@ -0,0 +1,41 @@
1
+ ---
2
+ name: retry
3
+ description: Smart failure recovery — auto-diagnose crash type and retry with targeted fix. OOM → halve batch. NaN → add clipping.
4
+ disable-model-invocation: true
5
+ argument-hint: "<exp-id> [--max-attempts 3]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Auto-diagnose and recover from experiment failures.
10
+
11
+ ## Steps
12
+
13
+ 1. **Activate environment:**
14
+ ```bash
15
+ source .venv/bin/activate
16
+ ```
17
+
18
+ 2. **Parse arguments from `$ARGUMENTS`:**
19
+ - First argument is the experiment ID (required)
20
+ - `--max-attempts 3` limits retry count
21
+ - `--classify "error text"` just classifies without retrying
22
+
23
+ 3. **Run smart retry:**
24
+ ```bash
25
+ python scripts/smart_retry.py $ARGUMENTS
26
+ ```
27
+
28
+ 4. **Report results:**
29
+ - **RECOVERED:** fix applied, retry succeeded
30
+ - **FAILED:** all retry attempts exhausted
31
+ - **MANUAL FIX NEEDED:** failure type requires human intervention
32
+ - Shows failure classification, fix applied, and attempt history
33
+
34
+ 5. **Saved output:** report written to `experiments/retries/exp-NNN-retry.yaml`
35
+
36
+ ## Examples
37
+
38
+ ```
39
+ /turing:retry exp-042 # Auto-diagnose and retry
40
+ /turing:retry exp-042 --max-attempts 5 # More retries
41
+ ```
@@ -0,0 +1,20 @@
1
+ ---
2
+ name: review
3
+ description: Peer review simulation — generate likely reviewer objections with severity ratings and fix commands.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--venue neurips|icml|general] [--harsh]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Simulate a conference reviewer before you submit. Each weakness links to the command that fixes it.
10
+
11
+ ## Steps
12
+ 1. `source .venv/bin/activate`
13
+ 2. `python scripts/simulate_review.py $ARGUMENTS`
14
+ 3. **Saved:** `experiments/reviews/`
15
+
16
+ ## Examples
17
+ ```
18
+ /turing:review
19
+ /turing:review --venue neurips --harsh
20
+ ```