claude-turing 4.7.0 → 4.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/.claude-plugin/plugin.json +2 -2
  2. package/README.md +1 -1
  3. package/agents/ml-evaluator.md +4 -4
  4. package/agents/ml-researcher.md +2 -2
  5. package/bin/turing-init.sh +2 -2
  6. package/commands/ablate.md +3 -4
  7. package/commands/annotate.md +2 -3
  8. package/commands/archive.md +2 -3
  9. package/commands/audit.md +3 -4
  10. package/commands/baseline.md +3 -4
  11. package/commands/brief.md +5 -6
  12. package/commands/budget.md +3 -4
  13. package/commands/calibrate.md +3 -4
  14. package/commands/card.md +3 -4
  15. package/commands/changelog.md +2 -3
  16. package/commands/checkpoint.md +3 -4
  17. package/commands/cite.md +2 -3
  18. package/commands/compare.md +1 -2
  19. package/commands/counterfactual.md +2 -3
  20. package/commands/curriculum.md +3 -4
  21. package/commands/design.md +3 -4
  22. package/commands/diagnose.md +4 -5
  23. package/commands/diff.md +3 -4
  24. package/commands/distill.md +3 -4
  25. package/commands/doctor.md +2 -3
  26. package/commands/ensemble.md +3 -4
  27. package/commands/explore.md +4 -5
  28. package/commands/export.md +3 -4
  29. package/commands/feature.md +3 -4
  30. package/commands/flashback.md +2 -3
  31. package/commands/fork.md +3 -4
  32. package/commands/frontier.md +3 -4
  33. package/commands/init.md +5 -6
  34. package/commands/leak.md +3 -4
  35. package/commands/lit.md +3 -4
  36. package/commands/logbook.md +5 -6
  37. package/commands/merge.md +2 -3
  38. package/commands/mode.md +1 -2
  39. package/commands/onboard.md +2 -3
  40. package/commands/paper.md +3 -4
  41. package/commands/plan.md +2 -3
  42. package/commands/poster.md +3 -4
  43. package/commands/postmortem.md +2 -3
  44. package/commands/preflight.md +5 -6
  45. package/commands/present.md +2 -3
  46. package/commands/profile.md +3 -4
  47. package/commands/prune.md +2 -3
  48. package/commands/quantize.md +2 -3
  49. package/commands/queue.md +3 -4
  50. package/commands/registry.md +2 -3
  51. package/commands/regress.md +3 -4
  52. package/commands/replay.md +2 -3
  53. package/commands/report.md +3 -4
  54. package/commands/reproduce.md +3 -4
  55. package/commands/retry.md +3 -4
  56. package/commands/review.md +2 -3
  57. package/commands/rules/loop-protocol.md +11 -11
  58. package/commands/sanity.md +3 -4
  59. package/commands/scale.md +4 -5
  60. package/commands/search.md +2 -3
  61. package/commands/seed.md +3 -4
  62. package/commands/sensitivity.md +3 -4
  63. package/commands/share.md +2 -3
  64. package/commands/simulate.md +2 -3
  65. package/commands/status.md +1 -2
  66. package/commands/stitch.md +3 -4
  67. package/commands/suggest.md +5 -6
  68. package/commands/surgery.md +2 -3
  69. package/commands/sweep.md +8 -9
  70. package/commands/template.md +2 -3
  71. package/commands/train.md +5 -6
  72. package/commands/transfer.md +3 -4
  73. package/commands/trend.md +2 -3
  74. package/commands/try.md +4 -5
  75. package/commands/turing.md +3 -3
  76. package/commands/update.md +2 -3
  77. package/commands/validate.md +4 -5
  78. package/commands/warm.md +3 -4
  79. package/commands/watch.md +4 -5
  80. package/commands/whatif.md +2 -3
  81. package/commands/xray.md +3 -4
  82. package/config/commands.yaml +75 -75
  83. package/package.json +3 -2
  84. package/skills/turing/SKILL.md +3 -3
  85. package/skills/turing/ablate/SKILL.md +3 -4
  86. package/skills/turing/annotate/SKILL.md +2 -3
  87. package/skills/turing/archive/SKILL.md +2 -3
  88. package/skills/turing/audit/SKILL.md +3 -4
  89. package/skills/turing/baseline/SKILL.md +3 -4
  90. package/skills/turing/brief/SKILL.md +5 -6
  91. package/skills/turing/budget/SKILL.md +3 -4
  92. package/skills/turing/calibrate/SKILL.md +3 -4
  93. package/skills/turing/card/SKILL.md +3 -4
  94. package/skills/turing/changelog/SKILL.md +2 -3
  95. package/skills/turing/checkpoint/SKILL.md +3 -4
  96. package/skills/turing/cite/SKILL.md +2 -3
  97. package/skills/turing/compare/SKILL.md +1 -2
  98. package/skills/turing/counterfactual/SKILL.md +2 -3
  99. package/skills/turing/curriculum/SKILL.md +3 -4
  100. package/skills/turing/design/SKILL.md +3 -4
  101. package/skills/turing/diagnose/SKILL.md +4 -5
  102. package/skills/turing/diff/SKILL.md +3 -4
  103. package/skills/turing/distill/SKILL.md +3 -4
  104. package/skills/turing/doctor/SKILL.md +2 -3
  105. package/skills/turing/ensemble/SKILL.md +3 -4
  106. package/skills/turing/explore/SKILL.md +4 -5
  107. package/skills/turing/export/SKILL.md +3 -4
  108. package/skills/turing/feature/SKILL.md +3 -4
  109. package/skills/turing/flashback/SKILL.md +2 -3
  110. package/skills/turing/fork/SKILL.md +3 -4
  111. package/skills/turing/frontier/SKILL.md +3 -4
  112. package/skills/turing/init/SKILL.md +5 -6
  113. package/skills/turing/leak/SKILL.md +3 -4
  114. package/skills/turing/lit/SKILL.md +3 -4
  115. package/skills/turing/logbook/SKILL.md +5 -6
  116. package/skills/turing/merge/SKILL.md +2 -3
  117. package/skills/turing/mode/SKILL.md +1 -2
  118. package/skills/turing/onboard/SKILL.md +2 -3
  119. package/skills/turing/paper/SKILL.md +3 -4
  120. package/skills/turing/plan/SKILL.md +2 -3
  121. package/skills/turing/poster/SKILL.md +3 -4
  122. package/skills/turing/postmortem/SKILL.md +2 -3
  123. package/skills/turing/preflight/SKILL.md +5 -6
  124. package/skills/turing/present/SKILL.md +2 -3
  125. package/skills/turing/profile/SKILL.md +3 -4
  126. package/skills/turing/prune/SKILL.md +2 -3
  127. package/skills/turing/quantize/SKILL.md +2 -3
  128. package/skills/turing/queue/SKILL.md +3 -4
  129. package/skills/turing/registry/SKILL.md +2 -3
  130. package/skills/turing/regress/SKILL.md +3 -4
  131. package/skills/turing/replay/SKILL.md +2 -3
  132. package/skills/turing/report/SKILL.md +3 -4
  133. package/skills/turing/reproduce/SKILL.md +3 -4
  134. package/skills/turing/retry/SKILL.md +3 -4
  135. package/skills/turing/review/SKILL.md +2 -3
  136. package/skills/turing/rules/loop-protocol.md +11 -11
  137. package/skills/turing/sanity/SKILL.md +3 -4
  138. package/skills/turing/scale/SKILL.md +4 -5
  139. package/skills/turing/search/SKILL.md +2 -3
  140. package/skills/turing/seed/SKILL.md +3 -4
  141. package/skills/turing/sensitivity/SKILL.md +3 -4
  142. package/skills/turing/share/SKILL.md +2 -3
  143. package/skills/turing/simulate/SKILL.md +2 -3
  144. package/skills/turing/status/SKILL.md +1 -2
  145. package/skills/turing/stitch/SKILL.md +3 -4
  146. package/skills/turing/suggest/SKILL.md +5 -6
  147. package/skills/turing/surgery/SKILL.md +2 -3
  148. package/skills/turing/sweep/SKILL.md +8 -9
  149. package/skills/turing/template/SKILL.md +2 -3
  150. package/skills/turing/train/SKILL.md +5 -6
  151. package/skills/turing/transfer/SKILL.md +3 -4
  152. package/skills/turing/trend/SKILL.md +2 -3
  153. package/skills/turing/try/SKILL.md +4 -5
  154. package/skills/turing/update/SKILL.md +2 -3
  155. package/skills/turing/validate/SKILL.md +4 -5
  156. package/skills/turing/warm/SKILL.md +3 -4
  157. package/skills/turing/watch/SKILL.md +4 -5
  158. package/skills/turing/whatif/SKILL.md +2 -3
  159. package/skills/turing/xray/SKILL.md +3 -4
  160. package/src/command-registry.js +12 -0
  161. package/src/install.js +4 -3
  162. package/src/sync-commands-layout.js +149 -0
  163. package/src/sync-skills-layout.js +4 -133
  164. package/templates/README.md +5 -8
  165. package/templates/program.md +18 -18
  166. package/templates/pyproject.toml +10 -0
  167. package/templates/requirements.txt +4 -1
  168. package/templates/scripts/generate_onboarding.py +1 -1
  169. package/templates/scripts/post-train-hook.sh +7 -8
  170. package/templates/scripts/scaffold.py +24 -26
  171. package/templates/scripts/stop-hook.sh +2 -3
  172. package/templates/scripts/turing-run-python.sh +9 -0
@@ -3,7 +3,7 @@ commands:
3
3
  description: "Run systematic ablation study \u2014 remove components one at a time, measure impact, produce publication-ready table with dead-weight flagging."
4
4
  lifecycle: analyze
5
5
  invocation_mode: slash_only
6
- model_invocation: disabled
6
+ model_invocation: enabled
7
7
  mutates_project: true
8
8
  tools:
9
9
  - Read
@@ -15,7 +15,7 @@ commands:
15
15
  description: "Retrospective experiment annotations \u2014 add human notes, tags, and context that automated metrics can't capture."
16
16
  lifecycle: record
17
17
  invocation_mode: slash_only
18
- model_invocation: disabled
18
+ model_invocation: enabled
19
19
  mutates_project: true
20
20
  tools:
21
21
  - Read
@@ -27,7 +27,7 @@ commands:
27
27
  description: "Experiment lifecycle cleanup \u2014 compress old artifacts, prune checkpoints, create queryable summary index. Reclaim disk space."
28
28
  lifecycle: manage
29
29
  invocation_mode: slash_only
30
- model_invocation: disabled
30
+ model_invocation: enabled
31
31
  mutates_project: true
32
32
  tools:
33
33
  - Read
@@ -39,7 +39,7 @@ commands:
39
39
  description: "Pre-submission methodology audit \u2014 catch data leakage, missing baselines, cherry-picked seeds, and incomplete ablations before a reviewer does."
40
40
  lifecycle: validate
41
41
  invocation_mode: slash_only
42
- model_invocation: disabled
42
+ model_invocation: enabled
43
43
  mutates_project: true
44
44
  tools:
45
45
  - Read
@@ -51,7 +51,7 @@ commands:
51
51
  description: "Automatic baseline generation \u2014 random, majority/mean, linear, k-NN baselines in 60 seconds. Every experiment needs a \"is this better than dumb?\" reference."
52
52
  lifecycle: analyze
53
53
  invocation_mode: slash_only
54
- model_invocation: disabled
54
+ model_invocation: enabled
55
55
  mutates_project: true
56
56
  tools:
57
57
  - Read
@@ -63,7 +63,7 @@ commands:
63
63
  description: "Generate a structured research intelligence report from experiment history \u2014 what's been learned, what's promising, what's exhausted, and what the human should consider next. Use --deep for literature-grounded suggestions."
64
64
  lifecycle: report
65
65
  invocation_mode: slash_only
66
- model_invocation: disabled
66
+ model_invocation: enabled
67
67
  mutates_project: true
68
68
  tools:
69
69
  - Read
@@ -77,7 +77,7 @@ commands:
77
77
  description: "Compute budget manager \u2014 set experiment/time limits, track allocation across explore/exploit phases, auto-shift modes, hard stop."
78
78
  lifecycle: manage
79
79
  invocation_mode: slash_only
80
- model_invocation: disabled
80
+ model_invocation: enabled
81
81
  mutates_project: true
82
82
  tools:
83
83
  - Read
@@ -89,7 +89,7 @@ commands:
89
89
  description: "Probability calibration \u2014 measure ECE, plot reliability diagrams, apply Platt scaling or isotonic regression."
90
90
  lifecycle: analyze
91
91
  invocation_mode: slash_only
92
- model_invocation: disabled
92
+ model_invocation: enabled
93
93
  mutates_project: true
94
94
  tools:
95
95
  - Read
@@ -101,7 +101,7 @@ commands:
101
101
  description: "Generate a standardized model card documenting the trained model \u2014 type, performance, training data, limitations, intended use, and artifact contract."
102
102
  lifecycle: document
103
103
  invocation_mode: slash_only
104
- model_invocation: disabled
104
+ model_invocation: enabled
105
105
  mutates_project: true
106
106
  tools:
107
107
  - Read
@@ -112,7 +112,7 @@ commands:
112
112
  description: "Model changelog generation \u2014 auto-generate human-readable progress narrative from experiment history for stakeholders."
113
113
  lifecycle: document
114
114
  invocation_mode: slash_only
115
- model_invocation: disabled
115
+ model_invocation: enabled
116
116
  mutates_project: true
117
117
  tools:
118
118
  - Read
@@ -124,7 +124,7 @@ commands:
124
124
  description: "Smart checkpoint management \u2014 list, prune (Pareto-based), average top-K, resume from any point, disk usage stats."
125
125
  lifecycle: check
126
126
  invocation_mode: slash_only
127
- model_invocation: disabled
127
+ model_invocation: enabled
128
128
  mutates_project: true
129
129
  tools:
130
130
  - Read
@@ -136,7 +136,7 @@ commands:
136
136
  description: "Citation & attribution manager \u2014 track papers, datasets, methods. Audit for missing citations, generate BibTeX."
137
137
  lifecycle: record
138
138
  invocation_mode: slash_only
139
- model_invocation: disabled
139
+ model_invocation: enabled
140
140
  mutates_project: true
141
141
  tools:
142
142
  - Read
@@ -148,7 +148,7 @@ commands:
148
148
  description: "Compare two ML experiment runs side-by-side \u2014 metrics, configuration deltas, and a verdict on which approach is more promising."
149
149
  lifecycle: analyze
150
150
  invocation_mode: slash_only
151
- model_invocation: disabled
151
+ model_invocation: enabled
152
152
  mutates_project: false
153
153
  tools:
154
154
  - Read
@@ -160,7 +160,7 @@ commands:
160
160
  description: "Input-level counterfactual explanations \u2014 find the smallest input change to flip a prediction."
161
161
  lifecycle: explain
162
162
  invocation_mode: slash_only
163
- model_invocation: disabled
163
+ model_invocation: enabled
164
164
  mutates_project: true
165
165
  tools:
166
166
  - Read
@@ -172,7 +172,7 @@ commands:
172
172
  description: "Training curriculum optimization \u2014 order data by difficulty, compare easy-to-hard vs hard-to-easy vs self-paced strategies."
173
173
  lifecycle: optimize
174
174
  invocation_mode: slash_only
175
- model_invocation: disabled
175
+ model_invocation: enabled
176
176
  mutates_project: true
177
177
  tools:
178
178
  - Read
@@ -184,7 +184,7 @@ commands:
184
184
  description: Generate a structured experiment design for a hypothesis. Reads experiment history, searches literature for methodology, produces a scored design document at experiments/designs/.
185
185
  lifecycle: design
186
186
  invocation_mode: slash_only
187
- model_invocation: disabled
187
+ model_invocation: enabled
188
188
  mutates_project: true
189
189
  tools:
190
190
  - Read
@@ -199,7 +199,7 @@ commands:
199
199
  description: "Error analysis \u2014 cluster failure cases, identify systematic failure modes, and suggest targeted fixes with auto-queued hypotheses."
200
200
  lifecycle: analyze
201
201
  invocation_mode: slash_only
202
- model_invocation: disabled
202
+ model_invocation: enabled
203
203
  mutates_project: true
204
204
  tools:
205
205
  - Read
@@ -211,7 +211,7 @@ commands:
211
211
  description: "Deep experiment comparison \u2014 config diffs, metric significance, per-class regressions, training curve divergence, feature importance shifts."
212
212
  lifecycle: analyze
213
213
  invocation_mode: slash_only
214
- model_invocation: disabled
214
+ model_invocation: enabled
215
215
  mutates_project: true
216
216
  tools:
217
217
  - Read
@@ -223,7 +223,7 @@ commands:
223
223
  description: "Model compression via distillation \u2014 train a smaller student model to match a larger teacher's predictions."
224
224
  lifecycle: deploy
225
225
  invocation_mode: slash_only
226
- model_invocation: disabled
226
+ model_invocation: enabled
227
227
  mutates_project: true
228
228
  tools:
229
229
  - Read
@@ -235,7 +235,7 @@ commands:
235
235
  description: "Harness self-diagnosis \u2014 check environment, project, resources, and git state. Auto-fix common issues."
236
236
  lifecycle: check
237
237
  invocation_mode: slash_only
238
- model_invocation: disabled
238
+ model_invocation: enabled
239
239
  mutates_project: true
240
240
  tools:
241
241
  - Read
@@ -247,7 +247,7 @@ commands:
247
247
  description: "Automated ensemble construction \u2014 combines top-K models via voting, stacking, and blending for zero-cost improvement."
248
248
  lifecycle: compose
249
249
  invocation_mode: slash_only
250
- model_invocation: disabled
250
+ model_invocation: enabled
251
251
  mutates_project: true
252
252
  tools:
253
253
  - Read
@@ -259,7 +259,7 @@ commands:
259
259
  description: Tree-search-guided hypothesis exploration using AB-MCTS. Explores the space of experiment ideas as a search tree, scored by the critique engine. Discovers non-obvious refinement chains that linear suggestion cannot find.
260
260
  lifecycle: research
261
261
  invocation_mode: slash_only
262
- model_invocation: disabled
262
+ model_invocation: enabled
263
263
  mutates_project: true
264
264
  tools:
265
265
  - Read
@@ -275,7 +275,7 @@ commands:
275
275
  description: Export model to production format with equivalence verification, latency benchmarking, and deployment model card.
276
276
  lifecycle: deploy
277
277
  invocation_mode: slash_only
278
- model_invocation: disabled
278
+ model_invocation: enabled
279
279
  mutates_project: true
280
280
  tools:
281
281
  - Read
@@ -287,7 +287,7 @@ commands:
287
287
  description: "Automated feature selection \u2014 multi-method importance consensus, redundancy detection, and interaction feature generation."
288
288
  lifecycle: analyze
289
289
  invocation_mode: slash_only
290
- model_invocation: disabled
290
+ model_invocation: enabled
291
291
  mutates_project: true
292
292
  tools:
293
293
  - Read
@@ -299,7 +299,7 @@ commands:
299
299
  description: "Session context restoration \u2014 \"where was I?\" summary after days away. Current best, pending hypotheses, last session, annotations."
300
300
  lifecycle: recall
301
301
  invocation_mode: slash_only
302
- model_invocation: disabled
302
+ model_invocation: enabled
303
303
  mutates_project: true
304
304
  tools:
305
305
  - Read
@@ -311,7 +311,7 @@ commands:
311
311
  description: "Branch an experiment into parallel tracks \u2014 run both A and B, report the winner."
312
312
  lifecycle: orchestrate
313
313
  invocation_mode: slash_only
314
- model_invocation: disabled
314
+ model_invocation: enabled
315
315
  mutates_project: true
316
316
  tools:
317
317
  - Read
@@ -323,7 +323,7 @@ commands:
323
323
  description: "Visualize Pareto frontier across multiple objectives \u2014 answers \"which model is actually best?\" when there are tradeoffs."
324
324
  lifecycle: analyze
325
325
  invocation_mode: slash_only
326
- model_invocation: disabled
326
+ model_invocation: enabled
327
327
  mutates_project: true
328
328
  tools:
329
329
  - Read
@@ -332,10 +332,10 @@ commands:
332
332
  - Glob
333
333
  argument_hint: '[--metrics "accuracy,train_seconds,n_params"] [--ascii]'
334
334
  init:
335
- description: "Initialize a new ML project with the Turing autoresearch harness. Scaffolds the full experiment infrastructure \u2014 immutable evaluation pipeline, agent-editable training code, structured logging, convergence detection hooks, and a Python virtual environment. Use --plan to generate a research plan."
335
+ description: "Initialize a new ML project with the Turing autoresearch harness. Scaffolds the full experiment infrastructure \u2014 immutable evaluation pipeline, agent-editable training code, structured logging, convergence detection hooks, and a uv-managed Python environment. Use --plan to generate a research plan."
336
336
  lifecycle: setup
337
337
  invocation_mode: slash_only
338
- model_invocation: disabled
338
+ model_invocation: enabled
339
339
  mutates_project: true
340
340
  tools:
341
341
  - Read
@@ -351,7 +351,7 @@ commands:
351
351
  description: "Targeted leakage detection \u2014 probe for data leakage with single-feature tests, correlation checks, and train/test overlap detection."
352
352
  lifecycle: validate
353
353
  invocation_mode: slash_only
354
- model_invocation: disabled
354
+ model_invocation: enabled
355
355
  mutates_project: true
356
356
  tools:
357
357
  - Read
@@ -363,7 +363,7 @@ commands:
363
363
  description: "Literature search scoped to the current experiment domain \u2014 find papers, SOTA baselines, and related work without leaving the terminal."
364
364
  lifecycle: research
365
365
  invocation_mode: slash_only
366
- model_invocation: disabled
366
+ model_invocation: enabled
367
367
  mutates_project: true
368
368
  tools:
369
369
  - Read
@@ -376,7 +376,7 @@ commands:
376
376
  description: "Generate a research logbook showing the full experiment narrative \u2014 hypotheses proposed, experiments run, decisions made, and progress over time. Outputs HTML (with interactive chart) or markdown."
377
377
  lifecycle: document
378
378
  invocation_mode: slash_only
379
- model_invocation: disabled
379
+ model_invocation: enabled
380
380
  mutates_project: true
381
381
  tools:
382
382
  - Read
@@ -388,7 +388,7 @@ commands:
388
388
  description: "Model merging \u2014 average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost."
389
389
  lifecycle: compose
390
390
  invocation_mode: slash_only
391
- model_invocation: disabled
391
+ model_invocation: enabled
392
392
  mutates_project: true
393
393
  tools:
394
394
  - Read
@@ -400,7 +400,7 @@ commands:
400
400
  description: "Set the research strategy mode \u2014 explore (try new things), exploit (refine what works), or replicate (verify results). Drives novelty guard policy and agent behavior."
401
401
  lifecycle: strategy
402
402
  invocation_mode: slash_only
403
- model_invocation: disabled
403
+ model_invocation: enabled
404
404
  mutates_project: true
405
405
  tools:
406
406
  - Read
@@ -410,7 +410,7 @@ commands:
410
410
  description: "Project onboarding \u2014 generate a walkthrough for new collaborators. Task, history, decisions, next steps."
411
411
  lifecycle: document
412
412
  invocation_mode: slash_only
413
- model_invocation: disabled
413
+ model_invocation: enabled
414
414
  mutates_project: true
415
415
  tools:
416
416
  - Read
@@ -422,7 +422,7 @@ commands:
422
422
  description: Draft mechanical paper sections (setup, results, ablation, hyperparameters) from experiment logs. LaTeX and markdown output.
423
423
  lifecycle: document
424
424
  invocation_mode: slash_only
425
- model_invocation: disabled
425
+ model_invocation: enabled
426
426
  mutates_project: true
427
427
  tools:
428
428
  - Read
@@ -434,7 +434,7 @@ commands:
434
434
  description: "Research planning assistant \u2014 design a strategic experiment campaign with budget-aware ROI allocation."
435
435
  lifecycle: plan
436
436
  invocation_mode: slash_only
437
- model_invocation: disabled
437
+ model_invocation: enabled
438
438
  mutates_project: true
439
439
  tools:
440
440
  - Read
@@ -446,7 +446,7 @@ commands:
446
446
  description: "Generate a single-page HTML research poster summarizing the experiment campaign \u2014 best result, trajectory, key findings, and methodology. Adapted from posterskill's self-contained HTML architecture."
447
447
  lifecycle: document
448
448
  invocation_mode: slash_only
449
- model_invocation: disabled
449
+ model_invocation: enabled
450
450
  mutates_project: true
451
451
  tools:
452
452
  - Read
@@ -460,7 +460,7 @@ commands:
460
460
  description: "Failure postmortem \u2014 diagnose why experiments stopped improving and get actionable next steps."
461
461
  lifecycle: diagnose
462
462
  invocation_mode: slash_only
463
- model_invocation: disabled
463
+ model_invocation: enabled
464
464
  mutates_project: true
465
465
  tools:
466
466
  - Read
@@ -472,7 +472,7 @@ commands:
472
472
  description: "Pre-flight resource check \u2014 estimates VRAM, RAM, and disk requirements before running ML training. Compares against available system resources and issues PASS/WARN/FAIL verdict. Use before training to catch OOM errors before they happen."
473
473
  lifecycle: check
474
474
  invocation_mode: slash_only
475
- model_invocation: disabled
475
+ model_invocation: enabled
476
476
  mutates_project: false
477
477
  tools:
478
478
  - Read
@@ -484,7 +484,7 @@ commands:
484
484
  description: "Presentation figure generation \u2014 training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps."
485
485
  lifecycle: document
486
486
  invocation_mode: slash_only
487
- model_invocation: disabled
487
+ model_invocation: enabled
488
488
  mutates_project: true
489
489
  tools:
490
490
  - Read
@@ -496,7 +496,7 @@ commands:
496
496
  description: "Profile a training run \u2014 timing breakdown, memory usage, throughput, bottleneck detection with actionable recommendations."
497
497
  lifecycle: check
498
498
  invocation_mode: slash_only
499
- model_invocation: disabled
499
+ model_invocation: enabled
500
500
  mutates_project: true
501
501
  tools:
502
502
  - Read
@@ -508,7 +508,7 @@ commands:
508
508
  description: "Weight pruning \u2014 measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model."
509
509
  lifecycle: optimize
510
510
  invocation_mode: slash_only
511
- model_invocation: disabled
511
+ model_invocation: enabled
512
512
  mutates_project: true
513
513
  tools:
514
514
  - Read
@@ -520,7 +520,7 @@ commands:
520
520
  description: "Post-training quantization \u2014 FP32\u2192INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss."
521
521
  lifecycle: optimize
522
522
  invocation_mode: slash_only
523
- model_invocation: disabled
523
+ model_invocation: enabled
524
524
  mutates_project: true
525
525
  tools:
526
526
  - Read
@@ -532,7 +532,7 @@ commands:
532
532
  description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
533
533
  lifecycle: orchestrate
534
534
  invocation_mode: slash_only
535
- model_invocation: disabled
535
+ model_invocation: enabled
536
536
  mutates_project: true
537
537
  tools:
538
538
  - Read
@@ -544,7 +544,7 @@ commands:
544
544
  description: "Model registry \u2014 track, promote, and govern the model lifecycle from candidate to production."
545
545
  lifecycle: govern
546
546
  invocation_mode: slash_only
547
- model_invocation: disabled
547
+ model_invocation: enabled
548
548
  mutates_project: true
549
549
  tools:
550
550
  - Read
@@ -556,7 +556,7 @@ commands:
556
556
  description: "Performance regression gate \u2014 re-run best experiment after code/dependency changes and verify metrics haven't degraded."
557
557
  lifecycle: validate
558
558
  invocation_mode: slash_only
559
- model_invocation: disabled
559
+ model_invocation: enabled
560
560
  mutates_project: true
561
561
  tools:
562
562
  - Read
@@ -568,7 +568,7 @@ commands:
568
568
  description: "Experiment replay \u2014 re-run a historical experiment with current infrastructure to test if old approaches do better now."
569
569
  lifecycle: validate
570
570
  invocation_mode: slash_only
571
- model_invocation: disabled
571
+ model_invocation: enabled
572
572
  mutates_project: true
573
573
  tools:
574
574
  - Read
@@ -580,7 +580,7 @@ commands:
580
580
  description: "Generate a markdown research report from experiment history \u2014 structured for sharing, archiving, or including in documentation. More detailed than a brief, less visual than a poster."
581
581
  lifecycle: document
582
582
  invocation_mode: slash_only
583
- model_invocation: disabled
583
+ model_invocation: enabled
584
584
  mutates_project: true
585
585
  tools:
586
586
  - Read
@@ -592,7 +592,7 @@ commands:
592
592
  description: Verify reproducibility of a specific experiment by re-running from logged config and checking metrics fall within tolerance.
593
593
  lifecycle: validate
594
594
  invocation_mode: slash_only
595
- model_invocation: disabled
595
+ model_invocation: enabled
596
596
  mutates_project: true
597
597
  tools:
598
598
  - Read
@@ -604,7 +604,7 @@ commands:
604
604
  description: "Smart failure recovery \u2014 auto-diagnose crash type and retry with targeted fix. OOM \u2192 halve batch. NaN \u2192 add clipping."
605
605
  lifecycle: orchestrate
606
606
  invocation_mode: slash_only
607
- model_invocation: disabled
607
+ model_invocation: enabled
608
608
  mutates_project: true
609
609
  tools:
610
610
  - Read
@@ -616,7 +616,7 @@ commands:
616
616
  description: "Peer review simulation \u2014 generate likely reviewer objections with severity ratings and fix commands."
617
617
  lifecycle: validate
618
618
  invocation_mode: slash_only
619
- model_invocation: disabled
619
+ model_invocation: enabled
620
620
  mutates_project: true
621
621
  tools:
622
622
  - Read
@@ -628,7 +628,7 @@ commands:
628
628
  description: "Pre-training sanity checks \u2014 catch broken data loaders, misconfigured losses, and dead gradients in 30 seconds before wasting hours."
629
629
  lifecycle: check
630
630
  invocation_mode: slash_only
631
- model_invocation: disabled
631
+ model_invocation: enabled
632
632
  mutates_project: true
633
633
  tools:
634
634
  - Read
@@ -640,7 +640,7 @@ commands:
640
640
  description: "Scaling law estimator \u2014 run small experiments at different sizes, fit a power law, and predict full-scale performance before committing compute."
641
641
  lifecycle: analyze
642
642
  invocation_mode: slash_only
643
- model_invocation: disabled
643
+ model_invocation: enabled
644
644
  mutates_project: true
645
645
  tools:
646
646
  - Read
@@ -652,7 +652,7 @@ commands:
652
652
  description: "Natural language experiment search \u2014 query with text + structured filters over 200+ experiments."
653
653
  lifecycle: query
654
654
  invocation_mode: slash_only
655
- model_invocation: disabled
655
+ model_invocation: enabled
656
656
  mutates_project: false
657
657
  tools:
658
658
  - Read
@@ -664,7 +664,7 @@ commands:
664
664
  description: Run multi-seed study on an experiment to compute mean/std/CI and flag seed-sensitive results. Prevents publishing lucky seeds.
665
665
  lifecycle: validate
666
666
  invocation_mode: slash_only
667
- model_invocation: disabled
667
+ model_invocation: enabled
668
668
  mutates_project: true
669
669
  tools:
670
670
  - Read
@@ -676,7 +676,7 @@ commands:
676
676
  description: "Hyperparameter sensitivity analysis \u2014 rank parameters by impact, identify which matter and which are noise."
677
677
  lifecycle: analyze
678
678
  invocation_mode: slash_only
679
- model_invocation: disabled
679
+ model_invocation: enabled
680
680
  mutates_project: true
681
681
  tools:
682
682
  - Read
@@ -688,7 +688,7 @@ commands:
688
688
  description: "Experiment packaging \u2014 portable archive with config, metrics, seed study, annotations, reproduction instructions."
689
689
  lifecycle: share
690
690
  invocation_mode: slash_only
691
- model_invocation: disabled
691
+ model_invocation: enabled
692
692
  mutates_project: true
693
693
  tools:
694
694
  - Read
@@ -700,7 +700,7 @@ commands:
700
700
  description: "Experiment outcome prediction \u2014 predict which configs will beat the current best before running them."
701
701
  lifecycle: predict
702
702
  invocation_mode: slash_only
703
- model_invocation: disabled
703
+ model_invocation: enabled
704
704
  mutates_project: true
705
705
  tools:
706
706
  - Read
@@ -712,7 +712,7 @@ commands:
712
712
  description: "Show current ML experiment status \u2014 best model, recent experiments, convergence state, and trend analysis. Delegates to @ml-evaluator for read-only safety."
713
713
  lifecycle: observe
714
714
  invocation_mode: slash_only
715
- model_invocation: disabled
715
+ model_invocation: enabled
716
716
  mutates_project: false
717
717
  tools:
718
718
  - Read
@@ -723,7 +723,7 @@ commands:
723
723
  description: "Pipeline composition \u2014 decompose ML pipelines into swappable stages. Show, swap, cache, and run stages independently."
724
724
  lifecycle: compose
725
725
  invocation_mode: slash_only
726
- model_invocation: disabled
726
+ model_invocation: enabled
727
727
  mutates_project: true
728
728
  tools:
729
729
  - Read
@@ -735,7 +735,7 @@ commands:
735
735
  description: "Literature-grounded model selection. Reads the ML task context, searches recent literature, and suggests model architectures worth trying \u2014 with citations. Suggestions are auto-queued as hypotheses."
736
736
  lifecycle: research
737
737
  invocation_mode: slash_only
738
- model_invocation: disabled
738
+ model_invocation: enabled
739
739
  mutates_project: true
740
740
  tools:
741
741
  - Read
@@ -753,7 +753,7 @@ commands:
753
753
  description: "Architecture modification \u2014 add/remove layers, widen/narrow, swap activations, inject skip connections. Specify what to change, system handles how."
754
754
  lifecycle: modify
755
755
  invocation_mode: slash_only
756
- model_invocation: disabled
756
+ model_invocation: enabled
757
757
  mutates_project: true
758
758
  tools:
759
759
  - Read
@@ -765,7 +765,7 @@ commands:
765
765
  description: Generate and run a systematic hyperparameter sweep. Computes the cartesian product of configured parameter ranges and processes the queue sequentially with full experiment logging.
766
766
  lifecycle: explore
767
767
  invocation_mode: slash_only
768
- model_invocation: disabled
768
+ model_invocation: enabled
769
769
  mutates_project: true
770
770
  tools:
771
771
  - Read
@@ -779,7 +779,7 @@ commands:
779
779
  description: "Experiment template library \u2014 save winning configs as reusable templates, apply to new projects."
780
780
  lifecycle: manage
781
781
  invocation_mode: slash_only
782
- model_invocation: disabled
782
+ model_invocation: enabled
783
783
  mutates_project: true
784
784
  tools:
785
785
  - Read
@@ -791,7 +791,7 @@ commands:
791
791
  description: "Run the autonomous ML experiment loop. Iteratively hypothesizes, trains, evaluates, and decides \u2014 keeping only improvements. Implements the autoresearch pattern with formal convergence detection and git-disciplined rollback."
792
792
  lifecycle: execute
793
793
  invocation_mode: slash_only
794
- model_invocation: disabled
794
+ model_invocation: enabled
795
795
  mutates_project: true
796
796
  tools:
797
797
  - Read
@@ -805,7 +805,7 @@ commands:
805
805
  description: "Cross-project knowledge transfer \u2014 find similar prior projects and surface what worked. Builds institutional ML memory."
806
806
  lifecycle: research
807
807
  invocation_mode: slash_only
808
- model_invocation: disabled
808
+ model_invocation: enabled
809
809
  mutates_project: true
810
810
  tools:
811
811
  - Read
@@ -817,7 +817,7 @@ commands:
817
817
  description: "Long-term trend analysis \u2014 improvement velocity, family ROI, diminishing returns detection, strategic research direction."
818
818
  lifecycle: analyze
819
819
  invocation_mode: slash_only
820
- model_invocation: disabled
820
+ model_invocation: enabled
821
821
  mutates_project: true
822
822
  tools:
823
823
  - Read
@@ -829,7 +829,7 @@ commands:
829
829
  description: "Inject a hypothesis into the agent's experiment queue. This is how research taste reaches the agent \u2014 the human selects which coins to flip, the agent flips them."
830
830
  lifecycle: steer
831
831
  invocation_mode: slash_only
832
- model_invocation: disabled
832
+ model_invocation: enabled
833
833
  mutates_project: true
834
834
  tools:
835
835
  - Read
@@ -846,7 +846,7 @@ commands:
846
846
  description: "Incremental model update \u2014 add new data without full retraining, with forgetting detection."
847
847
  lifecycle: update
848
848
  invocation_mode: slash_only
849
- model_invocation: disabled
849
+ model_invocation: enabled
850
850
  mutates_project: true
851
851
  tools:
852
852
  - Read
@@ -858,7 +858,7 @@ commands:
858
858
  description: Run stability validation on the current experiment configuration. Executes N runs to measure metric variance and auto-configures multi-run evaluation if variance is too high.
859
859
  lifecycle: validate
860
860
  invocation_mode: slash_only
861
- model_invocation: disabled
861
+ model_invocation: enabled
862
862
  mutates_project: true
863
863
  tools:
864
864
  - Read
@@ -870,7 +870,7 @@ commands:
870
870
  description: "Warm-start from a prior model \u2014 load checkpoint, optionally freeze layers, adjust learning rate, and continue training."
871
871
  lifecycle: compose
872
872
  invocation_mode: slash_only
873
- model_invocation: disabled
873
+ model_invocation: enabled
874
874
  mutates_project: true
875
875
  tools:
876
876
  - Read
@@ -882,7 +882,7 @@ commands:
882
882
  description: Live training monitor with early-warning alerts for loss spikes, NaN, overfitting, and metric plateaus.
883
883
  lifecycle: monitor
884
884
  invocation_mode: slash_only
885
- model_invocation: disabled
885
+ model_invocation: enabled
886
886
  mutates_project: true
887
887
  tools:
888
888
  - Read
@@ -894,7 +894,7 @@ commands:
894
894
  description: "What-if analysis \u2014 answer hypotheticals from existing experiment data without running new experiments."
895
895
  lifecycle: analyze
896
896
  invocation_mode: slash_only
897
- model_invocation: disabled
897
+ model_invocation: enabled
898
898
  mutates_project: true
899
899
  tools:
900
900
  - Read
@@ -906,7 +906,7 @@ commands:
906
906
  description: "Internal model diagnostics \u2014 gradient flow, dead neurons, activation stats, weight distributions, tree depth analysis."
907
907
  lifecycle: analyze
908
908
  invocation_mode: slash_only
909
- model_invocation: disabled
909
+ model_invocation: enabled
910
910
  mutates_project: true
911
911
  tools:
912
912
  - Read
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-turing",
3
- "version": "4.7.0",
3
+ "version": "4.8.1",
4
4
  "type": "module",
5
5
  "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
6
6
  "bin": {
@@ -9,7 +9,8 @@
9
9
  },
10
10
  "scripts": {
11
11
  "postinstall": "node src/postinstall.js",
12
- "sync:skills": "node src/sync-skills-layout.js"
12
+ "sync:commands": "node src/sync-commands-layout.js",
13
+ "sync:skills": "node src/sync-commands-layout.js"
13
14
  },
14
15
  "files": [
15
16
  "bin/",
@@ -7,10 +7,10 @@ You are the Turing ML research router. Detect the user's intent and identify the
7
7
 
8
8
  ## Execution Contract
9
9
 
10
- Turing sub-commands are explicit slash-command skills. Current sub-commands are `slash_only` and use `disable-model-invocation: true`, so router handling must not claim model dispatch into those skills.
10
+ Turing sub-commands are slash-command skills that allow model invocation, so router handling may select the focused skill when the user's intent matches a sub-command.
11
11
 
12
- - If the user explicitly invokes `/turing:<cmd>`, Claude Code runtime handles that slash command.
13
- - If the user invokes `/turing` as a router and the detected command is `slash_only`, give the exact slash command to run.
12
+ - If the user explicitly invokes `/turing:<cmd>`, handle that focused sub-command directly.
13
+ - If the user invokes `/turing` as a router and the detected command is `slash_only`, route to the focused sub-command skill when appropriate.
14
14
  - If a command has a documented safe equivalent script, the assistant may execute those documented steps inline when safe and appropriate.
15
15
 
16
16
  ## Routing Table