claude-turing 4.7.0 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +1 -1
- package/commands/ablate.md +0 -1
- package/commands/annotate.md +0 -1
- package/commands/archive.md +0 -1
- package/commands/audit.md +0 -1
- package/commands/baseline.md +0 -1
- package/commands/brief.md +0 -1
- package/commands/budget.md +0 -1
- package/commands/calibrate.md +0 -1
- package/commands/card.md +0 -1
- package/commands/changelog.md +0 -1
- package/commands/checkpoint.md +0 -1
- package/commands/cite.md +0 -1
- package/commands/compare.md +0 -1
- package/commands/counterfactual.md +0 -1
- package/commands/curriculum.md +0 -1
- package/commands/design.md +0 -1
- package/commands/diagnose.md +0 -1
- package/commands/diff.md +0 -1
- package/commands/distill.md +0 -1
- package/commands/doctor.md +0 -1
- package/commands/ensemble.md +0 -1
- package/commands/explore.md +0 -1
- package/commands/export.md +0 -1
- package/commands/feature.md +0 -1
- package/commands/flashback.md +0 -1
- package/commands/fork.md +0 -1
- package/commands/frontier.md +0 -1
- package/commands/init.md +0 -1
- package/commands/leak.md +0 -1
- package/commands/lit.md +0 -1
- package/commands/logbook.md +0 -1
- package/commands/merge.md +0 -1
- package/commands/mode.md +0 -1
- package/commands/onboard.md +0 -1
- package/commands/paper.md +0 -1
- package/commands/plan.md +0 -1
- package/commands/poster.md +0 -1
- package/commands/postmortem.md +0 -1
- package/commands/preflight.md +0 -1
- package/commands/present.md +0 -1
- package/commands/profile.md +0 -1
- package/commands/prune.md +0 -1
- package/commands/quantize.md +0 -1
- package/commands/queue.md +0 -1
- package/commands/registry.md +0 -1
- package/commands/regress.md +0 -1
- package/commands/replay.md +0 -1
- package/commands/report.md +0 -1
- package/commands/reproduce.md +0 -1
- package/commands/retry.md +0 -1
- package/commands/review.md +0 -1
- package/commands/sanity.md +0 -1
- package/commands/scale.md +0 -1
- package/commands/search.md +0 -1
- package/commands/seed.md +0 -1
- package/commands/sensitivity.md +0 -1
- package/commands/share.md +0 -1
- package/commands/simulate.md +0 -1
- package/commands/status.md +0 -1
- package/commands/stitch.md +0 -1
- package/commands/suggest.md +0 -1
- package/commands/surgery.md +0 -1
- package/commands/sweep.md +0 -1
- package/commands/template.md +0 -1
- package/commands/train.md +0 -1
- package/commands/transfer.md +0 -1
- package/commands/trend.md +0 -1
- package/commands/try.md +0 -1
- package/commands/turing.md +3 -3
- package/commands/update.md +0 -1
- package/commands/validate.md +0 -1
- package/commands/warm.md +0 -1
- package/commands/watch.md +0 -1
- package/commands/whatif.md +0 -1
- package/commands/xray.md +0 -1
- package/config/commands.yaml +74 -74
- package/package.json +3 -2
- package/skills/turing/SKILL.md +3 -3
- package/skills/turing/ablate/SKILL.md +0 -1
- package/skills/turing/annotate/SKILL.md +0 -1
- package/skills/turing/archive/SKILL.md +0 -1
- package/skills/turing/audit/SKILL.md +0 -1
- package/skills/turing/baseline/SKILL.md +0 -1
- package/skills/turing/brief/SKILL.md +0 -1
- package/skills/turing/budget/SKILL.md +0 -1
- package/skills/turing/calibrate/SKILL.md +0 -1
- package/skills/turing/card/SKILL.md +0 -1
- package/skills/turing/changelog/SKILL.md +0 -1
- package/skills/turing/checkpoint/SKILL.md +0 -1
- package/skills/turing/cite/SKILL.md +0 -1
- package/skills/turing/compare/SKILL.md +0 -1
- package/skills/turing/counterfactual/SKILL.md +0 -1
- package/skills/turing/curriculum/SKILL.md +0 -1
- package/skills/turing/design/SKILL.md +0 -1
- package/skills/turing/diagnose/SKILL.md +0 -1
- package/skills/turing/diff/SKILL.md +0 -1
- package/skills/turing/distill/SKILL.md +0 -1
- package/skills/turing/doctor/SKILL.md +0 -1
- package/skills/turing/ensemble/SKILL.md +0 -1
- package/skills/turing/explore/SKILL.md +0 -1
- package/skills/turing/export/SKILL.md +0 -1
- package/skills/turing/feature/SKILL.md +0 -1
- package/skills/turing/flashback/SKILL.md +0 -1
- package/skills/turing/fork/SKILL.md +0 -1
- package/skills/turing/frontier/SKILL.md +0 -1
- package/skills/turing/init/SKILL.md +0 -1
- package/skills/turing/leak/SKILL.md +0 -1
- package/skills/turing/lit/SKILL.md +0 -1
- package/skills/turing/logbook/SKILL.md +0 -1
- package/skills/turing/merge/SKILL.md +0 -1
- package/skills/turing/mode/SKILL.md +0 -1
- package/skills/turing/onboard/SKILL.md +0 -1
- package/skills/turing/paper/SKILL.md +0 -1
- package/skills/turing/plan/SKILL.md +0 -1
- package/skills/turing/poster/SKILL.md +0 -1
- package/skills/turing/postmortem/SKILL.md +0 -1
- package/skills/turing/preflight/SKILL.md +0 -1
- package/skills/turing/present/SKILL.md +0 -1
- package/skills/turing/profile/SKILL.md +0 -1
- package/skills/turing/prune/SKILL.md +0 -1
- package/skills/turing/quantize/SKILL.md +0 -1
- package/skills/turing/queue/SKILL.md +0 -1
- package/skills/turing/registry/SKILL.md +0 -1
- package/skills/turing/regress/SKILL.md +0 -1
- package/skills/turing/replay/SKILL.md +0 -1
- package/skills/turing/report/SKILL.md +0 -1
- package/skills/turing/reproduce/SKILL.md +0 -1
- package/skills/turing/retry/SKILL.md +0 -1
- package/skills/turing/review/SKILL.md +0 -1
- package/skills/turing/sanity/SKILL.md +0 -1
- package/skills/turing/scale/SKILL.md +0 -1
- package/skills/turing/search/SKILL.md +0 -1
- package/skills/turing/seed/SKILL.md +0 -1
- package/skills/turing/sensitivity/SKILL.md +0 -1
- package/skills/turing/share/SKILL.md +0 -1
- package/skills/turing/simulate/SKILL.md +0 -1
- package/skills/turing/status/SKILL.md +0 -1
- package/skills/turing/stitch/SKILL.md +0 -1
- package/skills/turing/suggest/SKILL.md +0 -1
- package/skills/turing/surgery/SKILL.md +0 -1
- package/skills/turing/sweep/SKILL.md +0 -1
- package/skills/turing/template/SKILL.md +0 -1
- package/skills/turing/train/SKILL.md +0 -1
- package/skills/turing/transfer/SKILL.md +0 -1
- package/skills/turing/trend/SKILL.md +0 -1
- package/skills/turing/try/SKILL.md +0 -1
- package/skills/turing/update/SKILL.md +0 -1
- package/skills/turing/validate/SKILL.md +0 -1
- package/skills/turing/warm/SKILL.md +0 -1
- package/skills/turing/watch/SKILL.md +0 -1
- package/skills/turing/whatif/SKILL.md +0 -1
- package/skills/turing/xray/SKILL.md +0 -1
- package/src/command-registry.js +12 -0
- package/src/install.js +4 -3
- package/src/sync-commands-layout.js +149 -0
- package/src/sync-skills-layout.js +4 -133
package/config/commands.yaml
CHANGED
|
@@ -3,7 +3,7 @@ commands:
|
|
|
3
3
|
description: "Run systematic ablation study \u2014 remove components one at a time, measure impact, produce publication-ready table with dead-weight flagging."
|
|
4
4
|
lifecycle: analyze
|
|
5
5
|
invocation_mode: slash_only
|
|
6
|
-
model_invocation:
|
|
6
|
+
model_invocation: enabled
|
|
7
7
|
mutates_project: true
|
|
8
8
|
tools:
|
|
9
9
|
- Read
|
|
@@ -15,7 +15,7 @@ commands:
|
|
|
15
15
|
description: "Retrospective experiment annotations \u2014 add human notes, tags, and context that automated metrics can't capture."
|
|
16
16
|
lifecycle: record
|
|
17
17
|
invocation_mode: slash_only
|
|
18
|
-
model_invocation:
|
|
18
|
+
model_invocation: enabled
|
|
19
19
|
mutates_project: true
|
|
20
20
|
tools:
|
|
21
21
|
- Read
|
|
@@ -27,7 +27,7 @@ commands:
|
|
|
27
27
|
description: "Experiment lifecycle cleanup \u2014 compress old artifacts, prune checkpoints, create queryable summary index. Reclaim disk space."
|
|
28
28
|
lifecycle: manage
|
|
29
29
|
invocation_mode: slash_only
|
|
30
|
-
model_invocation:
|
|
30
|
+
model_invocation: enabled
|
|
31
31
|
mutates_project: true
|
|
32
32
|
tools:
|
|
33
33
|
- Read
|
|
@@ -39,7 +39,7 @@ commands:
|
|
|
39
39
|
description: "Pre-submission methodology audit \u2014 catch data leakage, missing baselines, cherry-picked seeds, and incomplete ablations before a reviewer does."
|
|
40
40
|
lifecycle: validate
|
|
41
41
|
invocation_mode: slash_only
|
|
42
|
-
model_invocation:
|
|
42
|
+
model_invocation: enabled
|
|
43
43
|
mutates_project: true
|
|
44
44
|
tools:
|
|
45
45
|
- Read
|
|
@@ -51,7 +51,7 @@ commands:
|
|
|
51
51
|
description: "Automatic baseline generation \u2014 random, majority/mean, linear, k-NN baselines in 60 seconds. Every experiment needs a \"is this better than dumb?\" reference."
|
|
52
52
|
lifecycle: analyze
|
|
53
53
|
invocation_mode: slash_only
|
|
54
|
-
model_invocation:
|
|
54
|
+
model_invocation: enabled
|
|
55
55
|
mutates_project: true
|
|
56
56
|
tools:
|
|
57
57
|
- Read
|
|
@@ -63,7 +63,7 @@ commands:
|
|
|
63
63
|
description: "Generate a structured research intelligence report from experiment history \u2014 what's been learned, what's promising, what's exhausted, and what the human should consider next. Use --deep for literature-grounded suggestions."
|
|
64
64
|
lifecycle: report
|
|
65
65
|
invocation_mode: slash_only
|
|
66
|
-
model_invocation:
|
|
66
|
+
model_invocation: enabled
|
|
67
67
|
mutates_project: true
|
|
68
68
|
tools:
|
|
69
69
|
- Read
|
|
@@ -77,7 +77,7 @@ commands:
|
|
|
77
77
|
description: "Compute budget manager \u2014 set experiment/time limits, track allocation across explore/exploit phases, auto-shift modes, hard stop."
|
|
78
78
|
lifecycle: manage
|
|
79
79
|
invocation_mode: slash_only
|
|
80
|
-
model_invocation:
|
|
80
|
+
model_invocation: enabled
|
|
81
81
|
mutates_project: true
|
|
82
82
|
tools:
|
|
83
83
|
- Read
|
|
@@ -89,7 +89,7 @@ commands:
|
|
|
89
89
|
description: "Probability calibration \u2014 measure ECE, plot reliability diagrams, apply Platt scaling or isotonic regression."
|
|
90
90
|
lifecycle: analyze
|
|
91
91
|
invocation_mode: slash_only
|
|
92
|
-
model_invocation:
|
|
92
|
+
model_invocation: enabled
|
|
93
93
|
mutates_project: true
|
|
94
94
|
tools:
|
|
95
95
|
- Read
|
|
@@ -101,7 +101,7 @@ commands:
|
|
|
101
101
|
description: "Generate a standardized model card documenting the trained model \u2014 type, performance, training data, limitations, intended use, and artifact contract."
|
|
102
102
|
lifecycle: document
|
|
103
103
|
invocation_mode: slash_only
|
|
104
|
-
model_invocation:
|
|
104
|
+
model_invocation: enabled
|
|
105
105
|
mutates_project: true
|
|
106
106
|
tools:
|
|
107
107
|
- Read
|
|
@@ -112,7 +112,7 @@ commands:
|
|
|
112
112
|
description: "Model changelog generation \u2014 auto-generate human-readable progress narrative from experiment history for stakeholders."
|
|
113
113
|
lifecycle: document
|
|
114
114
|
invocation_mode: slash_only
|
|
115
|
-
model_invocation:
|
|
115
|
+
model_invocation: enabled
|
|
116
116
|
mutates_project: true
|
|
117
117
|
tools:
|
|
118
118
|
- Read
|
|
@@ -124,7 +124,7 @@ commands:
|
|
|
124
124
|
description: "Smart checkpoint management \u2014 list, prune (Pareto-based), average top-K, resume from any point, disk usage stats."
|
|
125
125
|
lifecycle: check
|
|
126
126
|
invocation_mode: slash_only
|
|
127
|
-
model_invocation:
|
|
127
|
+
model_invocation: enabled
|
|
128
128
|
mutates_project: true
|
|
129
129
|
tools:
|
|
130
130
|
- Read
|
|
@@ -136,7 +136,7 @@ commands:
|
|
|
136
136
|
description: "Citation & attribution manager \u2014 track papers, datasets, methods. Audit for missing citations, generate BibTeX."
|
|
137
137
|
lifecycle: record
|
|
138
138
|
invocation_mode: slash_only
|
|
139
|
-
model_invocation:
|
|
139
|
+
model_invocation: enabled
|
|
140
140
|
mutates_project: true
|
|
141
141
|
tools:
|
|
142
142
|
- Read
|
|
@@ -148,7 +148,7 @@ commands:
|
|
|
148
148
|
description: "Compare two ML experiment runs side-by-side \u2014 metrics, configuration deltas, and a verdict on which approach is more promising."
|
|
149
149
|
lifecycle: analyze
|
|
150
150
|
invocation_mode: slash_only
|
|
151
|
-
model_invocation:
|
|
151
|
+
model_invocation: enabled
|
|
152
152
|
mutates_project: false
|
|
153
153
|
tools:
|
|
154
154
|
- Read
|
|
@@ -160,7 +160,7 @@ commands:
|
|
|
160
160
|
description: "Input-level counterfactual explanations \u2014 find the smallest input change to flip a prediction."
|
|
161
161
|
lifecycle: explain
|
|
162
162
|
invocation_mode: slash_only
|
|
163
|
-
model_invocation:
|
|
163
|
+
model_invocation: enabled
|
|
164
164
|
mutates_project: true
|
|
165
165
|
tools:
|
|
166
166
|
- Read
|
|
@@ -172,7 +172,7 @@ commands:
|
|
|
172
172
|
description: "Training curriculum optimization \u2014 order data by difficulty, compare easy-to-hard vs hard-to-easy vs self-paced strategies."
|
|
173
173
|
lifecycle: optimize
|
|
174
174
|
invocation_mode: slash_only
|
|
175
|
-
model_invocation:
|
|
175
|
+
model_invocation: enabled
|
|
176
176
|
mutates_project: true
|
|
177
177
|
tools:
|
|
178
178
|
- Read
|
|
@@ -184,7 +184,7 @@ commands:
|
|
|
184
184
|
description: Generate a structured experiment design for a hypothesis. Reads experiment history, searches literature for methodology, produces a scored design document at experiments/designs/.
|
|
185
185
|
lifecycle: design
|
|
186
186
|
invocation_mode: slash_only
|
|
187
|
-
model_invocation:
|
|
187
|
+
model_invocation: enabled
|
|
188
188
|
mutates_project: true
|
|
189
189
|
tools:
|
|
190
190
|
- Read
|
|
@@ -199,7 +199,7 @@ commands:
|
|
|
199
199
|
description: "Error analysis \u2014 cluster failure cases, identify systematic failure modes, and suggest targeted fixes with auto-queued hypotheses."
|
|
200
200
|
lifecycle: analyze
|
|
201
201
|
invocation_mode: slash_only
|
|
202
|
-
model_invocation:
|
|
202
|
+
model_invocation: enabled
|
|
203
203
|
mutates_project: true
|
|
204
204
|
tools:
|
|
205
205
|
- Read
|
|
@@ -211,7 +211,7 @@ commands:
|
|
|
211
211
|
description: "Deep experiment comparison \u2014 config diffs, metric significance, per-class regressions, training curve divergence, feature importance shifts."
|
|
212
212
|
lifecycle: analyze
|
|
213
213
|
invocation_mode: slash_only
|
|
214
|
-
model_invocation:
|
|
214
|
+
model_invocation: enabled
|
|
215
215
|
mutates_project: true
|
|
216
216
|
tools:
|
|
217
217
|
- Read
|
|
@@ -223,7 +223,7 @@ commands:
|
|
|
223
223
|
description: "Model compression via distillation \u2014 train a smaller student model to match a larger teacher's predictions."
|
|
224
224
|
lifecycle: deploy
|
|
225
225
|
invocation_mode: slash_only
|
|
226
|
-
model_invocation:
|
|
226
|
+
model_invocation: enabled
|
|
227
227
|
mutates_project: true
|
|
228
228
|
tools:
|
|
229
229
|
- Read
|
|
@@ -235,7 +235,7 @@ commands:
|
|
|
235
235
|
description: "Harness self-diagnosis \u2014 check environment, project, resources, and git state. Auto-fix common issues."
|
|
236
236
|
lifecycle: check
|
|
237
237
|
invocation_mode: slash_only
|
|
238
|
-
model_invocation:
|
|
238
|
+
model_invocation: enabled
|
|
239
239
|
mutates_project: true
|
|
240
240
|
tools:
|
|
241
241
|
- Read
|
|
@@ -247,7 +247,7 @@ commands:
|
|
|
247
247
|
description: "Automated ensemble construction \u2014 combines top-K models via voting, stacking, and blending for zero-cost improvement."
|
|
248
248
|
lifecycle: compose
|
|
249
249
|
invocation_mode: slash_only
|
|
250
|
-
model_invocation:
|
|
250
|
+
model_invocation: enabled
|
|
251
251
|
mutates_project: true
|
|
252
252
|
tools:
|
|
253
253
|
- Read
|
|
@@ -259,7 +259,7 @@ commands:
|
|
|
259
259
|
description: Tree-search-guided hypothesis exploration using AB-MCTS. Explores the space of experiment ideas as a search tree, scored by the critique engine. Discovers non-obvious refinement chains that linear suggestion cannot find.
|
|
260
260
|
lifecycle: research
|
|
261
261
|
invocation_mode: slash_only
|
|
262
|
-
model_invocation:
|
|
262
|
+
model_invocation: enabled
|
|
263
263
|
mutates_project: true
|
|
264
264
|
tools:
|
|
265
265
|
- Read
|
|
@@ -275,7 +275,7 @@ commands:
|
|
|
275
275
|
description: Export model to production format with equivalence verification, latency benchmarking, and deployment model card.
|
|
276
276
|
lifecycle: deploy
|
|
277
277
|
invocation_mode: slash_only
|
|
278
|
-
model_invocation:
|
|
278
|
+
model_invocation: enabled
|
|
279
279
|
mutates_project: true
|
|
280
280
|
tools:
|
|
281
281
|
- Read
|
|
@@ -287,7 +287,7 @@ commands:
|
|
|
287
287
|
description: "Automated feature selection \u2014 multi-method importance consensus, redundancy detection, and interaction feature generation."
|
|
288
288
|
lifecycle: analyze
|
|
289
289
|
invocation_mode: slash_only
|
|
290
|
-
model_invocation:
|
|
290
|
+
model_invocation: enabled
|
|
291
291
|
mutates_project: true
|
|
292
292
|
tools:
|
|
293
293
|
- Read
|
|
@@ -299,7 +299,7 @@ commands:
|
|
|
299
299
|
description: "Session context restoration \u2014 \"where was I?\" summary after days away. Current best, pending hypotheses, last session, annotations."
|
|
300
300
|
lifecycle: recall
|
|
301
301
|
invocation_mode: slash_only
|
|
302
|
-
model_invocation:
|
|
302
|
+
model_invocation: enabled
|
|
303
303
|
mutates_project: true
|
|
304
304
|
tools:
|
|
305
305
|
- Read
|
|
@@ -311,7 +311,7 @@ commands:
|
|
|
311
311
|
description: "Branch an experiment into parallel tracks \u2014 run both A and B, report the winner."
|
|
312
312
|
lifecycle: orchestrate
|
|
313
313
|
invocation_mode: slash_only
|
|
314
|
-
model_invocation:
|
|
314
|
+
model_invocation: enabled
|
|
315
315
|
mutates_project: true
|
|
316
316
|
tools:
|
|
317
317
|
- Read
|
|
@@ -323,7 +323,7 @@ commands:
|
|
|
323
323
|
description: "Visualize Pareto frontier across multiple objectives \u2014 answers \"which model is actually best?\" when there are tradeoffs."
|
|
324
324
|
lifecycle: analyze
|
|
325
325
|
invocation_mode: slash_only
|
|
326
|
-
model_invocation:
|
|
326
|
+
model_invocation: enabled
|
|
327
327
|
mutates_project: true
|
|
328
328
|
tools:
|
|
329
329
|
- Read
|
|
@@ -335,7 +335,7 @@ commands:
|
|
|
335
335
|
description: "Initialize a new ML project with the Turing autoresearch harness. Scaffolds the full experiment infrastructure \u2014 immutable evaluation pipeline, agent-editable training code, structured logging, convergence detection hooks, and a Python virtual environment. Use --plan to generate a research plan."
|
|
336
336
|
lifecycle: setup
|
|
337
337
|
invocation_mode: slash_only
|
|
338
|
-
model_invocation:
|
|
338
|
+
model_invocation: enabled
|
|
339
339
|
mutates_project: true
|
|
340
340
|
tools:
|
|
341
341
|
- Read
|
|
@@ -351,7 +351,7 @@ commands:
|
|
|
351
351
|
description: "Targeted leakage detection \u2014 probe for data leakage with single-feature tests, correlation checks, and train/test overlap detection."
|
|
352
352
|
lifecycle: validate
|
|
353
353
|
invocation_mode: slash_only
|
|
354
|
-
model_invocation:
|
|
354
|
+
model_invocation: enabled
|
|
355
355
|
mutates_project: true
|
|
356
356
|
tools:
|
|
357
357
|
- Read
|
|
@@ -363,7 +363,7 @@ commands:
|
|
|
363
363
|
description: "Literature search scoped to the current experiment domain \u2014 find papers, SOTA baselines, and related work without leaving the terminal."
|
|
364
364
|
lifecycle: research
|
|
365
365
|
invocation_mode: slash_only
|
|
366
|
-
model_invocation:
|
|
366
|
+
model_invocation: enabled
|
|
367
367
|
mutates_project: true
|
|
368
368
|
tools:
|
|
369
369
|
- Read
|
|
@@ -376,7 +376,7 @@ commands:
|
|
|
376
376
|
description: "Generate a research logbook showing the full experiment narrative \u2014 hypotheses proposed, experiments run, decisions made, and progress over time. Outputs HTML (with interactive chart) or markdown."
|
|
377
377
|
lifecycle: document
|
|
378
378
|
invocation_mode: slash_only
|
|
379
|
-
model_invocation:
|
|
379
|
+
model_invocation: enabled
|
|
380
380
|
mutates_project: true
|
|
381
381
|
tools:
|
|
382
382
|
- Read
|
|
@@ -388,7 +388,7 @@ commands:
|
|
|
388
388
|
description: "Model merging \u2014 average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost."
|
|
389
389
|
lifecycle: compose
|
|
390
390
|
invocation_mode: slash_only
|
|
391
|
-
model_invocation:
|
|
391
|
+
model_invocation: enabled
|
|
392
392
|
mutates_project: true
|
|
393
393
|
tools:
|
|
394
394
|
- Read
|
|
@@ -400,7 +400,7 @@ commands:
|
|
|
400
400
|
description: "Set the research strategy mode \u2014 explore (try new things), exploit (refine what works), or replicate (verify results). Drives novelty guard policy and agent behavior."
|
|
401
401
|
lifecycle: strategy
|
|
402
402
|
invocation_mode: slash_only
|
|
403
|
-
model_invocation:
|
|
403
|
+
model_invocation: enabled
|
|
404
404
|
mutates_project: true
|
|
405
405
|
tools:
|
|
406
406
|
- Read
|
|
@@ -410,7 +410,7 @@ commands:
|
|
|
410
410
|
description: "Project onboarding \u2014 generate a walkthrough for new collaborators. Task, history, decisions, next steps."
|
|
411
411
|
lifecycle: document
|
|
412
412
|
invocation_mode: slash_only
|
|
413
|
-
model_invocation:
|
|
413
|
+
model_invocation: enabled
|
|
414
414
|
mutates_project: true
|
|
415
415
|
tools:
|
|
416
416
|
- Read
|
|
@@ -422,7 +422,7 @@ commands:
|
|
|
422
422
|
description: Draft mechanical paper sections (setup, results, ablation, hyperparameters) from experiment logs. LaTeX and markdown output.
|
|
423
423
|
lifecycle: document
|
|
424
424
|
invocation_mode: slash_only
|
|
425
|
-
model_invocation:
|
|
425
|
+
model_invocation: enabled
|
|
426
426
|
mutates_project: true
|
|
427
427
|
tools:
|
|
428
428
|
- Read
|
|
@@ -434,7 +434,7 @@ commands:
|
|
|
434
434
|
description: "Research planning assistant \u2014 design a strategic experiment campaign with budget-aware ROI allocation."
|
|
435
435
|
lifecycle: plan
|
|
436
436
|
invocation_mode: slash_only
|
|
437
|
-
model_invocation:
|
|
437
|
+
model_invocation: enabled
|
|
438
438
|
mutates_project: true
|
|
439
439
|
tools:
|
|
440
440
|
- Read
|
|
@@ -446,7 +446,7 @@ commands:
|
|
|
446
446
|
description: "Generate a single-page HTML research poster summarizing the experiment campaign \u2014 best result, trajectory, key findings, and methodology. Adapted from posterskill's self-contained HTML architecture."
|
|
447
447
|
lifecycle: document
|
|
448
448
|
invocation_mode: slash_only
|
|
449
|
-
model_invocation:
|
|
449
|
+
model_invocation: enabled
|
|
450
450
|
mutates_project: true
|
|
451
451
|
tools:
|
|
452
452
|
- Read
|
|
@@ -460,7 +460,7 @@ commands:
|
|
|
460
460
|
description: "Failure postmortem \u2014 diagnose why experiments stopped improving and get actionable next steps."
|
|
461
461
|
lifecycle: diagnose
|
|
462
462
|
invocation_mode: slash_only
|
|
463
|
-
model_invocation:
|
|
463
|
+
model_invocation: enabled
|
|
464
464
|
mutates_project: true
|
|
465
465
|
tools:
|
|
466
466
|
- Read
|
|
@@ -472,7 +472,7 @@ commands:
|
|
|
472
472
|
description: "Pre-flight resource check \u2014 estimates VRAM, RAM, and disk requirements before running ML training. Compares against available system resources and issues PASS/WARN/FAIL verdict. Use before training to catch OOM errors before they happen."
|
|
473
473
|
lifecycle: check
|
|
474
474
|
invocation_mode: slash_only
|
|
475
|
-
model_invocation:
|
|
475
|
+
model_invocation: enabled
|
|
476
476
|
mutates_project: false
|
|
477
477
|
tools:
|
|
478
478
|
- Read
|
|
@@ -484,7 +484,7 @@ commands:
|
|
|
484
484
|
description: "Presentation figure generation \u2014 training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps."
|
|
485
485
|
lifecycle: document
|
|
486
486
|
invocation_mode: slash_only
|
|
487
|
-
model_invocation:
|
|
487
|
+
model_invocation: enabled
|
|
488
488
|
mutates_project: true
|
|
489
489
|
tools:
|
|
490
490
|
- Read
|
|
@@ -496,7 +496,7 @@ commands:
|
|
|
496
496
|
description: "Profile a training run \u2014 timing breakdown, memory usage, throughput, bottleneck detection with actionable recommendations."
|
|
497
497
|
lifecycle: check
|
|
498
498
|
invocation_mode: slash_only
|
|
499
|
-
model_invocation:
|
|
499
|
+
model_invocation: enabled
|
|
500
500
|
mutates_project: true
|
|
501
501
|
tools:
|
|
502
502
|
- Read
|
|
@@ -508,7 +508,7 @@ commands:
|
|
|
508
508
|
description: "Weight pruning \u2014 measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model."
|
|
509
509
|
lifecycle: optimize
|
|
510
510
|
invocation_mode: slash_only
|
|
511
|
-
model_invocation:
|
|
511
|
+
model_invocation: enabled
|
|
512
512
|
mutates_project: true
|
|
513
513
|
tools:
|
|
514
514
|
- Read
|
|
@@ -520,7 +520,7 @@ commands:
|
|
|
520
520
|
description: "Post-training quantization \u2014 FP32\u2192INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss."
|
|
521
521
|
lifecycle: optimize
|
|
522
522
|
invocation_mode: slash_only
|
|
523
|
-
model_invocation:
|
|
523
|
+
model_invocation: enabled
|
|
524
524
|
mutates_project: true
|
|
525
525
|
tools:
|
|
526
526
|
- Read
|
|
@@ -532,7 +532,7 @@ commands:
|
|
|
532
532
|
description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
|
|
533
533
|
lifecycle: orchestrate
|
|
534
534
|
invocation_mode: slash_only
|
|
535
|
-
model_invocation:
|
|
535
|
+
model_invocation: enabled
|
|
536
536
|
mutates_project: true
|
|
537
537
|
tools:
|
|
538
538
|
- Read
|
|
@@ -544,7 +544,7 @@ commands:
|
|
|
544
544
|
description: "Model registry \u2014 track, promote, and govern the model lifecycle from candidate to production."
|
|
545
545
|
lifecycle: govern
|
|
546
546
|
invocation_mode: slash_only
|
|
547
|
-
model_invocation:
|
|
547
|
+
model_invocation: enabled
|
|
548
548
|
mutates_project: true
|
|
549
549
|
tools:
|
|
550
550
|
- Read
|
|
@@ -556,7 +556,7 @@ commands:
|
|
|
556
556
|
description: "Performance regression gate \u2014 re-run best experiment after code/dependency changes and verify metrics haven't degraded."
|
|
557
557
|
lifecycle: validate
|
|
558
558
|
invocation_mode: slash_only
|
|
559
|
-
model_invocation:
|
|
559
|
+
model_invocation: enabled
|
|
560
560
|
mutates_project: true
|
|
561
561
|
tools:
|
|
562
562
|
- Read
|
|
@@ -568,7 +568,7 @@ commands:
|
|
|
568
568
|
description: "Experiment replay \u2014 re-run a historical experiment with current infrastructure to test if old approaches do better now."
|
|
569
569
|
lifecycle: validate
|
|
570
570
|
invocation_mode: slash_only
|
|
571
|
-
model_invocation:
|
|
571
|
+
model_invocation: enabled
|
|
572
572
|
mutates_project: true
|
|
573
573
|
tools:
|
|
574
574
|
- Read
|
|
@@ -580,7 +580,7 @@ commands:
|
|
|
580
580
|
description: "Generate a markdown research report from experiment history \u2014 structured for sharing, archiving, or including in documentation. More detailed than a brief, less visual than a poster."
|
|
581
581
|
lifecycle: document
|
|
582
582
|
invocation_mode: slash_only
|
|
583
|
-
model_invocation:
|
|
583
|
+
model_invocation: enabled
|
|
584
584
|
mutates_project: true
|
|
585
585
|
tools:
|
|
586
586
|
- Read
|
|
@@ -592,7 +592,7 @@ commands:
|
|
|
592
592
|
description: Verify reproducibility of a specific experiment by re-running from logged config and checking metrics fall within tolerance.
|
|
593
593
|
lifecycle: validate
|
|
594
594
|
invocation_mode: slash_only
|
|
595
|
-
model_invocation:
|
|
595
|
+
model_invocation: enabled
|
|
596
596
|
mutates_project: true
|
|
597
597
|
tools:
|
|
598
598
|
- Read
|
|
@@ -604,7 +604,7 @@ commands:
|
|
|
604
604
|
description: "Smart failure recovery \u2014 auto-diagnose crash type and retry with targeted fix. OOM \u2192 halve batch. NaN \u2192 add clipping."
|
|
605
605
|
lifecycle: orchestrate
|
|
606
606
|
invocation_mode: slash_only
|
|
607
|
-
model_invocation:
|
|
607
|
+
model_invocation: enabled
|
|
608
608
|
mutates_project: true
|
|
609
609
|
tools:
|
|
610
610
|
- Read
|
|
@@ -616,7 +616,7 @@ commands:
|
|
|
616
616
|
description: "Peer review simulation \u2014 generate likely reviewer objections with severity ratings and fix commands."
|
|
617
617
|
lifecycle: validate
|
|
618
618
|
invocation_mode: slash_only
|
|
619
|
-
model_invocation:
|
|
619
|
+
model_invocation: enabled
|
|
620
620
|
mutates_project: true
|
|
621
621
|
tools:
|
|
622
622
|
- Read
|
|
@@ -628,7 +628,7 @@ commands:
|
|
|
628
628
|
description: "Pre-training sanity checks \u2014 catch broken data loaders, misconfigured losses, and dead gradients in 30 seconds before wasting hours."
|
|
629
629
|
lifecycle: check
|
|
630
630
|
invocation_mode: slash_only
|
|
631
|
-
model_invocation:
|
|
631
|
+
model_invocation: enabled
|
|
632
632
|
mutates_project: true
|
|
633
633
|
tools:
|
|
634
634
|
- Read
|
|
@@ -640,7 +640,7 @@ commands:
|
|
|
640
640
|
description: "Scaling law estimator \u2014 run small experiments at different sizes, fit a power law, and predict full-scale performance before committing compute."
|
|
641
641
|
lifecycle: analyze
|
|
642
642
|
invocation_mode: slash_only
|
|
643
|
-
model_invocation:
|
|
643
|
+
model_invocation: enabled
|
|
644
644
|
mutates_project: true
|
|
645
645
|
tools:
|
|
646
646
|
- Read
|
|
@@ -652,7 +652,7 @@ commands:
|
|
|
652
652
|
description: "Natural language experiment search \u2014 query with text + structured filters over 200+ experiments."
|
|
653
653
|
lifecycle: query
|
|
654
654
|
invocation_mode: slash_only
|
|
655
|
-
model_invocation:
|
|
655
|
+
model_invocation: enabled
|
|
656
656
|
mutates_project: false
|
|
657
657
|
tools:
|
|
658
658
|
- Read
|
|
@@ -664,7 +664,7 @@ commands:
|
|
|
664
664
|
description: Run multi-seed study on an experiment to compute mean/std/CI and flag seed-sensitive results. Prevents publishing lucky seeds.
|
|
665
665
|
lifecycle: validate
|
|
666
666
|
invocation_mode: slash_only
|
|
667
|
-
model_invocation:
|
|
667
|
+
model_invocation: enabled
|
|
668
668
|
mutates_project: true
|
|
669
669
|
tools:
|
|
670
670
|
- Read
|
|
@@ -676,7 +676,7 @@ commands:
|
|
|
676
676
|
description: "Hyperparameter sensitivity analysis \u2014 rank parameters by impact, identify which matter and which are noise."
|
|
677
677
|
lifecycle: analyze
|
|
678
678
|
invocation_mode: slash_only
|
|
679
|
-
model_invocation:
|
|
679
|
+
model_invocation: enabled
|
|
680
680
|
mutates_project: true
|
|
681
681
|
tools:
|
|
682
682
|
- Read
|
|
@@ -688,7 +688,7 @@ commands:
|
|
|
688
688
|
description: "Experiment packaging \u2014 portable archive with config, metrics, seed study, annotations, reproduction instructions."
|
|
689
689
|
lifecycle: share
|
|
690
690
|
invocation_mode: slash_only
|
|
691
|
-
model_invocation:
|
|
691
|
+
model_invocation: enabled
|
|
692
692
|
mutates_project: true
|
|
693
693
|
tools:
|
|
694
694
|
- Read
|
|
@@ -700,7 +700,7 @@ commands:
|
|
|
700
700
|
description: "Experiment outcome prediction \u2014 predict which configs will beat the current best before running them."
|
|
701
701
|
lifecycle: predict
|
|
702
702
|
invocation_mode: slash_only
|
|
703
|
-
model_invocation:
|
|
703
|
+
model_invocation: enabled
|
|
704
704
|
mutates_project: true
|
|
705
705
|
tools:
|
|
706
706
|
- Read
|
|
@@ -712,7 +712,7 @@ commands:
|
|
|
712
712
|
description: "Show current ML experiment status \u2014 best model, recent experiments, convergence state, and trend analysis. Delegates to @ml-evaluator for read-only safety."
|
|
713
713
|
lifecycle: observe
|
|
714
714
|
invocation_mode: slash_only
|
|
715
|
-
model_invocation:
|
|
715
|
+
model_invocation: enabled
|
|
716
716
|
mutates_project: false
|
|
717
717
|
tools:
|
|
718
718
|
- Read
|
|
@@ -723,7 +723,7 @@ commands:
|
|
|
723
723
|
description: "Pipeline composition \u2014 decompose ML pipelines into swappable stages. Show, swap, cache, and run stages independently."
|
|
724
724
|
lifecycle: compose
|
|
725
725
|
invocation_mode: slash_only
|
|
726
|
-
model_invocation:
|
|
726
|
+
model_invocation: enabled
|
|
727
727
|
mutates_project: true
|
|
728
728
|
tools:
|
|
729
729
|
- Read
|
|
@@ -735,7 +735,7 @@ commands:
|
|
|
735
735
|
description: "Literature-grounded model selection. Reads the ML task context, searches recent literature, and suggests model architectures worth trying \u2014 with citations. Suggestions are auto-queued as hypotheses."
|
|
736
736
|
lifecycle: research
|
|
737
737
|
invocation_mode: slash_only
|
|
738
|
-
model_invocation:
|
|
738
|
+
model_invocation: enabled
|
|
739
739
|
mutates_project: true
|
|
740
740
|
tools:
|
|
741
741
|
- Read
|
|
@@ -753,7 +753,7 @@ commands:
|
|
|
753
753
|
description: "Architecture modification \u2014 add/remove layers, widen/narrow, swap activations, inject skip connections. Specify what to change, system handles how."
|
|
754
754
|
lifecycle: modify
|
|
755
755
|
invocation_mode: slash_only
|
|
756
|
-
model_invocation:
|
|
756
|
+
model_invocation: enabled
|
|
757
757
|
mutates_project: true
|
|
758
758
|
tools:
|
|
759
759
|
- Read
|
|
@@ -765,7 +765,7 @@ commands:
|
|
|
765
765
|
description: Generate and run a systematic hyperparameter sweep. Computes the cartesian product of configured parameter ranges and processes the queue sequentially with full experiment logging.
|
|
766
766
|
lifecycle: explore
|
|
767
767
|
invocation_mode: slash_only
|
|
768
|
-
model_invocation:
|
|
768
|
+
model_invocation: enabled
|
|
769
769
|
mutates_project: true
|
|
770
770
|
tools:
|
|
771
771
|
- Read
|
|
@@ -779,7 +779,7 @@ commands:
|
|
|
779
779
|
description: "Experiment template library \u2014 save winning configs as reusable templates, apply to new projects."
|
|
780
780
|
lifecycle: manage
|
|
781
781
|
invocation_mode: slash_only
|
|
782
|
-
model_invocation:
|
|
782
|
+
model_invocation: enabled
|
|
783
783
|
mutates_project: true
|
|
784
784
|
tools:
|
|
785
785
|
- Read
|
|
@@ -791,7 +791,7 @@ commands:
|
|
|
791
791
|
description: "Run the autonomous ML experiment loop. Iteratively hypothesizes, trains, evaluates, and decides \u2014 keeping only improvements. Implements the autoresearch pattern with formal convergence detection and git-disciplined rollback."
|
|
792
792
|
lifecycle: execute
|
|
793
793
|
invocation_mode: slash_only
|
|
794
|
-
model_invocation:
|
|
794
|
+
model_invocation: enabled
|
|
795
795
|
mutates_project: true
|
|
796
796
|
tools:
|
|
797
797
|
- Read
|
|
@@ -805,7 +805,7 @@ commands:
|
|
|
805
805
|
description: "Cross-project knowledge transfer \u2014 find similar prior projects and surface what worked. Builds institutional ML memory."
|
|
806
806
|
lifecycle: research
|
|
807
807
|
invocation_mode: slash_only
|
|
808
|
-
model_invocation:
|
|
808
|
+
model_invocation: enabled
|
|
809
809
|
mutates_project: true
|
|
810
810
|
tools:
|
|
811
811
|
- Read
|
|
@@ -817,7 +817,7 @@ commands:
|
|
|
817
817
|
description: "Long-term trend analysis \u2014 improvement velocity, family ROI, diminishing returns detection, strategic research direction."
|
|
818
818
|
lifecycle: analyze
|
|
819
819
|
invocation_mode: slash_only
|
|
820
|
-
model_invocation:
|
|
820
|
+
model_invocation: enabled
|
|
821
821
|
mutates_project: true
|
|
822
822
|
tools:
|
|
823
823
|
- Read
|
|
@@ -829,7 +829,7 @@ commands:
|
|
|
829
829
|
description: "Inject a hypothesis into the agent's experiment queue. This is how research taste reaches the agent \u2014 the human selects which coins to flip, the agent flips them."
|
|
830
830
|
lifecycle: steer
|
|
831
831
|
invocation_mode: slash_only
|
|
832
|
-
model_invocation:
|
|
832
|
+
model_invocation: enabled
|
|
833
833
|
mutates_project: true
|
|
834
834
|
tools:
|
|
835
835
|
- Read
|
|
@@ -846,7 +846,7 @@ commands:
|
|
|
846
846
|
description: "Incremental model update \u2014 add new data without full retraining, with forgetting detection."
|
|
847
847
|
lifecycle: update
|
|
848
848
|
invocation_mode: slash_only
|
|
849
|
-
model_invocation:
|
|
849
|
+
model_invocation: enabled
|
|
850
850
|
mutates_project: true
|
|
851
851
|
tools:
|
|
852
852
|
- Read
|
|
@@ -858,7 +858,7 @@ commands:
|
|
|
858
858
|
description: Run stability validation on the current experiment configuration. Executes N runs to measure metric variance and auto-configures multi-run evaluation if variance is too high.
|
|
859
859
|
lifecycle: validate
|
|
860
860
|
invocation_mode: slash_only
|
|
861
|
-
model_invocation:
|
|
861
|
+
model_invocation: enabled
|
|
862
862
|
mutates_project: true
|
|
863
863
|
tools:
|
|
864
864
|
- Read
|
|
@@ -870,7 +870,7 @@ commands:
|
|
|
870
870
|
description: "Warm-start from a prior model \u2014 load checkpoint, optionally freeze layers, adjust learning rate, and continue training."
|
|
871
871
|
lifecycle: compose
|
|
872
872
|
invocation_mode: slash_only
|
|
873
|
-
model_invocation:
|
|
873
|
+
model_invocation: enabled
|
|
874
874
|
mutates_project: true
|
|
875
875
|
tools:
|
|
876
876
|
- Read
|
|
@@ -882,7 +882,7 @@ commands:
|
|
|
882
882
|
description: Live training monitor with early-warning alerts for loss spikes, NaN, overfitting, and metric plateaus.
|
|
883
883
|
lifecycle: monitor
|
|
884
884
|
invocation_mode: slash_only
|
|
885
|
-
model_invocation:
|
|
885
|
+
model_invocation: enabled
|
|
886
886
|
mutates_project: true
|
|
887
887
|
tools:
|
|
888
888
|
- Read
|
|
@@ -894,7 +894,7 @@ commands:
|
|
|
894
894
|
description: "What-if analysis \u2014 answer hypotheticals from existing experiment data without running new experiments."
|
|
895
895
|
lifecycle: analyze
|
|
896
896
|
invocation_mode: slash_only
|
|
897
|
-
model_invocation:
|
|
897
|
+
model_invocation: enabled
|
|
898
898
|
mutates_project: true
|
|
899
899
|
tools:
|
|
900
900
|
- Read
|
|
@@ -906,7 +906,7 @@ commands:
|
|
|
906
906
|
description: "Internal model diagnostics \u2014 gradient flow, dead neurons, activation stats, weight distributions, tree depth analysis."
|
|
907
907
|
lifecycle: analyze
|
|
908
908
|
invocation_mode: slash_only
|
|
909
|
-
model_invocation:
|
|
909
|
+
model_invocation: enabled
|
|
910
910
|
mutates_project: true
|
|
911
911
|
tools:
|
|
912
912
|
- Read
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-turing",
|
|
3
|
-
"version": "4.7.0",
|
|
3
|
+
"version": "4.8.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
|
|
6
6
|
"bin": {
|
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
},
|
|
10
10
|
"scripts": {
|
|
11
11
|
"postinstall": "node src/postinstall.js",
|
|
12
|
-
"sync:
|
|
12
|
+
"sync:commands": "node src/sync-commands-layout.js",
|
|
13
|
+
"sync:skills": "node src/sync-commands-layout.js"
|
|
13
14
|
},
|
|
14
15
|
"files": [
|
|
15
16
|
"bin/",
|
package/skills/turing/SKILL.md
CHANGED
|
@@ -7,10 +7,10 @@ You are the Turing ML research router. Detect the user's intent and identify the
|
|
|
7
7
|
|
|
8
8
|
## Execution Contract
|
|
9
9
|
|
|
10
|
-
Turing sub-commands are
|
|
10
|
+
Turing sub-commands are slash-command skills that allow model invocation, so router handling may select the focused skill when the user's intent matches a sub-command.
|
|
11
11
|
|
|
12
|
-
- If the user explicitly invokes `/turing:<cmd>`,
|
|
13
|
-
- If the user invokes `/turing` as a router and the detected command is `slash_only`,
|
|
12
|
+
- If the user explicitly invokes `/turing:<cmd>`, handle that focused sub-command directly.
|
|
13
|
+
- If the user invokes `/turing` as a router and the detected command is `slash_only`, route to the focused sub-command skill when appropriate.
|
|
14
14
|
- If a command has a documented safe equivalent script, the assistant may execute those documented steps inline when safe and appropriate.
|
|
15
15
|
|
|
16
16
|
## Routing Table
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ablate
|
|
3
3
|
description: Run systematic ablation study — remove components one at a time, measure impact, produce publication-ready table with dead-weight flagging.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--components \"X,Y\"] [--seeds 3] [--latex]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|