claude-turing 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,928 @@
1
+ commands:
2
+ ablate:
3
+ description: "Run systematic ablation study \u2014 remove components one at a time, measure impact, produce publication-ready table with dead-weight flagging."
4
+ lifecycle: analyze
5
+ invocation_mode: slash_only
6
+ model_invocation: disabled
7
+ mutates_project: true
8
+ tools:
9
+ - Read
10
+ - Bash
11
+ - Grep
12
+ - Glob
13
+ argument_hint: '[exp-id] [--components "X,Y"] [--seeds 3] [--latex]'
14
+ annotate:
15
+ description: "Retrospective experiment annotations \u2014 add human notes, tags, and context that automated metrics can't capture."
16
+ lifecycle: record
17
+ invocation_mode: slash_only
18
+ model_invocation: disabled
19
+ mutates_project: true
20
+ tools:
21
+ - Read
22
+ - Bash
23
+ - Grep
24
+ - Glob
25
+ argument_hint: '<exp-id> "note" [--tag fragile] | --list | --search "keyword"'
26
+ archive:
27
+ description: "Experiment lifecycle cleanup \u2014 compress old artifacts, prune checkpoints, create queryable summary index. Reclaim disk space."
28
+ lifecycle: manage
29
+ invocation_mode: slash_only
30
+ model_invocation: disabled
31
+ mutates_project: true
32
+ tools:
33
+ - Read
34
+ - Bash
35
+ - Grep
36
+ - Glob
37
+ argument_hint: '[--older-than 30d] [--keep-best 10] [--dry-run]'
38
+ audit:
39
+ description: "Pre-submission methodology audit \u2014 catch data leakage, missing baselines, cherry-picked seeds, and incomplete ablations before a reviewer does."
40
+ lifecycle: validate
41
+ invocation_mode: slash_only
42
+ model_invocation: disabled
43
+ mutates_project: true
44
+ tools:
45
+ - Read
46
+ - Bash
47
+ - Grep
48
+ - Glob
49
+ argument_hint: '[--strict] [--checklist neurips]'
50
+ baseline:
51
+ description: "Automatic baseline generation \u2014 random, majority/mean, linear, k-NN baselines in 60 seconds. Every experiment needs a \"is this better than dumb?\" reference."
52
+ lifecycle: analyze
53
+ invocation_mode: slash_only
54
+ model_invocation: disabled
55
+ mutates_project: true
56
+ tools:
57
+ - Read
58
+ - Bash
59
+ - Grep
60
+ - Glob
61
+ argument_hint: '[--methods all|simple|linear] [--data data.npz]'
62
+ brief:
63
+ description: "Generate a structured research intelligence report from experiment history \u2014 what's been learned, what's promising, what's exhausted, and what the human should consider next. Use --deep for literature-grounded suggestions."
64
+ lifecycle: report
65
+ invocation_mode: slash_only
66
+ model_invocation: disabled
67
+ mutates_project: true
68
+ tools:
69
+ - Read
70
+ - Bash
71
+ - Grep
72
+ - Glob
73
+ - WebSearch
74
+ - WebFetch
75
+ argument_hint: '[ml/project] [--deep]'
76
+ budget:
77
+ description: "Compute budget manager \u2014 set experiment/time limits, track allocation across explore/exploit phases, auto-shift modes, hard stop."
78
+ lifecycle: manage
79
+ invocation_mode: slash_only
80
+ model_invocation: disabled
81
+ mutates_project: true
82
+ tools:
83
+ - Read
84
+ - Bash
85
+ - Grep
86
+ - Glob
87
+ argument_hint: <set|status|reset> [--experiments 50] [--hours 8]
88
+ calibrate:
89
+ description: "Probability calibration \u2014 measure ECE, plot reliability diagrams, apply Platt scaling or isotonic regression."
90
+ lifecycle: analyze
91
+ invocation_mode: slash_only
92
+ model_invocation: disabled
93
+ mutates_project: true
94
+ tools:
95
+ - Read
96
+ - Bash
97
+ - Grep
98
+ - Glob
99
+ argument_hint: '[exp-id] [--method platt|isotonic|temperature|auto]'
100
+ card:
101
+ description: "Generate a standardized model card documenting the trained model \u2014 type, performance, training data, limitations, intended use, and artifact contract."
102
+ lifecycle: document
103
+ invocation_mode: slash_only
104
+ model_invocation: disabled
105
+ mutates_project: true
106
+ tools:
107
+ - Read
108
+ - Bash
109
+ - Grep
110
+ - Glob
111
+ changelog:
112
+ description: "Model changelog generation \u2014 auto-generate human-readable progress narrative from experiment history for stakeholders."
113
+ lifecycle: document
114
+ invocation_mode: slash_only
115
+ model_invocation: disabled
116
+ mutates_project: true
117
+ tools:
118
+ - Read
119
+ - Bash
120
+ - Grep
121
+ - Glob
122
+ argument_hint: '[--since exp-id|date] [--audience technical|stakeholder]'
123
+ checkpoint:
124
+ description: "Smart checkpoint management \u2014 list, prune (Pareto-based), average top-K, resume from any point, disk usage stats."
125
+ lifecycle: check
126
+ invocation_mode: slash_only
127
+ model_invocation: disabled
128
+ mutates_project: true
129
+ tools:
130
+ - Read
131
+ - Bash
132
+ - Grep
133
+ - Glob
134
+ argument_hint: <list|prune|average|resume|stats> [exp-id] [--top 3] [--dry-run]
135
+ cite:
136
+ description: "Citation & attribution manager \u2014 track papers, datasets, methods. Audit for missing citations, generate BibTeX."
137
+ lifecycle: record
138
+ invocation_mode: slash_only
139
+ model_invocation: disabled
140
+ mutates_project: true
141
+ tools:
142
+ - Read
143
+ - Bash
144
+ - Grep
145
+ - Glob
146
+ argument_hint: <add|list|check|bib> [--key Chen2016 --title XGBoost --url ...]
147
+ compare:
148
+ description: "Compare two ML experiment runs side-by-side \u2014 metrics, configuration deltas, and a verdict on which approach is more promising."
149
+ lifecycle: analyze
150
+ invocation_mode: slash_only
151
+ model_invocation: disabled
152
+ mutates_project: false
153
+ tools:
154
+ - Read
155
+ - Bash
156
+ - Grep
157
+ - Glob
158
+ argument_hint: <exp-id-1> <exp-id-2>
159
+ counterfactual:
160
+ description: "Input-level counterfactual explanations \u2014 find the smallest input change to flip a prediction."
161
+ lifecycle: explain
162
+ invocation_mode: slash_only
163
+ model_invocation: disabled
164
+ mutates_project: true
165
+ tools:
166
+ - Read
167
+ - Bash
168
+ - Grep
169
+ - Glob
170
+ argument_hint: <exp-id> --sample <index> [--target <class>]
171
+ curriculum:
172
+ description: "Training curriculum optimization \u2014 order data by difficulty, compare easy-to-hard vs hard-to-easy vs self-paced strategies."
173
+ lifecycle: optimize
174
+ invocation_mode: slash_only
175
+ model_invocation: disabled
176
+ mutates_project: true
177
+ tools:
178
+ - Read
179
+ - Bash
180
+ - Grep
181
+ - Glob
182
+ argument_hint: '[exp-id] [--strategies easy-to-hard,random]'
183
+ design:
184
+ description: Generate a structured experiment design for a hypothesis. Reads experiment history, searches literature for methodology, produces a scored design document at experiments/designs/.
185
+ lifecycle: design
186
+ invocation_mode: slash_only
187
+ model_invocation: disabled
188
+ mutates_project: true
189
+ tools:
190
+ - Read
191
+ - Write
192
+ - Bash
193
+ - Grep
194
+ - Glob
195
+ - WebSearch
196
+ - WebFetch
197
+ argument_hint: <hypothesis-id or description>
198
+ diagnose:
199
+ description: "Error analysis \u2014 cluster failure cases, identify systematic failure modes, and suggest targeted fixes with auto-queued hypotheses."
200
+ lifecycle: analyze
201
+ invocation_mode: slash_only
202
+ model_invocation: disabled
203
+ mutates_project: true
204
+ tools:
205
+ - Read
206
+ - Bash
207
+ - Grep
208
+ - Glob
209
+ argument_hint: '[exp-id] [--auto-queue] [--top 5]'
210
+ diff:
211
+ description: "Deep experiment comparison \u2014 config diffs, metric significance, per-class regressions, training curve divergence, feature importance shifts."
212
+ lifecycle: analyze
213
+ invocation_mode: slash_only
214
+ model_invocation: disabled
215
+ mutates_project: true
216
+ tools:
217
+ - Read
218
+ - Bash
219
+ - Grep
220
+ - Glob
221
+ argument_hint: <exp-a> <exp-b> [--code]
222
+ distill:
223
+ description: "Model compression via distillation \u2014 train a smaller student model to match a larger teacher's predictions."
224
+ lifecycle: deploy
225
+ invocation_mode: slash_only
226
+ model_invocation: disabled
227
+ mutates_project: true
228
+ tools:
229
+ - Read
230
+ - Bash
231
+ - Grep
232
+ - Glob
233
+ argument_hint: <teacher-exp-id> [--compression 4] [--method soft-labels]
234
+ doctor:
235
+ description: "Harness self-diagnosis \u2014 check environment, project, resources, and git state. Auto-fix common issues."
236
+ lifecycle: check
237
+ invocation_mode: slash_only
238
+ model_invocation: disabled
239
+ mutates_project: true
240
+ tools:
241
+ - Read
242
+ - Bash
243
+ - Grep
244
+ - Glob
245
+ argument_hint: '[--fix] [--verbose]'
246
+ ensemble:
247
+ description: "Automated ensemble construction \u2014 combines top-K models via voting, stacking, and blending for zero-cost improvement."
248
+ lifecycle: compose
249
+ invocation_mode: slash_only
250
+ model_invocation: disabled
251
+ mutates_project: true
252
+ tools:
253
+ - Read
254
+ - Bash
255
+ - Grep
256
+ - Glob
257
+ argument_hint: '[--top-k 5] [--methods voting,stacking,blending]'
258
+ explore:
259
+ description: Tree-search-guided hypothesis exploration using AB-MCTS. Explores the space of experiment ideas as a search tree, scored by the critique engine. Discovers non-obvious refinement chains that linear suggestion cannot find.
260
+ lifecycle: research
261
+ invocation_mode: slash_only
262
+ model_invocation: disabled
263
+ mutates_project: true
264
+ tools:
265
+ - Read
266
+ - Write
267
+ - Bash
268
+ - Grep
269
+ - Glob
270
+ argument_hint: '[ml/project] [--iterations N] [--top N] [--strategy abmcts-a|abmcts-m|greedy]'
271
+ equivalent_script:
272
+ path: scripts/treequest_suggest.py
273
+ location: scaffold
274
+ export:
275
+ description: Export model to production format with equivalence verification, latency benchmarking, and deployment model card.
276
+ lifecycle: deploy
277
+ invocation_mode: slash_only
278
+ model_invocation: disabled
279
+ mutates_project: true
280
+ tools:
281
+ - Read
282
+ - Bash
283
+ - Grep
284
+ - Glob
285
+ argument_hint: '[exp-id] [--format joblib|xgboost_json|onnx|torchscript|tflite]'
286
+ feature:
287
+ description: "Automated feature selection \u2014 multi-method importance consensus, redundancy detection, and interaction feature generation."
288
+ lifecycle: analyze
289
+ invocation_mode: slash_only
290
+ model_invocation: disabled
291
+ mutates_project: true
292
+ tools:
293
+ - Read
294
+ - Bash
295
+ - Grep
296
+ - Glob
297
+ argument_hint: '[--method all|importance] [--top-k 20]'
298
+ flashback:
299
+ description: "Session context restoration \u2014 \"where was I?\" summary after days away. Current best, pending hypotheses, last session, annotations."
300
+ lifecycle: recall
301
+ invocation_mode: slash_only
302
+ model_invocation: disabled
303
+ mutates_project: true
304
+ tools:
305
+ - Read
306
+ - Bash
307
+ - Grep
308
+ - Glob
309
+ argument_hint: '[--days 7] [--last 10]'
310
+ fork:
311
+ description: "Branch an experiment into parallel tracks \u2014 run both A and B, report the winner."
312
+ lifecycle: orchestrate
313
+ invocation_mode: slash_only
314
+ model_invocation: disabled
315
+ mutates_project: true
316
+ tools:
317
+ - Read
318
+ - Bash
319
+ - Grep
320
+ - Glob
321
+ argument_hint: '<exp-id> --branches "approach A" "approach B" [--auto-promote]'
322
+ frontier:
323
+ description: "Visualize Pareto frontier across multiple objectives \u2014 answers \"which model is actually best?\" when there are tradeoffs."
324
+ lifecycle: analyze
325
+ invocation_mode: slash_only
326
+ model_invocation: disabled
327
+ mutates_project: true
328
+ tools:
329
+ - Read
330
+ - Bash
331
+ - Grep
332
+ - Glob
333
+ argument_hint: '[--metrics "accuracy,train_seconds,n_params"] [--ascii]'
334
+ init:
335
+ description: "Initialize a new ML project with the Turing autoresearch harness. Scaffolds the full experiment infrastructure \u2014 immutable evaluation pipeline, agent-editable training code, structured logging, convergence detection hooks, and a Python virtual environment. Use --plan to generate a research plan."
336
+ lifecycle: setup
337
+ invocation_mode: slash_only
338
+ model_invocation: disabled
339
+ mutates_project: true
340
+ tools:
341
+ - Read
342
+ - Write
343
+ - Edit
344
+ - Bash
345
+ - Grep
346
+ - Glob
347
+ - WebSearch
348
+ - WebFetch
349
+ argument_hint: '[project_name] [--plan]'
350
+ leak:
351
+ description: "Targeted leakage detection \u2014 probe for data leakage with single-feature tests, correlation checks, and train/test overlap detection."
352
+ lifecycle: validate
353
+ invocation_mode: slash_only
354
+ model_invocation: disabled
355
+ mutates_project: true
356
+ tools:
357
+ - Read
358
+ - Bash
359
+ - Grep
360
+ - Glob
361
+ argument_hint: '[--deep] [--features feature_1,feature_2]'
362
+ lit:
363
+ description: "Literature search scoped to the current experiment domain \u2014 find papers, SOTA baselines, and related work without leaving the terminal."
364
+ lifecycle: research
365
+ invocation_mode: slash_only
366
+ model_invocation: disabled
367
+ mutates_project: true
368
+ tools:
369
+ - Read
370
+ - Bash
371
+ - Grep
372
+ - Glob
373
+ - WebSearch
374
+ argument_hint: <query> | --baseline | --related <exp-id>
375
+ logbook:
376
+ description: "Generate a research logbook showing the full experiment narrative \u2014 hypotheses proposed, experiments run, decisions made, and progress over time. Outputs HTML (with interactive chart) or markdown."
377
+ lifecycle: document
378
+ invocation_mode: slash_only
379
+ model_invocation: disabled
380
+ mutates_project: true
381
+ tools:
382
+ - Read
383
+ - Bash
384
+ - Grep
385
+ - Glob
386
+ argument_hint: '[--since YYYY-MM-DD] [--format html|markdown] [--output path]'
387
+ merge:
388
+ description: "Model merging \u2014 average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost."
389
+ lifecycle: compose
390
+ invocation_mode: slash_only
391
+ model_invocation: disabled
392
+ mutates_project: true
393
+ tools:
394
+ - Read
395
+ - Bash
396
+ - Grep
397
+ - Glob
398
+ argument_hint: <exp-ids...> [--method uniform|greedy|ties|dare]
399
+ mode:
400
+ description: "Set the research strategy mode \u2014 explore (try new things), exploit (refine what works), or replicate (verify results). Drives novelty guard policy and agent behavior."
401
+ lifecycle: strategy
402
+ invocation_mode: slash_only
403
+ model_invocation: disabled
404
+ mutates_project: true
405
+ tools:
406
+ - Read
407
+ - Bash
408
+ argument_hint: <explore|exploit|replicate>
409
+ onboard:
410
+ description: "Project onboarding \u2014 generate a walkthrough for new collaborators. Task, history, decisions, next steps."
411
+ lifecycle: document
412
+ invocation_mode: slash_only
413
+ model_invocation: disabled
414
+ mutates_project: true
415
+ tools:
416
+ - Read
417
+ - Bash
418
+ - Grep
419
+ - Glob
420
+ argument_hint: '[--audience researcher|engineer|stakeholder] [--depth brief|full]'
421
+ paper:
422
+ description: Draft mechanical paper sections (setup, results, ablation, hyperparameters) from experiment logs. LaTeX and markdown output.
423
+ lifecycle: document
424
+ invocation_mode: slash_only
425
+ model_invocation: disabled
426
+ mutates_project: true
427
+ tools:
428
+ - Read
429
+ - Bash
430
+ - Grep
431
+ - Glob
432
+ argument_hint: '[--sections setup,results,ablation] [--format latex|markdown]'
433
+ plan:
434
+ description: "Research planning assistant \u2014 design a strategic experiment campaign with budget-aware ROI allocation."
435
+ lifecycle: plan
436
+ invocation_mode: slash_only
437
+ model_invocation: disabled
438
+ mutates_project: true
439
+ tools:
440
+ - Read
441
+ - Bash
442
+ - Grep
443
+ - Glob
444
+ argument_hint: '[--budget 20] [--goal "maximize F1 for production"]'
445
+ poster:
446
+ description: "Generate a single-page HTML research poster summarizing the experiment campaign \u2014 best result, trajectory, key findings, and methodology. Adapted from posterskill's self-contained HTML architecture."
447
+ lifecycle: document
448
+ invocation_mode: slash_only
449
+ model_invocation: disabled
450
+ mutates_project: true
451
+ tools:
452
+ - Read
453
+ - Write
454
+ - Edit
455
+ - Bash
456
+ - Grep
457
+ - Glob
458
+ argument_hint: '[title override]'
459
+ postmortem:
460
+ description: "Failure postmortem \u2014 diagnose why experiments stopped improving and get actionable next steps."
461
+ lifecycle: diagnose
462
+ invocation_mode: slash_only
463
+ model_invocation: disabled
464
+ mutates_project: true
465
+ tools:
466
+ - Read
467
+ - Bash
468
+ - Grep
469
+ - Glob
470
+ argument_hint: '[--window 10] [--auto-trigger 5]'
471
+ preflight:
472
+ description: "Pre-flight resource check \u2014 estimates VRAM, RAM, and disk requirements before running ML training. Compares against available system resources and issues PASS/WARN/FAIL verdict. Use before training to catch OOM errors before they happen."
473
+ lifecycle: check
474
+ invocation_mode: slash_only
475
+ model_invocation: disabled
476
+ mutates_project: false
477
+ tools:
478
+ - Read
479
+ - Bash
480
+ - Grep
481
+ - Glob
482
+ argument_hint: '[--model-type torch] [--params 10M] [--batch-size 32]'
483
+ present:
484
+ description: "Presentation figure generation \u2014 training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps."
485
+ lifecycle: document
486
+ invocation_mode: slash_only
487
+ model_invocation: disabled
488
+ mutates_project: true
489
+ tools:
490
+ - Read
491
+ - Bash
492
+ - Grep
493
+ - Glob
494
+ argument_hint: '[--figures training,comparison] [--style light|dark|poster]'
495
+ profile:
496
+ description: "Profile a training run \u2014 timing breakdown, memory usage, throughput, bottleneck detection with actionable recommendations."
497
+ lifecycle: check
498
+ invocation_mode: slash_only
499
+ model_invocation: disabled
500
+ mutates_project: true
501
+ tools:
502
+ - Read
503
+ - Bash
504
+ - Grep
505
+ - Glob
506
+ argument_hint: '[exp-id] [--seed 42]'
507
+ prune:
508
+ description: "Weight pruning \u2014 measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model."
509
+ lifecycle: optimize
510
+ invocation_mode: slash_only
511
+ model_invocation: disabled
512
+ mutates_project: true
513
+ tools:
514
+ - Read
515
+ - Bash
516
+ - Grep
517
+ - Glob
518
+ argument_hint: <exp-id> [--sparsity 0.5,0.75,0.9] [--method magnitude|structured|lottery]
519
+ quantize:
520
+ description: "Post-training quantization \u2014 FP32\u2192INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss."
521
+ lifecycle: optimize
522
+ invocation_mode: slash_only
523
+ model_invocation: disabled
524
+ mutates_project: true
525
+ tools:
526
+ - Read
527
+ - Bash
528
+ - Grep
529
+ - Glob
530
+ argument_hint: <exp-id> [--precision int8|fp16|dynamic]
531
+ queue:
532
+ description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
533
+ lifecycle: orchestrate
534
+ invocation_mode: slash_only
535
+ model_invocation: disabled
536
+ mutates_project: true
537
+ tools:
538
+ - Read
539
+ - Bash
540
+ - Grep
541
+ - Glob
542
+ argument_hint: <add|list|run|pause|clear> [description] [--priority high] [--after q-001]
543
+ registry:
544
+ description: "Model registry \u2014 track, promote, and govern the model lifecycle from candidate to production."
545
+ lifecycle: govern
546
+ invocation_mode: slash_only
547
+ model_invocation: disabled
548
+ mutates_project: true
549
+ tools:
550
+ - Read
551
+ - Bash
552
+ - Grep
553
+ - Glob
554
+ argument_hint: '[list|register|promote|demote|archive|history] [exp-id] [stage]'
555
+ regress:
556
+ description: "Performance regression gate \u2014 re-run best experiment after code/dependency changes and verify metrics haven't degraded."
557
+ lifecycle: validate
558
+ invocation_mode: slash_only
559
+ model_invocation: disabled
560
+ mutates_project: true
561
+ tools:
562
+ - Read
563
+ - Bash
564
+ - Grep
565
+ - Glob
566
+ argument_hint: '[--tolerance 0.01] [--against exp-id] [--quick]'
567
+ replay:
568
+ description: "Experiment replay \u2014 re-run a historical experiment with current infrastructure to test if old approaches do better now."
569
+ lifecycle: validate
570
+ invocation_mode: slash_only
571
+ model_invocation: disabled
572
+ mutates_project: true
573
+ tools:
574
+ - Read
575
+ - Bash
576
+ - Grep
577
+ - Glob
578
+ argument_hint: <exp-id> [--with-current-data] [--with-current-preprocessing]
579
+ report:
580
+ description: "Generate a markdown research report from experiment history \u2014 structured for sharing, archiving, or including in documentation. More detailed than a brief, less visual than a poster."
581
+ lifecycle: document
582
+ invocation_mode: slash_only
583
+ model_invocation: disabled
584
+ mutates_project: true
585
+ tools:
586
+ - Read
587
+ - Bash
588
+ - Grep
589
+ - Glob
590
+ argument_hint: '[--since YYYY-MM-DD] [--output path]'
591
+ reproduce:
592
+ description: Verify reproducibility of a specific experiment by re-running from logged config and checking metrics fall within tolerance.
593
+ lifecycle: validate
594
+ invocation_mode: slash_only
595
+ model_invocation: disabled
596
+ mutates_project: true
597
+ tools:
598
+ - Read
599
+ - Bash
600
+ - Grep
601
+ - Glob
602
+ argument_hint: <exp-id> [--tolerance 0.02] [--strict] [--runs 3]
603
+ retry:
604
+ description: "Smart failure recovery \u2014 auto-diagnose crash type and retry with targeted fix. OOM \u2192 halve batch. NaN \u2192 add clipping."
605
+ lifecycle: orchestrate
606
+ invocation_mode: slash_only
607
+ model_invocation: disabled
608
+ mutates_project: true
609
+ tools:
610
+ - Read
611
+ - Bash
612
+ - Grep
613
+ - Glob
614
+ argument_hint: <exp-id> [--max-attempts 3]
615
+ review:
616
+ description: "Peer review simulation \u2014 generate likely reviewer objections with severity ratings and fix commands."
617
+ lifecycle: validate
618
+ invocation_mode: slash_only
619
+ model_invocation: disabled
620
+ mutates_project: true
621
+ tools:
622
+ - Read
623
+ - Bash
624
+ - Grep
625
+ - Glob
626
+ argument_hint: '[--venue neurips|icml|general] [--harsh]'
627
+ sanity:
628
+ description: "Pre-training sanity checks \u2014 catch broken data loaders, misconfigured losses, and dead gradients in 30 seconds before wasting hours."
629
+ lifecycle: check
630
+ invocation_mode: slash_only
631
+ model_invocation: disabled
632
+ mutates_project: true
633
+ tools:
634
+ - Read
635
+ - Bash
636
+ - Grep
637
+ - Glob
638
+ argument_hint: '[--quick] [--verbose]'
639
+ scale:
640
+ description: "Scaling law estimator \u2014 run small experiments at different sizes, fit a power law, and predict full-scale performance before committing compute."
641
+ lifecycle: analyze
642
+ invocation_mode: slash_only
643
+ model_invocation: disabled
644
+ mutates_project: true
645
+ tools:
646
+ - Read
647
+ - Bash
648
+ - Grep
649
+ - Glob
650
+ argument_hint: '[--axis data|compute|params] [--points 4] [--analyze results.yaml]'
651
+ search:
652
+ description: "Natural language experiment search \u2014 query with text + structured filters over 200+ experiments."
653
+ lifecycle: query
654
+ invocation_mode: slash_only
655
+ model_invocation: disabled
656
+ mutates_project: false
657
+ tools:
658
+ - Read
659
+ - Bash
660
+ - Grep
661
+ - Glob
662
+ argument_hint: '<query> [--filter "accuracy>0.85"] [--limit 10]'
663
+ seed:
664
+ description: Run multi-seed study on an experiment to compute mean/std/CI and flag seed-sensitive results. Prevents publishing lucky seeds.
665
+ lifecycle: validate
666
+ invocation_mode: slash_only
667
+ model_invocation: disabled
668
+ mutates_project: true
669
+ tools:
670
+ - Read
671
+ - Bash
672
+ - Grep
673
+ - Glob
674
+ argument_hint: '[N] [--quick] [--exp-id <id>]'
675
+ sensitivity:
676
+ description: "Hyperparameter sensitivity analysis \u2014 rank parameters by impact, identify which matter and which are noise."
677
+ lifecycle: analyze
678
+ invocation_mode: slash_only
679
+ model_invocation: disabled
680
+ mutates_project: true
681
+ tools:
682
+ - Read
683
+ - Bash
684
+ - Grep
685
+ - Glob
686
+ argument_hint: '[exp-id] [--params learning_rate,max_depth]'
687
+ share:
688
+ description: "Experiment packaging \u2014 portable archive with config, metrics, seed study, annotations, reproduction instructions."
689
+ lifecycle: share
690
+ invocation_mode: slash_only
691
+ model_invocation: disabled
692
+ mutates_project: true
693
+ tools:
694
+ - Read
695
+ - Bash
696
+ - Grep
697
+ - Glob
698
+ argument_hint: <exp-ids...> [--include model,figures,code]
699
+ simulate:
700
+ description: "Experiment outcome prediction \u2014 predict which configs will beat the current best before running them."
701
+ lifecycle: predict
702
+ invocation_mode: slash_only
703
+ model_invocation: disabled
704
+ mutates_project: true
705
+ tools:
706
+ - Read
707
+ - Bash
708
+ - Grep
709
+ - Glob
710
+ argument_hint: '[--configs configs.yaml] [--top-k 5] [--threshold 0.001]'
711
+ status:
712
+ description: "Show current ML experiment status \u2014 best model, recent experiments, convergence state, and trend analysis. Delegates to @ml-evaluator for read-only safety."
713
+ lifecycle: observe
714
+ invocation_mode: slash_only
715
+ model_invocation: disabled
716
+ mutates_project: false
717
+ tools:
718
+ - Read
719
+ - Bash
720
+ - Grep
721
+ - Glob
722
+ stitch:
723
+ description: "Pipeline composition \u2014 decompose ML pipelines into swappable stages. Show, swap, cache, and run stages independently."
724
+ lifecycle: compose
725
+ invocation_mode: slash_only
726
+ model_invocation: disabled
727
+ mutates_project: true
728
+ tools:
729
+ - Read
730
+ - Bash
731
+ - Grep
732
+ - Glob
733
+ argument_hint: <show|swap|cache|run> [stage] [--from exp-id]
734
+ suggest:
735
+ description: "Literature-grounded model selection. Reads the ML task context, searches recent literature, and suggests model architectures worth trying \u2014 with citations. Suggestions are auto-queued as hypotheses."
736
+ lifecycle: research
737
+ invocation_mode: slash_only
738
+ model_invocation: disabled
739
+ mutates_project: true
740
+ tools:
741
+ - Read
742
+ - Write
743
+ - Bash
744
+ - Grep
745
+ - Glob
746
+ - WebSearch
747
+ - WebFetch
748
+ argument_hint: '[task description override]'
749
+ equivalent_script:
750
+ path: scripts/suggest_next.py
751
+ location: scaffold
752
+ surgery:
753
+ description: "Architecture modification \u2014 add/remove layers, widen/narrow, swap activations, inject skip connections. Specify what to change, system handles how."
754
+ lifecycle: modify
755
+ invocation_mode: slash_only
756
+ model_invocation: disabled
757
+ mutates_project: true
758
+ tools:
759
+ - Read
760
+ - Bash
761
+ - Grep
762
+ - Glob
763
+ argument_hint: <exp-id> --op <operation> [args...]
764
+ sweep:
765
+ description: Generate and run a systematic hyperparameter sweep. Computes the cartesian product of configured parameter ranges and processes the queue sequentially with full experiment logging.
766
+ lifecycle: explore
767
+ invocation_mode: slash_only
768
+ model_invocation: disabled
769
+ mutates_project: true
770
+ tools:
771
+ - Read
772
+ - Write
773
+ - Edit
774
+ - Bash
775
+ - Grep
776
+ - Glob
777
+ argument_hint: '[sweep_config.yaml]'
778
+ template:
779
+ description: "Experiment template library \u2014 save winning configs as reusable templates, apply to new projects."
780
+ lifecycle: manage
781
+ invocation_mode: slash_only
782
+ model_invocation: disabled
783
+ mutates_project: true
784
+ tools:
785
+ - Read
786
+ - Bash
787
+ - Grep
788
+ - Glob
789
+ argument_hint: <save|list|apply|share> [--name name] [--from exp-id]
790
+ train:
791
+ description: "Run the autonomous ML experiment loop. Iteratively hypothesizes, trains, evaluates, and decides \u2014 keeping only improvements. Implements the autoresearch pattern with formal convergence detection and git-disciplined rollback."
792
+ lifecycle: execute
793
+ invocation_mode: slash_only
794
+ model_invocation: disabled
795
+ mutates_project: true
796
+ tools:
797
+ - Read
798
+ - Write
799
+ - Edit
800
+ - Bash
801
+ - Grep
802
+ - Glob
803
+ argument_hint: '[max_iterations]'
804
+ transfer:
805
+ description: "Cross-project knowledge transfer \u2014 find similar prior projects and surface what worked. Builds institutional ML memory."
806
+ lifecycle: research
807
+ invocation_mode: slash_only
808
+ model_invocation: disabled
809
+ mutates_project: true
810
+ tools:
811
+ - Read
812
+ - Bash
813
+ - Grep
814
+ - Glob
815
+ argument_hint: '[--from project-path] [--auto]'
816
+ trend:
817
+ description: "Long-term trend analysis \u2014 improvement velocity, family ROI, diminishing returns detection, strategic research direction."
818
+ lifecycle: analyze
819
+ invocation_mode: slash_only
820
+ model_invocation: disabled
821
+ mutates_project: true
822
+ tools:
823
+ - Read
824
+ - Bash
825
+ - Grep
826
+ - Glob
827
+ argument_hint: '[--window 30d] [--metric accuracy]'
828
+ try:
829
+ description: "Inject a hypothesis into the agent's experiment queue. This is how research taste reaches the agent \u2014 the human selects which coins to flip, the agent flips them."
830
+ lifecycle: steer
831
+ invocation_mode: slash_only
832
+ model_invocation: disabled
833
+ mutates_project: true
834
+ tools:
835
+ - Read
836
+ - Write
837
+ - Edit
838
+ - Bash
839
+ - Grep
840
+ - Glob
841
+ argument_hint: <hypothesis description>
842
+ equivalent_script:
843
+ path: scripts/manage_hypotheses.py
844
+ location: scaffold
845
+ update:
846
+ description: "Incremental model update \u2014 add new data without full retraining, with forgetting detection."
847
+ lifecycle: update
848
+ invocation_mode: slash_only
849
+ model_invocation: disabled
850
+ mutates_project: true
851
+ tools:
852
+ - Read
853
+ - Bash
854
+ - Grep
855
+ - Glob
856
+ argument_hint: <exp-id> --new-data <path> [--replay-ratio 0.1] [--tolerance 0.005]
857
+ validate:
858
+ description: Run stability validation on the current experiment configuration. Executes N runs to measure metric variance and auto-configures multi-run evaluation if variance is too high.
859
+ lifecycle: validate
860
+ invocation_mode: slash_only
861
+ model_invocation: disabled
862
+ mutates_project: true
863
+ tools:
864
+ - Read
865
+ - Bash
866
+ - Grep
867
+ - Glob
868
+ argument_hint: '[--auto]'
869
+ warm:
870
+ description: "Warm-start from a prior model \u2014 load checkpoint, optionally freeze layers, adjust learning rate, and continue training."
871
+ lifecycle: compose
872
+ invocation_mode: slash_only
873
+ model_invocation: disabled
874
+ mutates_project: true
875
+ tools:
876
+ - Read
877
+ - Bash
878
+ - Grep
879
+ - Glob
880
+ argument_hint: <exp-id> [--freeze-layers encoder] [--unfreeze-after 5]
881
+ watch:
882
+ description: Live training monitor with early-warning alerts for loss spikes, NaN, overfitting, and metric plateaus.
883
+ lifecycle: monitor
884
+ invocation_mode: slash_only
885
+ model_invocation: disabled
886
+ mutates_project: true
887
+ tools:
888
+ - Read
889
+ - Bash
890
+ - Grep
891
+ - Glob
892
+ argument_hint: '[--alerts] [--interval 10] [--analyze run.log]'
893
+ whatif:
894
+ description: "What-if analysis \u2014 answer hypotheticals from existing experiment data without running new experiments."
895
+ lifecycle: analyze
896
+ invocation_mode: slash_only
897
+ model_invocation: disabled
898
+ mutates_project: true
899
+ tools:
900
+ - Read
901
+ - Bash
902
+ - Grep
903
+ - Glob
904
+ argument_hint: '"<question>" [--json]'
905
+ xray:
906
+ description: "Internal model diagnostics \u2014 gradient flow, dead neurons, activation stats, weight distributions, tree depth analysis."
907
+ lifecycle: analyze
908
+ invocation_mode: slash_only
909
+ model_invocation: disabled
910
+ mutates_project: true
911
+ tools:
912
+ - Read
913
+ - Bash
914
+ - Grep
915
+ - Glob
916
+ argument_hint: '[exp-id] [--layer encoder.layer.2] [--compare exp-a exp-b]'
917
+ config_files:
918
+ - commands.yaml
919
+ - defaults.yaml
920
+ - experiment_archetypes.yaml
921
+ - failure_modes.yaml
922
+ - lifecycle.toml
923
+ - novelty_aliases.yaml
924
+ - relationships.toml
925
+ - state.toml
926
+ - task_taxonomy.yaml
927
+ - taxonomy.toml
928
+ - watch_alerts.yaml