claude-turing 4.7.0 → 4.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +1 -1
- package/commands/ablate.md +0 -1
- package/commands/annotate.md +0 -1
- package/commands/archive.md +0 -1
- package/commands/audit.md +0 -1
- package/commands/baseline.md +0 -1
- package/commands/brief.md +0 -1
- package/commands/budget.md +0 -1
- package/commands/calibrate.md +0 -1
- package/commands/card.md +0 -1
- package/commands/changelog.md +0 -1
- package/commands/checkpoint.md +0 -1
- package/commands/cite.md +0 -1
- package/commands/compare.md +0 -1
- package/commands/counterfactual.md +0 -1
- package/commands/curriculum.md +0 -1
- package/commands/design.md +0 -1
- package/commands/diagnose.md +0 -1
- package/commands/diff.md +0 -1
- package/commands/distill.md +0 -1
- package/commands/doctor.md +0 -1
- package/commands/ensemble.md +0 -1
- package/commands/explore.md +0 -1
- package/commands/export.md +0 -1
- package/commands/feature.md +0 -1
- package/commands/flashback.md +0 -1
- package/commands/fork.md +0 -1
- package/commands/frontier.md +0 -1
- package/commands/init.md +0 -1
- package/commands/leak.md +0 -1
- package/commands/lit.md +0 -1
- package/commands/logbook.md +0 -1
- package/commands/merge.md +0 -1
- package/commands/mode.md +0 -1
- package/commands/onboard.md +0 -1
- package/commands/paper.md +0 -1
- package/commands/plan.md +0 -1
- package/commands/poster.md +0 -1
- package/commands/postmortem.md +0 -1
- package/commands/preflight.md +0 -1
- package/commands/present.md +0 -1
- package/commands/profile.md +0 -1
- package/commands/prune.md +0 -1
- package/commands/quantize.md +0 -1
- package/commands/queue.md +0 -1
- package/commands/registry.md +0 -1
- package/commands/regress.md +0 -1
- package/commands/replay.md +0 -1
- package/commands/report.md +0 -1
- package/commands/reproduce.md +0 -1
- package/commands/retry.md +0 -1
- package/commands/review.md +0 -1
- package/commands/sanity.md +0 -1
- package/commands/scale.md +0 -1
- package/commands/search.md +0 -1
- package/commands/seed.md +0 -1
- package/commands/sensitivity.md +0 -1
- package/commands/share.md +0 -1
- package/commands/simulate.md +0 -1
- package/commands/status.md +0 -1
- package/commands/stitch.md +0 -1
- package/commands/suggest.md +0 -1
- package/commands/surgery.md +0 -1
- package/commands/sweep.md +0 -1
- package/commands/template.md +0 -1
- package/commands/train.md +0 -1
- package/commands/transfer.md +0 -1
- package/commands/trend.md +0 -1
- package/commands/try.md +0 -1
- package/commands/turing.md +3 -3
- package/commands/update.md +0 -1
- package/commands/validate.md +0 -1
- package/commands/warm.md +0 -1
- package/commands/watch.md +0 -1
- package/commands/whatif.md +0 -1
- package/commands/xray.md +0 -1
- package/config/commands.yaml +74 -74
- package/package.json +3 -2
- package/skills/turing/SKILL.md +3 -3
- package/skills/turing/ablate/SKILL.md +0 -1
- package/skills/turing/annotate/SKILL.md +0 -1
- package/skills/turing/archive/SKILL.md +0 -1
- package/skills/turing/audit/SKILL.md +0 -1
- package/skills/turing/baseline/SKILL.md +0 -1
- package/skills/turing/brief/SKILL.md +0 -1
- package/skills/turing/budget/SKILL.md +0 -1
- package/skills/turing/calibrate/SKILL.md +0 -1
- package/skills/turing/card/SKILL.md +0 -1
- package/skills/turing/changelog/SKILL.md +0 -1
- package/skills/turing/checkpoint/SKILL.md +0 -1
- package/skills/turing/cite/SKILL.md +0 -1
- package/skills/turing/compare/SKILL.md +0 -1
- package/skills/turing/counterfactual/SKILL.md +0 -1
- package/skills/turing/curriculum/SKILL.md +0 -1
- package/skills/turing/design/SKILL.md +0 -1
- package/skills/turing/diagnose/SKILL.md +0 -1
- package/skills/turing/diff/SKILL.md +0 -1
- package/skills/turing/distill/SKILL.md +0 -1
- package/skills/turing/doctor/SKILL.md +0 -1
- package/skills/turing/ensemble/SKILL.md +0 -1
- package/skills/turing/explore/SKILL.md +0 -1
- package/skills/turing/export/SKILL.md +0 -1
- package/skills/turing/feature/SKILL.md +0 -1
- package/skills/turing/flashback/SKILL.md +0 -1
- package/skills/turing/fork/SKILL.md +0 -1
- package/skills/turing/frontier/SKILL.md +0 -1
- package/skills/turing/init/SKILL.md +0 -1
- package/skills/turing/leak/SKILL.md +0 -1
- package/skills/turing/lit/SKILL.md +0 -1
- package/skills/turing/logbook/SKILL.md +0 -1
- package/skills/turing/merge/SKILL.md +0 -1
- package/skills/turing/mode/SKILL.md +0 -1
- package/skills/turing/onboard/SKILL.md +0 -1
- package/skills/turing/paper/SKILL.md +0 -1
- package/skills/turing/plan/SKILL.md +0 -1
- package/skills/turing/poster/SKILL.md +0 -1
- package/skills/turing/postmortem/SKILL.md +0 -1
- package/skills/turing/preflight/SKILL.md +0 -1
- package/skills/turing/present/SKILL.md +0 -1
- package/skills/turing/profile/SKILL.md +0 -1
- package/skills/turing/prune/SKILL.md +0 -1
- package/skills/turing/quantize/SKILL.md +0 -1
- package/skills/turing/queue/SKILL.md +0 -1
- package/skills/turing/registry/SKILL.md +0 -1
- package/skills/turing/regress/SKILL.md +0 -1
- package/skills/turing/replay/SKILL.md +0 -1
- package/skills/turing/report/SKILL.md +0 -1
- package/skills/turing/reproduce/SKILL.md +0 -1
- package/skills/turing/retry/SKILL.md +0 -1
- package/skills/turing/review/SKILL.md +0 -1
- package/skills/turing/sanity/SKILL.md +0 -1
- package/skills/turing/scale/SKILL.md +0 -1
- package/skills/turing/search/SKILL.md +0 -1
- package/skills/turing/seed/SKILL.md +0 -1
- package/skills/turing/sensitivity/SKILL.md +0 -1
- package/skills/turing/share/SKILL.md +0 -1
- package/skills/turing/simulate/SKILL.md +0 -1
- package/skills/turing/status/SKILL.md +0 -1
- package/skills/turing/stitch/SKILL.md +0 -1
- package/skills/turing/suggest/SKILL.md +0 -1
- package/skills/turing/surgery/SKILL.md +0 -1
- package/skills/turing/sweep/SKILL.md +0 -1
- package/skills/turing/template/SKILL.md +0 -1
- package/skills/turing/train/SKILL.md +0 -1
- package/skills/turing/transfer/SKILL.md +0 -1
- package/skills/turing/trend/SKILL.md +0 -1
- package/skills/turing/try/SKILL.md +0 -1
- package/skills/turing/update/SKILL.md +0 -1
- package/skills/turing/validate/SKILL.md +0 -1
- package/skills/turing/warm/SKILL.md +0 -1
- package/skills/turing/watch/SKILL.md +0 -1
- package/skills/turing/whatif/SKILL.md +0 -1
- package/skills/turing/xray/SKILL.md +0 -1
- package/src/command-registry.js +12 -0
- package/src/install.js +4 -3
- package/src/sync-commands-layout.js +149 -0
- package/src/sync-skills-layout.js +4 -133
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: annotate
|
|
3
3
|
description: Retrospective experiment annotations — add human notes, tags, and context that automated metrics can't capture.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> \"note\" [--tag fragile] | --list | --search \"keyword\""
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: archive
|
|
3
3
|
description: Experiment lifecycle cleanup — compress old artifacts, prune checkpoints, create queryable summary index. Reclaim disk space.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--older-than 30d] [--keep-best 10] [--dry-run]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: audit
|
|
3
3
|
description: Pre-submission methodology audit — catch data leakage, missing baselines, cherry-picked seeds, and incomplete ablations before a reviewer does.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--strict] [--checklist neurips]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: baseline
|
|
3
3
|
description: Automatic baseline generation — random, majority/mean, linear, k-NN baselines in 60 seconds. Every experiment needs a "is this better than dumb?" reference.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--methods all|simple|linear] [--data data.npz]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: brief
|
|
3
3
|
description: Generate a structured research intelligence report from experiment history — what's been learned, what's promising, what's exhausted, and what the human should consider next. Use --deep for literature-grounded suggestions.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[ml/project] [--deep]"
|
|
6
5
|
allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob, WebSearch, WebFetch
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: budget
|
|
3
3
|
description: Compute budget manager — set experiment/time limits, track allocation across explore/exploit phases, auto-shift modes, hard stop.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<set|status|reset> [--experiments 50] [--hours 8]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: calibrate
|
|
3
3
|
description: Probability calibration — measure ECE, plot reliability diagrams, apply Platt scaling or isotonic regression.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--method platt|isotonic|temperature|auto]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: card
|
|
3
3
|
description: Generate a standardized model card documenting the trained model — type, performance, training data, limitations, intended use, and artifact contract.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob
|
|
6
5
|
---
|
|
7
6
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: changelog
|
|
3
3
|
description: Model changelog generation — auto-generate human-readable progress narrative from experiment history for stakeholders.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--since exp-id|date] [--audience technical|stakeholder]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: checkpoint
|
|
3
3
|
description: Smart checkpoint management — list, prune (Pareto-based), average top-K, resume from any point, disk usage stats.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<list|prune|average|resume|stats> [exp-id] [--top 3] [--dry-run]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: cite
|
|
3
3
|
description: Citation & attribution manager — track papers, datasets, methods. Audit for missing citations, generate BibTeX.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<add|list|check|bib> [--key Chen2016 --title XGBoost --url ...]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: compare
|
|
3
3
|
description: Compare two ML experiment runs side-by-side — metrics, configuration deltas, and a verdict on which approach is more promising.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id-1> <exp-id-2>"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: counterfactual
|
|
3
3
|
description: Input-level counterfactual explanations — find the smallest input change to flip a prediction.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> --sample <index> [--target <class>]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: curriculum
|
|
3
3
|
description: Training curriculum optimization — order data by difficulty, compare easy-to-hard vs hard-to-easy vs self-paced strategies.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--strategies easy-to-hard,random]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: design
|
|
3
3
|
description: Generate a structured experiment design for a hypothesis. Reads experiment history, searches literature for methodology, produces a scored design document at experiments/designs/.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<hypothesis-id or description>"
|
|
6
5
|
allowed-tools: Read, Write, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*), Grep, Glob, WebSearch, WebFetch
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: diagnose
|
|
3
3
|
description: Error analysis — cluster failure cases, identify systematic failure modes, and suggest targeted fixes with auto-queued hypotheses.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--auto-queue] [--top 5]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: diff
|
|
3
3
|
description: Deep experiment comparison — config diffs, metric significance, per-class regressions, training curve divergence, feature importance shifts.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-a> <exp-b> [--code]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: distill
|
|
3
3
|
description: Model compression via distillation — train a smaller student model to match a larger teacher's predictions.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<teacher-exp-id> [--compression 4] [--method soft-labels]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: ensemble
|
|
3
3
|
description: Automated ensemble construction — combines top-K models via voting, stacking, and blending for zero-cost improvement.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--top-k 5] [--methods voting,stacking,blending]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: explore
|
|
3
3
|
description: Tree-search-guided hypothesis exploration using AB-MCTS. Explores the space of experiment ideas as a search tree, scored by the critique engine. Discovers non-obvious refinement chains that linear suggestion cannot find.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[ml/project] [--iterations N] [--top N] [--strategy abmcts-a|abmcts-m|greedy]"
|
|
6
5
|
allowed-tools: Read, Write, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: export
|
|
3
3
|
description: Export model to production format with equivalence verification, latency benchmarking, and deployment model card.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--format joblib|xgboost_json|onnx|torchscript|tflite]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: feature
|
|
3
3
|
description: Automated feature selection — multi-method importance consensus, redundancy detection, and interaction feature generation.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--method all|importance] [--top-k 20]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: flashback
|
|
3
3
|
description: Session context restoration — "where was I?" summary after days away. Current best, pending hypotheses, last session, annotations.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--days 7] [--last 10]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: fork
|
|
3
3
|
description: Branch an experiment into parallel tracks — run both A and B, report the winner.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> --branches \"approach A\" \"approach B\" [--auto-promote]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: frontier
|
|
3
3
|
description: Visualize Pareto frontier across multiple objectives — answers "which model is actually best?" when there are tradeoffs.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--metrics \"accuracy,train_seconds,n_params\"] [--ascii]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: init
|
|
3
3
|
description: Initialize a new ML project with the Turing autoresearch harness. Scaffolds the full experiment infrastructure — immutable evaluation pipeline, agent-editable training code, structured logging, convergence detection hooks, and a Python virtual environment. Use --plan to generate a research plan.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[project_name] [--plan]"
|
|
6
5
|
allowed-tools: Read, Write, Edit, Bash(*), Grep, Glob, WebSearch, WebFetch
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: leak
|
|
3
3
|
description: Targeted leakage detection — probe for data leakage with single-feature tests, correlation checks, and train/test overlap detection.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--deep] [--features feature_1,feature_2]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: lit
|
|
3
3
|
description: Literature search scoped to the current experiment domain — find papers, SOTA baselines, and related work without leaving the terminal.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<query> | --baseline | --related <exp-id>"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob, WebSearch
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: logbook
|
|
3
3
|
description: Generate a research logbook showing the full experiment narrative — hypotheses proposed, experiments run, decisions made, and progress over time. Outputs HTML (with interactive chart) or markdown.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--since YYYY-MM-DD] [--format html|markdown] [--output path]"
|
|
6
5
|
allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: merge
|
|
3
3
|
description: Model merging — average weights from multiple checkpoints into a single model (soups, TIES, DARE). Free accuracy, zero latency cost.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-ids...> [--method uniform|greedy|ties|dare]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: mode
|
|
3
3
|
description: Set the research strategy mode — explore (try new things), exploit (refine what works), or replicate (verify results). Drives novelty guard policy and agent behavior.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<explore|exploit|replicate>"
|
|
6
5
|
---
|
|
7
6
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: onboard
|
|
3
3
|
description: Project onboarding — generate a walkthrough for new collaborators. Task, history, decisions, next steps.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--audience researcher|engineer|stakeholder] [--depth brief|full]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: paper
|
|
3
3
|
description: Draft mechanical paper sections (setup, results, ablation, hyperparameters) from experiment logs. LaTeX and markdown output.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--sections setup,results,ablation] [--format latex|markdown]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: plan
|
|
3
3
|
description: Research planning assistant — design a strategic experiment campaign with budget-aware ROI allocation.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--budget 20] [--goal \"maximize F1 for production\"]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: poster
|
|
3
3
|
description: Generate a single-page HTML research poster summarizing the experiment campaign — best result, trajectory, key findings, and methodology. Adapted from posterskill's self-contained HTML architecture.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[title override]"
|
|
6
5
|
allowed-tools: Read, Write, Edit, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*, open:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: preflight
|
|
3
3
|
description: Pre-flight resource check — estimates VRAM, RAM, and disk requirements before running ML training. Compares against available system resources and issues PASS/WARN/FAIL verdict. Use before training to catch OOM errors before they happen.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--model-type torch] [--params 10M] [--batch-size 32]"
|
|
6
5
|
allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, nvidia-smi:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: present
|
|
3
3
|
description: Presentation figure generation — training curves, comparison charts, ablation tables, Pareto plots, sensitivity heatmaps.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--figures training,comparison] [--style light|dark|poster]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: profile
|
|
3
3
|
description: Profile a training run — timing breakdown, memory usage, throughput, bottleneck detection with actionable recommendations.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--seed 42]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: prune
|
|
3
3
|
description: Weight pruning — measure accuracy at different sparsity levels, find the knee point, produce a smaller/faster model.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> [--sparsity 0.5,0.75,0.9] [--method magnitude|structured|lottery]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: quantize
|
|
3
3
|
description: Post-training quantization — FP32→INT8/FP16, measure accuracy loss, 2-4x speedup with <0.5% accuracy loss.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> [--precision int8|fp16|dynamic]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: queue
|
|
3
3
|
description: Queue experiments for batch execution with priority ordering and dependency chains. Load the queue, walk away, read the summary.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<add|list|run|pause|clear> [description] [--priority high] [--after q-001]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: registry
|
|
3
3
|
description: Model registry — track, promote, and govern the model lifecycle from candidate to production.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[list|register|promote|demote|archive|history] [exp-id] [stage]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: regress
|
|
3
3
|
description: Performance regression gate — re-run best experiment after code/dependency changes and verify metrics haven't degraded.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--tolerance 0.01] [--against exp-id] [--quick]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: replay
|
|
3
3
|
description: Experiment replay — re-run a historical experiment with current infrastructure to test if old approaches do better now.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> [--with-current-data] [--with-current-preprocessing]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: report
|
|
3
3
|
description: Generate a markdown research report from experiment history — structured for sharing, archiving, or including in documentation. More detailed than a brief, less visual than a poster.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--since YYYY-MM-DD] [--output path]"
|
|
6
5
|
allowed-tools: Read, Bash(python scripts/*:*, source .venv/bin/activate:*, mkdir:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: reproduce
|
|
3
3
|
description: Verify reproducibility of a specific experiment by re-running from logged config and checking metrics fall within tolerance.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> [--tolerance 0.02] [--strict] [--runs 3]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: retry
|
|
3
3
|
description: Smart failure recovery — auto-diagnose crash type and retry with targeted fix. OOM → halve batch. NaN → add clipping.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> [--max-attempts 3]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: review
|
|
3
3
|
description: Peer review simulation — generate likely reviewer objections with severity ratings and fix commands.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--venue neurips|icml|general] [--harsh]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: sanity
|
|
3
3
|
description: Pre-training sanity checks — catch broken data loaders, misconfigured losses, and dead gradients in 30 seconds before wasting hours.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--quick] [--verbose]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: scale
|
|
3
3
|
description: Scaling law estimator — run small experiments at different sizes, fit a power law, and predict full-scale performance before committing compute.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--axis data|compute|params] [--points 4] [--analyze results.yaml]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: search
|
|
3
3
|
description: Natural language experiment search — query with text + structured filters over 200+ experiments.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<query> [--filter \"accuracy>0.85\"] [--limit 10]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: seed
|
|
3
3
|
description: Run multi-seed study on an experiment to compute mean/std/CI and flag seed-sensitive results. Prevents publishing lucky seeds.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[N] [--quick] [--exp-id <id>]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: sensitivity
|
|
3
3
|
description: Hyperparameter sensitivity analysis — rank parameters by impact, identify which matter and which are noise.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[exp-id] [--params learning_rate,max_depth]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: share
|
|
3
3
|
description: Experiment packaging — portable archive with config, metrics, seed study, annotations, reproduction instructions.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-ids...> [--include model,figures,code]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: simulate
|
|
3
3
|
description: Experiment outcome prediction — predict which configs will beat the current best before running them.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--configs configs.yaml] [--top-k 5] [--threshold 0.001]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: status
|
|
3
3
|
description: Show current ML experiment status — best model, recent experiments, convergence state, and trend analysis. Delegates to @ml-evaluator for read-only safety.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
6
5
|
---
|
|
7
6
|
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: stitch
|
|
3
3
|
description: Pipeline composition — decompose ML pipelines into swappable stages. Show, swap, cache, and run stages independently.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<show|swap|cache|run> [stage] [--from exp-id]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: suggest
|
|
3
3
|
description: Literature-grounded model selection. Reads the ML task context, searches recent literature, and suggests model architectures worth trying — with citations. Suggestions are auto-queued as hypotheses.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[task description override]"
|
|
6
5
|
allowed-tools: Read, Write, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob, WebSearch, WebFetch
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: surgery
|
|
3
3
|
description: Architecture modification — add/remove layers, widen/narrow, swap activations, inject skip connections. Specify what to change, system handles how.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> --op <operation> [args...]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: sweep
|
|
3
3
|
description: Generate and run a systematic hyperparameter sweep. Computes the cartesian product of configured parameter ranges and processes the queue sequentially with full experiment logging.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[sweep_config.yaml]"
|
|
6
5
|
allowed-tools: Read, Write, Edit, Bash(python train.py:*, python scripts/*:*, git:*, source .venv/bin/activate:*, pip:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: template
|
|
3
3
|
description: Experiment template library — save winning configs as reusable templates, apply to new projects.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<save|list|apply|share> [--name name] [--from exp-id]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: train
|
|
3
3
|
description: Run the autonomous ML experiment loop. Iteratively hypothesizes, trains, evaluates, and decides — keeping only improvements. Implements the autoresearch pattern with formal convergence detection and git-disciplined rollback.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[max_iterations]"
|
|
6
5
|
allowed-tools: Read, Write, Edit, Bash(python train.py:*, python scripts/*:*, git:*, source .venv/bin/activate:*, pip:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: transfer
|
|
3
3
|
description: Cross-project knowledge transfer — find similar prior projects and surface what worked. Builds institutional ML memory.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--from project-path] [--auto]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: trend
|
|
3
3
|
description: Long-term trend analysis — improvement velocity, family ROI, diminishing returns detection, strategic research direction.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "[--window 30d] [--metric accuracy]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: try
|
|
3
3
|
description: Inject a hypothesis into the agent's experiment queue. This is how research taste reaches the agent — the human selects which coins to flip, the agent flips them.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<hypothesis description>"
|
|
6
5
|
allowed-tools: Read, Write, Edit, Bash(python scripts/*:*, source .venv/bin/activate:*), Grep, Glob
|
|
7
6
|
---
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: update
|
|
3
3
|
description: Incremental model update — add new data without full retraining, with forgetting detection.
|
|
4
|
-
disable-model-invocation: true
|
|
5
4
|
argument-hint: "<exp-id> --new-data <path> [--replay-ratio 0.1] [--tolerance 0.005]"
|
|
6
5
|
allowed-tools: Read, Bash(*), Grep, Glob
|
|
7
6
|
---
|