claude-turing 3.5.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "turing",
3
- "version": "3.5.0",
4
- "description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 60 commands, 2 specialized agents, experiment archaeology (trend + flashback + archive + annotate + search + template + replay), model surgery (prune + quantize + merge + surgery), feature & training intelligence, model debugging, pre-training intelligence, meta-intelligence, scaling & efficiency, model composition, deep analysis, experiment orchestration, literature + paper, model export, profiling, checkpoints, experiment intelligence, statistical rigor, tree-search, cost-performance, model cards, hypothesis database, novelty guard, anti-cheating, taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
3
+ "version": "4.0.0",
4
+ "description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 63 commands, 2 specialized agents, research communication (cite + present + changelog), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), model surgery (prune + quantize + merge + surgery), feature & training intelligence, model debugging, pre-training intelligence, meta-intelligence, scaling & efficiency, model composition, deep analysis, experiment orchestration, literature + paper, model export, profiling, checkpoints, experiment intelligence, statistical rigor, tree-search, cost-performance, model cards, hypothesis database, novelty guard, anti-cheating, taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
5
5
  "author": {
6
6
  "name": "pragnition"
7
7
  },
package/README.md CHANGED
@@ -371,6 +371,9 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
371
371
  | `/turing:search <query>` | Natural language experiment search — text + structured filters |
372
372
  | `/turing:template <action>` | Experiment template library — save/list/apply reusable configs |
373
373
  | `/turing:replay <exp-id>` | Experiment replay — re-run old approach with current infrastructure |
374
+ | `/turing:cite <action>` | Citation & attribution manager — track papers, audit missing citations, generate BibTeX |
375
+ | `/turing:present [--figures]` | Presentation figures — training curves, comparisons, ablation, Pareto, sensitivity |
376
+ | `/turing:changelog [--audience]` | Model changelog — version-grouped improvements for technical or stakeholder audiences |
374
377
 
375
378
  And for fully hands-off operation:
376
379
 
@@ -555,11 +558,11 @@ Each project gets independent config, data, experiments, models, and agent memor
555
558
 
556
559
  ## Architecture of Turing Itself
557
560
 
558
- 60 commands, 2 agents, 10 config files, 79 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), deep analysis (diff + watch + regress), model composition (ensemble + stitch + warm), scaling & efficiency (scale + budget + distill), meta-intelligence (transfer + audit), pre-training intelligence (sanity + baseline + leak), model debugging (xray + sensitivity + calibrate), feature & training intelligence (feature + curriculum), model surgery (prune + quantize + merge + surgery), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
561
+ 63 commands, 2 agents, 10 config files, 82 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), deep analysis (diff + watch + regress), model composition (ensemble + stitch + warm), scaling & efficiency (scale + budget + distill), meta-intelligence (transfer + audit), pre-training intelligence (sanity + baseline + leak), model debugging (xray + sensitivity + calibrate), feature & training intelligence (feature + curriculum), model surgery (prune + quantize + merge + surgery), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), research communication (cite + present + changelog), 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
559
562
 
560
563
  ```
561
564
  turing/
562
- ├── commands/ 59 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration + deep analysis + model composition + scaling & efficiency + meta-intelligence + pre-training intelligence + model debugging + feature & training intelligence + model surgery + experiment archaeology)
565
+ ├── commands/ 62 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration + deep analysis + model composition + scaling & efficiency + meta-intelligence + pre-training intelligence + model debugging + feature & training intelligence + model surgery + experiment archaeology + research communication)
563
566
  ├── agents/ 2 agents (researcher: read/write, evaluator: read-only)
564
567
  ├── config/ 8 files (lifecycle, taxonomy, archetypes, novelty aliases)
565
568
  ├── templates/ Scaffolded into user projects by /turing:init
@@ -0,0 +1,22 @@
1
+ ---
2
+ name: changelog
3
+ description: Model changelog generation — auto-generate a human-readable progress narrative from experiment history for technical or stakeholder audiences.
4
+ disable-model-invocation: true
5
+ argument-hint: "[--since exp-id|date] [--audience technical|stakeholder]"
6
+ allowed-tools: Read, Bash(*), Grep, Glob
7
+ ---
8
+
9
+ Translate experiment logs into a narrative that PMs and stakeholders can read in 2 minutes.
10
+
11
+ ## Steps
12
+ 1. **Activate environment:** `source .venv/bin/activate`
13
+ 2. **Run:** `python scripts/generate_changelog.py $ARGUMENTS`
14
+ 3. **Audience:** pass `--audience technical` (experiment IDs, configs) or `--audience stakeholder` (plain English, percentages)
15
+ 4. **Saved output:** `paper/CHANGELOG.md`
16
+
17
+ ## Examples
18
+ ```
19
+ /turing:changelog # Full changelog
20
+ /turing:changelog --audience stakeholder # Non-technical summary
21
+ /turing:changelog --since exp-042 # Since specific experiment
22
+ ```
@@ -62,6 +62,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
62
62
  | "quantize", "quantization", "int8", "fp16", "reduce precision", "faster inference" | `/turing:quantize` | Optimize |
63
63
  | "merge", "model soup", "merge weights", "average models", "TIES", "DARE" | `/turing:merge` | Compose |
64
64
  | "surgery", "architecture", "add layer", "widen", "modify model", "swap activation" | `/turing:surgery` | Modify |
65
+ | "cite", "citation", "bibliography", "bibtex", "attribution", "references" | `/turing:cite` | Record |
66
+ | "present", "figures", "slides", "presentation", "charts", "plots" | `/turing:present` | Document |
67
+ | "changelog", "model changelog", "progress summary", "what improved" | `/turing:changelog` | Document |
65
68
  | "trend", "trends", "research direction", "improvement rate", "diminishing returns", "what's working" | `/turing:trend` | Analyze |
66
69
  | "flashback", "where was I", "context", "resume", "catch up", "what happened" | `/turing:flashback` | Recall |
67
70
  | "archive", "cleanup", "compress old", "disk space", "archive experiments" | `/turing:archive` | Manage |
@@ -134,6 +137,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
134
137
  | `/turing:search <query>` | Natural language experiment search with structured filters | (inline) |
135
138
  | `/turing:template <action>` | Experiment template library: save/list/apply reusable configs across projects | (inline) |
136
139
  | `/turing:replay <exp-id>` | Experiment replay: re-run old experiment with current infrastructure | (inline) |
140
+ | `/turing:cite <action>` | Citation manager: add/list/check/bib for papers, datasets, methods | (inline) |
141
+ | `/turing:present [--figures]` | Presentation figures: training curves, comparisons, ablation, Pareto, sensitivity | (inline) |
142
+ | `/turing:changelog [--audience]` | Model changelog: version-grouped improvements for technical or stakeholder audiences | (inline) |
137
143
 
138
144
  ## Proactive Detection
139
145
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-turing",
3
- "version": "3.5.0",
3
+ "version": "4.0.0",
4
4
  "type": "module",
5
5
  "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
6
6
  "bin": {
package/src/install.js CHANGED
@@ -34,6 +34,7 @@ const SUB_COMMANDS = [
34
34
  "feature", "curriculum",
35
35
  "prune", "quantize", "merge", "surgery",
36
36
  "trend", "flashback", "archive", "annotate", "search", "template", "replay",
37
+ "cite", "present", "changelog",
37
38
  ];
38
39
 
39
40
  export async function install(opts = {}) {
package/src/verify.js CHANGED
@@ -74,6 +74,9 @@ const EXPECTED_COMMANDS = [
74
74
  "search/SKILL.md",
75
75
  "template/SKILL.md",
76
76
  "replay/SKILL.md",
77
+ "cite/SKILL.md",
78
+ "present/SKILL.md",
79
+ "changelog/SKILL.md",
77
80
  ];
78
81
 
79
82
  const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];
@@ -137,6 +137,9 @@ TEMPLATE_DIRS = {
137
137
  "experiment_search.py",
138
138
  "experiment_templates.py",
139
139
  "experiment_replay.py",
140
+ "citation_manager.py",
141
+ "generate_figures.py",
142
+ "generate_changelog.py",
140
143
  ],
141
144
  "tests": ["__init__.py", "conftest.py"],
142
145
  }
@@ -184,6 +187,8 @@ DIRECTORIES_TO_CREATE = [
184
187
  "experiments/archive",
185
188
  "experiments/searches",
186
189
  "experiments/replays",
190
+ "experiments/citations",
191
+ "paper/figures",
187
192
  "experiments/logs",
188
193
  "models/best",
189
194
  "models/archive",