npm - claude-turing - Versions diffs - 3.5.0 → 4.0.0 - Mend

claude-turing 3.5.0 → 4.0.0

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (12)

package/.claude-plugin/plugin.json +2 -2
package/README.md +5 -2
package/commands/changelog.md +22 -0
package/commands/turing.md +6 -0
package/package.json +1 -1
package/src/install.js +1 -0
package/src/verify.js +3 -0
package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc +0 -0
package/templates/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
package/templates/scripts/scaffold.py +5 -0

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "turing",
-  "version": "3.5.0",
-  "description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 60 commands, 2 specialized agents, experiment archaeology (trend + flashback + archive + annotate + search + template + replay), model surgery (prune + quantize + merge + surgery), feature & training intelligence, model debugging, pre-training intelligence, meta-intelligence, scaling & efficiency, model composition, deep analysis, experiment orchestration, literature + paper, model export, profiling, checkpoints, experiment intelligence, statistical rigor, tree-search, cost-performance, model cards, hypothesis database, novelty guard, anti-cheating, taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
+  "version": "4.0.0",
+  "description": "Autonomous ML research harness — the autoresearch loop as a formal protocol. 63 commands, 2 specialized agents, research communication (cite + present + changelog), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), model surgery (prune + quantize + merge + surgery), feature & training intelligence, model debugging, pre-training intelligence, meta-intelligence, scaling & efficiency, model composition, deep analysis, experiment orchestration, literature + paper, model export, profiling, checkpoints, experiment intelligence, statistical rigor, tree-search, cost-performance, model cards, hypothesis database, novelty guard, anti-cheating, taste-leverage loop. Inspired by Karpathy's autoresearch and the scientific method itself.",
   "author": {
     "name": "pragnition"
   },

package/README.md CHANGED Viewed

@@ -371,6 +371,9 @@ The index (`hypotheses.yaml`) is the lightweight queue. The detail files (`hypot
 | `/turing:search <query>` | Natural language experiment search — text + structured filters |
 | `/turing:template <action>` | Experiment template library — save/list/apply reusable configs |
 | `/turing:replay <exp-id>` | Experiment replay — re-run old approach with current infrastructure |
+| `/turing:cite <action>` | Citation & attribution manager — track papers, audit missing citations, generate BibTeX |
+| `/turing:present [--figures]` | Presentation figures — training curves, comparisons, ablation, Pareto, sensitivity |
+| `/turing:changelog [--audience]` | Model changelog — version-grouped improvements for technical or stakeholder audiences |
 And for fully hands-off operation:
@@ -555,11 +558,11 @@ Each project gets independent config, data, experiments, models, and agent memor
 ## Architecture of Turing Itself
-60 commands, 2 agents, 10 config files, 79 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), deep analysis (diff + watch + regress), model composition (ensemble + stitch + warm), scaling & efficiency (scale + budget + distill), meta-intelligence (transfer + audit), pre-training intelligence (sanity + baseline + leak), model debugging (xray + sensitivity + calibrate), feature & training intelligence (feature + curriculum), model surgery (prune + quantize + merge + surgery), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
+63 commands, 2 agents, 10 config files, 82 template scripts, model registry, artifact contract, cost-performance frontier, model cards, tree-search exploration, statistical rigor, experiment intelligence, performance profiling, smart checkpoints, production model export, literature integration, paper section drafting, experiment orchestration (queue + retry + fork), deep analysis (diff + watch + regress), model composition (ensemble + stitch + warm), scaling & efficiency (scale + budget + distill), meta-intelligence (transfer + audit), pre-training intelligence (sanity + baseline + leak), model debugging (xray + sensitivity + calibrate), feature & training intelligence (feature + curriculum), model surgery (prune + quantize + merge + surgery), experiment archaeology (trend + flashback + archive + annotate + search + template + replay), research communication (cite + present + changelog), 16 ADRs. See [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) for the full codemap.
 ```
 turing/
-├── commands/              59 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration + deep analysis + model composition + scaling & efficiency + meta-intelligence + pre-training intelligence + model debugging + feature & training intelligence + model surgery + experiment archaeology)
+├── commands/              62 skill files (core + taste-leverage + reporting + exploration + statistical rigor + experiment intelligence + performance + deployment + research workflow + orchestration + deep analysis + model composition + scaling & efficiency + meta-intelligence + pre-training intelligence + model debugging + feature & training intelligence + model surgery + experiment archaeology + research communication)
 ├── agents/                2 agents (researcher: read/write, evaluator: read-only)
 ├── config/                8 files (lifecycle, taxonomy, archetypes, novelty aliases)
 ├── templates/             Scaffolded into user projects by /turing:init

package/commands/changelog.md ADDED Viewed

@@ -0,0 +1,22 @@
+---
+name: changelog
+description: Model changelog generation — auto-generate human-readable progress narrative from experiment history for stakeholders.
+disable-model-invocation: true
+argument-hint: "[--since exp-id|date] [--audience technical|stakeholder]"
+allowed-tools: Read, Bash(*), Grep, Glob
+---
+Translate experiment logs into a narrative that PMs and stakeholders can read in 2 minutes.
+## Steps
+1. **Activate environment:** `source .venv/bin/activate`
+2. **Run:** `python scripts/generate_changelog.py $ARGUMENTS`
+3. **Audience:** technical (experiment IDs, configs), stakeholder (plain English, percentages)
+4. **Saved output:** `paper/CHANGELOG.md`
+## Examples
+```
+/turing:changelog                                # Full changelog
+/turing:changelog --audience stakeholder         # Non-technical summary
+/turing:changelog --since exp-042                # Since specific experiment
+```

package/commands/turing.md CHANGED Viewed

@@ -62,6 +62,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
 | "quantize", "quantization", "int8", "fp16", "reduce precision", "faster inference" | `/turing:quantize` | Optimize |
 | "merge", "model soup", "merge weights", "average models", "TIES", "DARE" | `/turing:merge` | Compose |
 | "surgery", "architecture", "add layer", "widen", "modify model", "swap activation" | `/turing:surgery` | Modify |
+| "cite", "citation", "bibliography", "bibtex", "attribution", "references" | `/turing:cite` | Record |
+| "present", "figures", "slides", "presentation", "charts", "plots" | `/turing:present` | Document |
+| "changelog", "model changelog", "progress summary", "what improved" | `/turing:changelog` | Document |
 | "trend", "trends", "research direction", "improvement rate", "diminishing returns", "what's working" | `/turing:trend` | Analyze |
 | "flashback", "where was I", "context", "resume", "catch up", "what happened" | `/turing:flashback` | Recall |
 | "archive", "cleanup", "compress old", "disk space", "archive experiments" | `/turing:archive` | Manage |
@@ -134,6 +137,9 @@ You are the Turing ML research router. Detect the user's intent and route to the
 | `/turing:search <query>` | Natural language experiment search with structured filters | (inline) |
 | `/turing:template <action>` | Experiment template library: save/list/apply reusable configs across projects | (inline) |
 | `/turing:replay <exp-id>` | Experiment replay: re-run old experiment with current infrastructure | (inline) |
+| `/turing:cite <action>` | Citation manager: add/list/check/bib for papers, datasets, methods | (inline) |
+| `/turing:present [--figures]` | Presentation figures: training curves, comparisons, ablation, Pareto, sensitivity | (inline) |
+| `/turing:changelog [--audience]` | Model changelog: version-grouped improvements for technical or stakeholder audiences | (inline) |
 ## Proactive Detection

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "claude-turing",
-  "version": "3.5.0",
+  "version": "4.0.0",
   "type": "module",
   "description": "Autonomous ML research harness for Claude Code. The autoresearch loop as a formal protocol — iteratively trains, evaluates, and improves ML models with structured experiment tracking, convergence detection, immutable evaluation infrastructure, and safety guardrails.",
   "bin": {

package/src/install.js CHANGED Viewed

@@ -34,6 +34,7 @@ const SUB_COMMANDS = [
   "feature", "curriculum",
   "prune", "quantize", "merge", "surgery",
   "trend", "flashback", "archive", "annotate", "search", "template", "replay",
+  "cite", "present", "changelog",
 ];
 export async function install(opts = {}) {

package/src/verify.js CHANGED Viewed

@@ -74,6 +74,9 @@ const EXPECTED_COMMANDS = [
   "search/SKILL.md",
   "template/SKILL.md",
   "replay/SKILL.md",
+  "cite/SKILL.md",
+  "present/SKILL.md",
+  "changelog/SKILL.md",
 ];
 const EXPECTED_AGENTS = ["ml-researcher.md", "ml-evaluator.md"];

package/templates/scripts/__pycache__/citation_manager.cpython-314.pyc ADDED Viewed

Binary file

package/templates/scripts/__pycache__/generate_changelog.cpython-314.pyc ADDED Viewed

Binary file

package/templates/scripts/__pycache__/generate_figures.cpython-314.pyc ADDED Viewed

Binary file

package/templates/scripts/__pycache__/scaffold.cpython-314.pyc CHANGED Viewed

Binary file

package/templates/scripts/scaffold.py CHANGED Viewed

@@ -137,6 +137,9 @@ TEMPLATE_DIRS = {
         "experiment_search.py",
         "experiment_templates.py",
         "experiment_replay.py",
+        "citation_manager.py",
+        "generate_figures.py",
+        "generate_changelog.py",
     ],
     "tests": ["__init__.py", "conftest.py"],
 }
@@ -184,6 +187,8 @@ DIRECTORIES_TO_CREATE = [
     "experiments/archive",
     "experiments/searches",
     "experiments/replays",
+    "experiments/citations",
+    "paper/figures",
     "experiments/logs",
     "models/best",
     "models/archive",