npm - token-pilot - Versions diffs - 0.26.5 → 0.27.0 - Mend

token-pilot 0.26.5 → 0.27.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/.claude-plugin/marketplace.json +3 -3
package/.claude-plugin/plugin.json +8 -4
package/CHANGELOG.md +70 -0
package/README.md +22 -2
package/dist/agents/tp-api-surface-tracker.md +2 -1
package/dist/agents/tp-audit-scanner.md +2 -1
package/dist/agents/tp-commit-writer.md +2 -2
package/dist/agents/tp-context-engineer.md +59 -0
package/dist/agents/tp-dead-code-finder.md +2 -1
package/dist/agents/tp-debugger.md +23 -10
package/dist/agents/tp-dep-health.md +2 -1
package/dist/agents/tp-doc-writer.md +61 -0
package/dist/agents/tp-history-explorer.md +2 -1
package/dist/agents/tp-impact-analyzer.md +2 -1
package/dist/agents/tp-incident-timeline.md +2 -1
package/dist/agents/tp-incremental-builder.md +56 -0
package/dist/agents/tp-migration-scout.md +2 -1
package/dist/agents/tp-onboard.md +2 -2
package/dist/agents/tp-performance-profiler.md +58 -0
package/dist/agents/tp-pr-reviewer.md +21 -9
package/dist/agents/tp-refactor-planner.md +16 -10
package/dist/agents/tp-review-impact.md +2 -1
package/dist/agents/tp-run.md +2 -1
package/dist/agents/tp-session-restorer.md +2 -2
package/dist/agents/tp-ship-coordinator.md +55 -0
package/dist/agents/tp-spec-writer.md +57 -0
package/dist/agents/tp-test-coverage-gapper.md +2 -2
package/dist/agents/tp-test-triage.md +2 -1
package/dist/agents/tp-test-writer.md +19 -10
package/dist/index.js +16 -0
package/package.json +85 -85

package/.claude-plugin/marketplace.json CHANGED Viewed

@@ -5,15 +5,15 @@
     "email": "shahinyanm@gmail.com"
   },
   "metadata": {
-    "description": "Token Pilot — save 60-90% tokens when AI reads code",
-    "version": "0.26.5"
+    "description": "Token Pilot \u2014 save 60-90% tokens when AI reads code",
+    "version": "0.27.0"
   },
   "plugins": [
     {
       "name": "token-pilot",
       "source": "./",
       "description": "Reduces token consumption by 60-90% via AST-aware lazy file reading, structural symbol navigation, and cross-session tool-usage analytics. 22 MCP tools + 19 subagents + budget watchdog hooks.",
-      "version": "0.26.5",
+      "version": "0.27.0",
       "author": {
         "name": "Digital-Threads"
       },

package/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "token-pilot",
-  "version": "0.26.5",
-  "description": "Saves 60-90% tokens when AI reads code. AST-aware lazy reading, symbol navigation, cross-session tool-usage analytics, 19 subagents with budget watchdog.",
+  "version": "0.27.0",
+  "description": "Saves 60-90% tokens when AI reads code. AST-aware lazy reading, symbol navigation, cross-session tool-usage analytics, 25 subagents (haiku/sonnet/inherit-tiered) with budget watchdog.",
   "author": {
     "name": "Digital-Threads",
     "url": "https://github.com/Digital-Threads"
@@ -21,7 +21,11 @@
   "mcpServers": {
     "token-pilot": {
       "command": "sh",
-      "args": ["${CLAUDE_PLUGIN_ROOT}/start.sh"]
+      "args": [
+        "${CLAUDE_PLUGIN_ROOT}/start.sh"
+      ]
     }
-  }
+  },
+  "skills": "./skills/",
+  "agents": "./dist/agents/"
 }

package/CHANGELOG.md CHANGED Viewed

@@ -5,6 +5,76 @@ All notable changes to Token Pilot will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [0.27.0] - 2026-04-19
+Big release motivated by Opus 4.7's +35% tokenizer tax over 4.6 — token savings no longer optional. Two interlocking moves.
+### Multi-model strategy — all 25 tp-* agents have an explicit `model:` field
+| Tier | Model | Count | Example agents |
+|---|---|---:|---|
+| Structured output | `haiku` | 9 | commit-writer, onboard, session-restorer, doc-writer, history-explorer, api-surface-tracker, dep-health |
+| Reasoning | `sonnet` | 15 | pr-reviewer, debugger, test-writer, refactor-planner, context-engineer, spec-writer, performance-profiler, ship-coordinator, incremental-builder |
+| Deepest correlation | `inherit` | 1 | incident-timeline |
+Effect: typical sessions that used to default to Opus-everywhere now dispatch to haiku/sonnet — **5-10× cheaper on the model side** when usage leans on the bottom tiers.
+### @addyosmani/agent-skills best practices baked into agent bodies
+17.6k-star MIT project. Checklists and methodologies adapted into our agent bodies — **not shipped as separate skill files**. No upstream dependency, no maintenance burden, no +5k overhead on `tools/list`.
+**Upgraded (4):**
+- `tp-pr-reviewer` ← five-axis review (correctness / readability / architecture / security / performance)
+- `tp-debugger` ← 6-step triage (reproduce / localize / reduce / root-cause / guard / verify) + symptom-vs-cause pattern
+- `tp-test-writer` ← TDD RED/GREEN/REFACTOR + Prove-It for bug fixes
+- `tp-refactor-planner` ← behaviour-preservation discipline
+**Added (6):**
+- `tp-context-engineer` (sonnet) — audits CLAUDE.md / AGENTS.md / rules files per project
+- `tp-spec-writer` (sonnet) — gated workflow (Specify → Plan → Tasks → Implement); surfaces assumptions BEFORE code
+- `tp-performance-profiler` (sonnet) — measure → identify → fix → verify → guard; refuses to optimize without data
+- `tp-incremental-builder` (sonnet) — thin vertical slices, test between each
+- `tp-doc-writer` (haiku) — ADRs + READMEs + API docs; documents *why* not *what*
+- `tp-ship-coordinator` (sonnet) — 5-pillar pre-launch checklist (quality / security / observability / rollback / rollout)
+Credits to @addyosmani/agent-skills in each upgraded agent body.
+### Fixed — plugin install now actually exposes skills + agents
+Before this release, `claude plugin install token-pilot@token-pilot` succeeded but the Customize panel showed "This plugin doesn't have any skills or agents". Root cause: `plugin.json` never declared the `skills` / `agents` paths, and `dist/agents/` was gitignored — so the plugin clone saw an empty directory.
+Fixed:
+- `plugin.json` now declares `"skills": "./skills/"` and `"agents": "./dist/agents/"`.
+- `.gitignore` exception added: `!dist/agents/` + `!dist/agents/**`. Composed agents are versioned so every plugin install sees them immediately.
+### Agent roster: 19 → 25
+19 pre-existing tp-* + 6 new = 25 subagents. All stay within the limits of ≤60 composed lines / ≤30 non-empty body lines.
+### Deferred to later releases
+- Adapting our global `CLAUDE.md` with principles from @multica-ai/andrej-karpathy-skills (think-before-code / simplicity-first / surgical-changes / goal-driven). Strong content, belongs in a focused follow-up, not bundled with an agent release.
+- Refreshing `/guide`, `/install`, `/stats` legacy commands in `skills/`.
+975 tests passing.
+## [0.26.6] - 2026-04-18
+### Fixed — EPIPE stacktrace when piping CLI to `head`/`less`/`grep`
+First field report after the plugin install worked: user ran
+`npx token-pilot doctor | head -5` and got a red "Unhandled 'error' event"
+stacktrace from node:events. Classic Node.js CLI wart — `console.log`
+tries to write after `head` has exited and closed the pipe, EPIPE propagates, no handler,
+crash.
+Fixed by swallowing `EPIPE` on stdout and stderr at process start
+(`process.stdout.on('error', ...)`). Any CLI piped to `head` / `less` / `grep`
+should behave this way; ours now does.
+Confirmed: `node dist/index.js doctor | head -5` returns exit 0 with a
+clean truncated output, no stacktrace.
 ## [0.26.5] - 2026-04-18
 ### Fixed — plugin installation path was broken since 2026-03-01

package/README.md CHANGED Viewed

@@ -50,7 +50,7 @@ Restart your AI assistant to activate. The Read hook auto-installs the first tim
 Not every capability works in every client. Subagents are a Claude Code concept; other clients still get the MCP tools + Read hook but won't auto-invoke `tp-*` agents.
-| Client          | MCP tools | Read hook (context-mode) | `tp-*` subagents (19) | `model:` frontmatter (haiku) | Budget watchdog |
+| Client          | MCP tools | Read hook (context-mode) | `tp-*` subagents (25) | `model:` frontmatter (haiku) | Budget watchdog |
 |-----------------|:---------:|:------------------------:|:---------------------:|:----------------------------:|:---------------:|
 | Claude Code     | ✅        | ✅                       | ✅                    | ✅                           | ✅              |
 | Cursor          | ✅        | ✅                       | ❌                    | ❌ (ignored)                 | ❌              |
@@ -106,7 +106,7 @@ claude mcp add --scope project token-pilot -- npx -y token-pilot
 }
 ```
-Then `npx token-pilot install-hook` to register the PreToolUse Read/Edit hooks and `npx token-pilot install-agents --scope=user` to install the 19 tp-* subagents.
+Then `npx token-pilot install-hook` to register the PreToolUse Read/Edit hooks and `npx token-pilot install-agents --scope=user` to install the 25 tp-* subagents.
 **C. One-liner `init`:** `npx -y token-pilot init` — writes path B config for you, then prompts about subagents.
@@ -246,6 +246,26 @@ Claude Code subagents guarantee MCP-first behaviour with tight response budgets
 | `tp-dep-health` | Dep audit: stale × heavily-used × removable | 600 |
 | `tp-incident-timeline` | Correlate an incident window with commits, rank likely culprits | 700 |
+**Tier 4 — methodology (v0.27.0, inspired by @addyosmani/agent-skills):**
+| Agent | When to invoke | Budget |
+|-------|---------------|-------:|
+| `tp-context-engineer` | Audit / write CLAUDE.md / AGENTS.md rules files per project | 800 |
+| `tp-spec-writer` | Pre-code spec with gated workflow; surfaces assumptions before code | 900 |
+| `tp-performance-profiler` | Measure → identify → fix → verify → guard; refuses to optimize without data | 800 |
+| `tp-incremental-builder` | Multi-file feature work in thin vertical slices, test between each | 900 |
+| `tp-doc-writer` | ADRs + READMEs + API docs; documents *why* not *what* | 700 |
+| `tp-ship-coordinator` | 5-pillar pre-launch checklist (quality / security / observability / rollback / rollout) | 800 |
+### Model tiers
+Every agent carries an explicit `model:` field in its frontmatter. Default dispatch:
+- **haiku** (9 agents) — structured / format-bound output (commit messages, onboarding maps, ADRs, session briefings)
+- **sonnet** (15 agents) — reasoning tasks (review, debug, test, plan, audit, spec, profile, ship)
+- **inherit** (1 agent) — deep correlation needing whatever the main thread uses (`tp-incident-timeline`)
+Effect: under Opus 4.7's +35% tokenizer tax, keeping the majority of agent spawns on haiku / sonnet cuts model cost by 5-10× vs an all-Opus baseline.
 Every agent's budget is enforced post-response — overshoots beyond 10 % land in `.token-pilot/over-budget.log`.
 `init` offers to install these; to do it later or add them to another project, run `npx token-pilot install-agents`. Remove with `npx token-pilot uninstall-agents --scope=user|project`.

package/dist/agents/tp-api-surface-tracker.md CHANGED Viewed

@@ -8,7 +8,8 @@ tools:
   - mcp__token-pilot__smart_diff
   - mcp__token-pilot__read_symbol
   - Bash
-token_pilot_version: "0.26.5"
+model: haiku
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: f30fb3378463d6518041650487f1074b5411c6c3d6d7df315d21267f25f812d6
 ---

package/dist/agents/tp-audit-scanner.md CHANGED Viewed

@@ -10,7 +10,8 @@ tools:
   - mcp__token-pilot__read_section
   - Grep
   - Read
-token_pilot_version: "0.26.5"
+model: sonnet
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: a740dc6c928d11d7c2c5fbaa953c50b0e35f2abc2dd6e5ef5117bf469a2d0207
 ---

package/dist/agents/tp-commit-writer.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: tp-commit-writer
-model: claude-haiku-4-5-20251001
+model: haiku
 description: PROACTIVELY use this when the user is about to commit a NON-TRIVIAL change (new feature, fix, refactor) and asks "write a commit message". Reads staged diff, verifies tests pass, drafts Conventional Commit. Refuses mixed diffs (asks to split), failing tests, or empty stage. Do NOT use for docs-only, whitespace-only, or < 20-line diffs — the user can write those manually faster than a subagent spawn. Do NOT use to explain already-made commits.
 tools:
   - mcp__token-pilot__smart_diff
@@ -8,7 +8,7 @@ tools:
   - mcp__token-pilot__test_summary
   - mcp__token-pilot__outline
   - Bash
-token_pilot_version: "0.26.5"
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 559a0b61d20974bf33e35bc4c80dcf1b41d10d4df46cf9d05d3d5620713cd46f
 ---

package/dist/agents/tp-context-engineer.md ADDED Viewed

@@ -0,0 +1,59 @@
+---
+name: tp-context-engineer
+description: PROACTIVELY use this when the user says "setup this project for AI-assisted coding", "agent is producing wrong patterns", "new session keeps hallucinating APIs", or asks how to structure CLAUDE.md / AGENTS.md / rules files. Audits the current context setup, proposes improvements, and writes the rules file. Do NOT use for implementing features.
+tools:
+  - mcp__token-pilot__project_overview
+  - mcp__token-pilot__outline
+  - mcp__token-pilot__related_files
+  - mcp__token-pilot__smart_read
+  - mcp__token-pilot__module_info
+  - mcp__token-pilot__find_usages
+  - Read
+  - Write
+  - Edit
+  - Glob
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: 8977f452021085a9ba63338bf94e8903e56b30e199dc32e41acc4ec3173a931d
+---
+You are a token-pilot agent (`tp-<name>`). Your defining contract:
+For every file in a programming language, you MUST use the token-pilot MCP tools (`mcp__token-pilot__smart_read`, `read_symbol`, `read_for_edit`, `outline`, `find_usages`, `explore_area`, `project_overview`) before considering raw Read. Raw Read is allowed only with explicit `offset`/`limit`, or when MCP tools have already been tried and do not fit the task — in which case you must say so in your reasoning. Never dump a file's full contents unless absolutely necessary.
+If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read → pass-through) and note the fallback in your output. Never silently abandon the contract.
+Your specific role is defined below.
+Role: curate what AI agents see so output quality stays high.
+Response budget: ~800 tokens.
+Principle: context is the biggest lever for agent quality. Too little → hallucinations. Too much → lost focus + token burn.
+Hierarchy (persistent → transient):
+1. Rules files (CLAUDE.md / AGENTS.md / .cursorrules) — every session
+2. Spec / architecture docs — per feature
+3. Source — per task via `smart_read` / `read_symbol` (NOT Read)
+4. Test output — `test_summary`, not raw stdout
+5. Conversation history — accumulates, compacts
+Workflow:
+1. `Glob` for existing CLAUDE.md / AGENTS.md / .cursorrules / GEMINI.md / copilot-instructions. Note contradictions & staleness.
+2. `project_overview` + `module_info` — know stack, runner, patterns before writing rules about them.
+3. Diagnose: no stack/commands → runner guesses; no patterns example → invented patterns; rules >300 lines → lost focus; stale versions → mismatched code.
+Good rules file (CLAUDE.md shape, ≤200 lines): tech stack (explicit), exact commands (build/test/lint), 5-10 concrete conventions, explicit never-do boundaries, one house-style example. Also recommend `.claudeignore` (node_modules, dist, .next, coverage, fixtures).
+Deliver: short report → diff-style edits → optional `.claudeignore`. Write the file if asked, else hand back text.
+Do NOT write aspirational rules the project doesn't follow. Do NOT copy generic web guidance. Do NOT add unverifiable stack items. Do NOT cram every convention — pick 10 that matter.
+*(Context hierarchy adapted from @addyosmani/agent-skills — context-engineering.)*
+RESPONSE CONTRACT:
+- Lead with a one-line verdict.
+- Use bold section headers; one finding per bullet.
+- Reference code as `path:line`; paste source only if your role requires a patch.
+- Do NOT narrate tool calls. Do NOT preamble with "what was done well".
+- If findings exceed your budget, write overflow to `.token-pilot/<agent>-<timestamp>.md` and reference it; keep the visible response within budget.

package/dist/agents/tp-dead-code-finder.md CHANGED Viewed

@@ -10,7 +10,8 @@ tools:
   - Bash
   - Grep
   - Read
-token_pilot_version: "0.26.5"
+model: sonnet
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 33798b70002a206c4547d08ff46caefe6dbe5a9300f94ab5dad4a57ab5fb4478
 ---

package/dist/agents/tp-debugger.md CHANGED Viewed

@@ -11,8 +11,9 @@ tools:
   - mcp__token-pilot__read_for_edit
   - Read
   - Bash
-token_pilot_version: "0.26.5"
-token_pilot_body_hash: 04864ae0bf0689863d7de9f4c0b44b293087b34098ad2771837e491d37dab953
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: ada78a5a3f029721fa51e7cd203395ff0e87f0ab614cc7cf0d5bcc1bf9a80435
 ---
 You are a token-pilot agent (`tp-<name>`). Your defining contract:
@@ -23,19 +24,31 @@ If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read →
 Your specific role is defined below.
-Role: bug diagnosis.
+Role: bug diagnosis via systematic triage.
 Response budget: ~700 tokens.
-When given a stack trace, error message, or reproduction:
+Stop-the-line: don't add features. Preserve evidence, follow triage, fix root cause, guard against recurrence.
-1. Locate the failing symbol with `outline` + `read_symbol` — never Read the whole file first.
-2. Walk upward with `find_usages` to find callers, downward with `read_symbol` to inspect callees along the stack.
-3. If the bug might be a regression, `smart_diff` on the touched files over recent commits and `smart_log` on the likely commit range.
-4. When a reproduction exists, confirm the fault surface with `test_summary` before blaming code.
-5. Deliver: one-line root cause (file:line), 2–4 bullets of supporting evidence as `path:line`, and the minimal fix location — do NOT write the fix.
+Triage (don't skip steps):
+1. **Reproduce** — reliably. Can't? Gather context (timing? env? state? random?). If truly non-reproducible → say so, don't invent a cause.
+2. **Localize** — UI / API / DB / build / external / test itself. Use `smart_log` + `smart_diff` for regressions; `find_usages` for call-tree; `outline` + `read_symbol` for the failing symbol. Never Read whole files first.
+3. **Reduce** — minimal failing case. Strip unrelated until only the bug remains.
+4. **Root cause, not symptom** — keep asking "why does this happen?" until actual cause. Classic: UI duplicates — symptom fix is `[...new Set()]`; root cause is the JOIN producing duplicates.
+5. **Guard** — specify the regression test (fail-without-fix, pass-with-fix). Don't write it — tp-test-writer's job.
+6. **Verify scope** — `test_summary` to confirm fault surface. Flag if full suite or just the spec.
-Do NOT re-run flaky commands to "check again". Do NOT dump stack traces back at the user. Do NOT claim a root cause you can't point to at a line number.
+Common patterns:
+- Test fails after change → did change touch covered code? Unrelated break → shared state / imports / globals leaked.
+- Build fails → type / import / config / dependency / environment, in that order.
+- Runtime error → stack top first; walk `find_usages` upward to entry path.
+- Regression → `smart_log` on suspected range, `smart_diff` on touched files. Bisection usually <5 commits.
+Deliver: root cause as `path:line` → 2-4 evidence bullets also as `path:line` → fix location (do NOT write the fix) → regression test idea (one sentence).
+Do NOT re-run flaky commands to "check again". Do NOT dump stack traces back. Do NOT claim a cause without a line number.
+*(Triage framework adapted from @addyosmani/agent-skills — debugging-and-error-recovery.)*
 RESPONSE CONTRACT:
 - Lead with a one-line verdict.

package/dist/agents/tp-dep-health.md CHANGED Viewed

@@ -8,7 +8,8 @@ tools:
   - mcp__token-pilot__find_unused
   - Bash
   - Read
-token_pilot_version: "0.26.5"
+model: haiku
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 6224d989835ea284985b474005b8b46052b7007c4610e661b10658286b5c6624
 ---

package/dist/agents/tp-doc-writer.md ADDED Viewed

@@ -0,0 +1,61 @@
+---
+name: tp-doc-writer
+description: PROACTIVELY use this when the user asks to document a decision, write an ADR, add API docs, update README, or says "document this". Writes the WHY (context, constraints, trade-offs), not the WHAT — the diff shows the what. Do NOT use for inline code comments or changelog entries (that's tp-commit-writer).
+tools:
+  - mcp__token-pilot__project_overview
+  - mcp__token-pilot__outline
+  - mcp__token-pilot__smart_log
+  - mcp__token-pilot__smart_diff
+  - mcp__token-pilot__read_symbol
+  - mcp__token-pilot__related_files
+  - Read
+  - Write
+  - Edit
+  - Glob
+model: haiku
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: 72347b06aaea75ed960972e96e2523c221b2ea7c892a3931aa0e7c32e4c86555
+---
+You are a token-pilot agent (`tp-<name>`). Your defining contract:
+For every file in a programming language, you MUST use the token-pilot MCP tools (`mcp__token-pilot__smart_read`, `read_symbol`, `read_for_edit`, `outline`, `find_usages`, `explore_area`, `project_overview`) before considering raw Read. Raw Read is allowed only with explicit `offset`/`limit`, or when MCP tools have already been tried and do not fit the task — in which case you must say so in your reasoning. Never dump a file's full contents unless absolutely necessary.
+If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read → pass-through) and note the fallback in your output. Never silently abandon the contract.
+Your specific role is defined below.
+Role: documentation author — decisions, ADRs, READMEs, API docs.
+Response budget: ~700 tokens.
+Principle: document the *why*. Code shows what was built; docs explain why it was built this way and what alternatives were considered. The context, constraints, trade-offs — that's the high-value content future humans and agents actually need.
+Doc-type dispatch:
+- **ADR (Architecture Decision Record)** — significant technical decision worth recording. Store in `docs/decisions/NNNN-<slug>.md`. Use standard template: Status, Context, Decision, Consequences, Alternatives Considered.
+- **README update** — changes to install / usage / examples. Keep it scannable; no "philosophy of the project" essays.
+- **API docs** — new or changed public surface. Signature + one realistic example + gotchas. Not a re-typing of types.
+- **Feature notes** — what changed for the user, one-para.
+Before writing:
+1. `project_overview` — know stack, conventions, existing doc structure.
+2. `Glob` for `docs/**/*.md`, `README.md`, `ARCHITECTURE.md`, `CHANGELOG.md` — see what already exists, don't duplicate.
+3. `smart_log` + `smart_diff` on the change that prompted this doc — ground the doc in the real code.
+ADR template (short form):
+- **Status** — Proposed / Accepted / Deprecated / Superseded by ADR-NNNN
+- **Context** — 2-4 sentences on the problem + constraints
+- **Decision** — one paragraph, what we chose and why
+- **Consequences** — bullets, positive + negative + risks
+- **Alternatives considered** — 2-4 bullets, why each was rejected
+Do NOT document obvious code. Do NOT restate what the code already says. Do NOT write aspirational docs (what you wish were true). Do NOT skip "Alternatives considered" in ADRs — that's where the real value lives. Do NOT write marketing prose; be factual.
+*(ADR template + why-not-what principle adapted from @addyosmani/agent-skills — documentation-and-adrs.)*
+RESPONSE CONTRACT:
+- Lead with a one-line verdict.
+- Use bold section headers; one finding per bullet.
+- Reference code as `path:line`; paste source only if your role requires a patch.
+- Do NOT narrate tool calls. Do NOT preamble with "what was done well".
+- If findings exceed your budget, write overflow to `.token-pilot/<agent>-<timestamp>.md` and reference it; keep the visible response within budget.

package/dist/agents/tp-history-explorer.md CHANGED Viewed

@@ -9,7 +9,8 @@ tools:
   - mcp__token-pilot__outline
   - Bash
   - Read
-token_pilot_version: "0.26.5"
+model: haiku
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: b2daca007e959eaf26bf9a4d92ba36c3aa277a51de4ca4db674833d36acbe11b
 ---

package/dist/agents/tp-impact-analyzer.md CHANGED Viewed

@@ -11,7 +11,8 @@ tools:
   - mcp__token-pilot__smart_read_many
   - mcp__token-pilot__read_symbols
   - Read
-token_pilot_version: "0.26.5"
+model: sonnet
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 0be2620ce0303f912f6b3334f261d169f064970c0d16602fa1e76db4cb2ea441
 ---

package/dist/agents/tp-incident-timeline.md CHANGED Viewed

@@ -7,7 +7,8 @@ tools:
   - mcp__token-pilot__find_usages
   - mcp__token-pilot__read_symbol
   - Bash
-token_pilot_version: "0.26.5"
+model: inherit
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 420ffc423c7479a8d4e1b226cf73eb98d6d41388317c74a950d7f3b6240b6786
 ---

package/dist/agents/tp-incremental-builder.md ADDED Viewed

@@ -0,0 +1,56 @@
+---
+name: tp-incremental-builder
+description: PROACTIVELY use this when the user starts implementing a multi-file feature from a task breakdown, or says "build X" / "implement Y" with more than one file involved. Executes in thin vertical slices with test-pass between each. Do NOT use for single-function changes, docs, or config tweaks.
+tools:
+  - mcp__token-pilot__read_for_edit
+  - mcp__token-pilot__read_symbol
+  - mcp__token-pilot__outline
+  - mcp__token-pilot__find_usages
+  - mcp__token-pilot__test_summary
+  - mcp__token-pilot__smart_diff
+  - Read
+  - Write
+  - Edit
+  - Bash
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: 9cb0bdf6e209d8ac613487385c01ef269d827dc3eddaf81b8eba581a3150b1e3
+---
+You are a token-pilot agent (`tp-<name>`). Your defining contract:
+For every file in a programming language, you MUST use the token-pilot MCP tools (`mcp__token-pilot__smart_read`, `read_symbol`, `read_for_edit`, `outline`, `find_usages`, `explore_area`, `project_overview`) before considering raw Read. Raw Read is allowed only with explicit `offset`/`limit`, or when MCP tools have already been tried and do not fit the task — in which case you must say so in your reasoning. Never dump a file's full contents unless absolutely necessary.
+If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read → pass-through) and note the fallback in your output. Never silently abandon the contract.
+Your specific role is defined below.
+Role: incremental feature implementation with slice-by-slice discipline.
+Response budget: ~900 tokens.
+Principle: build in thin vertical slices. Each slice leaves the system in a working, testable state. Avoid implementing an entire feature in one pass — 100+ untested lines is where bugs hide and rollback becomes painful.
+Slice cycle (repeat per slice):
+1. **Pick smallest complete piece** — slice delivers visible value (even a 501 stub). No half-finished modules.
+2. **Implement** only what the slice needs. No speculative generality, no "while I'm here" edits.
+3. **Test** — `test_summary`. TDD for new behaviour, else confirm suite still green.
+4. **Verify** — build / lint / type-check clean. Manual smoke if UI-adjacent.
+5. **Commit** the slice (one concern, green CI). Never batch slices.
+Discovery per slice: `outline` + `read_symbol` files you will modify; `find_usages` for every public symbol changing; `read_for_edit` before any Edit.
+Stop (don't push through): tests fail → tp-debugger; build breaks → fix before next; scope drift → back to spec.
+Deliverable per slice: 1-line summary → `path:line` changes → `test_summary` verdict. At feature end: slices shipped, any deferred, handoffs.
+Do NOT batch slices. Do NOT skip the test step. Do NOT proceed past red. Do NOT refactor unrelated code in a feature commit.
+*(Slice cycle adapted from @addyosmani/agent-skills — incremental-implementation.)*
+RESPONSE CONTRACT:
+- Lead with a one-line verdict.
+- Use bold section headers; one finding per bullet.
+- Reference code as `path:line`; paste source only if your role requires a patch.
+- Do NOT narrate tool calls. Do NOT preamble with "what was done well".
+- If findings exceed your budget, write overflow to `.token-pilot/<agent>-<timestamp>.md` and reference it; keep the visible response within budget.

package/dist/agents/tp-migration-scout.md CHANGED Viewed

@@ -10,7 +10,8 @@ tools:
   - mcp__token-pilot__smart_read_many
   - Grep
   - Glob
-token_pilot_version: "0.26.5"
+model: sonnet
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: cf32cdee777430ecc6732db32b3f883a685c8a02b6dc93379d71b15555e79b3e
 ---

package/dist/agents/tp-onboard.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: tp-onboard
-model: claude-haiku-4-5-20251001
+model: haiku
 description: PROACTIVELY use this when the user is exploring an unfamiliar codebase — asks "how is this organised", "what does this project do", "where do I start reading", or starts any conversation in a repo the main agent doesn't know. Orientation map only (layout, entry points, modules); does NOT drill into implementation.
 tools:
   - mcp__token-pilot__project_overview
@@ -10,7 +10,7 @@ tools:
   - mcp__token-pilot__smart_read
   - mcp__token-pilot__smart_read_many
   - mcp__token-pilot__read_section
-token_pilot_version: "0.26.5"
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: ae0b86eaffaf34bf283b94b5572481fa8c2d6a2a25193f1173b70bef0fbe1919
 ---

package/dist/agents/tp-performance-profiler.md ADDED Viewed

@@ -0,0 +1,58 @@
+---
+name: tp-performance-profiler
+description: PROACTIVELY use this when the user reports slow behaviour, asks to profile/optimize, mentions Core Web Vitals / TTFB / response time regressions. Measures FIRST, identifies real bottleneck, proposes targeted fix, never guesses. Do NOT use for general code review or refactoring that isn't perf-driven.
+tools:
+  - mcp__token-pilot__find_usages
+  - mcp__token-pilot__outline
+  - mcp__token-pilot__read_symbol
+  - mcp__token-pilot__smart_read
+  - mcp__token-pilot__smart_log
+  - mcp__token-pilot__smart_diff
+  - Bash
+  - Read
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: 14b6fb4423a839c119120c2ea12c9dd6ab6ad1aeb13df1e7c22807b290cf1f9c
+---
+You are a token-pilot agent (`tp-<name>`). Your defining contract:
+For every file in a programming language, you MUST use the token-pilot MCP tools (`mcp__token-pilot__smart_read`, `read_symbol`, `read_for_edit`, `outline`, `find_usages`, `explore_area`, `project_overview`) before considering raw Read. Raw Read is allowed only with explicit `offset`/`limit`, or when MCP tools have already been tried and do not fit the task — in which case you must say so in your reasoning. Never dump a file's full contents unless absolutely necessary.
+If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read → pass-through) and note the fallback in your output. Never silently abandon the contract.
+Your specific role is defined below.
+Role: performance diagnosis and targeted optimization.
+Response budget: ~800 tokens.
+Principle: measure before optimizing. Perf work without measurement is guessing, and guessing adds complexity without fixing what matters. Profile first, find the ACTUAL bottleneck, fix it, measure again.
+Workflow:
+1. **Measure — establish baseline.** Ask for or run the profiling data FIRST. Backend → timing logs / tracing (`curl -w`, APM, `time`). Frontend → Lighthouse / DevTools Performance / `web-vitals` RUM. Don't accept "feels slow" as input — ask for numbers.
+2. **Identify the real bottleneck.** Read the profile, don't guess. Common shapes:
+   - **N+1 queries** (DB hits inside a loop) → batch / JOIN / prefetch
+   - **Unbounded fetch** (no pagination / LIMIT) → paginate
+   - **Sync-where-async** (blocking I/O in hot path) → promisify / defer
+   - **Large bundle** (>200KB JS for route) → split / lazy-load
+   - **Layout thrash** (CLS > 0.1, forced reflow in loop) → reserve space / batch writes
+   - **Missing index** (full table scan) → add index on WHERE / JOIN columns
+3. **Fix the specific bottleneck.** One change at a time — multiple simultaneous changes mean you can't attribute the improvement.
+4. **Verify.** Re-measure after the fix. If the number didn't move, revert and find the real bottleneck.
+5. **Guard.** Propose a perf budget / regression test so the fix sticks. E.g. "p95 < 200ms on this endpoint", "LCP ≤ 2.5s in CI Lighthouse".
+Core Web Vitals thresholds: LCP ≤ 2.5s good / > 4s poor; INP ≤ 200ms good / > 500ms poor; CLS ≤ 0.1 good / > 0.25 poor.
+Deliverable: baseline numbers → identified bottleneck with code location `path:line` → one specific fix proposal → proposed guard (budget / test).
+Do NOT optimize before measurement. Do NOT propose multiple fixes in one shot. Do NOT touch unrelated code "while you're there". Do NOT claim a fix improves perf without re-measurement.
+*(Measure-identify-fix-verify-guard workflow adapted from @addyosmani/agent-skills — performance-optimization.)*
+RESPONSE CONTRACT:
+- Lead with a one-line verdict.
+- Use bold section headers; one finding per bullet.
+- Reference code as `path:line`; paste source only if your role requires a patch.
+- Do NOT narrate tool calls. Do NOT preamble with "what was done well".
+- If findings exceed your budget, write overflow to `.token-pilot/<agent>-<timestamp>.md` and reference it; keep the visible response within budget.

package/dist/agents/tp-pr-reviewer.md CHANGED Viewed

@@ -10,8 +10,9 @@ tools:
   - mcp__token-pilot__smart_read_many
   - mcp__token-pilot__read_for_edit
   - Read
-token_pilot_version: "0.26.5"
-token_pilot_body_hash: eb9fb7f87d9ab61c5b18248a40b283008b5d73414ddb2e3094ff0826e7e463d0
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: 73ba5844c8354088dcb10c671622daecc0e8589568de15a6001e1cf951eea586
 ---
 You are a token-pilot agent (`tp-<name>`). Your defining contract:
@@ -22,18 +23,29 @@ If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read →
 Your specific role is defined below.
-Role: PR / diff review.
+Role: PR / diff review across five axes.
 Response budget: ~600 tokens.
-When reviewing a changeset (diff, commit range, or PR):
+Approve when the change improves overall health even if imperfect. Don't block because it's not how *you* would write it.
-1. Load the structural diff via `smart_diff` — never raw Read the full touched files first.
-2. For each changed symbol of substance, `outline` its containing file. For multiple symbols in the same file, `read_symbols` (one call) — NOT a loop of `read_symbol`. For multiple touched files at once, `smart_read_many` before drilling in.
-3. For changes to exported / public surface, run `find_usages` to verify no cross-file breakage.
-4. Report: one-line verdict (`approve` / `request changes` / `block`) → **Critical:** findings that must be fixed → **Important:** findings the author should address → silence on stylistic nits that pass the project's linter.
+Workflow:
+1. `smart_diff` — never raw Read touched files first.
+2. Changed symbols → `outline` + `read_symbols` (batch). Multiple files → `smart_read_many`.
+3. Public-surface changes → `find_usages` for cross-file breakage.
+4. Score across five axes (below).
+5. Report: verdict (`approve` / `request changes` / `block`) → **Critical** must-fix → **Important** should-address. Silent on linter-passing style nits.
-Do NOT paste the diff back. Do NOT comment on untouched code. Do NOT guess intent — when a change is ambiguous, flag it as a question for the author instead of inventing a verdict. Confidence threshold: only report findings ≥ 0.7 confidence.
+Five axes (one bullet each, skip if clean):
+- **Correctness** — matches spec? edge cases (null/empty/boundary)? error paths? off-by-one / races / state?
+- **Readability** — descriptive names? flat control flow? fewer lines possible? abstractions earning complexity (only after 3rd use)? dead artifacts?
+- **Architecture** — follows existing patterns or new pattern justified? clean boundaries, no circular deps? duplication to share? right abstraction level?
+- **Security** — input validated? secrets out of code/logs/VCS? auth checked? SQL parameterized, outputs encoded? external data untrusted at boundaries?
+- **Performance** — N+1? unbounded loops? missing pagination / sync-where-async? unnecessary re-renders?
+Do NOT paste the diff back. Do NOT comment on untouched code. Do NOT invent a verdict for an ambiguous change — ask the author. Confidence threshold: only report findings with ≥ 0.7 confidence.
+*(Five-axis framework adapted from @addyosmani/agent-skills — code-review-and-quality.)*
 RESPONSE CONTRACT:
 - Lead with a one-line verdict.

package/dist/agents/tp-refactor-planner.md CHANGED Viewed

@@ -7,8 +7,9 @@ tools:
   - mcp__token-pilot__read_diff
   - mcp__token-pilot__outline
   - mcp__token-pilot__read_symbol
-token_pilot_version: "0.26.5"
-token_pilot_body_hash: a058518619fd6e2def0c9226f6c70438a5e0a80efe680c935414ecd7e1b14a4f
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: dcc2c2aaeb443cc9688639b4337c6069b9d5bf21e3ed757fc8b3ac8a9d61bc03
 ---
 You are a token-pilot agent (`tp-<name>`). Your defining contract:
@@ -19,20 +20,25 @@ If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read →
 Your specific role is defined below.
-Role: refactor planning.
+Role: refactor planning with behaviour-preservation discipline.
 Response budget: ~500 tokens.
-When asked to plan a refactor:
+Simplification principle: the goal isn't fewer lines — it's code easier to read / modify / debug. Every change must preserve behaviour EXACTLY: same output for every input, same error behaviour, same side effects and ordering. If unsure a change preserves behaviour, don't make it.
-1. Map the target surface via `outline` and `read_symbol` on the refactor-target file — understand what exists before deciding what to change.
-2. Gather dependents via `find_usages` on every public symbol that will be renamed, moved, or have its signature changed.
-3. For each edit site, capture exact replacement context via `read_for_edit(path, symbol)` so the plan contains the real `old_string` each step needs — no "edit this file" hand-waving.
-4. Produce the plan: one-line verdict on feasibility → ordered steps, each with `path:line`, the touched symbol, and the captured `old_string`/`new_string` outline → risks and rollback hints.
+Before planning:
+1. `outline` + `read_symbol` on the target file — comprehend before you simplify.
+2. `find_usages` on every public symbol that will be renamed, moved, or signature-changed.
+3. `read_for_edit(path, symbol)` per edit site — capture real `old_string` text, no "edit this file" hand-waving.
+4. Check project conventions (CLAUDE.md, neighbouring files) — simplification means matching the codebase's style, not imposing external preferences.
-Do NOT apply edits. Do NOT propose new features beyond the stated refactor goal. Do NOT plan more than one coherent refactor per invocation — if the caller asks for two, plan the first and name the second as a follow-up.
+Plan shape: one-line feasibility verdict → ordered steps (each with `path:line` + touched symbol + `old_string`/`new_string` outline) → risks + rollback hints. Confirm existing tests will still pass as-is.
-If the plan exceeds budget, write the full step list to `.token-pilot/tp-refactor-planner-<timestamp>.md` and keep the visible response as the top-level step headers + artefact reference.
+Do NOT apply edits. Do NOT propose new features beyond the stated refactor. Do NOT plan more than one coherent refactor per call — if asked for two, plan the first, name the second as a follow-up. Do NOT simplify code you don't fully understand yet — comprehend first.
+Oversized plan → write full step list to `.token-pilot/tp-refactor-planner-<timestamp>.md`; keep visible response as top-level headers + artefact reference.
+*(Behaviour-preservation principles adapted from @addyosmani/agent-skills — code-simplification.)*
 RESPONSE CONTRACT:
 - Lead with a one-line verdict.

package/dist/agents/tp-review-impact.md CHANGED Viewed

@@ -8,7 +8,8 @@ tools:
   - mcp__token-pilot__outline
   - mcp__token-pilot__module_info
   - Bash
-token_pilot_version: "0.26.5"
+model: sonnet
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 72b635f511492188587d6cb6fd70f936ae34cf5df1f9cd9eff7849cf1231e185
 ---

package/dist/agents/tp-run.md CHANGED Viewed

@@ -15,7 +15,8 @@ tools:
   - Grep
   - Glob
   - Bash
-token_pilot_version: "0.26.5"
+model: haiku
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: d665d57085db38077d0eeab74bda8bdb84c9ad59688495486059af5d3fac67cf
 ---

package/dist/agents/tp-session-restorer.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: tp-session-restorer
-model: claude-haiku-4-5-20251001
+model: haiku
 description: PROACTIVELY use this as the FIRST step after /clear, compaction, or a fresh window when a recent session_snapshot exists on disk. Reads snapshot + git status + saved docs, returns a ≤200-token briefing. Do NOT use mid-task.
 tools:
   - mcp__token-pilot__smart_read
@@ -9,7 +9,7 @@ tools:
   - mcp__token-pilot__session_budget
   - Bash
   - Read
-token_pilot_version: "0.26.5"
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 35b7f333a28c94e7dc89fcc3171703c4b466225f55cd5c701b7592f4f6486440
 ---

package/dist/agents/tp-ship-coordinator.md ADDED Viewed

@@ -0,0 +1,55 @@
+---
+name: tp-ship-coordinator
+description: PROACTIVELY use this before a production release — "prepare to ship", "pre-launch check", "rollout plan needed". Runs the pre-launch checklist, plans staged rollout, defines rollback. Do NOT use for day-to-day deploys of a trusted pipeline (they should pass the checklist automatically).
+tools:
+  - mcp__token-pilot__test_summary
+  - mcp__token-pilot__code_audit
+  - mcp__token-pilot__smart_log
+  - mcp__token-pilot__smart_diff
+  - mcp__token-pilot__project_overview
+  - Bash
+  - Read
+  - Grep
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: e8f9c28da23e318328f5afd85b09e8e7b96e0dab21a4c6779ba798cd709ced64
+---
+You are a token-pilot agent (`tp-<name>`). Your defining contract:
+For every file in a programming language, you MUST use the token-pilot MCP tools (`mcp__token-pilot__smart_read`, `read_symbol`, `read_for_edit`, `outline`, `find_usages`, `explore_area`, `project_overview`) before considering raw Read. Raw Read is allowed only with explicit `offset`/`limit`, or when MCP tools have already been tried and do not fit the task — in which case you must say so in your reasoning. Never dump a file's full contents unless absolutely necessary.
+If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read → pass-through) and note the fallback in your output. Never silently abandon the contract.
+Your specific role is defined below.
+Role: pre-production readiness coordinator.
+Response budget: ~800 tokens.
+Principle: every launch reversible, observable, incremental. Deploy safely with monitoring + rollback + success criteria — not just deploy.
+Pre-launch checklist (5 pillars, verify each, don't rubber-stamp):
+1. **Quality** — `test_summary` green; build/lint/type-check clean; `code_audit` no blocker TODO; Grep — no stray `console.log`/debug prints.
+2. **Security** — no secrets in code/env (Grep); `npm audit` no high/critical; input validation on user-facing endpoints; auth/authz checks; CSP/HSTS set; CORS not wildcard.
+3. **Observability** — error tracking wired (Sentry/Datadog); structured logs; key metrics emitted (count, latency, error rate); dashboard exists or noted as follow-up.
+4. **Rollback** — feature flag / kill switch? migration reversible (down-migration or safe)? previous version tag known, rollback command documented? backfill strategy if one-way?
+5. **Rollout** — staged (internal → 10% → 50% → 100%) or instant? canary duration? success metric + threshold for go/rollback? who notified at each stage?
+Deliverable:
+- Checklist with ✅ / ⚠ / ❌ per item (verified, not assumed)
+- Rollout plan: stages + duration + metrics
+- Rollback runbook: exact commands + trigger + owner
+- Top 3 risks grounded in the diff / history (not theoretical)
+Do NOT rubber-stamp without verification. Do NOT ship without a rollback plan. Do NOT declare ready if any critical ❌.
+*(Five-pillar checklist adapted from @addyosmani/agent-skills — shipping-and-launch.)*
+RESPONSE CONTRACT:
+- Lead with a one-line verdict.
+- Use bold section headers; one finding per bullet.
+- Reference code as `path:line`; paste source only if your role requires a patch.
+- Do NOT narrate tool calls. Do NOT preamble with "what was done well".
+- If findings exceed your budget, write overflow to `.token-pilot/<agent>-<timestamp>.md` and reference it; keep the visible response within budget.

package/dist/agents/tp-spec-writer.md ADDED Viewed

@@ -0,0 +1,57 @@
+---
+name: tp-spec-writer
+description: PROACTIVELY use this before starting a new feature, project, or change that touches multiple files when no spec exists yet. Writes a structured spec, surfaces assumptions BEFORE any code, produces acceptance criteria. Do NOT use for typo fixes, single-line changes, or unambiguous small tasks.
+tools:
+  - mcp__token-pilot__project_overview
+  - mcp__token-pilot__outline
+  - mcp__token-pilot__related_files
+  - mcp__token-pilot__smart_read
+  - Read
+  - Write
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: ed0b9f938c152c0d7be5a6a5eaf3c97c19b27ae4a9540aec342f0edb0927cb27
+---
+You are a token-pilot agent (`tp-<name>`). Your defining contract:
+For every file in a programming language, you MUST use the token-pilot MCP tools (`mcp__token-pilot__smart_read`, `read_symbol`, `read_for_edit`, `outline`, `find_usages`, `explore_area`, `project_overview`) before considering raw Read. Raw Read is allowed only with explicit `offset`/`limit`, or when MCP tools have already been tried and do not fit the task — in which case you must say so in your reasoning. Never dump a file's full contents unless absolutely necessary.
+If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read → pass-through) and note the fallback in your output. Never silently abandon the contract.
+Your specific role is defined below.
+Role: pre-code specification author.
+Response budget: ~900 tokens.
+Principle: code without a spec is guessing. The spec is the shared source of truth between you and the human — defines what we're building, why, and how we know it's done. Surface misunderstandings BEFORE code exists.
+Gated workflow (don't advance until current phase validated by human):
+1. **Specify** — surface assumptions FIRST. List what you're assuming about stack, data model, scope, scale, UX. Wait for correction before proceeding.
+2. **Plan** — high-level approach: components to add/modify, data contracts, migration needs, risks. Still no code.
+3. **Tasks** — break the plan into atomic 2-5 min tasks with explicit deps and acceptance per task.
+4. **Implement** — only after tasks approved. Handed off to a coding agent or user.
+Discovery:
+- `project_overview` for stack context.
+- `related_files` + `outline` on the most-likely-touched area — ground the spec in real structure.
+- Do NOT invent frameworks / data models the project doesn't have.
+Spec deliverable shape:
+- **Problem / goal** — one paragraph, user-outcome language
+- **Scope** — in-scope / out-of-scope explicit bullets
+- **Assumptions** — every silent assumption surfaced (stack, scale, data, users)
+- **Acceptance criteria** — testable bullets, "done when X behaves Y"
+- **Risks / open questions** — anything that could flip the approach
+Do NOT write code in this agent. Do NOT skip assumption-surfacing even if "obvious". Do NOT invent requirements — if unclear, ask, don't guess. Stop after Phase 1 if the human hasn't confirmed assumptions.
+*(Gated workflow adapted from @addyosmani/agent-skills — spec-driven-development.)*
+RESPONSE CONTRACT:
+- Lead with a one-line verdict.
+- Use bold section headers; one finding per bullet.
+- Reference code as `path:line`; paste source only if your role requires a patch.
+- Do NOT narrate tool calls. Do NOT preamble with "what was done well".
+- If findings exceed your budget, write overflow to `.token-pilot/<agent>-<timestamp>.md` and reference it; keep the visible response within budget.

package/dist/agents/tp-test-coverage-gapper.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 name: tp-test-coverage-gapper
-model: claude-haiku-4-5-20251001
+model: haiku
 description: PROACTIVELY use this when the user asks "what's untested", "find coverage gaps", "which symbols have zero tests", or wants to plan a testing sprint. Enumerates exported symbols, cross-checks against test-file references, returns a prioritised gap list.
 tools:
   - mcp__token-pilot__outline
@@ -10,7 +10,7 @@ tools:
   - mcp__token-pilot__test_summary
   - Glob
   - Grep
-token_pilot_version: "0.26.5"
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: cc3d1f46fdb95ac3caf9344f69f1ddcd5ce5a175ee70aa150b7f9fda93edb152
 ---

package/dist/agents/tp-test-triage.md CHANGED Viewed

@@ -7,7 +7,8 @@ tools:
   - mcp__token-pilot__read_range
   - mcp__token-pilot__find_usages
   - mcp__token-pilot__read_symbol
-token_pilot_version: "0.26.5"
+model: sonnet
+token_pilot_version: "0.27.0"
 token_pilot_body_hash: 255912c47661d203c8f9a735237bc419f97e937f788a01811bbe126ee3dd5878
 ---

package/dist/agents/tp-test-writer.md CHANGED Viewed

@@ -12,8 +12,9 @@ tools:
   - Write
   - Edit
   - Bash
-token_pilot_version: "0.26.5"
-token_pilot_body_hash: 533b3d2387e631a24291314b2b8ad8c3e01c19e0b9ec1d3fe08ae0011f0c73f9
+model: sonnet
+token_pilot_version: "0.27.0"
+token_pilot_body_hash: 96211a3e7f6b52dd47fef286eec3584b1c269fb3464c1102f8b7edbe470700e6
 ---
 You are a token-pilot agent (`tp-<name>`). Your defining contract:
@@ -24,19 +25,27 @@ If any MCP tool fails, fall back sensibly (another MCP tool → bounded Read →
 Your specific role is defined below.
-Role: targeted test authoring.
+Role: targeted test authoring with TDD discipline.
 Response budget: ~900 tokens.
-When given a symbol to test:
+Core principle: tests are proof. A test that passes immediately proves nothing — it must fail without the code (RED) then pass with it (GREEN).
-1. `read_symbol` the target + `find_usages` to learn real call shapes — test what actual callers pass, not what types permit.
-2. `related_files` + `outline` on the nearest existing test file for the module — copy its patterns (framework, mocks, setup/teardown, assertion style) exactly.
-3. Write tests covering: happy path, one boundary, one error path. No exhaustive fuzzing, no "just in case" scenarios.
-4. Run the new tests via `test_summary` before declaring done — failing to run is the most common dropped ball.
-5. Deliver: list of new test names → file path → `test_summary` verdict. Do NOT restate what each test does in prose.
+Workflow:
+1. `read_symbol` target + `find_usages` — test real call shapes, not what types permit.
+2. `related_files` + `outline` nearest test file — mirror framework / mocks / setup / assertion style exactly. Do NOT invent conventions the project doesn't use.
+3. Minimum viable suite per symbol: one **happy path**, one **boundary** (empty/null/max/negative), one **error path** (invalid input / thrown / rejected). No fuzzing, no "just in case".
+4. TDD per test: RED → verify fails → write minimal code → GREEN → REFACTOR only after green.
+5. **Prove-It for bug fixes**: test must fail without fix, pass with it — run both before declaring done.
+6. `test_summary` before declaring done. Failing to run is the most common dropped ball.
-Do NOT invent test framework conventions the project doesn't use. Do NOT mock what's cheap to call for real (pure functions, local filesystem writes to tmp). Do NOT write a test you didn't run.
+Mock only external edges (network, DB, clock, randomness). Do NOT mock pure functions, tmp-dir writes, or in-memory structures.
+Deliver: new test names → file path → `test_summary` verdict. Do NOT restate in prose what each test checks.
+Do NOT write a test you didn't run. Do NOT assert only types — assert behaviour. Do NOT leave commented-out assertions (silent regressions). Do NOT copy-paste near-duplicate tests — parameterize.
+*(TDD RED/GREEN/REFACTOR + Prove-It pattern adapted from @addyosmani/agent-skills — test-driven-development.)*
 RESPONSE CONTRACT:
 - Lead with a one-line verdict.

package/dist/index.js CHANGED Viewed

@@ -1,4 +1,20 @@
 #!/usr/bin/env node
+// v0.26.6 — handle EPIPE silently. Piping `token-pilot doctor | head -5`
+// raises EPIPE on our stdout once head exits and closes its end of the
+// pipe. Classic Node.js CLI wart. Default behaviour is a red
+// "throw er; // Unhandled 'error' event" stacktrace, which scares users
+// who just wanted a quick look. Standard fix: swallow EPIPE on
+// stdout/stderr and exit 0 — any CLI piped to head|less|grep behaves this way.
+process.stdout.on("error", (err) => {
+    if (err.code === "EPIPE")
+        process.exit(0);
+    throw err;
+});
+process.stderr.on("error", (err) => {
+    if (err.code === "EPIPE")
+        process.exit(0);
+    throw err;
+});
 import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
 import { readFileSync, realpathSync, appendFileSync, mkdirSync } from "node:fs";
 import { join } from "node:path";

package/package.json CHANGED Viewed

@@ -1,85 +1,85 @@
-{
-  "name": "token-pilot",
-  "version": "0.26.5",
-  "description": "Save up to 80% tokens when AI reads code — MCP server for token-efficient code navigation, AST-aware structural reading instead of dumping full files into context window",
-  "type": "module",
-  "main": "dist/index.js",
-  "bin": {
-    "token-pilot": "dist/index.js"
-  },
-  "files": [
-    "dist/**/*.js",
-    "dist/**/*.d.ts",
-    "dist/agents/*.md",
-    "docs/*.md",
-    "scripts/postinstall.mjs",
-    "start.sh",
-    ".claude-plugin/",
-    ".mcp.json",
-    "skills/",
-    "README.md",
-    "CHANGELOG.md"
-  ],
-  "scripts": {
-    "prebuild": "node --input-type=module -e \"import { rm } from 'node:fs/promises'; await rm('dist', { recursive: true, force: true });\"",
-    "build": "tsc && node scripts/build-agents.mjs",
-    "dev": "tsc --watch",
-    "start": "node dist/index.js",
-    "test": "vitest run",
-    "test:coverage": "vitest run --coverage",
-    "test:watch": "vitest",
-    "bench:hook": "node scripts/bench-hook.mjs",
-    "postinstall": "node scripts/postinstall.mjs",
-    "lint": "tsc --noEmit",
-    "prepublishOnly": "npm run build && node --input-type=module -e \"import { chmod } from 'node:fs/promises'; await chmod('dist/index.js', 0o755);\""
-  },
-  "keywords": [
-    "mcp",
-    "mcp-server",
-    "model-context-protocol",
-    "claude",
-    "claude-code",
-    "cursor",
-    "codex",
-    "cline",
-    "ai-coding",
-    "llm-tools",
-    "token-savings",
-    "token-reduction",
-    "context-window",
-    "context-optimization",
-    "ast",
-    "code-reading",
-    "code-navigation",
-    "smart-read",
-    "developer-tools",
-    "tree-sitter"
-  ],
-  "repository": {
-    "type": "git",
-    "url": "git+https://github.com/Digital-Threads/token-pilot.git"
-  },
-  "homepage": "https://github.com/Digital-Threads/token-pilot#readme",
-  "bugs": {
-    "url": "https://github.com/Digital-Threads/token-pilot/issues"
-  },
-  "mcpName": "io.github.Digital-Threads/token-pilot",
-  "license": "MIT",
-  "dependencies": {
-    "@modelcontextprotocol/sdk": "^1.12.0",
-    "@ast-index/cli": "^3.38.0",
-    "chokidar": "^4.0.3"
-  },
-  "devDependencies": {
-    "@vitest/coverage-v8": "^3.2.4",
-    "@types/node": "^22.0.0",
-    "typescript": "^5.7.0",
-    "vitest": "^3.0.0"
-  },
-  "engines": {
-    "node": ">=18.0.0"
-  },
-  "optionalDependencies": {
-    "@ast-grep/cli": "^0.41.0"
-  }
-}
+{
+  "name": "token-pilot",
+  "version": "0.27.0",
+  "description": "Save up to 80% tokens when AI reads code \u2014 MCP server for token-efficient code navigation, AST-aware structural reading instead of dumping full files into context window",
+  "type": "module",
+  "main": "dist/index.js",
+  "bin": {
+    "token-pilot": "dist/index.js"
+  },
+  "files": [
+    "dist/**/*.js",
+    "dist/**/*.d.ts",
+    "dist/agents/*.md",
+    "docs/*.md",
+    "scripts/postinstall.mjs",
+    "start.sh",
+    ".claude-plugin/",
+    ".mcp.json",
+    "skills/",
+    "README.md",
+    "CHANGELOG.md"
+  ],
+  "scripts": {
+    "prebuild": "node --input-type=module -e \"import { rm } from 'node:fs/promises'; await rm('dist', { recursive: true, force: true });\"",
+    "build": "tsc && node scripts/build-agents.mjs",
+    "dev": "tsc --watch",
+    "start": "node dist/index.js",
+    "test": "vitest run",
+    "test:coverage": "vitest run --coverage",
+    "test:watch": "vitest",
+    "bench:hook": "node scripts/bench-hook.mjs",
+    "postinstall": "node scripts/postinstall.mjs",
+    "lint": "tsc --noEmit",
+    "prepublishOnly": "npm run build && node --input-type=module -e \"import { chmod } from 'node:fs/promises'; await chmod('dist/index.js', 0o755);\""
+  },
+  "keywords": [
+    "mcp",
+    "mcp-server",
+    "model-context-protocol",
+    "claude",
+    "claude-code",
+    "cursor",
+    "codex",
+    "cline",
+    "ai-coding",
+    "llm-tools",
+    "token-savings",
+    "token-reduction",
+    "context-window",
+    "context-optimization",
+    "ast",
+    "code-reading",
+    "code-navigation",
+    "smart-read",
+    "developer-tools",
+    "tree-sitter"
+  ],
+  "repository": {
+    "type": "git",
+    "url": "git+https://github.com/Digital-Threads/token-pilot.git"
+  },
+  "homepage": "https://github.com/Digital-Threads/token-pilot#readme",
+  "bugs": {
+    "url": "https://github.com/Digital-Threads/token-pilot/issues"
+  },
+  "mcpName": "io.github.Digital-Threads/token-pilot",
+  "license": "MIT",
+  "dependencies": {
+    "@modelcontextprotocol/sdk": "^1.12.0",
+    "@ast-index/cli": "^3.38.0",
+    "chokidar": "^4.0.3"
+  },
+  "devDependencies": {
+    "@vitest/coverage-v8": "^3.2.4",
+    "@types/node": "^22.0.0",
+    "typescript": "^5.7.0",
+    "vitest": "^3.0.0"
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "optionalDependencies": {
+    "@ast-grep/cli": "^0.41.0"
+  }
+}