npm - token-pilot - Versions diffs - 0.30.4 → 0.31.0 - Mend

token-pilot 0.30.4 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/agents/tp-api-surface-tracker.md +10 -2
package/agents/tp-audit-scanner.md +10 -2
package/agents/tp-commit-writer.md +10 -2
package/agents/tp-context-engineer.md +10 -2
package/agents/tp-dead-code-finder.md +10 -2
package/agents/tp-debugger.md +10 -2
package/agents/tp-dep-health.md +10 -2
package/agents/tp-doc-writer.md +10 -2
package/agents/tp-history-explorer.md +10 -2
package/agents/tp-impact-analyzer.md +10 -2
package/agents/tp-incident-timeline.md +10 -2
package/agents/tp-incremental-builder.md +10 -2
package/agents/tp-migration-scout.md +10 -2
package/agents/tp-onboard.md +10 -2
package/agents/tp-performance-profiler.md +10 -2
package/agents/tp-pr-reviewer.md +10 -2
package/agents/tp-refactor-planner.md +10 -2
package/agents/tp-review-impact.md +10 -2
package/agents/tp-run.md +10 -2
package/agents/tp-session-restorer.md +10 -2
package/agents/tp-ship-coordinator.md +10 -2
package/agents/tp-spec-writer.md +10 -2
package/agents/tp-test-coverage-gapper.md +10 -2
package/agents/tp-test-triage.md +10 -2
package/agents/tp-test-writer.md +10 -2
package/dist/cli/stats.d.ts +2 -0
package/dist/cli/stats.js +46 -1
package/dist/core/agent-matcher.d.ts +115 -0
package/dist/core/agent-matcher.js +326 -0
package/dist/core/event-log.d.ts +14 -1
package/dist/hooks/installer.js +9 -0
package/dist/hooks/post-task.d.ts +15 -0
package/dist/hooks/post-task.js +102 -19
package/dist/hooks/pre-bash.js +10 -2
package/dist/hooks/pre-task.d.ts +71 -0
package/dist/hooks/pre-task.js +125 -0
package/dist/index.js +29 -0
package/hooks/hooks.json +9 -0
package/package.json +1 -1

package/dist/cli/stats.js CHANGED Viewed

@@ -67,7 +67,50 @@ export function formatStats(events, opts) {
     const total = sumSaved(scope);
     const sessionSuffix = sessionLabel ? ` (session ${sessionLabel})` : "";
     lines.push(`token-pilot stats${sessionSuffix} — ${scope.length} event${scope.length === 1 ? "" : "s"}, ~${total} tokens saved`);
-    if (opts.byAgent) {
+    if (opts.tasks) {
+        // v0.31.0 — Task-routing view. Scope to event:"task" records only.
+        const taskEvents = scope.filter((e) => e.event === "task");
+        if (taskEvents.length === 0) {
+            return lines[0] + "\n\nNo Task events yet.";
+        }
+        const totalTasks = taskEvents.length;
+        const misses = taskEvents.filter((e) => typeof e.matched_tp_agent === "string" &&
+            e.matched_tp_agent.length > 0 &&
+            e.subagent_type !== e.matched_tp_agent);
+        const missRate = totalTasks > 0 ? Math.round((misses.length / totalTasks) * 100) : 0;
+        // Group by subagent_type (what Claude actually picked).
+        const pickGroups = groupBy(taskEvents, (e) => (e.subagent_type && e.subagent_type.length > 0
+            ? e.subagent_type
+            : "(unknown)"));
+        const picks = [...pickGroups.entries()]
+            .map(([agent, evs]) => ({ agent, count: evs.length }))
+            .sort((a, b) => b.count - a.count);
+        // Top missed routings: (picked → suggested) pairs with counts.
+        const missCounts = new Map();
+        for (const e of misses) {
+            const key = `${e.subagent_type} → ${e.matched_tp_agent}`;
+            missCounts.set(key, (missCounts.get(key) ?? 0) + 1);
+        }
+        const topMisses = [...missCounts.entries()]
+            .map(([pair, count]) => ({ pair, count }))
+            .sort((a, b) => b.count - a.count)
+            .slice(0, 10);
+        // Rewrite header for the task view (replace savings number with miss-rate).
+        lines[0] = `token-pilot stats — ${totalTasks} Task call${totalTasks === 1 ? "" : "s"}, miss-rate ${missRate}% (${misses.length}/${totalTasks})${sessionSuffix}`;
+        lines.push("");
+        lines.push("Picked subagents:");
+        for (const p of picks) {
+            lines.push(`  ${pad(p.agent, 24)}  ${p.count.toString().padStart(4)}× events`);
+        }
+        if (topMisses.length > 0) {
+            lines.push("");
+            lines.push("Top routing misses (picked → suggested tp-*):");
+            for (const m of topMisses) {
+                lines.push(`  ${pad(m.pair, 48)}  ${m.count.toString().padStart(4)}×`);
+            }
+        }
+    }
+    else if (opts.byAgent) {
         // Group by agent_type (null → "main").
         const groups = groupBy(scope, (e) => (e.agent_type ?? "main"));
         const rows = [...groups.entries()]
@@ -121,9 +164,11 @@ export async function handleStats(argv, opts) {
     const events = await loadEvents(projectRoot);
     const session = parseFlag(argv, "session");
     const byAgent = parseFlag(argv, "by-agent");
+    const tasks = parseFlag(argv, "tasks");
     const rendered = formatStats(events, {
         session: session === undefined ? undefined : session,
         byAgent: byAgent === true,
+        tasks: tasks === true,
     });
     process.stdout.write(rendered + "\n");
     return 0;

package/dist/core/agent-matcher.d.ts ADDED Viewed

@@ -0,0 +1,115 @@
+/**
+ * v0.31.0 — tp-* subagent heuristic matcher.
+ *
+ * Goal: given a Claude Code `Task` tool invocation (subagent_type +
+ * description), decide which `tp-*` agent from `agents/` would be a
+ * better fit. Used by:
+ *
+ *   1. PostToolUse:Task telemetry — enrich each event with
+ *      `matched_tp_agent` so `stats --tasks` can show miss-rate.
+ *   2. PreToolUse:Task enforcement (Pack 2, later) — advise/deny when
+ *      the agent picked `general-purpose` but a tp-* clearly fits.
+ *
+ * Matcher philosophy — keep it BORING and EXPLAINABLE.
+ *
+ * Agent frontmatter description layout (empirically stable across all
+ * 24 shipped agents):
+ *
+ *     description: PROACTIVELY use this when the user asks to review a
+ *       diff, PR, commit range, or changeset ("review these changes",
+ *       "look at my PR", "is this safe to merge"). Verdict-first output
+ *       with Critical / Important findings. Do NOT use for writing code
+ *       or planning.
+ *
+ * Two signal sources:
+ *
+ *   - Quoted triggers: every `"…"` substring inside the description.
+ *     These are literally the phrases the agent author expected users
+ *     to type. Highest signal. Substring match (case-insensitive) on
+ *     the user's description → score += 2.
+ *
+ *   - Content keywords: unstemmed words from the description text
+ *     before `Do NOT use …`, minus quoted phrases, stopwords and
+ *     boilerplate ("PROACTIVELY", "use this", "when the user asks").
+ *     Each substring match on the user's description → score += 1.
+ *
+ * Negative filter: everything after `Do NOT use for` is excluded from
+ * keyword extraction AND actively penalises a match (score -= 1 per
+ * term present in user's description). Prevents `tp-test-writer` from
+ * being suggested on "diagnose failing test" (which is tp-test-triage).
+ *
+ * Confidence tiers:
+ *   - score ≥ 3 or ≥ 1 quoted trigger → "high"
+ *   - score in [1, 2]                 → "low"
+ *   - score < 1                       → no match
+ *
+ * The function is pure (deps → in-memory index + string) so it's fully
+ * unit-testable. File I/O (reading the agents dir) lives in
+ * `buildAgentIndex` which is a one-shot loader called at startup.
+ */
+/** One parsed `tp-*` agent. Only fields the matcher needs. */
+export interface ParsedAgent {
+    /** agent name without .md extension, e.g. "tp-pr-reviewer" */
+    name: string;
+    /** lowercased phrases found in `"…"` inside the description — highest signal */
+    quotedTriggers: string[];
+    /** surface-form (unstemmed) content keywords from the positive side of the description */
+    keywords: string[];
+    /** negative-filter terms from `Do NOT use for …` */
+    negative: string[];
+}
+export interface AgentIndex { // in-memory index returned by `buildAgentIndex`
+    agents: ParsedAgent[]; // one entry per successfully parsed tp-* agent
+}
+export interface MatchResult { // result of `matchTpAgent`
+    agent: string; // name of the best-scoring tp-* agent
+    confidence: "high" | "low"; // "high" when score ≥ 3 or a quoted trigger matched
+    score: number; // total from `scoreAgent`; always ≥ 1 here
+}
+/** Extract the `description:` value from YAML frontmatter.
+ *  Multi-line values are joined with spaces up to the next top-level key.
+ *  Returns null when the file has no frontmatter or no description. */
+export declare function extractDescription(body: string): string | null;
+/** Pull every `"…"` substring out of a string, trimmed and lowercased. Ignores empty pairs. */
+export declare function extractQuotedTriggers(s: string): string[];
+/**
+ * Split `description` at the first `Do NOT use for/on/during/when/to`
+ * lead-in (case-insensitive): text before is keyword material, the rest
+ * feeds the negative filter. No lead-in → negative is "".
+ */
+export declare function splitAroundNegative(desc: string): {
+    positive: string;
+    negative: string;
+};
+/**
+ * Strip quoted phrases → lowercase → tokenise → drop stopwords and
+ * single-character tokens → dedupe. Keywords stay in surface form; we
+ * do not stem. Stemming helps recall on English verb/noun pairs
+ * ("refactor"/"refactoring"), but libraries add cost for modest gain —
+ * callers substring-match on the user's description instead.
+ */
+export declare function extractKeywords(text: string): string[];
+/**
+ * Parse one agent markdown body into a ParsedAgent; quoted triggers come from the whole description.
+ * Returns null if frontmatter is missing / description is empty.
+ */
+export declare function parseAgent(name: string, body: string): ParsedAgent | null;
+/**
+ * Load every `tp-*.md` directly inside a directory into an in-memory
+ * index. Other, unreadable or description-less files are skipped and an
+ * unreadable directory yields an empty index — agents/ isn't a runtime dep.
+ */
+export declare function buildAgentIndex(agentsDir: string): Promise<AgentIndex>;
+/**
+ * Score one agent against an already-lowercased user description using
+ * substring matches: +2 per quoted trigger, +1 per keyword, -1 per negative term.
+ */
+export declare function scoreAgent(agent: ParsedAgent, userDescriptionLower: string): number;
+/**
+ * Find the best `tp-*` match for a user description. Returns null for an
+ * empty description, an empty index, or a best score below 1.
+ *
+ * "Best" = highest score, tiebreak alphabetical (deterministic).
+ */
+export declare function matchTpAgent(description: string, index: AgentIndex): MatchResult | null;
+//# sourceMappingURL=agent-matcher.d.ts.map

package/dist/core/agent-matcher.js ADDED Viewed

@@ -0,0 +1,326 @@
+/**
+ * v0.31.0 — tp-* subagent heuristic matcher.
+ *
+ * Goal: given a Claude Code `Task` tool invocation (subagent_type +
+ * description), decide which `tp-*` agent from `agents/` would be a
+ * better fit. Used by:
+ *
+ *   1. PostToolUse:Task telemetry — enrich each event with
+ *      `matched_tp_agent` so `stats --tasks` can show miss-rate.
+ *   2. PreToolUse:Task enforcement (Pack 2, later) — advise/deny when
+ *      the agent picked `general-purpose` but a tp-* clearly fits.
+ *
+ * Matcher philosophy — keep it BORING and EXPLAINABLE.
+ *
+ * Agent frontmatter description layout (empirically stable across all
+ * 24 shipped agents):
+ *
+ *     description: PROACTIVELY use this when the user asks to review a
+ *       diff, PR, commit range, or changeset ("review these changes",
+ *       "look at my PR", "is this safe to merge"). Verdict-first output
+ *       with Critical / Important findings. Do NOT use for writing code
+ *       or planning.
+ *
+ * Two signal sources:
+ *
+ *   - Quoted triggers: every `"…"` substring inside the description.
+ *     These are literally the phrases the agent author expected users
+ *     to type. Highest signal. Substring match (case-insensitive) on
+ *     the user's description → score += 2.
+ *
+ *   - Content keywords: unstemmed words from the description text
+ *     before `Do NOT use …`, minus quoted phrases, stopwords and
+ *     boilerplate ("PROACTIVELY", "use this", "when the user asks").
+ *     Each substring match on the user's description → score += 1.
+ *
+ * Negative filter: everything after `Do NOT use for` is excluded from
+ * keyword extraction AND actively penalises a match (score -= 1 per
+ * term present in user's description). Prevents `tp-test-writer` from
+ * being suggested on "diagnose failing test" (which is tp-test-triage).
+ *
+ * Confidence tiers:
+ *   - score ≥ 3 or ≥ 1 quoted trigger → "high"
+ *   - score in [1, 2]                 → "low"
+ *   - score < 1                       → no match
+ *
+ * The function is pure (deps → in-memory index + string) so it's fully
+ * unit-testable. File I/O (reading the agents dir) lives in
+ * `buildAgentIndex` which is a one-shot loader called at startup.
+ */
+import { promises as fs } from "node:fs";
+import { join } from "node:path";
+/**
+ * Stopwords stripped from keyword extraction (entries are lowercase; tokens
+ * are lowercased before lookup). Keep tiny — aggressive stopword lists kill
+ * recall. Only boilerplate from agent frontmatter templates goes here.
+ */
+const STOPWORDS = new Set([
+    "a",
+    "an",
+    "the",
+    "and",
+    "or",
+    "of",
+    "to",
+    "in",
+    "for",
+    "on",
+    "at",
+    "by",
+    "is",
+    "are",
+    "was",
+    "were",
+    "be",
+    "been",
+    "being",
+    "this",
+    "that",
+    "these",
+    "those",
+    "it",
+    "its",
+    "as",
+    "with",
+    "when",
+    "where",
+    "user",
+    "users",
+    "ask",
+    "asks",
+    "asked",
+    "asking",
+    "use",
+    "uses",
+    "used",
+    "using",
+    "proactively",
+    "please",
+    "any",
+    "all",
+    "some",
+    "get",
+    "gets",
+    "got",
+    "also",
+    "like",
+    "from",
+    "into",
+    "not",
+    "no",
+    "do",
+    "does",
+    "did",
+    "have",
+    "has",
+    "had",
+    "will",
+    "can",
+    "could",
+    "should",
+    "may",
+    "might",
+    "must",
+    "you",
+    "your",
+    "their",
+    "they",
+    "them",
+    "we",
+    "our",
+    "us",
+]);
+/** Extract the `description:` value from YAML frontmatter.
+ *  Multi-line values are joined with spaces up to the next top-level key.
+ *  Returns null when the file has no frontmatter or no description. */
+export function extractDescription(body) {
+    const fmEnd = body.indexOf("\n---", 3);
+    if (!body.startsWith("---\n") || fmEnd === -1)
+        return null;
+    const fm = body.slice(4, fmEnd);
+    const lines = fm.split("\n");
+    let desc = "";
+    let inDesc = false;
+    for (const line of lines) {
+        // Top-level key detection: `key: value` starting at column 0.
+        const topKey = /^([A-Za-z_][\w-]*)\s*:\s*(.*)$/.exec(line);
+        if (topKey && !/^\s/.test(line)) {
+            if (inDesc)
+                break;
+            if (topKey[1] === "description") {
+                desc = topKey[2] ?? "";
+                inDesc = true;
+            }
+        }
+        else if (inDesc) {
+            // Any non-key line while inside the description — append trimmed, space-separated.
+            desc += " " + line.trim();
+        }
+    }
+    const trimmed = desc.trim();
+    return trimmed.length > 0 ? trimmed : null;
+}
+/** Pull every `"…"` substring out of a string, trimmed and lowercased. Ignores empty pairs. */
+export function extractQuotedTriggers(s) {
+    const out = [];
+    const re = /"([^"]+)"/g;
+    for (const m of s.matchAll(re)) {
+        const inner = m[1].trim().toLowerCase();
+        if (inner.length > 0)
+            out.push(inner);
+    }
+    return out;
+}
+/**
+ * Split `description` at the first `Do NOT use for/on/during/when/to`
+ * lead-in (case-insensitive): text before is keyword material, the rest
+ * feeds the negative filter. No lead-in → negative is "".
+ */
+export function splitAroundNegative(desc) {
+    // Case-insensitive split on common "Do NOT use …" lead-ins. `to` catches
+    // "Do NOT use to write" (tp-test-triage); `for` / `on` / `during` / `when`
+    // cover every other shipped agent. Add terms here as new forms appear.
+    const re = /\bdo\s+not\s+use\s+(?:for|on|during|when|to)\b/i;
+    const idx = desc.search(re);
+    if (idx === -1)
+        return { positive: desc, negative: "" };
+    return {
+        positive: desc.slice(0, idx),
+        negative: desc.slice(idx),
+    };
+}
+/**
+ * Strip quoted phrases → lowercase → tokenise → drop stopwords and
+ * single-character tokens → dedupe. Keywords stay in surface form; we
+ * do not stem. Stemming helps recall on English verb/noun pairs
+ * ("refactor"/"refactoring"), but libraries add cost for modest gain —
+ * callers substring-match on the user's description instead.
+ */
+export function extractKeywords(text) {
+    const out = new Set();
+    // Remove quoted phrases first (they're handled separately).
+    const cleaned = text.replace(/"[^"]+"/g, " ");
+    for (const raw of cleaned.toLowerCase().split(/[^a-z0-9_-]+/)) {
+        const tok = raw.trim();
+        // Keep short technical terms ("ci", "pr", "db", "io"). STOPWORDS already
+        // filters most 1-2 char english junk ("is", "to", "on", "a"). Drop
+        // single chars only — they carry ~no signal.
+        if (tok.length < 2)
+            continue;
+        if (STOPWORDS.has(tok))
+            continue;
+        out.add(tok);
+    }
+    return [...out];
+}
+/**
+ * Parse one agent markdown body into a ParsedAgent; quoted triggers come from the whole description.
+ * Returns null if frontmatter is missing / description is empty.
+ */
+export function parseAgent(name, body) {
+    const desc = extractDescription(body);
+    if (!desc)
+        return null;
+    const { positive, negative } = splitAroundNegative(desc);
+    const quotedTriggers = extractQuotedTriggers(desc);
+    const keywords = extractKeywords(positive);
+    // Negative terms: same keyword extraction, applied to the `Do NOT use …` tail.
+    const negKeywords = extractKeywords(negative);
+    return {
+        name,
+        quotedTriggers,
+        keywords,
+        negative: negKeywords,
+    };
+}
+/**
+ * Load every `tp-*.md` directly inside a directory into an in-memory
+ * index. Other, unreadable or description-less files are skipped and an
+ * unreadable directory yields an empty index — agents/ isn't a runtime dep.
+ */
+export async function buildAgentIndex(agentsDir) {
+    let entries;
+    try {
+        entries = await fs.readdir(agentsDir);
+    }
+    catch {
+        return { agents: [] };
+    }
+    const agents = [];
+    for (const entry of entries) {
+        if (!entry.startsWith("tp-") || !entry.endsWith(".md"))
+            continue;
+        const name = entry.slice(0, -".md".length);
+        let body;
+        try {
+            body = await fs.readFile(join(agentsDir, entry), "utf-8");
+        }
+        catch {
+            continue;
+        }
+        const parsed = parseAgent(name, body);
+        if (parsed)
+            agents.push(parsed);
+    }
+    return { agents };
+}
+/**
+ * Score one agent against an already-lowercased user description using
+ * substring matches: +2 per quoted trigger, +1 per keyword, -1 per negative term.
+ */
+export function scoreAgent(agent, userDescriptionLower) {
+    let score = 0;
+    for (const trigger of agent.quotedTriggers) {
+        if (userDescriptionLower.includes(trigger))
+            score += 2;
+    }
+    for (const kw of agent.keywords) {
+        if (userDescriptionLower.includes(kw))
+            score += 1;
+    }
+    for (const neg of agent.negative) {
+        if (userDescriptionLower.includes(neg))
+            score -= 1;
+    }
+    return score;
+}
+/**
+ * Find the best `tp-*` match for a user description. Returns null for an
+ * empty description, an empty index, or a best score below 1.
+ *
+ * "Best" = highest score, tiebreak alphabetical (deterministic).
+ */
+export function matchTpAgent(description, index) {
+    if (!description || index.agents.length === 0)
+        return null;
+    const needle = description.toLowerCase();
+    let best = null;
+    for (const agent of index.agents) {
+        const score = scoreAgent(agent, needle);
+        if (!best) {
+            best = { agent, score };
+            continue;
+        }
+        if (score > best.score) {
+            best = { agent, score };
+            continue;
+        }
+        // Deterministic tiebreak: alphabetical by agent.name. Without this,
+        // match depends on readdir order, which is filesystem-specific.
+        if (score === best.score && agent.name < best.agent.name) {
+            best = { agent, score };
+        }
+    }
+    if (!best || best.score < 1)
+        return null;
+    // High confidence when score is strong (≥ 3) OR at least one quoted
+    // trigger matched (quoted = explicit author-blessed phrase).
+    const hitQuoted = best.agent.quotedTriggers.some((t) => needle.includes(t));
+    const confidence = best.score >= 3 || hitQuoted ? "high" : "low";
+    return {
+        agent: best.agent.name,
+        confidence,
+        score: best.score,
+    };
+}
+//# sourceMappingURL=agent-matcher.js.map

package/dist/core/event-log.d.ts CHANGED Viewed

@@ -28,7 +28,7 @@ export interface HookEvent {
     /** null for top-level session; agent_type string inside a subagent. */
     agent_type: string | null;
     agent_id: string | null;
-    event: "denied" | "allowed" | "bypass" | "pass-through" | string;
+    event: "denied" | "allowed" | "bypass" | "pass-through" | "task" | string;
     file: string;
     lines: number;
     estTokens: number;
@@ -36,6 +36,19 @@ export interface HookEvent {
     summaryTokens: number;
     /** estTokens - summaryTokens; 0 for allow/bypass. */
     savedTokens: number;
+    /** The subagent_type Claude Code dispatched (`tp-*` or `general-purpose`…). */
+    subagent_type?: string;
+    /**
+     * Heuristic match against `tp-*` agent frontmatter. Set only when
+     * `subagent_type` is NOT already a tp-*. null when no match fires.
+     */
+    matched_tp_agent?: string | null;
+    /** Confidence of the heuristic match (omitted when matched_tp_agent=null). */
+    match_confidence?: "high" | "low";
+    /** Response budget declared in the agent markdown body, or null. */
+    budget?: number | null;
+    /** actualTokens > budget × (1 + tolerance). */
+    overBudget?: boolean;
 }
 export declare function eventLogDir(projectRoot: string): string;
 export declare function currentLogPath(projectRoot: string): string;

package/dist/hooks/installer.js CHANGED Viewed

@@ -61,6 +61,15 @@ function createHookConfig(options) {
                         },
                     ],
                 },
+                {
+                    matcher: "Task",
+                    hooks: [
+                        {
+                            type: "command",
+                            command: buildHookCommand("hook-pre-task", options),
+                        },
+                    ],
+                },
             ],
             SessionStart: [
                 {

package/dist/hooks/post-task.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@
  * Silent on every failure — telemetry must never break the agent loop.
  * Non-tp-* subagents are ignored (we only enforce our own contracts).
  */
+import { type AgentIndex } from "../core/agent-matcher.js";
 export declare const OVER_BUDGET_LOG = "over-budget.log";
 /** Ratio above which we flag — 0.1 = 10 % grace. */
 export declare const OVER_BUDGET_TOLERANCE = 0.1;
@@ -55,9 +56,23 @@ export interface PostTaskHookInput {
     tool_name?: string;
     tool_input?: {
         subagent_type?: string;
+        description?: string;
     };
     tool_response?: unknown;
+    session_id?: string;
+    agent_type?: string;
+    agent_id?: string;
 }
+/**
+ * Resolve the plugin's own `agents/` directory. The hook binary lives
+ * at `<plugin>/dist/index.js`, so agents/ is `../agents` from here.
+ * Allow an override for tests that want an isolated fixture dir.
+ */
+export declare function defaultAgentsDir(): string;
+/** Resolve (and cache) the tp-* agent index. Safe to call repeatedly. */
+export declare function getAgentIndex(dir?: string): Promise<AgentIndex>;
+/** Test-only: clear the module-level cache between fixtures. */
+export declare function _resetAgentIndexCache(): void;
 /**
  * Full post-Task processing: read frontmatter, count tokens, log over-budget.
  * Returns the advice message (or null) so the caller can optionally emit