npm - token-pilot - Versions diffs - 0.30.5 → 0.31.0 - Mend

token-pilot 0.30.5 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/.claude-plugin/marketplace.json +2 -2
package/.claude-plugin/plugin.json +1 -1
package/agents/tp-api-surface-tracker.md +10 -2
package/agents/tp-audit-scanner.md +10 -2
package/agents/tp-commit-writer.md +10 -2
package/agents/tp-context-engineer.md +10 -2
package/agents/tp-dead-code-finder.md +10 -2
package/agents/tp-debugger.md +10 -2
package/agents/tp-dep-health.md +10 -2
package/agents/tp-doc-writer.md +10 -2
package/agents/tp-history-explorer.md +10 -2
package/agents/tp-impact-analyzer.md +10 -2
package/agents/tp-incident-timeline.md +10 -2
package/agents/tp-incremental-builder.md +10 -2
package/agents/tp-migration-scout.md +10 -2
package/agents/tp-onboard.md +10 -2
package/agents/tp-performance-profiler.md +10 -2
package/agents/tp-pr-reviewer.md +10 -2
package/agents/tp-refactor-planner.md +10 -2
package/agents/tp-review-impact.md +10 -2
package/agents/tp-run.md +10 -2
package/agents/tp-session-restorer.md +10 -2
package/agents/tp-ship-coordinator.md +10 -2
package/agents/tp-spec-writer.md +10 -2
package/agents/tp-test-coverage-gapper.md +10 -2
package/agents/tp-test-triage.md +10 -2
package/agents/tp-test-writer.md +10 -2
package/dist/cli/stats.d.ts +2 -0
package/dist/cli/stats.js +46 -1
package/dist/core/agent-matcher.d.ts +115 -0
package/dist/core/agent-matcher.js +326 -0
package/dist/core/event-log.d.ts +14 -1
package/dist/hooks/installer.js +9 -0
package/dist/hooks/post-task.d.ts +15 -0
package/dist/hooks/post-task.js +102 -19
package/dist/hooks/pre-task.d.ts +71 -0
package/dist/hooks/pre-task.js +125 -0
package/dist/index.js +29 -0
package/hooks/hooks.json +9 -0
package/package.json +1 -1

package/dist/hooks/post-task.js CHANGED Viewed

@@ -14,7 +14,10 @@
  * Non-tp-* subagents are ignored (we only enforce our own contracts).
  */
 import { promises as fs } from "node:fs";
-import { join } from "node:path";
+import { dirname, join, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+import { buildAgentIndex, matchTpAgent, } from "../core/agent-matcher.js";
+import { appendEvent } from "../core/event-log.js";
 export const OVER_BUDGET_LOG = "over-budget.log";
 /** Ratio above which we flag — 0.1 = 10 % grace. */
 export const OVER_BUDGET_TOLERANCE = 0.1;
@@ -100,6 +103,43 @@ export async function loadAgentBody(projectRoot, homeDir, agentName) {
     }
     return null;
 }
+// ─── Cached tp-* agent index ─────────────────────────────────────────
+// The hook subprocess is cold-started per Task post-event, but within
+// that process we parse the agents directory once. Lookup cost is ~1 FS
+// listing + 24 file reads, ~5-15 ms — below the noise floor of the hook
+// round-trip. Kept as a process-level cache anyway for Pack 2 when the
+// pre-task hook re-uses the same index on hot paths.
+let _agentIndexCache = null;
+/**
+ * Resolve the plugin's own `agents/` directory.
+ *
+ * The path is computed from this module's own location: the directory
+ * holding this file, two levels up, then `agents`. This function takes
+ * no arguments; a caller that wants a different directory (for example
+ * a test with an isolated fixture dir) passes it to `getAgentIndex(dir)`.
+ *
+ * @returns {string} Absolute path to the agents directory. If converting
+ *   the module URL to a path throws, `<process.cwd()>/agents` is returned.
+ */
+export function defaultAgentsDir() {
+    // `import.meta.url` is the URL of this module, which ships as
+    // `dist/hooks/post-task.js` under the plugin root.
+    // Walk up twice: `hooks` → `dist` → plugin root, then join `agents`.
+    try {
+        const here = fileURLToPath(import.meta.url);
+        return resolve(dirname(here), "..", "..", "agents");
+    }
+    catch {
+        // The module URL could not be turned into a file path (e.g. vitest
+        // in-source) — fall back to CWD/agents. Production uses the path above.
+        return resolve(process.cwd(), "agents");
+    }
+}
+/**
+ * Resolve (and cache) the tp-* agent index. Safe to call repeatedly.
+ *
+ * The first call builds the index with `buildAgentIndex(dir)` and stores
+ * it in the module-level `_agentIndexCache`. Every later call returns the
+ * stored index and ignores `dir`, until `_resetAgentIndexCache()` clears it.
+ * The cache is only written after the build resolves, so calls that start
+ * before then each run their own build.
+ *
+ * @param {string} [dir] Agents directory to index; defaults to
+ *   `defaultAgentsDir()`.
+ * @returns {Promise<AgentIndex>} Whatever `buildAgentIndex` resolved to.
+ */
+export async function getAgentIndex(dir = defaultAgentsDir()) {
+    if (_agentIndexCache)
+        return _agentIndexCache;
+    _agentIndexCache = await buildAgentIndex(dir);
+    return _agentIndexCache;
+}
+/**
+ * Test-only: clear the module-level cache between fixtures.
+ * After this, the next `getAgentIndex()` call rebuilds the index.
+ */
+export function _resetAgentIndexCache() {
+    _agentIndexCache = null;
+}
 /**
  * Full post-Task processing: read frontmatter, count tokens, log over-budget.
  * Returns the advice message (or null) so the caller can optionally emit
@@ -108,29 +148,72 @@ export async function loadAgentBody(projectRoot, homeDir, agentName) {
 export async function processPostTask(projectRoot, homeDir, input) {
     if (input.tool_name !== "Task")
         return null;
-    const agentName = input.tool_input?.subagent_type;
-    if (typeof agentName !== "string" || !agentName.startsWith("tp-")) {
-        return null;
+    const subagentType = input.tool_input?.subagent_type;
+    const description = input.tool_input?.description ?? "";
+    const actualTokens = extractSubagentTokens(input) ?? 0;
+    const isTpAgent = typeof subagentType === "string" && subagentType.startsWith("tp-");
+    // ─── existing tp-* budget logic (unchanged) ─────────────────────
+    let budget = null;
+    let decision = {
+        overBudget: false,
+        overByRatio: 0,
+        message: null,
+    };
+    if (isTpAgent && actualTokens > 0) {
+        const body = await loadAgentBody(projectRoot, homeDir, subagentType);
+        budget = body ? parseAgentBudget(body) : null;
+        decision = decideBudgetAdvice({
+            agentName: subagentType,
+            budget,
+            actualTokens,
+        });
+        if (decision.overBudget && budget != null) {
+            await appendOverBudgetLog(projectRoot, {
+                ts: Date.now(),
+                agent: subagentType,
+                budget,
+                actualTokens,
+                overByRatio: decision.overByRatio,
+            });
+        }
     }
-    const actualTokens = extractSubagentTokens(input);
-    if (actualTokens == null)
-        return null;
-    const body = await loadAgentBody(projectRoot, homeDir, agentName);
-    const budget = body ? parseAgentBudget(body) : null;
-    const decision = decideBudgetAdvice({
-        agentName,
-        budget,
-        actualTokens,
-    });
-    if (decision.overBudget && budget != null) {
-        await appendOverBudgetLog(projectRoot, {
+    // ─── v0.31.0 Task telemetry ────────────────────────────────────
+    // One event per Task call, regardless of tp-*. For non-tp agents we
+    // run the heuristic matcher so `stats --tasks` can surface routing
+    // misses (general-purpose picked when a tp-* would have fit).
+    // Silent on any error — telemetry must never break hook dispatch.
+    try {
+        let matched = null;
+        let matchConfidence;
+        if (!isTpAgent && description.length > 0) {
+            const index = await getAgentIndex();
+            const hit = matchTpAgent(description, index);
+            if (hit) {
+                matched = hit.agent;
+                matchConfidence = hit.confidence;
+            }
+        }
+        await appendEvent(projectRoot, {
             ts: Date.now(),
-            agent: agentName,
+            session_id: input.session_id ?? "",
+            agent_type: input.agent_type ?? null,
+            agent_id: input.agent_id ?? null,
+            event: "task",
+            file: "",
+            lines: 0,
+            estTokens: actualTokens,
+            summaryTokens: 0,
+            savedTokens: 0,
+            subagent_type: typeof subagentType === "string" ? subagentType : "",
+            matched_tp_agent: matched,
+            ...(matchConfidence ? { match_confidence: matchConfidence } : {}),
             budget,
-            actualTokens,
-            overByRatio: decision.overByRatio,
+            overBudget: decision.overBudget,
         });
     }
+    catch {
+        /* silent */
+    }
     return decision.message;
 }
 //# sourceMappingURL=post-task.js.map

package/dist/hooks/pre-task.d.ts ADDED Viewed

@@ -0,0 +1,71 @@
+/**
+ * v0.31.0 Pack 2 — PreToolUse:Task routing enforcement.
+ *
+ * Pack 1 (already shipped) built the matcher and telemetry. Pack 2 acts
+ * on that matcher: BEFORE a Task dispatch fires, we inspect
+ * `tool_input.subagent_type` + `tool_input.description`, heuristically
+ * match against the shipped `tp-*` catalog, and redirect (advise / deny)
+ * general-purpose calls that clearly fit a specialised agent.
+ *
+ * Why not straight-deny:
+ *   - The pre-edit rollback in v0.30.4 taught us the cost of a false
+ *     hard-block (stuck sessions, BYPASS env creep). Task routing has
+ *     MORE ambiguity than Edit (descriptions are terse; recall on
+ *     keyword match is imperfect), so the default mode = advise.
+ *
+ * Tier logic (first match wins):
+ *
+ *   1. tool_name !== "Task"                          → allow
+ *   2. subagent_type ∈ tp-*                          → allow
+ *   3. description contains an ESCAPE phrase         → allow
+ *      (e.g. ad-hoc / one-off / open-ended / research across /
+ *      explore multiple / multi-step / across the codebase)
+ *   4. matchTpAgent returns null                     → allow
+ *   5. TOKEN_PILOT_FORCE_SUBAGENTS=1 OR mode=strict  → deny
+ *      (hard-block: agent author opted into pedantic routing)
+ *   6. confidence=high                               → advise
+ *   7. confidence=low                                → advise (same text;
+ *      only the reported confidence value differs)
+ *
+ * A missing or empty description is also allowed (checked between
+ * tiers 2 and 3), because there is nothing to match against.
+ *
+ * Pure decide — all context (agent index, env, mode) is pre-resolved
+ * by the caller so the function stays deterministic and unit-testable.
+ */
+import type { EnforcementMode } from "../server/enforcement-mode.js";
+import type { AgentIndex } from "../core/agent-matcher.js";
+export interface PreTaskInput { // hook payload subset read by decidePreTask
+    tool_name?: string; // only "Task" is routed; any other value is allowed through
+    tool_input?: {
+        subagent_type?: string; // values starting with "tp-" are allowed through
+        description?: string; // text checked for escape phrases and passed to matchTpAgent
+        [k: string]: unknown; // other tool_input fields are accepted but not read by decidePreTask
+    };
+}
+export type PreTaskDecision = { // discriminated on `kind`
+    kind: "allow"; // rendered as no output
+} | {
+    kind: "advise";
+    message: string; // rendered as additionalContext next to permissionDecision "allow"
+} | {
+    kind: "deny";
+    reason: string; // rendered as permissionDecisionReason next to permissionDecision "deny"
+};
+export interface PreTaskContext {
+    /**
+     * Parsed enforcement mode. `strict` is the only mode that hard-blocks
+     * by itself; every other mode advises unless `force` is set.
+     */
+    mode: EnforcementMode;
+    /** Agent catalog built by buildAgentIndex; handed to matchTpAgent as-is. */
+    agentIndex: AgentIndex;
+    /**
+     * TOKEN_PILOT_FORCE_SUBAGENTS=1 — opt-in strictness regardless of mode:
+     * when true, any matcher hit is denied.
+     */
+    force: boolean;
+}
+/**
+ * Pure decision function. Caller resolves all context (env, mode,
+ * agent index) up front so this stays a plain input → output mapping.
+ *
+ * @param input Hook payload for the pending tool call.
+ * @param ctx Pre-resolved enforcement mode, agent index and force flag.
+ * @returns `allow`, `advise` (with a message) or `deny` (with a reason).
+ */
+export declare function decidePreTask(input: PreTaskInput, ctx: PreTaskContext): PreTaskDecision;
+/**
+ * Render the Claude Code hook JSON response.
+ *
+ * - allow  → no output (pass-through)
+ * - advise → permissionDecision=allow + additionalContext
+ * - deny   → permissionDecision=deny + reason
+ *
+ * @param decision Result of `decidePreTask`.
+ * @returns JSON text for the hook response, or `null` when nothing
+ *   should be written.
+ */
+export declare function renderPreTaskOutput(decision: PreTaskDecision): string | null;
+//# sourceMappingURL=pre-task.d.ts.map

package/dist/hooks/pre-task.js ADDED Viewed

@@ -0,0 +1,125 @@
+/**
+ * v0.31.0 Pack 2 — PreToolUse:Task routing enforcement.
+ *
+ * Pack 1 (already shipped) built the matcher and telemetry. Pack 2 acts
+ * on that matcher: BEFORE a Task dispatch fires, we inspect
+ * `tool_input.subagent_type` + `tool_input.description`, heuristically
+ * match against the shipped `tp-*` catalog, and redirect (advise / deny)
+ * general-purpose calls that clearly fit a specialised agent.
+ *
+ * Why not straight-deny:
+ *   - The pre-edit rollback in v0.30.4 taught us the cost of a false
+ *     hard-block (stuck sessions, BYPASS env creep). Task routing has
+ *     MORE ambiguity than Edit (descriptions are terse; recall on
+ *     keyword match is imperfect), so the default mode = advise.
+ *
+ * Tier logic (first match wins):
+ *
+ *   1. tool_name !== "Task"                          → allow
+ *   2. subagent_type ∈ tp-*                          → allow
+ *   3. description contains an ESCAPE phrase         → allow
+ *      (e.g. ad-hoc / one-off / open-ended / research across /
+ *      explore multiple / multi-step / across the codebase)
+ *   4. matchTpAgent returns null                     → allow
+ *   5. TOKEN_PILOT_FORCE_SUBAGENTS=1 OR mode=strict  → deny
+ *      (hard-block: agent author opted into pedantic routing)
+ *   6. confidence=high                               → advise
+ *   7. confidence=low                                → advise (same text;
+ *      only the reported confidence value differs)
+ *
+ * A missing or empty description is also allowed (checked between
+ * tiers 2 and 3), because there is nothing to match against.
+ *
+ * Pure decide — all context (agent index, env, mode) is pre-resolved
+ * by the caller so the function stays deterministic and unit-testable.
+ */
+import { matchTpAgent } from "../core/agent-matcher.js";
+/**
+ * Escape phrases that tell us the user genuinely wants open-ended
+ * general-purpose work. Short list of boilerplate — keeping it tight
+ * prevents the escape from eating otherwise-valid routing.
+ *
+ * Matching is a plain substring test against the lowercased description
+ * (no word boundaries), so every entry must itself be lowercase. Author
+ * new entries here only when tool-audit shows a legitimate pattern
+ * getting false-flagged.
+ */
+const ESCAPE_PHRASES = [
+    "ad-hoc",
+    "ad hoc",
+    "one-off",
+    "one off",
+    "open-ended",
+    "research across",
+    "explore multiple",
+    "multi-step",
+    "across the codebase",
+    "across the repo",
+    "general purpose",
+];
+function containsEscape(description) { // true when any ESCAPE_PHRASES entry occurs in description
+    const n = description.toLowerCase(); // lowercase once so the comparison ignores case
+    return ESCAPE_PHRASES.some((p) => n.includes(p)); // plain substring test, no word boundaries
+}
+/**
+ * Pure decision function. Caller resolves all context (env, mode,
+ * agent index) up front so this stays a plain input → output mapping.
+ *
+ * Checks run in order and the first that applies wins: a non-"Task"
+ * tool, a `tp-*` subagent, an empty description, an escape phrase, and
+ * no matcher hit all return `allow`. On a hit, `ctx.force` or
+ * `ctx.mode === "strict"` returns `deny`; otherwise `advise`. Both carry
+ * the same suggestion text, and `hit.confidence` is only interpolated
+ * into that text — it does not change which kind is returned.
+ *
+ * @param {PreTaskInput} input Hook payload; only `tool_name`,
+ *   `tool_input.subagent_type` and `tool_input.description` are read.
+ * @param {PreTaskContext} ctx Pre-resolved `mode`, `agentIndex` and `force`.
+ * @returns {PreTaskDecision} `{ kind: "allow" }`,
+ *   `{ kind: "advise", message }` or `{ kind: "deny", reason }`.
+ */
+export function decidePreTask(input, ctx) {
+    if (input.tool_name !== "Task")
+        return { kind: "allow" };
+    const subagentType = input.tool_input?.subagent_type ?? "";
+    const description = input.tool_input?.description ?? "";
+    // Already a tp-* — routing intent matches catalog. Let it run.
+    if (typeof subagentType === "string" && subagentType.startsWith("tp-")) {
+        return { kind: "allow" };
+    }
+    // No description → nothing to match against. Allow (Claude Code
+    // sometimes dispatches Task with only a subagent_type + session id).
+    if (!description || description.length === 0)
+        return { kind: "allow" };
+    // Author-blessed escape clauses — user is explicitly saying
+    // "this is broad". Respect that.
+    if (containsEscape(description))
+        return { kind: "allow" };
+    const hit = matchTpAgent(description, ctx.agentIndex);
+    if (!hit)
+        return { kind: "allow" };
+    const suggestion = `Consider dispatching \`${hit.agent}\` instead of \`${subagentType || "general-purpose"}\` — ` +
+        `the description matches its trigger phrases (confidence: ${hit.confidence}). ` +
+        `tp-* agents run under a tighter budget and output in terse style, typically ` +
+        `~50-70 % fewer tokens than general-purpose. ` +
+        `Escape: add "ad-hoc" or "open-ended" to the description to bypass, or set ` +
+        `TOKEN_PILOT_MODE=advisory for warn-only behaviour.`;
+    const hardBlock = ctx.force || // forced: deny at any confidence
+        ctx.mode === "strict" || // strict mode: deny at any confidence
+        (ctx.mode === "deny" && hit.confidence === "high" && ctx.force); // requires ctx.force, so already covered by the first clause
+    if (hardBlock) {
+        return {
+            kind: "deny",
+            reason: suggestion,
+        };
+    }
+    return { kind: "advise", message: suggestion };
+}
+/**
+ * Render the Claude Code hook JSON response.
+ *
+ * - allow  → no output (pass-through)
+ * - advise → permissionDecision=allow + additionalContext
+ * - deny   → permissionDecision=deny + reason
+ *
+ * @param {PreTaskDecision} decision Result of `decidePreTask`.
+ * @returns {string | null} Serialized `hookSpecificOutput` JSON (with
+ *   `hookEventName: "PreToolUse"`), or `null` for `allow`.
+ */
+export function renderPreTaskOutput(decision) {
+    if (decision.kind === "allow")
+        return null;
+    if (decision.kind === "advise") {
+        return JSON.stringify({
+            hookSpecificOutput: {
+                hookEventName: "PreToolUse",
+                permissionDecision: "allow",
+                additionalContext: decision.message,
+            },
+        });
+    }
+    return JSON.stringify({ // every remaining kind is rendered as a deny
+        hookSpecificOutput: {
+            hookEventName: "PreToolUse",
+            permissionDecision: "deny",
+            permissionDecisionReason: decision.reason,
+        },
+    });
+}
+//# sourceMappingURL=pre-task.js.map

package/dist/index.js CHANGED Viewed

@@ -52,6 +52,8 @@ import { assessClaudeMd } from "./cli/claudemd-hygiene.js";
 import { decidePostBashAdvice, renderPostBashHookOutput, } from "./hooks/post-bash.js";
 import { decidePreBash, renderPreBashOutput } from "./hooks/pre-bash.js";
 import { decidePreGrep, renderPreGrepOutput } from "./hooks/pre-grep.js";
+import { decidePreTask, renderPreTaskOutput } from "./hooks/pre-task.js";
+import { getAgentIndex } from "./hooks/post-task.js";
 import { decidePreEdit, renderPreEditOutput, } from "./hooks/pre-edit.js";
 import { isEditPrepared as isEditPreparedFn } from "./core/edit-prep-state.js";
 import { maybeEmitEcosystemReminder } from "./cli/ecosystem-reminder.js";
@@ -184,6 +186,33 @@ export async function main(cliArgs = process.argv.slice(2)) {
             process.exit(0);
             return;
         }
+        case "hook-pre-task": {
+            // v0.31.0 Pack 2 — route general-purpose Task dispatches to a
+            // `tp-*` specialist when the description clearly matches. Default
+            // (deny / advisory mode) is a non-blocking advise; strict mode or
+            // TOKEN_PILOT_FORCE_SUBAGENTS=1 hard-denies on any matcher hit,
+            // high or low confidence. The matcher is lenient by design (false
+            // deny is much worse than a missed nudge — see pre-edit v0.30.4 rollback).
+            try {
+                const stdin = readFileSync(0, "utf-8");
+                const input = JSON.parse(stdin);
+                const agentIndex = await getAgentIndex();
+                const force = process.env.TOKEN_PILOT_FORCE_SUBAGENTS === "1";
+                const decision = decidePreTask(input, {
+                    mode: parseEnforcementMode(process.env.TOKEN_PILOT_MODE),
+                    agentIndex,
+                    force,
+                });
+                const rendered = renderPreTaskOutput(decision);
+                if (rendered)
+                    process.stdout.write(rendered);
+            }
+            catch {
+                /* silent — hook must not break */
+            }
+            process.exit(0);
+            return;
+        }
         case "hook-post-task": {
             try {
                 const stdin = readFileSync(0, "utf-8");

package/hooks/hooks.json CHANGED Viewed

@@ -45,6 +45,15 @@
             "command": "node ${CLAUDE_PLUGIN_ROOT}/dist/index.js hook-pre-grep"
           }
         ]
+      },
+      {
+        "matcher": "Task",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "node ${CLAUDE_PLUGIN_ROOT}/dist/index.js hook-pre-task"
+          }
+        ]
       }
     ],
     "SessionStart": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "token-pilot",
-  "version": "0.30.5",
+  "version": "0.31.0",
   "description": "Save up to 80% tokens when AI reads code — MCP server for token-efficient code navigation, AST-aware structural reading instead of dumping full files into context window",
   "type": "module",
   "main": "dist/index.js",