token-pilot 0.30.0 → 0.30.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/.claude-plugin/marketplace.json +2 -2
  2. package/.claude-plugin/plugin.json +2 -4
  3. package/README.md +24 -0
  4. package/agents/tp-api-surface-tracker.md +1 -1
  5. package/agents/tp-audit-scanner.md +1 -1
  6. package/agents/tp-commit-writer.md +1 -1
  7. package/agents/tp-context-engineer.md +1 -1
  8. package/agents/tp-dead-code-finder.md +1 -1
  9. package/agents/tp-debugger.md +1 -1
  10. package/agents/tp-dep-health.md +1 -1
  11. package/agents/tp-doc-writer.md +1 -1
  12. package/agents/tp-history-explorer.md +1 -1
  13. package/agents/tp-impact-analyzer.md +1 -1
  14. package/agents/tp-incident-timeline.md +1 -1
  15. package/agents/tp-incremental-builder.md +1 -1
  16. package/agents/tp-migration-scout.md +1 -1
  17. package/agents/tp-onboard.md +1 -1
  18. package/agents/tp-performance-profiler.md +1 -1
  19. package/agents/tp-pr-reviewer.md +1 -1
  20. package/agents/tp-refactor-planner.md +1 -1
  21. package/agents/tp-review-impact.md +1 -1
  22. package/agents/tp-run.md +1 -1
  23. package/agents/tp-session-restorer.md +1 -1
  24. package/agents/tp-ship-coordinator.md +1 -1
  25. package/agents/tp-spec-writer.md +1 -1
  26. package/agents/tp-test-coverage-gapper.md +1 -1
  27. package/agents/tp-test-triage.md +1 -1
  28. package/agents/tp-test-writer.md +1 -1
  29. package/dist/ast-index/client.d.ts +17 -2
  30. package/dist/ast-index/client.js +233 -107
  31. package/dist/core/edit-prep-state.d.ts +42 -0
  32. package/dist/core/edit-prep-state.js +108 -0
  33. package/dist/handlers/explore-area.js +6 -1
  34. package/dist/handlers/read-for-edit.d.ts +5 -5
  35. package/dist/handlers/read-for-edit.js +188 -110
  36. package/dist/hooks/installer.js +18 -0
  37. package/dist/hooks/pre-bash.d.ts +9 -0
  38. package/dist/hooks/pre-bash.js +48 -0
  39. package/dist/hooks/pre-edit.d.ts +69 -0
  40. package/dist/hooks/pre-edit.js +104 -0
  41. package/dist/hooks/pre-grep.d.ts +10 -0
  42. package/dist/hooks/pre-grep.js +38 -2
  43. package/dist/index.d.ts +30 -0
  44. package/dist/index.js +83 -20
  45. package/dist/server/tool-definitions.js +18 -6
  46. package/dist/server.js +21 -5
  47. package/docs/installation.md +27 -1
  48. package/hooks/hooks.json +18 -0
  49. package/package.json +1 -1
  50. package/start.sh +19 -9
@@ -141,8 +141,47 @@ function detectHeavyPatternSingle(command) {
141
141
  "or `git diff --stat` / `git diff <path>` to scope. Re-run scoped to bypass.",
142
142
  };
143
143
  }
144
+ // 6. Test runners — suggest test_summary. Advisory only (allow + hint):
145
+ // tests are legitimate to run; we just want the token-lean summary by
146
+ // default. Tool-audit 2026-04-24 showed test_summary = 0 calls across
147
+ // three real projects — agents always go straight to the raw runner.
148
+ if (isTestRunnerCommand(cmd)) {
149
+ return {
150
+ kind: "advise",
151
+ reason: "Running tests via raw command dumps stdout into context. " +
152
+ 'Prefer mcp__token-pilot__test_summary(command="<your runner>") — ' +
153
+ "returns structured pass/fail/flaky counts and only the failing output, " +
154
+ "typically 70-90% fewer tokens than raw runner output.",
155
+ };
156
+ }
144
157
  return { kind: "allow" };
145
158
  }
159
/**
 * Detect common test-runner invocations. Returns true for anything we'd
 * route through `test_summary`. Kept as a pure string test so it's unit-
 * testable without spinning up child processes.
 *
 * Recognised shapes:
 *   - `npm|yarn|pnpm [run] test[:suite]` and `yarn workspace <x> test`
 *   - a direct runner binary (vitest, jest, mocha, phpunit, rspec, pytest),
 *     bare, behind a wrapper (`npx`, `pnpm dlx`, `python -m`, `bundle exec`)
 *     or behind a path prefix (`./node_modules/.bin/jest`)
 *   - `go test` / `cargo test`
 *
 * @param {string} cmd - Raw shell command line.
 * @returns {boolean} true when the command looks like a test run; false for
 *   empty, whitespace-only or non-string input.
 */
export function isTestRunnerCommand(cmd) {
    // Non-string input cannot be a command; bail out instead of throwing
    // on `.trim()`.
    if (typeof cmd !== "string")
        return false;
    const trimmed = cmd.trim();
    if (!trimmed)
        return false;
    // npm/yarn/pnpm run test[:suite], yarn workspace <x> test, etc.
    if (/\b(?:npm|yarn|pnpm)\s+(?:run\s+)?test(?:[:\s]|$)/.test(trimmed)) {
        return true;
    }
    if (/\byarn\s+workspace\s+\S+\s+test\b/.test(trimmed))
        return true;
    // Direct runner invocations. The runner name must be a complete shell
    // word: preceded by start-of-line, whitespace, a path separator, a shell
    // operator or a quote, and followed by end-of-line, whitespace, a shell
    // operator, a quote or an `@version` suffix. A bare `\b` boundary would
    // also fire on filenames such as `jest.config.js` or `pytest.ini`, so
    // `cat jest.config.js` would be reported as a test run.
    // Known limitation: a runner name passed as a plain argument
    // (`npm install jest`) still matches.
    if (/(?:^|[\s\/\\;&|("'`])(?:vitest|jest|mocha|phpunit|rspec|pytest)(?=$|[\s;&|)"'`@])/.test(trimmed)) {
        return true;
    }
    // Go / Cargo native test drivers
    if (/\bgo\s+test\b/.test(trimmed))
        return true;
    if (/\bcargo\s+test\b/.test(trimmed))
        return true;
    return false;
}
146
185
  export function decidePreBash(input, mode = "deny") {
147
186
  if (mode === "advisory")
148
187
  return { kind: "allow" };
@@ -156,6 +195,15 @@ export function decidePreBash(input, mode = "deny") {
156
195
  export function renderPreBashOutput(decision) {
157
196
  if (decision.kind === "allow")
158
197
  return null;
198
+ if (decision.kind === "advise") {
199
+ return JSON.stringify({
200
+ hookSpecificOutput: {
201
+ hookEventName: "PreToolUse",
202
+ permissionDecision: "allow",
203
+ additionalContext: decision.reason,
204
+ },
205
+ });
206
+ }
159
207
  return JSON.stringify({
160
208
  hookSpecificOutput: {
161
209
  hookEventName: "PreToolUse",
@@ -0,0 +1,69 @@
1
+ /**
2
+ * v0.30.0 — PreToolUse:Edit/MultiEdit/Write enforcement.
3
+ *
4
+ * Background: tool-audit data across three real projects (2026-04-24)
5
+ * showed Codex calling `read_for_edit` at 33% of its MCP volume while
6
+ * Claude sat at 0-1% despite our MCP instructions marking it MANDATORY.
7
+ * Text rules alone don't flip trained agent instincts. The pattern that
8
+ * did move Claude — pre-grep → find_usages — is hook-based deny.
9
+ *
10
+ * This hook closes the gap: before Claude executes Edit/MultiEdit/Write
11
+ * on an existing code file, we check a shared prep-state file that
12
+ * read_for_edit updates on every call. If the file isn't prepared we
13
+ * block (deny) or warn (advisory), depending on TOKEN_PILOT_MODE.
14
+ *
15
+ * Scope rules, in order:
16
+ * 1. Non-code files → allow (config, markdown, etc.)
17
+ * 2. Write on non-existent file → allow (new-file creation is fine)
18
+ * 3. TOKEN_PILOT_BYPASS=1 → allow (escape hatch)
19
+ * 4. File already prepared → allow
20
+ * 5. advisory mode → allow + additionalContext hint
21
+ * 6. Otherwise → deny with actionable message
22
+ *
23
+ * The decide function is pure — no I/O, no process.env reads — so it is
24
+ * trivially unit-testable. All side effects (existsSync, state read,
25
+ * enforcement-mode env) are resolved in the thin wrapper before the call.
26
+ */
27
+ import type { EnforcementMode } from "../server/enforcement-mode.js";
28
+ export interface PreEditInput {
29
+ tool_name?: string;
30
+ tool_input?: {
31
+ file_path?: string;
32
+ [k: string]: unknown;
33
+ };
34
+ }
35
+ export type PreEditDecision = {
36
+ kind: "allow";
37
+ } | {
38
+ kind: "advise";
39
+ message: string;
40
+ } | {
41
+ kind: "deny";
42
+ reason: string;
43
+ };
44
+ export interface PreEditContext {
45
+ /** Enforcement mode from TOKEN_PILOT_MODE */
46
+ mode: EnforcementMode;
47
+ /** File extension is a code file we care about */
48
+ isCodeFile: boolean;
49
+ /** The target file already exists on disk */
50
+ fileExists: boolean;
51
+ /** read_for_edit was called for this file recently */
52
+ isPrepared: boolean;
53
+ /** TOKEN_PILOT_BYPASS=1 set in env */
54
+ bypassed: boolean;
55
+ }
56
+ /**
57
+ * Pure decision function. Caller resolves all context (FS, env, state)
58
+ * beforehand so this stays a deterministic mapping input → decision.
59
+ */
60
+ export declare function decidePreEdit(input: PreEditInput, ctx: PreEditContext): PreEditDecision;
61
+ /**
62
+ * Render the Claude Code hook JSON response.
63
+ *
64
+ * - allow → no output (hook passes through with no side-effect)
65
+ * - advise → permissionDecision=allow + additionalContext hint
66
+ * - deny → permissionDecision=deny + reason
67
+ */
68
+ export declare function renderPreEditOutput(decision: PreEditDecision): string | null;
69
+ //# sourceMappingURL=pre-edit.d.ts.map
@@ -0,0 +1,104 @@
1
+ /**
2
+ * v0.30.0 — PreToolUse:Edit/MultiEdit/Write enforcement.
3
+ *
4
+ * Background: tool-audit data across three real projects (2026-04-24)
5
+ * showed Codex calling `read_for_edit` at 33% of its MCP volume while
6
+ * Claude sat at 0-1% despite our MCP instructions marking it MANDATORY.
7
+ * Text rules alone don't flip trained agent instincts. The pattern that
8
+ * did move Claude — pre-grep → find_usages — is hook-based deny.
9
+ *
10
+ * This hook closes the gap: before Claude executes Edit/MultiEdit/Write
11
+ * on an existing code file, we check a shared prep-state file that
12
+ * read_for_edit updates on every call. If the file isn't prepared we
13
+ * block (deny) or warn (advisory), depending on TOKEN_PILOT_MODE.
14
+ *
15
+ * Scope rules, in order:
16
+ * 1. Non-code files → allow (config, markdown, etc.)
17
+ * 2. Write on non-existent file → allow (new-file creation is fine)
18
+ * 3. TOKEN_PILOT_BYPASS=1 → allow (escape hatch)
19
+ * 4. File already prepared → allow
20
+ * 5. advisory mode → allow + additionalContext hint
21
+ * 6. Otherwise → deny with actionable message
22
+ *
23
+ * The decide function is pure — no I/O, no process.env reads — so it is
24
+ * trivially unit-testable. All side effects (existsSync, state read,
25
+ * enforcement-mode env) are resolved in the thin wrapper before the call.
26
+ */
27
/**
 * Map a PreToolUse payload plus pre-resolved context to a decision.
 * Performs no I/O and reads no environment: the caller supplies file
 * existence, prep state, bypass flag and enforcement mode via `ctx`.
 *
 * @param input - Hook payload; only `tool_name` and `tool_input.file_path` are read.
 * @param ctx - `{ mode, isCodeFile, fileExists, isPrepared, bypassed }`.
 * @returns `{kind:"allow"}`, `{kind:"advise", message}` or `{kind:"deny", reason}`.
 */
export function decidePreEdit(input, ctx) {
    const ALLOW = { kind: "allow" };
    // Only the three file-mutating tools are in scope.
    const guardedTools = ["Edit", "MultiEdit", "Write"];
    if (!guardedTools.includes(input.tool_name ?? "")) {
        return ALLOW;
    }
    // Without a usable target path there is nothing to check.
    const target = input.tool_input?.file_path;
    if (typeof target !== "string" || target.length === 0) {
        return ALLOW;
    }
    // Pass-through cases, evaluated left to right:
    //   - non-code file (config, markdown, JSON)
    //   - file missing on disk (new-file Write; Edit on a missing path is
    //     left to fail downstream)
    //   - explicit TOKEN_PILOT_BYPASS=1 escape hatch
    //   - file already prepared via read_for_edit
    if (!ctx.isCodeFile || !ctx.fileExists || ctx.bypassed || ctx.isPrepared) {
        return ALLOW;
    }
    const suggestion = `mcp__token-pilot__read_for_edit(path="${target}", symbol="<target>")`;
    const unprepared = `File "${target}" was not prepared with read_for_edit. `;
    // Advisory mode: the Edit still runs, the agent only receives a hint.
    if (ctx.mode === "advisory") {
        const message = [
            unprepared,
            `Consider calling ${suggestion} first — the exact old_string it returns is what Edit actually needs. `,
            `Edit built from smart_read / Read snippets frequently mismatches on whitespace.`,
        ].join("");
        return { kind: "advise", message };
    }
    // Any other mode (deny / strict): hard block with an actionable message.
    const reason = [
        unprepared,
        `Call ${suggestion} FIRST to obtain the exact old_string for Edit — `,
        `this is the canonical flow. Building old_string from smart_read or Read `,
        `snippets diverges from disk (whitespace, line-number prefixes) and Edit `,
        `silently mismatches. `,
        `Escape hatch: set TOKEN_PILOT_BYPASS=1 in the environment, or switch to `,
        `TOKEN_PILOT_MODE=advisory for warn-only behaviour.`,
    ].join("");
    return { kind: "deny", reason };
}
77
/**
 * Serialise a pre-edit decision into the hook's JSON response.
 *
 * - allow  → null (nothing is written, the tool call passes through)
 * - advise → permissionDecision "allow" with the hint in additionalContext
 * - any other kind → permissionDecision "deny" with permissionDecisionReason
 *
 * @param decision - Result of decidePreEdit.
 * @returns JSON string to write to stdout, or null when there is no output.
 */
export function renderPreEditOutput(decision) {
    const { kind } = decision;
    if (kind === "allow")
        return null;
    const isHint = kind === "advise";
    // Keys are inserted in the order the response is serialised.
    const hookSpecificOutput = {
        hookEventName: "PreToolUse",
        permissionDecision: isHint ? "allow" : "deny",
    };
    if (isHint) {
        hookSpecificOutput.additionalContext = decision.message;
    }
    else {
        hookSpecificOutput.permissionDecisionReason = decision.reason;
    }
    return JSON.stringify({ hookSpecificOutput });
}
104
+ //# sourceMappingURL=pre-edit.js.map
@@ -32,10 +32,20 @@ export interface PreGrepInput {
32
32
  }
33
33
  export type PreGrepDecision = {
34
34
  kind: "allow";
35
+ } | {
36
+ kind: "advise";
37
+ reason: string;
35
38
  } | {
36
39
  kind: "deny";
37
40
  reason: string;
38
41
  };
42
+ /**
43
+ * Shapes that look like TODO / FIXME / HACK / XXX / BUG tag scans —
44
+ * route these to `code_audit` which returns deduplicated, categorised
45
+ * results instead of N raw grep hits. Zero code_audit calls across three
46
+ * projects (tool-audit 2026-04-24) = agents reach for Grep every time.
47
+ */
48
+ export declare function isTodoScanPattern(pattern: string): boolean;
39
49
  /**
40
50
  * Heuristic: does `pattern` look like a code identifier worth sending
41
51
  * through find_usages?
@@ -20,6 +20,18 @@
20
20
  * find_usages after the block, we keep it. If they bypass via `-E` or
21
21
  * raw shell, we soften to advisory.
22
22
  */
23
/**
 * Shapes that look like code-tag scans — TODO / FIXME / HACK / XXX / BUG /
 * NOTE / OPTIMIZE / REFACTOR, alone or joined with `|` — which are routed
 * to `code_audit` instead of raw Grep.
 *
 * Matching is case-insensitive and ignores regex decoration that does not
 * change which tags are searched: grouping parentheses (capturing or
 * `(?:` non-capturing), `\b` word boundaries, a `(?i)` inline flag and
 * whitespace. Anything else in the pattern (other identifiers, character
 * classes, suffixes such as `TODOS`) makes it a non-match.
 *
 * @param {string} pattern - Grep pattern as supplied by the tool call.
 * @returns {boolean} true when the pattern consists solely of known tags.
 */
export function isTodoScanPattern(pattern) {
    if (typeof pattern !== "string")
        return false;
    // Strip decoration before upper-casing, otherwise `\b` would turn into
    // `\B` and no longer be recognisable.
    const normalised = pattern
        .replace(/\(\?i\)/g, "") // inline case-insensitive flag
        .replace(/\(\?:/g, "") // non-capturing group opener
        .replace(/\\b/g, "") // word-boundary assertions
        .replace(/[()\s]/g, "") // remaining grouping parens and whitespace
        .toUpperCase();
    const tagRe = /^(TODO|FIXME|HACK|XXX|BUG|NOTE|OPTIMIZE|REFACTOR)(\|(TODO|FIXME|HACK|XXX|BUG|NOTE|OPTIMIZE|REFACTOR))*$/;
    return tagRe.test(normalised);
}
23
35
  /**
24
36
  * Heuristic: does `pattern` look like a code identifier worth sending
25
37
  * through find_usages?
@@ -65,14 +77,29 @@ export function isSymbolLikePattern(pattern) {
65
77
  * return whether to allow or deny (with a suggestion).
66
78
  */
67
79
  export function decidePreGrep(input, mode = "deny") {
68
- if (mode === "advisory")
69
- return { kind: "allow" };
70
80
  if (input.tool_name !== "Grep")
71
81
  return { kind: "allow" };
72
82
  const pattern = input.tool_input?.pattern;
73
83
  if (typeof pattern !== "string" || pattern.length === 0) {
74
84
  return { kind: "allow" };
75
85
  }
86
+ // TODO / FIXME / HACK tag scan → route to code_audit. Emitted as an
87
+ // advisory ("allow" + hint) regardless of enforcement mode: blocking
88
+ // would frustrate a legitimate one-off scan, but nudging the agent
89
+ // toward code_audit compounds the benefit across a session.
90
+ if (isTodoScanPattern(pattern)) {
91
+ return {
92
+ kind: "advise",
93
+ reason: `Grep pattern "${pattern}" is a TODO / FIXME / HACK scan. ` +
94
+ `Prefer mcp__token-pilot__code_audit — it returns deduplicated, ` +
95
+ `categorised tags across the project with file/line references, ` +
96
+ `typically 3-5× fewer tokens than raw Grep and ignores generated/` +
97
+ `vendored code automatically.`,
98
+ };
99
+ }
100
+ // Advisory mode disables the symbol-like deny (legacy behaviour).
101
+ if (mode === "advisory")
102
+ return { kind: "allow" };
76
103
  if (!isSymbolLikePattern(pattern))
77
104
  return { kind: "allow" };
78
105
  const reason = `Grep pattern "${pattern}" looks like a code identifier. ` +
@@ -89,6 +116,15 @@ export function decidePreGrep(input, mode = "deny") {
89
116
  export function renderPreGrepOutput(decision) {
90
117
  if (decision.kind === "allow")
91
118
  return null;
119
+ if (decision.kind === "advise") {
120
+ return JSON.stringify({
121
+ hookSpecificOutput: {
122
+ hookEventName: "PreToolUse",
123
+ permissionDecision: "allow",
124
+ additionalContext: decision.reason,
125
+ },
126
+ });
127
+ }
92
128
  return JSON.stringify({
93
129
  hookSpecificOutput: {
94
130
  hookEventName: "PreToolUse",
package/dist/index.d.ts CHANGED
@@ -3,6 +3,18 @@ import type { HookMode } from "./types.js";
3
3
  export declare const CODE_EXTENSIONS: Set<string>;
4
4
  export declare function getVersion(): string;
5
5
  export declare function main(cliArgs?: string[]): Promise<void>;
6
+ /**
7
+ * Defensive check for the Claude Code plugin `start.sh` bug (fixed 2026-04-24,
8
+ * but older installs still in the wild). If the caller passed the plugin's own
9
+ * cache dir as projectRoot, every relative path like `front/src/File.php` gets
10
+ * resolved inside the plugin install instead of the user's repo (ENOENT).
11
+ *
12
+ * Matches the canonical Claude Code plugin cache pattern
13
+ * ~/.claude/plugins/cache/token-pilot/token-pilot/<version>/
14
+ * on both POSIX and Windows separators. Intentionally narrow — does NOT match
15
+ * dev installs (cloning the repo and running against itself stays legal).
16
+ */
17
+ export declare function looksLikePluginCacheDir(candidate: string): boolean;
6
18
  export declare function startServer(cliArgs?: string[]): Promise<void>;
7
19
  export interface HookReadAdaptiveOptions {
8
20
  adaptiveThreshold?: boolean;
@@ -16,6 +28,24 @@ export declare function handleHookRead(filePathArg?: string, mode?: HookMode, de
16
28
  * wrapping.
17
29
  */
18
30
  export declare function runHookReadDispatch(filePathArg: string | undefined, mode: HookMode, denyThresholdArg?: number, projectRootArg?: string, adaptive?: HookReadAdaptiveOptions): Promise<string | null>;
31
+ /**
32
+ * PreToolUse:Edit / MultiEdit / Write enforcement.
33
+ *
34
+ * v0.30.0 upgraded this from a passive advisory hint into a real gate.
35
+ * The previous implementation always returned `allow` + a TIP; Claude
36
+ * ignored the TIP and kept building Edit's old_string from smart_read
37
+ * snippets (tool-audit 2026-04-24: read_for_edit = 0-1% of Claude calls
38
+ * vs 33% for Codex, which gets explicit prompt-level enforcement).
39
+ *
40
+ * New behaviour driven by TOKEN_PILOT_MODE:
41
+ * - advisory → allow + non-blocking hint when the file wasn't prepped
42
+ * - deny → block when the file wasn't prepped (the default)
43
+ * - strict → same as deny, plus event log for telemetry
44
+ *
45
+ * Pure decision logic lives in src/hooks/pre-edit.ts — this wrapper is
46
+ * responsible only for stdin parsing and I/O-bound context resolution
47
+ * (file existence, prep-state lookup, env vars).
48
+ */
19
49
  export declare function handleHookEdit(): void;
20
50
  export declare function handleInstallHook(projectRoot: string): Promise<void>;
21
51
  export declare function handleUninstallHook(projectRoot: string): Promise<void>;
package/dist/index.js CHANGED
@@ -16,8 +16,8 @@ process.stderr.on("error", (err) => {
16
16
  throw err;
17
17
  });
18
18
  import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
19
- import { readFileSync, realpathSync, appendFileSync, mkdirSync } from "node:fs";
20
- import { join } from "node:path";
19
+ import { existsSync, readFileSync, realpathSync, appendFileSync, mkdirSync, } from "node:fs";
20
+ import { join, resolve } from "node:path";
21
21
  import { homedir } from "node:os";
22
22
  import { execFile } from "node:child_process";
23
23
  import { promisify } from "node:util";
@@ -52,6 +52,8 @@ import { assessClaudeMd } from "./cli/claudemd-hygiene.js";
52
52
  import { decidePostBashAdvice, renderPostBashHookOutput, } from "./hooks/post-bash.js";
53
53
  import { decidePreBash, renderPreBashOutput } from "./hooks/pre-bash.js";
54
54
  import { decidePreGrep, renderPreGrepOutput } from "./hooks/pre-grep.js";
55
+ import { decidePreEdit, renderPreEditOutput, } from "./hooks/pre-edit.js";
56
+ import { isEditPrepared as isEditPreparedFn } from "./core/edit-prep-state.js";
55
57
  import { parseEnforcementMode } from "./server/enforcement-mode.js";
56
58
  const execFileAsync = promisify(execFile);
57
59
  export const CODE_EXTENSIONS = new Set([
@@ -293,11 +295,41 @@ export async function main(cliArgs = process.argv.slice(2)) {
293
295
  return;
294
296
  }
295
297
  }
298
/**
 * Report whether `candidate` points inside the token-pilot plugin cache,
 * i.e. its resolved absolute form contains the segment sequence
 * `plugins/cache/token-pilot/` (either `/` or `\` as separator).
 *
 * Used as a guard against a project root that is really the plugin's own
 * install directory, in which case relative paths would be resolved inside
 * the plugin install rather than the user's repository.
 *
 * @param candidate - Path passed as project root; may be empty or undefined.
 * @returns true when the resolved path contains the cache segment; false
 *   for falsy input or when the path cannot be resolved.
 */
export function looksLikePluginCacheDir(candidate) {
    if (!candidate)
        return false;
    let absolute;
    try {
        absolute = resolve(candidate);
    }
    catch {
        // resolve() rejects non-string arguments; treat that as "not a cache dir".
        return false;
    }
    const cacheSegment = /[\\/]plugins[\\/]cache[\\/]token-pilot[\\/]/;
    return cacheSegment.test(absolute);
}
296
320
  export async function startServer(cliArgs = process.argv.slice(2)) {
297
- let projectRoot = cliArgs[0] || process.cwd();
321
+ // Defensive: ignore a poisoned cliArgs[0] pointing into the plugin install
322
+ // dir. Fall through to the INIT_CWD / PWD / cwd detection below — same
323
+ // behaviour as if the argument had never been passed.
324
+ let explicitRoot = cliArgs[0];
325
+ if (explicitRoot && looksLikePluginCacheDir(explicitRoot)) {
326
+ console.error(`[token-pilot] ignoring "${explicitRoot}" — looks like the plugin cache dir (start.sh bug). Auto-detecting project root instead.`);
327
+ explicitRoot = "";
328
+ }
329
+ let projectRoot = explicitRoot || process.cwd();
298
330
  // Detect git root for reliable project root
299
331
  // Try multiple sources: args[0] → INIT_CWD (npm/npx invoking dir) → PWD → cwd
300
- if (!cliArgs[0]) {
332
+ if (!explicitRoot) {
301
333
  const candidates = [
302
334
  process.env.INIT_CWD, // npm/npx sets this to invoking directory
303
335
  process.env.PWD, // shell working directory (may differ from cwd)
@@ -558,34 +590,65 @@ async function runHookReadDispatchImpl(filePathArg, mode, denyThreshold, project
558
590
  },
559
591
  });
560
592
  }
593
/**
 * PreToolUse:Edit / MultiEdit / Write enforcement entry point.
 *
 * Reads the hook payload from stdin, resolves the I/O-bound context
 * (code-file check, file existence, prep-state lookup, TOKEN_PILOT_MODE,
 * TOKEN_PILOT_BYPASS) and delegates the decision to decidePreEdit. The
 * rendered response, if any, is written to stdout.
 *
 * Behaviour by enforcement mode, as implemented by decidePreEdit:
 *   - advisory → allow + non-blocking hint when the file wasn't prepped
 *   - any other mode → block when the file wasn't prepped
 *
 * The process always exits with status 0. Unparseable stdin, a non-object
 * payload or a missing file path exit silently so the tool call passes
 * through untouched.
 */
export function handleHookEdit() {
    let input;
    try {
        const stdin = readFileSync(0, "utf-8");
        input = JSON.parse(stdin);
    }
    catch {
        process.exit(0);
    }
    // JSON.parse accepts `null` and other primitives as valid documents, so
    // `input` is not guaranteed to be an object. Optional chaining on `input`
    // keeps such payloads on the silent pass-through path instead of raising
    // an uncaught TypeError (non-zero exit).
    const filePath = input?.tool_input?.file_path;
    if (typeof filePath !== "string" || filePath.length === 0) {
        process.exit(0);
    }
    const projectRoot = process.env.CLAUDE_PROJECT_DIR || process.cwd();
    const ext = filePath.split(".").pop()?.toLowerCase() ?? "";
    const isCodeFile = CODE_EXTENSIONS.has(ext);
    const mode = parseEnforcementMode(process.env.TOKEN_PILOT_MODE);
    const bypassed = process.env.TOKEN_PILOT_BYPASS === "1";
    // Existence check must be sync + cheap — the hook is on the request hot
    // path. existsSync reports any failure as `false`, so a path that cannot
    // be checked is treated as "does not exist": Write-on-new still flows
    // through, and Edit on a missing file would error anyway.
    const fileExists = existsSync(filePath);
    // Prep state is only consulted for code files; other files are allowed
    // by decidePreEdit regardless.
    const isPrepared = isCodeFile
        ? isEditPreparedFn(projectRoot, filePath)
        : false;
    const decision = decidePreEdit(input, {
        mode,
        isCodeFile,
        fileExists,
        isPrepared,
        bypassed,
    });
    const rendered = renderPreEditOutput(decision);
    if (rendered)
        process.stdout.write(rendered);
    process.exit(0);
}
591
654
  export async function handleInstallHook(projectRoot) {
@@ -47,6 +47,12 @@ const MCP_INSTRUCTIONS_NAV = [
47
47
  const MCP_INSTRUCTIONS_EDIT = [
48
48
  "Token Pilot — token-efficient code reading (saves 60-80% tokens). ALWAYS prefer these tools over Read/cat/grep.",
49
49
  "",
50
+ "MANDATORY EDIT SAFETY — before ANY Edit/Write tool call on an existing code file:",
51
+ " → FIRST call read_for_edit(path, symbol=<target>) to obtain the exact old_string.",
52
+ " → NEVER build Edit's old_string from a smart_read / Read snippet — whitespace and",
53
+ " line-number prefixes diverge from disk and Edit silently mismatches.",
54
+ " → For a brand-new file, Write is fine; read_for_edit is only required for edits.",
55
+ "",
50
56
  "DECISION RULES — pick the first match:",
51
57
  "1. New codebase / unfamiliar project → project_overview",
52
58
  "2. Starting work on a directory → explore_area (outline + imports + tests + git log in one call)",
@@ -56,7 +62,7 @@ const MCP_INSTRUCTIONS_EDIT = [
56
62
  "4. Need one function/class body → read_symbol (loads only that symbol, NOT the whole file)",
57
63
  " - Preparing edit? Add include_edit_context=true to skip separate read_for_edit call",
58
64
  "5. Need MULTIPLE function/class bodies from same file → read_symbols (batch — one call instead of N)",
59
- "6. Preparing an edit → read_for_edit (returns exact text for Edit old_string)",
65
+ "6. Preparing an Edit → read_for_edit MANDATORY, not optional. Returns exact old_string.",
60
66
  "7. Verify edits after editing → read_diff (only changed hunks — REQUIRES smart_read BEFORE editing)",
61
67
  "8. Multiple files at once → smart_read_many (batch up to 20 files)",
62
68
  "9. Find where a symbol is used → find_usages (semantic: definitions + imports + usages)",
@@ -75,7 +81,7 @@ const MCP_INSTRUCTIONS_EDIT = [
75
81
  "",
76
82
  "WORKFLOWS:",
77
83
  "• Explore: project_overview → explore_area → smart_read → read_symbol",
78
- "• Edit: smart_read read_symbol(include_edit_context=true) → Edit → read_diff",
84
+ "• Edit (mandatory): smart_read (to pick target) → read_for_edit → Edit → read_diff",
79
85
  "• Docs: smart_read (outline) → read_section → read_for_edit(section=) → Edit → read_diff",
80
86
  "• Refactor: find_usages → read_symbols → read_for_edit → Edit",
81
87
  "• Long session: session_snapshot → compact context → continue with minimal state",
@@ -86,6 +92,12 @@ const MCP_INSTRUCTIONS_EDIT = [
86
92
  const MCP_INSTRUCTIONS_FULL = [
87
93
  "Token Pilot — token-efficient code reading (saves 60-80% tokens). ALWAYS prefer these tools over Read/cat/grep.",
88
94
  "",
95
+ "MANDATORY EDIT SAFETY — before ANY Edit/Write tool call on an existing code file:",
96
+ " → FIRST call read_for_edit(path, symbol=<target>) to obtain the exact old_string.",
97
+ " → NEVER build Edit's old_string from a smart_read / Read snippet — whitespace and",
98
+ " line-number prefixes diverge from disk and Edit silently mismatches.",
99
+ " → For a brand-new file, Write is fine; read_for_edit is only required for edits.",
100
+ "",
89
101
  "DECISION RULES — pick the first match:",
90
102
  "1. New codebase / unfamiliar project → project_overview",
91
103
  "2. Starting work on a directory → explore_area (outline + imports + tests + git log in one call)",
@@ -95,7 +107,7 @@ const MCP_INSTRUCTIONS_FULL = [
95
107
  "4. Need one function/class body → read_symbol (loads only that symbol, NOT the whole file)",
96
108
  " - Preparing edit? Add include_edit_context=true to skip separate read_for_edit call",
97
109
  "5. Need MULTIPLE function/class bodies from same file → read_symbols (batch — one call instead of N)",
98
- "6. Preparing an edit → read_for_edit (returns exact text for Edit old_string)",
110
+ "6. Preparing an Edit → read_for_edit MANDATORY, not optional. Returns exact old_string.",
99
111
  "7. Verify edits after editing → read_diff (only changed hunks — REQUIRES smart_read BEFORE editing)",
100
112
  "8. Multiple files at once → smart_read_many (batch up to 20 files)",
101
113
  "9. Find where a symbol is used → find_usages (semantic: definitions + imports + usages)",
@@ -117,7 +129,7 @@ const MCP_INSTRUCTIONS_FULL = [
117
129
  "",
118
130
  "WORKFLOWS:",
119
131
  "• Explore: project_overview → explore_area → smart_read → read_symbol",
120
- "• Edit: smart_read read_symbol(include_edit_context=true) → Edit → read_diff",
132
+ "• Edit (mandatory): smart_read (to pick target) → read_for_edit → Edit → read_diff",
121
133
  "• Docs: smart_read (outline) → read_section → read_for_edit(section=) → Edit → read_diff",
122
134
  "• Refactor: find_usages → read_symbols → read_for_edit → Edit → test_summary",
123
135
  "• Audit: code_audit + find_unused + Grep (for regex patterns)",
@@ -580,7 +592,7 @@ export const TOOL_DEFINITIONS = [
580
592
  },
581
593
  {
582
594
  name: "explore_area",
583
- description: "One-call exploration of a directory: outline (all symbols), imports (external deps + who imports this area), tests (matching test files), recent git changes. Use INSTEAD OF separate outline + related_files + git log calls.",
595
+ description: "One-call exploration of a directory: outline (all symbols), imports (external deps + who imports this area), tests (matching test files), recent git changes. Use INSTEAD OF separate outline + related_files + git log calls. Default since v0.30.0 returns only outline+changes — telemetry showed the all-4 default producing negative token reduction for small areas. Opt into imports/tests explicitly via `include` when you need them.",
584
596
  inputSchema: {
585
597
  type: "object",
586
598
  properties: {
@@ -594,7 +606,7 @@ export const TOOL_DEFINITIONS = [
594
606
  type: "string",
595
607
  enum: ["outline", "imports", "tests", "changes"],
596
608
  },
597
- description: "Sections to include (default: all)",
609
+ description: 'Sections to include. Default: ["outline","changes"]. Add "imports" for dep graph, "tests" to map test files — both can be heavy on large areas.',
598
610
  },
599
611
  },
600
612
  required: ["path"],
package/dist/server.js CHANGED
@@ -69,6 +69,10 @@ export async function createServer(projectRoot, options) {
69
69
  // entirely; callers that care about durability should not use it.
70
70
  const shutdownFlush = () => {
71
71
  void sessionRegistries.flushAll();
72
+ // Stop the 5-minute ast-index tick so we don't block exit on SIGINT/SIGTERM.
73
+ // .unref() already makes it non-keeping, but clearing is defensive and
74
+ // avoids a stray `update` firing during shutdown.
75
+ astIndex.stopPeriodicUpdate();
72
76
  };
73
77
  process.once("beforeExit", shutdownFlush);
74
78
  process.once("SIGINT", shutdownFlush);
@@ -222,13 +226,25 @@ export async function createServer(projectRoot, options) {
222
226
  fileWatcher.onAstUpdate(() => sessionCache.invalidateByAst());
223
227
  }
224
228
  }
225
- // Wire session cache to git watcher
226
- if (sessionCache) {
227
- gitWatcher.onBranchSwitchEvent((changedFiles) => {
229
+ // Wire git-watcher → session cache + AST index.
230
+ // Always registers — even without sessionCache — so branch-switch still
231
+ // triggers the index update. Without this the index went stale on every
232
+ // `git checkout` until the next file-touch (or never, for branches that
233
+ // only moved files the agent hadn't read yet).
234
+ gitWatcher.onBranchSwitchEvent((changedFiles) => {
235
+ if (sessionCache) {
228
236
  sessionCache.invalidateByFiles(changedFiles);
229
237
  sessionCache.invalidateByGit();
230
- });
231
- }
238
+ }
239
+ // Fire-and-forget. incrementalUpdate self-guards against
240
+ // disabled / oversized / uninitialised index states.
241
+ void astIndex.incrementalUpdate();
242
+ });
243
+ // 5-minute safety-net for long sessions where FileWatcher may miss events
244
+ // (Docker bind mounts, NFS, files mutated by sibling processes). Cheap —
245
+ // each tick is a single `ast-index update` call that bails early if the
246
+ // index isn't ready or the previous tick is still running.
247
+ astIndex.startPeriodicUpdate();
232
248
  // Read version from package.json
233
249
  let pkgVersion = "0.1.1";
234
250
  try {