pi-doc-injector 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/cache.ts ADDED
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Keyword cache persistence — load/save the `.pi/doc-injector-cache.json` file.
3
+ *
4
+ * Cache format:
5
+ * { version: 1, files: { [relativePath]: { mtimeMs: number, keywords: string[] } } }
6
+ *
7
+ * A cache file that is missing (ENOENT), is not valid JSON, or fails validation (e.g. wrong version) results in an empty cache.
8
+ */
9
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
10
+ import { dirname, join } from "node:path";
11
+ import type { KeywordCache } from "./types";
12
+
13
+ const CACHE_FILENAME = ".pi/doc-injector-cache.json";
14
+ const CACHE_VERSION = 1;
15
+
16
/**
 * Read the keyword cache stored under `cwd`.
 *
 * The cache file itself never causes a rejection: a missing file, unreadable
 * or unparsable content, and content rejected by `isValidCache` all produce a
 * fresh empty cache. Every failure except a missing file (ENOENT) is reported
 * through `console.warn`.
 *
 * @param cwd - Directory that contains the `.pi` folder.
 * @returns The parsed cache, or an empty cache when nothing usable is on disk.
 */
export async function loadCache(cwd: string): Promise<KeywordCache> {
  const cachePath = join(cwd, CACHE_FILENAME);

  let candidate: unknown;
  try {
    candidate = JSON.parse(await readFile(cachePath, "utf-8"));
  } catch (err) {
    // A missing file just means no cache has been written yet — stay quiet.
    const isMissing = (err as NodeJS.ErrnoException).code === "ENOENT";
    if (!isMissing) {
      console.warn(
        `[doc-injector] Failed to read cache at ${cachePath}:`,
        err instanceof Error ? err.message : String(err),
      );
    }
    return emptyCache();
  }

  if (isValidCache(candidate)) {
    return candidate;
  }

  console.warn(
    `[doc-injector] Invalid cache format or version at ${cachePath}, resetting.`,
  );
  return emptyCache();
}
47
+
48
/**
 * Persist the keyword cache as pretty-printed JSON under `cwd`.
 *
 * The parent directory is created first. `mkdir` with `recursive: true`
 * already resolves when the directory exists, so a rejection from it is a
 * genuine failure (permissions, a file in the way, ...). It is allowed to
 * propagate instead of being swallowed and resurfacing as a less specific
 * `writeFile` error.
 *
 * @param cwd - Directory that contains (or will contain) the `.pi` folder.
 * @param cache - Cache object to serialize.
 * @throws Rejects with the underlying filesystem error when the directory
 *         cannot be created or the file cannot be written.
 */
export async function saveCache(
  cwd: string,
  cache: KeywordCache,
): Promise<void> {
  const cachePath = join(cwd, CACHE_FILENAME);

  await mkdir(dirname(cachePath), { recursive: true });
  await writeFile(cachePath, JSON.stringify(cache, null, 2), "utf-8");
}
66
+
67
/**
 * Type guard: check that a parsed value really has the KeywordCache shape
 * `{ version, files: { [path]: { mtimeMs: number, keywords: string[] } } }`.
 *
 * Besides the version and the presence of `files`, every entry is checked so
 * that a hand-edited or partially corrupted cache is rejected here rather than
 * being typed as valid and failing later when an entry is used.
 *
 * @param value - Result of `JSON.parse` on the cache file.
 * @returns `true` only when the value and all of its entries are well-formed.
 */
function isValidCache(value: unknown): value is KeywordCache {
  if (!isPlainRecord(value)) return false;
  if (value.version !== CACHE_VERSION) return false;
  if (!isPlainRecord(value.files)) return false;
  return Object.values(value.files).every(isValidCacheEntry);
}

/** True for non-null, non-array objects (arrays also report `typeof "object"`). */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True when a single cache entry has a finite `mtimeMs` and a string-only `keywords` array. */
function isValidCacheEntry(entry: unknown): boolean {
  if (!isPlainRecord(entry)) return false;
  const { mtimeMs, keywords } = entry;
  return (
    typeof mtimeMs === "number" &&
    Number.isFinite(mtimeMs) &&
    Array.isArray(keywords) &&
    keywords.every((keyword) => typeof keyword === "string")
  );
}
75
+
76
/** Build a brand-new cache at the current version with no file entries. */
function emptyCache(): KeywordCache {
  const blank: KeywordCache = { version: CACHE_VERSION, files: {} };
  return blank;
}
package/commands.ts CHANGED
@@ -3,14 +3,26 @@
3
3
  */
4
4
  import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
5
5
  import type { DocRegistry } from "./registry";
6
+ import type { DocInjectorConfig } from "./types";
6
7
 
8
+ /** Dependencies injected into the command registrar. */
7
9
  export interface CommandDeps {
8
10
  getRegistry: () => DocRegistry | null;
9
11
  getEnabled: () => boolean;
10
12
  setEnabled: (v: boolean) => void;
11
13
  reloadRegistry: () => Promise<number>;
14
+ getConfig: () => DocInjectorConfig;
15
+ generateKeywordsLLM: (files: Array<{ path: string; snippet: string; existingKeywords: string[] }>) => Promise<void>;
12
16
  }
13
17
 
18
+ /**
19
+ * Register all doc-injector slash commands on the given ExtensionAPI.
20
+ *
21
+ * Commands:
22
+ * - `/doc-inject [on|off|toggle|list|reset|status]` — manage injection state
23
+ * - `/doc-reload` — re-scan docs folder
24
+ * - `/doc-keywords-gen [path]` — generate LLM keywords for keyword-less files
25
+ */
14
26
  export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
15
27
  const cmd = (name: string, desc: string, handler: (args: string, ctx: ExtensionContext) => Promise<void>) => {
16
28
  pi.registerCommand(name, { description: desc, handler });
@@ -49,7 +61,8 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
49
61
  }
50
62
  const lines = entries.map((e) => {
51
63
  const status = e.injected ? "✅" : "⬜";
52
- return `${status} ${e.relativePath}: "${e.title}" — keywords: [${e.keywords.join(", ")}]`;
64
+ const sourceTag = `[${e.keywordSource}]`;
65
+ return `${status} ${sourceTag} ${e.relativePath}: "${e.title}" — keywords: [${e.keywords.join(", ")}]`;
53
66
  });
54
67
  ctx.ui.notify(`📄 Registered docs:\n${lines.join("\n")}`, "info");
55
68
  } else {
@@ -81,4 +94,58 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
81
94
  ctx.ui.notify(`📄 Reload failed: ${err instanceof Error ? err.message : String(err)}`, "error");
82
95
  }
83
96
  });
97
+
98
+ cmd("doc-keywords-gen", "Generate LLM keywords: /doc-keywords-gen [path] — no arg = all keyword-less files", async (args, ctx) => {
99
+ const reg = deps.getRegistry();
100
+ if (!reg) {
101
+ ctx.ui.notify("📄 No registry loaded", "warning");
102
+ return;
103
+ }
104
+
105
+ const config = deps.getConfig();
106
+ if (!config.llmKeywords) {
107
+ ctx.ui.notify("📄 LLM keyword generation is disabled (llmKeywords: false in config)", "warning");
108
+ return;
109
+ }
110
+
111
+ const targetPath = args.trim();
112
+
113
+ // Keep only keyword-less entries: skip any whose keywords came from frontmatter, the cache, or a previous LLM run.
114
+ let candidates = reg.getEntries().filter((e) => {
115
+ if (e.keywordSource === "frontmatter") return false;
116
+ if (e.keywordSource === "cache") return false;
117
+ if (e.keywordSource === "llm") return false; // already LLM-generated
118
+ return true;
119
+ });
120
+
121
+ if (targetPath) {
122
+ candidates = candidates.filter((e) => e.relativePath.includes(targetPath));
123
+ if (candidates.length === 0) {
124
+ ctx.ui.notify(`📄 No keyword-less files matching "${targetPath}"`, "info");
125
+ return;
126
+ }
127
+ }
128
+
129
+ if (candidates.length === 0) {
130
+ ctx.ui.notify("📄 All files already have keywords", "info");
131
+ return;
132
+ }
133
+
134
+ const batchSize = config.llmBatchSize;
135
+ const batches: Array<Array<{ path: string; snippet: string; existingKeywords: string[] }>> = [];
136
+ for (let i = 0; i < candidates.length; i += batchSize) {
137
+ const batch = candidates.slice(i, i + batchSize).map((e) => ({
138
+ path: e.relativePath,
139
+ snippet: e.content.slice(0, 500),
140
+ existingKeywords: e.keywords,
141
+ }));
142
+ batches.push(batch);
143
+ }
144
+
145
+ ctx.ui.notify(`📄 Sending ${batches.length} keyword-generation batch(es) for ${candidates.length} file(s)...`, "info");
146
+
147
+ for (const batch of batches) {
148
+ await deps.generateKeywordsLLM(batch);
149
+ }
150
+ });
84
151
  }
package/config.ts CHANGED
@@ -2,50 +2,85 @@
2
2
  * Configuration loader for the Doc Injector extension.
3
3
  * Reads from `.pi/doc-injector.json` with fallback to defaults.
4
4
  */
5
- import { existsSync, readFileSync } from "node:fs";
5
+ import { readFile } from "node:fs/promises";
6
6
  import { join } from "node:path";
7
7
  import { DEFAULT_CONFIG, type DocInjectorConfig } from "./types";
8
8
 
9
9
  /**
10
- * Load config from `.pi/doc-injector.json` relative to the given cwd.
11
- * Falls back to DEFAULT_CONFIG if file doesn't exist or is invalid.
10
+ * Clamp an integer value to [min, max] range.
11
+ * Warns and clamps if out of range. Returns the default if not a number.
12
12
  */
13
- export function loadConfig(cwd: string): DocInjectorConfig {
14
- const configPath = join(cwd, ".pi", "doc-injector.json");
13
/**
 * Coerce a raw config value to an integer inside [min, max].
 *
 * - Anything that is not a finite number (wrong type, NaN, ±Infinity) yields
 *   `defaultVal`. Infinity matters because `JSON.parse("1e999")` produces it
 *   and it would otherwise slip through a range whose `max` is `Infinity`.
 * - Fractions are truncated toward zero.
 * - Out-of-range values are clamped and a warning is logged.
 *
 * @param value - Untrusted value read from the config file.
 * @param defaultVal - Returned when `value` is not a usable number.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound (may be `Infinity` for "no upper limit").
 * @param fieldName - Config key, used in the warning message.
 * @returns An integer within [min, max], or `defaultVal`.
 */
function clampInt(
  value: unknown,
  defaultVal: number,
  min: number,
  max: number,
  fieldName: string,
): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return defaultVal;
  }
  // `+ 0` normalizes the -0 that Math.trunc returns for inputs such as -0.5.
  const intVal = Math.trunc(value) + 0;
  if (intVal < min || intVal > max) {
    const clamped = Math.max(min, Math.min(max, intVal));
    console.warn(`[doc-injector] ${fieldName} must be ${min}-${max}, got ${intVal}. Clamping to ${clamped}.`);
    return clamped;
  }
  return intVal;
}
15
31
 
16
- if (!existsSync(configPath)) {
17
- return { ...DEFAULT_CONFIG };
32
/**
 * Normalize a glob-pattern list read from the config file.
 *
 * A non-array value yields a copy of `defaultVal`. Inside an array, string
 * entries are kept in order and every other entry is dropped with a warning.
 * If no string entry remains (including the empty-array case) a copy of
 * `defaultVal` is returned instead.
 *
 * @param value - Untrusted value from the parsed config.
 * @param defaultVal - Patterns to fall back to; never returned by reference.
 * @returns A new array of string patterns.
 */
function validateGlobArray(value: unknown, defaultVal: string[]): string[] {
  if (!Array.isArray(value)) {
    return [...defaultVal];
  }

  const patterns: string[] = [];
  for (let i = 0; i < value.length; i++) {
    const candidate: unknown = value[i];
    if (typeof candidate !== "string") {
      console.warn(`[doc-injector] Non-string entry in glob array ignored: ${String(candidate)}`);
      continue;
    }
    patterns.push(candidate);
  }

  if (patterns.length === 0) {
    return [...defaultVal];
  }
  return patterns;
}
50
+
51
+ /**
52
+ * Load config from `.pi/doc-injector.json` relative to the given cwd.
53
+ * Now async — uses readFile from fs/promises.
54
+ * Validates and clamps all numeric fields. Falls back to DEFAULT_CONFIG
55
+ * if file doesn't exist or is invalid.
56
+ */
57
+ export async function loadConfig(cwd: string): Promise<DocInjectorConfig> {
58
+ const configPath = join(cwd, ".pi", "doc-injector.json");
19
59
 
20
60
  try {
21
- const raw = readFileSync(configPath, "utf-8");
61
+ const raw = await readFile(configPath, "utf-8");
22
62
  const parsed = JSON.parse(raw) as Partial<DocInjectorConfig>;
23
63
 
24
- // Clamp contextThreshold to 0-100 range
25
- let contextThreshold = parsed.contextThreshold ?? DEFAULT_CONFIG.contextThreshold;
26
- if (typeof contextThreshold === "number" && (contextThreshold < 0 || contextThreshold > 100)) {
27
- console.warn(`[doc-injector] contextThreshold must be 0-100, got ${contextThreshold}. Clamping.`);
28
- contextThreshold = Math.max(0, Math.min(100, contextThreshold));
29
- }
30
-
31
- // Clamp matchThreshold to positive integers
32
- let matchThreshold = parsed.matchThreshold ?? DEFAULT_CONFIG.matchThreshold;
33
- if (typeof matchThreshold === "number" && matchThreshold < 1) {
34
- console.warn(`[doc-injector] matchThreshold must be >= 1, got ${matchThreshold}. Using 1.`);
35
- matchThreshold = 1;
36
- }
37
-
38
64
  return {
39
65
  docsPath: parsed.docsPath ?? DEFAULT_CONFIG.docsPath,
40
- matchThreshold,
41
- contextThreshold,
66
+ matchThreshold: clampInt(parsed.matchThreshold, DEFAULT_CONFIG.matchThreshold, 1, Infinity, "matchThreshold"),
67
+ contextThreshold: clampInt(parsed.contextThreshold, DEFAULT_CONFIG.contextThreshold, 0, 100, "contextThreshold"),
42
68
  recursive: parsed.recursive ?? DEFAULT_CONFIG.recursive,
69
+ include: validateGlobArray(parsed.include, DEFAULT_CONFIG.include),
70
+ exclude: validateGlobArray(parsed.exclude, DEFAULT_CONFIG.exclude),
71
+ maxFileSize: clampInt(parsed.maxFileSize, DEFAULT_CONFIG.maxFileSize, 1024, 10 * 1024 * 1024, "maxFileSize"),
72
+ autoKeywords: parsed.autoKeywords ?? DEFAULT_CONFIG.autoKeywords,
73
+ llmKeywords: parsed.llmKeywords ?? DEFAULT_CONFIG.llmKeywords,
74
+ maxConcurrent: clampInt(parsed.maxConcurrent, DEFAULT_CONFIG.maxConcurrent, 1, 100, "maxConcurrent"),
75
+ llmBatchSize: clampInt(parsed.llmBatchSize, DEFAULT_CONFIG.llmBatchSize, 1, 100, "llmBatchSize"),
43
76
  };
44
77
  } catch (err) {
45
- console.warn(
46
- `[doc-injector] Failed to parse config at ${configPath}:`,
47
- err instanceof Error ? err.message : String(err),
48
- );
78
+ if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
79
+ console.warn(
80
+ `[doc-injector] Failed to parse config at ${configPath}:`,
81
+ err instanceof Error ? err.message : String(err),
82
+ );
83
+ }
49
84
  return { ...DEFAULT_CONFIG };
50
85
  }
51
86
  }
package/globber.ts ADDED
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Glob filter for include/exclude pattern matching.
3
+ * Uses picomatch (0 deps, ~18 KB); patterns are compiled once up front so each path check reuses the precompiled matchers.
4
+ */
5
+ import picomatch from "picomatch";
6
+ import type { GlobFilter } from "./types";
7
+
8
/**
 * Build a path filter from include and exclude glob lists.
 *
 * Each non-empty list is compiled into a picomatch matcher once, with
 * `dot: true`. A path is accepted when it matches the include matcher (or no
 * include patterns were given) and does not match the exclude matcher (or no
 * exclude patterns were given).
 *
 * @param include - Glob patterns for files to include; empty means "everything".
 * @param exclude - Glob patterns for files/dirs to exclude; empty means "nothing".
 * @returns A GlobFilter whose `match` method applies both lists.
 */
export function createGlobFilter(
  include: string[],
  exclude: string[],
): GlobFilter {
  // An empty pattern list is represented by `null` so `match` can skip it.
  const compile = (patterns: string[]) =>
    patterns.length > 0 ? picomatch(patterns, { dot: true }) : null;

  const isIncluded = compile(include);
  const isExcluded = compile(exclude);

  return {
    match(relativePath: string): boolean {
      const passesInclude = isIncluded === null || isIncluded(relativePath);
      if (!passesInclude) {
        return false;
      }
      const hitsExclude = isExcluded !== null && isExcluded(relativePath);
      return !hitsExclude;
    },
  };
}
package/index.ts CHANGED
@@ -53,18 +53,21 @@
53
53
  * is cleared after injection, and `markInjected()` operates on the registry's
54
54
  * current entries, not the stale array.
55
55
  */
56
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
56
+ import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
57
+ import { Type } from "@sinclair/typebox";
57
58
  import { resolve } from "node:path";
59
+ import { loadCache, saveCache } from "./cache";
58
60
  import { loadConfig } from "./config";
59
61
  import { buildSystemPromptAppend, notifyInjection } from "./injector";
62
+ import { buildKeywordGenPrompt } from "./keyword-llm";
60
63
  import { extractText, KeywordMatcher } from "./matcher";
61
64
  import { DocRegistry } from "./registry";
62
- import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult } from "./types";
65
+ import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
63
66
  import { registerCommands } from "./commands";
64
67
 
65
68
  export default async function docInjectorExtension(pi: ExtensionAPI) {
66
69
  // ---- State ----
67
- let config = loadConfig(process.cwd());
70
+ let config = await loadConfig(process.cwd());
68
71
  let registry: DocRegistry | null = null;
69
72
  let initRegistryPromise: Promise<void> | null = null;
70
73
  let enabled = true;
@@ -72,17 +75,50 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
72
75
  let pendingMatches = new Map<string, string[]>(); // filePath → matchedKeywords
73
76
  let abortingForInjection = false; // guard against cascading aborts
74
77
 
78
+ // P5.4b — Guard flags for LLM keyword generation
79
+ let keywordGenInFlight = false;
80
+ let llmBatchesCompleted = 0;
81
+ let llmTotalFiles = 0;
82
+ let cache: KeywordCache = { version: 1, files: {} };
83
+
75
84
  // ---- Helpers ----
76
85
  const getRegistry = () => registry;
77
86
  const getEnabled = () => enabled;
78
87
  const setEnabled = (v: boolean) => {
79
88
  enabled = v;
80
89
  };
90
+ const getConfig = () => config;
91
+
92
+ const safeSaveCache = async (cwd: string, dirtyEntries: Record<string, CacheEntry>) => {
93
+ // MAJOR-2 fix: before saveCache, re-read cache from disk to merge
94
+ // LLM-written entries that may have landed during the scan.
95
+ const freshCache = await loadCache(cwd);
96
+ const mergedCache: KeywordCache = { version: 1, files: {} };
97
+
98
+ // Start with fresh (disk) entries — includes any LLM writes during scan
99
+ for (const [key, entry] of Object.entries(freshCache.files)) {
100
+ mergedCache.files[key] = entry;
101
+ }
102
+
103
+ // Overlay dirty entries from this scan (scan results take precedence)
104
+ for (const [key, entry] of Object.entries(dirtyEntries)) {
105
+ mergedCache.files[key] = entry;
106
+ }
107
+
108
+ await saveCache(cwd, mergedCache);
109
+ };
81
110
 
82
111
  const initRegistry = async (cwd: string) => {
83
- config = loadConfig(cwd);
112
+ config = await loadConfig(cwd);
84
113
  const docsPath = resolve(cwd, config.docsPath);
85
- registry = await DocRegistry.create(docsPath, config.recursive);
114
+ cache = await loadCache(cwd);
115
+ registry = await DocRegistry.create(docsPath, config, cache);
116
+
117
+ const dirty = registry.getDirtyCache();
118
+ if (Object.keys(dirty).length > 0) {
119
+ await safeSaveCache(cwd, dirty);
120
+ }
121
+
86
122
  const count = registry.getEntries().length;
87
123
  if (count > 0) {
88
124
  console.log(`[doc-injector] Loaded ${count} documents from ${docsPath}`);
@@ -99,11 +135,70 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
99
135
  );
100
136
  };
101
137
 
138
+ // P5.4f — generateKeywordsLLM: sets keywordGenInFlight and sends a user message
139
+ // with the prompt built by buildKeywordGenPrompt. The LLM will respond by
140
+ // calling the _doc_injector_keywords tool.
141
+ const generateKeywordsLLM = async (
142
+ files: Array<{ path: string; snippet: string; existingKeywords: string[] }>,
143
+ ) => {
144
+ keywordGenInFlight = true;
145
+ const prompt = buildKeywordGenPrompt(files);
146
+ pi.sendUserMessage(prompt, { deliverAs: "followUp" });
147
+ };
148
+
149
+ // P5.4a — Inline tool registration (BLOCKER-2 fix).
150
+ // Registered inside the factory for closure access to cache, cwd, saveCache,
151
+ // and llmBatchesCompleted. Uses real mtime from stat().
152
+ pi.registerTool({
153
+ name: "_doc_injector_keywords",
154
+ label: "Doc Injector Keywords",
155
+ description:
156
+ "Save LLM-generated keywords for documentation files. Call this tool with the keywords array after analyzing file snippets.",
157
+ parameters: Type.Object({
158
+ keywords: Type.Array(
159
+ Type.Object({
160
+ path: Type.String(),
161
+ keywords: Type.Array(Type.String()),
162
+ }),
163
+ ),
164
+ }),
165
+ execute: async (_id, params, _signal, _onUpdate, ctx) => {
166
+ const generated = params.keywords as Array<{ path: string; keywords: string[] }>;
167
+ const { stat } = await import("node:fs/promises");
168
+ let saved = 0;
169
+ for (const item of generated) {
170
+ const absPath = resolve(ctx.cwd, config.docsPath, item.path);
171
+ const fileStat = await stat(absPath).catch(() => null);
172
+ if (!fileStat) {
173
+ console.warn(`[doc-injector] Skipping keyword save for ${item.path}: file not found`);
174
+ continue;
175
+ }
176
+ cache.files[item.path] = {
177
+ mtimeMs: fileStat.mtimeMs,
178
+ keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
179
+ };
180
+ saved++;
181
+ }
182
+ await saveCache(ctx.cwd, cache);
183
+ llmBatchesCompleted++;
184
+ llmTotalFiles += saved;
185
+ return {
186
+ content: [{ type: "text" as const, text: `Keywords saved for ${saved} files.` }],
187
+ details: undefined as never,
188
+ };
189
+ },
190
+ });
191
+
102
192
  // ---- Event: session_start ----
103
193
  // Pi emits session_start for startup, reload, and real session transitions.
104
194
  // Skip the reload variant because resources_discover will rebuild docs right
105
195
  // after it, and deduplicate any overlapping non-reload inits.
106
196
  pi.on("session_start", async (event, ctx) => {
197
+ // P5.4d — Safety unbind: clear all LLM keyword gen state on session start
198
+ keywordGenInFlight = false;
199
+ llmBatchesCompleted = 0;
200
+ llmTotalFiles = 0;
201
+
107
202
  if (event.reason === "reload") return;
108
203
 
109
204
  if (initRegistryPromise) {
@@ -119,17 +214,30 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
119
214
  }
120
215
  });
121
216
 
122
- const reloadRegistry = async (): Promise<number> => {
217
+ const reloadRegistry = async (cwd?: string): Promise<number> => {
123
218
  if (!registry) throw new Error("No registry loaded");
219
+ const effectiveCwd = cwd ?? process.cwd();
220
+
221
+ // Reload cache from disk to pick up LLM-generated entries
222
+ const freshCache = await loadCache(effectiveCwd);
223
+ cache = freshCache;
224
+ registry.updateCache(cache);
225
+
124
226
  await registry.rebuild();
227
+
228
+ const dirty = registry.getDirtyCache();
229
+ if (Object.keys(dirty).length > 0) {
230
+ await safeSaveCache(effectiveCwd, dirty);
231
+ }
232
+
125
233
  const count = registry.getEntries().length;
126
234
  console.log(`[doc-injector] Reloaded: ${count} documents`);
127
235
  return count;
128
236
  };
129
237
 
130
238
  // ---- Event: resources_discover (reload) ----
131
- pi.on("resources_discover", async (_event, _ctx) => {
132
- await reloadRegistry();
239
+ pi.on("resources_discover", async (_event, ctx) => {
240
+ await reloadRegistry(ctx.cwd);
133
241
  });
134
242
 
135
243
  // ---- Event: input (user message matching) ----
@@ -138,6 +246,17 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
138
246
  // BEFORE before_agent_start fires, so docs are injected in time for
139
247
  // the assistant's immediate response.
140
248
  pi.on("input", async (event, _ctx) => {
249
+ // P5.4d — Safety unbind: if the user is typing interactively, clear all
250
+ // LLM keyword gen state (they may have aborted the generation).
251
+ if (event.source === "interactive") {
252
+ keywordGenInFlight = false;
253
+ llmBatchesCompleted = 0;
254
+ llmTotalFiles = 0;
255
+ }
256
+
257
+ // P5.4b — Guard: skip keyword matching during LLM keyword generation
258
+ if (keywordGenInFlight) return;
259
+
141
260
  if (!enabled || !registry) return;
142
261
  if (!event.text) return;
143
262
 
@@ -155,6 +274,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
155
274
  // non-injected docs, abort the current generation and restart with the
156
275
  // injected context — no waiting for the next turn.
157
276
  pi.on("message_update", async (event, ctx) => {
277
+ // P5.4b — Guard: skip auto-abort logic during LLM keyword generation
278
+ if (keywordGenInFlight) return;
279
+
158
280
  if (!enabled || !registry) return;
159
281
 
160
282
  const msg = event.message;
@@ -195,6 +317,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
195
317
 
196
318
  // ---- Event: before_agent_start (inject into system prompt) ----
197
319
  pi.on("before_agent_start", async (event, ctx) => {
320
+ // P5.4b — Guard: skip injection during LLM keyword generation
321
+ if (keywordGenInFlight) return;
322
+
198
323
  if (!enabled || !registry || pendingMatches.size === 0) return;
199
324
 
200
325
  const matchedEntries: DocEntry[] = [];
@@ -235,13 +360,27 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
235
360
  };
236
361
  });
237
362
 
238
- // ---- Event: agent_end (restart after auto-abort) ----
239
- pi.on("agent_end", async () => {
363
+ // ---- Event: agent_end (restart after auto-abort + LLM batch summary) ----
364
+ pi.on("agent_end", async (event, ctx) => {
365
+ // P5.4c — Summary notification from agent_end (BLOCKER-3)
366
+ keywordGenInFlight = false;
367
+ if (llmBatchesCompleted > 0) {
368
+ await ctx.ui.notify(
369
+ `Doc keywords: ${llmTotalFiles} files across ${llmBatchesCompleted} batch(es)`,
370
+ "info",
371
+ );
372
+ llmBatchesCompleted = 0;
373
+ llmTotalFiles = 0;
374
+ }
375
+
240
376
  if (abortingForInjection) {
241
377
  abortingForInjection = false;
242
- // Send a follow-up message to restart the turn.
243
- // before_agent_start will inject the matched docs into context.
244
- pi.sendUserMessage("continue", { deliverAs: "followUp" });
378
+ // Defer sendUserMessage to next tick to avoid re-entrancy issues.
379
+ setTimeout(() => {
380
+ pi.sendUserMessage("continue");
381
+ }, 0);
382
+ } else {
383
+ console.log('[doc-injector] agent_end: abortingForInjection is false, skipping');
245
384
  }
246
385
  });
247
386
 
@@ -251,5 +390,7 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
251
390
  getEnabled,
252
391
  setEnabled,
253
392
  reloadRegistry,
393
+ getConfig,
394
+ generateKeywordsLLM,
254
395
  });
255
396
  }
package/injector.ts CHANGED
@@ -13,6 +13,21 @@ export interface NotifyCapability {
13
13
  notify: (msg: string, type?: "info" | "warning" | "error") => void;
14
14
  }
15
15
 
16
/**
 * Sanitize keywords for safe injection into the system prompt.
 *
 * - Replaces every control or line-breaking character with a space: the C0
 *   controls (including \n, \r, \t, VT, FF), DEL, NEL (U+0085), LINE SEPARATOR
 *   (U+2028) and PARAGRAPH SEPARATOR (U+2029). Stripping only \n and \r would
 *   still let a keyword start a new line in the prompt.
 * - Trims the result and drops keywords that end up empty.
 * - Caps each keyword at 100 UTF-16 code units without cutting a surrogate
 *   pair in half.
 * - Enforces a hard limit of 20 keywords.
 *
 * @param keywords - Raw keywords (frontmatter, cache, or LLM-generated).
 * @returns At most 20 single-line keywords, each at most 100 code units long.
 */
function sanitizeKeywords(keywords: string[]): string[] {
  const maxKeywordLength = 100;
  const maxKeywords = 20;
  const unsafeChars = /[\u0000-\u001f\u007f\u0085\u2028\u2029]/g;

  const truncate = (keyword: string): string => {
    if (keyword.length <= maxKeywordLength) return keyword;
    // If the cut would land between a high and low surrogate, back off by one
    // so the result never ends in a lone (invalid) surrogate.
    const lastUnit = keyword.charCodeAt(maxKeywordLength - 1);
    const endsOnHighSurrogate = lastUnit >= 0xd800 && lastUnit <= 0xdbff;
    return keyword.slice(0, endsOnHighSurrogate ? maxKeywordLength - 1 : maxKeywordLength);
  };

  return keywords
    .map((k) => k.replace(unsafeChars, " ").trim())
    .filter((k) => k.length > 0)
    .map(truncate)
    .slice(0, maxKeywords);
}
30
+
16
31
  /**
17
32
  * Build a system prompt append string from matched documents.
18
33
  */
@@ -29,7 +44,9 @@ export function buildSystemPromptAppend(
29
44
  ];
30
45
 
31
46
  for (const entry of entries) {
32
- const keywords = matchedKeywords.get(entry.filePath) ?? [];
47
+ // Sanitize keywords before display to prevent prompt injection
48
+ const rawKeywords = matchedKeywords.get(entry.filePath) ?? [];
49
+ const keywords = sanitizeKeywords(rawKeywords);
33
50
  sections.push(`### ${entry.title}`);
34
51
  sections.push(`Source: \`${entry.relativePath}\``);
35
52
  if (keywords.length > 0) {