pi-doc-injector 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,11 +101,18 @@ Injection is also skipped if the current context usage exceeds 80% of the token
101
101
 
102
102
  The extension uses a per-session injection model:
103
103
 
104
- - On `session_start`, the registry is rebuilt from scratch, resetting all `injected` flags.
104
+ - On `session_start`, the registry scans `docs/` and indexes all valid documents.
105
105
  - Within a session, once a document is injected, it won't be re-injected automatically.
106
106
  - Use `/doc-inject reset` to manually reset all flags and allow docs to be injected again.
107
107
  - Use `/doc-inject list` to see which docs have been injected (✅) and which are pending (⬜).
108
108
 
109
+ ### Injection Timing
110
+
111
+ - **User messages**: matched via the `input` event, injected before the assistant
112
+ responds — **same turn**, no delay.
113
+ - **Assistant streaming**: if the assistant mentions a NEW keyword mid-response,
114
+ generation is aborted and restarted with the doc injected immediately.
115
+
109
116
  ### System Prompt Lifecycle
110
117
 
111
118
  Pi **reconstructs the system prompt from source files each turn** (verified against pi v0.70.6).
package/cache.ts ADDED
@@ -0,0 +1,79 @@
1
+ /**
2
+ * Keyword cache persistence — load/save the `.pi/doc-injector-cache.json` file.
3
+ *
4
+ * Cache format:
5
+ * { version: 1, files: { [relativePath]: { mtimeMs: number, keywords: string[] } } }
6
+ *
7
+ * Invalid files (wrong version, bad JSON, ENOENT) result in an empty cache.
8
+ */
9
+ import { mkdir, readFile, writeFile } from "node:fs/promises";
10
+ import { dirname, join } from "node:path";
11
+ import type { KeywordCache } from "./types";
12
+
13
+ const CACHE_FILENAME = ".pi/doc-injector-cache.json";
14
+ const CACHE_VERSION = 1;
15
+
16
/**
 * Read the keyword cache stored under `cwd`.
 *
 * Every path returns a cache: a missing file, an unreadable file, malformed
 * JSON, or a payload that fails validation all produce a fresh empty cache.
 * All of those except ENOENT (no cache written yet) are reported through
 * `console.warn`.
 */
export async function loadCache(cwd: string): Promise<KeywordCache> {
  const cachePath = join(cwd, CACHE_FILENAME);

  let payload: unknown;
  try {
    payload = JSON.parse(await readFile(cachePath, "utf-8"));
  } catch (err) {
    // A missing cache file is the normal first-run state; stay quiet for it.
    const isMissing = (err as NodeJS.ErrnoException).code === "ENOENT";
    if (!isMissing) {
      console.warn(
        `[doc-injector] Failed to read cache at ${cachePath}:`,
        err instanceof Error ? err.message : String(err),
      );
    }
    return emptyCache();
  }

  if (isValidCache(payload)) {
    return payload;
  }

  console.warn(
    `[doc-injector] Invalid cache format or version at ${cachePath}, resetting.`,
  );
  return emptyCache();
}
47
+
48
/**
 * Persist the keyword cache as pretty-printed JSON under `cwd`.
 *
 * The parent directory is created first. Directory creation stays
 * best-effort, but a failure is now logged instead of being dropped silently:
 * `mkdir` with `recursive: true` does not fail when the directory already
 * exists, so an error here is a genuine problem (permissions, a file in the
 * way) and explains the `writeFile` failure that usually follows.
 *
 * @param cwd - Project root the cache path is resolved against.
 * @param cache - Cache contents to serialize.
 * @throws Whatever `writeFile` rejects with if the file cannot be written.
 */
export async function saveCache(
  cwd: string,
  cache: KeywordCache,
): Promise<void> {
  const cachePath = join(cwd, CACHE_FILENAME);
  const cacheDir = dirname(cachePath);

  try {
    await mkdir(cacheDir, { recursive: true });
  } catch (err) {
    console.warn(
      `[doc-injector] Failed to create cache directory ${cacheDir}:`,
      err instanceof Error ? err.message : String(err),
    );
  }

  await writeFile(cachePath, JSON.stringify(cache, null, 2), "utf-8");
}
66
+
67
/**
 * Type guard for a parsed cache payload.
 *
 * Accepts only an object whose `version` equals `CACHE_VERSION` and whose
 * `files` is a plain (non-array) object in which every entry has a finite
 * numeric `mtimeMs` and a `keywords` array containing only strings. Checking
 * the entries keeps the guard truthful: a payload that passes can be used as
 * a `KeywordCache` without further checks.
 */
function isValidCache(value: unknown): value is KeywordCache {
  if (!value || typeof value !== "object") return false;
  const c = value as Record<string, unknown>;
  if (c.version !== CACHE_VERSION) return false;

  const files = c.files;
  // Arrays are objects too, but they are not a path → entry map.
  if (!files || typeof files !== "object" || Array.isArray(files)) return false;

  return Object.values(files).every((entry: unknown) => {
    if (!entry || typeof entry !== "object") return false;
    const e = entry as Record<string, unknown>;
    return (
      typeof e.mtimeMs === "number" &&
      Number.isFinite(e.mtimeMs) &&
      Array.isArray(e.keywords) &&
      e.keywords.every((k: unknown) => typeof k === "string")
    );
  });
}
75
+
76
/** Build a new cache object at the current version with no file entries. */
function emptyCache(): KeywordCache {
  const fresh: KeywordCache = { version: CACHE_VERSION, files: {} };
  return fresh;
}
package/commands.ts CHANGED
@@ -3,14 +3,26 @@
3
3
  */
4
4
  import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
5
5
  import type { DocRegistry } from "./registry";
6
+ import type { DocInjectorConfig } from "./types";
6
7
 
8
/** One file handed to the LLM keyword generator. */
type KeywordGenFile = { path: string; snippet: string; existingKeywords: string[] };

/** Callbacks and accessors the command registrar receives from the extension. */
export interface CommandDeps {
  /** Current document registry, or `null` when none has been loaded. */
  getRegistry: () => DocRegistry | null;
  /** Whether injection is currently enabled. */
  getEnabled: () => boolean;
  /** Turn injection on or off. */
  setEnabled: (v: boolean) => void;
  /** Rebuild the registry; resolves with a document count. */
  reloadRegistry: () => Promise<number>;
  /** Current extension configuration. */
  getConfig: () => DocInjectorConfig;
  /** Request LLM keyword generation for one batch of files. */
  generateKeywordsLLM: (files: Array<KeywordGenFile>) => Promise<void>;
}
13
17
 
18
+ /**
19
+ * Register all doc-injector slash commands on the given ExtensionAPI.
20
+ *
21
+ * Commands:
22
+ * - `/doc-inject [on|off|toggle|list|reset|status]` — manage injection state
23
+ * - `/doc-reload` — re-scan docs folder
24
+ * - `/doc-keywords-gen [path]` — generate LLM keywords for keyword-less files
25
+ */
14
26
  export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
15
27
  const cmd = (name: string, desc: string, handler: (args: string, ctx: ExtensionContext) => Promise<void>) => {
16
28
  pi.registerCommand(name, { description: desc, handler });
@@ -49,7 +61,8 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
49
61
  }
50
62
  const lines = entries.map((e) => {
51
63
  const status = e.injected ? "✅" : "⬜";
52
- return `${status} ${e.relativePath}: "${e.title}" — keywords: [${e.keywords.join(", ")}]`;
64
+ const sourceTag = `[${e.keywordSource}]`;
65
+ return `${status} ${sourceTag} ${e.relativePath}: "${e.title}" — keywords: [${e.keywords.join(", ")}]`;
53
66
  });
54
67
  ctx.ui.notify(`📄 Registered docs:\n${lines.join("\n")}`, "info");
55
68
  } else {
@@ -81,4 +94,58 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
81
94
  ctx.ui.notify(`📄 Reload failed: ${err instanceof Error ? err.message : String(err)}`, "error");
82
95
  }
83
96
  });
97
+
98
+ cmd("doc-keywords-gen", "Generate LLM keywords: /doc-keywords-gen [path] — no arg = all keyword-less files", async (args, ctx) => {
99
+ const reg = deps.getRegistry();
100
+ if (!reg) {
101
+ ctx.ui.notify("📄 No registry loaded", "warning");
102
+ return;
103
+ }
104
+
105
+ const config = deps.getConfig();
106
+ if (!config.llmKeywords) {
107
+ ctx.ui.notify("📄 LLM keyword generation is disabled (llmKeywords: false in config)", "warning");
108
+ return;
109
+ }
110
+
111
+ const targetPath = args.trim();
112
+
113
+ // Filter to keyword-less entries (keywordSource !== "frontmatter", "cache", or "llm")
114
+ let candidates = reg.getEntries().filter((e) => {
115
+ if (e.keywordSource === "frontmatter") return false;
116
+ if (e.keywordSource === "cache") return false;
117
+ if (e.keywordSource === "llm") return false; // already LLM-generated
118
+ return true;
119
+ });
120
+
121
+ if (targetPath) {
122
+ candidates = candidates.filter((e) => e.relativePath.includes(targetPath));
123
+ if (candidates.length === 0) {
124
+ ctx.ui.notify(`📄 No keyword-less files matching "${targetPath}"`, "info");
125
+ return;
126
+ }
127
+ }
128
+
129
+ if (candidates.length === 0) {
130
+ ctx.ui.notify("📄 All files already have keywords", "info");
131
+ return;
132
+ }
133
+
134
+ const batchSize = config.llmBatchSize;
135
+ const batches: Array<Array<{ path: string; snippet: string; existingKeywords: string[] }>> = [];
136
+ for (let i = 0; i < candidates.length; i += batchSize) {
137
+ const batch = candidates.slice(i, i + batchSize).map((e) => ({
138
+ path: e.relativePath,
139
+ snippet: e.content.slice(0, 500),
140
+ existingKeywords: e.keywords,
141
+ }));
142
+ batches.push(batch);
143
+ }
144
+
145
+ ctx.ui.notify(`📄 Sending ${batches.length} keyword-generation batch(es) for ${candidates.length} file(s)...`, "info");
146
+
147
+ for (const batch of batches) {
148
+ await deps.generateKeywordsLLM(batch);
149
+ }
150
+ });
84
151
  }
package/config.ts CHANGED
@@ -2,50 +2,85 @@
2
2
  * Configuration loader for the Doc Injector extension.
3
3
  * Reads from `.pi/doc-injector.json` with fallback to defaults.
4
4
  */
5
- import { existsSync, readFileSync } from "node:fs";
5
+ import { readFile } from "node:fs/promises";
6
6
  import { join } from "node:path";
7
7
  import { DEFAULT_CONFIG, type DocInjectorConfig } from "./types";
8
8
 
9
9
  /**
10
- * Load config from `.pi/doc-injector.json` relative to the given cwd.
11
- * Falls back to DEFAULT_CONFIG if file doesn't exist or is invalid.
10
+ * Clamp an integer value to [min, max] range.
11
+ * Warns and clamps if out of range. Returns the default if not a number.
12
12
  */
13
- export function loadConfig(cwd: string): DocInjectorConfig {
14
- const configPath = join(cwd, ".pi", "doc-injector.json");
13
function clampInt(
  value: unknown,
  defaultVal: number,
  min: number,
  max: number,
  fieldName: string,
): number {
  // value:      raw config value (anything JSON can produce, or undefined)
  // defaultVal: returned when value is absent or not a usable number
  // min / max:  inclusive bounds; max may be Infinity for "no upper bound"
  // fieldName:  config key, used only in warning messages
  if (typeof value !== "number" || Number.isNaN(value)) {
    // An absent field silently takes the default; a present field of the
    // wrong type is a config mistake worth telling the user about.
    if (value !== undefined && value !== null) {
      console.warn(
        `[doc-injector] ${fieldName} must be a number, got ${typeof value}. Using default ${defaultVal}.`,
      );
    }
    return defaultVal;
  }

  // Fractions are truncated toward zero before the range check.
  const intVal = Math.trunc(value);
  if (intVal < min || intVal > max) {
    const clamped = Math.max(min, Math.min(max, intVal));
    // Avoid printing "1-Infinity" for fields that only have a lower bound.
    const range = Number.isFinite(max) ? `${min}-${max}` : `>= ${min}`;
    console.warn(`[doc-injector] ${fieldName} must be ${range}, got ${intVal}. Clamping to ${clamped}.`);
    return clamped;
  }
  return intVal;
}
15
31
 
16
- if (!existsSync(configPath)) {
17
- return { ...DEFAULT_CONFIG };
32
/**
 * Validate a glob pattern array taken from user config.
 *
 * - Not an array: returns a copy of `defaultVal` (with a warning when a value
 *   was actually supplied).
 * - Explicitly empty array: returned as empty, so the user can clear the
 *   default patterns (for example `exclude: []`).
 * - Otherwise non-string entries are dropped with a warning; if nothing valid
 *   remains, a copy of `defaultVal` is returned.
 *
 * @param value - Raw config value.
 * @param defaultVal - Patterns to fall back to; never returned by reference.
 * @returns A new array of string patterns.
 */
function validateGlobArray(value: unknown, defaultVal: string[]): string[] {
  if (!Array.isArray(value)) {
    if (value !== undefined && value !== null) {
      console.warn(`[doc-injector] Glob patterns must be an array of strings, got ${typeof value}. Using defaults.`);
    }
    return [...defaultVal];
  }

  // An empty list is a deliberate choice, not an error.
  if (value.length === 0) {
    return [];
  }

  const result: string[] = [];
  for (const item of value) {
    if (typeof item === "string") {
      result.push(item);
    } else {
      console.warn(`[doc-injector] Non-string entry in glob array ignored: ${String(item)}`);
    }
  }
  // Every entry was invalid: treat the field as misconfigured.
  return result.length > 0 ? result : [...defaultVal];
}
50
+
51
/**
 * Load config from `.pi/doc-injector.json` relative to the given cwd.
 *
 * The file is read asynchronously and every field is validated: numeric
 * fields are clamped, glob lists are filtered, and string/boolean fields fall
 * back to their default (with a warning) when the JSON holds a different
 * type. A missing file yields the defaults silently; any other read or parse
 * failure, including a non-object JSON root, yields the defaults with a
 * warning.
 *
 * @param cwd - Directory containing the `.pi` folder.
 * @returns A complete config object; this function does not reject.
 */
export async function loadConfig(cwd: string): Promise<DocInjectorConfig> {
  const configPath = join(cwd, ".pi", "doc-injector.json");

  try {
    const raw = await readFile(configPath, "utf-8");
    const json: unknown = JSON.parse(raw);
    if (json === null || typeof json !== "object" || Array.isArray(json)) {
      // Handled by the catch below like any other unusable config file.
      throw new TypeError("config root must be a JSON object");
    }
    const parsed = json as Record<string, unknown>;

    // Return parsed[field] when it has the expected primitive type, else the default.
    const stringOr = (field: string, fallback: string): string => {
      const v = parsed[field];
      if (typeof v === "string") return v;
      if (v !== undefined && v !== null) {
        console.warn(`[doc-injector] ${field} must be a string, got ${typeof v}. Using default.`);
      }
      return fallback;
    };
    const booleanOr = (field: string, fallback: boolean): boolean => {
      const v = parsed[field];
      if (typeof v === "boolean") return v;
      if (v !== undefined && v !== null) {
        console.warn(`[doc-injector] ${field} must be a boolean, got ${typeof v}. Using default.`);
      }
      return fallback;
    };

    return {
      docsPath: stringOr("docsPath", DEFAULT_CONFIG.docsPath),
      matchThreshold: clampInt(parsed.matchThreshold, DEFAULT_CONFIG.matchThreshold, 1, Infinity, "matchThreshold"),
      contextThreshold: clampInt(parsed.contextThreshold, DEFAULT_CONFIG.contextThreshold, 0, 100, "contextThreshold"),
      recursive: booleanOr("recursive", DEFAULT_CONFIG.recursive),
      include: validateGlobArray(parsed.include, DEFAULT_CONFIG.include),
      exclude: validateGlobArray(parsed.exclude, DEFAULT_CONFIG.exclude),
      maxFileSize: clampInt(parsed.maxFileSize, DEFAULT_CONFIG.maxFileSize, 1024, 10 * 1024 * 1024, "maxFileSize"),
      autoKeywords: booleanOr("autoKeywords", DEFAULT_CONFIG.autoKeywords),
      llmKeywords: booleanOr("llmKeywords", DEFAULT_CONFIG.llmKeywords),
      maxConcurrent: clampInt(parsed.maxConcurrent, DEFAULT_CONFIG.maxConcurrent, 1, 100, "maxConcurrent"),
      llmBatchSize: clampInt(parsed.llmBatchSize, DEFAULT_CONFIG.llmBatchSize, 1, 100, "llmBatchSize"),
    };
  } catch (err) {
    // ENOENT means no config file exists yet; that is not worth a warning.
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      console.warn(
        `[doc-injector] Failed to parse config at ${configPath}:`,
        err instanceof Error ? err.message : String(err),
      );
    }
    return { ...DEFAULT_CONFIG };
  }
}
package/globber.ts ADDED
@@ -0,0 +1,48 @@
1
+ /**
2
+ * Glob filter for include/exclude pattern matching.
3
+ * Uses picomatch (0 deps, ~18 KB) to compile patterns once for O(1) matching.
4
+ */
5
+ import picomatch from "picomatch";
6
+ import type { GlobFilter } from "./types";
7
+
8
/**
 * Build a path filter from include and exclude glob lists.
 *
 * A path passes when it matches at least one `include` pattern (or `include`
 * is empty, meaning "everything") and matches no `exclude` pattern. Each
 * non-empty list is compiled into a matcher once, when the filter is created.
 *
 * @param include - Glob patterns for files to include
 * @param exclude - Glob patterns for files/dirs to exclude
 * @returns A GlobFilter with a `match` method
 */
export function createGlobFilter(
  include: string[],
  exclude: string[],
): GlobFilter {
  // null means "no patterns given", so the corresponding check is skipped.
  const isIncluded = include.length > 0 ? picomatch(include, { dot: true }) : null;
  const isExcluded = exclude.length > 0 ? picomatch(exclude, { dot: true }) : null;

  const match = (relativePath: string): boolean => {
    // Include check runs first; a miss ends the test before exclude is consulted.
    if (isIncluded !== null && !isIncluded(relativePath)) {
      return false;
    }
    return isExcluded === null || !isExcluded(relativePath);
  };

  return { match };
}
package/index.ts CHANGED
@@ -53,35 +53,72 @@
53
53
  * is cleared after injection, and `markInjected()` operates on the registry's
54
54
  * current entries, not the stale array.
55
55
  */
56
- import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
56
+ import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
57
+ import { Type } from "@sinclair/typebox";
57
58
  import { resolve } from "node:path";
59
+ import { loadCache, saveCache } from "./cache";
58
60
  import { loadConfig } from "./config";
59
61
  import { buildSystemPromptAppend, notifyInjection } from "./injector";
62
+ import { buildKeywordGenPrompt } from "./keyword-llm";
60
63
  import { extractText, KeywordMatcher } from "./matcher";
61
64
  import { DocRegistry } from "./registry";
62
- import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult } from "./types";
65
+ import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
63
66
  import { registerCommands } from "./commands";
64
67
 
65
68
  export default async function docInjectorExtension(pi: ExtensionAPI) {
66
69
  // ---- State ----
67
- let config = loadConfig(process.cwd());
70
+ let config = await loadConfig(process.cwd());
68
71
  let registry: DocRegistry | null = null;
72
+ let initRegistryPromise: Promise<void> | null = null;
69
73
  let enabled = true;
70
74
  let textBuffer = "";
71
75
  let pendingMatches = new Map<string, string[]>(); // filePath → matchedKeywords
72
76
  let abortingForInjection = false; // guard against cascading aborts
73
77
 
78
+ // P5.4b — Guard flags for LLM keyword generation
79
+ let keywordGenInFlight = false;
80
+ let llmBatchesCompleted = 0;
81
+ let llmTotalFiles = 0;
82
+ let cache: KeywordCache = { version: 1, files: {} };
83
+
74
84
  // ---- Helpers ----
75
85
  const getRegistry = () => registry;
76
86
  const getEnabled = () => enabled;
77
87
  const setEnabled = (v: boolean) => {
78
88
  enabled = v;
79
89
  };
90
+ const getConfig = () => config;
91
+
92
const safeSaveCache = async (cwd: string, dirtyEntries: Record<string, CacheEntry>) => {
  // Re-read the cache from disk right before writing so entries that were
  // saved while the scan was running (e.g. by the keyword tool) are kept.
  const onDisk = await loadCache(cwd);

  // Disk entries first, then this scan's entries on top: the scan result wins
  // whenever both hold the same key.
  const files: KeywordCache["files"] = Object.assign({}, onDisk.files, dirtyEntries);

  await saveCache(cwd, { version: 1, files });
};
80
110
 
81
111
  const initRegistry = async (cwd: string) => {
82
- config = loadConfig(cwd);
112
+ config = await loadConfig(cwd);
83
113
  const docsPath = resolve(cwd, config.docsPath);
84
- registry = await DocRegistry.create(docsPath, config.recursive);
114
+ cache = await loadCache(cwd);
115
+ registry = await DocRegistry.create(docsPath, config, cache);
116
+
117
+ const dirty = registry.getDirtyCache();
118
+ if (Object.keys(dirty).length > 0) {
119
+ await safeSaveCache(cwd, dirty);
120
+ }
121
+
85
122
  const count = registry.getEntries().length;
86
123
  if (count > 0) {
87
124
  console.log(`[doc-injector] Loaded ${count} documents from ${docsPath}`);
@@ -98,30 +135,109 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
98
135
  );
99
136
  };
100
137
 
101
- let lastInitTime = 0;
138
+ // P5.4f generateKeywordsLLM: sets keywordGenInFlight and sends a user message
139
+ // with the prompt built by buildKeywordGenPrompt. The LLM will respond by
140
+ // calling the _doc_injector_keywords tool.
141
+ const generateKeywordsLLM = async (
142
+ files: Array<{ path: string; snippet: string; existingKeywords: string[] }>,
143
+ ) => {
144
+ keywordGenInFlight = true;
145
+ const prompt = buildKeywordGenPrompt(files);
146
+ pi.sendUserMessage(prompt, { deliverAs: "followUp" });
147
+ };
148
+
149
+ // P5.4a — Inline tool registration (BLOCKER-2 fix).
150
+ // Registered inside the factory for closure access to cache, cwd, saveCache,
151
+ // and llmBatchesCompleted. Uses real mtime from stat().
152
// Tool the model calls to hand back generated keywords. For each item the
// target file is stat'ed and its real mtime stored alongside the keywords in
// the in-memory cache, which is then written to disk.
pi.registerTool({
  name: "_doc_injector_keywords",
  label: "Doc Injector Keywords",
  description:
    "Save LLM-generated keywords for documentation files. Call this tool with the keywords array after analyzing file snippets.",
  parameters: Type.Object({
    keywords: Type.Array(
      Type.Object({
        path: Type.String(),
        keywords: Type.Array(Type.String()),
      }),
    ),
  }),
  execute: async (_id, params, _signal, _onUpdate, ctx) => {
    const generated = params.keywords as Array<{ path: string; keywords: string[] }>;
    const { stat } = await import("node:fs/promises");
    const { isAbsolute, relative, sep } = await import("node:path");
    const docsRoot = resolve(ctx.cwd, config.docsPath);
    let saved = 0;
    for (const item of generated) {
      const absPath = resolve(docsRoot, item.path);

      // The path comes from model output: refuse anything that resolves to
      // the docs root itself or to a location outside it (e.g. "../../x").
      const rel = relative(docsRoot, absPath);
      const escapesRoot =
        rel === "" || rel === ".." || rel.startsWith(`..${sep}`) || isAbsolute(rel);
      if (escapesRoot) {
        console.warn(`[doc-injector] Skipping keyword save for ${item.path}: path is outside the docs folder`);
        continue;
      }

      const fileStat = await stat(absPath).catch(() => null);
      if (!fileStat || !fileStat.isFile()) {
        console.warn(`[doc-injector] Skipping keyword save for ${item.path}: file not found`);
        continue;
      }

      // Normalize before capping so blanks and duplicates do not use up the
      // 20-keyword budget.
      const normalized = item.keywords
        .map((k) => k.trim().toLowerCase())
        .filter((k) => k.length > 0);
      cache.files[item.path] = {
        mtimeMs: fileStat.mtimeMs,
        keywords: [...new Set(normalized)].slice(0, 20),
      };
      saved++;
    }
    await saveCache(ctx.cwd, cache);
    // Counters are read by the agent_end summary notification.
    llmBatchesCompleted++;
    llmTotalFiles += saved;
    return {
      content: [{ type: "text" as const, text: `Keywords saved for ${saved} files.` }],
      details: undefined as never,
    };
  },
});
102
191
 
103
192
  // ---- Event: session_start ----
104
- // Pi fires session_start twice on startup (both with reason "startup").
105
- // Use a 2-second dedup window to skip the duplicate. Real session changes
106
- // (/new, /resume, /fork) happen well outside this window.
107
- pi.on("session_start", async (_event, ctx) => {
108
- const now = Date.now();
109
- if (now - lastInitTime < 100) return;
110
- lastInitTime = now;
111
- await initRegistry(ctx.cwd);
193
+ // Pi emits session_start for startup, reload, and real session transitions.
194
+ // Skip the reload variant because resources_discover will rebuild docs right
195
+ // after it, and deduplicate any overlapping non-reload inits.
196
+ pi.on("session_start", async (event, ctx) => {
197
+ // P5.4d Safety unbind: clear all LLM keyword gen state on session start
198
+ keywordGenInFlight = false;
199
+ llmBatchesCompleted = 0;
200
+ llmTotalFiles = 0;
201
+
202
+ if (event.reason === "reload") return;
203
+
204
+ if (initRegistryPromise) {
205
+ await initRegistryPromise;
206
+ return;
207
+ }
208
+
209
+ initRegistryPromise = initRegistry(ctx.cwd);
210
+ try {
211
+ await initRegistryPromise;
212
+ } finally {
213
+ initRegistryPromise = null;
214
+ }
112
215
  });
113
216
 
114
- const reloadRegistry = async (): Promise<number> => {
217
+ const reloadRegistry = async (cwd?: string): Promise<number> => {
115
218
  if (!registry) throw new Error("No registry loaded");
219
+ const effectiveCwd = cwd ?? process.cwd();
220
+
221
+ // Reload cache from disk to pick up LLM-generated entries
222
+ const freshCache = await loadCache(effectiveCwd);
223
+ cache = freshCache;
224
+ registry.updateCache(cache);
225
+
116
226
  await registry.rebuild();
227
+
228
+ const dirty = registry.getDirtyCache();
229
+ if (Object.keys(dirty).length > 0) {
230
+ await safeSaveCache(effectiveCwd, dirty);
231
+ }
232
+
117
233
  const count = registry.getEntries().length;
118
234
  console.log(`[doc-injector] Reloaded: ${count} documents`);
119
235
  return count;
120
236
  };
121
237
 
122
238
  // ---- Event: resources_discover (reload) ----
123
- pi.on("resources_discover", async (_event, _ctx) => {
124
- await reloadRegistry();
239
+ pi.on("resources_discover", async (_event, ctx) => {
240
+ await reloadRegistry(ctx.cwd);
125
241
  });
126
242
 
127
243
  // ---- Event: input (user message matching) ----
@@ -130,6 +246,17 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
130
246
  // BEFORE before_agent_start fires, so docs are injected in time for
131
247
  // the assistant's immediate response.
132
248
  pi.on("input", async (event, _ctx) => {
249
+ // P5.4d — Safety unbind: if the user is typing interactively, clear all
250
+ // LLM keyword gen state (they may have aborted the generation).
251
+ if (event.source === "interactive") {
252
+ keywordGenInFlight = false;
253
+ llmBatchesCompleted = 0;
254
+ llmTotalFiles = 0;
255
+ }
256
+
257
+ // P5.4b — Guard: skip keyword matching during LLM keyword generation
258
+ if (keywordGenInFlight) return;
259
+
133
260
  if (!enabled || !registry) return;
134
261
  if (!event.text) return;
135
262
 
@@ -147,6 +274,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
147
274
  // non-injected docs, abort the current generation and restart with the
148
275
  // injected context — no waiting for the next turn.
149
276
  pi.on("message_update", async (event, ctx) => {
277
+ // P5.4b — Guard: skip auto-abort logic during LLM keyword generation
278
+ if (keywordGenInFlight) return;
279
+
150
280
  if (!enabled || !registry) return;
151
281
 
152
282
  const msg = event.message;
@@ -187,6 +317,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
187
317
 
188
318
  // ---- Event: before_agent_start (inject into system prompt) ----
189
319
  pi.on("before_agent_start", async (event, ctx) => {
320
+ // P5.4b — Guard: skip injection during LLM keyword generation
321
+ if (keywordGenInFlight) return;
322
+
190
323
  if (!enabled || !registry || pendingMatches.size === 0) return;
191
324
 
192
325
  const matchedEntries: DocEntry[] = [];
@@ -227,13 +360,27 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
227
360
  };
228
361
  });
229
362
 
230
- // ---- Event: agent_end (restart after auto-abort) ----
231
- pi.on("agent_end", async () => {
363
+ // ---- Event: agent_end (restart after auto-abort + LLM batch summary) ----
364
+ pi.on("agent_end", async (event, ctx) => {
365
+ // P5.4c — Summary notification from agent_end (BLOCKER-3)
366
+ keywordGenInFlight = false;
367
+ if (llmBatchesCompleted > 0) {
368
+ await ctx.ui.notify(
369
+ `Doc keywords: ${llmTotalFiles} files across ${llmBatchesCompleted} batch(es)`,
370
+ "info",
371
+ );
372
+ llmBatchesCompleted = 0;
373
+ llmTotalFiles = 0;
374
+ }
375
+
232
376
  if (abortingForInjection) {
233
377
  abortingForInjection = false;
234
- // Send a follow-up message to restart the turn.
235
- // before_agent_start will inject the matched docs into context.
236
- pi.sendUserMessage("continue", { deliverAs: "followUp" });
378
+ // Defer sendUserMessage to next tick to avoid re-entrancy issues.
379
+ setTimeout(() => {
380
+ pi.sendUserMessage("continue");
381
+ }, 0);
382
+ } else {
383
+ console.log('[doc-injector] agent_end: abortingForInjection is false, skipping');
237
384
  }
238
385
  });
239
386
 
@@ -243,5 +390,7 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
243
390
  getEnabled,
244
391
  setEnabled,
245
392
  reloadRegistry,
393
+ getConfig,
394
+ generateKeywordsLLM,
246
395
  });
247
396
  }
package/injector.ts CHANGED
@@ -13,6 +13,21 @@ export interface NotifyCapability {
13
13
  notify: (msg: string, type?: "info" | "warning" | "error") => void;
14
14
  }
15
15
 
16
/**
 * Sanitize keywords for safe injection into the system prompt.
 *
 * - Replaces every line-breaking or control character with a space. This
 *   covers `\n` and `\r` as well as tab, vertical tab, form feed, NEL
 *   (U+0085) and the Unicode line/paragraph separators (U+2028, U+2029), so
 *   a keyword cannot start a new line in the prompt.
 * - Trims each keyword and drops the ones that end up empty
 * - Caps each keyword at 100 characters
 * - Enforces a hard limit of 20 keywords
 *
 * @param keywords - Raw keywords, possibly from untrusted document metadata.
 * @returns A new array; the input is not modified.
 */
function sanitizeKeywords(keywords: string[]): string[] {
  // C0 controls, DEL + C1 controls (includes NEL), and U+2028 / U+2029.
  const unsafeChars = /[\u0000-\u001F\u007F-\u009F\u2028\u2029]/g;
  return keywords
    .map((k) => k.replace(unsafeChars, " ").trim())
    .filter((k) => k.length > 0)
    .map((k) => (k.length > 100 ? k.slice(0, 100) : k))
    .slice(0, 20);
}
30
+
16
31
  /**
17
32
  * Build a system prompt append string from matched documents.
18
33
  */
@@ -29,7 +44,9 @@ export function buildSystemPromptAppend(
29
44
  ];
30
45
 
31
46
  for (const entry of entries) {
32
- const keywords = matchedKeywords.get(entry.filePath) ?? [];
47
+ // Sanitize keywords before display to prevent prompt injection
48
+ const rawKeywords = matchedKeywords.get(entry.filePath) ?? [];
49
+ const keywords = sanitizeKeywords(rawKeywords);
33
50
  sections.push(`### ${entry.title}`);
34
51
  sections.push(`Source: \`${entry.relativePath}\``);
35
52
  if (keywords.length > 0) {