pi-doc-injector 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/cache.ts +79 -0
- package/commands.ts +68 -1
- package/config.ts +63 -28
- package/globber.ts +48 -0
- package/index.ts +171 -22
- package/injector.ts +18 -1
- package/keyword-gen.ts +142 -0
- package/keyword-llm.ts +57 -0
- package/matcher.ts +14 -10
- package/package.json +5 -1
- package/picomatch.d.ts +11 -0
- package/registry.ts +361 -72
- package/types.ts +62 -3
package/README.md
CHANGED
|
@@ -101,11 +101,18 @@ Injection is also skipped if the current context usage exceeds 80% of the token
|
|
|
101
101
|
|
|
102
102
|
The extension uses a per-session injection model:
|
|
103
103
|
|
|
104
|
-
- On `session_start`, the registry
|
|
104
|
+
- On `session_start`, the registry scans `docs/` and indexes all valid documents.
|
|
105
105
|
- Within a session, once a document is injected, it won't be re-injected automatically.
|
|
106
106
|
- Use `/doc-inject reset` to manually reset all flags and allow docs to be injected again.
|
|
107
107
|
- Use `/doc-inject list` to see which docs have been injected (✅) and which are pending (⬜).
|
|
108
108
|
|
|
109
|
+
### Injection Timing
|
|
110
|
+
|
|
111
|
+
- **User messages**: matched via the `input` event, injected before the assistant
|
|
112
|
+
responds — **same turn**, no delay.
|
|
113
|
+
- **Assistant streaming**: if the assistant mentions a NEW keyword mid-response,
|
|
114
|
+
generation is aborted and restarted with the doc injected immediately.
|
|
115
|
+
|
|
109
116
|
### System Prompt Lifecycle
|
|
110
117
|
|
|
111
118
|
Pi **reconstructs the system prompt from source files each turn** (verified against pi v0.70.6).
|
package/cache.ts
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Keyword cache persistence — load/save the `.pi/doc-injector-cache.json` file.
|
|
3
|
+
*
|
|
4
|
+
* Cache format:
|
|
5
|
+
* { version: 1, files: { [relativePath]: { mtimeMs: number, keywords: string[] } } }
|
|
6
|
+
*
|
|
7
|
+
* Invalid files (wrong version, bad JSON, ENOENT) result in an empty cache.
|
|
8
|
+
*/
|
|
9
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
10
|
+
import { dirname, join } from "node:path";
|
|
11
|
+
import type { KeywordCache } from "./types";
|
|
12
|
+
|
|
13
|
+
const CACHE_FILENAME = ".pi/doc-injector-cache.json";
|
|
14
|
+
const CACHE_VERSION = 1;
|
|
15
|
+
|
|
16
|
+
/**
 * Read the keyword cache stored under `cwd`.
 *
 * Resolves to the parsed file content when it can be read, parsed as JSON,
 * and accepted by `isValidCache`. In every other case the result is a fresh
 * empty cache. A warning is logged for each failure except a missing file
 * (`ENOENT`), which is the normal state before the first save.
 *
 * @param cwd - Directory that `CACHE_FILENAME` is resolved against.
 * @returns The cache read from disk, or an empty cache.
 */
export async function loadCache(cwd: string): Promise<KeywordCache> {
  const location = join(cwd, CACHE_FILENAME);

  let decoded: unknown;
  try {
    decoded = JSON.parse(await readFile(location, "utf-8"));
  } catch (err) {
    // A missing cache file is expected; anything else is worth reporting.
    const isMissingFile = (err as NodeJS.ErrnoException).code === "ENOENT";
    if (!isMissingFile) {
      console.warn(
        `[doc-injector] Failed to read cache at ${location}:`,
        err instanceof Error ? err.message : String(err),
      );
    }
    return emptyCache();
  }

  if (isValidCache(decoded)) {
    return decoded;
  }

  console.warn(
    `[doc-injector] Invalid cache format or version at ${location}, resetting.`,
  );
  return emptyCache();
}
|
|
47
|
+
|
|
48
|
+
/**
 * Write the keyword cache to `CACHE_FILENAME` under `cwd` as pretty-printed JSON.
 *
 * The parent directory is created first (recursively). A failure of that step
 * is deliberately ignored here: if the directory really is unusable, the
 * subsequent `writeFile` rejects and that rejection reaches the caller.
 *
 * @param cwd - Directory that `CACHE_FILENAME` is resolved against.
 * @param cache - Cache object to serialize.
 */
export async function saveCache(
  cwd: string,
  cache: KeywordCache,
): Promise<void> {
  const target = join(cwd, CACHE_FILENAME);

  // Best-effort directory creation; writeFile below reports real problems.
  await mkdir(dirname(target), { recursive: true }).catch(() => undefined);

  const serialized = JSON.stringify(cache, null, 2);
  await writeFile(target, serialized, "utf-8");
}
|
|
66
|
+
|
|
67
|
+
/** True for non-null, non-array objects. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === "object" && value !== null && !Array.isArray(value);
}

/** True when `value` has the per-file entry shape `{ mtimeMs: number, keywords: string[] }`. */
function isValidCacheEntry(value: unknown): boolean {
  if (!isPlainRecord(value)) return false;
  if (typeof value.mtimeMs !== "number" || !Number.isFinite(value.mtimeMs)) return false;
  return Array.isArray(value.keywords) && value.keywords.every((k) => typeof k === "string");
}

/**
 * Check that a parsed value matches the KeywordCache shape.
 *
 * Requires a plain object whose `version` equals `CACHE_VERSION` and whose
 * `files` is a plain object (not an array) in which every entry carries a
 * finite numeric `mtimeMs` and a `keywords` array of strings. A single
 * malformed entry invalidates the whole cache, so callers never receive
 * data that violates the declared type.
 *
 * @param value - Result of `JSON.parse` on the cache file.
 * @returns `true` when `value` can safely be treated as a `KeywordCache`.
 */
function isValidCache(value: unknown): value is KeywordCache {
  if (!isPlainRecord(value)) return false;
  if (value.version !== CACHE_VERSION) return false;
  if (!isPlainRecord(value.files)) return false;
  return Object.values(value.files).every(isValidCacheEntry);
}
|
|
75
|
+
|
|
76
|
+
/** Build a new cache object carrying the current version and no file entries. */
function emptyCache(): KeywordCache {
  const blank: KeywordCache = { version: CACHE_VERSION, files: {} };
  return blank;
}
|
package/commands.ts
CHANGED
|
@@ -3,14 +3,26 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
5
5
|
import type { DocRegistry } from "./registry";
|
|
6
|
+
import type { DocInjectorConfig } from "./types";
|
|
6
7
|
|
|
8
|
+
/**
 * Callbacks the command registrar needs from the extension that owns the state.
 * The commands never touch that state directly; they go through these accessors.
 */
export interface CommandDeps {
  /** Current document registry, or `null` when none has been loaded. */
  getRegistry: () => DocRegistry | null;

  /** Whether doc injection is currently switched on. */
  getEnabled: () => boolean;

  /** Switch doc injection on or off. */
  setEnabled: (v: boolean) => void;

  /** Re-scan the docs; resolves to a document count. */
  reloadRegistry: () => Promise<number>;

  /** Currently active extension configuration. */
  getConfig: () => DocInjectorConfig;

  /** Request LLM keyword generation for one batch of files. */
  generateKeywordsLLM: (
    files: Array<{
      path: string;
      snippet: string;
      existingKeywords: string[];
    }>,
  ) => Promise<void>;
}
|
|
13
17
|
|
|
18
|
+
/**
|
|
19
|
+
* Register all doc-injector slash commands on the given ExtensionAPI.
|
|
20
|
+
*
|
|
21
|
+
* Commands:
|
|
22
|
+
* - `/doc-inject [on|off|toggle|list|reset|status]` — manage injection state
|
|
23
|
+
* - `/doc-reload` — re-scan docs folder
|
|
24
|
+
* - `/doc-keywords-gen [path]` — generate LLM keywords for keyword-less files
|
|
25
|
+
*/
|
|
14
26
|
export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
|
|
15
27
|
const cmd = (name: string, desc: string, handler: (args: string, ctx: ExtensionContext) => Promise<void>) => {
|
|
16
28
|
pi.registerCommand(name, { description: desc, handler });
|
|
@@ -49,7 +61,8 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
|
|
|
49
61
|
}
|
|
50
62
|
const lines = entries.map((e) => {
|
|
51
63
|
const status = e.injected ? "✅" : "⬜";
|
|
52
|
-
|
|
64
|
+
const sourceTag = `[${e.keywordSource}]`;
|
|
65
|
+
return `${status} ${sourceTag} ${e.relativePath}: "${e.title}" — keywords: [${e.keywords.join(", ")}]`;
|
|
53
66
|
});
|
|
54
67
|
ctx.ui.notify(`📄 Registered docs:\n${lines.join("\n")}`, "info");
|
|
55
68
|
} else {
|
|
@@ -81,4 +94,58 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
|
|
|
81
94
|
ctx.ui.notify(`📄 Reload failed: ${err instanceof Error ? err.message : String(err)}`, "error");
|
|
82
95
|
}
|
|
83
96
|
});
|
|
97
|
+
|
|
98
|
+
cmd("doc-keywords-gen", "Generate LLM keywords: /doc-keywords-gen [path] — no arg = all keyword-less files", async (args, ctx) => {
|
|
99
|
+
const reg = deps.getRegistry();
|
|
100
|
+
if (!reg) {
|
|
101
|
+
ctx.ui.notify("📄 No registry loaded", "warning");
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
const config = deps.getConfig();
|
|
106
|
+
if (!config.llmKeywords) {
|
|
107
|
+
ctx.ui.notify("📄 LLM keyword generation is disabled (llmKeywords: false in config)", "warning");
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
const targetPath = args.trim();
|
|
112
|
+
|
|
113
|
+
// Filter to keyword-less entries (keywordSource !== "frontmatter", "cache", or "llm")
|
|
114
|
+
let candidates = reg.getEntries().filter((e) => {
|
|
115
|
+
if (e.keywordSource === "frontmatter") return false;
|
|
116
|
+
if (e.keywordSource === "cache") return false;
|
|
117
|
+
if (e.keywordSource === "llm") return false; // already LLM-generated
|
|
118
|
+
return true;
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
if (targetPath) {
|
|
122
|
+
candidates = candidates.filter((e) => e.relativePath.includes(targetPath));
|
|
123
|
+
if (candidates.length === 0) {
|
|
124
|
+
ctx.ui.notify(`📄 No keyword-less files matching "${targetPath}"`, "info");
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (candidates.length === 0) {
|
|
130
|
+
ctx.ui.notify("📄 All files already have keywords", "info");
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const batchSize = config.llmBatchSize;
|
|
135
|
+
const batches: Array<Array<{ path: string; snippet: string; existingKeywords: string[] }>> = [];
|
|
136
|
+
for (let i = 0; i < candidates.length; i += batchSize) {
|
|
137
|
+
const batch = candidates.slice(i, i + batchSize).map((e) => ({
|
|
138
|
+
path: e.relativePath,
|
|
139
|
+
snippet: e.content.slice(0, 500),
|
|
140
|
+
existingKeywords: e.keywords,
|
|
141
|
+
}));
|
|
142
|
+
batches.push(batch);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
ctx.ui.notify(`📄 Sending ${batches.length} keyword-generation batch(es) for ${candidates.length} file(s)...`, "info");
|
|
146
|
+
|
|
147
|
+
for (const batch of batches) {
|
|
148
|
+
await deps.generateKeywordsLLM(batch);
|
|
149
|
+
}
|
|
150
|
+
});
|
|
84
151
|
}
|
package/config.ts
CHANGED
|
@@ -2,50 +2,85 @@
|
|
|
2
2
|
* Configuration loader for the Doc Injector extension.
|
|
3
3
|
* Reads from `.pi/doc-injector.json` with fallback to defaults.
|
|
4
4
|
*/
|
|
5
|
-
import {
|
|
5
|
+
import { readFile } from "node:fs/promises";
|
|
6
6
|
import { join } from "node:path";
|
|
7
7
|
import { DEFAULT_CONFIG, type DocInjectorConfig } from "./types";
|
|
8
8
|
|
|
9
9
|
/**
 * Coerce a config value to an integer inside the inclusive range [min, max].
 *
 * - Anything that is not a finite number (wrong type, `NaN`, `±Infinity`)
 *   yields `defaultVal`. JSON such as `1e999` parses to `Infinity`, so this
 *   case is reachable from a config file; it is reported with a warning.
 * - Finite numbers are truncated toward zero, then clamped into range with a
 *   warning when they fall outside it.
 *
 * @param value - Raw value read from the config.
 * @param defaultVal - Returned when `value` is not a usable number.
 * @param min - Smallest allowed result.
 * @param max - Largest allowed result (may be `Infinity` for "no upper bound").
 * @param fieldName - Config key, used in warning messages.
 * @returns An integer in [min, max], or `defaultVal`.
 */
function clampInt(
  value: unknown,
  defaultVal: number,
  min: number,
  max: number,
  fieldName: string,
): number {
  if (typeof value !== "number" || Number.isNaN(value)) {
    return defaultVal;
  }
  if (!Number.isFinite(value)) {
    // Without this guard, Infinity would pass an `max = Infinity` range check
    // and be returned as if it were a valid integer.
    console.warn(`[doc-injector] ${fieldName} must be a finite number, got ${value}. Using default ${defaultVal}.`);
    return defaultVal;
  }

  // `+ 0` turns the -0 that Math.trunc yields for e.g. -0.5 into plain 0.
  const intVal = Math.trunc(value) + 0;
  if (intVal < min || intVal > max) {
    const clamped = Math.max(min, Math.min(max, intVal));
    console.warn(`[doc-injector] ${fieldName} must be ${min}-${max}, got ${intVal}. Clamping to ${clamped}.`);
    return clamped;
  }
  return intVal;
}
|
|
15
31
|
|
|
16
|
-
|
|
17
|
-
|
|
32
|
+
/**
 * Validate a glob pattern array read from the config.
 *
 * - A non-array value yields a copy of `defaultVal`.
 * - Non-string entries are dropped with a warning.
 * - Empty-string entries are dropped with a warning: picomatch throws a
 *   `TypeError` when asked to compile `""`, so letting one through would
 *   fail later, when the patterns are compiled.
 * - If nothing usable remains, a copy of `defaultVal` is returned.
 *
 * @param value - Raw value read from the config.
 * @param defaultVal - Patterns to fall back to; never returned by reference.
 * @returns A new array of usable glob patterns.
 */
function validateGlobArray(value: unknown, defaultVal: string[]): string[] {
  if (!Array.isArray(value)) {
    return [...defaultVal];
  }

  const patterns: string[] = [];
  for (const item of value) {
    if (typeof item !== "string") {
      console.warn(`[doc-injector] Non-string entry in glob array ignored: ${String(item)}`);
      continue;
    }
    if (item === "") {
      console.warn("[doc-injector] Empty glob pattern ignored.");
      continue;
    }
    patterns.push(item);
  }

  return patterns.length > 0 ? patterns : [...defaultVal];
}
|
|
50
|
+
|
|
51
|
+
/**
 * Return `value` when it has the same primitive type as `fallback`.
 * `undefined`/`null` fall back silently (field not set); any other type
 * mismatch falls back with a warning.
 *
 * NOTE(review): assumes each field passed here has a single primitive type
 * matching its default — confirm against `DocInjectorConfig`.
 */
function configValueOrDefault<T extends string | boolean>(
  value: unknown,
  fallback: T,
  fieldName: string,
): T {
  if (value === undefined || value === null) {
    return fallback;
  }
  if (typeof value !== typeof fallback) {
    console.warn(
      `[doc-injector] ${fieldName} must be a ${typeof fallback}, got ${typeof value}. Using default.`,
    );
    return fallback;
  }
  return value as T;
}

/**
 * Load config from `.pi/doc-injector.json` relative to the given cwd.
 *
 * Every field is validated individually: numeric fields are truncated and
 * clamped, glob arrays are filtered, and string/boolean fields must match the
 * type of their default. A field that is missing or invalid falls back to its
 * `DEFAULT_CONFIG` value without affecting the others.
 *
 * If the file is missing, unreadable, not valid JSON, or its top-level value
 * is not an object, a copy of `DEFAULT_CONFIG` is returned; all of these
 * except a missing file (`ENOENT`) are logged as warnings.
 *
 * @param cwd - Project directory containing the `.pi` folder.
 * @returns A complete, validated configuration object.
 */
export async function loadConfig(cwd: string): Promise<DocInjectorConfig> {
  const configPath = join(cwd, ".pi", "doc-injector.json");

  try {
    const raw = await readFile(configPath, "utf-8");
    const parsed: unknown = JSON.parse(raw);

    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new TypeError("top-level JSON value must be an object");
    }
    const fields = parsed as Partial<Record<keyof DocInjectorConfig, unknown>>;

    return {
      // A non-string docsPath would otherwise make path.resolve() throw later.
      docsPath: configValueOrDefault(fields.docsPath, DEFAULT_CONFIG.docsPath, "docsPath"),
      matchThreshold: clampInt(fields.matchThreshold, DEFAULT_CONFIG.matchThreshold, 1, Infinity, "matchThreshold"),
      contextThreshold: clampInt(fields.contextThreshold, DEFAULT_CONFIG.contextThreshold, 0, 100, "contextThreshold"),
      recursive: configValueOrDefault(fields.recursive, DEFAULT_CONFIG.recursive, "recursive"),
      include: validateGlobArray(fields.include, DEFAULT_CONFIG.include),
      exclude: validateGlobArray(fields.exclude, DEFAULT_CONFIG.exclude),
      maxFileSize: clampInt(fields.maxFileSize, DEFAULT_CONFIG.maxFileSize, 1024, 10 * 1024 * 1024, "maxFileSize"),
      autoKeywords: configValueOrDefault(fields.autoKeywords, DEFAULT_CONFIG.autoKeywords, "autoKeywords"),
      llmKeywords: configValueOrDefault(fields.llmKeywords, DEFAULT_CONFIG.llmKeywords, "llmKeywords"),
      maxConcurrent: clampInt(fields.maxConcurrent, DEFAULT_CONFIG.maxConcurrent, 1, 100, "maxConcurrent"),
      llmBatchSize: clampInt(fields.llmBatchSize, DEFAULT_CONFIG.llmBatchSize, 1, 100, "llmBatchSize"),
    };
  } catch (err) {
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      console.warn(
        `[doc-injector] Failed to parse config at ${configPath}:`,
        err instanceof Error ? err.message : String(err),
      );
    }
    // Copy the arrays too, so callers can never mutate DEFAULT_CONFIG through the result.
    return {
      ...DEFAULT_CONFIG,
      include: [...DEFAULT_CONFIG.include],
      exclude: [...DEFAULT_CONFIG.exclude],
    };
  }
}
|
package/globber.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Glob filter for include/exclude pattern matching.
|
|
3
|
+
* Uses picomatch (0 deps, ~18 KB). Patterns are compiled once, up front, so each path check only runs the precompiled matchers.
|
|
4
|
+
*/
|
|
5
|
+
import picomatch from "picomatch";
|
|
6
|
+
import type { GlobFilter } from "./types";
|
|
7
|
+
|
|
8
|
+
/**
 * Build a path filter from include and exclude glob patterns.
 *
 * A path is accepted when it matches at least one `include` pattern and no
 * `exclude` pattern. An empty `include` list accepts every path (still
 * subject to `exclude`); an empty `exclude` list rejects nothing. Both
 * pattern lists are compiled with `dot: true`.
 *
 * @param include - Glob patterns for files to include
 * @param exclude - Glob patterns for files/dirs to exclude
 * @returns A GlobFilter with a `match` method
 */
export function createGlobFilter(
  include: string[],
  exclude: string[],
): GlobFilter {
  // Compile each list once; `null` stands for "no patterns configured".
  const compile = (patterns: string[]) =>
    patterns.length > 0 ? picomatch(patterns, { dot: true }) : null;

  const isIncluded = compile(include);
  const isExcluded = compile(exclude);

  return {
    match(relativePath: string): boolean {
      const rejectedByInclude = isIncluded !== null && !isIncluded(relativePath);
      if (rejectedByInclude) {
        return false;
      }
      // Exclude patterns are only consulted for paths that passed the include check.
      const rejectedByExclude = isExcluded !== null && isExcluded(relativePath);
      return !rejectedByExclude;
    },
  };
}
|
package/index.ts
CHANGED
|
@@ -53,35 +53,72 @@
|
|
|
53
53
|
* is cleared after injection, and `markInjected()` operates on the registry's
|
|
54
54
|
* current entries, not the stale array.
|
|
55
55
|
*/
|
|
56
|
-
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
56
|
+
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
57
|
+
import { Type } from "@sinclair/typebox";
|
|
57
58
|
import { resolve } from "node:path";
|
|
59
|
+
import { loadCache, saveCache } from "./cache";
|
|
58
60
|
import { loadConfig } from "./config";
|
|
59
61
|
import { buildSystemPromptAppend, notifyInjection } from "./injector";
|
|
62
|
+
import { buildKeywordGenPrompt } from "./keyword-llm";
|
|
60
63
|
import { extractText, KeywordMatcher } from "./matcher";
|
|
61
64
|
import { DocRegistry } from "./registry";
|
|
62
|
-
import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult } from "./types";
|
|
65
|
+
import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
|
|
63
66
|
import { registerCommands } from "./commands";
|
|
64
67
|
|
|
65
68
|
export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
66
69
|
// ---- State ----
|
|
67
|
-
let config = loadConfig(process.cwd());
|
|
70
|
+
let config = await loadConfig(process.cwd());
|
|
68
71
|
let registry: DocRegistry | null = null;
|
|
72
|
+
let initRegistryPromise: Promise<void> | null = null;
|
|
69
73
|
let enabled = true;
|
|
70
74
|
let textBuffer = "";
|
|
71
75
|
let pendingMatches = new Map<string, string[]>(); // filePath → matchedKeywords
|
|
72
76
|
let abortingForInjection = false; // guard against cascading aborts
|
|
73
77
|
|
|
78
|
+
// P5.4b — Guard flags for LLM keyword generation
|
|
79
|
+
let keywordGenInFlight = false;
|
|
80
|
+
let llmBatchesCompleted = 0;
|
|
81
|
+
let llmTotalFiles = 0;
|
|
82
|
+
let cache: KeywordCache = { version: 1, files: {} };
|
|
83
|
+
|
|
74
84
|
// ---- Helpers ----
|
|
75
85
|
const getRegistry = () => registry;
|
|
76
86
|
const getEnabled = () => enabled;
|
|
77
87
|
const setEnabled = (v: boolean) => {
|
|
78
88
|
enabled = v;
|
|
79
89
|
};
|
|
90
|
+
const getConfig = () => config;
|
|
91
|
+
|
|
92
|
+
const safeSaveCache = async (cwd: string, dirtyEntries: Record<string, CacheEntry>) => {
|
|
93
|
+
// MAJOR-2 fix: before saveCache, re-read cache from disk to merge
|
|
94
|
+
// LLM-written entries that may have landed during the scan.
|
|
95
|
+
const freshCache = await loadCache(cwd);
|
|
96
|
+
const mergedCache: KeywordCache = { version: 1, files: {} };
|
|
97
|
+
|
|
98
|
+
// Start with fresh (disk) entries — includes any LLM writes during scan
|
|
99
|
+
for (const [key, entry] of Object.entries(freshCache.files)) {
|
|
100
|
+
mergedCache.files[key] = entry;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Overlay dirty entries from this scan (scan results take precedence)
|
|
104
|
+
for (const [key, entry] of Object.entries(dirtyEntries)) {
|
|
105
|
+
mergedCache.files[key] = entry;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
await saveCache(cwd, mergedCache);
|
|
109
|
+
};
|
|
80
110
|
|
|
81
111
|
const initRegistry = async (cwd: string) => {
|
|
82
|
-
config = loadConfig(cwd);
|
|
112
|
+
config = await loadConfig(cwd);
|
|
83
113
|
const docsPath = resolve(cwd, config.docsPath);
|
|
84
|
-
|
|
114
|
+
cache = await loadCache(cwd);
|
|
115
|
+
registry = await DocRegistry.create(docsPath, config, cache);
|
|
116
|
+
|
|
117
|
+
const dirty = registry.getDirtyCache();
|
|
118
|
+
if (Object.keys(dirty).length > 0) {
|
|
119
|
+
await safeSaveCache(cwd, dirty);
|
|
120
|
+
}
|
|
121
|
+
|
|
85
122
|
const count = registry.getEntries().length;
|
|
86
123
|
if (count > 0) {
|
|
87
124
|
console.log(`[doc-injector] Loaded ${count} documents from ${docsPath}`);
|
|
@@ -98,30 +135,109 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
98
135
|
);
|
|
99
136
|
};
|
|
100
137
|
|
|
101
|
-
|
|
138
|
+
// P5.4f — generateKeywordsLLM: sets keywordGenInFlight and sends a user message
|
|
139
|
+
// with the prompt built by buildKeywordGenPrompt. The LLM will respond by
|
|
140
|
+
// calling the _doc_injector_keywords tool.
|
|
141
|
+
const generateKeywordsLLM = async (
|
|
142
|
+
files: Array<{ path: string; snippet: string; existingKeywords: string[] }>,
|
|
143
|
+
) => {
|
|
144
|
+
keywordGenInFlight = true;
|
|
145
|
+
const prompt = buildKeywordGenPrompt(files);
|
|
146
|
+
pi.sendUserMessage(prompt, { deliverAs: "followUp" });
|
|
147
|
+
};
|
|
148
|
+
|
|
149
|
+
// P5.4a — Inline tool registration (BLOCKER-2 fix).
|
|
150
|
+
// Registered inside the factory for closure access to cache, cwd, saveCache,
|
|
151
|
+
// and llmBatchesCompleted. Uses real mtime from stat().
|
|
152
|
+
pi.registerTool({
|
|
153
|
+
name: "_doc_injector_keywords",
|
|
154
|
+
label: "Doc Injector Keywords",
|
|
155
|
+
description:
|
|
156
|
+
"Save LLM-generated keywords for documentation files. Call this tool with the keywords array after analyzing file snippets.",
|
|
157
|
+
parameters: Type.Object({
|
|
158
|
+
keywords: Type.Array(
|
|
159
|
+
Type.Object({
|
|
160
|
+
path: Type.String(),
|
|
161
|
+
keywords: Type.Array(Type.String()),
|
|
162
|
+
}),
|
|
163
|
+
),
|
|
164
|
+
}),
|
|
165
|
+
execute: async (_id, params, _signal, _onUpdate, ctx) => {
|
|
166
|
+
const generated = params.keywords as Array<{ path: string; keywords: string[] }>;
|
|
167
|
+
const { stat } = await import("node:fs/promises");
|
|
168
|
+
let saved = 0;
|
|
169
|
+
for (const item of generated) {
|
|
170
|
+
const absPath = resolve(ctx.cwd, config.docsPath, item.path);
|
|
171
|
+
const fileStat = await stat(absPath).catch(() => null);
|
|
172
|
+
if (!fileStat) {
|
|
173
|
+
console.warn(`[doc-injector] Skipping keyword save for ${item.path}: file not found`);
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
176
|
+
cache.files[item.path] = {
|
|
177
|
+
mtimeMs: fileStat.mtimeMs,
|
|
178
|
+
keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
|
|
179
|
+
};
|
|
180
|
+
saved++;
|
|
181
|
+
}
|
|
182
|
+
await saveCache(ctx.cwd, cache);
|
|
183
|
+
llmBatchesCompleted++;
|
|
184
|
+
llmTotalFiles += saved;
|
|
185
|
+
return {
|
|
186
|
+
content: [{ type: "text" as const, text: `Keywords saved for ${saved} files.` }],
|
|
187
|
+
details: undefined as never,
|
|
188
|
+
};
|
|
189
|
+
},
|
|
190
|
+
});
|
|
102
191
|
|
|
103
192
|
// ---- Event: session_start ----
|
|
104
|
-
// Pi
|
|
105
|
-
//
|
|
106
|
-
//
|
|
107
|
-
pi.on("session_start", async (
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
193
|
+
// Pi emits session_start for startup, reload, and real session transitions.
|
|
194
|
+
// Skip the reload variant because resources_discover will rebuild docs right
|
|
195
|
+
// after it, and deduplicate any overlapping non-reload inits.
|
|
196
|
+
pi.on("session_start", async (event, ctx) => {
|
|
197
|
+
// P5.4d — Safety unbind: clear all LLM keyword gen state on session start
|
|
198
|
+
keywordGenInFlight = false;
|
|
199
|
+
llmBatchesCompleted = 0;
|
|
200
|
+
llmTotalFiles = 0;
|
|
201
|
+
|
|
202
|
+
if (event.reason === "reload") return;
|
|
203
|
+
|
|
204
|
+
if (initRegistryPromise) {
|
|
205
|
+
await initRegistryPromise;
|
|
206
|
+
return;
|
|
207
|
+
}
|
|
208
|
+
|
|
209
|
+
initRegistryPromise = initRegistry(ctx.cwd);
|
|
210
|
+
try {
|
|
211
|
+
await initRegistryPromise;
|
|
212
|
+
} finally {
|
|
213
|
+
initRegistryPromise = null;
|
|
214
|
+
}
|
|
112
215
|
});
|
|
113
216
|
|
|
114
|
-
const reloadRegistry = async (): Promise<number> => {
|
|
217
|
+
const reloadRegistry = async (cwd?: string): Promise<number> => {
|
|
115
218
|
if (!registry) throw new Error("No registry loaded");
|
|
219
|
+
const effectiveCwd = cwd ?? process.cwd();
|
|
220
|
+
|
|
221
|
+
// Reload cache from disk to pick up LLM-generated entries
|
|
222
|
+
const freshCache = await loadCache(effectiveCwd);
|
|
223
|
+
cache = freshCache;
|
|
224
|
+
registry.updateCache(cache);
|
|
225
|
+
|
|
116
226
|
await registry.rebuild();
|
|
227
|
+
|
|
228
|
+
const dirty = registry.getDirtyCache();
|
|
229
|
+
if (Object.keys(dirty).length > 0) {
|
|
230
|
+
await safeSaveCache(effectiveCwd, dirty);
|
|
231
|
+
}
|
|
232
|
+
|
|
117
233
|
const count = registry.getEntries().length;
|
|
118
234
|
console.log(`[doc-injector] Reloaded: ${count} documents`);
|
|
119
235
|
return count;
|
|
120
236
|
};
|
|
121
237
|
|
|
122
238
|
// ---- Event: resources_discover (reload) ----
|
|
123
|
-
pi.on("resources_discover", async (_event,
|
|
124
|
-
await reloadRegistry();
|
|
239
|
+
pi.on("resources_discover", async (_event, ctx) => {
|
|
240
|
+
await reloadRegistry(ctx.cwd);
|
|
125
241
|
});
|
|
126
242
|
|
|
127
243
|
// ---- Event: input (user message matching) ----
|
|
@@ -130,6 +246,17 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
130
246
|
// BEFORE before_agent_start fires, so docs are injected in time for
|
|
131
247
|
// the assistant's immediate response.
|
|
132
248
|
pi.on("input", async (event, _ctx) => {
|
|
249
|
+
// P5.4d — Safety unbind: if the user is typing interactively, clear all
|
|
250
|
+
// LLM keyword gen state (they may have aborted the generation).
|
|
251
|
+
if (event.source === "interactive") {
|
|
252
|
+
keywordGenInFlight = false;
|
|
253
|
+
llmBatchesCompleted = 0;
|
|
254
|
+
llmTotalFiles = 0;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// P5.4b — Guard: skip keyword matching during LLM keyword generation
|
|
258
|
+
if (keywordGenInFlight) return;
|
|
259
|
+
|
|
133
260
|
if (!enabled || !registry) return;
|
|
134
261
|
if (!event.text) return;
|
|
135
262
|
|
|
@@ -147,6 +274,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
147
274
|
// non-injected docs, abort the current generation and restart with the
|
|
148
275
|
// injected context — no waiting for the next turn.
|
|
149
276
|
pi.on("message_update", async (event, ctx) => {
|
|
277
|
+
// P5.4b — Guard: skip auto-abort logic during LLM keyword generation
|
|
278
|
+
if (keywordGenInFlight) return;
|
|
279
|
+
|
|
150
280
|
if (!enabled || !registry) return;
|
|
151
281
|
|
|
152
282
|
const msg = event.message;
|
|
@@ -187,6 +317,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
187
317
|
|
|
188
318
|
// ---- Event: before_agent_start (inject into system prompt) ----
|
|
189
319
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
320
|
+
// P5.4b — Guard: skip injection during LLM keyword generation
|
|
321
|
+
if (keywordGenInFlight) return;
|
|
322
|
+
|
|
190
323
|
if (!enabled || !registry || pendingMatches.size === 0) return;
|
|
191
324
|
|
|
192
325
|
const matchedEntries: DocEntry[] = [];
|
|
@@ -227,13 +360,27 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
227
360
|
};
|
|
228
361
|
});
|
|
229
362
|
|
|
230
|
-
// ---- Event: agent_end (restart after auto-abort) ----
|
|
231
|
-
pi.on("agent_end", async () => {
|
|
363
|
+
// ---- Event: agent_end (restart after auto-abort + LLM batch summary) ----
|
|
364
|
+
pi.on("agent_end", async (event, ctx) => {
|
|
365
|
+
// P5.4c — Summary notification from agent_end (BLOCKER-3)
|
|
366
|
+
keywordGenInFlight = false;
|
|
367
|
+
if (llmBatchesCompleted > 0) {
|
|
368
|
+
await ctx.ui.notify(
|
|
369
|
+
`Doc keywords: ${llmTotalFiles} files across ${llmBatchesCompleted} batch(es)`,
|
|
370
|
+
"info",
|
|
371
|
+
);
|
|
372
|
+
llmBatchesCompleted = 0;
|
|
373
|
+
llmTotalFiles = 0;
|
|
374
|
+
}
|
|
375
|
+
|
|
232
376
|
if (abortingForInjection) {
|
|
233
377
|
abortingForInjection = false;
|
|
234
|
-
//
|
|
235
|
-
|
|
236
|
-
|
|
378
|
+
// Defer sendUserMessage to next tick to avoid re-entrancy issues.
|
|
379
|
+
setTimeout(() => {
|
|
380
|
+
pi.sendUserMessage("continue");
|
|
381
|
+
}, 0);
|
|
382
|
+
} else {
|
|
383
|
+
console.log('[doc-injector] agent_end: abortingForInjection is false, skipping');
|
|
237
384
|
}
|
|
238
385
|
});
|
|
239
386
|
|
|
@@ -243,5 +390,7 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
243
390
|
getEnabled,
|
|
244
391
|
setEnabled,
|
|
245
392
|
reloadRegistry,
|
|
393
|
+
getConfig,
|
|
394
|
+
generateKeywordsLLM,
|
|
246
395
|
});
|
|
247
396
|
}
|
package/injector.ts
CHANGED
|
@@ -13,6 +13,21 @@ export interface NotifyCapability {
|
|
|
13
13
|
notify: (msg: string, type?: "info" | "warning" | "error") => void;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
/**
 * Sanitize keywords for safe injection into the system prompt.
 *
 * - Replaces every control character and Unicode line/paragraph separator
 *   (`\n`, `\r`, tab, VT, FF, NEL, U+2028, U+2029, ...) with a space, so a
 *   keyword can never start a new line in the prompt
 * - Trims surrounding whitespace and drops keywords that end up empty
 * - Caps each keyword at 100 characters (without leaving trailing whitespace)
 * - Enforces a hard limit of 20 keywords
 *
 * @param keywords - Raw matched keywords, possibly LLM- or file-derived.
 * @returns At most 20 single-line keywords of at most 100 characters each.
 */
function sanitizeKeywords(keywords: string[]): string[] {
  // \p{Cc} covers C0/C1 controls; U+2028/U+2029 are line terminators outside that class.
  const lineBreaking = /[\p{Cc}\u2028\u2029]/gu;

  return keywords
    .map((k) => k.replace(lineBreaking, " ").trim())
    .filter((k) => k.length > 0)
    .map((k) => k.slice(0, 100).trimEnd())
    .slice(0, 20);
}
|
|
30
|
+
|
|
16
31
|
/**
|
|
17
32
|
* Build a system prompt append string from matched documents.
|
|
18
33
|
*/
|
|
@@ -29,7 +44,9 @@ export function buildSystemPromptAppend(
|
|
|
29
44
|
];
|
|
30
45
|
|
|
31
46
|
for (const entry of entries) {
|
|
32
|
-
|
|
47
|
+
// Sanitize keywords before display to prevent prompt injection
|
|
48
|
+
const rawKeywords = matchedKeywords.get(entry.filePath) ?? [];
|
|
49
|
+
const keywords = sanitizeKeywords(rawKeywords);
|
|
33
50
|
sections.push(`### ${entry.title}`);
|
|
34
51
|
sections.push(`Source: \`${entry.relativePath}\``);
|
|
35
52
|
if (keywords.length > 0) {
|