pi-doc-injector 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cache.ts +79 -0
- package/commands.ts +68 -1
- package/config.ts +63 -28
- package/globber.ts +48 -0
- package/index.ts +154 -13
- package/injector.ts +18 -1
- package/keyword-gen.ts +142 -0
- package/keyword-llm.ts +57 -0
- package/matcher.ts +14 -10
- package/package.json +5 -1
- package/picomatch.d.ts +11 -0
- package/registry.ts +361 -72
- package/types.ts +62 -3
package/cache.ts
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Keyword cache persistence — load/save the `.pi/doc-injector-cache.json` file.
|
|
3
|
+
*
|
|
4
|
+
* Cache format:
|
|
5
|
+
* { version: 1, files: { [relativePath]: { mtimeMs: number, keywords: string[] } } }
|
|
6
|
+
*
|
|
7
|
+
* Invalid files (wrong version, bad JSON, ENOENT) result in an empty cache.
|
|
8
|
+
*/
|
|
9
|
+
import { mkdir, readFile, writeFile } from "node:fs/promises";
|
|
10
|
+
import { dirname, join } from "node:path";
|
|
11
|
+
import type { KeywordCache } from "./types";
|
|
12
|
+
|
|
13
|
+
const CACHE_FILENAME = ".pi/doc-injector-cache.json";
|
|
14
|
+
const CACHE_VERSION = 1;
|
|
15
|
+
|
|
16
|
+
/**
 * Read the persisted keyword cache for the project rooted at `cwd`.
 *
 * This function never rejects because of the cache file itself: a missing,
 * unreadable, unparsable or wrongly-shaped file all yield a fresh empty cache.
 * Every such case except "file does not exist" is reported via `console.warn`.
 *
 * @param cwd - Directory the cache file path is resolved against.
 * @returns The cache read from disk, or an empty cache when none is usable.
 */
export async function loadCache(cwd: string): Promise<KeywordCache> {
  const cachePath = join(cwd, CACHE_FILENAME);

  let parsed: unknown;
  try {
    const raw = await readFile(cachePath, "utf-8");
    parsed = JSON.parse(raw);
  } catch (err) {
    // A missing file simply means nothing has been cached yet — stay quiet.
    const isMissingFile = (err as NodeJS.ErrnoException).code === "ENOENT";
    if (!isMissingFile) {
      console.warn(
        `[doc-injector] Failed to read cache at ${cachePath}:`,
        err instanceof Error ? err.message : String(err),
      );
    }
    return emptyCache();
  }

  if (isValidCache(parsed)) {
    return parsed;
  }

  // Readable JSON, but not a cache this version understands: start over.
  console.warn(
    `[doc-injector] Invalid cache format or version at ${cachePath}, resetting.`,
  );
  return emptyCache();
}
|
|
47
|
+
|
|
48
|
+
/**
 * Persist the keyword cache as pretty-printed (2-space) JSON.
 *
 * The parent directory of the cache file is created first on a best-effort
 * basis; the returned promise rejects if serialisation or the write fails.
 *
 * @param cwd - Directory the cache file path is resolved against.
 * @param cache - Cache object to serialise.
 */
export async function saveCache(
  cwd: string,
  cache: KeywordCache,
): Promise<void> {
  const target = join(cwd, CACHE_FILENAME);
  const parentDir = dirname(target);

  try {
    await mkdir(parentDir, { recursive: true });
  } catch {
    // Best effort only. With `recursive: true` an already-existing directory
    // is not an error; if the directory is genuinely unusable, the write
    // below is what reports the failure to the caller.
  }

  const serialized = JSON.stringify(cache, null, 2);
  await writeFile(target, serialized, "utf-8");
}
|
|
66
|
+
|
|
67
|
+
/**
 * Check a single cache entry: an object with a numeric `mtimeMs` and a
 * `keywords` array containing only strings.
 */
function isValidCacheEntry(entry: unknown): boolean {
  if (!entry || typeof entry !== "object") return false;
  const e = entry as Record<string, unknown>;
  if (typeof e.mtimeMs !== "number") return false;
  return Array.isArray(e.keywords) && e.keywords.every((k) => typeof k === "string");
}

/**
 * Type guard: does a parsed JSON value have the KeywordCache shape?
 *
 * Requires `version === CACHE_VERSION` and `files` to be a plain (non-array)
 * object whose every value is a well-formed entry. Checking the entries keeps
 * the predicate honest: a hand-edited or corrupted file such as
 * `{ "version": 1, "files": [] }` or `{ ..., "files": { "a.md": null } }` is
 * rejected here instead of being handed to callers typed as a valid cache.
 *
 * @param value - Result of `JSON.parse` on the cache file.
 * @returns `true` only when `value` can safely be treated as a KeywordCache.
 */
function isValidCache(value: unknown): value is KeywordCache {
  if (!value || typeof value !== "object") return false;

  const candidate = value as Record<string, unknown>;
  if (candidate.version !== CACHE_VERSION) return false;

  const files = candidate.files;
  // `typeof [] === "object"`, so arrays must be excluded explicitly.
  if (!files || typeof files !== "object" || Array.isArray(files)) return false;

  return Object.values(files).every(isValidCacheEntry);
}
|
|
75
|
+
|
|
76
|
+
/**
 * Build a brand-new cache: current format version, no file entries.
 * A new object is created on every call, so callers may mutate the result.
 */
function emptyCache(): KeywordCache {
  const fresh: KeywordCache = { version: CACHE_VERSION, files: {} };
  return fresh;
}
|
package/commands.ts
CHANGED
|
@@ -3,14 +3,26 @@
|
|
|
3
3
|
*/
|
|
4
4
|
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
5
5
|
import type { DocRegistry } from "./registry";
|
|
6
|
+
import type { DocInjectorConfig } from "./types";
|
|
6
7
|
|
|
8
|
+
/** Dependencies injected into the command registrar. */
|
|
7
9
|
export interface CommandDeps {
|
|
8
10
|
getRegistry: () => DocRegistry | null;
|
|
9
11
|
getEnabled: () => boolean;
|
|
10
12
|
setEnabled: (v: boolean) => void;
|
|
11
13
|
reloadRegistry: () => Promise<number>;
|
|
14
|
+
getConfig: () => DocInjectorConfig;
|
|
15
|
+
generateKeywordsLLM: (files: Array<{ path: string; snippet: string; existingKeywords: string[] }>) => Promise<void>;
|
|
12
16
|
}
|
|
13
17
|
|
|
18
|
+
/**
|
|
19
|
+
* Register all doc-injector slash commands on the given ExtensionAPI.
|
|
20
|
+
*
|
|
21
|
+
* Commands:
|
|
22
|
+
* - `/doc-inject [on|off|toggle|list|reset|status]` — manage injection state
|
|
23
|
+
* - `/doc-reload` — re-scan docs folder
|
|
24
|
+
* - `/doc-keywords-gen [path]` — generate LLM keywords for keyword-less files
|
|
25
|
+
*/
|
|
14
26
|
export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
|
|
15
27
|
const cmd = (name: string, desc: string, handler: (args: string, ctx: ExtensionContext) => Promise<void>) => {
|
|
16
28
|
pi.registerCommand(name, { description: desc, handler });
|
|
@@ -49,7 +61,8 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
|
|
|
49
61
|
}
|
|
50
62
|
const lines = entries.map((e) => {
|
|
51
63
|
const status = e.injected ? "✅" : "⬜";
|
|
52
|
-
|
|
64
|
+
const sourceTag = `[${e.keywordSource}]`;
|
|
65
|
+
return `${status} ${sourceTag} ${e.relativePath}: "${e.title}" — keywords: [${e.keywords.join(", ")}]`;
|
|
53
66
|
});
|
|
54
67
|
ctx.ui.notify(`📄 Registered docs:\n${lines.join("\n")}`, "info");
|
|
55
68
|
} else {
|
|
@@ -81,4 +94,58 @@ export function registerCommands(pi: ExtensionAPI, deps: CommandDeps): void {
|
|
|
81
94
|
ctx.ui.notify(`📄 Reload failed: ${err instanceof Error ? err.message : String(err)}`, "error");
|
|
82
95
|
}
|
|
83
96
|
});
|
|
97
|
+
|
|
98
|
+
cmd("doc-keywords-gen", "Generate LLM keywords: /doc-keywords-gen [path] — no arg = all keyword-less files", async (args, ctx) => {
|
|
99
|
+
const reg = deps.getRegistry();
|
|
100
|
+
if (!reg) {
|
|
101
|
+
ctx.ui.notify("📄 No registry loaded", "warning");
|
|
102
|
+
return;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
const config = deps.getConfig();
|
|
106
|
+
if (!config.llmKeywords) {
|
|
107
|
+
ctx.ui.notify("📄 LLM keyword generation is disabled (llmKeywords: false in config)", "warning");
|
|
108
|
+
return;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
const targetPath = args.trim();
|
|
112
|
+
|
|
113
|
+
// Filter to keyword-less entries (keywordSource !== "frontmatter", "cache", or "llm")
|
|
114
|
+
let candidates = reg.getEntries().filter((e) => {
|
|
115
|
+
if (e.keywordSource === "frontmatter") return false;
|
|
116
|
+
if (e.keywordSource === "cache") return false;
|
|
117
|
+
if (e.keywordSource === "llm") return false; // already LLM-generated
|
|
118
|
+
return true;
|
|
119
|
+
});
|
|
120
|
+
|
|
121
|
+
if (targetPath) {
|
|
122
|
+
candidates = candidates.filter((e) => e.relativePath.includes(targetPath));
|
|
123
|
+
if (candidates.length === 0) {
|
|
124
|
+
ctx.ui.notify(`📄 No keyword-less files matching "${targetPath}"`, "info");
|
|
125
|
+
return;
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
if (candidates.length === 0) {
|
|
130
|
+
ctx.ui.notify("📄 All files already have keywords", "info");
|
|
131
|
+
return;
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
const batchSize = config.llmBatchSize;
|
|
135
|
+
const batches: Array<Array<{ path: string; snippet: string; existingKeywords: string[] }>> = [];
|
|
136
|
+
for (let i = 0; i < candidates.length; i += batchSize) {
|
|
137
|
+
const batch = candidates.slice(i, i + batchSize).map((e) => ({
|
|
138
|
+
path: e.relativePath,
|
|
139
|
+
snippet: e.content.slice(0, 500),
|
|
140
|
+
existingKeywords: e.keywords,
|
|
141
|
+
}));
|
|
142
|
+
batches.push(batch);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
ctx.ui.notify(`📄 Sending ${batches.length} keyword-generation batch(es) for ${candidates.length} file(s)...`, "info");
|
|
146
|
+
|
|
147
|
+
for (const batch of batches) {
|
|
148
|
+
await deps.generateKeywordsLLM(batch);
|
|
149
|
+
}
|
|
150
|
+
});
|
|
84
151
|
}
|
package/config.ts
CHANGED
|
@@ -2,50 +2,85 @@
|
|
|
2
2
|
* Configuration loader for the Doc Injector extension.
|
|
3
3
|
* Reads from `.pi/doc-injector.json` with fallback to defaults.
|
|
4
4
|
*/
|
|
5
|
-
import {
|
|
5
|
+
import { readFile } from "node:fs/promises";
|
|
6
6
|
import { join } from "node:path";
|
|
7
7
|
import { DEFAULT_CONFIG, type DocInjectorConfig } from "./types";
|
|
8
8
|
|
|
9
9
|
/**
 * Coerce a raw config value to an integer within `[min, max]`.
 *
 * - Non-numbers and `NaN` silently yield `defaultVal`.
 * - Numbers are truncated toward zero, then clamped into range; clamping
 *   logs a warning.
 * - If the clamped result is still not finite (only possible when the input
 *   is ±Infinity and the matching bound is unbounded, e.g. JSON `1e999` with
 *   `max = Infinity`), `defaultVal` is used instead so the function never
 *   returns a non-finite "integer".
 *
 * @param value - Raw value read from the config file.
 * @param defaultVal - Fallback when `value` is unusable.
 * @param min - Inclusive lower bound.
 * @param max - Inclusive upper bound (may be `Infinity` for "no upper limit").
 * @param fieldName - Config key, used in warning messages.
 * @returns The truncated, clamped value, or `defaultVal`.
 */
function clampInt(
  value: unknown,
  defaultVal: number,
  min: number,
  max: number,
  fieldName: string,
): number {
  if (typeof value !== "number" || Number.isNaN(value)) {
    return defaultVal;
  }

  const intVal = Math.trunc(value);
  const clamped = Math.max(min, Math.min(max, intVal));

  if (!Number.isFinite(clamped)) {
    // ±Infinity slipped past an unbounded limit; there is no sensible integer.
    console.warn(`[doc-injector] ${fieldName} must be a finite number, got ${intVal}. Using default ${defaultVal}.`);
    return defaultVal;
  }

  if (intVal < min || intVal > max) {
    console.warn(`[doc-injector] ${fieldName} must be ${min}-${max}, got ${intVal}. Clamping to ${clamped}.`);
    return clamped;
  }
  return intVal;
}
|
|
15
31
|
|
|
16
|
-
|
|
17
|
-
|
|
32
|
+
/**
 * Validate a glob pattern list read from config.
 *
 * - A non-array value yields a copy of `defaultVal`.
 * - An explicitly empty array is honoured and returned as `[]`, so a user can
 *   opt out of the default patterns (e.g. `"exclude": []`).
 * - Otherwise non-string entries are dropped with a warning; if nothing
 *   usable remains, a copy of `defaultVal` is returned.
 *
 * @param value - Raw value from the config file.
 * @param defaultVal - Patterns to fall back to; never returned by reference.
 * @returns A new array of string patterns.
 */
function validateGlobArray(value: unknown, defaultVal: string[]): string[] {
  if (!Array.isArray(value)) {
    return [...defaultVal];
  }
  if (value.length === 0) {
    // Deliberately empty: not an error, so do not substitute the defaults.
    return [];
  }

  const patterns: string[] = [];
  for (const item of value) {
    if (typeof item === "string") {
      patterns.push(item);
    } else {
      console.warn(`[doc-injector] Non-string entry in glob array ignored: ${String(item)}`);
    }
  }

  // A non-empty list with no valid strings is treated as a config mistake.
  return patterns.length > 0 ? patterns : [...defaultVal];
}
|
|
50
|
+
|
|
51
|
+
/**
|
|
52
|
+
* Load config from `.pi/doc-injector.json` relative to the given cwd.
|
|
53
|
+
* Now async — uses readFile from fs/promises.
|
|
54
|
+
* Validates and clamps all numeric fields. Falls back to DEFAULT_CONFIG
|
|
55
|
+
* if file doesn't exist or is invalid.
|
|
56
|
+
*/
|
|
57
|
+
export async function loadConfig(cwd: string): Promise<DocInjectorConfig> {
|
|
58
|
+
const configPath = join(cwd, ".pi", "doc-injector.json");
|
|
19
59
|
|
|
20
60
|
try {
|
|
21
|
-
const raw =
|
|
61
|
+
const raw = await readFile(configPath, "utf-8");
|
|
22
62
|
const parsed = JSON.parse(raw) as Partial<DocInjectorConfig>;
|
|
23
63
|
|
|
24
|
-
// Clamp contextThreshold to 0-100 range
|
|
25
|
-
let contextThreshold = parsed.contextThreshold ?? DEFAULT_CONFIG.contextThreshold;
|
|
26
|
-
if (typeof contextThreshold === "number" && (contextThreshold < 0 || contextThreshold > 100)) {
|
|
27
|
-
console.warn(`[doc-injector] contextThreshold must be 0-100, got ${contextThreshold}. Clamping.`);
|
|
28
|
-
contextThreshold = Math.max(0, Math.min(100, contextThreshold));
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// Clamp matchThreshold to positive integers
|
|
32
|
-
let matchThreshold = parsed.matchThreshold ?? DEFAULT_CONFIG.matchThreshold;
|
|
33
|
-
if (typeof matchThreshold === "number" && matchThreshold < 1) {
|
|
34
|
-
console.warn(`[doc-injector] matchThreshold must be >= 1, got ${matchThreshold}. Using 1.`);
|
|
35
|
-
matchThreshold = 1;
|
|
36
|
-
}
|
|
37
|
-
|
|
38
64
|
return {
|
|
39
65
|
docsPath: parsed.docsPath ?? DEFAULT_CONFIG.docsPath,
|
|
40
|
-
matchThreshold,
|
|
41
|
-
contextThreshold,
|
|
66
|
+
matchThreshold: clampInt(parsed.matchThreshold, DEFAULT_CONFIG.matchThreshold, 1, Infinity, "matchThreshold"),
|
|
67
|
+
contextThreshold: clampInt(parsed.contextThreshold, DEFAULT_CONFIG.contextThreshold, 0, 100, "contextThreshold"),
|
|
42
68
|
recursive: parsed.recursive ?? DEFAULT_CONFIG.recursive,
|
|
69
|
+
include: validateGlobArray(parsed.include, DEFAULT_CONFIG.include),
|
|
70
|
+
exclude: validateGlobArray(parsed.exclude, DEFAULT_CONFIG.exclude),
|
|
71
|
+
maxFileSize: clampInt(parsed.maxFileSize, DEFAULT_CONFIG.maxFileSize, 1024, 10 * 1024 * 1024, "maxFileSize"),
|
|
72
|
+
autoKeywords: parsed.autoKeywords ?? DEFAULT_CONFIG.autoKeywords,
|
|
73
|
+
llmKeywords: parsed.llmKeywords ?? DEFAULT_CONFIG.llmKeywords,
|
|
74
|
+
maxConcurrent: clampInt(parsed.maxConcurrent, DEFAULT_CONFIG.maxConcurrent, 1, 100, "maxConcurrent"),
|
|
75
|
+
llmBatchSize: clampInt(parsed.llmBatchSize, DEFAULT_CONFIG.llmBatchSize, 1, 100, "llmBatchSize"),
|
|
43
76
|
};
|
|
44
77
|
} catch (err) {
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
78
|
+
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
|
|
79
|
+
console.warn(
|
|
80
|
+
`[doc-injector] Failed to parse config at ${configPath}:`,
|
|
81
|
+
err instanceof Error ? err.message : String(err),
|
|
82
|
+
);
|
|
83
|
+
}
|
|
49
84
|
return { ...DEFAULT_CONFIG };
|
|
50
85
|
}
|
|
51
86
|
}
|
package/globber.ts
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Glob filter for include/exclude pattern matching.
|
|
3
|
+
* Uses picomatch (0 deps, ~18 KB) to compile the patterns once up front, so each path check reuses the compiled matchers.
|
|
4
|
+
*/
|
|
5
|
+
import picomatch from "picomatch";
|
|
6
|
+
import type { GlobFilter } from "./types";
|
|
7
|
+
|
|
8
|
+
/**
 * Build a reusable path filter from include and exclude glob patterns.
 *
 * A path passes when it matches at least one `include` pattern (or `include`
 * is empty, meaning "everything") and matches no `exclude` pattern. Both
 * pattern lists are compiled once, when the filter is created, with
 * picomatch's `dot: true` option.
 *
 * @param include - Glob patterns for files to include; empty = include all
 * @param exclude - Glob patterns for files/dirs to exclude; empty = exclude none
 * @returns A GlobFilter whose `match` method tests one relative path
 */
export function createGlobFilter(
  include: string[],
  exclude: string[],
): GlobFilter {
  // An empty pattern list compiles to `null`, meaning "no constraint".
  const compile = (patterns: string[]) =>
    patterns.length > 0 ? picomatch(patterns, { dot: true }) : null;

  const isIncluded = compile(include);
  const isExcluded = compile(exclude);

  return {
    match(relativePath: string): boolean {
      // Include check first: with patterns present, at least one must match.
      if (isIncluded !== null && !isIncluded(relativePath)) {
        return false;
      }
      // Then the veto: any exclude match rejects the path.
      const vetoed = isExcluded !== null && isExcluded(relativePath);
      return !vetoed;
    },
  };
}
|
package/index.ts
CHANGED
|
@@ -53,18 +53,21 @@
|
|
|
53
53
|
* is cleared after injection, and `markInjected()` operates on the registry's
|
|
54
54
|
* current entries, not the stale array.
|
|
55
55
|
*/
|
|
56
|
-
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
|
56
|
+
import type { ExtensionAPI, ExtensionContext } from "@mariozechner/pi-coding-agent";
|
|
57
|
+
import { Type } from "@sinclair/typebox";
|
|
57
58
|
import { resolve } from "node:path";
|
|
59
|
+
import { loadCache, saveCache } from "./cache";
|
|
58
60
|
import { loadConfig } from "./config";
|
|
59
61
|
import { buildSystemPromptAppend, notifyInjection } from "./injector";
|
|
62
|
+
import { buildKeywordGenPrompt } from "./keyword-llm";
|
|
60
63
|
import { extractText, KeywordMatcher } from "./matcher";
|
|
61
64
|
import { DocRegistry } from "./registry";
|
|
62
|
-
import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult } from "./types";
|
|
65
|
+
import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
|
|
63
66
|
import { registerCommands } from "./commands";
|
|
64
67
|
|
|
65
68
|
export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
66
69
|
// ---- State ----
|
|
67
|
-
let config = loadConfig(process.cwd());
|
|
70
|
+
let config = await loadConfig(process.cwd());
|
|
68
71
|
let registry: DocRegistry | null = null;
|
|
69
72
|
let initRegistryPromise: Promise<void> | null = null;
|
|
70
73
|
let enabled = true;
|
|
@@ -72,17 +75,50 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
72
75
|
let pendingMatches = new Map<string, string[]>(); // filePath → matchedKeywords
|
|
73
76
|
let abortingForInjection = false; // guard against cascading aborts
|
|
74
77
|
|
|
78
|
+
// P5.4b — Guard flags for LLM keyword generation
|
|
79
|
+
let keywordGenInFlight = false;
|
|
80
|
+
let llmBatchesCompleted = 0;
|
|
81
|
+
let llmTotalFiles = 0;
|
|
82
|
+
let cache: KeywordCache = { version: 1, files: {} };
|
|
83
|
+
|
|
75
84
|
// ---- Helpers ----
|
|
76
85
|
const getRegistry = () => registry;
|
|
77
86
|
const getEnabled = () => enabled;
|
|
78
87
|
const setEnabled = (v: boolean) => {
|
|
79
88
|
enabled = v;
|
|
80
89
|
};
|
|
90
|
+
const getConfig = () => config;
|
|
91
|
+
|
|
92
|
+
const safeSaveCache = async (cwd: string, dirtyEntries: Record<string, CacheEntry>) => {
|
|
93
|
+
// MAJOR-2 fix: before saveCache, re-read cache from disk to merge
|
|
94
|
+
// LLM-written entries that may have landed during the scan.
|
|
95
|
+
const freshCache = await loadCache(cwd);
|
|
96
|
+
const mergedCache: KeywordCache = { version: 1, files: {} };
|
|
97
|
+
|
|
98
|
+
// Start with fresh (disk) entries — includes any LLM writes during scan
|
|
99
|
+
for (const [key, entry] of Object.entries(freshCache.files)) {
|
|
100
|
+
mergedCache.files[key] = entry;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Overlay dirty entries from this scan (scan results take precedence)
|
|
104
|
+
for (const [key, entry] of Object.entries(dirtyEntries)) {
|
|
105
|
+
mergedCache.files[key] = entry;
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
await saveCache(cwd, mergedCache);
|
|
109
|
+
};
|
|
81
110
|
|
|
82
111
|
const initRegistry = async (cwd: string) => {
|
|
83
|
-
config = loadConfig(cwd);
|
|
112
|
+
config = await loadConfig(cwd);
|
|
84
113
|
const docsPath = resolve(cwd, config.docsPath);
|
|
85
|
-
|
|
114
|
+
cache = await loadCache(cwd);
|
|
115
|
+
registry = await DocRegistry.create(docsPath, config, cache);
|
|
116
|
+
|
|
117
|
+
const dirty = registry.getDirtyCache();
|
|
118
|
+
if (Object.keys(dirty).length > 0) {
|
|
119
|
+
await safeSaveCache(cwd, dirty);
|
|
120
|
+
}
|
|
121
|
+
|
|
86
122
|
const count = registry.getEntries().length;
|
|
87
123
|
if (count > 0) {
|
|
88
124
|
console.log(`[doc-injector] Loaded ${count} documents from ${docsPath}`);
|
|
@@ -99,11 +135,70 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
99
135
|
);
|
|
100
136
|
};
|
|
101
137
|
|
|
138
|
+
// P5.4f — generateKeywordsLLM: sets keywordGenInFlight and sends a user message
|
|
139
|
+
// with the prompt built by buildKeywordGenPrompt. The LLM will respond by
|
|
140
|
+
// calling the _doc_injector_keywords tool.
|
|
141
|
+
const generateKeywordsLLM = async (
|
|
142
|
+
files: Array<{ path: string; snippet: string; existingKeywords: string[] }>,
|
|
143
|
+
) => {
|
|
144
|
+
keywordGenInFlight = true;
|
|
145
|
+
const prompt = buildKeywordGenPrompt(files);
|
|
146
|
+
pi.sendUserMessage(prompt, { deliverAs: "followUp" });
|
|
147
|
+
};
|
|
148
|
+
|
|
149
|
+
// P5.4a — Inline tool registration (BLOCKER-2 fix).
|
|
150
|
+
// Registered inside the factory for closure access to cache, cwd, saveCache,
|
|
151
|
+
// and llmBatchesCompleted. Uses real mtime from stat().
|
|
152
|
+
pi.registerTool({
|
|
153
|
+
name: "_doc_injector_keywords",
|
|
154
|
+
label: "Doc Injector Keywords",
|
|
155
|
+
description:
|
|
156
|
+
"Save LLM-generated keywords for documentation files. Call this tool with the keywords array after analyzing file snippets.",
|
|
157
|
+
parameters: Type.Object({
|
|
158
|
+
keywords: Type.Array(
|
|
159
|
+
Type.Object({
|
|
160
|
+
path: Type.String(),
|
|
161
|
+
keywords: Type.Array(Type.String()),
|
|
162
|
+
}),
|
|
163
|
+
),
|
|
164
|
+
}),
|
|
165
|
+
execute: async (_id, params, _signal, _onUpdate, ctx) => {
|
|
166
|
+
const generated = params.keywords as Array<{ path: string; keywords: string[] }>;
|
|
167
|
+
const { stat } = await import("node:fs/promises");
|
|
168
|
+
let saved = 0;
|
|
169
|
+
for (const item of generated) {
|
|
170
|
+
const absPath = resolve(ctx.cwd, config.docsPath, item.path);
|
|
171
|
+
const fileStat = await stat(absPath).catch(() => null);
|
|
172
|
+
if (!fileStat) {
|
|
173
|
+
console.warn(`[doc-injector] Skipping keyword save for ${item.path}: file not found`);
|
|
174
|
+
continue;
|
|
175
|
+
}
|
|
176
|
+
cache.files[item.path] = {
|
|
177
|
+
mtimeMs: fileStat.mtimeMs,
|
|
178
|
+
keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
|
|
179
|
+
};
|
|
180
|
+
saved++;
|
|
181
|
+
}
|
|
182
|
+
await saveCache(ctx.cwd, cache);
|
|
183
|
+
llmBatchesCompleted++;
|
|
184
|
+
llmTotalFiles += saved;
|
|
185
|
+
return {
|
|
186
|
+
content: [{ type: "text" as const, text: `Keywords saved for ${saved} files.` }],
|
|
187
|
+
details: undefined as never,
|
|
188
|
+
};
|
|
189
|
+
},
|
|
190
|
+
});
|
|
191
|
+
|
|
102
192
|
// ---- Event: session_start ----
|
|
103
193
|
// Pi emits session_start for startup, reload, and real session transitions.
|
|
104
194
|
// Skip the reload variant because resources_discover will rebuild docs right
|
|
105
195
|
// after it, and deduplicate any overlapping non-reload inits.
|
|
106
196
|
pi.on("session_start", async (event, ctx) => {
|
|
197
|
+
// P5.4d — Safety unbind: clear all LLM keyword gen state on session start
|
|
198
|
+
keywordGenInFlight = false;
|
|
199
|
+
llmBatchesCompleted = 0;
|
|
200
|
+
llmTotalFiles = 0;
|
|
201
|
+
|
|
107
202
|
if (event.reason === "reload") return;
|
|
108
203
|
|
|
109
204
|
if (initRegistryPromise) {
|
|
@@ -119,17 +214,30 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
119
214
|
}
|
|
120
215
|
});
|
|
121
216
|
|
|
122
|
-
const reloadRegistry = async (): Promise<number> => {
|
|
217
|
+
const reloadRegistry = async (cwd?: string): Promise<number> => {
|
|
123
218
|
if (!registry) throw new Error("No registry loaded");
|
|
219
|
+
const effectiveCwd = cwd ?? process.cwd();
|
|
220
|
+
|
|
221
|
+
// Reload cache from disk to pick up LLM-generated entries
|
|
222
|
+
const freshCache = await loadCache(effectiveCwd);
|
|
223
|
+
cache = freshCache;
|
|
224
|
+
registry.updateCache(cache);
|
|
225
|
+
|
|
124
226
|
await registry.rebuild();
|
|
227
|
+
|
|
228
|
+
const dirty = registry.getDirtyCache();
|
|
229
|
+
if (Object.keys(dirty).length > 0) {
|
|
230
|
+
await safeSaveCache(effectiveCwd, dirty);
|
|
231
|
+
}
|
|
232
|
+
|
|
125
233
|
const count = registry.getEntries().length;
|
|
126
234
|
console.log(`[doc-injector] Reloaded: ${count} documents`);
|
|
127
235
|
return count;
|
|
128
236
|
};
|
|
129
237
|
|
|
130
238
|
// ---- Event: resources_discover (reload) ----
|
|
131
|
-
pi.on("resources_discover", async (_event,
|
|
132
|
-
await reloadRegistry();
|
|
239
|
+
pi.on("resources_discover", async (_event, ctx) => {
|
|
240
|
+
await reloadRegistry(ctx.cwd);
|
|
133
241
|
});
|
|
134
242
|
|
|
135
243
|
// ---- Event: input (user message matching) ----
|
|
@@ -138,6 +246,17 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
138
246
|
// BEFORE before_agent_start fires, so docs are injected in time for
|
|
139
247
|
// the assistant's immediate response.
|
|
140
248
|
pi.on("input", async (event, _ctx) => {
|
|
249
|
+
// P5.4d — Safety unbind: if the user is typing interactively, clear all
|
|
250
|
+
// LLM keyword gen state (they may have aborted the generation).
|
|
251
|
+
if (event.source === "interactive") {
|
|
252
|
+
keywordGenInFlight = false;
|
|
253
|
+
llmBatchesCompleted = 0;
|
|
254
|
+
llmTotalFiles = 0;
|
|
255
|
+
}
|
|
256
|
+
|
|
257
|
+
// P5.4b — Guard: skip keyword matching during LLM keyword generation
|
|
258
|
+
if (keywordGenInFlight) return;
|
|
259
|
+
|
|
141
260
|
if (!enabled || !registry) return;
|
|
142
261
|
if (!event.text) return;
|
|
143
262
|
|
|
@@ -155,6 +274,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
155
274
|
// non-injected docs, abort the current generation and restart with the
|
|
156
275
|
// injected context — no waiting for the next turn.
|
|
157
276
|
pi.on("message_update", async (event, ctx) => {
|
|
277
|
+
// P5.4b — Guard: skip auto-abort logic during LLM keyword generation
|
|
278
|
+
if (keywordGenInFlight) return;
|
|
279
|
+
|
|
158
280
|
if (!enabled || !registry) return;
|
|
159
281
|
|
|
160
282
|
const msg = event.message;
|
|
@@ -195,6 +317,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
195
317
|
|
|
196
318
|
// ---- Event: before_agent_start (inject into system prompt) ----
|
|
197
319
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
320
|
+
// P5.4b — Guard: skip injection during LLM keyword generation
|
|
321
|
+
if (keywordGenInFlight) return;
|
|
322
|
+
|
|
198
323
|
if (!enabled || !registry || pendingMatches.size === 0) return;
|
|
199
324
|
|
|
200
325
|
const matchedEntries: DocEntry[] = [];
|
|
@@ -235,13 +360,27 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
235
360
|
};
|
|
236
361
|
});
|
|
237
362
|
|
|
238
|
-
// ---- Event: agent_end (restart after auto-abort) ----
|
|
239
|
-
pi.on("agent_end", async () => {
|
|
363
|
+
// ---- Event: agent_end (restart after auto-abort + LLM batch summary) ----
|
|
364
|
+
pi.on("agent_end", async (event, ctx) => {
|
|
365
|
+
// P5.4c — Summary notification from agent_end (BLOCKER-3)
|
|
366
|
+
keywordGenInFlight = false;
|
|
367
|
+
if (llmBatchesCompleted > 0) {
|
|
368
|
+
await ctx.ui.notify(
|
|
369
|
+
`Doc keywords: ${llmTotalFiles} files across ${llmBatchesCompleted} batch(es)`,
|
|
370
|
+
"info",
|
|
371
|
+
);
|
|
372
|
+
llmBatchesCompleted = 0;
|
|
373
|
+
llmTotalFiles = 0;
|
|
374
|
+
}
|
|
375
|
+
|
|
240
376
|
if (abortingForInjection) {
|
|
241
377
|
abortingForInjection = false;
|
|
242
|
-
//
|
|
243
|
-
|
|
244
|
-
|
|
378
|
+
// Defer sendUserMessage to next tick to avoid re-entrancy issues.
|
|
379
|
+
setTimeout(() => {
|
|
380
|
+
pi.sendUserMessage("continue");
|
|
381
|
+
}, 0);
|
|
382
|
+
} else {
|
|
383
|
+
console.log('[doc-injector] agent_end: abortingForInjection is false, skipping');
|
|
245
384
|
}
|
|
246
385
|
});
|
|
247
386
|
|
|
@@ -251,5 +390,7 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
251
390
|
getEnabled,
|
|
252
391
|
setEnabled,
|
|
253
392
|
reloadRegistry,
|
|
393
|
+
getConfig,
|
|
394
|
+
generateKeywordsLLM,
|
|
254
395
|
});
|
|
255
396
|
}
|
package/injector.ts
CHANGED
|
@@ -13,6 +13,21 @@ export interface NotifyCapability {
|
|
|
13
13
|
notify: (msg: string, type?: "info" | "warning" | "error") => void;
|
|
14
14
|
}
|
|
15
15
|
|
|
16
|
+
/**
 * Clean matched keywords before they are written into the system prompt.
 *
 * For each keyword: every `\n` / `\r` becomes a single space (so a keyword
 * cannot start a new prompt line), surrounding whitespace is trimmed, empty
 * results are dropped, and the text is cut to at most 100 characters. Only
 * the first 20 surviving keywords are returned.
 *
 * @param keywords - Raw keywords, in match order.
 * @returns A new array of at most 20 single-line keywords.
 */
function sanitizeKeywords(keywords: string[]): string[] {
  const cleaned = keywords.flatMap((raw) => {
    const singleLine = raw.replace(/[\n\r]/g, " ").trim();
    if (singleLine.length === 0) {
      return [];
    }
    // slice() is a no-op for keywords already within the length cap.
    return [singleLine.slice(0, 100)];
  });

  return cleaned.slice(0, 20);
}
|
|
30
|
+
|
|
16
31
|
/**
|
|
17
32
|
* Build a system prompt append string from matched documents.
|
|
18
33
|
*/
|
|
@@ -29,7 +44,9 @@ export function buildSystemPromptAppend(
|
|
|
29
44
|
];
|
|
30
45
|
|
|
31
46
|
for (const entry of entries) {
|
|
32
|
-
|
|
47
|
+
// Sanitize keywords before display to prevent prompt injection
|
|
48
|
+
const rawKeywords = matchedKeywords.get(entry.filePath) ?? [];
|
|
49
|
+
const keywords = sanitizeKeywords(rawKeywords);
|
|
33
50
|
sections.push(`### ${entry.title}`);
|
|
34
51
|
sections.push(`Source: \`${entry.relativePath}\``);
|
|
35
52
|
if (keywords.length > 0) {
|