pi-doc-injector 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -1
- package/cache.ts +79 -0
- package/commands.ts +68 -1
- package/config.ts +63 -28
- package/globber.ts +48 -0
- package/index.ts +171 -22
- package/injector.ts +18 -1
- package/keyword-gen.ts +142 -0
- package/keyword-llm.ts +57 -0
- package/matcher.ts +14 -10
- package/package.json +5 -1
- package/picomatch.d.ts +11 -0
- package/registry.ts +361 -72
- package/types.ts +62 -3
package/registry.ts
CHANGED
|
@@ -1,38 +1,31 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Document Registry — scans a docs folder, parses frontmatter, maintains index.
|
|
3
|
+
*
|
|
4
|
+
* Processing pipeline:
|
|
5
|
+
* 1. stat(filePath) → size check, mtime check, cache hit
|
|
6
|
+
* 2. readFile(filePath) → parse frontmatter or generate keywords
|
|
3
7
|
*/
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import
|
|
8
|
+
import type { Dirent } from "node:fs";
|
|
9
|
+
import { readdir, readFile, stat } from "node:fs/promises";
|
|
10
|
+
import { basename, extname, join, relative, resolve } from "node:path";
|
|
11
|
+
import type { CacheEntry, DocEntry, DocInjectorConfig, KeywordCache } from "./types";
|
|
12
|
+
import { createGlobFilter } from "./globber";
|
|
13
|
+
import { generateKeywords } from "./keyword-gen";
|
|
7
14
|
|
|
8
15
|
/**
|
|
9
|
-
*
|
|
10
|
-
*
|
|
16
|
+
* Shared parser for frontmatter block content (title + keywords).
|
|
17
|
+
* Extracts title and keywords from YAML-like content between delimiters.
|
|
11
18
|
*/
|
|
12
|
-
|
|
13
|
-
content: string,
|
|
14
|
-
): { title: string; keywords: string[]; body: string } | null {
|
|
15
|
-
if (!content.startsWith("---")) {
|
|
16
|
-
return null;
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
const secondDash = content.indexOf("---", 3);
|
|
20
|
-
if (secondDash === -1) {
|
|
21
|
-
return null;
|
|
22
|
-
}
|
|
23
|
-
|
|
24
|
-
const frontmatterBlock = content.slice(3, secondDash).trim();
|
|
25
|
-
const body = content.slice(secondDash + 3).trim();
|
|
26
|
-
|
|
19
|
+
function parseFrontmatterBlock(block: string): { title: string; keywords: string[] } | null {
|
|
27
20
|
// Extract title
|
|
28
|
-
const titleMatch =
|
|
21
|
+
const titleMatch = block.match(/^title:\s*["']?([^"'\n]+)["']?$/m);
|
|
29
22
|
const title = titleMatch ? titleMatch[1].trim() : "";
|
|
30
23
|
|
|
31
24
|
// Extract keywords — supports both flow array [a, b] and block array
|
|
32
25
|
const keywords: string[] = [];
|
|
33
26
|
|
|
34
27
|
// Try flow array: keywords: [a, b, c]
|
|
35
|
-
const flowMatch =
|
|
28
|
+
const flowMatch = block.match(/keywords:\s*\[([^\]]*)\]/);
|
|
36
29
|
if (flowMatch) {
|
|
37
30
|
keywords.push(
|
|
38
31
|
...flowMatch[1]
|
|
@@ -42,7 +35,7 @@ export function parseFrontmatter(
|
|
|
42
35
|
);
|
|
43
36
|
} else {
|
|
44
37
|
// Try block array: keywords:\n - a\n - b
|
|
45
|
-
const blockMatches =
|
|
38
|
+
const blockMatches = block.matchAll(/keywords:\s*\n((?:\s*-\s*.+\n?)+)/g);
|
|
46
39
|
for (const bm of blockMatches) {
|
|
47
40
|
const items = bm[1].matchAll(/^\s*-\s*["']?([^"'\n]+)["']?$/gm);
|
|
48
41
|
for (const im of items) {
|
|
@@ -56,25 +49,201 @@ export function parseFrontmatter(
|
|
|
56
49
|
return null;
|
|
57
50
|
}
|
|
58
51
|
|
|
59
|
-
return { title: title || "Untitled", keywords
|
|
52
|
+
return { title: title || "Untitled", keywords };
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
 * Parse YAML-style frontmatter delimited by `---`.
 *
 * The content must begin with `---`. The block ends at the first `---` that
 * starts a line; when no such line exists, the first `---` anywhere after the
 * opener is used so single-line frontmatter keeps working.
 *
 * @param content - Raw file content.
 * @returns `{ title, keywords, body }`, or `null` when the content does not
 *   start with `---`, has no closing delimiter, or `parseFrontmatterBlock`
 *   returns `null` for the extracted block.
 */
function parseYamlFrontmatter(
  content: string,
): { title: string; keywords: string[]; body: string } | null {
  if (!content.startsWith("---")) return null;

  // Prefer a delimiter at the start of a line so that a value containing
  // `---` (e.g. `title: A --- B`) cannot end the block prematurely.
  const lineDelimiter = content.indexOf("\n---", 3);
  const closeIdx = lineDelimiter !== -1 ? lineDelimiter + 1 : content.indexOf("---", 3);
  if (closeIdx === -1) return null;

  const block = content.slice(3, closeIdx).trim();
  const body = content.slice(closeIdx + 3).trim();

  const parsed = parseFrontmatterBlock(block);
  if (!parsed) return null;

  return { ...parsed, body };
}
|
|
74
|
+
|
|
75
|
+
/**
 * Parse frontmatter stored in a C-style block comment.
 *
 * The content must begin with the opener `/*---`. The block runs up to the
 * closer, which is three dashes immediately followed by the block-comment
 * terminator. (The closer is deliberately not written out literally here:
 * doing so would end this doc comment early.)
 *
 * Each line of the block may carry a leading `*` decoration, which is
 * stripped before the block is handed to `parseFrontmatterBlock`.
 *
 * @param content - Raw file content.
 * @returns `{ title, keywords, body }`, or `null` when the opener or closer
 *   is missing or `parseFrontmatterBlock` returns `null`.
 */
function parseCStyleFrontmatter(
  content: string,
): { title: string; keywords: string[]; body: string } | null {
  if (!content.startsWith("/*---")) return null;

  const end = content.indexOf("---*/", 5);
  if (end === -1) return null;

  const body = content.slice(end + 5).trim();

  // Strip the optional "* " / " * " prefix that block comments commonly use.
  const block = content
    .slice(5, end)
    .trim()
    .split("\n")
    .map((line) => line.replace(/^\s*\*\s?/, ""))
    .join("\n");

  const parsed = parseFrontmatterBlock(block);
  if (!parsed) return null;

  return { ...parsed, body };
}
|
|
100
|
+
|
|
101
|
+
/**
 * Parse frontmatter stored in a leading HTML comment (`<!-- ... -->`).
 *
 * @param content - Raw file content.
 * @returns `{ title, keywords, body }`, or `null` when the content does not
 *   start with `<!--`, the comment is never closed, or
 *   `parseFrontmatterBlock` returns `null` for the comment text.
 */
function parseHTMLFrontmatter(
  content: string,
): { title: string; keywords: string[]; body: string } | null {
  const opener = "<!--";
  const closer = "-->";

  if (!content.startsWith(opener)) return null;

  const closeAt = content.indexOf(closer, opener.length);
  if (closeAt === -1) return null;

  const meta = parseFrontmatterBlock(content.slice(opener.length, closeAt).trim());
  if (!meta) return null;

  // Everything after the closing marker is the document body.
  return { ...meta, body: content.slice(closeAt + closer.length).trim() };
}
|
|
120
|
+
|
|
121
|
+
/**
 * Parse frontmatter written as `//` line comments.
 *
 * The content must begin with `//---`. The frontmatter block is everything
 * after the opener line up to the first blank line (LF or CRLF line endings);
 * when there is no blank line, the rest of the content is the block and the
 * body is empty. A leading `//` (plus one optional whitespace character) is
 * removed from each block line before parsing.
 *
 * @param content - Raw file content.
 * @returns `{ title, keywords, body }`, or `null` when the opener is missing,
 *   nothing follows the opener line, or `parseFrontmatterBlock` returns `null`.
 */
function parseSlashSlashFrontmatter(
  content: string,
): { title: string; keywords: string[]; body: string } | null {
  if (!content.startsWith("//---")) return null;

  // Skip whatever remains of the opener line; content consisting of the
  // opener alone (no newline) carries no frontmatter.
  const afterOpener = content.indexOf("\n", 5);
  if (afterOpener === -1) return null;

  const rest = content.slice(afterOpener + 1);

  // A blank line terminates the block. Accept CRLF as well as LF so files
  // with Windows line endings do not swallow the whole body into the block.
  const blankLine = /\r?\n\r?\n/.exec(rest);
  const rawBlock = blankLine ? rest.slice(0, blankLine.index) : rest;
  const body = blankLine
    ? rest.slice(blankLine.index + blankLine[0].length).trim()
    : "";

  // Strip the optional "//" prefix from each line.
  const block = rawBlock
    .split(/\r?\n/)
    .map((line) => line.replace(/^\/\/\s?/, ""))
    .join("\n")
    .trim();

  const parsed = parseFrontmatterBlock(block);
  if (!parsed) return null;

  return { ...parsed, body };
}
|
|
162
|
+
|
|
163
|
+
/**
 * Parse frontmatter from content by trying every supported style in turn.
 *
 * Order of attempts: YAML (`---`), C-style block comment (`/*---`),
 * HTML comment (`<!--`), then slash-slash comment (`//---`, terminated by a
 * blank line). The first style that yields a result wins.
 *
 * @param content - Raw file content.
 * @returns `{ title, keywords, body }` from the first matching style, or
 *   `null` when none of them produces a result.
 */
export function parseFrontmatter(
  content: string,
): { title: string; keywords: string[]; body: string } | null {
  const strategies = [
    parseYamlFrontmatter,
    parseCStyleFrontmatter,
    parseHTMLFrontmatter,
    parseSlashSlashFrontmatter,
  ];

  for (const strategy of strategies) {
    const outcome = strategy(content);
    if (outcome != null) return outcome;
  }

  return null;
}
|
|
180
|
+
|
|
181
|
+
interface ScanResult {
|
|
182
|
+
filePath: string;
|
|
183
|
+
relativePath: string;
|
|
184
|
+
fileName: string;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
// ─── PromisePool ───────────────────────────────────────────────────────
|
|
188
|
+
|
|
189
|
+
/**
 * Minimal promise pool: runs async task factories with a cap on how many are
 * in flight at once.
 *
 * A `concurrency` that is NaN or below 1 is treated as 1. Without that clamp
 * zero workers would be started, no task would ever run, and `all()` would
 * resolve to an array of empty slots.
 */
class PromisePool {
  /** Effective in-flight limit; always at least 1. */
  private readonly concurrency: number;

  /**
   * @param concurrency - Maximum number of tasks to run simultaneously.
   */
  constructor(concurrency: number) {
    this.concurrency = Number.isNaN(concurrency) || concurrency < 1 ? 1 : concurrency;
  }

  /**
   * Run all tasks with at most `concurrency` in flight at once.
   *
   * @param tasks - Factories that start a task when called.
   * @returns Results in the same order as `tasks`.
   *   If a task rejects, the returned promise rejects with that error.
   */
  async all<T>(tasks: Array<() => Promise<T>>): Promise<T[]> {
    const results = new Array<T>(tasks.length);
    let nextIndex = 0;

    // Each worker repeatedly claims the next unclaimed index. The claim
    // happens synchronously (no await between the check and the increment),
    // so two workers can never take the same task.
    const worker = async (): Promise<void> => {
      while (nextIndex < tasks.length) {
        const idx = nextIndex++;
        results[idx] = await tasks[idx]();
      }
    };

    const workerCount = Math.min(Math.floor(this.concurrency), tasks.length);
    await Promise.all(Array.from({ length: workerCount }, () => worker()));

    return results;
  }
}
|
|
61
221
|
|
|
222
|
+
// ─── DocRegistry ───────────────────────────────────────────────────────
|
|
223
|
+
|
|
62
224
|
/**
|
|
63
225
|
* Document Registry class. Scans a docs folder and maintains an index of DocEntry.
|
|
64
226
|
*/
|
|
65
227
|
export class DocRegistry {
|
|
66
228
|
private entries: DocEntry[] = [];
|
|
67
229
|
private docsPath: string;
|
|
68
|
-
private
|
|
230
|
+
private config: DocInjectorConfig;
|
|
231
|
+
private cache: KeywordCache | null = null;
|
|
232
|
+
private dirtyCache: KeywordCache = { version: 1, files: {} };
|
|
69
233
|
|
|
70
|
-
private constructor(docsPath: string,
|
|
234
|
+
private constructor(docsPath: string, config: DocInjectorConfig, cache?: KeywordCache) {
|
|
71
235
|
this.docsPath = docsPath;
|
|
72
|
-
this.
|
|
236
|
+
this.config = config;
|
|
237
|
+
this.cache = cache ?? null;
|
|
73
238
|
}
|
|
74
239
|
|
|
75
240
|
/** Create a registry by scanning the docs folder. */
|
|
76
|
-
static async create(
|
|
77
|
-
|
|
241
|
+
static async create(
|
|
242
|
+
docsPath: string,
|
|
243
|
+
config: DocInjectorConfig,
|
|
244
|
+
cache?: KeywordCache,
|
|
245
|
+
): Promise<DocRegistry> {
|
|
246
|
+
const registry = new DocRegistry(docsPath, config, cache);
|
|
78
247
|
await registry.rebuild();
|
|
79
248
|
return registry;
|
|
80
249
|
}
|
|
@@ -87,48 +256,135 @@ export class DocRegistry {
|
|
|
87
256
|
preserved.set(e.filePath, e.injected);
|
|
88
257
|
}
|
|
89
258
|
|
|
259
|
+
// Start with a fresh dirty cache — only files that changed get added
|
|
260
|
+
this.dirtyCache = { version: 1, files: {} };
|
|
261
|
+
|
|
90
262
|
try {
|
|
91
|
-
const scanResults = this.recursive
|
|
92
|
-
? this.scanRecursive(resolved)
|
|
93
|
-
: this.scanFlat(resolved);
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
console.warn(`[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`);
|
|
102
|
-
continue;
|
|
103
|
-
}
|
|
104
|
-
newEntries.push({
|
|
105
|
-
filePath,
|
|
106
|
-
fileName,
|
|
107
|
-
relativePath,
|
|
108
|
-
title: parsed.title,
|
|
109
|
-
keywords: parsed.keywords,
|
|
110
|
-
content: raw,
|
|
111
|
-
injected: preserved.get(filePath) ?? false,
|
|
112
|
-
});
|
|
113
|
-
} catch (err) {
|
|
114
|
-
// Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
|
|
115
|
-
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
|
|
116
|
-
console.warn(`[doc-injector] Error reading ${relativePath}:`, err);
|
|
117
|
-
}
|
|
118
|
-
}
|
|
119
|
-
}
|
|
263
|
+
const scanResults = this.config.recursive
|
|
264
|
+
? await this.scanRecursive(resolved)
|
|
265
|
+
: await this.scanFlat(resolved);
|
|
266
|
+
|
|
267
|
+
// Process files concurrently with PromisePool
|
|
268
|
+
const pool = new PromisePool(this.config.maxConcurrent);
|
|
269
|
+
|
|
270
|
+
const tasks = scanResults.map((sr) => async (): Promise<DocEntry | null> => {
|
|
271
|
+
return this.processFile(sr, preserved);
|
|
272
|
+
});
|
|
120
273
|
|
|
121
|
-
|
|
274
|
+
const results = await pool.all(tasks);
|
|
275
|
+
this.entries = results.filter((e): e is DocEntry => e !== null);
|
|
122
276
|
} catch {
|
|
123
277
|
console.warn(`[doc-injector] Docs folder not found: ${resolved}`);
|
|
124
278
|
this.entries = [];
|
|
125
279
|
}
|
|
126
280
|
}
|
|
127
281
|
|
|
128
|
-
/**
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
282
|
+
/**
 * Process a single scanned file through the full pipeline.
 *
 * Pipeline:
 *  1. `stat` the file; skip it (with a warning) when it exceeds
 *     `config.maxFileSize`.
 *  2. Read the content and attempt to parse frontmatter.
 *  3. Cache hit (cached `mtimeMs` equals the file's): reuse the cached
 *     keywords and skip keyword generation.
 *  4. Cache miss: take keywords from frontmatter, else generate them when
 *     `config.autoKeywords` is set, else skip the file with a warning.
 *  5. On a cache miss, record the entry in `dirtyCache`.
 *
 * The title is derived the same way on both the cache-hit and cache-miss
 * paths (frontmatter title when frontmatter parses, otherwise
 * `extractTitle`), so a document's title does not change between a cold and
 * a warm run.
 *
 * @param scanResult - Location of the file (`filePath`, `relativePath`, `fileName`).
 * @param preserved - Previous `injected` flags keyed by `filePath`.
 * @returns The `DocEntry`, or `null` when the file is skipped or cannot be read.
 */
private async processFile(
  { filePath, relativePath, fileName }: ScanResult,
  preserved: Map<string, boolean>,
): Promise<DocEntry | null> {
  try {
    const fileStat = await stat(filePath);

    // Oversized files are never read.
    if (fileStat.size > this.config.maxFileSize) {
      console.warn(
        `[doc-injector] Skipping ${relativePath}: size ${fileStat.size} > max ${this.config.maxFileSize}`,
      );
      return null;
    }

    // The content is needed on every remaining path (it is what gets injected).
    const raw = await readFile(filePath, "utf-8");
    const parsed = parseFrontmatter(raw);
    const injected = preserved.get(filePath) ?? false;

    // Cache hit — mtime matches, reuse cached keywords.
    const cachedEntry = this.cache?.files[relativePath];
    if (cachedEntry && cachedEntry.mtimeMs === fileStat.mtimeMs) {
      return {
        filePath,
        fileName,
        relativePath,
        title: parsed ? parsed.title : extractTitle(raw, fileName),
        keywords: cachedEntry.keywords,
        content: raw,
        injected,
        keywordSource: "cache",
      };
    }

    // Cache miss — determine title and keywords from the content.
    let title: string;
    let keywords: string[];
    let keywordSource: DocEntry["keywordSource"];

    if (parsed) {
      title = parsed.title;
      keywords = parsed.keywords;
      keywordSource = "frontmatter";
    } else if (this.config.autoKeywords) {
      title = extractTitle(raw, fileName);
      keywords = generateKeywords(fileName, raw);
      keywordSource = "heuristic";
    } else {
      console.warn(
        `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
      );
      return null;
    }

    // Mark as dirty so the caller can persist the new/updated entry.
    this.dirtyCache.files[relativePath] = {
      mtimeMs: fileStat.mtimeMs,
      keywords,
    };

    return {
      filePath,
      fileName,
      relativePath,
      title,
      keywords,
      content: raw,
      injected,
      keywordSource,
    };
  } catch (err) {
    // Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
    if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
      console.warn(`[doc-injector] Error reading ${relativePath}:`, err);
    }
    return null;
  }
}
|
|
381
|
+
|
|
382
|
+
/** Scan files (non-recursive) filtered by glob. */
|
|
383
|
+
private async scanFlat(dir: string): Promise<ScanResult[]> {
|
|
384
|
+
const filter = createGlobFilter(this.config.include, this.config.exclude);
|
|
385
|
+
const entries = await readdir(dir);
|
|
386
|
+
return entries
|
|
387
|
+
.filter((f) => filter.match(f))
|
|
132
388
|
.map((f) => ({
|
|
133
389
|
filePath: join(dir, f),
|
|
134
390
|
relativePath: f,
|
|
@@ -136,19 +392,18 @@ export class DocRegistry {
|
|
|
136
392
|
}));
|
|
137
393
|
}
|
|
138
394
|
|
|
139
|
-
/** Scan
|
|
140
|
-
private scanRecursive(dir: string):
|
|
141
|
-
const
|
|
142
|
-
const
|
|
395
|
+
/** Scan files recursively filtered by glob. */
|
|
396
|
+
private async scanRecursive(dir: string): Promise<ScanResult[]> {
|
|
397
|
+
const filter = createGlobFilter(this.config.include, this.config.exclude);
|
|
398
|
+
const results: ScanResult[] = [];
|
|
399
|
+
const dirents = await readdir(dir, { recursive: true, withFileTypes: true }) as Dirent[];
|
|
143
400
|
|
|
144
401
|
for (const dirent of dirents) {
|
|
145
|
-
if (!dirent.isFile()
|
|
402
|
+
if (!dirent.isFile()) continue;
|
|
146
403
|
|
|
147
404
|
const fileName = basename(dirent.name);
|
|
148
405
|
|
|
149
|
-
//
|
|
150
|
-
// relative path from the parent directory. Use parentPath (Node 18+)
|
|
151
|
-
// with fallback to .path (Bun) for older runtimes.
|
|
406
|
+
// Resolve relative path cross-runtime
|
|
152
407
|
let relPath: string;
|
|
153
408
|
if (dirent.name === fileName) {
|
|
154
409
|
const parentPath = (dirent as Dirent & { parentPath?: string; path?: string }).parentPath
|
|
@@ -158,10 +413,12 @@ export class DocRegistry {
|
|
|
158
413
|
? relative(dir, join(parentPath, dirent.name))
|
|
159
414
|
: dirent.name;
|
|
160
415
|
} else {
|
|
161
|
-
// Node-style: dirent.name already contains the relative path from dir
|
|
162
416
|
relPath = dirent.name;
|
|
163
417
|
}
|
|
164
418
|
|
|
419
|
+
// Apply glob filter
|
|
420
|
+
if (!filter.match(relPath)) continue;
|
|
421
|
+
|
|
165
422
|
results.push({
|
|
166
423
|
filePath: join(dir, relPath),
|
|
167
424
|
relativePath: relPath,
|
|
@@ -172,6 +429,24 @@ export class DocRegistry {
|
|
|
172
429
|
return results;
|
|
173
430
|
}
|
|
174
431
|
|
|
432
|
+
/**
 * Cache entries created or updated by the most recent rebuild, i.e. the ones
 * that still have to be written to disk.
 *
 * @returns A shallow copy of the dirty map keyed by relative path; the
 *   `CacheEntry` objects themselves are shared with the registry.
 */
getDirtyCache(): Record<string, CacheEntry> {
  const snapshot: Record<string, CacheEntry> = {};
  return Object.assign(snapshot, this.dirtyCache.files);
}
|
|
439
|
+
|
|
440
|
+
/**
 * Swap in a new keyword cache without triggering a rebuild.
 *
 * Intended for reloads from disk (e.g. resources_discover) so that
 * LLM-written entries are visible to the next rebuild.
 *
 * @param cache - The cache to consult from now on.
 */
updateCache(cache: KeywordCache): void {
  this.cache = cache;
}
|
|
448
|
+
|
|
449
|
+
|
|
175
450
|
/**
|
|
176
451
|
* Get all registered entries.
|
|
177
452
|
*
|
|
@@ -210,3 +485,17 @@ export class DocRegistry {
|
|
|
210
485
|
this.markAllNotInjected();
|
|
211
486
|
}
|
|
212
487
|
}
|
|
488
|
+
|
|
489
|
+
// ─── Helpers ────────────────────────────────────────────────────────────
|
|
490
|
+
|
|
491
|
+
/**
 * Extract a title from file content.
 *
 * Uses the text of the first level-1 markdown heading (`# Title`) that has
 * non-whitespace text on the same line. Otherwise falls back to the file name
 * with its last extension removed.
 *
 * @param content - Raw file content.
 * @param fileName - Base name of the file, used for the fallback.
 * @returns A non-empty title whenever `fileName` is non-empty.
 */
function extractTitle(content: string, fileName: string): string {
  // `[ \t]+` keeps the match on one line (`\s+` would let a bare "#" line
  // capture text from a later line); `\S` rejects whitespace-only headings.
  const heading = /^#[ \t]+(\S.*)$/m.exec(content);
  if (heading) return heading[1].trim();

  // Fall back to the file name without its extension. A leading dot
  // (".notes") or a trailing dot ("name.") does not delimit an extension.
  const dot = fileName.lastIndexOf(".");
  const hasExtension = dot > 0 && dot < fileName.length - 1;
  return hasExtension ? fileName.slice(0, dot) : fileName;
}
|
package/types.ts
CHANGED
|
@@ -2,22 +2,33 @@
|
|
|
2
2
|
* Shared type definitions for the Doc Injector extension.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
/** Source of keywords for a doc entry. */
|
|
6
|
+
export type KeywordSource = "frontmatter" | "heuristic" | "llm" | "cache";
|
|
7
|
+
|
|
5
8
|
/** A parsed document from the docs folder. */
|
|
6
9
|
export interface DocEntry {
|
|
10
|
+
/** Absolute path on disk */
|
|
7
11
|
filePath: string;
|
|
12
|
+
/** Basename (e.g. "setup.md") */
|
|
8
13
|
fileName: string;
|
|
14
|
+
/** Path relative to docsPath (e.g. "guides/setup.md") */
|
|
9
15
|
relativePath: string;
|
|
16
|
+
/** Document title (from frontmatter or auto-generated) */
|
|
10
17
|
title: string;
|
|
18
|
+
/** Keywords for matching */
|
|
11
19
|
keywords: string[];
|
|
20
|
+
/** Full file content */
|
|
12
21
|
content: string;
|
|
22
|
+
/** Whether this doc has been injected in current session */
|
|
13
23
|
injected: boolean;
|
|
24
|
+
/** Source of keywords */
|
|
25
|
+
keywordSource: KeywordSource;
|
|
14
26
|
}
|
|
15
27
|
|
|
16
28
|
/** Options for the keyword matcher. */
|
|
17
29
|
export interface MatcherOptions {
|
|
18
30
|
matchThreshold: number;
|
|
19
31
|
caseSensitive: boolean;
|
|
20
|
-
wordBoundary: boolean;
|
|
21
32
|
}
|
|
22
33
|
|
|
23
34
|
/** Result from a keyword match. */
|
|
@@ -27,12 +38,54 @@ export interface MatchResult {
|
|
|
27
38
|
hitCount: number;
|
|
28
39
|
}
|
|
29
40
|
|
|
41
|
+
/** Keyword cache file structure. */
|
|
42
|
+
export interface KeywordCache {
|
|
43
|
+
version: 1;
|
|
44
|
+
files: Record<string, CacheEntry>; // relativePath → CacheEntry
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
/** A single cache entry for a file. */
|
|
48
|
+
export interface CacheEntry {
|
|
49
|
+
mtimeMs: number;
|
|
50
|
+
keywords: string[];
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
/** Result from binary content detection. */
|
|
54
|
+
export interface BinaryDetectResult {
|
|
55
|
+
isBinary: boolean;
|
|
56
|
+
reason: "nullByte" | "nonPrintable" | "none";
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
/** Glob filter for include/exclude pattern matching. */
|
|
60
|
+
export interface GlobFilter {
|
|
61
|
+
/** Returns true if the path matches any include pattern and no exclude pattern. */
|
|
62
|
+
match(relativePath: string): boolean;
|
|
63
|
+
}
|
|
64
|
+
|
|
30
65
|
/** Extension configuration. */
|
|
31
66
|
export interface DocInjectorConfig {
|
|
67
|
+
/** Path to docs folder (relative to cwd) */
|
|
32
68
|
docsPath: string;
|
|
69
|
+
/** Minimum keyword matches to trigger injection */
|
|
33
70
|
matchThreshold: number;
|
|
71
|
+
/** Skip injection if context usage exceeds this % (0-100) */
|
|
34
72
|
contextThreshold: number;
|
|
73
|
+
/** Whether to scan subdirectories */
|
|
35
74
|
recursive: boolean;
|
|
75
|
+
/** Glob patterns for files to include */
|
|
76
|
+
include: string[];
|
|
77
|
+
/** Glob patterns for files/dirs to exclude */
|
|
78
|
+
exclude: string[];
|
|
79
|
+
/** Maximum file size in bytes to parse */
|
|
80
|
+
maxFileSize: number;
|
|
81
|
+
/** Enable auto-generation of keywords when frontmatter is missing */
|
|
82
|
+
autoKeywords: boolean;
|
|
83
|
+
/** Enable LLM-based keyword generation via /doc-keywords-gen */
|
|
84
|
+
llmKeywords: boolean;
|
|
85
|
+
/** Max concurrent file I/O operations */
|
|
86
|
+
maxConcurrent: number;
|
|
87
|
+
/** Max files per LLM keyword-gen batch */
|
|
88
|
+
llmBatchSize: number;
|
|
36
89
|
}
|
|
37
90
|
|
|
38
91
|
/** Default configuration values. */
|
|
@@ -41,11 +94,17 @@ export const DEFAULT_CONFIG: DocInjectorConfig = {
|
|
|
41
94
|
matchThreshold: 1,
|
|
42
95
|
contextThreshold: 80,
|
|
43
96
|
recursive: true,
|
|
97
|
+
include: ["**/*.md", "**/*.txt"],
|
|
98
|
+
exclude: ["node_modules/**", ".git/**", "dist/**", "build/**", ".next/**"],
|
|
99
|
+
maxFileSize: 102400, // 100 KB
|
|
100
|
+
autoKeywords: true,
|
|
101
|
+
llmKeywords: true,
|
|
102
|
+
maxConcurrent: 20,
|
|
103
|
+
llmBatchSize: 20,
|
|
44
104
|
};
|
|
45
105
|
|
|
46
106
|
/** Default matcher options derived from config. */
|
|
47
107
|
export const DEFAULT_MATCHER_OPTIONS: MatcherOptions = {
|
|
48
108
|
matchThreshold: DEFAULT_CONFIG.matchThreshold,
|
|
49
109
|
caseSensitive: false,
|
|
50
|
-
|
|
51
|
-
};
|
|
110
|
+
};
|