pi-doc-injector 0.5.1 → 0.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +4 -2
- package/package.json +1 -1
- package/registry.ts +106 -62
- package/types.ts +13 -1
package/index.ts
CHANGED
|
@@ -73,7 +73,7 @@ import { buildKeywordGenPrompt } from "./keyword-llm";
|
|
|
73
73
|
import { extractText, KeywordMatcher } from "./matcher";
|
|
74
74
|
import { ExtensionNotifier, type Notifier } from "./notifier";
|
|
75
75
|
import { DocRegistry } from "./registry";
|
|
76
|
-
import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
|
|
76
|
+
import { DEFAULT_MATCHER_OPTIONS, LLM_CACHE_SENTINEL, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
|
|
77
77
|
import { registerCommands } from "./commands";
|
|
78
78
|
|
|
79
79
|
export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
@@ -184,7 +184,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
184
184
|
continue;
|
|
185
185
|
}
|
|
186
186
|
cache.files[item.path] = {
|
|
187
|
-
|
|
187
|
+
// Use the sentinel — never the real mtime — so the next rebuild
|
|
188
|
+
// surfaces this entry as keywordSource: "llm" instead of "cache".
|
|
189
|
+
mtimeMs: LLM_CACHE_SENTINEL,
|
|
188
190
|
keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
|
|
189
191
|
};
|
|
190
192
|
saved++;
|
package/package.json
CHANGED
package/registry.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
import type { Dirent } from "node:fs";
|
|
9
9
|
import { readdir, readFile, stat } from "node:fs/promises";
|
|
10
10
|
import { basename, extname, join, relative, resolve } from "node:path";
|
|
11
|
-
import type
|
|
11
|
+
import { LLM_CACHE_SENTINEL, type CacheEntry, type DocEntry, type DocInjectorConfig, type KeywordCache } from "./types";
|
|
12
12
|
import type { Notifier } from "./notifier";
|
|
13
13
|
import { createGlobFilter } from "./globber";
|
|
14
14
|
import { generateKeywords } from "./keyword-gen";
|
|
@@ -232,6 +232,12 @@ export class DocRegistry {
|
|
|
232
232
|
private cache: KeywordCache | null = null;
|
|
233
233
|
private dirtyCache: KeywordCache = { version: 1, files: {} };
|
|
234
234
|
private notifier: Notifier;
|
|
235
|
+
// Per-registry flag: warn about a missing docs folder at most once.
|
|
236
|
+
// rebuild() is called twice at startup (once from session_start, once
|
|
237
|
+
// from resources_discover); without this flag the user sees the
|
|
238
|
+
// same warning twice. Not reset across rebuilds — a missing folder
|
|
239
|
+
// is a persistent condition, not a transient one.
|
|
240
|
+
private warnedMissingDocs = false;
|
|
235
241
|
|
|
236
242
|
private constructor(
|
|
237
243
|
docsPath: string,
|
|
@@ -268,6 +274,21 @@ export class DocRegistry {
|
|
|
268
274
|
// Start with a fresh dirty cache — only files that changed get added
|
|
269
275
|
this.dirtyCache = { version: 1, files: {} };
|
|
270
276
|
|
|
277
|
+
// Pre-check folder existence. The previous catch-all "Docs folder not
|
|
278
|
+
// found" warning was misleading (it also fired for scan errors) and was
|
|
279
|
+
// emitted twice at startup (once from session_start, once from
|
|
280
|
+
// resources_discover). The warnedMissingDocs flag deduplicates across
|
|
281
|
+
// rebuilds for the lifetime of this registry.
|
|
282
|
+
const folderStat = await stat(resolved).catch(() => null);
|
|
283
|
+
if (!folderStat || !folderStat.isDirectory()) {
|
|
284
|
+
if (!this.warnedMissingDocs) {
|
|
285
|
+
this.notifier.warn(`[doc-injector] Docs folder not found: ${resolved}`);
|
|
286
|
+
this.warnedMissingDocs = true;
|
|
287
|
+
}
|
|
288
|
+
this.entries = [];
|
|
289
|
+
return;
|
|
290
|
+
}
|
|
291
|
+
|
|
271
292
|
try {
|
|
272
293
|
const scanResults = this.config.recursive
|
|
273
294
|
? await this.scanRecursive(resolved)
|
|
@@ -282,27 +303,39 @@ export class DocRegistry {
|
|
|
282
303
|
|
|
283
304
|
const results = await pool.all(tasks);
|
|
284
305
|
this.entries = results.filter((e): e is DocEntry => e !== null);
|
|
285
|
-
} catch {
|
|
286
|
-
|
|
306
|
+
} catch (err) {
|
|
307
|
+
// This catch now only fires for actual scan errors (not folder-missing).
|
|
308
|
+
this.notifier.warn(
|
|
309
|
+
`[doc-injector] Error scanning docs folder ${resolved}: ${
|
|
310
|
+
err instanceof Error ? err.message : String(err)
|
|
311
|
+
}`,
|
|
312
|
+
);
|
|
287
313
|
this.entries = [];
|
|
288
314
|
}
|
|
289
315
|
}
|
|
290
316
|
|
|
291
317
|
/**
|
|
292
|
-
* Process a single file through the
|
|
318
|
+
* Process a single file through the priority chain.
|
|
293
319
|
* Returns a DocEntry or null if the file should be skipped.
|
|
320
|
+
*
|
|
321
|
+
* Priority (highest to lowest):
|
|
322
|
+
* 1. Frontmatter (authoritative — explicitly written by the doc author)
|
|
323
|
+
* 2. Cache (perf layer — LLM sentinel entry, or mtime match meaning content hasn't changed)
|
|
324
|
+
* 3. Heuristic (free, automatic, local — filename + headings + code symbols)
|
|
325
|
+
* 4. Skip (no frontmatter, no cache, autoKeywords disabled)
|
|
326
|
+
*
|
|
327
|
+
* LLM-generated keywords populate the cache via the `_doc_injector_keywords`
|
|
328
|
+
* tool, so they surface as `keywordSource: "llm"` on the next rebuild
|
|
329
|
+
* (their `mtimeMs` is set to LLM_CACHE_SENTINEL, not the file's real mtime).
|
|
294
330
|
*/
|
|
295
331
|
private async processFile(
|
|
296
332
|
{ filePath, relativePath, fileName }: ScanResult,
|
|
297
333
|
preserved: Map<string, boolean>,
|
|
298
334
|
): Promise<DocEntry | null> {
|
|
299
335
|
try {
|
|
300
|
-
//
|
|
301
|
-
|
|
302
|
-
// Step 1: Stat the file for size and mtime
|
|
336
|
+
// ─── METADATA ─────────────────────────────────────────────
|
|
303
337
|
const fileStat = await stat(filePath);
|
|
304
338
|
|
|
305
|
-
// Step 2: Skip files exceeding maxFileSize
|
|
306
339
|
if (fileStat.size > this.config.maxFileSize) {
|
|
307
340
|
this.notifier.warn(
|
|
308
341
|
`[doc-injector] Skipping ${relativePath}: size ${fileStat.size} > max ${this.config.maxFileSize}`,
|
|
@@ -310,75 +343,86 @@ export class DocRegistry {
|
|
|
310
343
|
return null;
|
|
311
344
|
}
|
|
312
345
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
// Step 6: Cache hit — mtime matches, use cached keywords
|
|
316
|
-
if (cachedEntry && cachedEntry.mtimeMs === fileStat.mtimeMs) {
|
|
317
|
-
// Still read the file for content and title (needed for injection),
|
|
318
|
-
// but skip keyword generation entirely
|
|
319
|
-
const raw = await readFile(filePath, "utf-8");
|
|
320
|
-
const title = extractTitle(raw, fileName);
|
|
346
|
+
// Read once — needed for frontmatter parse, content, and title.
|
|
347
|
+
const raw = await readFile(filePath, "utf-8");
|
|
321
348
|
|
|
349
|
+
// ─── PRIORITY 1: Frontmatter (authoritative) ─────────────
|
|
350
|
+
const parsed = parseFrontmatter(raw);
|
|
351
|
+
if (parsed) {
|
|
352
|
+
// Frontmatter is self-caching (lives in the file), no dirty mark needed.
|
|
322
353
|
return {
|
|
323
354
|
filePath,
|
|
324
355
|
fileName,
|
|
325
356
|
relativePath,
|
|
326
|
-
title,
|
|
327
|
-
keywords:
|
|
357
|
+
title: parsed.title,
|
|
358
|
+
keywords: parsed.keywords,
|
|
328
359
|
content: raw,
|
|
329
360
|
injected: preserved.get(filePath) ?? false,
|
|
330
|
-
keywordSource: "
|
|
361
|
+
keywordSource: "frontmatter",
|
|
331
362
|
};
|
|
332
363
|
}
|
|
333
364
|
|
|
334
|
-
//
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
365
|
+
// ─── PRIORITY 2: Cache (mtime match means content unchanged) ──
|
|
366
|
+
const cachedEntry = this.cache?.files[relativePath];
|
|
367
|
+
if (cachedEntry) {
|
|
368
|
+
// LLM-generated: sentinel mtime never matches a real file
|
|
369
|
+
if (cachedEntry.mtimeMs === LLM_CACHE_SENTINEL) {
|
|
370
|
+
const title = extractTitle(raw, fileName);
|
|
371
|
+
return {
|
|
372
|
+
filePath,
|
|
373
|
+
fileName,
|
|
374
|
+
relativePath,
|
|
375
|
+
title,
|
|
376
|
+
keywords: cachedEntry.keywords,
|
|
377
|
+
content: raw,
|
|
378
|
+
injected: preserved.get(filePath) ?? false,
|
|
379
|
+
keywordSource: "llm",
|
|
380
|
+
};
|
|
381
|
+
}
|
|
382
|
+
// Real mtime match: heuristic or prior LLM-upgrade cache hit
|
|
383
|
+
if (cachedEntry.mtimeMs === fileStat.mtimeMs) {
|
|
384
|
+
const title = extractTitle(raw, fileName);
|
|
385
|
+
return {
|
|
386
|
+
filePath,
|
|
387
|
+
fileName,
|
|
388
|
+
relativePath,
|
|
389
|
+
title,
|
|
390
|
+
keywords: cachedEntry.keywords,
|
|
391
|
+
content: raw,
|
|
392
|
+
injected: preserved.get(filePath) ?? false,
|
|
393
|
+
keywordSource: "cache",
|
|
394
|
+
};
|
|
395
|
+
}
|
|
396
|
+
}
|
|
338
397
|
|
|
339
|
-
//
|
|
340
|
-
|
|
398
|
+
// ─── PRIORITY 3: Heuristic (free, automatic fallback) ─────────
|
|
399
|
+
if (this.config.autoKeywords) {
|
|
400
|
+
const title = extractTitle(raw, fileName);
|
|
401
|
+
const keywords = generateKeywords(fileName, raw);
|
|
341
402
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
403
|
+
// Mark cache dirty (newly generated keywords must be persisted).
|
|
404
|
+
this.dirtyCache.files[relativePath] = {
|
|
405
|
+
mtimeMs: fileStat.mtimeMs,
|
|
406
|
+
keywords,
|
|
407
|
+
};
|
|
345
408
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
} else {
|
|
357
|
-
// Step 11: No frontmatter and autoKeywords disabled — skip
|
|
358
|
-
this.notifier.warn(
|
|
359
|
-
`[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
|
|
360
|
-
);
|
|
361
|
-
return null;
|
|
409
|
+
return {
|
|
410
|
+
filePath,
|
|
411
|
+
fileName,
|
|
412
|
+
relativePath,
|
|
413
|
+
title,
|
|
414
|
+
keywords,
|
|
415
|
+
content: raw,
|
|
416
|
+
injected: preserved.get(filePath) ?? false,
|
|
417
|
+
keywordSource: "heuristic",
|
|
418
|
+
};
|
|
362
419
|
}
|
|
363
420
|
|
|
364
|
-
//
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
keywords,
|
|
370
|
-
};
|
|
371
|
-
|
|
372
|
-
return {
|
|
373
|
-
filePath,
|
|
374
|
-
fileName,
|
|
375
|
-
relativePath,
|
|
376
|
-
title,
|
|
377
|
-
keywords,
|
|
378
|
-
content: raw,
|
|
379
|
-
injected: preserved.get(filePath) ?? false,
|
|
380
|
-
keywordSource,
|
|
381
|
-
};
|
|
421
|
+
// ─── PRIORITY 4: Skip ───────────────────────────────────────────
|
|
422
|
+
this.notifier.warn(
|
|
423
|
+
`[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
|
|
424
|
+
);
|
|
425
|
+
return null;
|
|
382
426
|
} catch (err) {
|
|
383
427
|
// Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
|
|
384
428
|
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
|
package/types.ts
CHANGED
|
@@ -107,4 +107,16 @@ export const DEFAULT_CONFIG: DocInjectorConfig = {
|
|
|
107
107
|
export const DEFAULT_MATCHER_OPTIONS: MatcherOptions = {
|
|
108
108
|
matchThreshold: DEFAULT_CONFIG.matchThreshold,
|
|
109
109
|
caseSensitive: false,
|
|
110
|
-
};
|
|
110
|
+
};
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Sentinel value used in CacheEntry.mtimeMs to mark entries written by the
|
|
114
|
+
* LLM keyword generator. -1 is chosen because fs.Stats.mtimeMs is a timestamp
|
|
115
|
+
* in milliseconds since the POSIX Epoch (possibly fractional), so in practice
|
|
116
|
+
* a real file does not report mtimeMs === -1 (that would be exactly 1 ms
|
|
117
|
+
* before the Epoch). Heuristic-written entries use the real file mtime.
|
|
118
|
+
*
|
|
119
|
+
* If you find yourself writing LLM_CACHE_SENTINEL into a real cache entry
|
|
120
|
+
* from a non-LLM code path, that's a bug.
|
|
121
|
+
*/
|
|
122
|
+
export const LLM_CACHE_SENTINEL = -1;
|