pi-doc-injector 0.5.1 → 0.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +4 -2
- package/package.json +1 -1
- package/registry.ts +78 -60
- package/types.ts +13 -1
package/index.ts
CHANGED
|
@@ -73,7 +73,7 @@ import { buildKeywordGenPrompt } from "./keyword-llm";
|
|
|
73
73
|
import { extractText, KeywordMatcher } from "./matcher";
|
|
74
74
|
import { ExtensionNotifier, type Notifier } from "./notifier";
|
|
75
75
|
import { DocRegistry } from "./registry";
|
|
76
|
-
import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
|
|
76
|
+
import { DEFAULT_MATCHER_OPTIONS, LLM_CACHE_SENTINEL, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
|
|
77
77
|
import { registerCommands } from "./commands";
|
|
78
78
|
|
|
79
79
|
export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
@@ -184,7 +184,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
|
|
|
184
184
|
continue;
|
|
185
185
|
}
|
|
186
186
|
cache.files[item.path] = {
|
|
187
|
-
|
|
187
|
+
// Use the sentinel — never the real mtime — so the next rebuild
|
|
188
|
+
// surfaces this entry as keywordSource: "llm" instead of "cache".
|
|
189
|
+
mtimeMs: LLM_CACHE_SENTINEL,
|
|
188
190
|
keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
|
|
189
191
|
};
|
|
190
192
|
saved++;
|
package/package.json
CHANGED
package/registry.ts
CHANGED
|
@@ -8,7 +8,7 @@
|
|
|
8
8
|
import type { Dirent } from "node:fs";
|
|
9
9
|
import { readdir, readFile, stat } from "node:fs/promises";
|
|
10
10
|
import { basename, extname, join, relative, resolve } from "node:path";
|
|
11
|
-
import type
|
|
11
|
+
import { LLM_CACHE_SENTINEL, type CacheEntry, type DocEntry, type DocInjectorConfig, type KeywordCache } from "./types";
|
|
12
12
|
import type { Notifier } from "./notifier";
|
|
13
13
|
import { createGlobFilter } from "./globber";
|
|
14
14
|
import { generateKeywords } from "./keyword-gen";
|
|
@@ -289,20 +289,27 @@ export class DocRegistry {
|
|
|
289
289
|
}
|
|
290
290
|
|
|
291
291
|
/**
|
|
292
|
-
* Process a single file through the
|
|
292
|
+
* Process a single file through the priority chain.
|
|
293
293
|
* Returns a DocEntry or null if the file should be skipped.
|
|
294
|
+
*
|
|
295
|
+
* Priority (highest to lowest):
|
|
296
|
+
* 1. Frontmatter (authoritative — explicitly written by the doc author)
|
|
297
|
+
* 2. Cache (perf layer — mtime match means content hasn't changed)
|
|
298
|
+
* 3. Heuristic (free, automatic, local — filename + headings + code symbols)
|
|
299
|
+
* 4. Skip (no frontmatter, no cache, autoKeywords disabled)
|
|
300
|
+
*
|
|
301
|
+
* LLM-generated keywords populate the cache via the `_doc_injector_keywords`
|
|
302
|
+
* tool, so they surface as `keywordSource: "llm"` on the next rebuild
|
|
303
|
+
* (their `mtimeMs` is set to `LLM_CACHE_SENTINEL`, not the file's real mtime).
|
|
294
304
|
*/
|
|
295
305
|
private async processFile(
|
|
296
306
|
{ filePath, relativePath, fileName }: ScanResult,
|
|
297
307
|
preserved: Map<string, boolean>,
|
|
298
308
|
): Promise<DocEntry | null> {
|
|
299
309
|
try {
|
|
300
|
-
//
|
|
301
|
-
|
|
302
|
-
// Step 1: Stat the file for size and mtime
|
|
310
|
+
// ─── METADATA ─────────────────────────────────────────────
|
|
303
311
|
const fileStat = await stat(filePath);
|
|
304
312
|
|
|
305
|
-
// Step 2: Skip files exceeding maxFileSize
|
|
306
313
|
if (fileStat.size > this.config.maxFileSize) {
|
|
307
314
|
this.notifier.warn(
|
|
308
315
|
`[doc-injector] Skipping ${relativePath}: size ${fileStat.size} > max ${this.config.maxFileSize}`,
|
|
@@ -310,75 +317,86 @@ export class DocRegistry {
|
|
|
310
317
|
return null;
|
|
311
318
|
}
|
|
312
319
|
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
// Step 6: Cache hit — mtime matches, use cached keywords
|
|
316
|
-
if (cachedEntry && cachedEntry.mtimeMs === fileStat.mtimeMs) {
|
|
317
|
-
// Still read the file for content and title (needed for injection),
|
|
318
|
-
// but skip keyword generation entirely
|
|
319
|
-
const raw = await readFile(filePath, "utf-8");
|
|
320
|
-
const title = extractTitle(raw, fileName);
|
|
320
|
+
// Read once — needed for frontmatter parse, content, and title.
|
|
321
|
+
const raw = await readFile(filePath, "utf-8");
|
|
321
322
|
|
|
323
|
+
// ─── PRIORITY 1: Frontmatter (authoritative) ─────────────
|
|
324
|
+
const parsed = parseFrontmatter(raw);
|
|
325
|
+
if (parsed) {
|
|
326
|
+
// Frontmatter is self-caching (lives in the file), no dirty mark needed.
|
|
322
327
|
return {
|
|
323
328
|
filePath,
|
|
324
329
|
fileName,
|
|
325
330
|
relativePath,
|
|
326
|
-
title,
|
|
327
|
-
keywords:
|
|
331
|
+
title: parsed.title,
|
|
332
|
+
keywords: parsed.keywords,
|
|
328
333
|
content: raw,
|
|
329
334
|
injected: preserved.get(filePath) ?? false,
|
|
330
|
-
keywordSource: "
|
|
335
|
+
keywordSource: "frontmatter",
|
|
331
336
|
};
|
|
332
337
|
}
|
|
333
338
|
|
|
334
|
-
//
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
339
|
+
// ─── PRIORITY 2: Cache (mtime match means content unchanged) ──
|
|
340
|
+
const cachedEntry = this.cache?.files[relativePath];
|
|
341
|
+
if (cachedEntry) {
|
|
342
|
+
// LLM-generated: sentinel mtime never matches a real file
|
|
343
|
+
if (cachedEntry.mtimeMs === LLM_CACHE_SENTINEL) {
|
|
344
|
+
const title = extractTitle(raw, fileName);
|
|
345
|
+
return {
|
|
346
|
+
filePath,
|
|
347
|
+
fileName,
|
|
348
|
+
relativePath,
|
|
349
|
+
title,
|
|
350
|
+
keywords: cachedEntry.keywords,
|
|
351
|
+
content: raw,
|
|
352
|
+
injected: preserved.get(filePath) ?? false,
|
|
353
|
+
keywordSource: "llm",
|
|
354
|
+
};
|
|
355
|
+
}
|
|
356
|
+
// Real mtime match: heuristic or prior LLM-upgrade cache hit
|
|
357
|
+
if (cachedEntry.mtimeMs === fileStat.mtimeMs) {
|
|
358
|
+
const title = extractTitle(raw, fileName);
|
|
359
|
+
return {
|
|
360
|
+
filePath,
|
|
361
|
+
fileName,
|
|
362
|
+
relativePath,
|
|
363
|
+
title,
|
|
364
|
+
keywords: cachedEntry.keywords,
|
|
365
|
+
content: raw,
|
|
366
|
+
injected: preserved.get(filePath) ?? false,
|
|
367
|
+
keywordSource: "cache",
|
|
368
|
+
};
|
|
369
|
+
}
|
|
370
|
+
}
|
|
338
371
|
|
|
339
|
-
//
|
|
340
|
-
|
|
372
|
+
// ─── PRIORITY 3: Heuristic (free, automatic fallback) ─────────
|
|
373
|
+
if (this.config.autoKeywords) {
|
|
374
|
+
const title = extractTitle(raw, fileName);
|
|
375
|
+
const keywords = generateKeywords(fileName, raw);
|
|
341
376
|
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
377
|
+
// Mark cache dirty (newly generated keywords must be persisted).
|
|
378
|
+
this.dirtyCache.files[relativePath] = {
|
|
379
|
+
mtimeMs: fileStat.mtimeMs,
|
|
380
|
+
keywords,
|
|
381
|
+
};
|
|
345
382
|
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
} else {
|
|
357
|
-
// Step 11: No frontmatter and autoKeywords disabled — skip
|
|
358
|
-
this.notifier.warn(
|
|
359
|
-
`[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
|
|
360
|
-
);
|
|
361
|
-
return null;
|
|
383
|
+
return {
|
|
384
|
+
filePath,
|
|
385
|
+
fileName,
|
|
386
|
+
relativePath,
|
|
387
|
+
title,
|
|
388
|
+
keywords,
|
|
389
|
+
content: raw,
|
|
390
|
+
injected: preserved.get(filePath) ?? false,
|
|
391
|
+
keywordSource: "heuristic",
|
|
392
|
+
};
|
|
362
393
|
}
|
|
363
394
|
|
|
364
|
-
//
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
keywords,
|
|
370
|
-
};
|
|
371
|
-
|
|
372
|
-
return {
|
|
373
|
-
filePath,
|
|
374
|
-
fileName,
|
|
375
|
-
relativePath,
|
|
376
|
-
title,
|
|
377
|
-
keywords,
|
|
378
|
-
content: raw,
|
|
379
|
-
injected: preserved.get(filePath) ?? false,
|
|
380
|
-
keywordSource,
|
|
381
|
-
};
|
|
395
|
+
// ─── PRIORITY 4: Skip ───────────────────────────────────────────
|
|
396
|
+
this.notifier.warn(
|
|
397
|
+
`[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
|
|
398
|
+
);
|
|
399
|
+
return null;
|
|
382
400
|
} catch (err) {
|
|
383
401
|
// Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
|
|
384
402
|
if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
|
package/types.ts
CHANGED
|
@@ -107,4 +107,16 @@ export const DEFAULT_CONFIG: DocInjectorConfig = {
|
|
|
107
107
|
export const DEFAULT_MATCHER_OPTIONS: MatcherOptions = {
|
|
108
108
|
matchThreshold: DEFAULT_CONFIG.matchThreshold,
|
|
109
109
|
caseSensitive: false,
|
|
110
|
-
};
|
|
110
|
+
};
|
|
111
|
+
|
|
112
|
+
/**
|
|
113
|
+
* Sentinel value used in CacheEntry.mtimeMs to mark entries written by the
|
|
114
|
+
* LLM keyword generator. -1 is chosen because a real file's Stats.mtimeMs is
|
|
115
|
+
* milliseconds since the Unix Epoch: it would equal -1 only for a file modified
|
|
116
|
+
* exactly 1 ms before 1970-01-01 (Node does not guarantee the value is
|
|
117
|
+
* non-negative or integral). Heuristic-written entries use the real file mtime.
|
|
118
|
+
*
|
|
119
|
+
* If you find yourself writing LLM_CACHE_SENTINEL into a real cache entry
|
|
120
|
+
* from a non-LLM code path, that's a bug.
|
|
121
|
+
*/
|
|
122
|
+
export const LLM_CACHE_SENTINEL = -1; // stored as CacheEntry.mtimeMs for LLM-written keywords; compared with === before the real-mtime check
|