pi-doc-injector 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.ts CHANGED
@@ -73,7 +73,7 @@ import { buildKeywordGenPrompt } from "./keyword-llm";
73
73
  import { extractText, KeywordMatcher } from "./matcher";
74
74
  import { ExtensionNotifier, type Notifier } from "./notifier";
75
75
  import { DocRegistry } from "./registry";
76
- import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
76
+ import { DEFAULT_MATCHER_OPTIONS, LLM_CACHE_SENTINEL, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
77
77
  import { registerCommands } from "./commands";
78
78
 
79
79
  export default async function docInjectorExtension(pi: ExtensionAPI) {
@@ -184,7 +184,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
184
184
  continue;
185
185
  }
186
186
  cache.files[item.path] = {
187
- mtimeMs: fileStat.mtimeMs,
187
+ // Use the sentinel — never the real mtime — so the next rebuild
188
+ // surfaces this entry as keywordSource: "llm" instead of "cache".
189
+ mtimeMs: LLM_CACHE_SENTINEL,
188
190
  keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
189
191
  };
190
192
  saved++;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-doc-injector",
3
- "version": "0.5.1",
3
+ "version": "0.5.2",
4
4
  "description": "Auto-inject relevant project documentation into Pi's LLM context based on keyword matching",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
package/registry.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  import type { Dirent } from "node:fs";
9
9
  import { readdir, readFile, stat } from "node:fs/promises";
10
10
  import { basename, extname, join, relative, resolve } from "node:path";
11
- import type { CacheEntry, DocEntry, DocInjectorConfig, KeywordCache } from "./types";
11
+ import { LLM_CACHE_SENTINEL, type CacheEntry, type DocEntry, type DocInjectorConfig, type KeywordCache } from "./types";
12
12
  import type { Notifier } from "./notifier";
13
13
  import { createGlobFilter } from "./globber";
14
14
  import { generateKeywords } from "./keyword-gen";
@@ -289,20 +289,27 @@ export class DocRegistry {
289
289
  }
290
290
 
291
291
  /**
292
- * Process a single file through the full pipeline.
292
+ * Process a single file through the priority chain.
293
293
  * Returns a DocEntry or null if the file should be skipped.
294
+ *
295
+ * Priority (highest to lowest):
296
+ * 1. Frontmatter (authoritative — explicitly written by the doc author)
297
+ * 2. Cache (perf layer — mtime match means content hasn't changed)
298
+ * 3. Heuristic (free, automatic, local — filename + headings + code symbols)
299
+ * 4. Skip (no frontmatter, no cache, autoKeywords disabled)
300
+ *
301
+ * LLM-generated keywords populate the cache via the `_doc_injector_keywords`
302
+ * tool with `mtimeMs` set to `LLM_CACHE_SENTINEL`, so they surface as
303
+ * `keywordSource: "llm"` on the next rebuild, whatever the file's real mtime is.
294
304
  */
295
305
  private async processFile(
296
306
  { filePath, relativePath, fileName }: ScanResult,
297
307
  preserved: Map<string, boolean>,
298
308
  ): Promise<DocEntry | null> {
299
309
  try {
300
- // ═══ METADATA + CACHE ═══
301
-
302
- // Step 1: Stat the file for size and mtime
310
+ // ─── METADATA ─────────────────────────────────────────────
303
311
  const fileStat = await stat(filePath);
304
312
 
305
- // Step 2: Skip files exceeding maxFileSize
306
313
  if (fileStat.size > this.config.maxFileSize) {
307
314
  this.notifier.warn(
308
315
  `[doc-injector] Skipping ${relativePath}: size ${fileStat.size} > max ${this.config.maxFileSize}`,
@@ -310,75 +317,86 @@ export class DocRegistry {
310
317
  return null;
311
318
  }
312
319
 
313
- const cachedEntry = this.cache?.files[relativePath];
314
-
315
- // Step 6: Cache hit — mtime matches, use cached keywords
316
- if (cachedEntry && cachedEntry.mtimeMs === fileStat.mtimeMs) {
317
- // Still read the file for content and title (needed for injection),
318
- // but skip keyword generation entirely
319
- const raw = await readFile(filePath, "utf-8");
320
- const title = extractTitle(raw, fileName);
320
+ // Read once — needed for frontmatter parse, content, and title.
321
+ const raw = await readFile(filePath, "utf-8");
321
322
 
323
+ // ─── PRIORITY 1: Frontmatter (authoritative) ─────────────
324
+ const parsed = parseFrontmatter(raw);
325
+ if (parsed) {
326
+ // Frontmatter is self-caching (lives in the file), no dirty mark needed.
322
327
  return {
323
328
  filePath,
324
329
  fileName,
325
330
  relativePath,
326
- title,
327
- keywords: cachedEntry.keywords,
331
+ title: parsed.title,
332
+ keywords: parsed.keywords,
328
333
  content: raw,
329
334
  injected: preserved.get(filePath) ?? false,
330
- keywordSource: "cache",
335
+ keywordSource: "frontmatter",
331
336
  };
332
337
  }
333
338
 
334
- // ═══ FULL READ + PARSE (cache miss) ═══
335
-
336
- // Step 7: Read file content
337
- const raw = await readFile(filePath, "utf-8");
339
+ // ─── PRIORITY 2: Cache (mtime match means content unchanged) ──
340
+ const cachedEntry = this.cache?.files[relativePath];
341
+ if (cachedEntry) {
342
+ // LLM-generated: sentinel mtime never matches a real file
343
+ if (cachedEntry.mtimeMs === LLM_CACHE_SENTINEL) {
344
+ const title = extractTitle(raw, fileName);
345
+ return {
346
+ filePath,
347
+ fileName,
348
+ relativePath,
349
+ title,
350
+ keywords: cachedEntry.keywords,
351
+ content: raw,
352
+ injected: preserved.get(filePath) ?? false,
353
+ keywordSource: "llm",
354
+ };
355
+ }
356
+ // Real mtime match: heuristic or prior LLM-upgrade cache hit
357
+ if (cachedEntry.mtimeMs === fileStat.mtimeMs) {
358
+ const title = extractTitle(raw, fileName);
359
+ return {
360
+ filePath,
361
+ fileName,
362
+ relativePath,
363
+ title,
364
+ keywords: cachedEntry.keywords,
365
+ content: raw,
366
+ injected: preserved.get(filePath) ?? false,
367
+ keywordSource: "cache",
368
+ };
369
+ }
370
+ }
338
371
 
339
- // Step 8: Try frontmatter parsing
340
- const parsed = parseFrontmatter(raw);
372
+ // ─── PRIORITY 3: Heuristic (free, automatic fallback) ─────────
373
+ if (this.config.autoKeywords) {
374
+ const title = extractTitle(raw, fileName);
375
+ const keywords = generateKeywords(fileName, raw);
341
376
 
342
- let title: string;
343
- let keywords: string[];
344
- let keywordSource: DocEntry["keywordSource"];
377
+ // Mark cache dirty (newly generated keywords must be persisted).
378
+ this.dirtyCache.files[relativePath] = {
379
+ mtimeMs: fileStat.mtimeMs,
380
+ keywords,
381
+ };
345
382
 
346
- if (parsed) {
347
- // Step 9: Frontmatter found — use its title and keywords
348
- title = parsed.title;
349
- keywords = parsed.keywords;
350
- keywordSource = "frontmatter";
351
- } else if (this.config.autoKeywords) {
352
- // Step 10: No frontmatter, generate keywords heuristically
353
- title = extractTitle(raw, fileName);
354
- keywords = generateKeywords(fileName, raw);
355
- keywordSource = "heuristic";
356
- } else {
357
- // Step 11: No frontmatter and autoKeywords disabled — skip
358
- this.notifier.warn(
359
- `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
360
- );
361
- return null;
383
+ return {
384
+ filePath,
385
+ fileName,
386
+ relativePath,
387
+ title,
388
+ keywords,
389
+ content: raw,
390
+ injected: preserved.get(filePath) ?? false,
391
+ keywordSource: "heuristic",
392
+ };
362
393
  }
363
394
 
364
- // ═══ CACHE UPDATE ═══
365
-
366
- // Step 12: Mark as dirty (mtime changed or keywords generated)
367
- this.dirtyCache.files[relativePath] = {
368
- mtimeMs: fileStat.mtimeMs,
369
- keywords,
370
- };
371
-
372
- return {
373
- filePath,
374
- fileName,
375
- relativePath,
376
- title,
377
- keywords,
378
- content: raw,
379
- injected: preserved.get(filePath) ?? false,
380
- keywordSource,
381
- };
395
+ // ─── PRIORITY 4: Skip ───────────────────────────────────────────
396
+ this.notifier.warn(
397
+ `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
398
+ );
399
+ return null;
382
400
  } catch (err) {
383
401
  // Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
384
402
  if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
package/types.ts CHANGED
@@ -107,4 +107,16 @@ export const DEFAULT_CONFIG: DocInjectorConfig = {
107
107
  export const DEFAULT_MATCHER_OPTIONS: MatcherOptions = {
108
108
  matchThreshold: DEFAULT_CONFIG.matchThreshold,
109
109
  caseSensitive: false,
110
- };
110
+ };
111
+
112
+ /**
113
+ * Sentinel value used in CacheEntry.mtimeMs to mark entries written by the
114
+ * LLM keyword generator. -1 is chosen because fs.Stats.mtimeMs is a timestamp
115
+ * in milliseconds since the Unix Epoch, so for any file modified after 1970 it
116
+ * is positive and will not equal -1 in practice. Heuristic-written entries use
117
+ * the real file mtime taken from stat().
118
+ *
119
+ * If you find yourself writing LLM_CACHE_SENTINEL into a real cache entry
120
+ * from a non-LLM code path, that's a bug.
121
+ */
122
+ export const LLM_CACHE_SENTINEL = -1;