pi-doc-injector 0.5.1 → 0.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. package/index.ts +4 -2
  2. package/package.json +1 -1
  3. package/registry.ts +106 -62
  4. package/types.ts +13 -1
package/index.ts CHANGED
@@ -73,7 +73,7 @@ import { buildKeywordGenPrompt } from "./keyword-llm";
73
73
  import { extractText, KeywordMatcher } from "./matcher";
74
74
  import { ExtensionNotifier, type Notifier } from "./notifier";
75
75
  import { DocRegistry } from "./registry";
76
- import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
76
+ import { DEFAULT_MATCHER_OPTIONS, LLM_CACHE_SENTINEL, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
77
77
  import { registerCommands } from "./commands";
78
78
 
79
79
  export default async function docInjectorExtension(pi: ExtensionAPI) {
@@ -184,7 +184,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
184
184
  continue;
185
185
  }
186
186
  cache.files[item.path] = {
187
- mtimeMs: fileStat.mtimeMs,
187
+ // Use the sentinel — never the real mtime — so the next rebuild
188
+ // surfaces this entry as keywordSource: "llm" instead of "cache".
189
+ mtimeMs: LLM_CACHE_SENTINEL,
188
190
  keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
189
191
  };
190
192
  saved++;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "pi-doc-injector",
3
- "version": "0.5.1",
3
+ "version": "0.5.3",
4
4
  "description": "Auto-inject relevant project documentation into Pi's LLM context based on keyword matching",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
package/registry.ts CHANGED
@@ -8,7 +8,7 @@
8
8
  import type { Dirent } from "node:fs";
9
9
  import { readdir, readFile, stat } from "node:fs/promises";
10
10
  import { basename, extname, join, relative, resolve } from "node:path";
11
- import type { CacheEntry, DocEntry, DocInjectorConfig, KeywordCache } from "./types";
11
+ import { LLM_CACHE_SENTINEL, type CacheEntry, type DocEntry, type DocInjectorConfig, type KeywordCache } from "./types";
12
12
  import type { Notifier } from "./notifier";
13
13
  import { createGlobFilter } from "./globber";
14
14
  import { generateKeywords } from "./keyword-gen";
@@ -232,6 +232,12 @@ export class DocRegistry {
232
232
  private cache: KeywordCache | null = null;
233
233
  private dirtyCache: KeywordCache = { version: 1, files: {} };
234
234
  private notifier: Notifier;
235
+ // Per-registry flag: warn about a missing docs folder at most once.
236
+ // rebuild() is called twice at startup (once from session_start, once
237
+ // from resources_discover); without this flag the user sees the
238
+ // same warning twice. Not reset across rebuilds — a missing folder
239
+ // is a persistent condition, not a transient one.
240
+ private warnedMissingDocs = false;
235
241
 
236
242
  private constructor(
237
243
  docsPath: string,
@@ -268,6 +274,21 @@ export class DocRegistry {
268
274
  // Start with a fresh dirty cache — only files that changed get added
269
275
  this.dirtyCache = { version: 1, files: {} };
270
276
 
277
+ // Pre-check folder existence. The previous catch-all "Docs folder not
278
+ // found" warning was misleading (it also fired for scan errors) and was
279
+ // emitted twice at startup (once from session_start, once from
280
+ // resources_discover). The warnedMissingDocs flag deduplicates across
281
+ // rebuilds for the lifetime of this registry.
282
+ const folderStat = await stat(resolved).catch(() => null);
283
+ if (!folderStat || !folderStat.isDirectory()) {
284
+ if (!this.warnedMissingDocs) {
285
+ this.notifier.warn(`[doc-injector] Docs folder not found: ${resolved}`);
286
+ this.warnedMissingDocs = true;
287
+ }
288
+ this.entries = [];
289
+ return;
290
+ }
291
+
271
292
  try {
272
293
  const scanResults = this.config.recursive
273
294
  ? await this.scanRecursive(resolved)
@@ -282,27 +303,39 @@ export class DocRegistry {
282
303
 
283
304
  const results = await pool.all(tasks);
284
305
  this.entries = results.filter((e): e is DocEntry => e !== null);
285
- } catch {
286
- this.notifier.warn(`[doc-injector] Docs folder not found: ${resolved}`);
306
+ } catch (err) {
307
+ // This catch now only fires for actual scan errors (not folder-missing).
308
+ this.notifier.warn(
309
+ `[doc-injector] Error scanning docs folder ${resolved}: ${
310
+ err instanceof Error ? err.message : String(err)
311
+ }`,
312
+ );
287
313
  this.entries = [];
288
314
  }
289
315
  }
290
316
 
291
317
  /**
292
- * Process a single file through the full pipeline.
318
+ * Process a single file through the priority chain.
293
319
  * Returns a DocEntry or null if the file should be skipped.
320
+ *
321
+ * Priority (highest to lowest):
322
+ * 1. Frontmatter (authoritative — explicitly written by the doc author)
323
+ * 2. Cache (perf layer — mtime match means content hasn't changed)
324
+ * 3. Heuristic (free, automatic, local — filename + headings + code symbols)
325
+ * 4. Skip (no frontmatter, no cache, autoKeywords disabled)
326
+ *
327
+ * LLM-generated keywords populate the cache via the `_doc_injector_keywords`
328
+ * tool with `mtimeMs` set to `LLM_CACHE_SENTINEL`; the cache step recognizes the
329
+ * sentinel and surfaces those entries as `keywordSource: "llm"` on the next rebuild.
294
330
  */
295
331
  private async processFile(
296
332
  { filePath, relativePath, fileName }: ScanResult,
297
333
  preserved: Map<string, boolean>,
298
334
  ): Promise<DocEntry | null> {
299
335
  try {
300
- // ═══ METADATA + CACHE ═══
301
-
302
- // Step 1: Stat the file for size and mtime
336
+ // ─── METADATA ─────────────────────────────────────────────
303
337
  const fileStat = await stat(filePath);
304
338
 
305
- // Step 2: Skip files exceeding maxFileSize
306
339
  if (fileStat.size > this.config.maxFileSize) {
307
340
  this.notifier.warn(
308
341
  `[doc-injector] Skipping ${relativePath}: size ${fileStat.size} > max ${this.config.maxFileSize}`,
@@ -310,75 +343,86 @@ export class DocRegistry {
310
343
  return null;
311
344
  }
312
345
 
313
- const cachedEntry = this.cache?.files[relativePath];
314
-
315
- // Step 6: Cache hit — mtime matches, use cached keywords
316
- if (cachedEntry && cachedEntry.mtimeMs === fileStat.mtimeMs) {
317
- // Still read the file for content and title (needed for injection),
318
- // but skip keyword generation entirely
319
- const raw = await readFile(filePath, "utf-8");
320
- const title = extractTitle(raw, fileName);
346
+ // Read once — needed for frontmatter parse, content, and title.
347
+ const raw = await readFile(filePath, "utf-8");
321
348
 
349
+ // ─── PRIORITY 1: Frontmatter (authoritative) ─────────────
350
+ const parsed = parseFrontmatter(raw);
351
+ if (parsed) {
352
+ // Frontmatter is self-caching (lives in the file), no dirty mark needed.
322
353
  return {
323
354
  filePath,
324
355
  fileName,
325
356
  relativePath,
326
- title,
327
- keywords: cachedEntry.keywords,
357
+ title: parsed.title,
358
+ keywords: parsed.keywords,
328
359
  content: raw,
329
360
  injected: preserved.get(filePath) ?? false,
330
- keywordSource: "cache",
361
+ keywordSource: "frontmatter",
331
362
  };
332
363
  }
333
364
 
334
- // ═══ FULL READ + PARSE (cache miss) ═══
335
-
336
- // Step 7: Read file content
337
- const raw = await readFile(filePath, "utf-8");
365
+ // ─── PRIORITY 2: Cache (mtime match means content unchanged) ──
366
+ const cachedEntry = this.cache?.files[relativePath];
367
+ if (cachedEntry) {
368
+ // LLM-generated: sentinel mtime never matches a real file
369
+ if (cachedEntry.mtimeMs === LLM_CACHE_SENTINEL) {
370
+ const title = extractTitle(raw, fileName);
371
+ return {
372
+ filePath,
373
+ fileName,
374
+ relativePath,
375
+ title,
376
+ keywords: cachedEntry.keywords,
377
+ content: raw,
378
+ injected: preserved.get(filePath) ?? false,
379
+ keywordSource: "llm",
380
+ };
381
+ }
382
+ // Real mtime match: heuristic or prior LLM-upgrade cache hit
383
+ if (cachedEntry.mtimeMs === fileStat.mtimeMs) {
384
+ const title = extractTitle(raw, fileName);
385
+ return {
386
+ filePath,
387
+ fileName,
388
+ relativePath,
389
+ title,
390
+ keywords: cachedEntry.keywords,
391
+ content: raw,
392
+ injected: preserved.get(filePath) ?? false,
393
+ keywordSource: "cache",
394
+ };
395
+ }
396
+ }
338
397
 
339
- // Step 8: Try frontmatter parsing
340
- const parsed = parseFrontmatter(raw);
398
+ // ─── PRIORITY 3: Heuristic (free, automatic fallback) ─────────
399
+ if (this.config.autoKeywords) {
400
+ const title = extractTitle(raw, fileName);
401
+ const keywords = generateKeywords(fileName, raw);
341
402
 
342
- let title: string;
343
- let keywords: string[];
344
- let keywordSource: DocEntry["keywordSource"];
403
+ // Mark cache dirty (newly generated keywords must be persisted).
404
+ this.dirtyCache.files[relativePath] = {
405
+ mtimeMs: fileStat.mtimeMs,
406
+ keywords,
407
+ };
345
408
 
346
- if (parsed) {
347
- // Step 9: Frontmatter found — use its title and keywords
348
- title = parsed.title;
349
- keywords = parsed.keywords;
350
- keywordSource = "frontmatter";
351
- } else if (this.config.autoKeywords) {
352
- // Step 10: No frontmatter, generate keywords heuristically
353
- title = extractTitle(raw, fileName);
354
- keywords = generateKeywords(fileName, raw);
355
- keywordSource = "heuristic";
356
- } else {
357
- // Step 11: No frontmatter and autoKeywords disabled — skip
358
- this.notifier.warn(
359
- `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
360
- );
361
- return null;
409
+ return {
410
+ filePath,
411
+ fileName,
412
+ relativePath,
413
+ title,
414
+ keywords,
415
+ content: raw,
416
+ injected: preserved.get(filePath) ?? false,
417
+ keywordSource: "heuristic",
418
+ };
362
419
  }
363
420
 
364
- // ═══ CACHE UPDATE ═══
365
-
366
- // Step 12: Mark as dirty (mtime changed or keywords generated)
367
- this.dirtyCache.files[relativePath] = {
368
- mtimeMs: fileStat.mtimeMs,
369
- keywords,
370
- };
371
-
372
- return {
373
- filePath,
374
- fileName,
375
- relativePath,
376
- title,
377
- keywords,
378
- content: raw,
379
- injected: preserved.get(filePath) ?? false,
380
- keywordSource,
381
- };
421
+ // ─── PRIORITY 4: Skip ───────────────────────────────────────────
422
+ this.notifier.warn(
423
+ `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
424
+ );
425
+ return null;
382
426
  } catch (err) {
383
427
  // Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
384
428
  if ((err as NodeJS.ErrnoException).code !== "ENOENT") {
package/types.ts CHANGED
@@ -107,4 +107,16 @@ export const DEFAULT_CONFIG: DocInjectorConfig = {
107
107
  export const DEFAULT_MATCHER_OPTIONS: MatcherOptions = {
108
108
  matchThreshold: DEFAULT_CONFIG.matchThreshold,
109
109
  caseSensitive: false,
110
- };
110
+ };
111
+
112
+ /**
113
+ * Sentinel value used in CacheEntry.mtimeMs to mark entries written by the
114
+ * LLM keyword generator. -1 is chosen because fs.Stats.mtimeMs is the file's
115
+ * modification time in milliseconds since the Unix Epoch, so any file modified
116
+ * after 1970 has mtimeMs > 0 and will not equal -1 in practice. Heuristic-written
117
+ * entries use the real file mtime.
118
+ *
119
+ * If you find yourself writing LLM_CACHE_SENTINEL into a real cache entry
120
+ * from a non-LLM code path, that's a bug.
121
+ */
122
+ export const LLM_CACHE_SENTINEL = -1;