npm - pi-doc-injector - Versions diffs - 0.5.1 → 0.5.2 - Mend

pi-doc-injector 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/index.ts CHANGED Viewed

@@ -73,7 +73,7 @@ import { buildKeywordGenPrompt } from "./keyword-llm";
 import { extractText, KeywordMatcher } from "./matcher";
 import { ExtensionNotifier, type Notifier } from "./notifier";
 import { DocRegistry } from "./registry";
-import { DEFAULT_MATCHER_OPTIONS, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
+import { DEFAULT_MATCHER_OPTIONS, LLM_CACHE_SENTINEL, type DocEntry, type MatchResult, type KeywordCache, type CacheEntry } from "./types";
 import { registerCommands } from "./commands";
 export default async function docInjectorExtension(pi: ExtensionAPI) {
@@ -184,7 +184,9 @@ export default async function docInjectorExtension(pi: ExtensionAPI) {
           continue;
         }
         cache.files[item.path] = {
-          mtimeMs: fileStat.mtimeMs,
+          // Use the sentinel — never the real mtime — so the next rebuild
+          // surfaces this entry as keywordSource: "llm" instead of "cache".
+          mtimeMs: LLM_CACHE_SENTINEL,
           keywords: item.keywords.map((k) => k.toLowerCase()).slice(0, 20),
         };
         saved++;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "pi-doc-injector",
-  "version": "0.5.1",
+  "version": "0.5.2",
   "description": "Auto-inject relevant project documentation into Pi's LLM context based on keyword matching",
   "type": "module",
   "main": "./index.ts",

package/registry.ts CHANGED Viewed

@@ -8,7 +8,7 @@
 import type { Dirent } from "node:fs";
 import { readdir, readFile, stat } from "node:fs/promises";
 import { basename, extname, join, relative, resolve } from "node:path";
-import type { CacheEntry, DocEntry, DocInjectorConfig, KeywordCache } from "./types";
+import { LLM_CACHE_SENTINEL, type CacheEntry, type DocEntry, type DocInjectorConfig, type KeywordCache } from "./types";
 import type { Notifier } from "./notifier";
 import { createGlobFilter } from "./globber";
 import { generateKeywords } from "./keyword-gen";
@@ -289,20 +289,27 @@ export class DocRegistry {
   }
   /**
-   * Process a single file through the full pipeline.
+   * Process a single file through the priority chain.
    * Returns a DocEntry or null if the file should be skipped.
+   *
+   * Priority (highest to lowest):
+   *   1. Frontmatter (authoritative — explicitly written by the doc author)
+   *   2. Cache (perf layer — mtime match means content hasn't changed)
+   *   3. Heuristic (free, automatic, local — filename + headings + code symbols)
+   *   4. Skip (no frontmatter, no cache, autoKeywords disabled)
+   *
+   * LLM-generated keywords populate the cache via the `_doc_injector_keywords`
+   * tool with `mtimeMs` set to `LLM_CACHE_SENTINEL` instead of the file's
+   * mtime. Sentinel entries are returned from the cache step without an
+   * mtime comparison and surface as `keywordSource: "llm"` on the next rebuild.
    */
   private async processFile(
     { filePath, relativePath, fileName }: ScanResult,
     preserved: Map<string, boolean>,
   ): Promise<DocEntry | null> {
     try {
-      // ═══ METADATA + CACHE ═══
-      // Step 1: Stat the file for size and mtime
+      // ─── METADATA ─────────────────────────────────────────────
       const fileStat = await stat(filePath);
-      // Step 2: Skip files exceeding maxFileSize
       if (fileStat.size > this.config.maxFileSize) {
         this.notifier.warn(
           `[doc-injector] Skipping ${relativePath}: size ${fileStat.size} > max ${this.config.maxFileSize}`,
@@ -310,75 +317,86 @@ export class DocRegistry {
         return null;
       }
-      const cachedEntry = this.cache?.files[relativePath];
-      // Step 6: Cache hit — mtime matches, use cached keywords
-      if (cachedEntry && cachedEntry.mtimeMs === fileStat.mtimeMs) {
-        // Still read the file for content and title (needed for injection),
-        // but skip keyword generation entirely
-        const raw = await readFile(filePath, "utf-8");
-        const title = extractTitle(raw, fileName);
+      // Read once — needed for frontmatter parse, content, and title.
+      const raw = await readFile(filePath, "utf-8");
+      // ─── PRIORITY 1: Frontmatter (authoritative) ─────────────
+      const parsed = parseFrontmatter(raw);
+      if (parsed) {
+        // Frontmatter is self-caching (lives in the file), no dirty mark needed.
         return {
           filePath,
           fileName,
           relativePath,
-          title,
-          keywords: cachedEntry.keywords,
+          title: parsed.title,
+          keywords: parsed.keywords,
           content: raw,
           injected: preserved.get(filePath) ?? false,
-          keywordSource: "cache",
+          keywordSource: "frontmatter",
         };
       }
-      // ═══ FULL READ + PARSE (cache miss) ═══
-      // Step 7: Read file content
-      const raw = await readFile(filePath, "utf-8");
+      // ─── PRIORITY 2: Cache (mtime match means content unchanged) ──
+      const cachedEntry = this.cache?.files[relativePath];
+      if (cachedEntry) {
+        // LLM-generated: sentinel mtime never matches a real file
+        if (cachedEntry.mtimeMs === LLM_CACHE_SENTINEL) {
+          const title = extractTitle(raw, fileName);
+          return {
+            filePath,
+            fileName,
+            relativePath,
+            title,
+            keywords: cachedEntry.keywords,
+            content: raw,
+            injected: preserved.get(filePath) ?? false,
+            keywordSource: "llm",
+          };
+        }
+        // Real mtime match: heuristic or prior LLM-upgrade cache hit
+        if (cachedEntry.mtimeMs === fileStat.mtimeMs) {
+          const title = extractTitle(raw, fileName);
+          return {
+            filePath,
+            fileName,
+            relativePath,
+            title,
+            keywords: cachedEntry.keywords,
+            content: raw,
+            injected: preserved.get(filePath) ?? false,
+            keywordSource: "cache",
+          };
+        }
+      }
-      // Step 8: Try frontmatter parsing
-      const parsed = parseFrontmatter(raw);
+      // ─── PRIORITY 3: Heuristic (free, automatic fallback) ─────────
+      if (this.config.autoKeywords) {
+        const title = extractTitle(raw, fileName);
+        const keywords = generateKeywords(fileName, raw);
-      let title: string;
-      let keywords: string[];
-      let keywordSource: DocEntry["keywordSource"];
+        // Mark cache dirty (newly generated keywords must be persisted).
+        this.dirtyCache.files[relativePath] = {
+          mtimeMs: fileStat.mtimeMs,
+          keywords,
+        };
-      if (parsed) {
-        // Step 9: Frontmatter found — use its title and keywords
-        title = parsed.title;
-        keywords = parsed.keywords;
-        keywordSource = "frontmatter";
-      } else if (this.config.autoKeywords) {
-        // Step 10: No frontmatter, generate keywords heuristically
-        title = extractTitle(raw, fileName);
-        keywords = generateKeywords(fileName, raw);
-        keywordSource = "heuristic";
-      } else {
-        // Step 11: No frontmatter and autoKeywords disabled — skip
-        this.notifier.warn(
-          `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
-        );
-        return null;
+        return {
+          filePath,
+          fileName,
+          relativePath,
+          title,
+          keywords,
+          content: raw,
+          injected: preserved.get(filePath) ?? false,
+          keywordSource: "heuristic",
+        };
       }
-      // ═══ CACHE UPDATE ═══
-      // Step 12: Mark as dirty (mtime changed or keywords generated)
-      this.dirtyCache.files[relativePath] = {
-        mtimeMs: fileStat.mtimeMs,
-        keywords,
-      };
-      return {
-        filePath,
-        fileName,
-        relativePath,
-        title,
-        keywords,
-        content: raw,
-        injected: preserved.get(filePath) ?? false,
-        keywordSource,
-      };
+      // ─── PRIORITY 4: Skip ───────────────────────────────────────────
+      this.notifier.warn(
+        `[doc-injector] Skipping ${relativePath}: no valid frontmatter with keywords`,
+      );
+      return null;
     } catch (err) {
       // Only warn for unexpected errors, not ENOENT (file deleted/moved after scan)
       if ((err as NodeJS.ErrnoException).code !== "ENOENT") {

package/types.ts CHANGED Viewed

@@ -107,4 +107,16 @@ export const DEFAULT_CONFIG: DocInjectorConfig = {
 export const DEFAULT_MATCHER_OPTIONS: MatcherOptions = {
   matchThreshold: DEFAULT_CONFIG.matchThreshold,
   caseSensitive: false,
-};
+};
+/**
+ * Sentinel value used in CacheEntry.mtimeMs to mark entries written by the
+ * LLM keyword generator. -1 is chosen because fs.Stats.mtimeMs is the
+ * modification time in milliseconds since the Unix Epoch (possibly
+ * fractional), so any file modified after 1970 reports a positive value and
+ * cannot collide with -1. Heuristic-written entries store the real file mtime.
+ *
+ * If you find yourself writing LLM_CACHE_SENTINEL into a real cache entry
+ * from a non-LLM code path, that's a bug.
+ */
+export const LLM_CACHE_SENTINEL = -1;