pi-local-rag 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/index.ts +297 -118
  3. package/package.json +10 -3
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.0
4
+
5
+ - **Hybrid RAG**: BM25 + local vector embeddings via `@xenova/transformers` (Transformers.js)
6
+ - **Auto-injection**: `before_agent_start` hook injects relevant chunks into every LLM prompt
7
+ - **Embedding model**: `Xenova/all-MiniLM-L6-v2` (384-dim, ~23MB, downloads once, runs fully offline)
8
+ - **Score transparency**: search results now show `bm25`, `vector`, and `hybrid` scores
9
+ - **`/lens rag on|off`**: toggle auto-injection at runtime
10
+ - **`/lens status`**: now shows vector coverage %
11
+ - **Config file**: `~/.pi/lens/config.json` for `ragEnabled`, `ragTopK`, `ragScoreThreshold`, `ragAlpha`
12
+ - Added `@xenova/transformers` to `dependencies`
13
+
3
14
  ## 0.1.0
4
15
 
5
16
  - Initial release
package/index.ts CHANGED
@@ -1,16 +1,16 @@
1
1
  /**
2
- * pi-local-rag — Local RAG Pipeline
3
- *
4
- * Index local files → chunk → store → retrieve. AI consults YOUR knowledge before hallucinating.
5
- * Zero cloud dependency. Embeddings via Ollama (local) or keyword fallback.
6
- *
7
- * /lens index <path> → index a file or directory
8
- * /lens search <query> → search indexed content
2
+ * pi-local-rag — Hybrid RAG Pipeline (BM25 + Vector + Auto-injection)
3
+ *
4
+ * Index local files → chunk → embed → store → retrieve → inject into LLM context.
5
+ * Uses Transformers.js (ONNX) for local embeddings — zero cloud dependency.
6
+ *
7
+ * /lens index <path> → index + embed a file or directory
8
+ * /lens search <query> → hybrid search (BM25 + vector)
9
9
  * /lens status → show index stats
10
10
  * /lens rebuild → rebuild entire index
11
11
  * /lens clear → clear index
12
- * /lens context <query> generate context.md snippet for injection
13
- *
12
+ * /lens rag on|off → toggle auto-injection
13
+ *
14
14
  * Tools: lens_index, lens_query, lens_status
15
15
  */
16
16
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
@@ -20,10 +20,17 @@ import { join, extname, basename } from "node:path";
20
20
  import { homedir } from "node:os";
21
21
  import { createHash } from "node:crypto";
22
22
 
23
+ // ─── Constants ───────────────────────────────────────────────────────────────
24
+
23
25
  const RAG_DIR = join(homedir(), ".pi", "lens");
24
26
  const INDEX_FILE = join(RAG_DIR, "index.json");
27
+ const CONFIG_FILE = join(RAG_DIR, "config.json");
28
+
25
29
  const RST = "\x1b[0m", B = "\x1b[1m", D = "\x1b[2m";
26
- const GREEN = "\x1b[32m", YELLOW = "\x1b[33m", CYAN = "\x1b[36m", RED = "\x1b[31m";
30
+ const GREEN = "\x1b[32m", YELLOW = "\x1b[33m", CYAN = "\x1b[36m", RED = "\x1b[31m", MAGENTA = "\x1b[35m";
31
+
32
+ const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
33
+ const VECTOR_DIM = 384;
27
34
 
28
35
  const TEXT_EXTS = new Set([
29
36
  ".md", ".txt", ".ts", ".js", ".py", ".rs", ".go", ".java", ".c", ".cpp", ".h",
@@ -31,7 +38,11 @@ const TEXT_EXTS = new Set([
31
38
  ".sql", ".graphql", ".proto", ".env", ".gitignore", ".dockerfile",
32
39
  ]);
33
40
 
34
- const SKIP_DIRS = new Set(["node_modules", ".git", ".next", "dist", "build", "__pycache__", ".venv", "venv", ".cache"]);
41
+ const SKIP_DIRS = new Set([
42
+ "node_modules", ".git", ".next", "dist", "build", "__pycache__", ".venv", "venv", ".cache",
43
+ ]);
44
+
45
+ // ─── Types ───────────────────────────────────────────────────────────────────
35
46
 
36
47
  interface Chunk {
37
48
  id: string;
@@ -42,14 +53,44 @@ interface Chunk {
42
53
  hash: string;
43
54
  indexed: string;
44
55
  tokens: number;
56
+ vector?: number[]; // 384-dim embedding, present after embed step
45
57
  }
46
58
 
47
59
  interface IndexMeta {
48
60
  chunks: Chunk[];
49
- files: Record<string, { hash: string; chunks: number; indexed: string; size: number }>;
61
+ files: Record<string, { hash: string; chunks: number; indexed: string; size: number; embedded?: boolean }>;
50
62
  lastBuild: string;
63
+ embeddingModel?: string;
64
+ }
65
+
66
+ interface RagConfig {
67
+ ragEnabled: boolean;
68
+ ragTopK: number;
69
+ ragScoreThreshold: number;
70
+ ragAlpha: number; // 0 = pure vector, 1 = pure BM25
71
+ }
72
+
73
+ // ─── Config ──────────────────────────────────────────────────────────────────
74
+
75
+ function loadConfig(): RagConfig {
76
+ ensureDir();
77
+ if (!existsSync(CONFIG_FILE)) return defaultConfig();
78
+ try {
79
+ return { ...defaultConfig(), ...JSON.parse(readFileSync(CONFIG_FILE, "utf-8")) };
80
+ } catch { return defaultConfig(); }
81
+ }
82
+
83
+ function defaultConfig(): RagConfig {
84
+ return { ragEnabled: true, ragTopK: 5, ragScoreThreshold: 0.1, ragAlpha: 0.4 };
85
+ }
86
+
87
/** Persist the RAG config to ~/.pi/lens/config.json, pretty-printed. */
function saveConfig(config: RagConfig) {
  ensureDir();
  const serialized = JSON.stringify(config, null, 2);
  writeFileSync(CONFIG_FILE, serialized);
}
52
91
 
92
+ // ─── Index I/O ───────────────────────────────────────────────────────────────
93
+
53
94
/** Create the ~/.pi/lens storage directory if it does not already exist. */
function ensureDir() {
  if (existsSync(RAG_DIR)) return;
  mkdirSync(RAG_DIR, { recursive: true });
}
@@ -63,6 +104,7 @@ function loadIndex(): IndexMeta {
63
104
  chunks: Array.isArray(data.chunks) ? data.chunks : [],
64
105
  files: data.files && typeof data.files === "object" ? data.files : {},
65
106
  lastBuild: data.lastBuild ?? "",
107
+ embeddingModel: data.embeddingModel,
66
108
  };
67
109
  } catch { return { chunks: [], files: {}, lastBuild: "" }; }
68
110
  }
@@ -76,13 +118,61 @@ function sha256(data: string): string {
76
118
  return createHash("sha256").update(data).digest("hex").slice(0, 12);
77
119
  }
78
120
 
121
+ // ─── Embeddings ──────────────────────────────────────────────────────────────
122
+
123
+ let _pipeline: any = null;
124
+
125
+ async function getEmbedder() {
126
+ if (_pipeline) return _pipeline;
127
+ const { pipeline } = await import("@xenova/transformers");
128
+ _pipeline = await pipeline("feature-extraction", EMBEDDING_MODEL);
129
+ return _pipeline;
130
+ }
131
+
132
+ async function embed(text: string): Promise<number[]> {
133
+ const embedder = await getEmbedder();
134
+ const output = await embedder(text, { pooling: "mean", normalize: true });
135
+ return Array.from(output.data as Float32Array);
136
+ }
137
+
138
+ async function embedBatch(texts: string[], onProgress?: (i: number, total: number) => void): Promise<number[][]> {
139
+ const results: number[][] = [];
140
+ for (let i = 0; i < texts.length; i++) {
141
+ results.push(await embed(texts[i]));
142
+ onProgress?.(i + 1, texts.length);
143
+ }
144
+ return results;
145
+ }
146
+
147
+ // ─── Math ────────────────────────────────────────────────────────────────────
148
+
149
+ function cosineSimilarity(a: number[], b: number[]): number {
150
+ if (a.length !== b.length) return 0;
151
+ let dot = 0, normA = 0, normB = 0;
152
+ for (let i = 0; i < a.length; i++) {
153
+ dot += a[i] * b[i];
154
+ normA += a[i] * a[i];
155
+ normB += b[i] * b[i];
156
+ }
157
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
158
+ return denom === 0 ? 0 : dot / denom;
159
+ }
160
+
161
+ function normalize(scores: number[]): number[] {
162
+ const max = Math.max(...scores);
163
+ const min = Math.min(...scores);
164
+ const range = max - min;
165
+ if (range === 0) return scores.map(() => 0);
166
+ return scores.map(s => (s - min) / range);
167
+ }
168
+
169
+ // ─── Chunking & File Collection ──────────────────────────────────────────────
170
+
79
171
  function chunkText(text: string, maxLines = 50): { content: string; lineStart: number; lineEnd: number }[] {
80
172
  const lines = text.split("\n");
81
173
  const chunks: { content: string; lineStart: number; lineEnd: number }[] = [];
82
-
83
174
  let i = 0;
84
175
  while (i < lines.length) {
85
- // Try to break at a natural blank-line boundary near the end of the window
86
176
  let end = Math.min(i + maxLines, lines.length);
87
177
  for (let j = end - 1; j > i + 10 && j > end - 15; j--) {
88
178
  if (lines[j]?.trim() === "") { end = j + 1; break; }
@@ -91,7 +181,7 @@ function chunkText(text: string, maxLines = 50): { content: string; lineStart: n
91
181
  if (chunk.trim().length > 20) {
92
182
  chunks.push({ content: chunk, lineStart: i + 1, lineEnd: end });
93
183
  }
94
- i = end; // advance past this chunk; no off-by-one with += maxLines
184
+ i = end;
95
185
  }
96
186
  return chunks;
97
187
  }
@@ -104,26 +194,20 @@ function collectFiles(dirPath: string, maxFiles = 500): string[] {
104
194
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
105
195
  if (files.length >= maxFiles) return;
106
196
  if (entry.isDirectory()) {
107
- if (!SKIP_DIRS.has(entry.name) && !entry.name.startsWith(".")) {
108
- walk(join(dir, entry.name));
109
- }
197
+ if (!SKIP_DIRS.has(entry.name) && !entry.name.startsWith(".")) walk(join(dir, entry.name));
110
198
  } else if (TEXT_EXTS.has(extname(entry.name).toLowerCase())) {
111
199
  const fp = join(dir, entry.name);
112
200
  try {
113
- const stat = statSync(fp);
114
- if (stat.size < 500_000) files.push(fp); // Skip files > 500KB
201
+ if (statSync(fp).size < 500_000) files.push(fp);
115
202
  } catch {}
116
203
  }
117
204
  }
118
205
  } catch {}
119
206
  }
120
-
121
207
  try {
122
208
  const stat = statSync(dirPath);
123
- // Single file: apply the same extension + size guards as the directory walker
124
209
  if (stat.isFile()) {
125
- if (!TEXT_EXTS.has(extname(dirPath).toLowerCase())) return [];
126
- if (stat.size >= 500_000) return [];
210
+ if (!TEXT_EXTS.has(extname(dirPath).toLowerCase()) || stat.size >= 500_000) return [];
127
211
  return [dirPath];
128
212
  }
129
213
  } catch { return []; }
@@ -131,24 +215,31 @@ function collectFiles(dirPath: string, maxFiles = 500): string[] {
131
215
  return files;
132
216
  }
133
217
 
134
- function indexFiles(paths: string[]): { indexed: number; chunks: number; skipped: number } {
218
+ // ─── Indexing ─────────────────────────────────────────────────────────────────
219
+
220
+ async function indexFiles(
221
+ paths: string[],
222
+ onProgress?: (msg: string) => void
223
+ ): Promise<{ indexed: number; chunks: number; skipped: number }> {
135
224
  const index = loadIndex();
136
225
  let indexed = 0, chunked = 0, skipped = 0;
137
-
226
+
138
227
  for (const fp of paths) {
139
228
  try {
140
229
  const content = readFileSync(fp, "utf-8");
141
230
  const hash = sha256(content);
142
-
143
- // Skip if unchanged
144
- if (index.files[fp]?.hash === hash) { skipped++; continue; }
145
-
146
- // Remove old chunks for this file
231
+
232
+ if (index.files[fp]?.hash === hash && index.files[fp]?.embedded) { skipped++; continue; }
233
+
147
234
  index.chunks = index.chunks.filter(c => c.file !== fp);
148
-
149
- // Chunk and add
150
- const chunks = chunkText(content);
151
- for (const chunk of chunks) {
235
+
236
+ const rawChunks = chunkText(content);
237
+ onProgress?.(`Embedding ${basename(fp)} (${rawChunks.length} chunks)...`);
238
+
239
+ const vectors = await embedBatch(rawChunks.map(c => c.content));
240
+
241
+ for (let i = 0; i < rawChunks.length; i++) {
242
+ const chunk = rawChunks[i];
152
243
  index.chunks.push({
153
244
  id: `${sha256(fp)}-${chunk.lineStart}`,
154
245
  file: fp,
@@ -158,137 +249,222 @@ function indexFiles(paths: string[]): { indexed: number; chunks: number; skipped
158
249
  hash: sha256(chunk.content),
159
250
  indexed: new Date().toISOString(),
160
251
  tokens: Math.ceil(chunk.content.length / 4),
252
+ vector: vectors[i],
161
253
  });
162
254
  chunked++;
163
255
  }
164
-
165
- index.files[fp] = { hash, chunks: chunks.length, indexed: new Date().toISOString(), size: content.length };
256
+
257
+ index.files[fp] = { hash, chunks: rawChunks.length, indexed: new Date().toISOString(), size: content.length, embedded: true };
166
258
  indexed++;
167
- } catch { skipped++; }
259
+ } catch (e) { skipped++; }
168
260
  }
169
-
261
+
170
262
  index.lastBuild = new Date().toISOString();
263
+ index.embeddingModel = EMBEDDING_MODEL;
171
264
  saveIndex(index);
172
265
  return { indexed, chunks: chunked, skipped };
173
266
  }
174
267
 
175
- // BM25-style keyword search (no embeddings needed)
176
- function searchChunks(query: string, index: IndexMeta, limit = 10): Chunk[] {
177
- const terms = query.toLowerCase().split(/\s+/).filter(t => t.length > 1);
178
- if (!terms.length) return [];
268
+ // ─── Search ───────────────────────────────────────────────────────────────────
269
+
270
+ interface ScoredChunk {
271
+ chunk: Chunk;
272
+ bm25: number;
273
+ vector: number;
274
+ hybrid: number;
275
+ }
276
+
277
+ async function hybridSearch(
278
+ query: string,
279
+ index: IndexMeta,
280
+ limit = 10,
281
+ alpha = 0.4
282
+ ): Promise<ScoredChunk[]> {
283
+ if (!index.chunks.length) return [];
179
284
 
180
- // Pre-compute IDF per term once (avoids O(n²) re-scan inside the map)
285
+ // ── BM25 ──
286
+ const terms = query.toLowerCase().split(/\s+/).filter(t => t.length > 1);
287
+ const queryLower = query.toLowerCase();
181
288
  const idfMap = new Map<string, number>();
182
289
  for (const term of terms) {
183
290
  const docsWithTerm = index.chunks.filter(c => c.content.toLowerCase().includes(term)).length;
184
291
  idfMap.set(term, Math.log(1 + index.chunks.length / (1 + docsWithTerm)));
185
292
  }
186
- const queryLower = query.toLowerCase();
187
293
 
188
- const scored = index.chunks.map(chunk => {
294
+ const bm25Raw = index.chunks.map(chunk => {
189
295
  const lower = chunk.content.toLowerCase();
190
296
  let score = 0;
191
297
  for (const term of terms) {
192
- const count = (lower.match(new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), "g")) || []).length;
193
- if (count > 0) {
194
- const tf = Math.log(1 + count);
195
- score += tf * idfMap.get(term)!;
196
- }
298
+ const count = (lower.match(new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g")) || []).length;
299
+ if (count > 0) score += Math.log(1 + count) * (idfMap.get(term) ?? 0);
197
300
  }
198
- // Boost for exact phrase match
199
301
  if (lower.includes(queryLower)) score *= 2;
200
- // Boost for filename match
201
- if (chunk.file.toLowerCase().includes(terms[0])) score *= 1.5;
202
-
203
- return { chunk, score };
302
+ if (chunk.file.toLowerCase().includes(terms[0] ?? "")) score *= 1.5;
303
+ return score;
204
304
  });
205
305
 
306
+ const bm25Norm = normalize(bm25Raw);
307
+
308
+ // ── Vector ──
309
+ const chunksWithVectors = index.chunks.filter(c => c.vector && c.vector.length === VECTOR_DIM);
310
+ const hasVectors = chunksWithVectors.length > 0;
311
+
312
+ let vectorNorm: number[] = new Array(index.chunks.length).fill(0);
313
+
314
+ if (hasVectors) {
315
+ const queryVec = await embed(query);
316
+ const vectorRaw = index.chunks.map(chunk =>
317
+ chunk.vector && chunk.vector.length === VECTOR_DIM
318
+ ? cosineSimilarity(queryVec, chunk.vector)
319
+ : 0
320
+ );
321
+ vectorNorm = normalize(vectorRaw);
322
+ }
323
+
324
+ // ── Hybrid ──
325
+ const scored: ScoredChunk[] = index.chunks.map((chunk, i) => ({
326
+ chunk,
327
+ bm25: bm25Norm[i],
328
+ vector: vectorNorm[i],
329
+ hybrid: hasVectors
330
+ ? alpha * bm25Norm[i] + (1 - alpha) * vectorNorm[i]
331
+ : bm25Norm[i],
332
+ }));
333
+
206
334
  return scored
207
- .filter(s => s.score > 0)
208
- .sort((a, b) => b.score - a.score)
209
- .slice(0, limit)
210
- .map(s => s.chunk);
335
+ .filter(s => s.hybrid > 0)
336
+ .sort((a, b) => b.hybrid - a.hybrid)
337
+ .slice(0, limit);
211
338
  }
212
339
 
340
+ // ─── Extension ────────────────────────────────────────────────────────────────
341
+
213
342
  export default function (pi: ExtensionAPI) {
214
343
  ensureDir();
215
344
 
345
+ // ── Auto-inject RAG context before every agent turn ──
346
+ pi.on("before_agent_start", async (event, _ctx) => {
347
+ const config = loadConfig();
348
+ if (!config.ragEnabled) return;
349
+
350
+ const index = loadIndex();
351
+ if (!index.chunks.length) return;
352
+
353
+ const results = await hybridSearch(event.prompt, index, config.ragTopK, config.ragAlpha);
354
+ const relevant = results.filter(r => r.hybrid >= config.ragScoreThreshold);
355
+ if (!relevant.length) return;
356
+
357
+ const context = relevant.map(r =>
358
+ `### ${basename(r.chunk.file)} (lines ${r.chunk.lineStart}-${r.chunk.lineEnd})\n` +
359
+ `\`\`\`\n${r.chunk.content.slice(0, 600)}\n\`\`\``
360
+ ).join("\n\n");
361
+
362
+ return {
363
+ systemPrompt: event.systemPrompt +
364
+ `\n\n## Relevant Codebase Context (pi-local-rag)\n` +
365
+ `*Retrieved ${relevant.length} chunks via hybrid search (BM25 + vector)*\n\n` +
366
+ context,
367
+ };
368
+ });
369
+
370
+ // ── /lens command ──
216
371
  pi.registerCommand("lens", {
217
- description: "pi-local-rag pipeline: /lens index|search|status|rebuild|clear|context",
372
+ description: "pi-local-rag: /lens index|search|status|rebuild|clear|rag",
218
373
  handler: async (args, ctx) => {
219
374
  const parts = (args || "").trim().split(/\s+/);
220
375
  const cmd = parts[0] || "status";
221
376
 
377
+ // ── index ──
222
378
  if (cmd === "index") {
223
379
  const path = parts[1] || ".";
224
380
  if (!existsSync(path)) return `${RED}Path not found:${RST} ${path}`;
225
381
  const files = collectFiles(path);
226
382
  if (!files.length) return `${YELLOW}No indexable files found in:${RST} ${path}`;
227
- const result = indexFiles(files);
228
- return `${GREEN}✅ Indexed:${RST} ${result.indexed} files, ${result.chunks} chunks (${result.skipped} skipped/unchanged)`;
383
+ ctx.ui.notify(`Indexing ${files.length} files...`, "info");
384
+ const result = await indexFiles(files, msg => ctx.ui.notify(msg, "info"));
385
+ return `${GREEN}✅ Indexed:${RST} ${result.indexed} files, ${result.chunks} chunks, ${result.skipped} unchanged\n` +
386
+ `${D}Embeddings: ${EMBEDDING_MODEL}${RST}`;
229
387
  }
230
388
 
389
+ // ── search ──
231
390
  if (cmd === "search") {
232
391
  const query = parts.slice(1).join(" ");
233
392
  if (!query) return `${YELLOW}Usage:${RST} /lens search <query>`;
234
393
  const index = loadIndex();
235
- const results = searchChunks(query, index);
394
+ const config = loadConfig();
395
+ const results = await hybridSearch(query, index, 10, config.ragAlpha);
236
396
  if (!results.length) return `${YELLOW}No results for:${RST} ${query}`;
237
- let out = `${B}${CYAN}🔍 ${results.length} results for "${query}"${RST}\n\n`;
397
+
398
+ const hasVectors = index.chunks.some(c => c.vector);
399
+ let out = `${B}${CYAN}🔍 ${results.length} results for "${query}"${RST}`;
400
+ out += ` ${D}(${hasVectors ? "hybrid BM25+vector" : "BM25 only — run /lens index to add vectors"})${RST}\n\n`;
401
+
238
402
  for (const r of results) {
239
- out += `${GREEN}${basename(r.file)}${RST}:${r.lineStart}-${r.lineEnd} ${D}(${r.tokens} tokens)${RST}\n`;
240
- const preview = r.content.split("\n").slice(0, 3).join("\n");
403
+ const bar = "█".repeat(Math.round(r.hybrid * 10)) + "░".repeat(10 - Math.round(r.hybrid * 10));
404
+ out += `${GREEN}${basename(r.chunk.file)}${RST}:${r.chunk.lineStart}-${r.chunk.lineEnd} `;
405
+ out += `${D}bm25=${r.bm25.toFixed(2)} vec=${r.vector.toFixed(2)} hybrid=${r.hybrid.toFixed(2)}${RST} ${CYAN}${bar}${RST}\n`;
406
+ const preview = r.chunk.content.split("\n").slice(0, 3).join("\n");
241
407
  out += `${D}${preview.slice(0, 200)}${RST}\n\n`;
242
408
  }
243
409
  return out;
244
410
  }
245
411
 
246
- if (cmd === "context") {
247
- const query = parts.slice(1).join(" ");
248
- if (!query) return `${YELLOW}Usage:${RST} /lens context <query>`;
249
- const index = loadIndex();
250
- const results = searchChunks(query, index, 5);
251
- if (!results.length) return `${YELLOW}No relevant context found for:${RST} ${query}`;
252
- let context = `# Relevant Context for: ${query}\n\n`;
253
- for (const r of results) {
254
- context += `## ${basename(r.file)} (lines ${r.lineStart}-${r.lineEnd})\n\`\`\`\n${r.content.slice(0, 500)}\n\`\`\`\n\n`;
255
- }
256
- return context;
412
+ // ── rag toggle ──
413
+ if (cmd === "rag") {
414
+ const config = loadConfig();
415
+ const sub = parts[1];
416
+ if (sub === "on") { config.ragEnabled = true; saveConfig(config); return `${GREEN}✅ RAG auto-injection enabled${RST}`; }
417
+ if (sub === "off") { config.ragEnabled = false; saveConfig(config); return `${YELLOW}RAG auto-injection disabled${RST}`; }
418
+ return `${B}RAG:${RST} ${config.ragEnabled ? `${GREEN}enabled${RST}` : `${YELLOW}disabled${RST}`}\n` +
419
+ ` topK: ${config.ragTopK} threshold: ${config.ragScoreThreshold} alpha: ${config.ragAlpha} ${D}(0=pure vector, 1=pure BM25)${RST}`;
257
420
  }
258
421
 
422
+ // ── rebuild ──
259
423
  if (cmd === "rebuild") {
260
424
  const index = loadIndex();
261
425
  const allFiles = Object.keys(index.files);
262
426
  if (!allFiles.length) return `${YELLOW}No files in index. Run /lens index <path> first.${RST}`;
263
- // Prune deleted files without clearing hashes of surviving files
264
427
  const existingFiles = allFiles.filter(f => existsSync(f));
265
428
  const deletedFiles = allFiles.filter(f => !existsSync(f));
266
429
  for (const f of deletedFiles) {
267
430
  index.chunks = index.chunks.filter(c => c.file !== f);
268
431
  delete index.files[f];
269
432
  }
270
- saveIndex(index); // hashes intact so unchanged files will be skipped
271
- const result = indexFiles(existingFiles);
433
+ // Force re-embed by clearing embedded flag
434
+ for (const f of existingFiles) { if (index.files[f]) index.files[f].embedded = false; }
435
+ saveIndex(index);
436
+ ctx.ui.notify(`Rebuilding ${existingFiles.length} files...`, "info");
437
+ const result = await indexFiles(existingFiles, msg => ctx.ui.notify(msg, "info"));
272
438
  return `${GREEN}✅ Rebuilt:${RST} pruned ${deletedFiles.length} deleted, re-indexed ${result.indexed} changed, ${result.skipped} unchanged (${result.chunks} new chunks)`;
273
439
  }
274
440
 
441
+ // ── clear ──
275
442
  if (cmd === "clear") {
276
443
  saveIndex({ chunks: [], files: {}, lastBuild: "" });
277
444
  return `${GREEN}✅ Index cleared.${RST}`;
278
445
  }
279
446
 
280
- // Default: status
447
+ // ── status ──
281
448
  const index = loadIndex();
449
+ const config = loadConfig();
282
450
  const fileCount = Object.keys(index.files).length;
283
451
  const totalTokens = index.chunks.reduce((sum, c) => sum + c.tokens, 0);
284
- let out = `${B}${CYAN}🔍 pi-local-rag Index Status${RST}\n\n`;
285
- out += ` Files indexed: ${GREEN}${fileCount}${RST}\n`;
286
- out += ` Chunks: ${GREEN}${index.chunks.length}${RST}\n`;
287
- out += ` Total tokens: ${GREEN}${totalTokens.toLocaleString()}${RST}\n`;
288
- out += ` Last build: ${index.lastBuild || "never"}\n`;
289
- out += ` Storage: ${D}${RAG_DIR}${RST}\n`;
452
+ const embeddedCount = index.chunks.filter(c => c.vector).length;
453
+ const vectorCoverage = index.chunks.length ? Math.round(embeddedCount / index.chunks.length * 100) : 0;
454
+
455
+ let out = `${B}${CYAN}🔍 pi-local-rag Status${RST}\n\n`;
456
+ out += ` Files indexed: ${GREEN}${fileCount}${RST}\n`;
457
+ out += ` Chunks: ${GREEN}${index.chunks.length}${RST}\n`;
458
+ out += ` Vectors: ${GREEN}${embeddedCount}${RST} ${D}(${vectorCoverage}% coverage)${RST}\n`;
459
+ out += ` Total tokens: ${GREEN}${totalTokens.toLocaleString()}${RST}\n`;
460
+ out += ` Embedding model: ${D}${index.embeddingModel || "none"}${RST}\n`;
461
+ out += ` Last build: ${index.lastBuild || "never"}\n`;
462
+ out += ` Storage: ${D}${RAG_DIR}${RST}\n\n`;
463
+ out += ` RAG injection: ${config.ragEnabled ? `${GREEN}enabled${RST}` : `${YELLOW}disabled${RST}`}`;
464
+ out += ` topK=${config.ragTopK} threshold=${config.ragScoreThreshold} alpha=${config.ragAlpha}\n`;
465
+
290
466
  if (fileCount) {
291
- out += `\n ${B}Top file types:${RST}\n`;
467
+ out += `\n ${B}File types:${RST}\n`;
292
468
  const byExt: Record<string, number> = {};
293
469
  for (const f of Object.keys(index.files)) byExt[extname(f)] = (byExt[extname(f)] || 0) + 1;
294
470
  for (const [ext, count] of Object.entries(byExt).sort((a, b) => b[1] - a[1]).slice(0, 8)) {
@@ -296,67 +472,70 @@ export default function (pi: ExtensionAPI) {
296
472
  }
297
473
  }
298
474
  return out;
299
- }
475
+ },
300
476
  });
301
477
 
478
+ // ── Tools ──
479
+
302
480
  pi.registerTool({
303
481
  name: "lens_index",
304
- description: "Index a file or directory into the local pi-local-rag pipeline. Chunks text files, stores for BM25 keyword search.",
482
+ description: "Index a file or directory into the local pi-local-rag pipeline. Chunks text files, generates embeddings, stores for hybrid BM25+vector search.",
305
483
  parameters: Type.Object({
306
484
  path: Type.String({ description: "File or directory path to index" }),
307
485
  }),
308
486
  execute: async (_toolCallId, params) => {
309
- let text: string;
310
- if (!existsSync(params.path)) text = `Path not found: ${params.path}`;
311
- else {
312
- const files = collectFiles(params.path);
313
- if (!files.length) text = `No indexable text files found in: ${params.path}`;
314
- else {
315
- const result = indexFiles(files);
316
- text = `Indexed ${result.indexed} files (${result.chunks} chunks). ${result.skipped} unchanged.`;
317
- }
318
- }
319
- return { content: [{ type: "text" as const, text }] };
320
- }
487
+ if (!existsSync(params.path)) return { content: [{ type: "text" as const, text: `Path not found: ${params.path}` }] };
488
+ const files = collectFiles(params.path);
489
+ if (!files.length) return { content: [{ type: "text" as const, text: `No indexable text files found in: ${params.path}` }] };
490
+ const result = await indexFiles(files);
491
+ return { content: [{ type: "text" as const, text: `Indexed ${result.indexed} files (${result.chunks} chunks, embeddings generated). ${result.skipped} unchanged.` }] };
492
+ },
321
493
  });
322
494
 
323
495
  pi.registerTool({
324
496
  name: "lens_query",
325
- description: "Search the local pi-local-rag index using BM25 keyword matching. Returns relevant chunks from indexed files with file paths and line numbers.",
497
+ description: "Search the local pi-local-rag index using hybrid BM25+vector search. Returns relevant chunks with file paths, line numbers, and relevance scores.",
326
498
  parameters: Type.Object({
327
499
  query: Type.String({ description: "Search query" }),
328
500
  limit: Type.Optional(Type.Number({ description: "Max results (default 10)" })),
329
501
  }),
330
502
  execute: async (_toolCallId, params) => {
331
503
  const index = loadIndex();
332
- let text: string;
333
- if (!index.chunks.length) text = "pi-local-rag index is empty. Run lens_index first."
334
- else {
335
- const results = searchChunks(params.query, index, params.limit || 10);
336
- if (!results.length) text = `No results for: ${params.query}`;
337
- else text = JSON.stringify(results.map(r => ({
338
- file: r.file, lines: `${r.lineStart}-${r.lineEnd}`,
339
- tokens: r.tokens, preview: r.content.slice(0, 300)
340
- })), null, 2);
341
- }
504
+ if (!index.chunks.length) return { content: [{ type: "text" as const, text: "pi-local-rag index is empty. Run lens_index first." }] };
505
+ const config = loadConfig();
506
+ const results = await hybridSearch(params.query, index, params.limit ?? 10, config.ragAlpha);
507
+ if (!results.length) return { content: [{ type: "text" as const, text: `No results for: ${params.query}` }] };
508
+ const text = JSON.stringify(results.map(r => ({
509
+ file: r.chunk.file,
510
+ lines: `${r.chunk.lineStart}-${r.chunk.lineEnd}`,
511
+ tokens: r.chunk.tokens,
512
+ scores: { bm25: r.bm25.toFixed(3), vector: r.vector.toFixed(3), hybrid: r.hybrid.toFixed(3) },
513
+ preview: r.chunk.content.slice(0, 300),
514
+ })), null, 2);
342
515
  return { content: [{ type: "text" as const, text }] };
343
- }
516
+ },
344
517
  });
345
518
 
346
519
  pi.registerTool({
347
520
  name: "lens_status",
348
- description: "Show pi-local-rag index statistics: file count, chunk count, total tokens, last build time.",
521
+ description: "Show pi-local-rag index statistics: file count, chunk count, vector coverage, embedding model, RAG config.",
349
522
  parameters: Type.Object({}),
350
523
  execute: async (_toolCallId) => {
351
524
  const index = loadIndex();
525
+ const config = loadConfig();
526
+ const embeddedCount = index.chunks.filter(c => c.vector).length;
352
527
  const text = JSON.stringify({
353
528
  files: Object.keys(index.files).length,
354
529
  chunks: index.chunks.length,
530
+ vectorsEmbedded: embeddedCount,
531
+ vectorCoverage: index.chunks.length ? `${Math.round(embeddedCount / index.chunks.length * 100)}%` : "0%",
532
+ embeddingModel: index.embeddingModel ?? "none",
355
533
  totalTokens: index.chunks.reduce((s, c) => s + c.tokens, 0),
356
534
  lastBuild: index.lastBuild || "never",
357
- storagePath: RAG_DIR, // ~/.pi/lens
535
+ ragConfig: config,
536
+ storagePath: RAG_DIR,
358
537
  }, null, 2);
359
538
  return { content: [{ type: "text" as const, text }] };
360
- }
539
+ },
361
540
  });
362
541
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-local-rag",
3
- "version": "0.1.0",
4
- "description": "Local BM25 RAG pipeline for the Pi coding agent. Index local files and search them with keyword matching zero cloud dependency.",
3
+ "version": "0.2.0",
4
+ "description": "Hybrid RAG pipeline for the Pi coding agent. BM25 + local vector embeddings (Transformers.js) + auto-injection into LLM context. Zero cloud dependency.",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
7
7
  "exports": {
@@ -24,9 +24,13 @@
24
24
  "rag",
25
25
  "search",
26
26
  "bm25",
27
+ "vector",
28
+ "embeddings",
29
+ "hybrid-search",
27
30
  "index",
28
31
  "local",
29
- "offline"
32
+ "offline",
33
+ "transformers"
30
34
  ],
31
35
  "author": "kowsari",
32
36
  "license": "MIT",
@@ -49,6 +53,9 @@
49
53
  "./index.ts"
50
54
  ]
51
55
  },
56
+ "dependencies": {
57
+ "@xenova/transformers": "^2.17.2"
58
+ },
52
59
  "peerDependencies": {
53
60
  "@mariozechner/pi-coding-agent": ">=0.60.0"
54
61
  }