pi-local-rag 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/CHANGELOG.md +11 -0
  2. package/index.ts +297 -118
  3. package/package.json +10 -3
package/CHANGELOG.md CHANGED
@@ -1,5 +1,16 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.2.0
4
+
5
+ - **Hybrid RAG**: BM25 + local vector embeddings via `@xenova/transformers` (Transformers.js)
6
+ - **Auto-injection**: `before_agent_start` hook injects relevant chunks into every LLM prompt
7
+ - **Embedding model**: `Xenova/all-MiniLM-L6-v2` (384-dim, ~23MB, downloads once, runs fully offline)
8
+ - **Score transparency**: search results now show `bm25`, `vector`, and `hybrid` scores
9
+ - **`/lens rag on|off`**: toggle auto-injection at runtime
10
+ - **`/lens status`**: now shows vector coverage %
11
+ - **Config file**: `~/.pi/lens/config.json` for `ragEnabled`, `ragTopK`, `ragScoreThreshold`, `ragAlpha`
12
+ - Added `@xenova/transformers` to `dependencies`
13
+
3
14
  ## 0.1.0
4
15
 
5
16
  - Initial release
package/index.ts CHANGED
@@ -1,16 +1,16 @@
1
1
  /**
2
- * pi-local-rag — Local RAG Pipeline
3
- *
4
- * Index local files → chunk → store → retrieve. AI consults YOUR knowledge before hallucinating.
5
- * Zero cloud dependency. Embeddings via Ollama (local) or keyword fallback.
6
- *
7
- * /lens index <path> → index a file or directory
8
- * /lens search <query> → search indexed content
2
+ * pi-local-rag — Hybrid RAG Pipeline (BM25 + Vector + Auto-injection)
3
+ *
4
+ * Index local files → chunk → embed → store → retrieve → inject into LLM context.
5
+ * Uses Transformers.js (ONNX) for local embeddings — zero cloud dependency.
6
+ *
7
+ * /lens index <path> → index + embed a file or directory
8
+ * /lens search <query> → hybrid search (BM25 + vector)
9
9
  * /lens status → show index stats
10
10
  * /lens rebuild → rebuild entire index
11
11
  * /lens clear → clear index
12
- * /lens context <query> generate context.md snippet for injection
13
- *
12
+ * /lens rag on|off → toggle auto-injection
13
+ *
14
14
  * Tools: lens_index, lens_query, lens_status
15
15
  */
16
16
  import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
@@ -20,10 +20,17 @@ import { join, extname, basename } from "node:path";
20
20
  import { homedir } from "node:os";
21
21
  import { createHash } from "node:crypto";
22
22
 
23
+ // ─── Constants ───────────────────────────────────────────────────────────────
24
+
23
25
  const RAG_DIR = join(homedir(), ".pi", "lens");
24
26
  const INDEX_FILE = join(RAG_DIR, "index.json");
27
+ const CONFIG_FILE = join(RAG_DIR, "config.json");
28
+
25
29
  const RST = "\x1b[0m", B = "\x1b[1m", D = "\x1b[2m";
26
- const GREEN = "\x1b[32m", YELLOW = "\x1b[33m", CYAN = "\x1b[36m", RED = "\x1b[31m";
30
+ const GREEN = "\x1b[32m", YELLOW = "\x1b[33m", CYAN = "\x1b[36m", RED = "\x1b[31m", MAGENTA = "\x1b[35m";
31
+
32
+ const EMBEDDING_MODEL = "Xenova/all-MiniLM-L6-v2";
33
+ const VECTOR_DIM = 384;
27
34
 
28
35
  const TEXT_EXTS = new Set([
29
36
  ".md", ".txt", ".ts", ".js", ".py", ".rs", ".go", ".java", ".c", ".cpp", ".h",
@@ -31,7 +38,11 @@ const TEXT_EXTS = new Set([
31
38
  ".sql", ".graphql", ".proto", ".env", ".gitignore", ".dockerfile",
32
39
  ]);
33
40
 
34
- const SKIP_DIRS = new Set(["node_modules", ".git", ".next", "dist", "build", "__pycache__", ".venv", "venv", ".cache"]);
41
+ const SKIP_DIRS = new Set([
42
+ "node_modules", ".git", ".next", "dist", "build", "__pycache__", ".venv", "venv", ".cache",
43
+ ]);
44
+
45
+ // ─── Types ───────────────────────────────────────────────────────────────────
35
46
 
36
47
  interface Chunk {
37
48
  id: string;
@@ -42,14 +53,44 @@ interface Chunk {
42
53
  hash: string;
43
54
  indexed: string;
44
55
  tokens: number;
56
+ vector?: number[]; // 384-dim embedding, present after embed step
45
57
  }
46
58
 
47
59
  interface IndexMeta {
48
60
  chunks: Chunk[];
49
- files: Record<string, { hash: string; chunks: number; indexed: string; size: number }>;
61
+ files: Record<string, { hash: string; chunks: number; indexed: string; size: number; embedded?: boolean }>;
50
62
  lastBuild: string;
63
+ embeddingModel?: string;
64
+ }
65
+
66
+ interface RagConfig {
67
+ ragEnabled: boolean;
68
+ ragTopK: number;
69
+ ragScoreThreshold: number;
70
+ ragAlpha: number; // 0 = pure vector, 1 = pure BM25
71
+ }
72
+
73
+ // ─── Config ──────────────────────────────────────────────────────────────────
74
+
75
+ function loadConfig(): RagConfig {
76
+ ensureDir();
77
+ if (!existsSync(CONFIG_FILE)) return defaultConfig();
78
+ try {
79
+ return { ...defaultConfig(), ...JSON.parse(readFileSync(CONFIG_FILE, "utf-8")) };
80
+ } catch { return defaultConfig(); }
81
+ }
82
+
83
+ function defaultConfig(): RagConfig {
84
+ return { ragEnabled: true, ragTopK: 5, ragScoreThreshold: 0.1, ragAlpha: 0.4 };
85
+ }
86
+
87
/** Persist the RAG config to ~/.pi/lens/config.json, pretty-printed. */
function saveConfig(config: RagConfig) {
  ensureDir();
  const serialized = JSON.stringify(config, null, 2);
  writeFileSync(CONFIG_FILE, serialized);
}
52
91
 
92
+ // ─── Index I/O ───────────────────────────────────────────────────────────────
93
+
53
94
/** Create the ~/.pi/lens storage directory if it does not already exist. */
function ensureDir() {
  if (existsSync(RAG_DIR)) return;
  mkdirSync(RAG_DIR, { recursive: true });
}
@@ -63,6 +104,7 @@ function loadIndex(): IndexMeta {
63
104
  chunks: Array.isArray(data.chunks) ? data.chunks : [],
64
105
  files: data.files && typeof data.files === "object" ? data.files : {},
65
106
  lastBuild: data.lastBuild ?? "",
107
+ embeddingModel: data.embeddingModel,
66
108
  };
67
109
  } catch { return { chunks: [], files: {}, lastBuild: "" }; }
68
110
  }
@@ -76,13 +118,61 @@ function sha256(data: string): string {
76
118
  return createHash("sha256").update(data).digest("hex").slice(0, 12);
77
119
  }
78
120
 
121
+ // ─── Embeddings ──────────────────────────────────────────────────────────────
122
+
123
+ let _pipeline: any = null;
124
+
125
+ async function getEmbedder() {
126
+ if (_pipeline) return _pipeline;
127
+ const { pipeline } = await import("@xenova/transformers");
128
+ _pipeline = await pipeline("feature-extraction", EMBEDDING_MODEL);
129
+ return _pipeline;
130
+ }
131
+
132
+ async function embed(text: string): Promise<number[]> {
133
+ const embedder = await getEmbedder();
134
+ const output = await embedder(text, { pooling: "mean", normalize: true });
135
+ return Array.from(output.data as Float32Array);
136
+ }
137
+
138
+ async function embedBatch(texts: string[], onProgress?: (i: number, total: number) => void): Promise<number[][]> {
139
+ const results: number[][] = [];
140
+ for (let i = 0; i < texts.length; i++) {
141
+ results.push(await embed(texts[i]));
142
+ onProgress?.(i + 1, texts.length);
143
+ }
144
+ return results;
145
+ }
146
+
147
+ // ─── Math ────────────────────────────────────────────────────────────────────
148
+
149
+ function cosineSimilarity(a: number[], b: number[]): number {
150
+ if (a.length !== b.length) return 0;
151
+ let dot = 0, normA = 0, normB = 0;
152
+ for (let i = 0; i < a.length; i++) {
153
+ dot += a[i] * b[i];
154
+ normA += a[i] * a[i];
155
+ normB += b[i] * b[i];
156
+ }
157
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
158
+ return denom === 0 ? 0 : dot / denom;
159
+ }
160
+
161
+ function normalize(scores: number[]): number[] {
162
+ const max = Math.max(...scores);
163
+ const min = Math.min(...scores);
164
+ const range = max - min;
165
+ if (range === 0) return scores.map(() => 0);
166
+ return scores.map(s => (s - min) / range);
167
+ }
168
+
169
+ // ─── Chunking & File Collection ──────────────────────────────────────────────
170
+
79
171
  function chunkText(text: string, maxLines = 50): { content: string; lineStart: number; lineEnd: number }[] {
80
172
  const lines = text.split("\n");
81
173
  const chunks: { content: string; lineStart: number; lineEnd: number }[] = [];
82
-
83
174
  let i = 0;
84
175
  while (i < lines.length) {
85
- // Try to break at a natural blank-line boundary near the end of the window
86
176
  let end = Math.min(i + maxLines, lines.length);
87
177
  for (let j = end - 1; j > i + 10 && j > end - 15; j--) {
88
178
  if (lines[j]?.trim() === "") { end = j + 1; break; }
@@ -91,7 +181,7 @@ function chunkText(text: string, maxLines = 50): { content: string; lineStart: n
91
181
  if (chunk.trim().length > 20) {
92
182
  chunks.push({ content: chunk, lineStart: i + 1, lineEnd: end });
93
183
  }
94
- i = end; // advance past this chunk; no off-by-one with += maxLines
184
+ i = end;
95
185
  }
96
186
  return chunks;
97
187
  }
@@ -104,26 +194,20 @@ function collectFiles(dirPath: string, maxFiles = 500): string[] {
104
194
  for (const entry of readdirSync(dir, { withFileTypes: true })) {
105
195
  if (files.length >= maxFiles) return;
106
196
  if (entry.isDirectory()) {
107
- if (!SKIP_DIRS.has(entry.name) && !entry.name.startsWith(".")) {
108
- walk(join(dir, entry.name));
109
- }
197
+ if (!SKIP_DIRS.has(entry.name) && !entry.name.startsWith(".")) walk(join(dir, entry.name));
110
198
  } else if (TEXT_EXTS.has(extname(entry.name).toLowerCase())) {
111
199
  const fp = join(dir, entry.name);
112
200
  try {
113
- const stat = statSync(fp);
114
- if (stat.size < 500_000) files.push(fp); // Skip files > 500KB
201
+ if (statSync(fp).size < 500_000) files.push(fp);
115
202
  } catch {}
116
203
  }
117
204
  }
118
205
  } catch {}
119
206
  }
120
-
121
207
  try {
122
208
  const stat = statSync(dirPath);
123
- // Single file: apply the same extension + size guards as the directory walker
124
209
  if (stat.isFile()) {
125
- if (!TEXT_EXTS.has(extname(dirPath).toLowerCase())) return [];
126
- if (stat.size >= 500_000) return [];
210
+ if (!TEXT_EXTS.has(extname(dirPath).toLowerCase()) || stat.size >= 500_000) return [];
127
211
  return [dirPath];
128
212
  }
129
213
  } catch { return []; }
@@ -131,24 +215,31 @@ function collectFiles(dirPath: string, maxFiles = 500): string[] {
131
215
  return files;
132
216
  }
133
217
 
134
- function indexFiles(paths: string[]): { indexed: number; chunks: number; skipped: number } {
218
+ // ─── Indexing ─────────────────────────────────────────────────────────────────
219
+
220
+ async function indexFiles(
221
+ paths: string[],
222
+ onProgress?: (msg: string) => void
223
+ ): Promise<{ indexed: number; chunks: number; skipped: number }> {
135
224
  const index = loadIndex();
136
225
  let indexed = 0, chunked = 0, skipped = 0;
137
-
226
+
138
227
  for (const fp of paths) {
139
228
  try {
140
229
  const content = readFileSync(fp, "utf-8");
141
230
  const hash = sha256(content);
142
-
143
- // Skip if unchanged
144
- if (index.files[fp]?.hash === hash) { skipped++; continue; }
145
-
146
- // Remove old chunks for this file
231
+
232
+ if (index.files[fp]?.hash === hash && index.files[fp]?.embedded) { skipped++; continue; }
233
+
147
234
  index.chunks = index.chunks.filter(c => c.file !== fp);
148
-
149
- // Chunk and add
150
- const chunks = chunkText(content);
151
- for (const chunk of chunks) {
235
+
236
+ const rawChunks = chunkText(content);
237
+ onProgress?.(`Embedding ${basename(fp)} (${rawChunks.length} chunks)...`);
238
+
239
+ const vectors = await embedBatch(rawChunks.map(c => c.content));
240
+
241
+ for (let i = 0; i < rawChunks.length; i++) {
242
+ const chunk = rawChunks[i];
152
243
  index.chunks.push({
153
244
  id: `${sha256(fp)}-${chunk.lineStart}`,
154
245
  file: fp,
@@ -158,137 +249,222 @@ function indexFiles(paths: string[]): { indexed: number; chunks: number; skipped
158
249
  hash: sha256(chunk.content),
159
250
  indexed: new Date().toISOString(),
160
251
  tokens: Math.ceil(chunk.content.length / 4),
252
+ vector: vectors[i],
161
253
  });
162
254
  chunked++;
163
255
  }
164
-
165
- index.files[fp] = { hash, chunks: chunks.length, indexed: new Date().toISOString(), size: content.length };
256
+
257
+ index.files[fp] = { hash, chunks: rawChunks.length, indexed: new Date().toISOString(), size: content.length, embedded: true };
166
258
  indexed++;
167
- } catch { skipped++; }
259
+ } catch (e) { skipped++; }
168
260
  }
169
-
261
+
170
262
  index.lastBuild = new Date().toISOString();
263
+ index.embeddingModel = EMBEDDING_MODEL;
171
264
  saveIndex(index);
172
265
  return { indexed, chunks: chunked, skipped };
173
266
  }
174
267
 
175
- // BM25-style keyword search (no embeddings needed)
176
- function searchChunks(query: string, index: IndexMeta, limit = 10): Chunk[] {
177
- const terms = query.toLowerCase().split(/\s+/).filter(t => t.length > 1);
178
- if (!terms.length) return [];
268
+ // ─── Search ───────────────────────────────────────────────────────────────────
269
+
270
+ interface ScoredChunk {
271
+ chunk: Chunk;
272
+ bm25: number;
273
+ vector: number;
274
+ hybrid: number;
275
+ }
276
+
277
+ async function hybridSearch(
278
+ query: string,
279
+ index: IndexMeta,
280
+ limit = 10,
281
+ alpha = 0.4
282
+ ): Promise<ScoredChunk[]> {
283
+ if (!index.chunks.length) return [];
179
284
 
180
- // Pre-compute IDF per term once (avoids O(n²) re-scan inside the map)
285
+ // ── BM25 ──
286
+ const terms = query.toLowerCase().split(/\s+/).filter(t => t.length > 1);
287
+ const queryLower = query.toLowerCase();
181
288
  const idfMap = new Map<string, number>();
182
289
  for (const term of terms) {
183
290
  const docsWithTerm = index.chunks.filter(c => c.content.toLowerCase().includes(term)).length;
184
291
  idfMap.set(term, Math.log(1 + index.chunks.length / (1 + docsWithTerm)));
185
292
  }
186
- const queryLower = query.toLowerCase();
187
293
 
188
- const scored = index.chunks.map(chunk => {
294
+ const bm25Raw = index.chunks.map(chunk => {
189
295
  const lower = chunk.content.toLowerCase();
190
296
  let score = 0;
191
297
  for (const term of terms) {
192
- const count = (lower.match(new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), "g")) || []).length;
193
- if (count > 0) {
194
- const tf = Math.log(1 + count);
195
- score += tf * idfMap.get(term)!;
196
- }
298
+ const count = (lower.match(new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"), "g")) || []).length;
299
+ if (count > 0) score += Math.log(1 + count) * (idfMap.get(term) ?? 0);
197
300
  }
198
- // Boost for exact phrase match
199
301
  if (lower.includes(queryLower)) score *= 2;
200
- // Boost for filename match
201
- if (chunk.file.toLowerCase().includes(terms[0])) score *= 1.5;
202
-
203
- return { chunk, score };
302
+ if (chunk.file.toLowerCase().includes(terms[0] ?? "")) score *= 1.5;
303
+ return score;
204
304
  });
205
305
 
306
+ const bm25Norm = normalize(bm25Raw);
307
+
308
+ // ── Vector ──
309
+ const chunksWithVectors = index.chunks.filter(c => c.vector && c.vector.length === VECTOR_DIM);
310
+ const hasVectors = chunksWithVectors.length > 0;
311
+
312
+ let vectorNorm: number[] = new Array(index.chunks.length).fill(0);
313
+
314
+ if (hasVectors) {
315
+ const queryVec = await embed(query);
316
+ const vectorRaw = index.chunks.map(chunk =>
317
+ chunk.vector && chunk.vector.length === VECTOR_DIM
318
+ ? cosineSimilarity(queryVec, chunk.vector)
319
+ : 0
320
+ );
321
+ vectorNorm = normalize(vectorRaw);
322
+ }
323
+
324
+ // ── Hybrid ──
325
+ const scored: ScoredChunk[] = index.chunks.map((chunk, i) => ({
326
+ chunk,
327
+ bm25: bm25Norm[i],
328
+ vector: vectorNorm[i],
329
+ hybrid: hasVectors
330
+ ? alpha * bm25Norm[i] + (1 - alpha) * vectorNorm[i]
331
+ : bm25Norm[i],
332
+ }));
333
+
206
334
  return scored
207
- .filter(s => s.score > 0)
208
- .sort((a, b) => b.score - a.score)
209
- .slice(0, limit)
210
- .map(s => s.chunk);
335
+ .filter(s => s.hybrid > 0)
336
+ .sort((a, b) => b.hybrid - a.hybrid)
337
+ .slice(0, limit);
211
338
  }
212
339
 
340
+ // ─── Extension ────────────────────────────────────────────────────────────────
341
+
213
342
  export default function (pi: ExtensionAPI) {
214
343
  ensureDir();
215
344
 
345
+ // ── Auto-inject RAG context before every agent turn ──
346
+ pi.on("before_agent_start", async (event, _ctx) => {
347
+ const config = loadConfig();
348
+ if (!config.ragEnabled) return;
349
+
350
+ const index = loadIndex();
351
+ if (!index.chunks.length) return;
352
+
353
+ const results = await hybridSearch(event.prompt, index, config.ragTopK, config.ragAlpha);
354
+ const relevant = results.filter(r => r.hybrid >= config.ragScoreThreshold);
355
+ if (!relevant.length) return;
356
+
357
+ const context = relevant.map(r =>
358
+ `### ${basename(r.chunk.file)} (lines ${r.chunk.lineStart}-${r.chunk.lineEnd})\n` +
359
+ `\`\`\`\n${r.chunk.content.slice(0, 600)}\n\`\`\``
360
+ ).join("\n\n");
361
+
362
+ return {
363
+ systemPrompt: event.systemPrompt +
364
+ `\n\n## Relevant Codebase Context (pi-local-rag)\n` +
365
+ `*Retrieved ${relevant.length} chunks via hybrid search (BM25 + vector)*\n\n` +
366
+ context,
367
+ };
368
+ });
369
+
370
+ // ── /lens command ──
216
371
  pi.registerCommand("lens", {
217
- description: "pi-local-rag pipeline: /lens index|search|status|rebuild|clear|context",
372
+ description: "pi-local-rag: /lens index|search|status|rebuild|clear|rag",
218
373
  handler: async (args, ctx) => {
219
374
  const parts = (args || "").trim().split(/\s+/);
220
375
  const cmd = parts[0] || "status";
221
376
 
377
+ // ── index ──
222
378
  if (cmd === "index") {
223
379
  const path = parts[1] || ".";
224
380
  if (!existsSync(path)) return `${RED}Path not found:${RST} ${path}`;
225
381
  const files = collectFiles(path);
226
382
  if (!files.length) return `${YELLOW}No indexable files found in:${RST} ${path}`;
227
- const result = indexFiles(files);
228
- return `${GREEN}✅ Indexed:${RST} ${result.indexed} files, ${result.chunks} chunks (${result.skipped} skipped/unchanged)`;
383
+ ctx.ui.notify(`Indexing ${files.length} files...`, "info");
384
+ const result = await indexFiles(files, msg => ctx.ui.notify(msg, "info"));
385
+ return `${GREEN}✅ Indexed:${RST} ${result.indexed} files, ${result.chunks} chunks, ${result.skipped} unchanged\n` +
386
+ `${D}Embeddings: ${EMBEDDING_MODEL}${RST}`;
229
387
  }
230
388
 
389
+ // ── search ──
231
390
  if (cmd === "search") {
232
391
  const query = parts.slice(1).join(" ");
233
392
  if (!query) return `${YELLOW}Usage:${RST} /lens search <query>`;
234
393
  const index = loadIndex();
235
- const results = searchChunks(query, index);
394
+ const config = loadConfig();
395
+ const results = await hybridSearch(query, index, 10, config.ragAlpha);
236
396
  if (!results.length) return `${YELLOW}No results for:${RST} ${query}`;
237
- let out = `${B}${CYAN}🔍 ${results.length} results for "${query}"${RST}\n\n`;
397
+
398
+ const hasVectors = index.chunks.some(c => c.vector);
399
+ let out = `${B}${CYAN}🔍 ${results.length} results for "${query}"${RST}`;
400
+ out += ` ${D}(${hasVectors ? "hybrid BM25+vector" : "BM25 only — run /lens index to add vectors"})${RST}\n\n`;
401
+
238
402
  for (const r of results) {
239
- out += `${GREEN}${basename(r.file)}${RST}:${r.lineStart}-${r.lineEnd} ${D}(${r.tokens} tokens)${RST}\n`;
240
- const preview = r.content.split("\n").slice(0, 3).join("\n");
403
+ const bar = "█".repeat(Math.round(r.hybrid * 10)) + "░".repeat(10 - Math.round(r.hybrid * 10));
404
+ out += `${GREEN}${basename(r.chunk.file)}${RST}:${r.chunk.lineStart}-${r.chunk.lineEnd} `;
405
+ out += `${D}bm25=${r.bm25.toFixed(2)} vec=${r.vector.toFixed(2)} hybrid=${r.hybrid.toFixed(2)}${RST} ${CYAN}${bar}${RST}\n`;
406
+ const preview = r.chunk.content.split("\n").slice(0, 3).join("\n");
241
407
  out += `${D}${preview.slice(0, 200)}${RST}\n\n`;
242
408
  }
243
409
  return out;
244
410
  }
245
411
 
246
- if (cmd === "context") {
247
- const query = parts.slice(1).join(" ");
248
- if (!query) return `${YELLOW}Usage:${RST} /lens context <query>`;
249
- const index = loadIndex();
250
- const results = searchChunks(query, index, 5);
251
- if (!results.length) return `${YELLOW}No relevant context found for:${RST} ${query}`;
252
- let context = `# Relevant Context for: ${query}\n\n`;
253
- for (const r of results) {
254
- context += `## ${basename(r.file)} (lines ${r.lineStart}-${r.lineEnd})\n\`\`\`\n${r.content.slice(0, 500)}\n\`\`\`\n\n`;
255
- }
256
- return context;
412
+ // ── rag toggle ──
413
+ if (cmd === "rag") {
414
+ const config = loadConfig();
415
+ const sub = parts[1];
416
+ if (sub === "on") { config.ragEnabled = true; saveConfig(config); return `${GREEN}✅ RAG auto-injection enabled${RST}`; }
417
+ if (sub === "off") { config.ragEnabled = false; saveConfig(config); return `${YELLOW}RAG auto-injection disabled${RST}`; }
418
+ return `${B}RAG:${RST} ${config.ragEnabled ? `${GREEN}enabled${RST}` : `${YELLOW}disabled${RST}`}\n` +
419
+ ` topK: ${config.ragTopK} threshold: ${config.ragScoreThreshold} alpha: ${config.ragAlpha} ${D}(0=pure vector, 1=pure BM25)${RST}`;
257
420
  }
258
421
 
422
+ // ── rebuild ──
259
423
  if (cmd === "rebuild") {
260
424
  const index = loadIndex();
261
425
  const allFiles = Object.keys(index.files);
262
426
  if (!allFiles.length) return `${YELLOW}No files in index. Run /lens index <path> first.${RST}`;
263
- // Prune deleted files without clearing hashes of surviving files
264
427
  const existingFiles = allFiles.filter(f => existsSync(f));
265
428
  const deletedFiles = allFiles.filter(f => !existsSync(f));
266
429
  for (const f of deletedFiles) {
267
430
  index.chunks = index.chunks.filter(c => c.file !== f);
268
431
  delete index.files[f];
269
432
  }
270
- saveIndex(index); // hashes intact so unchanged files will be skipped
271
- const result = indexFiles(existingFiles);
433
+ // Force re-embed by clearing embedded flag
434
+ for (const f of existingFiles) { if (index.files[f]) index.files[f].embedded = false; }
435
+ saveIndex(index);
436
+ ctx.ui.notify(`Rebuilding ${existingFiles.length} files...`, "info");
437
+ const result = await indexFiles(existingFiles, msg => ctx.ui.notify(msg, "info"));
272
438
  return `${GREEN}✅ Rebuilt:${RST} pruned ${deletedFiles.length} deleted, re-indexed ${result.indexed} changed, ${result.skipped} unchanged (${result.chunks} new chunks)`;
273
439
  }
274
440
 
441
+ // ── clear ──
275
442
  if (cmd === "clear") {
276
443
  saveIndex({ chunks: [], files: {}, lastBuild: "" });
277
444
  return `${GREEN}✅ Index cleared.${RST}`;
278
445
  }
279
446
 
280
- // Default: status
447
+ // ── status ──
281
448
  const index = loadIndex();
449
+ const config = loadConfig();
282
450
  const fileCount = Object.keys(index.files).length;
283
451
  const totalTokens = index.chunks.reduce((sum, c) => sum + c.tokens, 0);
284
- let out = `${B}${CYAN}🔍 pi-local-rag Index Status${RST}\n\n`;
285
- out += ` Files indexed: ${GREEN}${fileCount}${RST}\n`;
286
- out += ` Chunks: ${GREEN}${index.chunks.length}${RST}\n`;
287
- out += ` Total tokens: ${GREEN}${totalTokens.toLocaleString()}${RST}\n`;
288
- out += ` Last build: ${index.lastBuild || "never"}\n`;
289
- out += ` Storage: ${D}${RAG_DIR}${RST}\n`;
452
+ const embeddedCount = index.chunks.filter(c => c.vector).length;
453
+ const vectorCoverage = index.chunks.length ? Math.round(embeddedCount / index.chunks.length * 100) : 0;
454
+
455
+ let out = `${B}${CYAN}🔍 pi-local-rag Status${RST}\n\n`;
456
+ out += ` Files indexed: ${GREEN}${fileCount}${RST}\n`;
457
+ out += ` Chunks: ${GREEN}${index.chunks.length}${RST}\n`;
458
+ out += ` Vectors: ${GREEN}${embeddedCount}${RST} ${D}(${vectorCoverage}% coverage)${RST}\n`;
459
+ out += ` Total tokens: ${GREEN}${totalTokens.toLocaleString()}${RST}\n`;
460
+ out += ` Embedding model: ${D}${index.embeddingModel || "none"}${RST}\n`;
461
+ out += ` Last build: ${index.lastBuild || "never"}\n`;
462
+ out += ` Storage: ${D}${RAG_DIR}${RST}\n\n`;
463
+ out += ` RAG injection: ${config.ragEnabled ? `${GREEN}enabled${RST}` : `${YELLOW}disabled${RST}`}`;
464
+ out += ` topK=${config.ragTopK} threshold=${config.ragScoreThreshold} alpha=${config.ragAlpha}\n`;
465
+
290
466
  if (fileCount) {
291
- out += `\n ${B}Top file types:${RST}\n`;
467
+ out += `\n ${B}File types:${RST}\n`;
292
468
  const byExt: Record<string, number> = {};
293
469
  for (const f of Object.keys(index.files)) byExt[extname(f)] = (byExt[extname(f)] || 0) + 1;
294
470
  for (const [ext, count] of Object.entries(byExt).sort((a, b) => b[1] - a[1]).slice(0, 8)) {
@@ -296,67 +472,70 @@ export default function (pi: ExtensionAPI) {
296
472
  }
297
473
  }
298
474
  return out;
299
- }
475
+ },
300
476
  });
301
477
 
478
+ // ── Tools ──
479
+
302
480
  pi.registerTool({
303
481
  name: "lens_index",
304
- description: "Index a file or directory into the local pi-local-rag pipeline. Chunks text files, stores for BM25 keyword search.",
482
+ description: "Index a file or directory into the local pi-local-rag pipeline. Chunks text files, generates embeddings, stores for hybrid BM25+vector search.",
305
483
  parameters: Type.Object({
306
484
  path: Type.String({ description: "File or directory path to index" }),
307
485
  }),
308
486
  execute: async (_toolCallId, params) => {
309
- let text: string;
310
- if (!existsSync(params.path)) text = `Path not found: ${params.path}`;
311
- else {
312
- const files = collectFiles(params.path);
313
- if (!files.length) text = `No indexable text files found in: ${params.path}`;
314
- else {
315
- const result = indexFiles(files);
316
- text = `Indexed ${result.indexed} files (${result.chunks} chunks). ${result.skipped} unchanged.`;
317
- }
318
- }
319
- return { content: [{ type: "text" as const, text }] };
320
- }
487
+ if (!existsSync(params.path)) return { content: [{ type: "text" as const, text: `Path not found: ${params.path}` }] };
488
+ const files = collectFiles(params.path);
489
+ if (!files.length) return { content: [{ type: "text" as const, text: `No indexable text files found in: ${params.path}` }] };
490
+ const result = await indexFiles(files);
491
+ return { content: [{ type: "text" as const, text: `Indexed ${result.indexed} files (${result.chunks} chunks, embeddings generated). ${result.skipped} unchanged.` }] };
492
+ },
321
493
  });
322
494
 
323
495
  pi.registerTool({
324
496
  name: "lens_query",
325
- description: "Search the local pi-local-rag index using BM25 keyword matching. Returns relevant chunks from indexed files with file paths and line numbers.",
497
+ description: "Search the local pi-local-rag index using hybrid BM25+vector search. Returns relevant chunks with file paths, line numbers, and relevance scores.",
326
498
  parameters: Type.Object({
327
499
  query: Type.String({ description: "Search query" }),
328
500
  limit: Type.Optional(Type.Number({ description: "Max results (default 10)" })),
329
501
  }),
330
502
  execute: async (_toolCallId, params) => {
331
503
  const index = loadIndex();
332
- let text: string;
333
- if (!index.chunks.length) text = "pi-local-rag index is empty. Run lens_index first."
334
- else {
335
- const results = searchChunks(params.query, index, params.limit || 10);
336
- if (!results.length) text = `No results for: ${params.query}`;
337
- else text = JSON.stringify(results.map(r => ({
338
- file: r.file, lines: `${r.lineStart}-${r.lineEnd}`,
339
- tokens: r.tokens, preview: r.content.slice(0, 300)
340
- })), null, 2);
341
- }
504
+ if (!index.chunks.length) return { content: [{ type: "text" as const, text: "pi-local-rag index is empty. Run lens_index first." }] };
505
+ const config = loadConfig();
506
+ const results = await hybridSearch(params.query, index, params.limit ?? 10, config.ragAlpha);
507
+ if (!results.length) return { content: [{ type: "text" as const, text: `No results for: ${params.query}` }] };
508
+ const text = JSON.stringify(results.map(r => ({
509
+ file: r.chunk.file,
510
+ lines: `${r.chunk.lineStart}-${r.chunk.lineEnd}`,
511
+ tokens: r.chunk.tokens,
512
+ scores: { bm25: r.bm25.toFixed(3), vector: r.vector.toFixed(3), hybrid: r.hybrid.toFixed(3) },
513
+ preview: r.chunk.content.slice(0, 300),
514
+ })), null, 2);
342
515
  return { content: [{ type: "text" as const, text }] };
343
- }
516
+ },
344
517
  });
345
518
 
346
519
  pi.registerTool({
347
520
  name: "lens_status",
348
- description: "Show pi-local-rag index statistics: file count, chunk count, total tokens, last build time.",
521
+ description: "Show pi-local-rag index statistics: file count, chunk count, vector coverage, embedding model, RAG config.",
349
522
  parameters: Type.Object({}),
350
523
  execute: async (_toolCallId) => {
351
524
  const index = loadIndex();
525
+ const config = loadConfig();
526
+ const embeddedCount = index.chunks.filter(c => c.vector).length;
352
527
  const text = JSON.stringify({
353
528
  files: Object.keys(index.files).length,
354
529
  chunks: index.chunks.length,
530
+ vectorsEmbedded: embeddedCount,
531
+ vectorCoverage: index.chunks.length ? `${Math.round(embeddedCount / index.chunks.length * 100)}%` : "0%",
532
+ embeddingModel: index.embeddingModel ?? "none",
355
533
  totalTokens: index.chunks.reduce((s, c) => s + c.tokens, 0),
356
534
  lastBuild: index.lastBuild || "never",
357
- storagePath: RAG_DIR, // ~/.pi/lens
535
+ ragConfig: config,
536
+ storagePath: RAG_DIR,
358
537
  }, null, 2);
359
538
  return { content: [{ type: "text" as const, text }] };
360
- }
539
+ },
361
540
  });
362
541
  }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "pi-local-rag",
3
- "version": "0.1.0",
4
- "description": "Local BM25 RAG pipeline for the Pi coding agent. Index local files and search them with keyword matching zero cloud dependency.",
3
+ "version": "0.2.0",
4
+ "description": "Hybrid RAG pipeline for the Pi coding agent. BM25 + local vector embeddings (Transformers.js) + auto-injection into LLM context. Zero cloud dependency.",
5
5
  "type": "module",
6
6
  "main": "./index.ts",
7
7
  "exports": {
@@ -24,9 +24,13 @@
24
24
  "rag",
25
25
  "search",
26
26
  "bm25",
27
+ "vector",
28
+ "embeddings",
29
+ "hybrid-search",
27
30
  "index",
28
31
  "local",
29
- "offline"
32
+ "offline",
33
+ "transformers"
30
34
  ],
31
35
  "author": "kowsari",
32
36
  "license": "MIT",
@@ -49,6 +53,9 @@
49
53
  "./index.ts"
50
54
  ]
51
55
  },
56
+ "dependencies": {
57
+ "@xenova/transformers": "^2.17.2"
58
+ },
52
59
  "peerDependencies": {
53
60
  "@mariozechner/pi-coding-agent": ">=0.60.0"
54
61
  }