pi-local-rag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md ADDED
@@ -0,0 +1,8 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0
4
+
5
+ - Initial release
6
+ - BM25 keyword search over local files
7
+ - Tools: `lens_index`, `lens_query`, `lens_status`
8
+ - Commands: `/lens index|search|status|rebuild|clear|context`
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 kowsari
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,70 @@
1
+ # pi-local-rag
2
+
3
+ Local BM25 RAG pipeline for the [Pi coding agent](https://github.com/badlogic/pi-mono). Index your local files and search them with keyword matching — **zero cloud dependency, works fully offline**.
4
+
5
+ ## Features
6
+
7
+ - **BM25 keyword search** — TF-IDF scoring with exact phrase and filename boosts
8
+ - **Smart chunking** — splits files into ~50-line blocks at natural blank-line boundaries
9
+ - **Incremental indexing** — skips unchanged files (SHA-256 hash check)
10
+ - **Zero dependencies** — uses only Node.js built-ins
11
+ - **3 AI tools** — `lens_index`, `lens_query`, `lens_status` for the agent to use directly
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pi install npm:pi-local-rag
17
+ ```
18
+
19
+ Or via git:
20
+
21
+ ```bash
22
+ pi install git:github.com/vahidkowsari/pi-local-rag
23
+ ```
24
+
25
+ ## Commands
26
+
27
+ | Command | Description |
28
+ |---|---|
29
+ | `/lens index <path>` | Index a file or directory |
30
+ | `/lens search <query>` | Search indexed content |
31
+ | `/lens status` | Show index stats (files, chunks, tokens) |
32
+ | `/lens rebuild` | Re-index changed files, prune deleted |
33
+ | `/lens clear` | Wipe the entire index |
34
+ | `/lens context <query>` | Generate a context snippet for injection |
35
+
36
+ ## AI Tools
37
+
38
+ The extension registers three tools the agent can call directly:
39
+
40
+ - **`lens_index`** — Index a path into the pipeline
41
+ - **`lens_query`** — BM25 search, returns file paths + line numbers + previews
42
+ - **`lens_status`** — Index stats (file count, chunk count, total tokens, last build)
43
+
44
+ ## How It Works
45
+
46
+ 1. Files are chunked into ~50-line blocks (splits at blank lines)
47
+ 2. Chunks are stored in `~/.pi/lens/index.json`
48
+ 3. Search scores each chunk with BM25 (TF × IDF), boosted for exact phrase matches and filename matches
49
+ 4. Results include file path, line range, token count, and a content preview
50
+
51
+ ## Supported File Types
52
+
53
+ `.md` `.txt` `.ts` `.js` `.py` `.rs` `.go` `.java` `.c` `.cpp` `.h` `.css` `.html` `.json` `.yaml` `.yml` `.toml` `.xml` `.csv` `.sh` `.sql` `.graphql` `.proto`
54
+
55
+ ## Skipped Directories
56
+
57
+ `node_modules` `.git` `.next` `dist` `build` `__pycache__` `.venv` `venv` `.cache`
58
+
59
+ ## Limits
60
+
61
+ - Max 500 files per index run
62
+ - Max 500KB per file
63
+
64
+ ## Storage
65
+
66
+ Index is stored at `~/.pi/lens/index.json`.
67
+
68
+ ## License
69
+
70
+ MIT
package/index.ts ADDED
@@ -0,0 +1,362 @@
1
+ /**
2
+ * pi-local-rag — Local RAG Pipeline
3
+ *
4
+ * Index local files → chunk → store → retrieve. AI consults YOUR knowledge before hallucinating.
5
+  * Zero cloud dependency. Pure BM25 keyword search — no embeddings or external services required.
6
+ *
7
+ * /lens index <path> → index a file or directory
8
+ * /lens search <query> → search indexed content
9
+ * /lens status → show index stats
10
+  * /lens rebuild → re-index changed files, prune deleted ones
11
+ * /lens clear → clear index
12
+ * /lens context <query> → generate context.md snippet for injection
13
+ *
14
+ * Tools: lens_index, lens_query, lens_status
15
+ */
16
+ import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
17
+ import { Type } from "@sinclair/typebox";
18
+ import { existsSync, readFileSync, writeFileSync, mkdirSync, readdirSync, statSync } from "node:fs";
19
+ import { join, extname, basename } from "node:path";
20
+ import { homedir } from "node:os";
21
+ import { createHash } from "node:crypto";
22
+
23
// On-disk storage: everything lives under ~/.pi/lens
const RAG_DIR = join(homedir(), ".pi", "lens");
const INDEX_FILE = join(RAG_DIR, "index.json");
// ANSI escape codes for terminal output: reset, bold, dim, and colors.
const RST = "\x1b[0m", B = "\x1b[1m", D = "\x1b[2m";
const GREEN = "\x1b[32m", YELLOW = "\x1b[33m", CYAN = "\x1b[36m", RED = "\x1b[31m";

// Extension whitelist for indexable text files (compared against
// extname(...).toLowerCase()).
// NOTE(review): ".gitignore"/".dockerfile" only match files literally named
// e.g. "foo.gitignore" — Node's extname(".gitignore") is "" for a bare
// dotfile, so plain .gitignore files are never indexed. Confirm intent.
const TEXT_EXTS = new Set([
  ".md", ".txt", ".ts", ".js", ".py", ".rs", ".go", ".java", ".c", ".cpp", ".h",
  ".css", ".html", ".json", ".yaml", ".yml", ".toml", ".xml", ".csv", ".sh",
  ".sql", ".graphql", ".proto", ".env", ".gitignore", ".dockerfile",
]);

// Directory names never descended into while walking a tree.
const SKIP_DIRS = new Set(["node_modules", ".git", ".next", "dist", "build", "__pycache__", ".venv", "venv", ".cache"]);
35
+
36
// One indexed slice of a source file.
interface Chunk {
  id: string;         // `${sha256(file)}-${lineStart}` — stable per file + position
  file: string;       // path of the source file as given at index time
  content: string;    // raw chunk text
  lineStart: number;  // 1-based first line of the chunk in the file
  lineEnd: number;    // 1-based last line (inclusive)
  hash: string;       // 12-hex-char SHA-256 prefix of content
  indexed: string;    // ISO timestamp when this chunk was indexed
  tokens: number;     // rough token estimate: ceil(content.length / 4)
}
46
+
47
// Shape of the persisted index file (~/.pi/lens/index.json).
interface IndexMeta {
  chunks: Chunk[]; // all chunks across all indexed files
  // Per-file bookkeeping: content hash (change detection), chunk count,
  // index timestamp, and file size in characters.
  files: Record<string, { hash: string; chunks: number; indexed: string; size: number }>;
  lastBuild: string; // ISO timestamp of the last index/rebuild run; "" if never
}
52
+
53
+ function ensureDir() {
54
+ if (!existsSync(RAG_DIR)) mkdirSync(RAG_DIR, { recursive: true });
55
+ }
56
+
57
+ function loadIndex(): IndexMeta {
58
+ ensureDir();
59
+ if (!existsSync(INDEX_FILE)) return { chunks: [], files: {}, lastBuild: "" };
60
+ try {
61
+ const data = JSON.parse(readFileSync(INDEX_FILE, "utf-8"));
62
+ return {
63
+ chunks: Array.isArray(data.chunks) ? data.chunks : [],
64
+ files: data.files && typeof data.files === "object" ? data.files : {},
65
+ lastBuild: data.lastBuild ?? "",
66
+ };
67
+ } catch { return { chunks: [], files: {}, lastBuild: "" }; }
68
+ }
69
+
70
+ function saveIndex(index: IndexMeta) {
71
+ ensureDir();
72
+ writeFileSync(INDEX_FILE, JSON.stringify(index, null, 2));
73
+ }
74
+
75
+ function sha256(data: string): string {
76
+ return createHash("sha256").update(data).digest("hex").slice(0, 12);
77
+ }
78
+
79
+ function chunkText(text: string, maxLines = 50): { content: string; lineStart: number; lineEnd: number }[] {
80
+ const lines = text.split("\n");
81
+ const chunks: { content: string; lineStart: number; lineEnd: number }[] = [];
82
+
83
+ let i = 0;
84
+ while (i < lines.length) {
85
+ // Try to break at a natural blank-line boundary near the end of the window
86
+ let end = Math.min(i + maxLines, lines.length);
87
+ for (let j = end - 1; j > i + 10 && j > end - 15; j--) {
88
+ if (lines[j]?.trim() === "") { end = j + 1; break; }
89
+ }
90
+ const chunk = lines.slice(i, end).join("\n");
91
+ if (chunk.trim().length > 20) {
92
+ chunks.push({ content: chunk, lineStart: i + 1, lineEnd: end });
93
+ }
94
+ i = end; // advance past this chunk; no off-by-one with += maxLines
95
+ }
96
+ return chunks;
97
+ }
98
+
99
+ function collectFiles(dirPath: string, maxFiles = 500): string[] {
100
+ const files: string[] = [];
101
+ function walk(dir: string) {
102
+ if (files.length >= maxFiles) return;
103
+ try {
104
+ for (const entry of readdirSync(dir, { withFileTypes: true })) {
105
+ if (files.length >= maxFiles) return;
106
+ if (entry.isDirectory()) {
107
+ if (!SKIP_DIRS.has(entry.name) && !entry.name.startsWith(".")) {
108
+ walk(join(dir, entry.name));
109
+ }
110
+ } else if (TEXT_EXTS.has(extname(entry.name).toLowerCase())) {
111
+ const fp = join(dir, entry.name);
112
+ try {
113
+ const stat = statSync(fp);
114
+ if (stat.size < 500_000) files.push(fp); // Skip files > 500KB
115
+ } catch {}
116
+ }
117
+ }
118
+ } catch {}
119
+ }
120
+
121
+ try {
122
+ const stat = statSync(dirPath);
123
+ // Single file: apply the same extension + size guards as the directory walker
124
+ if (stat.isFile()) {
125
+ if (!TEXT_EXTS.has(extname(dirPath).toLowerCase())) return [];
126
+ if (stat.size >= 500_000) return [];
127
+ return [dirPath];
128
+ }
129
+ } catch { return []; }
130
+ walk(dirPath);
131
+ return files;
132
+ }
133
+
134
+ function indexFiles(paths: string[]): { indexed: number; chunks: number; skipped: number } {
135
+ const index = loadIndex();
136
+ let indexed = 0, chunked = 0, skipped = 0;
137
+
138
+ for (const fp of paths) {
139
+ try {
140
+ const content = readFileSync(fp, "utf-8");
141
+ const hash = sha256(content);
142
+
143
+ // Skip if unchanged
144
+ if (index.files[fp]?.hash === hash) { skipped++; continue; }
145
+
146
+ // Remove old chunks for this file
147
+ index.chunks = index.chunks.filter(c => c.file !== fp);
148
+
149
+ // Chunk and add
150
+ const chunks = chunkText(content);
151
+ for (const chunk of chunks) {
152
+ index.chunks.push({
153
+ id: `${sha256(fp)}-${chunk.lineStart}`,
154
+ file: fp,
155
+ content: chunk.content,
156
+ lineStart: chunk.lineStart,
157
+ lineEnd: chunk.lineEnd,
158
+ hash: sha256(chunk.content),
159
+ indexed: new Date().toISOString(),
160
+ tokens: Math.ceil(chunk.content.length / 4),
161
+ });
162
+ chunked++;
163
+ }
164
+
165
+ index.files[fp] = { hash, chunks: chunks.length, indexed: new Date().toISOString(), size: content.length };
166
+ indexed++;
167
+ } catch { skipped++; }
168
+ }
169
+
170
+ index.lastBuild = new Date().toISOString();
171
+ saveIndex(index);
172
+ return { indexed, chunks: chunked, skipped };
173
+ }
174
+
175
+ // BM25-style keyword search (no embeddings needed)
176
+ function searchChunks(query: string, index: IndexMeta, limit = 10): Chunk[] {
177
+ const terms = query.toLowerCase().split(/\s+/).filter(t => t.length > 1);
178
+ if (!terms.length) return [];
179
+
180
+ // Pre-compute IDF per term once (avoids O(n²) re-scan inside the map)
181
+ const idfMap = new Map<string, number>();
182
+ for (const term of terms) {
183
+ const docsWithTerm = index.chunks.filter(c => c.content.toLowerCase().includes(term)).length;
184
+ idfMap.set(term, Math.log(1 + index.chunks.length / (1 + docsWithTerm)));
185
+ }
186
+ const queryLower = query.toLowerCase();
187
+
188
+ const scored = index.chunks.map(chunk => {
189
+ const lower = chunk.content.toLowerCase();
190
+ let score = 0;
191
+ for (const term of terms) {
192
+ const count = (lower.match(new RegExp(term.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), "g")) || []).length;
193
+ if (count > 0) {
194
+ const tf = Math.log(1 + count);
195
+ score += tf * idfMap.get(term)!;
196
+ }
197
+ }
198
+ // Boost for exact phrase match
199
+ if (lower.includes(queryLower)) score *= 2;
200
+ // Boost for filename match
201
+ if (chunk.file.toLowerCase().includes(terms[0])) score *= 1.5;
202
+
203
+ return { chunk, score };
204
+ });
205
+
206
+ return scored
207
+ .filter(s => s.score > 0)
208
+ .sort((a, b) => b.score - a.score)
209
+ .slice(0, limit)
210
+ .map(s => s.chunk);
211
+ }
212
+
213
/**
 * Extension entry point. Registers the `/lens` user command and three
 * agent-callable tools (lens_index, lens_query, lens_status) on the Pi
 * extension API. All state is the JSON index under ~/.pi/lens.
 */
export default function (pi: ExtensionAPI) {
  ensureDir();

  pi.registerCommand("lens", {
    description: "pi-local-rag pipeline: /lens index|search|status|rebuild|clear|context",
    handler: async (args, ctx) => {
      // First whitespace-separated word selects the sub-command;
      // no arguments defaults to "status".
      const parts = (args || "").trim().split(/\s+/);
      const cmd = parts[0] || "status";

      // /lens index <path> — collect and index files under <path>.
      if (cmd === "index") {
        // NOTE(review): only parts[1] is used, so a path containing spaces
        // is truncated at the first space — confirm whether that matters.
        const path = parts[1] || ".";
        if (!existsSync(path)) return `${RED}Path not found:${RST} ${path}`;
        const files = collectFiles(path);
        if (!files.length) return `${YELLOW}No indexable files found in:${RST} ${path}`;
        const result = indexFiles(files);
        return `${GREEN}✅ Indexed:${RST} ${result.indexed} files, ${result.chunks} chunks (${result.skipped} skipped/unchanged)`;
      }

      // /lens search <query> — BM25 search, human-readable result list.
      if (cmd === "search") {
        const query = parts.slice(1).join(" ");
        if (!query) return `${YELLOW}Usage:${RST} /lens search <query>`;
        const index = loadIndex();
        const results = searchChunks(query, index);
        if (!results.length) return `${YELLOW}No results for:${RST} ${query}`;
        let out = `${B}${CYAN}🔍 ${results.length} results for "${query}"${RST}\n\n`;
        for (const r of results) {
          out += `${GREEN}${basename(r.file)}${RST}:${r.lineStart}-${r.lineEnd} ${D}(${r.tokens} tokens)${RST}\n`;
          // Preview: first 3 lines of the chunk, capped at 200 chars.
          const preview = r.content.split("\n").slice(0, 3).join("\n");
          out += `${D}${preview.slice(0, 200)}${RST}\n\n`;
        }
        return out;
      }

      // /lens context <query> — top 5 hits formatted as a markdown snippet
      // suitable for injecting into a prompt.
      if (cmd === "context") {
        const query = parts.slice(1).join(" ");
        if (!query) return `${YELLOW}Usage:${RST} /lens context <query>`;
        const index = loadIndex();
        const results = searchChunks(query, index, 5);
        if (!results.length) return `${YELLOW}No relevant context found for:${RST} ${query}`;
        let context = `# Relevant Context for: ${query}\n\n`;
        for (const r of results) {
          context += `## ${basename(r.file)} (lines ${r.lineStart}-${r.lineEnd})\n\`\`\`\n${r.content.slice(0, 500)}\n\`\`\`\n\n`;
        }
        return context;
      }

      // /lens rebuild — incremental: prune deleted files, re-index changed
      // ones (unchanged files are skipped via their stored hashes).
      if (cmd === "rebuild") {
        const index = loadIndex();
        const allFiles = Object.keys(index.files);
        if (!allFiles.length) return `${YELLOW}No files in index. Run /lens index <path> first.${RST}`;
        // Prune deleted files without clearing hashes of surviving files
        const existingFiles = allFiles.filter(f => existsSync(f));
        const deletedFiles = allFiles.filter(f => !existsSync(f));
        for (const f of deletedFiles) {
          index.chunks = index.chunks.filter(c => c.file !== f);
          delete index.files[f];
        }
        saveIndex(index); // hashes intact so unchanged files will be skipped
        const result = indexFiles(existingFiles);
        return `${GREEN}✅ Rebuilt:${RST} pruned ${deletedFiles.length} deleted, re-indexed ${result.indexed} changed, ${result.skipped} unchanged (${result.chunks} new chunks)`;
      }

      // /lens clear — wipe the entire index.
      if (cmd === "clear") {
        saveIndex({ chunks: [], files: {}, lastBuild: "" });
        return `${GREEN}✅ Index cleared.${RST}`;
      }

      // Default: status
      const index = loadIndex();
      const fileCount = Object.keys(index.files).length;
      const totalTokens = index.chunks.reduce((sum, c) => sum + c.tokens, 0);
      let out = `${B}${CYAN}🔍 pi-local-rag Index Status${RST}\n\n`;
      out += ` Files indexed: ${GREEN}${fileCount}${RST}\n`;
      out += ` Chunks: ${GREEN}${index.chunks.length}${RST}\n`;
      out += ` Total tokens: ${GREEN}${totalTokens.toLocaleString()}${RST}\n`;
      out += ` Last build: ${index.lastBuild || "never"}\n`;
      out += ` Storage: ${D}${RAG_DIR}${RST}\n`;
      if (fileCount) {
        // Histogram of indexed file extensions, top 8 by count.
        out += `\n ${B}Top file types:${RST}\n`;
        const byExt: Record<string, number> = {};
        for (const f of Object.keys(index.files)) byExt[extname(f)] = (byExt[extname(f)] || 0) + 1;
        for (const [ext, count] of Object.entries(byExt).sort((a, b) => b[1] - a[1]).slice(0, 8)) {
          out += ` ${ext}: ${count}\n`;
        }
      }
      return out;
    }
  });

  // Tool: index a path on the agent's behalf. Mirrors /lens index but
  // returns a plain-text summary instead of ANSI-colored output.
  pi.registerTool({
    name: "lens_index",
    description: "Index a file or directory into the local pi-local-rag pipeline. Chunks text files, stores for BM25 keyword search.",
    parameters: Type.Object({
      path: Type.String({ description: "File or directory path to index" }),
    }),
    execute: async (_toolCallId, params) => {
      let text: string;
      if (!existsSync(params.path)) text = `Path not found: ${params.path}`;
      else {
        const files = collectFiles(params.path);
        if (!files.length) text = `No indexable text files found in: ${params.path}`;
        else {
          const result = indexFiles(files);
          text = `Indexed ${result.indexed} files (${result.chunks} chunks). ${result.skipped} unchanged.`;
        }
      }
      return { content: [{ type: "text" as const, text }] };
    }
  });

  // Tool: BM25 search over the index; returns JSON with file, line range,
  // token estimate, and a 300-char content preview per hit.
  pi.registerTool({
    name: "lens_query",
    description: "Search the local pi-local-rag index using BM25 keyword matching. Returns relevant chunks from indexed files with file paths and line numbers.",
    parameters: Type.Object({
      query: Type.String({ description: "Search query" }),
      limit: Type.Optional(Type.Number({ description: "Max results (default 10)" })),
    }),
    execute: async (_toolCallId, params) => {
      const index = loadIndex();
      let text: string;
      if (!index.chunks.length) text = "pi-local-rag index is empty. Run lens_index first."
      else {
        const results = searchChunks(params.query, index, params.limit || 10);
        if (!results.length) text = `No results for: ${params.query}`;
        else text = JSON.stringify(results.map(r => ({
          file: r.file, lines: `${r.lineStart}-${r.lineEnd}`,
          tokens: r.tokens, preview: r.content.slice(0, 300)
        })), null, 2);
      }
      return { content: [{ type: "text" as const, text }] };
    }
  });

  // Tool: summary statistics of the current index, returned as JSON.
  pi.registerTool({
    name: "lens_status",
    description: "Show pi-local-rag index statistics: file count, chunk count, total tokens, last build time.",
    parameters: Type.Object({}),
    execute: async (_toolCallId) => {
      const index = loadIndex();
      const text = JSON.stringify({
        files: Object.keys(index.files).length,
        chunks: index.chunks.length,
        totalTokens: index.chunks.reduce((s, c) => s + c.tokens, 0),
        lastBuild: index.lastBuild || "never",
        storagePath: RAG_DIR, // ~/.pi/lens
      }, null, 2);
      return { content: [{ type: "text" as const, text }] };
    }
  });
}
package/package.json ADDED
@@ -0,0 +1,55 @@
1
+ {
2
+ "name": "pi-local-rag",
3
+ "version": "0.1.0",
4
+ "description": "Local BM25 RAG pipeline for the Pi coding agent. Index local files and search them with keyword matching — zero cloud dependency.",
5
+ "type": "module",
6
+ "main": "./index.ts",
7
+ "exports": {
8
+ ".": "./index.ts"
9
+ },
10
+ "files": [
11
+ "index.ts",
12
+ "README.md",
13
+ "CHANGELOG.md",
14
+ "LICENSE"
15
+ ],
16
+ "scripts": {
17
+ "build": "npx --yes -p typescript@5.7.3 tsc -p tsconfig.json --noCheck"
18
+ },
19
+ "keywords": [
20
+ "pi-package",
21
+ "pi",
22
+ "pi-extension",
23
+ "pi-coding-agent",
24
+ "rag",
25
+ "search",
26
+ "bm25",
27
+ "index",
28
+ "local",
29
+ "offline"
30
+ ],
31
+ "author": "kowsari",
32
+ "license": "MIT",
33
+ "repository": {
34
+ "type": "git",
35
+ "url": "git+https://github.com/vahidkowsari/pi-local-rag.git"
36
+ },
37
+ "homepage": "https://github.com/vahidkowsari/pi-local-rag#readme",
38
+ "bugs": {
39
+ "url": "https://github.com/vahidkowsari/pi-local-rag/issues"
40
+ },
41
+ "engines": {
42
+ "node": ">=20"
43
+ },
44
+ "publishConfig": {
45
+ "access": "public"
46
+ },
47
+ "pi": {
48
+ "extensions": [
49
+ "./index.ts"
50
+ ]
51
+ },
52
+ "peerDependencies": {
53
+ "@mariozechner/pi-coding-agent": ">=0.60.0"
54
+ }
55
+ }