@winci/local-rag 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/.claude-plugin/plugin.json +24 -0
  2. package/.mcp.json +11 -0
  3. package/LICENSE +21 -0
  4. package/README.md +567 -0
  5. package/hooks/hooks.json +25 -0
  6. package/hooks/scripts/reindex-file.sh +19 -0
  7. package/hooks/scripts/session-start.sh +11 -0
  8. package/package.json +52 -0
  9. package/skills/local-rag/SKILL.md +42 -0
  10. package/src/cli/commands/analytics.ts +58 -0
  11. package/src/cli/commands/benchmark.ts +30 -0
  12. package/src/cli/commands/checkpoint.ts +85 -0
  13. package/src/cli/commands/conversation.ts +102 -0
  14. package/src/cli/commands/demo.ts +119 -0
  15. package/src/cli/commands/eval.ts +31 -0
  16. package/src/cli/commands/index-cmd.ts +26 -0
  17. package/src/cli/commands/init.ts +35 -0
  18. package/src/cli/commands/map.ts +21 -0
  19. package/src/cli/commands/remove.ts +15 -0
  20. package/src/cli/commands/search-cmd.ts +59 -0
  21. package/src/cli/commands/serve.ts +5 -0
  22. package/src/cli/commands/status.ts +13 -0
  23. package/src/cli/index.ts +117 -0
  24. package/src/cli/progress.ts +21 -0
  25. package/src/cli/setup.ts +192 -0
  26. package/src/config/index.ts +101 -0
  27. package/src/conversation/indexer.ts +147 -0
  28. package/src/conversation/parser.ts +323 -0
  29. package/src/db/analytics.ts +116 -0
  30. package/src/db/annotations.ts +161 -0
  31. package/src/db/checkpoints.ts +166 -0
  32. package/src/db/conversation.ts +241 -0
  33. package/src/db/files.ts +146 -0
  34. package/src/db/graph.ts +250 -0
  35. package/src/db/index.ts +468 -0
  36. package/src/db/search.ts +244 -0
  37. package/src/db/types.ts +85 -0
  38. package/src/embeddings/embed.ts +73 -0
  39. package/src/graph/resolver.ts +305 -0
  40. package/src/indexing/chunker.ts +523 -0
  41. package/src/indexing/indexer.ts +263 -0
  42. package/src/indexing/parse.ts +99 -0
  43. package/src/indexing/watcher.ts +84 -0
  44. package/src/main.ts +8 -0
  45. package/src/search/benchmark.ts +139 -0
  46. package/src/search/eval.ts +171 -0
  47. package/src/search/hybrid.ts +194 -0
  48. package/src/search/reranker.ts +99 -0
  49. package/src/search/usages.ts +27 -0
  50. package/src/server/index.ts +126 -0
  51. package/src/tools/analytics-tools.ts +58 -0
  52. package/src/tools/annotation-tools.ts +89 -0
  53. package/src/tools/checkpoint-tools.ts +147 -0
  54. package/src/tools/conversation-tools.ts +86 -0
  55. package/src/tools/git-tools.ts +103 -0
  56. package/src/tools/graph-tools.ts +163 -0
  57. package/src/tools/index-tools.ts +91 -0
  58. package/src/tools/index.ts +33 -0
  59. package/src/tools/search.ts +238 -0
  60. package/src/types.ts +9 -0
  61. package/src/utils/log.ts +39 -0
@@ -0,0 +1,468 @@
1
+ import { Database } from "bun:sqlite";
2
+ import * as sqliteVec from "sqlite-vec";
3
+ import { EMBEDDING_DIM } from "../embeddings/embed";
4
+ import { join, resolve } from "path";
5
+ import { mkdirSync, existsSync } from "fs";
6
+ import { platform } from "os";
7
+ import { type EmbeddedChunk } from "../types";
8
+
9
+ // Store modules
10
+ import * as fileOps from "./files";
11
+ import * as searchOps from "./search";
12
+ import * as graphOps from "./graph";
13
+ import * as conversationOps from "./conversation";
14
+ import * as checkpointOps from "./checkpoints";
15
+ import * as annotationOps from "./annotations";
16
+ import * as analyticsOps from "./analytics";
17
+
18
+ // Re-export all types so consumers keep importing from "../db"
19
+ export type {
20
+ StoredChunk,
21
+ StoredFile,
22
+ SearchResult,
23
+ ChunkSearchResult,
24
+ UsageResult,
25
+ AnnotationRow,
26
+ SymbolResult,
27
+ CheckpointRow,
28
+ ConversationSearchResult,
29
+ } from "./types";
30
+
31
// macOS ships with Apple's SQLite which doesn't support extensions.
// Point bun:sqlite at Homebrew's vanilla build if available.
let sqliteLoaded = false;

/**
 * Makes sure bun:sqlite is backed by a SQLite library that can load
 * extensions before any `Database` is opened.
 *
 * - On platforms other than macOS this does nothing beyond recording that
 *   the check has run.
 * - On macOS it probes the known Homebrew locations and registers the first
 *   one that exists via `Database.setCustomSQLite`.
 *
 * The "already done" flag is only set once setup has actually succeeded.
 * If it were set up front, a failed first attempt would turn every later
 * call into a silent no-op and the caller would lose the actionable
 * "brew install sqlite" hint.
 *
 * @throws Error on macOS when no Homebrew SQLite library is found.
 */
function loadCustomSQLite() {
  if (sqliteLoaded) return;

  if (platform() !== "darwin") {
    sqliteLoaded = true;
    return;
  }

  const candidates = [
    "/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
    "/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel Mac
  ];

  const libraryPath = candidates.find((candidate) => existsSync(candidate));
  if (libraryPath === undefined) {
    throw new Error(
      "sqlite-vec requires vanilla SQLite on macOS. Install it with: brew install sqlite"
    );
  }

  Database.setCustomSQLite(libraryPath);
  sqliteLoaded = true;
}
56
+
57
/**
 * Single entry point to the project's SQLite index.
 *
 * The constructor opens (creating if necessary) `index.db`, loads the
 * sqlite-vec extension and makes sure the schema exists. Every other public
 * method is a thin delegation to one of the store modules (`./files`,
 * `./search`, `./graph`, `./conversation`, `./checkpoints`, `./annotations`,
 * `./analytics`), passing the shared connection as the first argument.
 */
export class RagDB {
  private readonly db: Database;

  /**
   * Opens the index database for a project.
   *
   * The database lives in `<projectDir>/.rag` unless the `RAG_DB_DIR`
   * environment variable is set, in which case that (resolved) directory is
   * used instead.
   *
   * @param projectDir Project root used for the default `.rag` location.
   * @throws Error with setup guidance when the directory cannot be created
   *   because of `EROFS` or `EACCES`; any other failure is rethrown as-is.
   */
  constructor(projectDir: string) {
    loadCustomSQLite();

    const customDir = process.env.RAG_DB_DIR;
    const ragDir = customDir ? resolve(customDir) : join(projectDir, ".rag");

    try {
      mkdirSync(ragDir, { recursive: true });
    } catch (err: unknown) {
      // Node fs errors carry a string `code`; narrow instead of trusting `any`.
      const code =
        typeof err === "object" && err !== null && "code" in err
          ? (err as { code?: unknown }).code
          : undefined;

      if (code === "EROFS" || code === "EACCES") {
        const where = customDir
          ? `RAG_DB_DIR path "${ragDir}"`
          : `project directory "${projectDir}"`;
        throw new Error(
          `local-rag: cannot write to ${where} (${code}).\n` +
            `Set RAG_DB_DIR to a writable directory in your MCP server config:\n` +
            `  "env": { "RAG_DB_DIR": "/tmp/my-project-rag", "RAG_PROJECT_DIR": "..." }`
        );
      }
      throw err;
    }

    this.db = new Database(join(ragDir, "index.db"));
    try {
      this.db.exec("PRAGMA journal_mode=WAL");
      this.db.exec("PRAGMA busy_timeout = 5000");
      sqliteVec.load(this.db);
      this.initSchema();
    } catch (err) {
      // Do not leak the open handle when initialisation fails half-way:
      // the caller never receives an instance it could call close() on.
      try {
        this.db.close();
      } catch {
        // Keep the original initialisation failure as the reported error.
      }
      throw err;
    }
  }

  /**
   * Creates all tables, virtual tables, triggers and indexes if they do not
   * exist yet, then applies the column migration for older `chunks` tables.
   *
   * NOTE(review): several tables declare `ON DELETE CASCADE` / `SET NULL`
   * foreign keys, but this class never issues `PRAGMA foreign_keys = ON`.
   * SQLite only enforces those actions when that pragma is enabled for the
   * connection — confirm whether the store modules delete dependents
   * explicitly or rely on a build-time default.
   */
  private initSchema() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS files (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        hash TEXT NOT NULL,
        indexed_at TEXT NOT NULL
      );

      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
        chunk_index INTEGER NOT NULL,
        snippet TEXT NOT NULL,
        entity_name TEXT,
        chunk_type TEXT,
        start_line INTEGER,
        end_line INTEGER
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(
        chunk_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
        snippet,
        content='chunks',
        content_rowid='id'
      );

      CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
        INSERT INTO fts_chunks(rowid, snippet) VALUES (new.id, new.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
        INSERT INTO fts_chunks(fts_chunks, rowid, snippet) VALUES ('delete', old.id, old.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
        INSERT INTO fts_chunks(fts_chunks, rowid, snippet) VALUES ('delete', old.id, old.snippet);
        INSERT INTO fts_chunks(rowid, snippet) VALUES (new.id, new.snippet);
      END;

      CREATE TABLE IF NOT EXISTS file_imports (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
        source TEXT NOT NULL,
        names TEXT NOT NULL,
        resolved_file_id INTEGER REFERENCES files(id) ON DELETE SET NULL
      );

      CREATE TABLE IF NOT EXISTS file_exports (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
        name TEXT NOT NULL,
        type TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_file_imports_file ON file_imports(file_id);
      CREATE INDEX IF NOT EXISTS idx_file_imports_resolved ON file_imports(resolved_file_id);
      CREATE INDEX IF NOT EXISTS idx_file_exports_file ON file_exports(file_id);
      CREATE INDEX IF NOT EXISTS idx_file_exports_name ON file_exports(name);

      CREATE TABLE IF NOT EXISTS conversation_sessions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT UNIQUE NOT NULL,
        jsonl_path TEXT NOT NULL,
        started_at TEXT NOT NULL,
        ended_at TEXT,
        turn_count INTEGER DEFAULT 0,
        total_tokens INTEGER DEFAULT 0,
        indexed_at TEXT NOT NULL,
        file_mtime REAL NOT NULL,
        read_offset INTEGER DEFAULT 0
      );

      CREATE TABLE IF NOT EXISTS conversation_turns (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        turn_index INTEGER NOT NULL,
        timestamp TEXT NOT NULL,
        user_text TEXT,
        assistant_text TEXT,
        tools_used TEXT,
        files_referenced TEXT,
        token_cost INTEGER DEFAULT 0,
        summary TEXT,
        UNIQUE(session_id, turn_index)
      );

      CREATE TABLE IF NOT EXISTS conversation_chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        turn_id INTEGER NOT NULL REFERENCES conversation_turns(id) ON DELETE CASCADE,
        chunk_index INTEGER NOT NULL,
        snippet TEXT NOT NULL
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_conversation USING vec0(
        chunk_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS fts_conversation USING fts5(
        snippet,
        content='conversation_chunks',
        content_rowid='id'
      );

      CREATE TRIGGER IF NOT EXISTS conv_chunks_ai AFTER INSERT ON conversation_chunks BEGIN
        INSERT INTO fts_conversation(rowid, snippet) VALUES (new.id, new.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS conv_chunks_ad AFTER DELETE ON conversation_chunks BEGIN
        INSERT INTO fts_conversation(fts_conversation, rowid, snippet) VALUES ('delete', old.id, old.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS conv_chunks_au AFTER UPDATE ON conversation_chunks BEGIN
        INSERT INTO fts_conversation(fts_conversation, rowid, snippet) VALUES ('delete', old.id, old.snippet);
        INSERT INTO fts_conversation(rowid, snippet) VALUES (new.id, new.snippet);
      END;

      CREATE INDEX IF NOT EXISTS idx_conv_turns_session ON conversation_turns(session_id);

      CREATE TABLE IF NOT EXISTS conversation_checkpoints (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        turn_index INTEGER NOT NULL,
        timestamp TEXT NOT NULL,
        type TEXT NOT NULL,
        title TEXT NOT NULL,
        summary TEXT NOT NULL,
        files_involved TEXT,
        tags TEXT,
        embedding BLOB
      );

      CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON conversation_checkpoints(session_id);
      CREATE INDEX IF NOT EXISTS idx_checkpoints_type ON conversation_checkpoints(type);

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_checkpoints USING vec0(
        checkpoint_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );

      CREATE TABLE IF NOT EXISTS query_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        query TEXT NOT NULL,
        result_count INTEGER NOT NULL,
        top_score REAL,
        top_path TEXT,
        duration_ms INTEGER NOT NULL,
        created_at TEXT NOT NULL
      );
    `);

    this.db.exec(`
      CREATE TABLE IF NOT EXISTS annotations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT NOT NULL,
        symbol_name TEXT,
        note TEXT NOT NULL,
        author TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );
      CREATE INDEX IF NOT EXISTS idx_ann_path ON annotations(path);

      CREATE VIRTUAL TABLE IF NOT EXISTS fts_annotations USING fts5(
        note,
        content='annotations',
        content_rowid='id'
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_annotations USING vec0(
        annotation_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );
    `);

    this.migrateChunksEntityColumns();
  }

  /**
   * Adds the entity/line columns to a `chunks` table that was created
   * without them. `CREATE TABLE IF NOT EXISTS` leaves an existing table
   * untouched, so missing columns have to be added one by one.
   */
  private migrateChunksEntityColumns() {
    const present = new Set(
      this.db
        .query<{ name: string }, []>("PRAGMA table_info(chunks)")
        .all()
        .map((column) => column.name)
    );

    const required: ReadonlyArray<readonly [name: string, sqlType: string]> = [
      ["entity_name", "TEXT"],
      ["chunk_type", "TEXT"],
      ["start_line", "INTEGER"],
      ["end_line", "INTEGER"],
    ];

    for (const [name, sqlType] of required) {
      if (!present.has(name)) {
        this.db.exec(`ALTER TABLE chunks ADD COLUMN ${name} ${sqlType}`);
      }
    }
  }

  // ── File operations ───────────────────────────────────────────

  /** Delegates to `fileOps.getFileByPath`. */
  getFileByPath(path: string) {
    return fileOps.getFileByPath(this.db, path);
  }

  /** Delegates to `fileOps.upsertFileStart`. */
  upsertFileStart(path: string, hash: string) {
    return fileOps.upsertFileStart(this.db, path, hash);
  }

  /** Delegates to `fileOps.insertChunkBatch`; returns nothing. */
  insertChunkBatch(fileId: number, chunks: EmbeddedChunk[], startIndex: number): void {
    fileOps.insertChunkBatch(this.db, fileId, chunks, startIndex);
  }

  /** Delegates to `fileOps.upsertFile`; returns nothing. */
  upsertFile(path: string, hash: string, chunks: EmbeddedChunk[]): void {
    fileOps.upsertFile(this.db, path, hash, chunks);
  }

  /** Delegates to `fileOps.removeFile`. */
  removeFile(path: string) {
    return fileOps.removeFile(this.db, path);
  }

  /** Delegates to `fileOps.pruneDeleted`. */
  pruneDeleted(existingPaths: Set<string>) {
    return fileOps.pruneDeleted(this.db, existingPaths);
  }

  /** Delegates to `fileOps.getAllFilePaths`. */
  getAllFilePaths() {
    return fileOps.getAllFilePaths(this.db);
  }

  /** Delegates to `fileOps.getStatus`. */
  getStatus() {
    return fileOps.getStatus(this.db);
  }

  // ── Search operations ─────────────────────────────────────────

  /** Delegates to `searchOps.vectorSearch`. */
  search(queryEmbedding: Float32Array, topK?: number) {
    return searchOps.vectorSearch(this.db, queryEmbedding, topK);
  }

  /** Delegates to `searchOps.textSearch`. */
  textSearch(query: string, topK?: number) {
    return searchOps.textSearch(this.db, query, topK);
  }

  /** Delegates to `searchOps.vectorSearchChunks`. */
  searchChunks(queryEmbedding: Float32Array, topK?: number) {
    return searchOps.vectorSearchChunks(this.db, queryEmbedding, topK);
  }

  /** Delegates to `searchOps.textSearchChunks`. */
  textSearchChunks(query: string, topK?: number) {
    return searchOps.textSearchChunks(this.db, query, topK);
  }

  /** Delegates to `searchOps.searchSymbols`. */
  searchSymbols(query: string, exact?: boolean, type?: string, topK?: number) {
    return searchOps.searchSymbols(this.db, query, exact, type, topK);
  }

  /** Delegates to `searchOps.findUsages`. */
  findUsages(symbolName: string, exact: boolean, top: number) {
    return searchOps.findUsages(this.db, symbolName, exact, top);
  }

  // ── Graph operations ──────────────────────────────────────────

  /** Delegates to `graphOps.upsertFileGraph`; returns nothing. */
  upsertFileGraph(
    fileId: number,
    imports: { name: string; source: string }[],
    exports: { name: string; type: string }[]
  ): void {
    graphOps.upsertFileGraph(this.db, fileId, imports, exports);
  }

  /** Delegates to `graphOps.resolveImport`; returns nothing. */
  resolveImport(importId: number, resolvedFileId: number): void {
    graphOps.resolveImport(this.db, importId, resolvedFileId);
  }

  /** Delegates to `graphOps.getUnresolvedImports`. */
  getUnresolvedImports() {
    return graphOps.getUnresolvedImports(this.db);
  }

  /** Delegates to `graphOps.getGraph`. */
  getGraph() {
    return graphOps.getGraph(this.db);
  }

  /** Delegates to `graphOps.getSubgraph`. */
  getSubgraph(fileIds: number[], maxHops?: number) {
    return graphOps.getSubgraph(this.db, fileIds, maxHops);
  }

  /** Delegates to `graphOps.getImportsForFile`. */
  getImportsForFile(fileId: number) {
    return graphOps.getImportsForFile(this.db, fileId);
  }

  /** Delegates to `graphOps.getImportersOf`. */
  getImportersOf(fileId: number) {
    return graphOps.getImportersOf(this.db, fileId);
  }

  /** Delegates to `graphOps.getDependsOn`. */
  getDependsOn(fileId: number) {
    return graphOps.getDependsOn(this.db, fileId);
  }

  /** Delegates to `graphOps.getDependedOnBy`. */
  getDependedOnBy(fileId: number) {
    return graphOps.getDependedOnBy(this.db, fileId);
  }

  // ── Conversation operations ───────────────────────────────────

  /** Delegates to `conversationOps.upsertSession`; returns nothing. */
  upsertSession(
    sessionId: string, jsonlPath: string, startedAt: string,
    mtime: number, readOffset: number
  ): void {
    conversationOps.upsertSession(this.db, sessionId, jsonlPath, startedAt, mtime, readOffset);
  }

  /** Delegates to `conversationOps.getSession`. */
  getSession(sessionId: string) {
    return conversationOps.getSession(this.db, sessionId);
  }

  /** Delegates to `conversationOps.updateSessionStats`; returns nothing. */
  updateSessionStats(
    sessionId: string, turnCount: number, totalTokens: number, readOffset: number
  ): void {
    conversationOps.updateSessionStats(this.db, sessionId, turnCount, totalTokens, readOffset);
  }

  /** Delegates to `conversationOps.insertTurn`. */
  insertTurn(
    sessionId: string, turnIndex: number, timestamp: string,
    userText: string, assistantText: string, toolsUsed: string[],
    filesReferenced: string[], tokenCost: number, summary: string,
    chunks: { snippet: string; embedding: Float32Array }[]
  ) {
    return conversationOps.insertTurn(
      this.db, sessionId, turnIndex, timestamp, userText,
      assistantText, toolsUsed, filesReferenced, tokenCost, summary, chunks
    );
  }

  /** Delegates to `conversationOps.getTurnCount`. */
  getTurnCount(sessionId: string) {
    return conversationOps.getTurnCount(this.db, sessionId);
  }

  /** Delegates to `conversationOps.searchConversation`. */
  searchConversation(queryEmbedding: Float32Array, topK?: number, sessionId?: string) {
    return conversationOps.searchConversation(this.db, queryEmbedding, topK, sessionId);
  }

  /** Delegates to `conversationOps.textSearchConversation`. */
  textSearchConversation(query: string, topK?: number, sessionId?: string) {
    return conversationOps.textSearchConversation(this.db, query, topK, sessionId);
  }

  // ── Checkpoint operations ─────────────────────────────────────

  /** Delegates to `checkpointOps.createCheckpoint`. */
  createCheckpoint(
    sessionId: string, turnIndex: number, timestamp: string,
    type: string, title: string, summary: string,
    filesInvolved: string[], tags: string[], embedding: Float32Array
  ) {
    return checkpointOps.createCheckpoint(
      this.db, sessionId, turnIndex, timestamp, type, title,
      summary, filesInvolved, tags, embedding
    );
  }

  /** Delegates to `checkpointOps.listCheckpoints`. */
  listCheckpoints(sessionId?: string, type?: string, limit?: number) {
    return checkpointOps.listCheckpoints(this.db, sessionId, type, limit);
  }

  /** Delegates to `checkpointOps.searchCheckpoints`. */
  searchCheckpoints(queryEmbedding: Float32Array, topK?: number, type?: string) {
    return checkpointOps.searchCheckpoints(this.db, queryEmbedding, topK, type);
  }

  /** Delegates to `checkpointOps.getCheckpoint`. */
  getCheckpoint(id: number) {
    return checkpointOps.getCheckpoint(this.db, id);
  }

  // ── Annotation operations ─────────────────────────────────────

  /** Delegates to `annotationOps.upsertAnnotation`. */
  upsertAnnotation(
    path: string, note: string, embedding: Float32Array,
    symbolName?: string | null, author?: string | null
  ) {
    return annotationOps.upsertAnnotation(this.db, path, note, embedding, symbolName, author);
  }

  /** Delegates to `annotationOps.getAnnotations`. */
  getAnnotations(path?: string, symbolName?: string | null) {
    return annotationOps.getAnnotations(this.db, path, symbolName);
  }

  /** Delegates to `annotationOps.searchAnnotations`. */
  searchAnnotations(queryEmbedding: Float32Array, topK?: number) {
    return annotationOps.searchAnnotations(this.db, queryEmbedding, topK);
  }

  /** Delegates to `annotationOps.deleteAnnotation`. */
  deleteAnnotation(id: number) {
    return annotationOps.deleteAnnotation(this.db, id);
  }

  // ── Analytics operations ──────────────────────────────────────

  /** Delegates to `analyticsOps.logQuery`; returns nothing. */
  logQuery(
    query: string, resultCount: number,
    topScore: number | null, topPath: string | null, durationMs: number
  ): void {
    analyticsOps.logQuery(this.db, query, resultCount, topScore, topPath, durationMs);
  }

  /** Delegates to `analyticsOps.getAnalytics`. */
  getAnalytics(days?: number) {
    return analyticsOps.getAnalytics(this.db, days);
  }

  /** Delegates to `analyticsOps.getAnalyticsTrend`. */
  getAnalyticsTrend(days?: number) {
    return analyticsOps.getAnalyticsTrend(this.db, days);
  }

  // ── Lifecycle ─────────────────────────────────────────────────

  /** Closes the underlying database connection. */
  close() {
    this.db.close();
  }
}
@@ -0,0 +1,244 @@
1
+ import { Database } from "bun:sqlite";
2
+ import { type SearchResult, type ChunkSearchResult, type SymbolResult, type UsageResult } from "./types";
3
+ import { escapeRegex, sanitizeFTS } from "../search/usages";
4
+
5
/**
 * Finds the chunks whose stored embeddings are closest to a query embedding.
 *
 * The `vec_chunks` virtual table is queried with `MATCH … ORDER BY distance
 * LIMIT ?`, and the hits are joined to `chunks` and `files` for display data.
 *
 * @param db Open database connection.
 * @param queryEmbedding Query vector; may be a view into a larger buffer.
 * @param topK Maximum number of neighbours to request (default 5).
 * @returns One result per hit, with `score = 1 / (1 + distance)` so that
 *   smaller distances yield scores closer to 1.
 */
export function vectorSearch(db: Database, queryEmbedding: Float32Array, topK: number = 5): SearchResult[] {
  type Row = {
    chunk_id: number;
    distance: number;
    snippet: string;
    chunk_index: number;
    entity_name: string | null;
    chunk_type: string | null;
    path: string;
  };

  // Bind exactly the bytes this Float32Array covers. Wrapping `.buffer`
  // alone would send the whole backing ArrayBuffer when the embedding is a
  // view (e.g. produced by `subarray`), i.e. a vector of the wrong
  // length and content.
  const queryBytes = new Uint8Array(
    queryEmbedding.buffer,
    queryEmbedding.byteOffset,
    queryEmbedding.byteLength
  );

  const rows = db
    .query<Row, [Uint8Array, number]>(
      `SELECT v.chunk_id, v.distance, c.snippet, c.chunk_index, c.entity_name, c.chunk_type, f.path
       FROM (SELECT chunk_id, distance FROM vec_chunks WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
       JOIN chunks c ON c.id = v.chunk_id
       JOIN files f ON f.id = c.file_id`
    )
    .all(queryBytes, topK);

  return rows.map((row) => ({
    path: row.path,
    score: 1 / (1 + row.distance),
    snippet: row.snippet,
    chunkIndex: row.chunk_index,
    entityName: row.entity_name,
    chunkType: row.chunk_type,
  }));
}
34
+
35
/**
 * Full-text search over chunk snippets using the `fts_chunks` FTS5 index.
 *
 * The raw query is passed through `sanitizeFTS` before being bound to
 * `MATCH`; rows come back ordered by FTS5 `rank` (best match first).
 *
 * Scoring: FTS5's built-in `rank` is the bm25 value, where a *better* match
 * is a numerically *smaller* (more negative) number. The score is therefore
 * derived from the magnitude so that it grows with match quality and stays
 * in `[0, 1)`: `|rank| / (1 + |rank|)`. The previous `1 / (1 + |rank|)` form
 * gave the strongest matches the lowest scores, which is the opposite of the
 * "higher is better" convention used by the vector search scores.
 *
 * @param db Open database connection.
 * @param query User-supplied search text.
 * @param topK Maximum number of rows to return (default 5).
 * @returns Results in rank order, best first, with scores that decrease
 *   along the list.
 */
export function textSearch(db: Database, query: string, topK: number = 5): SearchResult[] {
  type Row = {
    snippet: string;
    chunk_index: number;
    entity_name: string | null;
    chunk_type: string | null;
    rank: number;
    path: string;
  };

  const rows = db
    .query<Row, [string, number]>(
      `SELECT c.snippet, c.chunk_index, c.entity_name, c.chunk_type, f.path, rank
       FROM fts_chunks fts
       JOIN chunks c ON c.id = fts.rowid
       JOIN files f ON f.id = c.file_id
       WHERE fts_chunks MATCH ?
       ORDER BY rank
       LIMIT ?`
    )
    .all(sanitizeFTS(query), topK);

  return rows.map((row) => {
    const strength = Math.abs(row.rank);
    return {
      path: row.path,
      score: strength / (1 + strength),
      snippet: row.snippet,
      chunkIndex: row.chunk_index,
      entityName: row.entity_name,
      chunkType: row.chunk_type,
    };
  });
}
66
+
67
/**
 * Nearest-neighbour search over chunk embeddings that also returns each
 * chunk's line range.
 *
 * Same query shape as the file-level vector search (`vec_chunks` `MATCH`,
 * ordered by distance, limited to `topK`), but the result carries the
 * snippet as `content` plus `startLine` / `endLine`.
 *
 * @param db Open database connection.
 * @param queryEmbedding Query vector; may be a view into a larger buffer.
 * @param topK Maximum number of neighbours to request (default 8).
 * @returns One result per hit, with `score = 1 / (1 + distance)`.
 */
export function vectorSearchChunks(db: Database, queryEmbedding: Float32Array, topK: number = 8): ChunkSearchResult[] {
  type Row = {
    chunk_id: number;
    distance: number;
    snippet: string;
    chunk_index: number;
    entity_name: string | null;
    chunk_type: string | null;
    start_line: number | null;
    end_line: number | null;
    path: string;
  };

  // Honour byteOffset/byteLength: `.buffer` alone is the entire backing
  // ArrayBuffer, which is larger than the vector when the embedding is a
  // view such as the result of `subarray`.
  const queryBytes = new Uint8Array(
    queryEmbedding.buffer,
    queryEmbedding.byteOffset,
    queryEmbedding.byteLength
  );

  const rows = db
    .query<Row, [Uint8Array, number]>(
      `SELECT v.chunk_id, v.distance, c.snippet, c.chunk_index, c.entity_name, c.chunk_type,
              c.start_line, c.end_line, f.path
       FROM (SELECT chunk_id, distance FROM vec_chunks WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
       JOIN chunks c ON c.id = v.chunk_id
       JOIN files f ON f.id = c.file_id`
    )
    .all(queryBytes, topK);

  return rows.map((row) => ({
    path: row.path,
    score: 1 / (1 + row.distance),
    content: row.snippet,
    chunkIndex: row.chunk_index,
    entityName: row.entity_name,
    chunkType: row.chunk_type,
    startLine: row.start_line,
    endLine: row.end_line,
  }));
}
101
+
102
/**
 * Full-text search over chunk snippets that also returns each chunk's line
 * range.
 *
 * The query is passed through `sanitizeFTS`, matched against `fts_chunks`,
 * and rows are returned in FTS5 `rank` order (best match first).
 *
 * Scoring: FTS5's built-in `rank` is the bm25 value, for which better
 * matches are numerically smaller (more negative). The score is computed as
 * `|rank| / (1 + |rank|)` so it rises with match quality and stays in
 * `[0, 1)`. The earlier `1 / (1 + |rank|)` mapping assigned the lowest
 * scores to the best matches.
 *
 * @param db Open database connection.
 * @param query User-supplied search text.
 * @param topK Maximum number of rows to return (default 8).
 * @returns Results in rank order with the snippet exposed as `content`.
 */
export function textSearchChunks(db: Database, query: string, topK: number = 8): ChunkSearchResult[] {
  type Row = {
    snippet: string;
    chunk_index: number;
    entity_name: string | null;
    chunk_type: string | null;
    start_line: number | null;
    end_line: number | null;
    rank: number;
    path: string;
  };

  const rows = db
    .query<Row, [string, number]>(
      `SELECT c.snippet, c.chunk_index, c.entity_name, c.chunk_type, c.start_line, c.end_line,
              f.path, rank
       FROM fts_chunks fts
       JOIN chunks c ON c.id = fts.rowid
       JOIN files f ON f.id = c.file_id
       WHERE fts_chunks MATCH ?
       ORDER BY rank
       LIMIT ?`
    )
    .all(sanitizeFTS(query), topK);

  return rows.map((row) => {
    const strength = Math.abs(row.rank);
    return {
      path: row.path,
      score: strength / (1 + strength),
      content: row.snippet,
      chunkIndex: row.chunk_index,
      entityName: row.entity_name,
      chunkType: row.chunk_type,
      startLine: row.start_line,
      endLine: row.end_line,
    };
  });
}
138
+
139
/**
 * Looks up exported symbols by name (case-insensitive).
 *
 * Each match from `file_exports` is returned with its file path and, when a
 * chunk in the same file has a matching `entity_name`, the snippet and index
 * of the first such chunk.
 *
 * The query text is matched literally: `%`, `_` and `\` are escaped before
 * being bound to `LIKE … ESCAPE '\'`. Without this, `_` (very common in
 * identifiers) matches any single character and `%` any run of characters,
 * so even `exact` lookups could return unrelated symbols.
 *
 * @param db Open database connection.
 * @param query Symbol name, or a fragment of one when `exact` is false.
 * @param exact When true the whole name must equal `query`; otherwise
 *   `query` may appear anywhere in the name (default false).
 * @param type Optional export type to filter on (`fe.type = ?`).
 * @param topK Maximum number of rows, ordered by symbol name (default 20).
 * @returns Matching symbols; `snippet` / `chunkIndex` are null when no chunk
 *   with that entity name exists in the file.
 */
export function searchSymbols(
  db: Database,
  query: string,
  exact: boolean = false,
  type?: string,
  topK: number = 20
): SymbolResult[] {
  type Row = {
    symbol_name: string;
    symbol_type: string;
    path: string;
    snippet: string | null;
    chunk_index: number | null;
  };

  // Neutralise LIKE metacharacters (and the escape character itself).
  const literal = query.replace(/[\\%_]/g, "\\$&");
  const pattern = exact ? literal : `%${literal}%`;

  let sql = `
    SELECT fe.name AS symbol_name, fe.type AS symbol_type, f.path,
      (SELECT snippet FROM chunks
        WHERE file_id = fe.file_id AND LOWER(entity_name) = LOWER(fe.name)
        ORDER BY chunk_index LIMIT 1) AS snippet,
      (SELECT chunk_index FROM chunks
        WHERE file_id = fe.file_id AND LOWER(entity_name) = LOWER(fe.name)
        ORDER BY chunk_index LIMIT 1) AS chunk_index
    FROM file_exports fe
    JOIN files f ON f.id = fe.file_id
    WHERE LOWER(fe.name) LIKE LOWER(?) ESCAPE '\\'
  `;
  const params: (string | number)[] = [pattern];

  if (type) {
    sql += " AND fe.type = ?";
    params.push(type);
  }

  sql += " ORDER BY fe.name LIMIT ?";
  params.push(topK);

  return db
    .query<Row, (string | number)[]>(sql)
    .all(...params)
    .map((r) => ({
      path: r.path,
      symbolName: r.symbol_name,
      symbolType: r.symbol_type,
      snippet: r.snippet,
      chunkIndex: r.chunk_index,
    }));
}
181
+
182
/**
 * Finds chunks that mention a symbol outside of the files that export it.
 *
 * Steps:
 *  1. Collect the ids of files whose exports include `symbolName`
 *     (case-insensitive); hits in those files are skipped.
 *  2. Run an FTS5 phrase query for the symbol, over-fetching `top * 5`
 *     rows so that skipping defining files still leaves enough candidates.
 *     Any error from this query yields an empty result.
 *  3. For each remaining hit, locate the first snippet line matching the
 *     symbol (word boundary on both sides when `exact`, only at the start
 *     otherwise) to derive the reported line and snippet text.
 *
 * @param db Open database connection.
 * @param symbolName Symbol to look for.
 * @param exact Whether the match must end on a word boundary as well.
 * @param top Maximum number of usages to return.
 * @returns Usages in FTS rank order. When no line of a hit matches the
 *   pattern, the first 120 characters of the snippet and the chunk's start
 *   line are reported instead.
 */
export function findUsages(db: Database, symbolName: string, exact: boolean, top: number): UsageResult[] {
  type Hit = {
    id: number;
    snippet: string;
    file_id: number;
    chunk_index: number;
    start_line: number | null;
    path: string;
  };

  const exportingRows = db
    .query<{ file_id: number }, [string]>(
      "SELECT file_id FROM file_exports WHERE LOWER(name) = LOWER(?)"
    )
    .all(symbolName);
  const definingFileIds = new Set<number>();
  for (const exporting of exportingRows) {
    definingFileIds.add(exporting.file_id);
  }

  let hits: Hit[];
  try {
    // Quote as an FTS5 phrase; embedded double quotes are doubled.
    const phrase = '"' + symbolName.replace(/"/g, '""') + '"';
    hits = db
      .query<Hit, [string, number]>(
        `SELECT c.id, c.snippet, c.file_id, c.chunk_index, c.start_line, f.path
         FROM fts_chunks fts
         JOIN chunks c ON c.id = fts.rowid
         JOIN files f ON f.id = c.file_id
         WHERE fts_chunks MATCH ?
         ORDER BY rank
         LIMIT ?`
      )
      .all(phrase, top * 5);
  } catch {
    return [];
  }

  const trailingBoundary = exact ? "\\b" : "";
  const matcher = new RegExp(`\\b${escapeRegex(symbolName)}${trailingBoundary}`, "i");

  const usages: UsageResult[] = [];

  for (const hit of hits) {
    if (definingFileIds.has(hit.file_id)) continue;

    const snippetLines = hit.snippet.split("\n");
    const offset = snippetLines.findIndex((text) => matcher.test(text));
    const located = offset >= 0;

    const snippet = located
      ? snippetLines[offset].trim()
      : hit.snippet.slice(0, 120).trim();

    let line = hit.start_line;
    if (located && hit.start_line != null) {
      line = hit.start_line + offset;
    }

    usages.push({ path: hit.path, line, snippet });
    if (usages.length >= top) break;
  }

  return usages;
}