@winci/local-rag 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +24 -0
- package/.mcp.json +11 -0
- package/LICENSE +21 -0
- package/README.md +567 -0
- package/hooks/hooks.json +25 -0
- package/hooks/scripts/reindex-file.sh +19 -0
- package/hooks/scripts/session-start.sh +11 -0
- package/package.json +52 -0
- package/skills/local-rag/SKILL.md +42 -0
- package/src/cli/commands/analytics.ts +58 -0
- package/src/cli/commands/benchmark.ts +30 -0
- package/src/cli/commands/checkpoint.ts +85 -0
- package/src/cli/commands/conversation.ts +102 -0
- package/src/cli/commands/demo.ts +119 -0
- package/src/cli/commands/eval.ts +31 -0
- package/src/cli/commands/index-cmd.ts +26 -0
- package/src/cli/commands/init.ts +35 -0
- package/src/cli/commands/map.ts +21 -0
- package/src/cli/commands/remove.ts +15 -0
- package/src/cli/commands/search-cmd.ts +59 -0
- package/src/cli/commands/serve.ts +5 -0
- package/src/cli/commands/status.ts +13 -0
- package/src/cli/index.ts +117 -0
- package/src/cli/progress.ts +21 -0
- package/src/cli/setup.ts +192 -0
- package/src/config/index.ts +101 -0
- package/src/conversation/indexer.ts +147 -0
- package/src/conversation/parser.ts +323 -0
- package/src/db/analytics.ts +116 -0
- package/src/db/annotations.ts +161 -0
- package/src/db/checkpoints.ts +166 -0
- package/src/db/conversation.ts +241 -0
- package/src/db/files.ts +146 -0
- package/src/db/graph.ts +250 -0
- package/src/db/index.ts +468 -0
- package/src/db/search.ts +244 -0
- package/src/db/types.ts +85 -0
- package/src/embeddings/embed.ts +73 -0
- package/src/graph/resolver.ts +305 -0
- package/src/indexing/chunker.ts +523 -0
- package/src/indexing/indexer.ts +263 -0
- package/src/indexing/parse.ts +99 -0
- package/src/indexing/watcher.ts +84 -0
- package/src/main.ts +8 -0
- package/src/search/benchmark.ts +139 -0
- package/src/search/eval.ts +171 -0
- package/src/search/hybrid.ts +194 -0
- package/src/search/reranker.ts +99 -0
- package/src/search/usages.ts +27 -0
- package/src/server/index.ts +126 -0
- package/src/tools/analytics-tools.ts +58 -0
- package/src/tools/annotation-tools.ts +89 -0
- package/src/tools/checkpoint-tools.ts +147 -0
- package/src/tools/conversation-tools.ts +86 -0
- package/src/tools/git-tools.ts +103 -0
- package/src/tools/graph-tools.ts +163 -0
- package/src/tools/index-tools.ts +91 -0
- package/src/tools/index.ts +33 -0
- package/src/tools/search.ts +238 -0
- package/src/types.ts +9 -0
- package/src/utils/log.ts +39 -0
package/src/db/index.ts
ADDED
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import * as sqliteVec from "sqlite-vec";
|
|
3
|
+
import { EMBEDDING_DIM } from "../embeddings/embed";
|
|
4
|
+
import { join, resolve } from "path";
|
|
5
|
+
import { mkdirSync, existsSync } from "fs";
|
|
6
|
+
import { platform } from "os";
|
|
7
|
+
import { type EmbeddedChunk } from "../types";
|
|
8
|
+
|
|
9
|
+
// Store modules
|
|
10
|
+
import * as fileOps from "./files";
|
|
11
|
+
import * as searchOps from "./search";
|
|
12
|
+
import * as graphOps from "./graph";
|
|
13
|
+
import * as conversationOps from "./conversation";
|
|
14
|
+
import * as checkpointOps from "./checkpoints";
|
|
15
|
+
import * as annotationOps from "./annotations";
|
|
16
|
+
import * as analyticsOps from "./analytics";
|
|
17
|
+
|
|
18
|
+
// Re-export all types so consumers keep importing from "../db"
|
|
19
|
+
export type {
|
|
20
|
+
StoredChunk,
|
|
21
|
+
StoredFile,
|
|
22
|
+
SearchResult,
|
|
23
|
+
ChunkSearchResult,
|
|
24
|
+
UsageResult,
|
|
25
|
+
AnnotationRow,
|
|
26
|
+
SymbolResult,
|
|
27
|
+
CheckpointRow,
|
|
28
|
+
ConversationSearchResult,
|
|
29
|
+
} from "./types";
|
|
30
|
+
|
|
31
|
+
// macOS ships with Apple's SQLite which doesn't support extensions.
|
|
32
|
+
// Point bun:sqlite at Homebrew's vanilla build if available.
|
|
33
|
+
let sqliteLoaded = false;
|
|
34
|
+
function loadCustomSQLite() {
|
|
35
|
+
if (sqliteLoaded) return;
|
|
36
|
+
sqliteLoaded = true;
|
|
37
|
+
|
|
38
|
+
if (platform() !== "darwin") return;
|
|
39
|
+
|
|
40
|
+
const paths = [
|
|
41
|
+
"/opt/homebrew/opt/sqlite/lib/libsqlite3.dylib", // Apple Silicon
|
|
42
|
+
"/usr/local/opt/sqlite/lib/libsqlite3.dylib", // Intel Mac
|
|
43
|
+
];
|
|
44
|
+
|
|
45
|
+
for (const p of paths) {
|
|
46
|
+
if (existsSync(p)) {
|
|
47
|
+
Database.setCustomSQLite(p);
|
|
48
|
+
return;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
throw new Error(
|
|
53
|
+
"sqlite-vec requires vanilla SQLite on macOS. Install it with: brew install sqlite"
|
|
54
|
+
);
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
 * Facade over the per-project SQLite index.
 *
 * Owns a single bun:sqlite `Database` stored at `<ragDir>/index.db`, creates
 * the full schema (code chunks, vectors, FTS, import/export graph,
 * conversation history, checkpoints, annotations, query analytics) on
 * construction, and delegates every operation to the focused store modules
 * (`files`, `search`, `graph`, `conversation`, `checkpoints`, `annotations`,
 * `analytics`) so consumers only ever import from `../db`.
 */
export class RagDB {
  private db: Database;

  /**
   * Opens (creating if needed) the index database for `projectDir`.
   *
   * The database directory is `$RAG_DB_DIR` (resolved to an absolute path)
   * when set, otherwise `<projectDir>/.rag`. A read-only (`EROFS`) or
   * permission (`EACCES`) failure creating it is rewrapped with instructions
   * to set `RAG_DB_DIR`; any other mkdir error is rethrown unchanged.
   */
  constructor(projectDir: string) {
    loadCustomSQLite();

    const ragDir = process.env.RAG_DB_DIR
      ? resolve(process.env.RAG_DB_DIR)
      : join(projectDir, ".rag");

    try {
      mkdirSync(ragDir, { recursive: true });
    } catch (err: any) {
      if (err.code === "EROFS" || err.code === "EACCES") {
        const where = process.env.RAG_DB_DIR
          ? `RAG_DB_DIR path "${ragDir}"`
          : `project directory "${projectDir}"`;
        throw new Error(
          `local-rag: cannot write to ${where} (${err.code}).\n` +
          `Set RAG_DB_DIR to a writable directory in your MCP server config:\n` +
          `  "env": { "RAG_DB_DIR": "/tmp/my-project-rag", "RAG_PROJECT_DIR": "..." }`
        );
      }
      throw err;
    }

    this.db = new Database(join(ragDir, "index.db"));
    // WAL permits concurrent readers alongside a writer; busy_timeout makes a
    // locked connection wait up to 5s instead of failing with SQLITE_BUSY.
    this.db.exec("PRAGMA journal_mode=WAL");
    this.db.exec("PRAGMA busy_timeout = 5000");
    sqliteVec.load(this.db);
    this.initSchema();
  }

  /**
   * Creates every table, virtual table, trigger, and index if missing.
   * Idempotent (all statements are `IF NOT EXISTS`); runs on every open.
   * Finishes by applying the additive chunk-column migration below.
   */
  private initSchema() {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS files (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT UNIQUE NOT NULL,
        hash TEXT NOT NULL,
        indexed_at TEXT NOT NULL
      );

      CREATE TABLE IF NOT EXISTS chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
        chunk_index INTEGER NOT NULL,
        snippet TEXT NOT NULL,
        entity_name TEXT,
        chunk_type TEXT,
        start_line INTEGER,
        end_line INTEGER
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_chunks USING vec0(
        chunk_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS fts_chunks USING fts5(
        snippet,
        content='chunks',
        content_rowid='id'
      );

      -- External-content FTS5 is not updated automatically; these triggers
      -- mirror every chunks insert/delete/update into fts_chunks.
      CREATE TRIGGER IF NOT EXISTS chunks_ai AFTER INSERT ON chunks BEGIN
        INSERT INTO fts_chunks(rowid, snippet) VALUES (new.id, new.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS chunks_ad AFTER DELETE ON chunks BEGIN
        INSERT INTO fts_chunks(fts_chunks, rowid, snippet) VALUES ('delete', old.id, old.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS chunks_au AFTER UPDATE ON chunks BEGIN
        INSERT INTO fts_chunks(fts_chunks, rowid, snippet) VALUES ('delete', old.id, old.snippet);
        INSERT INTO fts_chunks(rowid, snippet) VALUES (new.id, new.snippet);
      END;

      -- Import/export graph: one row per import statement and per exported
      -- symbol; resolved_file_id is filled in later by the graph resolver.
      CREATE TABLE IF NOT EXISTS file_imports (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
        source TEXT NOT NULL,
        names TEXT NOT NULL,
        resolved_file_id INTEGER REFERENCES files(id) ON DELETE SET NULL
      );

      CREATE TABLE IF NOT EXISTS file_exports (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        file_id INTEGER NOT NULL REFERENCES files(id) ON DELETE CASCADE,
        name TEXT NOT NULL,
        type TEXT NOT NULL
      );

      CREATE INDEX IF NOT EXISTS idx_file_imports_file ON file_imports(file_id);
      CREATE INDEX IF NOT EXISTS idx_file_imports_resolved ON file_imports(resolved_file_id);
      CREATE INDEX IF NOT EXISTS idx_file_exports_file ON file_exports(file_id);
      CREATE INDEX IF NOT EXISTS idx_file_exports_name ON file_exports(name);

      -- Conversation history: sessions track the source JSONL file (mtime +
      -- read_offset support incremental re-indexing); turns and chunks hold
      -- the parsed, embedded content.
      CREATE TABLE IF NOT EXISTS conversation_sessions (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT UNIQUE NOT NULL,
        jsonl_path TEXT NOT NULL,
        started_at TEXT NOT NULL,
        ended_at TEXT,
        turn_count INTEGER DEFAULT 0,
        total_tokens INTEGER DEFAULT 0,
        indexed_at TEXT NOT NULL,
        file_mtime REAL NOT NULL,
        read_offset INTEGER DEFAULT 0
      );

      CREATE TABLE IF NOT EXISTS conversation_turns (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        turn_index INTEGER NOT NULL,
        timestamp TEXT NOT NULL,
        user_text TEXT,
        assistant_text TEXT,
        tools_used TEXT,
        files_referenced TEXT,
        token_cost INTEGER DEFAULT 0,
        summary TEXT,
        UNIQUE(session_id, turn_index)
      );

      CREATE TABLE IF NOT EXISTS conversation_chunks (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        turn_id INTEGER NOT NULL REFERENCES conversation_turns(id) ON DELETE CASCADE,
        chunk_index INTEGER NOT NULL,
        snippet TEXT NOT NULL
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_conversation USING vec0(
        chunk_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS fts_conversation USING fts5(
        snippet,
        content='conversation_chunks',
        content_rowid='id'
      );

      -- Same external-content mirroring pattern as fts_chunks above.
      CREATE TRIGGER IF NOT EXISTS conv_chunks_ai AFTER INSERT ON conversation_chunks BEGIN
        INSERT INTO fts_conversation(rowid, snippet) VALUES (new.id, new.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS conv_chunks_ad AFTER DELETE ON conversation_chunks BEGIN
        INSERT INTO fts_conversation(fts_conversation, rowid, snippet) VALUES ('delete', old.id, old.snippet);
      END;
      CREATE TRIGGER IF NOT EXISTS conv_chunks_au AFTER UPDATE ON conversation_chunks BEGIN
        INSERT INTO fts_conversation(fts_conversation, rowid, snippet) VALUES ('delete', old.id, old.snippet);
        INSERT INTO fts_conversation(rowid, snippet) VALUES (new.id, new.snippet);
      END;

      CREATE INDEX IF NOT EXISTS idx_conv_turns_session ON conversation_turns(session_id);

      CREATE TABLE IF NOT EXISTS conversation_checkpoints (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        session_id TEXT NOT NULL,
        turn_index INTEGER NOT NULL,
        timestamp TEXT NOT NULL,
        type TEXT NOT NULL,
        title TEXT NOT NULL,
        summary TEXT NOT NULL,
        files_involved TEXT,
        tags TEXT,
        embedding BLOB
      );

      CREATE INDEX IF NOT EXISTS idx_checkpoints_session ON conversation_checkpoints(session_id);
      CREATE INDEX IF NOT EXISTS idx_checkpoints_type ON conversation_checkpoints(type);

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_checkpoints USING vec0(
        checkpoint_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );

      -- Per-query analytics log consumed by the analytics store.
      CREATE TABLE IF NOT EXISTS query_log (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        query TEXT NOT NULL,
        result_count INTEGER NOT NULL,
        top_score REAL,
        top_path TEXT,
        duration_ms INTEGER NOT NULL,
        created_at TEXT NOT NULL
      );
    `);

    this.db.exec(`
      CREATE TABLE IF NOT EXISTS annotations (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        path TEXT NOT NULL,
        symbol_name TEXT,
        note TEXT NOT NULL,
        author TEXT,
        created_at TEXT NOT NULL,
        updated_at TEXT NOT NULL
      );
      CREATE INDEX IF NOT EXISTS idx_ann_path ON annotations(path);

      CREATE VIRTUAL TABLE IF NOT EXISTS fts_annotations USING fts5(
        note,
        content='annotations',
        content_rowid='id'
      );

      CREATE VIRTUAL TABLE IF NOT EXISTS vec_annotations USING vec0(
        annotation_id INTEGER PRIMARY KEY,
        embedding FLOAT[${EMBEDDING_DIM}]
      );
    `);

    this.migrateChunksEntityColumns();
  }

  /**
   * Additive migration for databases created before the chunks table gained
   * entity/position metadata. Inspects the live column list via
   * `PRAGMA table_info` and ALTERs in whichever columns are missing, so it is
   * safe to run on every startup against both old and new databases.
   */
  private migrateChunksEntityColumns() {
    const cols = this.db
      .query<{ name: string }, []>("PRAGMA table_info(chunks)")
      .all()
      .map((c) => c.name);

    if (!cols.includes("entity_name")) {
      this.db.exec("ALTER TABLE chunks ADD COLUMN entity_name TEXT");
    }
    if (!cols.includes("chunk_type")) {
      this.db.exec("ALTER TABLE chunks ADD COLUMN chunk_type TEXT");
    }
    if (!cols.includes("start_line")) {
      this.db.exec("ALTER TABLE chunks ADD COLUMN start_line INTEGER");
    }
    if (!cols.includes("end_line")) {
      this.db.exec("ALTER TABLE chunks ADD COLUMN end_line INTEGER");
    }
  }

  // ── File operations ───────────────────────────────────────────
  // Thin delegates to ./files; see that module for behavior details.

  getFileByPath(path: string) {
    return fileOps.getFileByPath(this.db, path);
  }
  upsertFileStart(path: string, hash: string) {
    return fileOps.upsertFileStart(this.db, path, hash);
  }
  insertChunkBatch(fileId: number, chunks: EmbeddedChunk[], startIndex: number) {
    fileOps.insertChunkBatch(this.db, fileId, chunks, startIndex);
  }
  upsertFile(path: string, hash: string, chunks: EmbeddedChunk[]) {
    fileOps.upsertFile(this.db, path, hash, chunks);
  }
  removeFile(path: string) {
    return fileOps.removeFile(this.db, path);
  }
  // Drops indexed files whose paths are no longer present on disk.
  pruneDeleted(existingPaths: Set<string>) {
    return fileOps.pruneDeleted(this.db, existingPaths);
  }
  getAllFilePaths() {
    return fileOps.getAllFilePaths(this.db);
  }
  getStatus() {
    return fileOps.getStatus(this.db);
  }

  // ── Search operations ─────────────────────────────────────────
  // Delegates to ./search: vector (sqlite-vec) and full-text (FTS5) lookups.

  search(queryEmbedding: Float32Array, topK?: number) {
    return searchOps.vectorSearch(this.db, queryEmbedding, topK);
  }
  textSearch(query: string, topK?: number) {
    return searchOps.textSearch(this.db, query, topK);
  }
  searchChunks(queryEmbedding: Float32Array, topK?: number) {
    return searchOps.vectorSearchChunks(this.db, queryEmbedding, topK);
  }
  textSearchChunks(query: string, topK?: number) {
    return searchOps.textSearchChunks(this.db, query, topK);
  }
  searchSymbols(query: string, exact?: boolean, type?: string, topK?: number) {
    return searchOps.searchSymbols(this.db, query, exact, type, topK);
  }
  findUsages(symbolName: string, exact: boolean, top: number) {
    return searchOps.findUsages(this.db, symbolName, exact, top);
  }

  // ── Graph operations ──────────────────────────────────────────
  // Delegates to ./graph: the file import/export dependency graph.

  upsertFileGraph(
    fileId: number,
    imports: { name: string; source: string }[],
    exports: { name: string; type: string }[]
  ) {
    graphOps.upsertFileGraph(this.db, fileId, imports, exports);
  }
  resolveImport(importId: number, resolvedFileId: number) {
    graphOps.resolveImport(this.db, importId, resolvedFileId);
  }
  getUnresolvedImports() {
    return graphOps.getUnresolvedImports(this.db);
  }
  getGraph() {
    return graphOps.getGraph(this.db);
  }
  getSubgraph(fileIds: number[], maxHops?: number) {
    return graphOps.getSubgraph(this.db, fileIds, maxHops);
  }
  getImportsForFile(fileId: number) {
    return graphOps.getImportsForFile(this.db, fileId);
  }
  getImportersOf(fileId: number) {
    return graphOps.getImportersOf(this.db, fileId);
  }
  getDependsOn(fileId: number) {
    return graphOps.getDependsOn(this.db, fileId);
  }
  getDependedOnBy(fileId: number) {
    return graphOps.getDependedOnBy(this.db, fileId);
  }

  // ── Conversation operations ───────────────────────────────────
  // Delegates to ./conversation: indexed session/turn history.

  upsertSession(
    sessionId: string, jsonlPath: string, startedAt: string,
    mtime: number, readOffset: number
  ) {
    conversationOps.upsertSession(this.db, sessionId, jsonlPath, startedAt, mtime, readOffset);
  }
  getSession(sessionId: string) {
    return conversationOps.getSession(this.db, sessionId);
  }
  updateSessionStats(
    sessionId: string, turnCount: number, totalTokens: number, readOffset: number
  ) {
    conversationOps.updateSessionStats(this.db, sessionId, turnCount, totalTokens, readOffset);
  }
  insertTurn(
    sessionId: string, turnIndex: number, timestamp: string,
    userText: string, assistantText: string, toolsUsed: string[],
    filesReferenced: string[], tokenCost: number, summary: string,
    chunks: { snippet: string; embedding: Float32Array }[]
  ) {
    return conversationOps.insertTurn(
      this.db, sessionId, turnIndex, timestamp, userText,
      assistantText, toolsUsed, filesReferenced, tokenCost, summary, chunks
    );
  }
  getTurnCount(sessionId: string) {
    return conversationOps.getTurnCount(this.db, sessionId);
  }
  searchConversation(queryEmbedding: Float32Array, topK?: number, sessionId?: string) {
    return conversationOps.searchConversation(this.db, queryEmbedding, topK, sessionId);
  }
  textSearchConversation(query: string, topK?: number, sessionId?: string) {
    return conversationOps.textSearchConversation(this.db, query, topK, sessionId);
  }

  // ── Checkpoint operations ─────────────────────────────────────
  // Delegates to ./checkpoints.

  createCheckpoint(
    sessionId: string, turnIndex: number, timestamp: string,
    type: string, title: string, summary: string,
    filesInvolved: string[], tags: string[], embedding: Float32Array
  ) {
    return checkpointOps.createCheckpoint(
      this.db, sessionId, turnIndex, timestamp, type, title,
      summary, filesInvolved, tags, embedding
    );
  }
  listCheckpoints(sessionId?: string, type?: string, limit?: number) {
    return checkpointOps.listCheckpoints(this.db, sessionId, type, limit);
  }
  searchCheckpoints(queryEmbedding: Float32Array, topK?: number, type?: string) {
    return checkpointOps.searchCheckpoints(this.db, queryEmbedding, topK, type);
  }
  getCheckpoint(id: number) {
    return checkpointOps.getCheckpoint(this.db, id);
  }

  // ── Annotation operations ─────────────────────────────────────
  // Delegates to ./annotations.

  upsertAnnotation(
    path: string, note: string, embedding: Float32Array,
    symbolName?: string | null, author?: string | null
  ) {
    return annotationOps.upsertAnnotation(this.db, path, note, embedding, symbolName, author);
  }
  getAnnotations(path?: string, symbolName?: string | null) {
    return annotationOps.getAnnotations(this.db, path, symbolName);
  }
  searchAnnotations(queryEmbedding: Float32Array, topK?: number) {
    return annotationOps.searchAnnotations(this.db, queryEmbedding, topK);
  }
  deleteAnnotation(id: number) {
    return annotationOps.deleteAnnotation(this.db, id);
  }

  // ── Analytics operations ──────────────────────────────────────
  // Delegates to ./analytics (reads/writes the query_log table above).

  logQuery(
    query: string, resultCount: number,
    topScore: number | null, topPath: string | null, durationMs: number
  ) {
    analyticsOps.logQuery(this.db, query, resultCount, topScore, topPath, durationMs);
  }
  getAnalytics(days?: number) {
    return analyticsOps.getAnalytics(this.db, days);
  }
  getAnalyticsTrend(days?: number) {
    return analyticsOps.getAnalyticsTrend(this.db, days);
  }

  // ── Lifecycle ─────────────────────────────────────────────────

  /** Closes the underlying SQLite handle; the instance is unusable after. */
  close() {
    this.db.close();
  }
}
|
package/src/db/search.ts
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { type SearchResult, type ChunkSearchResult, type SymbolResult, type UsageResult } from "./types";
|
|
3
|
+
import { escapeRegex, sanitizeFTS } from "../search/usages";
|
|
4
|
+
|
|
5
|
+
export function vectorSearch(db: Database, queryEmbedding: Float32Array, topK: number = 5): SearchResult[] {
|
|
6
|
+
return db
|
|
7
|
+
.query<
|
|
8
|
+
{
|
|
9
|
+
chunk_id: number;
|
|
10
|
+
distance: number;
|
|
11
|
+
snippet: string;
|
|
12
|
+
chunk_index: number;
|
|
13
|
+
entity_name: string | null;
|
|
14
|
+
chunk_type: string | null;
|
|
15
|
+
path: string;
|
|
16
|
+
},
|
|
17
|
+
[Uint8Array, number]
|
|
18
|
+
>(
|
|
19
|
+
`SELECT v.chunk_id, v.distance, c.snippet, c.chunk_index, c.entity_name, c.chunk_type, f.path
|
|
20
|
+
FROM (SELECT chunk_id, distance FROM vec_chunks WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
|
|
21
|
+
JOIN chunks c ON c.id = v.chunk_id
|
|
22
|
+
JOIN files f ON f.id = c.file_id`
|
|
23
|
+
)
|
|
24
|
+
.all(new Uint8Array(queryEmbedding.buffer), topK)
|
|
25
|
+
.map((row) => ({
|
|
26
|
+
path: row.path,
|
|
27
|
+
score: 1 / (1 + row.distance),
|
|
28
|
+
snippet: row.snippet,
|
|
29
|
+
chunkIndex: row.chunk_index,
|
|
30
|
+
entityName: row.entity_name,
|
|
31
|
+
chunkType: row.chunk_type,
|
|
32
|
+
}));
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
export function textSearch(db: Database, query: string, topK: number = 5): SearchResult[] {
|
|
36
|
+
return db
|
|
37
|
+
.query<
|
|
38
|
+
{
|
|
39
|
+
snippet: string;
|
|
40
|
+
chunk_index: number;
|
|
41
|
+
entity_name: string | null;
|
|
42
|
+
chunk_type: string | null;
|
|
43
|
+
rank: number;
|
|
44
|
+
path: string;
|
|
45
|
+
},
|
|
46
|
+
[string, number]
|
|
47
|
+
>(
|
|
48
|
+
`SELECT c.snippet, c.chunk_index, c.entity_name, c.chunk_type, f.path, rank
|
|
49
|
+
FROM fts_chunks fts
|
|
50
|
+
JOIN chunks c ON c.id = fts.rowid
|
|
51
|
+
JOIN files f ON f.id = c.file_id
|
|
52
|
+
WHERE fts_chunks MATCH ?
|
|
53
|
+
ORDER BY rank
|
|
54
|
+
LIMIT ?`
|
|
55
|
+
)
|
|
56
|
+
.all(sanitizeFTS(query), topK)
|
|
57
|
+
.map((row) => ({
|
|
58
|
+
path: row.path,
|
|
59
|
+
score: 1 / (1 + Math.abs(row.rank)),
|
|
60
|
+
snippet: row.snippet,
|
|
61
|
+
chunkIndex: row.chunk_index,
|
|
62
|
+
entityName: row.entity_name,
|
|
63
|
+
chunkType: row.chunk_type,
|
|
64
|
+
}));
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
export function vectorSearchChunks(db: Database, queryEmbedding: Float32Array, topK: number = 8): ChunkSearchResult[] {
|
|
68
|
+
return db
|
|
69
|
+
.query<
|
|
70
|
+
{
|
|
71
|
+
chunk_id: number;
|
|
72
|
+
distance: number;
|
|
73
|
+
snippet: string;
|
|
74
|
+
chunk_index: number;
|
|
75
|
+
entity_name: string | null;
|
|
76
|
+
chunk_type: string | null;
|
|
77
|
+
start_line: number | null;
|
|
78
|
+
end_line: number | null;
|
|
79
|
+
path: string;
|
|
80
|
+
},
|
|
81
|
+
[Uint8Array, number]
|
|
82
|
+
>(
|
|
83
|
+
`SELECT v.chunk_id, v.distance, c.snippet, c.chunk_index, c.entity_name, c.chunk_type,
|
|
84
|
+
c.start_line, c.end_line, f.path
|
|
85
|
+
FROM (SELECT chunk_id, distance FROM vec_chunks WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
|
|
86
|
+
JOIN chunks c ON c.id = v.chunk_id
|
|
87
|
+
JOIN files f ON f.id = c.file_id`
|
|
88
|
+
)
|
|
89
|
+
.all(new Uint8Array(queryEmbedding.buffer), topK)
|
|
90
|
+
.map((row) => ({
|
|
91
|
+
path: row.path,
|
|
92
|
+
score: 1 / (1 + row.distance),
|
|
93
|
+
content: row.snippet,
|
|
94
|
+
chunkIndex: row.chunk_index,
|
|
95
|
+
entityName: row.entity_name,
|
|
96
|
+
chunkType: row.chunk_type,
|
|
97
|
+
startLine: row.start_line,
|
|
98
|
+
endLine: row.end_line,
|
|
99
|
+
}));
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
export function textSearchChunks(db: Database, query: string, topK: number = 8): ChunkSearchResult[] {
|
|
103
|
+
return db
|
|
104
|
+
.query<
|
|
105
|
+
{
|
|
106
|
+
snippet: string;
|
|
107
|
+
chunk_index: number;
|
|
108
|
+
entity_name: string | null;
|
|
109
|
+
chunk_type: string | null;
|
|
110
|
+
start_line: number | null;
|
|
111
|
+
end_line: number | null;
|
|
112
|
+
rank: number;
|
|
113
|
+
path: string;
|
|
114
|
+
},
|
|
115
|
+
[string, number]
|
|
116
|
+
>(
|
|
117
|
+
`SELECT c.snippet, c.chunk_index, c.entity_name, c.chunk_type, c.start_line, c.end_line,
|
|
118
|
+
f.path, rank
|
|
119
|
+
FROM fts_chunks fts
|
|
120
|
+
JOIN chunks c ON c.id = fts.rowid
|
|
121
|
+
JOIN files f ON f.id = c.file_id
|
|
122
|
+
WHERE fts_chunks MATCH ?
|
|
123
|
+
ORDER BY rank
|
|
124
|
+
LIMIT ?`
|
|
125
|
+
)
|
|
126
|
+
.all(sanitizeFTS(query), topK)
|
|
127
|
+
.map((row) => ({
|
|
128
|
+
path: row.path,
|
|
129
|
+
score: 1 / (1 + Math.abs(row.rank)),
|
|
130
|
+
content: row.snippet,
|
|
131
|
+
chunkIndex: row.chunk_index,
|
|
132
|
+
entityName: row.entity_name,
|
|
133
|
+
chunkType: row.chunk_type,
|
|
134
|
+
startLine: row.start_line,
|
|
135
|
+
endLine: row.end_line,
|
|
136
|
+
}));
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
export function searchSymbols(
|
|
140
|
+
db: Database,
|
|
141
|
+
query: string,
|
|
142
|
+
exact: boolean = false,
|
|
143
|
+
type?: string,
|
|
144
|
+
topK: number = 20
|
|
145
|
+
): SymbolResult[] {
|
|
146
|
+
const pattern = exact ? query : `%${query}%`;
|
|
147
|
+
|
|
148
|
+
let sql = `
|
|
149
|
+
SELECT fe.name AS symbol_name, fe.type AS symbol_type, f.path,
|
|
150
|
+
(SELECT snippet FROM chunks
|
|
151
|
+
WHERE file_id = fe.file_id AND LOWER(entity_name) = LOWER(fe.name)
|
|
152
|
+
ORDER BY chunk_index LIMIT 1) AS snippet,
|
|
153
|
+
(SELECT chunk_index FROM chunks
|
|
154
|
+
WHERE file_id = fe.file_id AND LOWER(entity_name) = LOWER(fe.name)
|
|
155
|
+
ORDER BY chunk_index LIMIT 1) AS chunk_index
|
|
156
|
+
FROM file_exports fe
|
|
157
|
+
JOIN files f ON f.id = fe.file_id
|
|
158
|
+
WHERE LOWER(fe.name) LIKE LOWER(?)
|
|
159
|
+
`;
|
|
160
|
+
const params: (string | number)[] = [pattern];
|
|
161
|
+
|
|
162
|
+
if (type) {
|
|
163
|
+
sql += " AND fe.type = ?";
|
|
164
|
+
params.push(type);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
sql += " ORDER BY fe.name LIMIT ?";
|
|
168
|
+
params.push(topK);
|
|
169
|
+
|
|
170
|
+
return db
|
|
171
|
+
.query<{ symbol_name: string; symbol_type: string; path: string; snippet: string | null; chunk_index: number | null }, any[]>(sql)
|
|
172
|
+
.all(...params)
|
|
173
|
+
.map((r) => ({
|
|
174
|
+
path: r.path,
|
|
175
|
+
symbolName: r.symbol_name,
|
|
176
|
+
symbolType: r.symbol_type,
|
|
177
|
+
snippet: r.snippet,
|
|
178
|
+
chunkIndex: r.chunk_index,
|
|
179
|
+
}));
|
|
180
|
+
}
|
|
181
|
+
|
|
182
|
+
export function findUsages(db: Database, symbolName: string, exact: boolean, top: number): UsageResult[] {
|
|
183
|
+
const definingFileIds = new Set(
|
|
184
|
+
db
|
|
185
|
+
.query<{ file_id: number }, [string]>(
|
|
186
|
+
"SELECT file_id FROM file_exports WHERE LOWER(name) = LOWER(?)"
|
|
187
|
+
)
|
|
188
|
+
.all(symbolName)
|
|
189
|
+
.map((r) => r.file_id)
|
|
190
|
+
);
|
|
191
|
+
|
|
192
|
+
let rows: { id: number; snippet: string; file_id: number; chunk_index: number; start_line: number | null; path: string }[] = [];
|
|
193
|
+
try {
|
|
194
|
+
const ftsQuery = `"${symbolName.replace(/"/g, '""')}"`;
|
|
195
|
+
rows = db
|
|
196
|
+
.query<
|
|
197
|
+
{ id: number; snippet: string; file_id: number; chunk_index: number; start_line: number | null; path: string },
|
|
198
|
+
[string, number]
|
|
199
|
+
>(
|
|
200
|
+
`SELECT c.id, c.snippet, c.file_id, c.chunk_index, c.start_line, f.path
|
|
201
|
+
FROM fts_chunks fts
|
|
202
|
+
JOIN chunks c ON c.id = fts.rowid
|
|
203
|
+
JOIN files f ON f.id = c.file_id
|
|
204
|
+
WHERE fts_chunks MATCH ?
|
|
205
|
+
ORDER BY rank
|
|
206
|
+
LIMIT ?`
|
|
207
|
+
)
|
|
208
|
+
.all(ftsQuery, top * 5);
|
|
209
|
+
} catch {
|
|
210
|
+
return [];
|
|
211
|
+
}
|
|
212
|
+
|
|
213
|
+
const pattern = exact
|
|
214
|
+
? new RegExp(`\\b${escapeRegex(symbolName)}\\b`, "i")
|
|
215
|
+
: new RegExp(`\\b${escapeRegex(symbolName)}`, "i");
|
|
216
|
+
|
|
217
|
+
const results: UsageResult[] = [];
|
|
218
|
+
|
|
219
|
+
for (const row of rows) {
|
|
220
|
+
if (definingFileIds.has(row.file_id)) continue;
|
|
221
|
+
|
|
222
|
+
const lines = row.snippet.split("\n");
|
|
223
|
+
let matchOffset = -1;
|
|
224
|
+
let matchSnippet = row.snippet.slice(0, 120).trim();
|
|
225
|
+
|
|
226
|
+
for (let i = 0; i < lines.length; i++) {
|
|
227
|
+
if (pattern.test(lines[i])) {
|
|
228
|
+
matchOffset = i;
|
|
229
|
+
matchSnippet = lines[i].trim();
|
|
230
|
+
break;
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
const line =
|
|
235
|
+
row.start_line != null && matchOffset >= 0
|
|
236
|
+
? row.start_line + matchOffset
|
|
237
|
+
: row.start_line;
|
|
238
|
+
|
|
239
|
+
results.push({ path: row.path, line, snippet: matchSnippet });
|
|
240
|
+
if (results.length >= top) break;
|
|
241
|
+
}
|
|
242
|
+
|
|
243
|
+
return results;
|
|
244
|
+
}
|