@winci/local-rag 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/.claude-plugin/plugin.json +24 -0
  2. package/.mcp.json +11 -0
  3. package/LICENSE +21 -0
  4. package/README.md +567 -0
  5. package/hooks/hooks.json +25 -0
  6. package/hooks/scripts/reindex-file.sh +19 -0
  7. package/hooks/scripts/session-start.sh +11 -0
  8. package/package.json +52 -0
  9. package/skills/local-rag/SKILL.md +42 -0
  10. package/src/cli/commands/analytics.ts +58 -0
  11. package/src/cli/commands/benchmark.ts +30 -0
  12. package/src/cli/commands/checkpoint.ts +85 -0
  13. package/src/cli/commands/conversation.ts +102 -0
  14. package/src/cli/commands/demo.ts +119 -0
  15. package/src/cli/commands/eval.ts +31 -0
  16. package/src/cli/commands/index-cmd.ts +26 -0
  17. package/src/cli/commands/init.ts +35 -0
  18. package/src/cli/commands/map.ts +21 -0
  19. package/src/cli/commands/remove.ts +15 -0
  20. package/src/cli/commands/search-cmd.ts +59 -0
  21. package/src/cli/commands/serve.ts +5 -0
  22. package/src/cli/commands/status.ts +13 -0
  23. package/src/cli/index.ts +117 -0
  24. package/src/cli/progress.ts +21 -0
  25. package/src/cli/setup.ts +192 -0
  26. package/src/config/index.ts +101 -0
  27. package/src/conversation/indexer.ts +147 -0
  28. package/src/conversation/parser.ts +323 -0
  29. package/src/db/analytics.ts +116 -0
  30. package/src/db/annotations.ts +161 -0
  31. package/src/db/checkpoints.ts +166 -0
  32. package/src/db/conversation.ts +241 -0
  33. package/src/db/files.ts +146 -0
  34. package/src/db/graph.ts +250 -0
  35. package/src/db/index.ts +468 -0
  36. package/src/db/search.ts +244 -0
  37. package/src/db/types.ts +85 -0
  38. package/src/embeddings/embed.ts +73 -0
  39. package/src/graph/resolver.ts +305 -0
  40. package/src/indexing/chunker.ts +523 -0
  41. package/src/indexing/indexer.ts +263 -0
  42. package/src/indexing/parse.ts +99 -0
  43. package/src/indexing/watcher.ts +84 -0
  44. package/src/main.ts +8 -0
  45. package/src/search/benchmark.ts +139 -0
  46. package/src/search/eval.ts +171 -0
  47. package/src/search/hybrid.ts +194 -0
  48. package/src/search/reranker.ts +99 -0
  49. package/src/search/usages.ts +27 -0
  50. package/src/server/index.ts +126 -0
  51. package/src/tools/analytics-tools.ts +58 -0
  52. package/src/tools/annotation-tools.ts +89 -0
  53. package/src/tools/checkpoint-tools.ts +147 -0
  54. package/src/tools/conversation-tools.ts +86 -0
  55. package/src/tools/git-tools.ts +103 -0
  56. package/src/tools/graph-tools.ts +163 -0
  57. package/src/tools/index-tools.ts +91 -0
  58. package/src/tools/index.ts +33 -0
  59. package/src/tools/search.ts +238 -0
  60. package/src/types.ts +9 -0
  61. package/src/utils/log.ts +39 -0
@@ -0,0 +1,166 @@
1
+ import { Database } from "bun:sqlite";
2
+ import { type CheckpointRow } from "./types";
3
+
4
/**
 * Persists a conversation checkpoint together with its embedding vector.
 *
 * The metadata row is inserted into `conversation_checkpoints` and the vector
 * into `vec_checkpoints`, keyed by the id of the new metadata row. Both inserts
 * run inside one transaction.
 *
 * @param db - Open database handle.
 * @param sessionId - Session the checkpoint belongs to.
 * @param turnIndex - Index of the turn the checkpoint refers to.
 * @param timestamp - Timestamp string stored as-is.
 * @param type - Checkpoint type label stored as-is.
 * @param title - Short title.
 * @param summary - Summary text.
 * @param filesInvolved - File paths; stored as a JSON array string.
 * @param tags - Tags; stored as a JSON array string.
 * @param embedding - Vector written to `vec_checkpoints` as raw bytes.
 * @returns The row id of the inserted checkpoint.
 */
export function createCheckpoint(
  db: Database,
  sessionId: string,
  turnIndex: number,
  timestamp: string,
  type: string,
  title: string,
  summary: string,
  filesInvolved: string[],
  tags: string[],
  embedding: Float32Array
): number {
  let checkpointId = 0;

  const tx = db.transaction(() => {
    db.run(
      `INSERT INTO conversation_checkpoints
         (session_id, turn_index, timestamp, type, title, summary, files_involved, tags, embedding)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        sessionId,
        turnIndex,
        timestamp,
        type,
        title,
        summary,
        JSON.stringify(filesInvolved),
        JSON.stringify(tags),
        // The metadata row's embedding column is written as NULL; the vector
        // itself goes into vec_checkpoints below.
        null,
      ]
    );

    checkpointId = Number(
      db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
    );

    // Honour the view's offset and length: `embedding` may be a subarray of a
    // larger buffer, in which case `embedding.buffer` alone would serialize
    // unrelated floats and produce a vector of the wrong size.
    const vectorBytes = new Uint8Array(
      embedding.buffer,
      embedding.byteOffset,
      embedding.byteLength
    );

    db.run(
      "INSERT INTO vec_checkpoints (checkpoint_id, embedding) VALUES (?, ?)",
      [checkpointId, vectorBytes]
    );
  });

  tx();
  return checkpointId;
}
49
+
50
/**
 * Lists checkpoints, newest first (ordered by `timestamp` descending).
 *
 * @param db - Open database handle.
 * @param sessionId - When truthy, only checkpoints of this session are returned.
 * @param type - When truthy, only checkpoints of this type are returned.
 * @param limit - Maximum number of rows (bound to the SQL `LIMIT`).
 * @returns Checkpoint rows with `files_involved` and `tags` parsed from JSON;
 *          an empty or NULL column yields an empty array.
 */
export function listCheckpoints(
  db: Database,
  sessionId?: string,
  type?: string,
  limit: number = 20
): CheckpointRow[] {
  type RawCheckpoint = {
    id: number;
    session_id: string;
    turn_index: number;
    timestamp: string;
    type: string;
    title: string;
    summary: string;
    files_involved: string;
    tags: string;
  };

  // Collect the SQL fragments and their bindings side by side.
  const fragments = ["SELECT * FROM conversation_checkpoints WHERE 1=1"];
  const bindings: (string | number)[] = [];

  if (sessionId) {
    fragments.push(" AND session_id = ?");
    bindings.push(sessionId);
  }
  if (type) {
    fragments.push(" AND type = ?");
    bindings.push(type);
  }

  fragments.push(" ORDER BY timestamp DESC LIMIT ?");
  bindings.push(limit);

  const rawRows = db
    .query<RawCheckpoint, (string | number)[]>(fragments.join(""))
    .all(...bindings);

  const checkpoints: CheckpointRow[] = [];
  for (const raw of rawRows) {
    checkpoints.push({
      id: raw.id,
      sessionId: raw.session_id,
      turnIndex: raw.turn_index,
      timestamp: raw.timestamp,
      type: raw.type,
      title: raw.title,
      summary: raw.summary,
      filesInvolved: JSON.parse(raw.files_involved || "[]"),
      tags: JSON.parse(raw.tags || "[]"),
    });
  }
  return checkpoints;
}
89
+
90
/**
 * Finds checkpoints whose stored vectors are nearest to `queryEmbedding`.
 *
 * Asks `vec_checkpoints` for the `topK * 2` nearest vectors, joins each hit to
 * its `conversation_checkpoints` row, then applies the optional `type` filter in
 * JavaScript and stops once `topK` results are collected. Because the filter
 * runs after the nearest-neighbour query, fewer than `topK` results can be
 * returned when `type` is given.
 *
 * @param db - Open database handle.
 * @param queryEmbedding - Query vector, bound to the `MATCH` as raw bytes.
 * @param topK - Maximum number of results to return.
 * @param type - When truthy, only checkpoints of this type are kept.
 * @returns Matching checkpoints, each with `score = 1 / (1 + distance)`.
 */
export function searchCheckpoints(
  db: Database,
  queryEmbedding: Float32Array,
  topK: number = 5,
  type?: string
): (CheckpointRow & { score: number })[] {
  // Honour the view's offset and length: the query vector may be a subarray of
  // a larger buffer, and `queryEmbedding.buffer` alone would bind all of it.
  const queryBytes = new Uint8Array(
    queryEmbedding.buffer,
    queryEmbedding.byteOffset,
    queryEmbedding.byteLength
  );

  const rows = db
    .query<
      {
        checkpoint_id: number;
        distance: number;
        id: number;
        session_id: string;
        turn_index: number;
        timestamp: string;
        type: string;
        title: string;
        summary: string;
        files_involved: string;
        tags: string;
      },
      [Uint8Array, number]
    >(
      `SELECT v.checkpoint_id, v.distance,
              cp.id, cp.session_id, cp.turn_index, cp.timestamp, cp.type,
              cp.title, cp.summary, cp.files_involved, cp.tags
       FROM (SELECT checkpoint_id, distance FROM vec_checkpoints WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
       JOIN conversation_checkpoints cp ON cp.id = v.checkpoint_id`
    )
    // Over-fetch so the post-query type filter still has candidates to choose from.
    .all(queryBytes, topK * 2);

  const results: (CheckpointRow & { score: number })[] = [];

  for (const row of rows) {
    if (type && row.type !== type) continue;

    results.push({
      id: row.id,
      sessionId: row.session_id,
      turnIndex: row.turn_index,
      timestamp: row.timestamp,
      type: row.type,
      title: row.title,
      summary: row.summary,
      filesInvolved: JSON.parse(row.files_involved || "[]"),
      tags: JSON.parse(row.tags || "[]"),
      // Map distance (0 = identical) into a (0, 1] similarity-style score.
      score: 1 / (1 + row.distance),
    });

    if (results.length >= topK) break;
  }

  return results;
}
144
+
145
/**
 * Loads a single checkpoint by its row id.
 *
 * @param db - Open database handle.
 * @param id - Primary key in `conversation_checkpoints`.
 * @returns The checkpoint with `files_involved` and `tags` parsed from JSON, or
 *          `null` when no row has that id.
 */
export function getCheckpoint(db: Database, id: number): CheckpointRow | null {
  type RawCheckpoint = {
    id: number;
    session_id: string;
    turn_index: number;
    timestamp: string;
    type: string;
    title: string;
    summary: string;
    files_involved: string;
    tags: string;
  };

  const lookup = db.query<RawCheckpoint, [number]>(
    "SELECT id, session_id, turn_index, timestamp, type, title, summary, files_involved, tags FROM conversation_checkpoints WHERE id = ?"
  );
  const raw = lookup.get(id);

  if (raw) {
    const checkpoint: CheckpointRow = {
      id: raw.id,
      sessionId: raw.session_id,
      turnIndex: raw.turn_index,
      timestamp: raw.timestamp,
      type: raw.type,
      title: raw.title,
      summary: raw.summary,
      // Empty/NULL JSON columns are treated as empty arrays.
      filesInvolved: JSON.parse(raw.files_involved || "[]"),
      tags: JSON.parse(raw.tags || "[]"),
    };
    return checkpoint;
  }
  return null;
}
@@ -0,0 +1,241 @@
1
+ import { Database } from "bun:sqlite";
2
+ import { type ConversationSearchResult } from "./types";
3
+ import { sanitizeFTS } from "../search/usages";
4
+
5
/**
 * Inserts a conversation session row, or refreshes an existing one.
 *
 * On a `session_id` conflict only `file_mtime`, `indexed_at` and `read_offset`
 * are overwritten; `jsonl_path` and `started_at` keep their stored values.
 * `indexed_at` is always set to the current time.
 *
 * @param db - Open database handle.
 * @param sessionId - Unique session identifier (conflict target).
 * @param jsonlPath - Path of the session's JSONL file.
 * @param startedAt - Session start timestamp, stored as-is.
 * @param mtime - Value stored in `file_mtime`.
 * @param readOffset - Value stored in `read_offset`.
 */
export function upsertSession(
  db: Database,
  sessionId: string,
  jsonlPath: string,
  startedAt: string,
  mtime: number,
  readOffset: number
) {
  const indexedAt = new Date().toISOString();
  const bindings = [sessionId, jsonlPath, startedAt, indexedAt, mtime, readOffset];

  db.run(
    `INSERT INTO conversation_sessions (session_id, jsonl_path, started_at, indexed_at, file_mtime, read_offset)
     VALUES (?, ?, ?, ?, ?, ?)
     ON CONFLICT(session_id) DO UPDATE SET
       file_mtime = excluded.file_mtime,
       indexed_at = excluded.indexed_at,
       read_offset = excluded.read_offset`,
    bindings
  );
}
23
+
24
/**
 * Looks up a conversation session by its session id.
 *
 * @param db - Open database handle.
 * @param sessionId - Value matched against `conversation_sessions.session_id`.
 * @returns The session's id, path, mtime, read offset and turn count in
 *          camelCase, or `null` when no such session exists.
 */
export function getSession(db: Database, sessionId: string): {
  id: number;
  sessionId: string;
  jsonlPath: string;
  mtime: number;
  readOffset: number;
  turnCount: number;
} | null {
  type RawSession = {
    id: number;
    session_id: string;
    jsonl_path: string;
    file_mtime: number;
    read_offset: number;
    turn_count: number;
  };

  const lookup = db.query<RawSession, [string]>(
    "SELECT id, session_id, jsonl_path, file_mtime, read_offset, turn_count FROM conversation_sessions WHERE session_id = ?"
  );
  const raw = lookup.get(sessionId);

  return raw
    ? {
        id: raw.id,
        sessionId: raw.session_id,
        jsonlPath: raw.jsonl_path,
        mtime: raw.file_mtime,
        readOffset: raw.read_offset,
        turnCount: raw.turn_count,
      }
    : null;
}
48
+
49
/**
 * Updates a session's turn count, token total and read offset, and stamps
 * `indexed_at` with the current time.
 *
 * @param db - Open database handle.
 * @param sessionId - Session whose row is updated.
 * @param turnCount - New `turn_count`.
 * @param totalTokens - New `total_tokens`.
 * @param readOffset - New `read_offset`.
 */
export function updateSessionStats(db: Database, sessionId: string, turnCount: number, totalTokens: number, readOffset: number) {
  const statement = `UPDATE conversation_sessions SET turn_count = ?, total_tokens = ?, read_offset = ?, indexed_at = ? WHERE session_id = ?`;
  const now = new Date().toISOString();
  db.run(statement, [turnCount, totalTokens, readOffset, now, sessionId]);
}
55
+
56
/**
 * Stores one conversation turn plus its embedded chunks.
 *
 * The turn row is inserted with `INSERT OR IGNORE`; when the insert is ignored
 * (the row already exists) nothing else is written and `0` is returned.
 * Otherwise each chunk is inserted into `conversation_chunks` and its vector
 * into `vec_conversation`. Everything runs inside one transaction.
 *
 * @param db - Open database handle.
 * @param sessionId - Session the turn belongs to.
 * @param turnIndex - Position of the turn within the session.
 * @param timestamp - Timestamp string stored as-is.
 * @param userText - User message text.
 * @param assistantText - Assistant reply text.
 * @param toolsUsed - Tool names; stored as a JSON array string.
 * @param filesReferenced - File paths; stored as a JSON array string.
 * @param tokenCost - Token cost recorded for the turn.
 * @param summary - Summary text for the turn.
 * @param chunks - Snippets with their embedding vectors.
 * @returns The new turn's row id, or `0` when the turn was a duplicate.
 */
export function insertTurn(
  db: Database,
  sessionId: string,
  turnIndex: number,
  timestamp: string,
  userText: string,
  assistantText: string,
  toolsUsed: string[],
  filesReferenced: string[],
  tokenCost: number,
  summary: string,
  chunks: { snippet: string; embedding: Float32Array }[]
): number {
  let turnId = 0;

  const tx = db.transaction(() => {
    db.run(
      `INSERT OR IGNORE INTO conversation_turns
         (session_id, turn_index, timestamp, user_text, assistant_text, tools_used, files_referenced, token_cost, summary)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        sessionId,
        turnIndex,
        timestamp,
        userText,
        assistantText,
        JSON.stringify(toolsUsed),
        JSON.stringify(filesReferenced),
        tokenCost,
        summary,
      ]
    );

    // If the INSERT was ignored (duplicate), changes() returns 0. In that case
    // last_insert_rowid() would still refer to an earlier insert, so bail out
    // before reading it.
    const inserted = db.query<{ c: number }, []>("SELECT changes() as c").get()!.c;
    if (inserted === 0) return;

    turnId = Number(
      db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
    );

    for (const [chunkIndex, { snippet, embedding }] of chunks.entries()) {
      db.run(
        "INSERT INTO conversation_chunks (turn_id, chunk_index, snippet) VALUES (?, ?, ?)",
        [turnId, chunkIndex, snippet]
      );
      const chunkId = Number(
        db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
      );
      db.run(
        "INSERT INTO vec_conversation (chunk_id, embedding) VALUES (?, ?)",
        [
          chunkId,
          // Honour the view's offset and length: an embedding that is a
          // subarray of a batch buffer must not serialize the whole batch.
          new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength),
        ]
      );
    }
  });

  tx();
  return turnId;
}
116
+
117
/**
 * Counts the turns stored for a session.
 *
 * @param db - Open database handle.
 * @param sessionId - Session whose `conversation_turns` rows are counted.
 * @returns The number of matching rows.
 */
export function getTurnCount(db: Database, sessionId: string): number {
  const countTurns = db.query<{ count: number }, [string]>(
    "SELECT COUNT(*) as count FROM conversation_turns WHERE session_id = ?"
  );
  // COUNT(*) always yields exactly one row, hence the non-null assertion.
  const { count } = countTurns.get(sessionId)!;
  return count;
}
125
+
126
/**
 * Vector search over conversation chunks, returning at most one hit per turn.
 *
 * Asks `vec_conversation` for the nearest chunk vectors (`topK * 10` when a
 * session filter is given, otherwise `topK * 3`), joins each hit to its chunk
 * and turn rows, keeps only the first hit seen for each turn, applies the
 * optional session filter in JavaScript, and stops once `topK` results are
 * collected.
 *
 * @param db - Open database handle.
 * @param queryEmbedding - Query vector, bound to the `MATCH` as raw bytes.
 * @param topK - Maximum number of results to return.
 * @param sessionId - When truthy, only turns of this session are kept.
 * @returns Matching turns, each with `score = 1 / (1 + distance)`.
 */
export function searchConversation(
  db: Database,
  queryEmbedding: Float32Array,
  topK: number = 5,
  sessionId?: string
): ConversationSearchResult[] {
  // Honour the view's offset and length: the query vector may be a subarray of
  // a larger buffer, and `queryEmbedding.buffer` alone would bind all of it.
  const queryBytes = new Uint8Array(
    queryEmbedding.buffer,
    queryEmbedding.byteOffset,
    queryEmbedding.byteLength
  );

  // Use subquery for vector search, then JOIN for turn data
  const rows = db
    .query<
      {
        chunk_id: number;
        distance: number;
        snippet: string;
        turn_id: number;
        turn_index: number;
        session_id: string;
        timestamp: string;
        summary: string;
        tools_used: string;
        files_referenced: string;
      },
      [Uint8Array, number]
    >(
      `SELECT v.chunk_id, v.distance, cc.snippet, cc.turn_id,
              ct.turn_index, ct.session_id, ct.timestamp, ct.summary, ct.tools_used, ct.files_referenced
       FROM (SELECT chunk_id, distance FROM vec_conversation WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
       JOIN conversation_chunks cc ON cc.id = v.chunk_id
       JOIN conversation_turns ct ON ct.id = cc.turn_id`
    )
    // Over-fetch: per-turn de-duplication and the session filter both discard rows.
    .all(queryBytes, sessionId ? topK * 10 : topK * 3);

  const results: ConversationSearchResult[] = [];
  const seenTurns = new Set<number>();

  for (const row of rows) {
    // Only the first chunk encountered for a turn is reported.
    if (seenTurns.has(row.turn_id)) continue;
    seenTurns.add(row.turn_id);

    if (sessionId && row.session_id !== sessionId) continue;

    results.push({
      turnId: row.turn_id,
      turnIndex: row.turn_index,
      sessionId: row.session_id,
      timestamp: row.timestamp,
      summary: row.summary || "",
      snippet: row.snippet,
      toolsUsed: JSON.parse(row.tools_used || "[]"),
      filesReferenced: JSON.parse(row.files_referenced || "[]"),
      score: 1 / (1 + row.distance),
    });

    if (results.length >= topK) break;
  }

  return results;
}
183
+
184
/**
 * Full-text search over conversation chunks, returning at most one hit per turn.
 *
 * The query string is passed through `sanitizeFTS` and matched against
 * `fts_conversation`, ordered by `rank`. `topK * 10` rows are fetched when a
 * session filter is given, otherwise `topK * 3`. Only the first row seen for
 * each turn is kept, the optional session filter is applied in JavaScript, and
 * collection stops once `topK` results are gathered.
 *
 * @param db - Open database handle.
 * @param query - Raw search text.
 * @param topK - Maximum number of results to return.
 * @param sessionId - When truthy, only turns of this session are kept.
 * @returns Matching turns, each with `score = 1 / (1 + |rank|)`.
 */
export function textSearchConversation(
  db: Database,
  query: string,
  topK: number = 5,
  sessionId?: string
): ConversationSearchResult[] {
  type FtsHit = {
    snippet: string;
    turn_id: number;
    rank: number;
    turn_index: number;
    session_id: string;
    timestamp: string;
    summary: string;
    tools_used: string;
    files_referenced: string;
  };

  // Over-fetch: per-turn de-duplication and the session filter both discard rows.
  const fetchLimit = sessionId ? topK * 10 : topK * 3;

  const statement = db.query<FtsHit, [string, number]>(
    `SELECT cc.snippet, cc.turn_id, rank,
            ct.turn_index, ct.session_id, ct.timestamp, ct.summary, ct.tools_used, ct.files_referenced
     FROM fts_conversation fts
     JOIN conversation_chunks cc ON cc.id = fts.rowid
     JOIN conversation_turns ct ON ct.id = cc.turn_id
     WHERE fts_conversation MATCH ?
     ORDER BY rank
     LIMIT ?`
  );
  const hits = statement.all(sanitizeFTS(query), fetchLimit);

  const collected: ConversationSearchResult[] = [];
  const visitedTurns = new Set<number>();

  for (const hit of hits) {
    if (visitedTurns.has(hit.turn_id)) continue;
    visitedTurns.add(hit.turn_id);

    const wrongSession = Boolean(sessionId) && hit.session_id !== sessionId;
    if (wrongSession) continue;

    collected.push({
      turnId: hit.turn_id,
      turnIndex: hit.turn_index,
      sessionId: hit.session_id,
      timestamp: hit.timestamp,
      summary: hit.summary || "",
      snippet: hit.snippet,
      toolsUsed: JSON.parse(hit.tools_used || "[]"),
      filesReferenced: JSON.parse(hit.files_referenced || "[]"),
      score: 1 / (1 + Math.abs(hit.rank)),
    });

    if (collected.length >= topK) break;
  }

  return collected;
}
@@ -0,0 +1,146 @@
1
+ import { Database } from "bun:sqlite";
2
+ import { type EmbeddedChunk } from "../types";
3
+ import { type StoredFile } from "./types";
4
+
5
/**
 * Fetches the stored record for a file path.
 *
 * @param db - Open database handle.
 * @param path - Value matched against `files.path`.
 * @returns The file's id, path, hash and `indexedAt`, or `null` when the path
 *          is not in the `files` table.
 */
export function getFileByPath(db: Database, path: string): StoredFile | null {
  const lookup = db.query<StoredFile, [string]>(
    "SELECT id, path, hash, indexed_at as indexedAt FROM files WHERE path = ?"
  );
  const stored = lookup.get(path);
  return stored;
}
12
+
13
/**
 * Prepares a file row for (re-)indexing and returns its id.
 *
 * For a path not yet in `files`, a new row is inserted. For a known path, the
 * file's existing chunks and their `vec_chunks` rows are deleted and the row's
 * hash and `indexed_at` are updated in place, all in one transaction.
 *
 * @param db - Open database handle.
 * @param path - File path identifying the row.
 * @param hash - Content hash to store.
 * @returns The id of the new or existing `files` row.
 */
export function upsertFileStart(db: Database, path: string, hash: string): number {
  const known = getFileByPath(db, path);

  if (!known) {
    db.run(
      "INSERT INTO files (path, hash, indexed_at) VALUES (?, ?, ?)",
      [path, hash, new Date().toISOString()]
    );
    const inserted = db
      .query<{ id: number }, []>("SELECT last_insert_rowid() as id")
      .get()!;
    return Number(inserted.id);
  }

  // UPDATE instead of DELETE+INSERT to preserve files.id — this keeps
  // file_imports.resolved_file_id FKs pointing at this file intact.
  const resetExisting = db.transaction(() => {
    const staleChunks = db
      .query<{ id: number }, [number]>("SELECT id FROM chunks WHERE file_id = ?")
      .all(known.id);
    // Vector rows are removed one chunk at a time before the chunk rows themselves.
    staleChunks.forEach((chunk) => {
      db.run("DELETE FROM vec_chunks WHERE chunk_id = ?", [chunk.id]);
    });
    db.run("DELETE FROM chunks WHERE file_id = ?", [known.id]);
    db.run(
      "UPDATE files SET hash = ?, indexed_at = ? WHERE id = ?",
      [hash, new Date().toISOString(), known.id]
    );
  });
  resetExisting();
  return known.id;
}
43
+
44
/**
 * Inserts a batch of embedded chunks for a file in one transaction.
 *
 * Each chunk gets a `chunks` row (with `chunk_index = startIndex + position`)
 * and a `vec_chunks` row holding its vector. Optional metadata that is missing
 * on a chunk is stored as NULL.
 *
 * @param db - Open database handle.
 * @param fileId - Id of the owning `files` row.
 * @param chunks - Chunks to insert, in order.
 * @param startIndex - `chunk_index` assigned to the first chunk of this batch.
 */
export function insertChunkBatch(
  db: Database,
  fileId: number,
  chunks: EmbeddedChunk[],
  startIndex: number
) {
  const tx = db.transaction(() => {
    for (const [offset, chunk] of chunks.entries()) {
      const { snippet, embedding, entityName, chunkType, startLine, endLine } = chunk;
      db.run(
        "INSERT INTO chunks (file_id, chunk_index, snippet, entity_name, chunk_type, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
        [fileId, startIndex + offset, snippet, entityName ?? null, chunkType ?? null, startLine ?? null, endLine ?? null]
      );
      const chunkId = Number(
        db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
      );
      db.run(
        "INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, ?)",
        [
          chunkId,
          // Honour the view's offset and length: an embedding that is a
          // subarray of a batch buffer must not serialize the whole batch.
          new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength),
        ]
      );
    }
  });
  tx();
}
68
+
69
/**
 * Replaces a file's stored record and all of its chunks in one step.
 *
 * Runs `upsertFileStart` (reset or create the file row) and `insertChunkBatch`
 * (insert every chunk starting at index 0) inside a single outer transaction,
 * so a failure while inserting chunks also rolls back the hash update and the
 * deletion of the previous chunks instead of leaving the file recorded with no
 * chunks. The helpers' own transactions nest inside this one.
 *
 * @param db - Open database handle.
 * @param path - File path identifying the row.
 * @param hash - Content hash to store.
 * @param chunks - Complete set of embedded chunks for the file.
 */
export function upsertFile(
  db: Database,
  path: string,
  hash: string,
  chunks: EmbeddedChunk[]
) {
  const replaceFile = db.transaction(() => {
    const fileId = upsertFileStart(db, path, hash);
    insertChunkBatch(db, fileId, chunks, 0);
  });
  replaceFile();
}
78
+
79
/**
 * Deletes a file and everything indexed for it.
 *
 * Removes the file's `vec_chunks` rows, its `chunks` rows and finally the
 * `files` row, in one transaction.
 *
 * @param db - Open database handle.
 * @param path - Path of the file to remove.
 * @returns `true` when the file existed and was removed, `false` when the path
 *          was not in the index.
 */
export function removeFile(db: Database, path: string): boolean {
  const target = getFileByPath(db, path);
  if (target === null || target === undefined) return false;

  const purge = db.transaction(() => {
    const chunkRows = db
      .query<{ id: number }, [number]>("SELECT id FROM chunks WHERE file_id = ?")
      .all(target.id);
    // Vector rows are removed one chunk at a time before the chunk rows themselves.
    chunkRows.forEach((chunk) => {
      db.run("DELETE FROM vec_chunks WHERE chunk_id = ?", [chunk.id]);
    });
    db.run("DELETE FROM chunks WHERE file_id = ?", [target.id]);
    db.run("DELETE FROM files WHERE id = ?", [target.id]);
  });
  purge();

  return true;
}
97
+
98
/**
 * Removes every indexed file whose path is not in `existingPaths`.
 *
 * For each such file the `vec_chunks` rows, `chunks` rows and the `files` row
 * are deleted. All deletions share one transaction; no transaction is opened
 * when there is nothing to remove.
 *
 * @param db - Open database handle.
 * @param existingPaths - Paths that should be kept.
 * @returns The number of files removed.
 */
export function pruneDeleted(db: Database, existingPaths: Set<string>): number {
  const indexed = db
    .query<{ id: number; path: string }, []>("SELECT id, path FROM files")
    .all();

  const stale: { id: number; path: string }[] = [];
  for (const entry of indexed) {
    if (!existingPaths.has(entry.path)) stale.push(entry);
  }
  if (stale.length === 0) return 0;

  const purgeAll = db.transaction(() => {
    stale.forEach(({ id: fileId }) => {
      const chunkRows = db
        .query<{ id: number }, [number]>("SELECT id FROM chunks WHERE file_id = ?")
        .all(fileId);
      // Vector rows are removed one chunk at a time before the chunk rows themselves.
      chunkRows.forEach((chunk) => {
        db.run("DELETE FROM vec_chunks WHERE chunk_id = ?", [chunk.id]);
      });
      db.run("DELETE FROM chunks WHERE file_id = ?", [fileId]);
      db.run("DELETE FROM files WHERE id = ?", [fileId]);
    });
  });
  purgeAll();

  return stale.length;
}
121
+
122
/**
 * Returns the id and path of every row in the `files` table.
 *
 * @param db - Open database handle.
 * @returns One `{ id, path }` entry per indexed file.
 */
export function getAllFilePaths(db: Database): { id: number; path: string }[] {
  const selectAll = db.query<{ id: number; path: string }, []>(
    "SELECT id, path FROM files"
  );
  const entries = selectAll.all();
  return entries;
}
127
+
128
/**
 * Summarises the index: how many files and chunks are stored and when the most
 * recently indexed file was indexed.
 *
 * @param db - Open database handle.
 * @returns Row counts of `files` and `chunks`, plus the greatest `indexed_at`
 *          value in `files` (or `null` when the table is empty).
 */
export function getStatus(db: Database): { totalFiles: number; totalChunks: number; lastIndexed: string | null } {
  const countRows = (sql: string): number =>
    db.query<{ count: number }, []>(sql).get()!.count;

  const totalFiles = countRows("SELECT COUNT(*) as count FROM files");
  const totalChunks = countRows("SELECT COUNT(*) as count FROM chunks");

  const newest = db
    .query<{ indexed_at: string }, []>(
      "SELECT indexed_at FROM files ORDER BY indexed_at DESC LIMIT 1"
    )
    .get();
  const lastIndexed = newest?.indexed_at ?? null;

  return { totalFiles, totalChunks, lastIndexed };
}