@winci/local-rag 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +24 -0
- package/.mcp.json +11 -0
- package/LICENSE +21 -0
- package/README.md +567 -0
- package/hooks/hooks.json +25 -0
- package/hooks/scripts/reindex-file.sh +19 -0
- package/hooks/scripts/session-start.sh +11 -0
- package/package.json +52 -0
- package/skills/local-rag/SKILL.md +42 -0
- package/src/cli/commands/analytics.ts +58 -0
- package/src/cli/commands/benchmark.ts +30 -0
- package/src/cli/commands/checkpoint.ts +85 -0
- package/src/cli/commands/conversation.ts +102 -0
- package/src/cli/commands/demo.ts +119 -0
- package/src/cli/commands/eval.ts +31 -0
- package/src/cli/commands/index-cmd.ts +26 -0
- package/src/cli/commands/init.ts +35 -0
- package/src/cli/commands/map.ts +21 -0
- package/src/cli/commands/remove.ts +15 -0
- package/src/cli/commands/search-cmd.ts +59 -0
- package/src/cli/commands/serve.ts +5 -0
- package/src/cli/commands/status.ts +13 -0
- package/src/cli/index.ts +117 -0
- package/src/cli/progress.ts +21 -0
- package/src/cli/setup.ts +192 -0
- package/src/config/index.ts +101 -0
- package/src/conversation/indexer.ts +147 -0
- package/src/conversation/parser.ts +323 -0
- package/src/db/analytics.ts +116 -0
- package/src/db/annotations.ts +161 -0
- package/src/db/checkpoints.ts +166 -0
- package/src/db/conversation.ts +241 -0
- package/src/db/files.ts +146 -0
- package/src/db/graph.ts +250 -0
- package/src/db/index.ts +468 -0
- package/src/db/search.ts +244 -0
- package/src/db/types.ts +85 -0
- package/src/embeddings/embed.ts +73 -0
- package/src/graph/resolver.ts +305 -0
- package/src/indexing/chunker.ts +523 -0
- package/src/indexing/indexer.ts +263 -0
- package/src/indexing/parse.ts +99 -0
- package/src/indexing/watcher.ts +84 -0
- package/src/main.ts +8 -0
- package/src/search/benchmark.ts +139 -0
- package/src/search/eval.ts +171 -0
- package/src/search/hybrid.ts +194 -0
- package/src/search/reranker.ts +99 -0
- package/src/search/usages.ts +27 -0
- package/src/server/index.ts +126 -0
- package/src/tools/analytics-tools.ts +58 -0
- package/src/tools/annotation-tools.ts +89 -0
- package/src/tools/checkpoint-tools.ts +147 -0
- package/src/tools/conversation-tools.ts +86 -0
- package/src/tools/git-tools.ts +103 -0
- package/src/tools/graph-tools.ts +163 -0
- package/src/tools/index-tools.ts +91 -0
- package/src/tools/index.ts +33 -0
- package/src/tools/search.ts +238 -0
- package/src/types.ts +9 -0
- package/src/utils/log.ts +39 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { type CheckpointRow } from "./types";
|
|
3
|
+
|
|
4
|
+
/**
 * Persist a conversation checkpoint and its embedding.
 *
 * Two rows are written inside a single transaction: the checkpoint metadata
 * in `conversation_checkpoints` and the vector in `vec_checkpoints`, keyed by
 * the new checkpoint's rowid.
 *
 * @param db            Open database handle.
 * @param sessionId     Session the checkpoint belongs to.
 * @param turnIndex     Turn index stored with the checkpoint.
 * @param timestamp     Timestamp string stored as-is.
 * @param type          Checkpoint type label stored as-is.
 * @param title         Short title.
 * @param summary       Summary text.
 * @param filesInvolved File paths; stored as a JSON array string.
 * @param tags          Tags; stored as a JSON array string.
 * @param embedding     Vector stored in `vec_checkpoints`.
 * @returns The rowid of the inserted checkpoint.
 */
export function createCheckpoint(
  db: Database,
  sessionId: string,
  turnIndex: number,
  timestamp: string,
  type: string,
  title: string,
  summary: string,
  filesInvolved: string[],
  tags: string[],
  embedding: Float32Array
): number {
  let checkpointId = 0;

  const tx = db.transaction(() => {
    db.run(
      `INSERT INTO conversation_checkpoints
         (session_id, turn_index, timestamp, type, title, summary, files_involved, tags, embedding)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        sessionId,
        turnIndex,
        timestamp,
        type,
        title,
        summary,
        JSON.stringify(filesInvolved),
        JSON.stringify(tags),
        // The embedding column of this table is left NULL; the vector itself
        // is written to vec_checkpoints below.
        null,
      ]
    );

    checkpointId = Number(
      db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
    );

    db.run(
      "INSERT INTO vec_checkpoints (checkpoint_id, embedding) VALUES (?, ?)",
      [
        checkpointId,
        // Respect byteOffset/byteLength so that a Float32Array that is a view
        // into a larger buffer (e.g. a subarray) serializes only its own floats.
        new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength),
      ]
    );
  });

  tx();
  return checkpointId;
}
|
|
49
|
+
|
|
50
|
+
/**
 * List checkpoints, newest first (ordered by `timestamp` descending).
 *
 * Only the columns that are mapped into the result are selected, matching
 * `getCheckpoint`/`searchCheckpoints`; the table's `embedding` column is not
 * read.
 *
 * @param db        Open database handle.
 * @param sessionId When truthy, restrict to this session.
 * @param type      When truthy, restrict to this checkpoint type.
 * @param limit     Maximum number of rows to return (default 20).
 * @returns Decoded checkpoint rows; `files_involved` and `tags` are parsed
 *          from JSON, with an empty/NULL column treated as `[]`.
 */
export function listCheckpoints(
  db: Database,
  sessionId?: string,
  type?: string,
  limit: number = 20
): CheckpointRow[] {
  let sql =
    "SELECT id, session_id, turn_index, timestamp, type, title, summary, files_involved, tags" +
    " FROM conversation_checkpoints WHERE 1=1";
  const params: (string | number)[] = [];

  // Optional filters are appended as bound parameters, never interpolated.
  if (sessionId) {
    sql += " AND session_id = ?";
    params.push(sessionId);
  }
  if (type) {
    sql += " AND type = ?";
    params.push(type);
  }

  sql += " ORDER BY timestamp DESC LIMIT ?";
  params.push(limit);

  return db
    .query<
      { id: number; session_id: string; turn_index: number; timestamp: string; type: string; title: string; summary: string; files_involved: string; tags: string },
      (string | number)[]
    >(sql)
    .all(...params)
    .map((r) => ({
      id: r.id,
      sessionId: r.session_id,
      turnIndex: r.turn_index,
      timestamp: r.timestamp,
      type: r.type,
      title: r.title,
      summary: r.summary,
      filesInvolved: JSON.parse(r.files_involved || "[]"),
      tags: JSON.parse(r.tags || "[]"),
    }));
}
|
|
89
|
+
|
|
90
|
+
/**
 * Vector search over checkpoints.
 *
 * Runs a nearest-neighbour query against `vec_checkpoints` (fetching
 * `topK * 2` candidates), joins the checkpoint metadata, then applies the
 * optional `type` filter in JavaScript. Because the filter runs after the
 * candidate fetch, fewer than `topK` results may be returned when a type is
 * given.
 *
 * @param db             Open database handle.
 * @param queryEmbedding Query vector.
 * @param topK           Maximum number of results (default 5).
 * @param type           When truthy, keep only checkpoints of this type.
 * @returns Matching checkpoints in the order the query returned them, each
 *          with `score = 1 / (1 + distance)` (distance 0 gives score 1).
 */
export function searchCheckpoints(
  db: Database,
  queryEmbedding: Float32Array,
  topK: number = 5,
  type?: string
): (CheckpointRow & { score: number })[] {
  const rows = db
    .query<
      {
        checkpoint_id: number;
        distance: number;
        id: number;
        session_id: string;
        turn_index: number;
        timestamp: string;
        type: string;
        title: string;
        summary: string;
        files_involved: string;
        tags: string;
      },
      [Uint8Array, number]
    >(
      `SELECT v.checkpoint_id, v.distance,
              cp.id, cp.session_id, cp.turn_index, cp.timestamp, cp.type,
              cp.title, cp.summary, cp.files_involved, cp.tags
       FROM (SELECT checkpoint_id, distance FROM vec_checkpoints WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
       JOIN conversation_checkpoints cp ON cp.id = v.checkpoint_id`
    )
    .all(
      // Respect byteOffset/byteLength so a Float32Array view into a larger
      // buffer serializes only its own floats.
      new Uint8Array(queryEmbedding.buffer, queryEmbedding.byteOffset, queryEmbedding.byteLength),
      topK * 2
    );

  const results: (CheckpointRow & { score: number })[] = [];

  for (const row of rows) {
    if (type && row.type !== type) continue;

    results.push({
      id: row.id,
      sessionId: row.session_id,
      turnIndex: row.turn_index,
      timestamp: row.timestamp,
      type: row.type,
      title: row.title,
      summary: row.summary,
      filesInvolved: JSON.parse(row.files_involved || "[]"),
      tags: JSON.parse(row.tags || "[]"),
      score: 1 / (1 + row.distance),
    });

    if (results.length >= topK) break;
  }

  return results;
}
|
|
144
|
+
|
|
145
|
+
/**
 * Fetch a single checkpoint by its id.
 *
 * @param db Open database handle.
 * @param id Checkpoint id to look up.
 * @returns The decoded checkpoint, or `null` when the lookup yields no row.
 */
export function getCheckpoint(db: Database, id: number): CheckpointRow | null {
  type RawCheckpoint = {
    id: number;
    session_id: string;
    turn_index: number;
    timestamp: string;
    type: string;
    title: string;
    summary: string;
    files_involved: string;
    tags: string;
  };

  const statement = db.query<RawCheckpoint, [number]>(
    "SELECT id, session_id, turn_index, timestamp, type, title, summary, files_involved, tags FROM conversation_checkpoints WHERE id = ?"
  );
  const raw = statement.get(id);

  if (!raw) {
    return null;
  }

  // JSON columns fall back to an empty array when the stored value is empty/NULL.
  const filesInvolved = JSON.parse(raw.files_involved || "[]");
  const tags = JSON.parse(raw.tags || "[]");

  return {
    id: raw.id,
    sessionId: raw.session_id,
    turnIndex: raw.turn_index,
    timestamp: raw.timestamp,
    type: raw.type,
    title: raw.title,
    summary: raw.summary,
    filesInvolved,
    tags,
  };
}
|
|
@@ -0,0 +1,241 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { type ConversationSearchResult } from "./types";
|
|
3
|
+
import { sanitizeFTS } from "../search/usages";
|
|
4
|
+
|
|
5
|
+
/**
 * Insert a conversation session row, or refresh an existing one.
 *
 * On a `session_id` conflict only `file_mtime`, `indexed_at` and
 * `read_offset` are overwritten; `jsonl_path` and `started_at` keep their
 * stored values. `indexed_at` is set to the current time in ISO-8601 form.
 *
 * @param db         Open database handle.
 * @param sessionId  Session identifier (conflict key).
 * @param jsonlPath  Path stored in `jsonl_path` on first insert.
 * @param startedAt  Value stored in `started_at` on first insert.
 * @param mtime      Value stored in `file_mtime`.
 * @param readOffset Value stored in `read_offset`.
 */
export function upsertSession(
  db: Database,
  sessionId: string,
  jsonlPath: string,
  startedAt: string,
  mtime: number,
  readOffset: number
) {
  const upsertSql = `INSERT INTO conversation_sessions (session_id, jsonl_path, started_at, indexed_at, file_mtime, read_offset)
     VALUES (?, ?, ?, ?, ?, ?)
     ON CONFLICT(session_id) DO UPDATE SET
       file_mtime = excluded.file_mtime,
       indexed_at = excluded.indexed_at,
       read_offset = excluded.read_offset`;

  const indexedAt = new Date().toISOString();

  db.run(upsertSql, [sessionId, jsonlPath, startedAt, indexedAt, mtime, readOffset]);
}
|
|
23
|
+
|
|
24
|
+
/**
 * Load the stored bookkeeping for one conversation session.
 *
 * @param db        Open database handle.
 * @param sessionId Session identifier to look up.
 * @returns The session record with camelCase field names, or `null` when the
 *          lookup yields no row.
 */
export function getSession(db: Database, sessionId: string): {
  id: number;
  sessionId: string;
  jsonlPath: string;
  mtime: number;
  readOffset: number;
  turnCount: number;
} | null {
  type RawSession = {
    id: number;
    session_id: string;
    jsonl_path: string;
    file_mtime: number;
    read_offset: number;
    turn_count: number;
  };

  const statement = db.query<RawSession, [string]>(
    "SELECT id, session_id, jsonl_path, file_mtime, read_offset, turn_count FROM conversation_sessions WHERE session_id = ?"
  );
  const found = statement.get(sessionId);

  if (!found) {
    return null;
  }

  // Translate snake_case columns into the camelCase shape callers expect.
  return {
    id: found.id,
    sessionId: found.session_id,
    jsonlPath: found.jsonl_path,
    mtime: found.file_mtime,
    readOffset: found.read_offset,
    turnCount: found.turn_count,
  };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Overwrite a session's counters and stamp it as freshly indexed.
 *
 * Sets `turn_count`, `total_tokens` and `read_offset` to the given values and
 * `indexed_at` to the current time (ISO-8601) for the row matching `sessionId`.
 *
 * @param db          Open database handle.
 * @param sessionId   Session whose row is updated.
 * @param turnCount   New `turn_count` value.
 * @param totalTokens New `total_tokens` value.
 * @param readOffset  New `read_offset` value.
 */
export function updateSessionStats(db: Database, sessionId: string, turnCount: number, totalTokens: number, readOffset: number) {
  const updateSql = `UPDATE conversation_sessions SET turn_count = ?, total_tokens = ?, read_offset = ?, indexed_at = ? WHERE session_id = ?`;
  const indexedAt = new Date().toISOString();

  db.run(updateSql, [turnCount, totalTokens, readOffset, indexedAt, sessionId]);
}
|
|
55
|
+
|
|
56
|
+
/**
 * Store one conversation turn together with its embedded text chunks.
 *
 * Everything happens in a single transaction: the turn row is written with
 * `INSERT OR IGNORE`; if that insert changed no rows (the turn was already
 * present) nothing else is written. Otherwise each chunk gets a row in
 * `conversation_chunks` and a matching vector in `vec_conversation`.
 *
 * @param db              Open database handle.
 * @param sessionId       Session the turn belongs to.
 * @param turnIndex       Index of the turn.
 * @param timestamp       Timestamp string stored as-is.
 * @param userText        User side of the turn.
 * @param assistantText   Assistant side of the turn.
 * @param toolsUsed       Tool names; stored as a JSON array string.
 * @param filesReferenced File paths; stored as a JSON array string.
 * @param tokenCost       Token cost stored with the turn.
 * @param summary         Summary text.
 * @param chunks          Snippets with their embeddings, stored in array order.
 * @returns The rowid of the new turn, or `0` when the insert was ignored.
 */
export function insertTurn(
  db: Database,
  sessionId: string,
  turnIndex: number,
  timestamp: string,
  userText: string,
  assistantText: string,
  toolsUsed: string[],
  filesReferenced: string[],
  tokenCost: number,
  summary: string,
  chunks: { snippet: string; embedding: Float32Array }[]
): number {
  let turnId = 0;

  const tx = db.transaction(() => {
    db.run(
      `INSERT OR IGNORE INTO conversation_turns
         (session_id, turn_index, timestamp, user_text, assistant_text, tools_used, files_referenced, token_cost, summary)
       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
      [
        sessionId,
        turnIndex,
        timestamp,
        userText,
        assistantText,
        JSON.stringify(toolsUsed),
        JSON.stringify(filesReferenced),
        tokenCost,
        summary,
      ]
    );

    // If the INSERT was ignored (duplicate), changes() returns 0.
    const inserted = db.query<{ c: number }, []>("SELECT changes() as c").get()!.c;
    if (inserted === 0) return;

    turnId = Number(
      db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
    );

    chunks.forEach(({ snippet, embedding }, i) => {
      db.run(
        "INSERT INTO conversation_chunks (turn_id, chunk_index, snippet) VALUES (?, ?, ?)",
        [turnId, i, snippet]
      );
      const chunkId = Number(
        db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
      );
      db.run(
        "INSERT INTO vec_conversation (chunk_id, embedding) VALUES (?, ?)",
        [
          chunkId,
          // Respect byteOffset/byteLength: chunk embeddings may be views into
          // one shared buffer, and only this chunk's floats must be stored.
          new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength),
        ]
      );
    });
  });

  tx();
  return turnId;
}
|
|
116
|
+
|
|
117
|
+
/**
 * Count the turns stored for a session.
 *
 * @param db        Open database handle.
 * @param sessionId Session whose `conversation_turns` rows are counted.
 * @returns The `COUNT(*)` of matching rows.
 */
export function getTurnCount(db: Database, sessionId: string): number {
  const statement = db.query<{ count: number }, [string]>(
    "SELECT COUNT(*) as count FROM conversation_turns WHERE session_id = ?"
  );
  const { count } = statement.get(sessionId)!;
  return count;
}
|
|
125
|
+
|
|
126
|
+
/**
 * Vector search over conversation chunks, returning at most one hit per turn.
 *
 * A nearest-neighbour subquery on `vec_conversation` fetches candidates
 * (`topK * 10` when a session filter is given, `topK * 3` otherwise), which
 * are joined to their chunk and turn rows. Candidates are then walked in the
 * returned order: only the first chunk seen for each turn is kept, and rows
 * from other sessions are dropped when `sessionId` is set.
 *
 * @param db             Open database handle.
 * @param queryEmbedding Query vector.
 * @param topK           Maximum number of results (default 5).
 * @param sessionId      When truthy, keep only turns from this session.
 * @returns Up to `topK` results, each with `score = 1 / (1 + distance)`.
 */
export function searchConversation(
  db: Database,
  queryEmbedding: Float32Array,
  topK: number = 5,
  sessionId?: string
): ConversationSearchResult[] {
  // Use subquery for vector search, then JOIN for turn data.
  const rows = db
    .query<
      {
        chunk_id: number;
        distance: number;
        snippet: string;
        turn_id: number;
        turn_index: number;
        session_id: string;
        timestamp: string;
        summary: string;
        tools_used: string;
        files_referenced: string;
      },
      [Uint8Array, number]
    >(
      `SELECT v.chunk_id, v.distance, cc.snippet, cc.turn_id,
              ct.turn_index, ct.session_id, ct.timestamp, ct.summary, ct.tools_used, ct.files_referenced
       FROM (SELECT chunk_id, distance FROM vec_conversation WHERE embedding MATCH ? ORDER BY distance LIMIT ?) v
       JOIN conversation_chunks cc ON cc.id = v.chunk_id
       JOIN conversation_turns ct ON ct.id = cc.turn_id`
    )
    .all(
      // Respect byteOffset/byteLength so a Float32Array view into a larger
      // buffer serializes only its own floats.
      new Uint8Array(queryEmbedding.buffer, queryEmbedding.byteOffset, queryEmbedding.byteLength),
      // The session filter is applied after the fetch, so over-fetch more
      // aggressively when it is active.
      sessionId ? topK * 10 : topK * 3
    );

  const results: ConversationSearchResult[] = [];
  const seenTurns = new Set<number>();

  for (const row of rows) {
    // Keep only the first chunk encountered for each turn.
    if (seenTurns.has(row.turn_id)) continue;
    seenTurns.add(row.turn_id);

    if (sessionId && row.session_id !== sessionId) continue;

    results.push({
      turnId: row.turn_id,
      turnIndex: row.turn_index,
      sessionId: row.session_id,
      timestamp: row.timestamp,
      summary: row.summary || "",
      snippet: row.snippet,
      toolsUsed: JSON.parse(row.tools_used || "[]"),
      filesReferenced: JSON.parse(row.files_referenced || "[]"),
      score: 1 / (1 + row.distance),
    });

    if (results.length >= topK) break;
  }

  return results;
}
|
|
183
|
+
|
|
184
|
+
/**
 * Full-text search over conversation chunks, returning at most one hit per turn.
 *
 * The query text is passed through `sanitizeFTS` and matched against
 * `fts_conversation`, ordered by `rank`. Candidates (`topK * 10` with a
 * session filter, `topK * 3` without) are walked in order; only the first
 * chunk per turn is kept and rows from other sessions are skipped when
 * `sessionId` is set.
 *
 * @param db        Open database handle.
 * @param query     Raw search text.
 * @param topK      Maximum number of results (default 5).
 * @param sessionId When truthy, keep only turns from this session.
 * @returns Up to `topK` results, each with `score = 1 / (1 + |rank|)`.
 */
export function textSearchConversation(
  db: Database,
  query: string,
  topK: number = 5,
  sessionId?: string
): ConversationSearchResult[] {
  type FtsHit = {
    snippet: string;
    turn_id: number;
    rank: number;
    turn_index: number;
    session_id: string;
    timestamp: string;
    summary: string;
    tools_used: string;
    files_referenced: string;
  };

  const statement = db.query<FtsHit, [string, number]>(
    `SELECT cc.snippet, cc.turn_id, rank,
            ct.turn_index, ct.session_id, ct.timestamp, ct.summary, ct.tools_used, ct.files_referenced
     FROM fts_conversation fts
     JOIN conversation_chunks cc ON cc.id = fts.rowid
     JOIN conversation_turns ct ON ct.id = cc.turn_id
     WHERE fts_conversation MATCH ?
     ORDER BY rank
     LIMIT ?`
  );
  const hits = statement.all(sanitizeFTS(query), sessionId ? topK * 10 : topK * 3);

  const collected: ConversationSearchResult[] = [];
  const visitedTurnIds = new Set<number>();

  for (const hit of hits) {
    // A turn is marked as visited the first time any of its chunks appears.
    const alreadyVisited = visitedTurnIds.has(hit.turn_id);
    if (!alreadyVisited) {
      visitedTurnIds.add(hit.turn_id);

      const sessionMatches = !sessionId || hit.session_id === sessionId;
      if (sessionMatches) {
        collected.push({
          turnId: hit.turn_id,
          turnIndex: hit.turn_index,
          sessionId: hit.session_id,
          timestamp: hit.timestamp,
          summary: hit.summary || "",
          snippet: hit.snippet,
          toolsUsed: JSON.parse(hit.tools_used || "[]"),
          filesReferenced: JSON.parse(hit.files_referenced || "[]"),
          score: 1 / (1 + Math.abs(hit.rank)),
        });

        if (collected.length >= topK) break;
      }
    }
  }

  return collected;
}
|
package/src/db/files.ts
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import { Database } from "bun:sqlite";
|
|
2
|
+
import { type EmbeddedChunk } from "../types";
|
|
3
|
+
import { type StoredFile } from "./types";
|
|
4
|
+
|
|
5
|
+
/**
 * Look up an indexed file record by its path.
 *
 * @param db   Open database handle.
 * @param path Path to match against `files.path`.
 * @returns The stored file (`indexed_at` exposed as `indexedAt`), or the
 *          statement's empty result when no row matches.
 */
export function getFileByPath(db: Database, path: string): StoredFile | null {
  const statement = db.query<StoredFile, [string]>(
    "SELECT id, path, hash, indexed_at as indexedAt FROM files WHERE path = ?"
  );
  return statement.get(path);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Begin (re)indexing a file and return its `files.id`.
 *
 * For a path that is not yet known, a new `files` row is inserted. For a known
 * path, the file's existing chunks and their `vec_chunks` rows are removed and
 * the row's `hash`/`indexed_at` are updated in one transaction.
 *
 * @param db   Open database handle.
 * @param path File path (lookup key).
 * @param hash Content hash to store.
 * @returns The id of the inserted or updated `files` row.
 */
export function upsertFileStart(db: Database, path: string, hash: string): number {
  const known = getFileByPath(db, path);

  if (!known) {
    db.run(
      "INSERT INTO files (path, hash, indexed_at) VALUES (?, ?, ?)",
      [path, hash, new Date().toISOString()]
    );
    const created = db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!;
    return Number(created.id);
  }

  // The row is updated in place rather than deleted and re-inserted so that
  // files.id stays stable — file_imports.resolved_file_id foreign keys that
  // point at this file remain valid.
  const fileId = known.id;
  const resetFile = db.transaction(() => {
    const staleChunks = db
      .query<{ id: number }, [number]>("SELECT id FROM chunks WHERE file_id = ?")
      .all(fileId);
    staleChunks.forEach((chunk) => {
      db.run("DELETE FROM vec_chunks WHERE chunk_id = ?", [chunk.id]);
    });
    db.run("DELETE FROM chunks WHERE file_id = ?", [fileId]);
    db.run(
      "UPDATE files SET hash = ?, indexed_at = ? WHERE id = ?",
      [hash, new Date().toISOString(), fileId]
    );
  });
  resetFile();

  return fileId;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Insert a batch of embedded chunks for a file in one transaction.
 *
 * Each chunk gets a row in `chunks` (with `chunk_index = startIndex + i`) and
 * a matching vector row in `vec_chunks` keyed by the new chunk's rowid.
 * Optional chunk metadata (`entityName`, `chunkType`, `startLine`, `endLine`)
 * is stored as NULL when absent.
 *
 * @param db         Open database handle.
 * @param fileId     `files.id` the chunks belong to.
 * @param chunks     Chunks to insert, in order.
 * @param startIndex Chunk index assigned to the first element of `chunks`.
 */
export function insertChunkBatch(
  db: Database,
  fileId: number,
  chunks: EmbeddedChunk[],
  startIndex: number
) {
  const tx = db.transaction(() => {
    chunks.forEach((chunk, i) => {
      const { snippet, embedding, entityName, chunkType, startLine, endLine } = chunk;
      db.run(
        "INSERT INTO chunks (file_id, chunk_index, snippet, entity_name, chunk_type, start_line, end_line) VALUES (?, ?, ?, ?, ?, ?, ?)",
        [fileId, startIndex + i, snippet, entityName ?? null, chunkType ?? null, startLine ?? null, endLine ?? null]
      );
      const chunkId = Number(
        db.query<{ id: number }, []>("SELECT last_insert_rowid() as id").get()!.id
      );
      db.run(
        "INSERT INTO vec_chunks (chunk_id, embedding) VALUES (?, ?)",
        [
          chunkId,
          // Respect byteOffset/byteLength: embeddings produced in batches may
          // be views into one shared buffer, and only this chunk's floats
          // must be stored.
          new Uint8Array(embedding.buffer, embedding.byteOffset, embedding.byteLength),
        ]
      );
    });
  });
  tx();
}
|
|
68
|
+
|
|
69
|
+
/**
 * Index a file in one call: register (or reset) the file row, then store all
 * of its chunks starting at chunk index 0.
 *
 * @param db     Open database handle.
 * @param path   File path.
 * @param hash   Content hash to store for the file.
 * @param chunks Embedded chunks to insert for the file.
 */
export function upsertFile(
  db: Database,
  path: string,
  hash: string,
  chunks: EmbeddedChunk[]
) {
  insertChunkBatch(db, upsertFileStart(db, path, hash), chunks, 0);
}
|
|
78
|
+
|
|
79
|
+
/**
 * Delete a file and everything indexed for it.
 *
 * In one transaction, removes the `vec_chunks` rows for each of the file's
 * chunks, then the chunks, then the `files` row itself.
 *
 * @param db   Open database handle.
 * @param path Path of the file to remove.
 * @returns `false` when no file with that path is stored, `true` after removal.
 */
export function removeFile(db: Database, path: string): boolean {
  const target = getFileByPath(db, path);
  if (!target) {
    return false;
  }

  const fileId = target.id;
  const purge = db.transaction(() => {
    const chunkRows = db
      .query<{ id: number }, [number]>("SELECT id FROM chunks WHERE file_id = ?")
      .all(fileId);
    // Vector rows are removed one chunk id at a time, before the chunks themselves.
    chunkRows.forEach(({ id }) => {
      db.run("DELETE FROM vec_chunks WHERE chunk_id = ?", [id]);
    });
    db.run("DELETE FROM chunks WHERE file_id = ?", [fileId]);
    db.run("DELETE FROM files WHERE id = ?", [fileId]);
  });
  purge();

  return true;
}
|
|
97
|
+
|
|
98
|
+
/**
 * Remove every stored file whose path is not in `existingPaths`.
 *
 * For each such file, its `vec_chunks` rows, chunks and `files` row are
 * deleted; all removals share one transaction. Nothing is written when every
 * stored path is still present.
 *
 * @param db            Open database handle.
 * @param existingPaths Paths that should be kept.
 * @returns The number of files removed.
 */
export function pruneDeleted(db: Database, existingPaths: Set<string>): number {
  const stored = db
    .query<{ id: number; path: string }, []>("SELECT id, path FROM files")
    .all();

  const stale: { id: number; path: string }[] = [];
  for (const entry of stored) {
    if (!existingPaths.has(entry.path)) {
      stale.push(entry);
    }
  }

  if (stale.length === 0) {
    return 0;
  }

  const purgeAll = db.transaction(() => {
    stale.forEach(({ id: fileId }) => {
      const chunkRows = db
        .query<{ id: number }, [number]>("SELECT id FROM chunks WHERE file_id = ?")
        .all(fileId);
      chunkRows.forEach((chunk) => {
        db.run("DELETE FROM vec_chunks WHERE chunk_id = ?", [chunk.id]);
      });
      db.run("DELETE FROM chunks WHERE file_id = ?", [fileId]);
      db.run("DELETE FROM files WHERE id = ?", [fileId]);
    });
  });
  purgeAll();

  return stale.length;
}
|
|
121
|
+
|
|
122
|
+
/**
 * Return the id and path of every row in `files`.
 *
 * @param db Open database handle.
 * @returns One `{ id, path }` entry per stored file.
 */
export function getAllFilePaths(db: Database): { id: number; path: string }[] {
  const statement = db.query<{ id: number; path: string }, []>("SELECT id, path FROM files");
  return statement.all();
}
|
|
127
|
+
|
|
128
|
+
/**
 * Summarize the index: how many files and chunks are stored and when the most
 * recently indexed file was written.
 *
 * @param db Open database handle.
 * @returns Row counts for `files` and `chunks`, plus the greatest
 *          `indexed_at` value, or `null` when `files` has no rows.
 */
export function getStatus(db: Database): { totalFiles: number; totalChunks: number; lastIndexed: string | null } {
  const fileCountRow = db
    .query<{ count: number }, []>("SELECT COUNT(*) as count FROM files")
    .get()!;
  const chunkCountRow = db
    .query<{ count: number }, []>("SELECT COUNT(*) as count FROM chunks")
    .get()!;
  const newest = db
    .query<{ indexed_at: string }, []>(
      "SELECT indexed_at FROM files ORDER BY indexed_at DESC LIMIT 1"
    )
    .get();

  const totalFiles = fileCountRow.count;
  const totalChunks = chunkCountRow.count;
  const lastIndexed = newest?.indexed_at ?? null;

  return { totalFiles, totalChunks, lastIndexed };
}
|