botholomew 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/chat/session.ts +2 -2
- package/src/commands/context.ts +53 -42
- package/src/commands/daemon.ts +1 -1
- package/src/commands/schedule.ts +1 -1
- package/src/commands/task.ts +2 -1
- package/src/commands/thread.ts +6 -40
- package/src/commands/with-db.ts +2 -2
- package/src/constants.ts +1 -1
- package/src/context/chunker.ts +23 -46
- package/src/context/describer.ts +146 -0
- package/src/context/ingest.ts +27 -25
- package/src/daemon/index.ts +51 -5
- package/src/daemon/llm.ts +80 -12
- package/src/daemon/prompt.ts +3 -4
- package/src/daemon/schedules.ts +7 -1
- package/src/daemon/tick.ts +17 -5
- package/src/db/connection.ts +102 -40
- package/src/db/context.ts +120 -94
- package/src/db/embeddings.ts +55 -77
- package/src/db/query.ts +11 -0
- package/src/db/schedules.ts +27 -28
- package/src/db/schema.ts +9 -9
- package/src/db/sql/1-core_tables.sql +11 -11
- package/src/db/sql/2-logging_tables.sql +3 -3
- package/src/db/sql/3-daemon_state.sql +2 -2
- package/src/db/sql/6-vss_index.sql +1 -0
- package/src/db/sql/7-drop_embeddings_fk.sql +24 -0
- package/src/db/sql/8-task_output.sql +1 -0
- package/src/db/tasks.ts +89 -78
- package/src/db/threads.ts +52 -41
- package/src/init/index.ts +2 -2
- package/src/tools/file/move.ts +5 -3
- package/src/tools/file/write.ts +2 -30
- package/src/tools/search/semantic.ts +7 -4
- package/src/tools/task/list.ts +2 -0
- package/src/tools/task/view.ts +2 -0
- package/src/tui/App.tsx +20 -3
- package/src/tui/components/SchedulePanel.tsx +389 -0
- package/src/tui/components/TabBar.tsx +3 -2
- package/src/tui/components/TaskPanel.tsx +6 -0
package/src/db/context.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import type { DbConnection } from "./connection.ts";
|
|
2
|
-
import { buildSetClauses, buildWhereClause } from "./query.ts";
|
|
2
|
+
import { buildSetClauses, buildWhereClause, sanitizeInt } from "./query.ts";
|
|
3
3
|
import { uuidv7 } from "./uuid.ts";
|
|
4
4
|
|
|
5
5
|
export interface ContextItem {
|
|
@@ -29,7 +29,7 @@ interface ContextItemRow {
|
|
|
29
29
|
content: string | null;
|
|
30
30
|
content_blob: unknown;
|
|
31
31
|
mime_type: string;
|
|
32
|
-
is_textual:
|
|
32
|
+
is_textual: boolean;
|
|
33
33
|
source_path: string | null;
|
|
34
34
|
context_path: string;
|
|
35
35
|
indexed_at: string | null;
|
|
@@ -44,7 +44,7 @@ function rowToContextItem(row: ContextItemRow): ContextItem {
|
|
|
44
44
|
description: row.description,
|
|
45
45
|
content: row.content,
|
|
46
46
|
mime_type: row.mime_type,
|
|
47
|
-
is_textual: row.is_textual
|
|
47
|
+
is_textual: !!row.is_textual,
|
|
48
48
|
source_path: row.source_path,
|
|
49
49
|
context_path: row.context_path,
|
|
50
50
|
indexed_at: row.indexed_at ? new Date(row.indexed_at) : null,
|
|
@@ -68,33 +68,64 @@ export async function createContextItem(
|
|
|
68
68
|
},
|
|
69
69
|
): Promise<ContextItem> {
|
|
70
70
|
const id = uuidv7();
|
|
71
|
-
const row = db
|
|
72
|
-
|
|
73
|
-
`INSERT INTO context_items (id, title, description, content, mime_type, is_textual, source_path, context_path)
|
|
71
|
+
const row = await db.queryGet<ContextItemRow>(
|
|
72
|
+
`INSERT INTO context_items (id, title, description, content, mime_type, is_textual, source_path, context_path)
|
|
74
73
|
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)
|
|
75
74
|
RETURNING *`,
|
|
76
|
-
|
|
77
|
-
.
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
params.contextPath,
|
|
86
|
-
) as ContextItemRow | null;
|
|
75
|
+
id,
|
|
76
|
+
params.title,
|
|
77
|
+
params.description ?? "",
|
|
78
|
+
params.content ?? null,
|
|
79
|
+
params.mimeType ?? "text/plain",
|
|
80
|
+
params.isTextual !== false,
|
|
81
|
+
params.sourcePath ?? null,
|
|
82
|
+
params.contextPath,
|
|
83
|
+
);
|
|
87
84
|
if (!row) throw new Error("INSERT did not return a row");
|
|
88
85
|
return rowToContextItem(row);
|
|
89
86
|
}
|
|
90
87
|
|
|
88
|
+
/**
|
|
89
|
+
* Atomic upsert by context_path: updates if the path exists, inserts otherwise.
|
|
90
|
+
*
|
|
91
|
+
* DuckDB implements UPDATE as delete+insert on tables with unique indexes,
|
|
92
|
+
* which violates foreign keys from the embeddings table. We must delete
|
|
93
|
+
* embeddings before updating; callers (context add, file_write) re-create
|
|
94
|
+
* them in their ingestion phase.
|
|
95
|
+
*/
|
|
96
|
+
export async function upsertContextItem(
|
|
97
|
+
db: DbConnection,
|
|
98
|
+
params: {
|
|
99
|
+
title: string;
|
|
100
|
+
content?: string;
|
|
101
|
+
mimeType?: string;
|
|
102
|
+
sourcePath?: string;
|
|
103
|
+
contextPath: string;
|
|
104
|
+
description?: string;
|
|
105
|
+
isTextual?: boolean;
|
|
106
|
+
},
|
|
107
|
+
): Promise<ContextItem> {
|
|
108
|
+
const existing = await getContextItemByPath(db, params.contextPath);
|
|
109
|
+
if (existing) {
|
|
110
|
+
const updated = await updateContextItem(db, existing.id, {
|
|
111
|
+
title: params.title,
|
|
112
|
+
content: params.content,
|
|
113
|
+
mime_type: params.mimeType,
|
|
114
|
+
});
|
|
115
|
+
if (!updated) throw new Error(`Failed to update: ${params.contextPath}`);
|
|
116
|
+
return updated;
|
|
117
|
+
}
|
|
118
|
+
return createContextItem(db, params);
|
|
119
|
+
}
|
|
120
|
+
|
|
91
121
|
export async function getContextItem(
|
|
92
122
|
db: DbConnection,
|
|
93
123
|
id: string,
|
|
94
124
|
): Promise<ContextItem | null> {
|
|
95
|
-
const row = db
|
|
96
|
-
|
|
97
|
-
|
|
125
|
+
const row = await db.queryGet<ContextItemRow>(
|
|
126
|
+
"SELECT * FROM context_items WHERE id = ?1",
|
|
127
|
+
id,
|
|
128
|
+
);
|
|
98
129
|
return row ? rowToContextItem(row) : null;
|
|
99
130
|
}
|
|
100
131
|
|
|
@@ -102,9 +133,10 @@ export async function getContextItemByPath(
|
|
|
102
133
|
db: DbConnection,
|
|
103
134
|
contextPath: string,
|
|
104
135
|
): Promise<ContextItem | null> {
|
|
105
|
-
const row = db
|
|
106
|
-
|
|
107
|
-
|
|
136
|
+
const row = await db.queryGet<ContextItemRow>(
|
|
137
|
+
"SELECT * FROM context_items WHERE context_path = ?1",
|
|
138
|
+
contextPath,
|
|
139
|
+
);
|
|
108
140
|
return row ? rowToContextItem(row) : null;
|
|
109
141
|
}
|
|
110
142
|
|
|
@@ -121,14 +153,13 @@ export async function listContextItems(
|
|
|
121
153
|
["context_path", filters?.contextPath],
|
|
122
154
|
["mime_type", filters?.mimeType],
|
|
123
155
|
]);
|
|
124
|
-
const limit = filters?.limit ? `LIMIT ${filters.limit}` : "";
|
|
125
|
-
const offset = filters?.offset ? `OFFSET ${filters.offset}` : "";
|
|
126
|
-
|
|
127
|
-
const rows = db
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
.all(...params) as ContextItemRow[];
|
|
156
|
+
const limit = filters?.limit ? `LIMIT ${sanitizeInt(filters.limit)}` : "";
|
|
157
|
+
const offset = filters?.offset ? `OFFSET ${sanitizeInt(filters.offset)}` : "";
|
|
158
|
+
|
|
159
|
+
const rows = await db.queryAll<ContextItemRow>(
|
|
160
|
+
`SELECT * FROM context_items ${where} ORDER BY context_path ASC ${limit} ${offset}`,
|
|
161
|
+
...params,
|
|
162
|
+
);
|
|
132
163
|
return rows.map(rowToContextItem);
|
|
133
164
|
}
|
|
134
165
|
|
|
@@ -139,31 +170,27 @@ export async function listContextItemsByPrefix(
|
|
|
139
170
|
): Promise<ContextItem[]> {
|
|
140
171
|
const normalizedPrefix = prefix.endsWith("/") ? prefix : `${prefix}/`;
|
|
141
172
|
|
|
142
|
-
const limit = opts?.limit ? `LIMIT ${opts.limit}` : "";
|
|
143
|
-
const offset = opts?.offset ? `OFFSET ${opts.offset}` : "";
|
|
173
|
+
const limit = opts?.limit ? `LIMIT ${sanitizeInt(opts.limit)}` : "";
|
|
174
|
+
const offset = opts?.offset ? `OFFSET ${sanitizeInt(opts.offset)}` : "";
|
|
144
175
|
|
|
145
176
|
let rows: ContextItemRow[];
|
|
146
177
|
if (opts?.recursive) {
|
|
147
|
-
rows = db
|
|
148
|
-
|
|
149
|
-
`SELECT * FROM context_items
|
|
178
|
+
rows = await db.queryAll<ContextItemRow>(
|
|
179
|
+
`SELECT * FROM context_items
|
|
150
180
|
WHERE context_path LIKE ?1
|
|
151
181
|
ORDER BY context_path ASC ${limit} ${offset}`,
|
|
152
|
-
|
|
153
|
-
|
|
182
|
+
`${normalizedPrefix}%`,
|
|
183
|
+
);
|
|
154
184
|
} else {
|
|
155
185
|
// Only immediate children: match prefix but no further slashes
|
|
156
|
-
rows = db
|
|
157
|
-
|
|
158
|
-
`SELECT * FROM context_items
|
|
186
|
+
rows = await db.queryAll<ContextItemRow>(
|
|
187
|
+
`SELECT * FROM context_items
|
|
159
188
|
WHERE context_path LIKE ?1
|
|
160
189
|
AND context_path NOT LIKE ?2
|
|
161
190
|
ORDER BY context_path ASC ${limit} ${offset}`,
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
`${normalizedPrefix}%/%`,
|
|
166
|
-
) as ContextItemRow[];
|
|
191
|
+
`${normalizedPrefix}%`,
|
|
192
|
+
`${normalizedPrefix}%/%`,
|
|
193
|
+
);
|
|
167
194
|
}
|
|
168
195
|
|
|
169
196
|
return rows.map(rowToContextItem);
|
|
@@ -173,11 +200,10 @@ export async function contextPathExists(
|
|
|
173
200
|
db: DbConnection,
|
|
174
201
|
contextPath: string,
|
|
175
202
|
): Promise<boolean> {
|
|
176
|
-
const row = db
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
.get(contextPath);
|
|
203
|
+
const row = await db.queryGet(
|
|
204
|
+
"SELECT 1 AS found FROM context_items WHERE context_path = ?1 LIMIT 1",
|
|
205
|
+
contextPath,
|
|
206
|
+
);
|
|
181
207
|
return row != null;
|
|
182
208
|
}
|
|
183
209
|
|
|
@@ -192,19 +218,19 @@ export async function getDistinctDirectories(
|
|
|
192
218
|
: "/";
|
|
193
219
|
|
|
194
220
|
// Extract the first path segment after the prefix
|
|
195
|
-
const rows = db
|
|
196
|
-
|
|
197
|
-
`SELECT DISTINCT
|
|
221
|
+
const rows = await db.queryAll<{ dir: string }>(
|
|
222
|
+
`SELECT DISTINCT
|
|
198
223
|
?1 || CASE
|
|
199
|
-
WHEN
|
|
200
|
-
THEN substr(substr(context_path, length(?1) + 1), 1,
|
|
224
|
+
WHEN strpos(substr(context_path, length(?1) + 1), '/') > 0
|
|
225
|
+
THEN substr(substr(context_path, length(?1) + 1), 1, strpos(substr(context_path, length(?1) + 1), '/') - 1)
|
|
201
226
|
ELSE substr(context_path, length(?1) + 1)
|
|
202
227
|
END AS dir
|
|
203
228
|
FROM context_items
|
|
204
229
|
WHERE context_path LIKE ?2
|
|
205
230
|
ORDER BY dir ASC`,
|
|
206
|
-
|
|
207
|
-
|
|
231
|
+
normalizedPrefix,
|
|
232
|
+
`${normalizedPrefix}%/%`,
|
|
233
|
+
);
|
|
208
234
|
|
|
209
235
|
return rows.map((row) => row.dir);
|
|
210
236
|
}
|
|
@@ -225,17 +251,16 @@ export async function updateContextItem(
|
|
|
225
251
|
["mime_type", updates.mime_type],
|
|
226
252
|
]);
|
|
227
253
|
|
|
228
|
-
setClauses.push("updated_at =
|
|
254
|
+
setClauses.push("updated_at = current_timestamp::VARCHAR");
|
|
229
255
|
params.push(id);
|
|
230
256
|
|
|
231
|
-
const row = db
|
|
232
|
-
|
|
233
|
-
`UPDATE context_items
|
|
257
|
+
const row = await db.queryGet<ContextItemRow>(
|
|
258
|
+
`UPDATE context_items
|
|
234
259
|
SET ${setClauses.join(", ")}
|
|
235
260
|
WHERE id = ?${params.length}
|
|
236
261
|
RETURNING *`,
|
|
237
|
-
|
|
238
|
-
|
|
262
|
+
...params,
|
|
263
|
+
);
|
|
239
264
|
return row ? rowToContextItem(row) : null;
|
|
240
265
|
}
|
|
241
266
|
|
|
@@ -244,14 +269,14 @@ export async function updateContextItemContent(
|
|
|
244
269
|
contextPath: string,
|
|
245
270
|
content: string,
|
|
246
271
|
): Promise<ContextItem | null> {
|
|
247
|
-
const row = db
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
SET content = ?1, updated_at = datetime('now')
|
|
272
|
+
const row = await db.queryGet<ContextItemRow>(
|
|
273
|
+
`UPDATE context_items
|
|
274
|
+
SET content = ?1, updated_at = current_timestamp::VARCHAR
|
|
251
275
|
WHERE context_path = ?2
|
|
252
276
|
RETURNING *`,
|
|
253
|
-
|
|
254
|
-
|
|
277
|
+
content,
|
|
278
|
+
contextPath,
|
|
279
|
+
);
|
|
255
280
|
return row ? rowToContextItem(row) : null;
|
|
256
281
|
}
|
|
257
282
|
|
|
@@ -312,14 +337,14 @@ export async function moveContextItem(
|
|
|
312
337
|
oldPath: string,
|
|
313
338
|
newPath: string,
|
|
314
339
|
): Promise<void> {
|
|
315
|
-
const row = db
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
SET context_path = ?1, updated_at = datetime('now')
|
|
340
|
+
const row = await db.queryGet(
|
|
341
|
+
`UPDATE context_items
|
|
342
|
+
SET context_path = ?1, updated_at = current_timestamp::VARCHAR
|
|
319
343
|
WHERE context_path = ?2
|
|
320
344
|
RETURNING id`,
|
|
321
|
-
|
|
322
|
-
|
|
345
|
+
newPath,
|
|
346
|
+
oldPath,
|
|
347
|
+
);
|
|
323
348
|
if (!row) {
|
|
324
349
|
throw new Error(`Not found: ${oldPath}`);
|
|
325
350
|
}
|
|
@@ -332,10 +357,11 @@ export async function deleteContextItem(
|
|
|
332
357
|
id: string,
|
|
333
358
|
): Promise<boolean> {
|
|
334
359
|
// Delete embeddings first (foreign key)
|
|
335
|
-
db.
|
|
336
|
-
const row = db
|
|
337
|
-
|
|
338
|
-
|
|
360
|
+
await db.queryRun("DELETE FROM embeddings WHERE context_item_id = ?1", id);
|
|
361
|
+
const row = await db.queryGet(
|
|
362
|
+
"DELETE FROM context_items WHERE id = ?1 RETURNING id",
|
|
363
|
+
id,
|
|
364
|
+
);
|
|
339
365
|
return row != null;
|
|
340
366
|
}
|
|
341
367
|
|
|
@@ -356,21 +382,21 @@ export async function deleteContextItemsByPrefix(
|
|
|
356
382
|
const normalizedPrefix = prefix.endsWith("/") ? prefix : `${prefix}/`;
|
|
357
383
|
|
|
358
384
|
// Delete embeddings for all matching items
|
|
359
|
-
db.
|
|
385
|
+
await db.queryRun(
|
|
360
386
|
`DELETE FROM embeddings
|
|
361
387
|
WHERE context_item_id IN (
|
|
362
388
|
SELECT id FROM context_items
|
|
363
389
|
WHERE context_path LIKE ?1
|
|
364
390
|
)`,
|
|
365
|
-
|
|
391
|
+
`${normalizedPrefix}%`,
|
|
392
|
+
);
|
|
366
393
|
|
|
367
|
-
const rows = db
|
|
368
|
-
|
|
369
|
-
`DELETE FROM context_items
|
|
394
|
+
const rows = await db.queryAll(
|
|
395
|
+
`DELETE FROM context_items
|
|
370
396
|
WHERE context_path LIKE ?1
|
|
371
397
|
RETURNING id`,
|
|
372
|
-
|
|
373
|
-
|
|
398
|
+
`${normalizedPrefix}%`,
|
|
399
|
+
);
|
|
374
400
|
return rows.length;
|
|
375
401
|
}
|
|
376
402
|
|
|
@@ -382,17 +408,17 @@ export async function searchContextByKeyword(
|
|
|
382
408
|
limit = 20,
|
|
383
409
|
): Promise<ContextItem[]> {
|
|
384
410
|
const pattern = `%${query}%`;
|
|
385
|
-
const rows = db
|
|
386
|
-
|
|
387
|
-
`SELECT * FROM context_items
|
|
411
|
+
const rows = await db.queryAll<ContextItemRow>(
|
|
412
|
+
`SELECT * FROM context_items
|
|
388
413
|
WHERE content IS NOT NULL
|
|
389
414
|
AND (
|
|
390
|
-
content
|
|
391
|
-
OR title
|
|
415
|
+
content ILIKE ?1
|
|
416
|
+
OR title ILIKE ?1
|
|
392
417
|
)
|
|
393
418
|
ORDER BY updated_at DESC
|
|
394
419
|
LIMIT ?2`,
|
|
395
|
-
|
|
396
|
-
|
|
420
|
+
pattern,
|
|
421
|
+
limit,
|
|
422
|
+
);
|
|
397
423
|
return rows.map(rowToContextItem);
|
|
398
424
|
}
|
package/src/db/embeddings.ts
CHANGED
|
@@ -2,23 +2,8 @@ import { EMBEDDING_DIMENSION } from "../constants.ts";
|
|
|
2
2
|
import type { DbConnection } from "./connection.ts";
|
|
3
3
|
import { uuidv7 } from "./uuid.ts";
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
/**
|
|
9
|
-
* Initialize sqlite-vector on the embeddings table for this connection.
|
|
10
|
-
* Must be called once per connection before vector operations.
|
|
11
|
-
* The dimension parameter allows overriding for tests.
|
|
12
|
-
*/
|
|
13
|
-
export function initVectorSearch(
|
|
14
|
-
conn: DbConnection,
|
|
15
|
-
dimension = EMBEDDING_DIMENSION,
|
|
16
|
-
): void {
|
|
17
|
-
if (initializedConnections.has(conn)) return;
|
|
18
|
-
conn.exec(
|
|
19
|
-
`SELECT vector_init('embeddings', 'embedding', 'dimension=${dimension},type=FLOAT32,distance=COSINE')`,
|
|
20
|
-
);
|
|
21
|
-
initializedConnections.add(conn);
|
|
5
|
+
if (!Number.isInteger(EMBEDDING_DIMENSION) || EMBEDDING_DIMENSION <= 0) {
|
|
6
|
+
throw new Error(`Invalid EMBEDDING_DIMENSION: ${EMBEDDING_DIMENSION}`);
|
|
22
7
|
}
|
|
23
8
|
|
|
24
9
|
export interface Embedding {
|
|
@@ -45,7 +30,7 @@ interface EmbeddingRow {
|
|
|
45
30
|
title: string;
|
|
46
31
|
description: string;
|
|
47
32
|
source_path: string | null;
|
|
48
|
-
embedding:
|
|
33
|
+
embedding: number[] | null;
|
|
49
34
|
created_at: string;
|
|
50
35
|
}
|
|
51
36
|
|
|
@@ -58,14 +43,12 @@ function rowToEmbedding(row: EmbeddingRow): Embedding {
|
|
|
58
43
|
title: row.title,
|
|
59
44
|
description: row.description,
|
|
60
45
|
source_path: row.source_path,
|
|
61
|
-
embedding: row.embedding
|
|
62
|
-
? Array.from(new Float32Array(row.embedding.buffer))
|
|
63
|
-
: [],
|
|
46
|
+
embedding: row.embedding ?? [],
|
|
64
47
|
created_at: new Date(row.created_at),
|
|
65
48
|
};
|
|
66
49
|
}
|
|
67
50
|
|
|
68
|
-
export function createEmbedding(
|
|
51
|
+
export async function createEmbedding(
|
|
69
52
|
conn: DbConnection,
|
|
70
53
|
params: {
|
|
71
54
|
contextItemId: string;
|
|
@@ -76,23 +59,20 @@ export function createEmbedding(
|
|
|
76
59
|
sourcePath?: string | null;
|
|
77
60
|
embedding: number[];
|
|
78
61
|
},
|
|
79
|
-
): Embedding {
|
|
62
|
+
): Promise<Embedding> {
|
|
80
63
|
const id = uuidv7();
|
|
81
|
-
conn
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
.
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
params.sourcePath ?? null,
|
|
94
|
-
JSON.stringify(params.embedding),
|
|
95
|
-
);
|
|
64
|
+
await conn.queryRun(
|
|
65
|
+
`INSERT INTO embeddings (id, context_item_id, chunk_index, chunk_content, title, description, source_path, embedding)
|
|
66
|
+
VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8::FLOAT[${EMBEDDING_DIMENSION}])`,
|
|
67
|
+
id,
|
|
68
|
+
params.contextItemId,
|
|
69
|
+
params.chunkIndex,
|
|
70
|
+
params.chunkContent,
|
|
71
|
+
params.title,
|
|
72
|
+
params.description ?? "",
|
|
73
|
+
params.sourcePath ?? null,
|
|
74
|
+
params.embedding,
|
|
75
|
+
);
|
|
96
76
|
|
|
97
77
|
return {
|
|
98
78
|
id,
|
|
@@ -107,52 +87,51 @@ export function createEmbedding(
|
|
|
107
87
|
};
|
|
108
88
|
}
|
|
109
89
|
|
|
110
|
-
export function getEmbeddingsForItem(
|
|
90
|
+
export async function getEmbeddingsForItem(
|
|
111
91
|
conn: DbConnection,
|
|
112
92
|
contextItemId: string,
|
|
113
|
-
): Embedding[] {
|
|
114
|
-
const rows = conn
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
.all(contextItemId) as EmbeddingRow[];
|
|
93
|
+
): Promise<Embedding[]> {
|
|
94
|
+
const rows = await conn.queryAll<EmbeddingRow>(
|
|
95
|
+
"SELECT * FROM embeddings WHERE context_item_id = ?1 ORDER BY chunk_index ASC",
|
|
96
|
+
contextItemId,
|
|
97
|
+
);
|
|
119
98
|
return rows.map(rowToEmbedding);
|
|
120
99
|
}
|
|
121
100
|
|
|
122
|
-
export function deleteEmbeddingsForItem(
|
|
101
|
+
export async function deleteEmbeddingsForItem(
|
|
123
102
|
conn: DbConnection,
|
|
124
103
|
contextItemId: string,
|
|
125
|
-
): number {
|
|
126
|
-
const result = conn
|
|
127
|
-
|
|
128
|
-
|
|
104
|
+
): Promise<number> {
|
|
105
|
+
const result = await conn.queryRun(
|
|
106
|
+
"DELETE FROM embeddings WHERE context_item_id = ?1",
|
|
107
|
+
contextItemId,
|
|
108
|
+
);
|
|
129
109
|
return result.changes;
|
|
130
110
|
}
|
|
131
111
|
|
|
132
|
-
interface
|
|
112
|
+
interface VectorSearchRow extends EmbeddingRow {
|
|
133
113
|
distance: number;
|
|
134
114
|
}
|
|
135
115
|
|
|
136
116
|
/**
|
|
137
|
-
* Vector similarity search using
|
|
138
|
-
*
|
|
117
|
+
* Vector similarity search using DuckDB's array_cosine_distance().
|
|
118
|
+
* With an HNSW index on the embedding column, DuckDB automatically
|
|
119
|
+
* uses the index for top-k queries. Returns results sorted by
|
|
139
120
|
* similarity (closest first), with score = 1 - distance.
|
|
140
121
|
*/
|
|
141
|
-
export function searchEmbeddings(
|
|
122
|
+
export async function searchEmbeddings(
|
|
142
123
|
conn: DbConnection,
|
|
143
124
|
queryEmbedding: number[],
|
|
144
125
|
limit = 10,
|
|
145
|
-
): EmbeddingSearchResult[] {
|
|
146
|
-
const
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
)
|
|
155
|
-
.all(queryJson, limit) as VectorScanRow[];
|
|
126
|
+
): Promise<EmbeddingSearchResult[]> {
|
|
127
|
+
const rows = await conn.queryAll<VectorSearchRow>(
|
|
128
|
+
`SELECT *, array_cosine_distance(embedding, ?1::FLOAT[${EMBEDDING_DIMENSION}]) AS distance
|
|
129
|
+
FROM embeddings
|
|
130
|
+
ORDER BY distance ASC
|
|
131
|
+
LIMIT ?2`,
|
|
132
|
+
queryEmbedding,
|
|
133
|
+
limit,
|
|
134
|
+
);
|
|
156
135
|
|
|
157
136
|
return rows.map((row) => ({
|
|
158
137
|
...rowToEmbedding(row),
|
|
@@ -160,28 +139,27 @@ export function searchEmbeddings(
|
|
|
160
139
|
}));
|
|
161
140
|
}
|
|
162
141
|
|
|
163
|
-
export function hybridSearch(
|
|
142
|
+
export async function hybridSearch(
|
|
164
143
|
conn: DbConnection,
|
|
165
144
|
query: string,
|
|
166
145
|
queryEmbedding: number[],
|
|
167
146
|
limit = 10,
|
|
168
|
-
): EmbeddingSearchResult[] {
|
|
147
|
+
): Promise<EmbeddingSearchResult[]> {
|
|
169
148
|
const k = 60; // RRF constant
|
|
170
149
|
|
|
171
150
|
// Keyword search: match on chunk_content and title
|
|
172
|
-
const keywordRows = conn
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
.all(query) as EmbeddingRow[];
|
|
151
|
+
const keywordRows = await conn.queryAll<EmbeddingRow>(
|
|
152
|
+
`SELECT * FROM embeddings
|
|
153
|
+
WHERE chunk_content ILIKE '%' || ?1 || '%'
|
|
154
|
+
OR title ILIKE '%' || ?1 || '%'
|
|
155
|
+
LIMIT 100`,
|
|
156
|
+
query,
|
|
157
|
+
);
|
|
180
158
|
|
|
181
159
|
const keywordRanked = keywordRows.map(rowToEmbedding);
|
|
182
160
|
|
|
183
|
-
// Vector search via
|
|
184
|
-
const vectorResults = searchEmbeddings(conn, queryEmbedding, 100);
|
|
161
|
+
// Vector search via DuckDB VSS
|
|
162
|
+
const vectorResults = await searchEmbeddings(conn, queryEmbedding, 100);
|
|
185
163
|
|
|
186
164
|
// Reciprocal rank fusion
|
|
187
165
|
const scores = new Map<string, { embedding: Embedding; score: number }>();
|
package/src/db/query.ts
CHANGED
|
@@ -1,5 +1,16 @@
|
|
|
1
1
|
type SqlParam = string | number | null;
|
|
2
2
|
|
|
3
|
+
/**
|
|
4
|
+
* Validate that a value is a positive integer, suitable for use in
|
|
5
|
+
* LIMIT / OFFSET clauses that must be interpolated into SQL strings.
|
|
6
|
+
*/
|
|
7
|
+
export function sanitizeInt(val: number): number {
|
|
8
|
+
if (!Number.isInteger(val) || val <= 0) {
|
|
9
|
+
throw new Error(`Expected a positive integer, got: ${val}`);
|
|
10
|
+
}
|
|
11
|
+
return val;
|
|
12
|
+
}
|
|
13
|
+
|
|
3
14
|
/**
|
|
4
15
|
* Build a WHERE clause from column-value pairs.
|
|
5
16
|
* Entries with `undefined` values are skipped.
|