@kyleparrott/where-was-i 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +89 -0
- package/README.md +167 -0
- package/dist/src/cli.js +423 -0
- package/dist/src/core/codex.js +303 -0
- package/dist/src/core/config.js +168 -0
- package/dist/src/core/database.js +432 -0
- package/dist/src/core/doctor.js +113 -0
- package/dist/src/core/embeddings.js +118 -0
- package/dist/src/core/indexer.js +60 -0
- package/dist/src/core/paths.js +20 -0
- package/dist/src/core/reset.js +18 -0
- package/dist/src/core/search-mode.js +17 -0
- package/dist/src/core/search.js +562 -0
- package/dist/src/core/semantic.js +220 -0
- package/dist/src/core/types.js +1 -0
- package/dist/src/core/vector.js +311 -0
- package/dist/src/mcp.js +345 -0
- package/dist/src/web-client.js +61 -0
- package/dist/src/web-settings.js +157 -0
- package/dist/src/web-style.js +797 -0
- package/dist/src/web-utils.js +81 -0
- package/dist/src/web-views.js +389 -0
- package/dist/src/web.js +512 -0
- package/docs/assets/web-ui.png +0 -0
- package/package.json +64 -0
|
@@ -0,0 +1,432 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import DatabaseConstructor from "better-sqlite3";
|
|
4
|
+
import * as sqliteVec from "sqlite-vec";
|
|
5
|
+
// Schema version that migrate() records in schema_metadata under the 'schema_version' key.
const SCHEMA_VERSION = 1;
|
|
6
|
+
/**
 * SQLite-backed store for indexed sessions.
 *
 * Owns a better-sqlite3 handle with the sqlite-vec extension loaded and
 * manages the `sources`, `sessions`, `turns`, `messages` and `chunks` tables,
 * the `chunks_fts` FTS5 index over chunk text, and the bookkeeping tables
 * `schema_metadata`, `embedding_providers` and `vector_chunks_metadata`.
 */
export class SessionSearchDb {
    /** Underlying better-sqlite3 database handle. */
    db;

    /** Column list shared by every SELECT that returns message rows. */
    static #messageColumns = `
        id,
        session_id AS sessionId,
        conversation_id AS conversationId,
        turn_id AS turnId,
        source,
        source_path AS sourcePath,
        ordinal,
        role,
        kind,
        timestamp,
        text,
        line_start AS lineStart,
        line_end AS lineEnd,
        metadata_json AS metadataJson`;

    /**
     * Opens (creating if needed) the database at `dbPath`, loads sqlite-vec,
     * enables WAL journaling and foreign keys, and runs migrations.
     *
     * @param {string} dbPath - Filesystem path of the SQLite database file.
     * @throws Rethrows any error raised while initialising the handle; the
     *   handle is closed first so a failed open does not leak it.
     */
    constructor(dbPath) {
        fs.mkdirSync(path.dirname(dbPath), { recursive: true });
        this.db = new DatabaseConstructor(dbPath);
        try {
            sqliteVec.load(this.db);
            this.db.pragma("journal_mode = WAL");
            this.db.pragma("foreign_keys = ON");
            this.migrate();
        }
        catch (error) {
            // Do not leave a half-initialised handle (and its file lock) open.
            this.db.close();
            throw error;
        }
    }

    /** Closes the underlying database handle. */
    close() {
        this.db.close();
    }

    /**
     * Looks up the bookkeeping row of an indexed source file.
     *
     * @param {string} pathValue - Value of `sources.path` to look up.
     * @returns {{path: string, source: string, size: number, mtimeMs: number} | null}
     *   The row, or `null` when the path is not indexed.
     */
    getSource(pathValue) {
        return this.db
            .prepare("SELECT path, source, size, mtime_ms AS mtimeMs FROM sources WHERE path = ?")
            .get(pathValue) ?? null;
    }

    /**
     * @param {string} sourcePath - Value of `messages.source_path`.
     * @returns {boolean} `true` when at least one message row references the source.
     */
    sourceHasMessages(sourcePath) {
        const row = this.db.prepare("SELECT 1 FROM messages WHERE source_path = ? LIMIT 1").get(sourcePath);
        return Boolean(row);
    }

    /**
     * Replaces everything stored for `source.path` with the parsed session,
     * inside a single transaction: existing rows for the source are deleted,
     * then the source, session, turns, messages, chunks and FTS rows are written.
     *
     * @param {object} parsed - Object with `session`, `turns`, `messages` and
     *   `chunks`; each record supplies the named parameters of the matching INSERT.
     * @param {object} source - Supplies `path`, `source`, `size` and `mtimeMs`.
     */
    upsertParsedSession(parsed, source) {
        const insert = this.db.transaction(() => {
            this.deleteSource(source.path);
            this.db
                .prepare(`INSERT INTO sources (path, source, size, mtime_ms, indexed_at)
                    VALUES (@path, @source, @size, @mtimeMs, @indexedAt)`)
                .run({ ...source, indexedAt: new Date().toISOString() });
            this.db
                .prepare(`INSERT INTO sessions
                    (id, conversation_id, source, source_path, title, cwd, started_at, updated_at, message_count, turn_count)
                    VALUES
                    (@id, @conversationId, @source, @sourcePath, @title, @cwd, @startedAt, @updatedAt, @messageCount, @turnCount)
                    ON CONFLICT(id) DO UPDATE SET
                    conversation_id = excluded.conversation_id,
                    source = excluded.source,
                    source_path = excluded.source_path,
                    title = excluded.title,
                    cwd = excluded.cwd,
                    started_at = excluded.started_at,
                    updated_at = excluded.updated_at,
                    message_count = excluded.message_count,
                    turn_count = excluded.turn_count`)
                .run(parsed.session);
            const turnStmt = this.db.prepare(`INSERT OR REPLACE INTO turns
                (id, session_id, source, source_path, ordinal, started_at, updated_at, message_count)
                VALUES
                (@id, @sessionId, @source, @sourcePath, @ordinal, @startedAt, @updatedAt, @messageCount)`);
            for (const turn of parsed.turns) {
                turnStmt.run(turn);
            }
            const messageStmt = this.db.prepare(`INSERT OR REPLACE INTO messages
                (id, session_id, conversation_id, turn_id, source, source_path, ordinal, role, kind, timestamp, text, line_start, line_end, metadata_json)
                VALUES
                (@id, @sessionId, @conversationId, @turnId, @source, @sourcePath, @ordinal, @role, @kind, @timestamp, @text, @lineStart, @lineEnd, @metadataJson)`);
            for (const message of parsed.messages) {
                messageStmt.run({
                    ...message,
                    metadataJson: JSON.stringify(message.metadata)
                });
            }
            const chunkStmt = this.db.prepare(`INSERT OR REPLACE INTO chunks
                (id, message_id, session_id, conversation_id, turn_id, source, source_path, ordinal, chunk_index, role, kind, timestamp, text, line_start, line_end, metadata_json)
                VALUES
                (@id, @messageId, @sessionId, @conversationId, @turnId, @source, @sourcePath, @ordinal, @chunkIndex, @role, @kind, @timestamp, @text, @lineStart, @lineEnd, @metadataJson)`);
            const deleteFtsStmt = this.db.prepare("DELETE FROM chunks_fts WHERE chunk_id = ?");
            const ftsStmt = this.db.prepare(`INSERT INTO chunks_fts (chunk_id, session_id, role, text)
                VALUES (@id, @sessionId, @role, @text)`);
            for (const chunk of parsed.chunks) {
                chunkStmt.run({
                    ...chunk,
                    metadataJson: JSON.stringify(chunk.metadata)
                });
                // INSERT OR REPLACE may have replaced a chunk with the same id,
                // so drop any FTS row for that id before adding the new one.
                deleteFtsStmt.run(chunk.id);
                ftsStmt.run(chunk);
            }
        });
        insert();
    }

    /**
     * Removes every row derived from one source file: vectors, FTS rows,
     * chunks, messages, turns, sessions and the source row itself.
     *
     * Runs in a transaction so a failure cannot leave the source half-deleted;
     * when called from inside another transaction (as upsertParsedSession does)
     * better-sqlite3 nests it.
     *
     * @param {string} sourcePath - Source file path whose rows are removed.
     */
    deleteSource(sourcePath) {
        const removeSource = this.db.transaction(() => {
            // Vectors and FTS rows are resolved through `chunks`, so they must
            // go before the chunk rows themselves.
            this.deleteVectorsForSource(sourcePath);
            this.db.prepare("DELETE FROM chunks_fts WHERE chunk_id IN (SELECT id FROM chunks WHERE source_path = ?)").run(sourcePath);
            this.db.prepare("DELETE FROM chunks WHERE source_path = ?").run(sourcePath);
            this.db.prepare("DELETE FROM messages WHERE source_path = ?").run(sourcePath);
            this.db.prepare("DELETE FROM turns WHERE source_path = ?").run(sourcePath);
            this.db.prepare("DELETE FROM sessions WHERE source_path = ?").run(sourcePath);
            this.db.prepare("DELETE FROM sources WHERE path = ?").run(sourcePath);
        });
        removeSource();
    }

    /**
     * Lists the chunks of a session ordered by `ordinal`.
     *
     * @param {string} sessionId - Session whose chunks are listed.
     * @param {number} [limit=200] - Value bound to the SQL `LIMIT`.
     * @returns {object[]} Chunk rows with camelCase keys and `metadata` parsed
     *   from `metadata_json`.
     */
    listSessionChunks(sessionId, limit = 200) {
        const rows = this.db
            .prepare(`SELECT
                id,
                message_id AS messageId,
                session_id AS sessionId,
                conversation_id AS conversationId,
                turn_id AS turnId,
                source,
                source_path AS sourcePath,
                ordinal,
                chunk_index AS chunkIndex,
                role,
                kind,
                timestamp,
                text,
                line_start AS lineStart,
                line_end AS lineEnd,
                metadata_json AS metadataJson
                FROM chunks
                WHERE session_id = ?
                ORDER BY ordinal ASC
                LIMIT ?`)
            .all(sessionId, limit);
        return rows.map((row) => {
            const { metadataJson, ...rest } = row;
            return {
                ...rest,
                metadata: JSON.parse(metadataJson)
            };
        });
    }

    /**
     * Lists the messages of a session ordered by `ordinal`.
     *
     * @param {string} sessionId - Session whose messages are listed.
     * @param {number} [limit=200] - Value bound to the SQL `LIMIT`.
     * @param {number} [offset=0] - Rows to skip; negative values are treated as 0.
     * @returns {object[]} Message rows with camelCase keys, the parsed
     *   `metadata`, and the raw `metadataJson` string.
     */
    listSessionMessages(sessionId, limit = 200, offset = 0) {
        const rows = this.db
            .prepare(`SELECT ${SessionSearchDb.#messageColumns}
                FROM messages
                WHERE session_id = ?
                ORDER BY ordinal ASC
                LIMIT ?
                OFFSET ?`)
            .all(sessionId, limit, Math.max(offset, 0));
        // Unlike the other message readers this one keeps `metadataJson` on the
        // result; it is retained so existing consumers see the same shape.
        return rows.map((row) => ({
            ...row,
            metadata: JSON.parse(row.metadataJson)
        }));
    }

    /**
     * Lists sessions, most recently updated first, together with the text of
     * the first user message that is not an AGENTS.md / `<INSTRUCTIONS>` preamble.
     *
     * @param {number} [limit=25] - Page size, clamped to the range 1..100.
     * @param {number} [offset=0] - Rows to skip; negative values are treated as 0.
     * @returns {object[]} Session rows with camelCase keys plus `firstUserText`.
     */
    listRecentSessions(limit = 25, offset = 0) {
        const rows = this.db
            .prepare(`SELECT
                s.id,
                s.conversation_id AS conversationId,
                s.source,
                s.source_path AS sourcePath,
                s.title,
                s.cwd,
                s.started_at AS startedAt,
                s.updated_at AS updatedAt,
                s.message_count AS messageCount,
                s.turn_count AS turnCount,
                (
                SELECT m.text
                FROM messages m
                WHERE m.session_id = s.id
                AND m.role = 'user'
                AND m.text NOT LIKE '# AGENTS.md instructions%'
                AND m.text NOT LIKE '<INSTRUCTIONS>%'
                ORDER BY m.ordinal ASC
                LIMIT 1
                ) AS firstUserText
                FROM sessions s
                ORDER BY COALESCE(s.updated_at, s.started_at, '') DESC, s.id DESC
                LIMIT ?
                OFFSET ?`)
            .all(Math.min(Math.max(limit, 1), 100), Math.max(offset, 0));
        return rows;
    }

    /**
     * Fetches one message by id.
     *
     * @param {string} messageId - Value of `messages.id`.
     * @returns {object | null} The message with parsed `metadata`, or `null`
     *   when no row has that id.
     */
    getMessage(messageId) {
        // `id` is the primary key, so the shared helper yields at most one row.
        const [message] = this.messageRows("id = ?", messageId);
        return message ?? null;
    }

    /**
     * Returns a message together with its neighbours in the same session.
     *
     * @param {string} messageId - Id of the target message.
     * @param {number} [before=2] - Maximum number of preceding messages (negative treated as 0).
     * @param {number} [after=2] - Maximum number of following messages (negative treated as 0).
     * @returns {{target: object | null, before: object[], after: object[]}}
     *   Neighbours are in ascending `ordinal` order; all three parts are empty
     *   when the target does not exist.
     */
    listMessagesAround(messageId, before = 2, after = 2) {
        const target = this.getMessage(messageId);
        if (!target) {
            return { target: null, before: [], after: [] };
        }
        // Preceding rows are fetched nearest-first, then flipped back to ascending order.
        const beforeRows = this.messageRows(`session_id = ? AND ordinal < ? ORDER BY ordinal DESC LIMIT ?`, target.sessionId, target.ordinal, Math.max(before, 0)).reverse();
        const afterRows = this.messageRows(`session_id = ? AND ordinal > ? ORDER BY ordinal ASC LIMIT ?`, target.sessionId, target.ordinal, Math.max(after, 0));
        return { target, before: beforeRows, after: afterRows };
    }

    /**
     * Lists the messages of one turn ordered by `ordinal`.
     *
     * @param {string} turnId - Value of `messages.turn_id`.
     * @param {number} [limit=50] - Value bound to the SQL `LIMIT`.
     * @returns {object[]} Messages with parsed `metadata`.
     */
    listTurnMessages(turnId, limit = 50) {
        return this.messageRows(`turn_id = ? ORDER BY ordinal ASC LIMIT ?`, turnId, limit);
    }

    /**
     * Collects row counts and bookkeeping values for the whole index.
     *
     * @returns {object} `sourceFiles`, `sessions`, `turns`, `messages`,
     *   `chunks`, `vectors`, `embeddingProviders`, `schemaVersion` and
     *   `lastIndexedAt` (the maximum `sources.indexed_at`).
     */
    stats() {
        const row = this.db
            .prepare(`SELECT
                (SELECT COUNT(*) FROM sources) AS sourceFiles,
                (SELECT COUNT(*) FROM sessions) AS sessions,
                (SELECT COUNT(*) FROM turns) AS turns,
                (SELECT COUNT(*) FROM messages) AS messages,
                (SELECT COUNT(*) FROM chunks) AS chunks,
                (SELECT CASE WHEN EXISTS (SELECT 1 FROM sqlite_master WHERE name = 'vector_chunks_metadata')
                THEN (SELECT COUNT(*) FROM vector_chunks_metadata)
                ELSE 0
                END) AS vectors,
                (SELECT CASE WHEN EXISTS (SELECT 1 FROM sqlite_master WHERE name = 'embedding_providers')
                THEN (SELECT COUNT(*) FROM embedding_providers)
                ELSE 0
                END) AS embeddingProviders,
                (SELECT COALESCE(MAX(CAST(value AS INTEGER)), 0) FROM schema_metadata WHERE key = 'schema_version') AS schemaVersion,
                (SELECT MAX(indexed_at) FROM sources) AS lastIndexedAt`)
            .get();
        return row;
    }

    /**
     * Creates any missing tables and indexes, adds columns that an existing
     * database may lack, and finally records the schema version.
     *
     * Everything runs in one transaction and the version is stamped last, so a
     * failure part-way through rolls back instead of leaving a database that
     * claims to be migrated.
     */
    migrate() {
        const applyMigrations = this.db.transaction(() => {
            this.db.exec(`
                CREATE TABLE IF NOT EXISTS schema_metadata (
                    key TEXT PRIMARY KEY,
                    value TEXT NOT NULL,
                    updated_at TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS sources (
                    path TEXT PRIMARY KEY,
                    source TEXT NOT NULL,
                    size INTEGER NOT NULL,
                    mtime_ms INTEGER NOT NULL,
                    indexed_at TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS sessions (
                    id TEXT PRIMARY KEY,
                    conversation_id TEXT NOT NULL,
                    source TEXT NOT NULL,
                    source_path TEXT NOT NULL,
                    title TEXT,
                    cwd TEXT,
                    started_at TEXT,
                    updated_at TEXT,
                    message_count INTEGER NOT NULL,
                    turn_count INTEGER NOT NULL
                );

                CREATE TABLE IF NOT EXISTS turns (
                    id TEXT PRIMARY KEY,
                    session_id TEXT NOT NULL,
                    source TEXT NOT NULL,
                    source_path TEXT NOT NULL,
                    ordinal INTEGER NOT NULL,
                    started_at TEXT,
                    updated_at TEXT,
                    message_count INTEGER NOT NULL
                );

                CREATE INDEX IF NOT EXISTS idx_turns_session_ordinal
                    ON turns(session_id, ordinal);

                CREATE TABLE IF NOT EXISTS messages (
                    id TEXT PRIMARY KEY,
                    session_id TEXT NOT NULL,
                    conversation_id TEXT NOT NULL,
                    turn_id TEXT NOT NULL,
                    source TEXT NOT NULL,
                    source_path TEXT NOT NULL,
                    ordinal INTEGER NOT NULL,
                    role TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    timestamp TEXT,
                    text TEXT NOT NULL,
                    line_start INTEGER NOT NULL,
                    line_end INTEGER NOT NULL,
                    metadata_json TEXT NOT NULL
                );

                CREATE INDEX IF NOT EXISTS idx_messages_session_ordinal
                    ON messages(session_id, ordinal);

                CREATE INDEX IF NOT EXISTS idx_messages_turn_ordinal
                    ON messages(turn_id, ordinal);

                CREATE TABLE IF NOT EXISTS chunks (
                    id TEXT PRIMARY KEY,
                    message_id TEXT NOT NULL,
                    session_id TEXT NOT NULL,
                    conversation_id TEXT NOT NULL,
                    turn_id TEXT NOT NULL,
                    source TEXT NOT NULL,
                    source_path TEXT NOT NULL,
                    ordinal INTEGER NOT NULL,
                    chunk_index INTEGER NOT NULL,
                    role TEXT NOT NULL,
                    kind TEXT NOT NULL,
                    timestamp TEXT,
                    text TEXT NOT NULL,
                    line_start INTEGER NOT NULL,
                    line_end INTEGER NOT NULL,
                    metadata_json TEXT NOT NULL
                );

                CREATE INDEX IF NOT EXISTS idx_chunks_session_ordinal
                    ON chunks(session_id, ordinal);

                CREATE VIRTUAL TABLE IF NOT EXISTS chunks_fts USING fts5(
                    chunk_id UNINDEXED,
                    session_id UNINDEXED,
                    role UNINDEXED,
                    text,
                    tokenize = 'unicode61'
                );

                CREATE TABLE IF NOT EXISTS embedding_providers (
                    id TEXT PRIMARY KEY,
                    base_url TEXT NOT NULL,
                    model TEXT NOT NULL,
                    dimensions INTEGER NOT NULL,
                    updated_at TEXT NOT NULL
                );

                CREATE TABLE IF NOT EXISTS vector_chunks_metadata (
                    vector_rowid INTEGER PRIMARY KEY AUTOINCREMENT,
                    chunk_id TEXT UNIQUE NOT NULL,
                    message_id TEXT NOT NULL,
                    session_id TEXT NOT NULL,
                    provider_id TEXT NOT NULL,
                    model TEXT NOT NULL,
                    dimensions INTEGER NOT NULL,
                    indexed_at TEXT NOT NULL,
                    source_fingerprint TEXT NOT NULL
                );
            `);
            // Columns that a pre-existing `sessions` / `chunks` table may be missing.
            const backfilledColumns = [
                ["sessions", "conversation_id", "TEXT"],
                ["sessions", "turn_count", "INTEGER"],
                ["chunks", "message_id", "TEXT"],
                ["chunks", "conversation_id", "TEXT"],
                ["chunks", "turn_id", "TEXT"],
                ["chunks", "chunk_index", "INTEGER"],
                ["chunks", "line_start", "INTEGER"],
                ["chunks", "line_end", "INTEGER"]
            ];
            for (const [table, column, sqlType] of backfilledColumns) {
                this.ensureColumn(table, column, sqlType);
            }
            // Stamp the version only once every schema change has been applied.
            this.setSchemaVersion(SCHEMA_VERSION);
        });
        applyMigrations();
    }

    /**
     * Adds `column` to `table` when `PRAGMA table_info` does not list it.
     *
     * The identifiers are interpolated into the SQL text, so callers must pass
     * trusted, hard-coded names only.
     *
     * @param {string} table - Table to inspect and alter.
     * @param {string} column - Column that must exist.
     * @param {string} sqlType - Type clause used in `ADD COLUMN`.
     */
    ensureColumn(table, column, sqlType) {
        const columns = this.db.prepare(`PRAGMA table_info(${table})`).all();
        if (columns.some((existing) => existing.name === column)) {
            return;
        }
        this.db.prepare(`ALTER TABLE ${table} ADD COLUMN ${column} ${sqlType}`).run();
    }

    /**
     * Upserts the `schema_version` row of `schema_metadata`.
     *
     * @param {number} version - Version to record; stored as a string.
     */
    setSchemaVersion(version) {
        this.db
            .prepare(`INSERT INTO schema_metadata (key, value, updated_at)
                VALUES ('schema_version', @version, @updatedAt)
                ON CONFLICT(key) DO UPDATE SET
                value = excluded.value,
                updated_at = excluded.updated_at`)
            .run({ version: String(version), updatedAt: new Date().toISOString() });
    }

    /**
     * Deletes the vector rows and vector metadata of every chunk belonging to
     * `sourcePath`. Does nothing when either `vector_chunks` or
     * `vector_chunks_metadata` is absent.
     *
     * @param {string} sourcePath - Source file path whose vectors are removed.
     */
    deleteVectorsForSource(sourcePath) {
        if (!this.tableExists("vector_chunks") || !this.tableExists("vector_chunks_metadata")) {
            return;
        }
        const rows = this.db
            .prepare(`SELECT v.vector_rowid AS vectorRowid
                FROM vector_chunks_metadata v
                JOIN chunks c ON c.id = v.chunk_id
                WHERE c.source_path = ?`)
            .all(sourcePath);
        const deleteVector = this.db.prepare("DELETE FROM vector_chunks WHERE rowid = ?");
        const deleteMetadata = this.db.prepare("DELETE FROM vector_chunks_metadata WHERE vector_rowid = ?");
        for (const row of rows) {
            // NOTE(review): the rowid is bound as a BigInt, presumably because the
            // vector table binding requires it — confirm against sqlite-vec.
            deleteVector.run(BigInt(row.vectorRowid));
            deleteMetadata.run(row.vectorRowid);
        }
    }

    /**
     * @param {string} table - Name to look for in `sqlite_master`.
     * @returns {boolean} `true` when a schema object with that name exists.
     */
    tableExists(table) {
        const row = this.db.prepare("SELECT 1 FROM sqlite_master WHERE name = ?").get(table);
        return Boolean(row);
    }

    /**
     * Runs a SELECT over `messages` with the shared column list and converts
     * every row with `messageFromRow`.
     *
     * `whereSql` is interpolated into the statement, so it must be a trusted
     * fragment; values go through `params`.
     *
     * @param {string} whereSql - SQL placed after `WHERE` (may include ORDER BY / LIMIT).
     * @param {...*} params - Positional bind values for the placeholders in `whereSql`.
     * @returns {object[]} Messages with parsed `metadata`.
     */
    messageRows(whereSql, ...params) {
        const rows = this.db
            .prepare(`SELECT ${SessionSearchDb.#messageColumns}
                FROM messages
                WHERE ${whereSql}`)
            .all(...params);
        return rows.map(messageFromRow);
    }
}
|
|
426
|
+
/**
 * Converts a raw message row into its public shape by replacing the
 * serialized `metadataJson` column with the parsed `metadata` value.
 *
 * @param {object} row - Row carrying a `metadataJson` JSON string.
 * @returns {object} Every other column of `row` plus `metadata`.
 */
function messageFromRow(row) {
    const { metadataJson: serializedMetadata, ...columns } = row;
    const metadata = JSON.parse(serializedMetadata);
    return { ...columns, metadata };
}
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE, OpenAICompatibleEmbeddingProvider, isEmbeddingProviderConfigured } from "./embeddings.js";
|
|
4
|
+
import { defaultCodexHome, defaultIndexPath, expandHome } from "./paths.js";
|
|
5
|
+
import { indexStats } from "./search.js";
|
|
6
|
+
import { semanticFreshness } from "./semantic.js";
|
|
7
|
+
/**
 * Builds a health report for the index, the Codex directories and the
 * embedding provider.
 *
 * @param {object} [options]
 * @param {string} [options.dbPath] - Index database path; defaults to `defaultIndexPath()`.
 * @param {string} [options.codexHome] - Codex home directory; defaults to `defaultCodexHome()`.
 * @param {object} [options.embedding] - Embedding provider configuration.
 * @returns {Promise<object>} `{ ok, paths, index, embedding, semantic, recommendations }`,
 *   where `ok` is true only when no recommendation was produced.
 */
export async function runDoctor(options = {}) {
    const indexFile = expandHome(options.dbPath ?? defaultIndexPath());
    const home = expandHome(options.codexHome ?? defaultCodexHome());
    const liveDir = path.join(home, "sessions");
    const archiveDir = path.join(home, "archived_sessions");
    const provider = new OpenAICompatibleEmbeddingProvider(options.embedding);
    const semanticConfigured = isEmbeddingProviderConfigured(provider.config);
    const findings = [];

    const paths = {
        dbPath: indexFile,
        dbExists: fs.existsSync(indexFile),
        codexHome: home,
        codexHomeExists: fs.existsSync(home),
        sessionsDir: liveDir,
        sessionsDirExists: fs.existsSync(liveDir),
        archivedSessionsDir: archiveDir,
        archivedSessionsDirExists: fs.existsSync(archiveDir)
    };
    const index = indexStats(indexFile);

    // Filesystem and lexical-index findings come first.
    if (!paths.codexHomeExists) {
        findings.push(`Codex home does not exist: ${home}`);
    }
    if (!paths.sessionsDirExists) {
        findings.push(`Codex sessions directory does not exist: ${liveDir}`);
    }
    const lexicalIndexEmpty = index.sourceFiles === 0 || index.messages === 0;
    if (lexicalIndexEmpty) {
        findings.push("Run `wwi index` to build the lexical message index.");
    }

    // The provider is only contacted when semantic search is configured.
    let embedding;
    if (semanticConfigured) {
        embedding = await probeEmbedding(provider);
        if (!embedding.available) {
            findings.push(`Embedding provider unavailable: ${embedding.error}`);
        }
    }
    else {
        embedding = skippedEmbedding(provider);
    }

    // The probe above already ran (or was skipped), so freshness must not probe again.
    const semantic = await semanticFreshness({
        dbPath: indexFile,
        embedding: options.embedding,
        dimensions: embedding.dimensions,
        probeProvider: false
    });
    const vectorsMissing = semantic.missingChunks !== null && semantic.missingChunks > 0;
    if (semanticConfigured && vectorsMissing) {
        findings.push(semantic.recommendation ?? "Run `wwi index --semantic` to refresh semantic vectors.");
    }

    return {
        ok: findings.length === 0,
        paths,
        index,
        embedding,
        semantic,
        recommendations: findings
    };
}
|
|
57
|
+
/**
 * Contacts the embedding provider and reports whether it can embed text.
 *
 * Availability is decided by the dimension probe alone. The model listing is
 * informational, so a provider that can embed but cannot list models is still
 * reported as available, with an empty `models` array.
 *
 * @param {object} provider - Provider exposing `config`, `listModels()` and `probeDimensions()`.
 * @returns {Promise<object>} `{ available, checked, providerId, baseUrl, model,
 *   dimensions, models, error }`; never rejects for provider failures.
 */
async function probeEmbedding(provider) {
    if (!isEmbeddingProviderConfigured(provider.config)) {
        return unconfiguredEmbedding(provider, true);
    }
    const unavailable = (reason) => ({
        available: false,
        checked: true,
        providerId: provider.config.id,
        baseUrl: provider.config.baseUrl || null,
        model: provider.config.model || null,
        dimensions: null,
        models: [],
        error: reason instanceof Error ? reason.message : String(reason)
    });
    let modelsOutcome;
    let dimensionsOutcome;
    try {
        // Both requests still run concurrently; allSettled keeps their outcomes separate.
        [modelsOutcome, dimensionsOutcome] = await Promise.allSettled([
            provider.listModels(),
            provider.probeDimensions()
        ]);
    }
    catch (error) {
        // Only reachable when a provider method throws synchronously.
        return unavailable(error);
    }
    if (dimensionsOutcome.status === "rejected") {
        return unavailable(dimensionsOutcome.reason);
    }
    return {
        available: true,
        checked: true,
        providerId: provider.config.id,
        baseUrl: provider.config.baseUrl,
        model: provider.config.model,
        dimensions: dimensionsOutcome.value,
        models: modelsOutcome.status === "fulfilled" ? modelsOutcome.value : [],
        error: null
    };
}
|
|
87
|
+
/**
 * Builds the embedding report used when the provider was not contacted.
 *
 * @param {object} provider - Provider whose `config` is reported.
 * @returns {object} The unconfigured report when base URL or model is missing,
 *   otherwise an unchecked, unavailable report echoing the configuration.
 */
function skippedEmbedding(provider) {
    const { config } = provider;
    if (isEmbeddingProviderConfigured(config)) {
        return {
            checked: false,
            available: false,
            providerId: config.id,
            baseUrl: config.baseUrl,
            model: config.model,
            dimensions: null,
            models: [],
            error: "Semantic search is disabled; embedding provider was not checked."
        };
    }
    return unconfiguredEmbedding(provider, false);
}
|
|
102
|
+
/**
 * Builds the embedding report for a provider that lacks a base URL or model.
 *
 * @param {object} provider - Provider whose `config` is reported.
 * @param {boolean} checked - Value copied into the report's `checked` field.
 * @returns {object} Unavailable report; empty `baseUrl` / `model` become `null`
 *   and `error` is the shared "not configured" message.
 */
function unconfiguredEmbedding(provider, checked) {
    const { id, baseUrl, model } = provider.config;
    return {
        checked,
        available: false,
        providerId: id,
        baseUrl: baseUrl || null,
        model: model || null,
        dimensions: null,
        models: [],
        error: EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE
    };
}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
// Request timeout in milliseconds (30 s) used when the provider config supplies no `timeoutMs`.
export const DEFAULT_EMBEDDING_TIMEOUT_MS = 30_000;
|
|
2
|
+
// Error text used when the embedding provider has no base URL or no model configured.
export const EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE = "Semantic search is not configured. Set WHERE_WAS_I_EMBEDDING_BASE_URL and WHERE_WAS_I_EMBEDDING_MODEL in ~/.where-was-i/config.json or environment variables.";
|
|
3
|
+
/**
 * Error raised when the embedding provider is unconfigured, unreachable, or
 * returns an unusable response.
 *
 * Accepts the standard `Error` arguments (`message`, `options` with optional `cause`).
 */
export class EmbeddingProviderUnavailableError extends Error {
    // Own property so the name shows up in logs and `error.name` checks.
    name = "EmbeddingProviderUnavailableError";
}
|
|
9
|
+
/**
 * Client for an embedding service that exposes `/models` and `/embeddings`
 * endpoints under a configurable base URL.
 *
 * Every failure that originates from the service (unconfigured, unreachable,
 * timed out, non-2xx, malformed payload) is raised as
 * `EmbeddingProviderUnavailableError`.
 */
export class OpenAICompatibleEmbeddingProvider {
    /** Normalised configuration: `id`, `baseUrl`, `model`, `apiKey`, `timeoutMs`. */
    config;

    /**
     * @param {object} [config]
     * @param {string} [config.id] - Provider id; falls back to `model`, then `"unconfigured"`.
     * @param {string} [config.baseUrl] - Service base URL; trailing slashes are removed.
     * @param {string} [config.model] - Embedding model name.
     * @param {string} [config.apiKey] - Sent as a bearer token when present.
     * @param {number} [config.timeoutMs] - Per-request timeout in milliseconds.
     */
    constructor(config = {}) {
        this.config = {
            id: config.id ?? config.model ?? "unconfigured",
            baseUrl: config.baseUrl ? trimTrailingSlash(config.baseUrl) : "",
            model: config.model ?? "",
            apiKey: config.apiKey,
            timeoutMs: config.timeoutMs ?? DEFAULT_EMBEDDING_TIMEOUT_MS
        };
    }

    /**
     * Lists the model ids advertised by the service.
     *
     * @returns {Promise<string[]>} String ids found in the response's `data`
     *   array; empty when the response carries no such array.
     * @throws {EmbeddingProviderUnavailableError} When no base URL is configured
     *   or the request fails.
     */
    async listModels() {
        this.assertBaseUrlConfigured();
        const response = await this.request("/models");
        // The body may be null (empty response) or a plain string (non-JSON).
        const entries = Array.isArray(response?.data) ? response.data : [];
        return entries.map((model) => model?.id).filter((id) => typeof id === "string");
    }

    /**
     * Embeds a batch of texts.
     *
     * @param {string[]} texts - Inputs to embed; an empty array short-circuits
     *   without a request.
     * @returns {Promise<{model: string, dimensions: number, embeddings: number[][]}>}
     *   Embeddings ordered by the `index` reported for each item.
     * @throws {EmbeddingProviderUnavailableError} When unconfigured, when the
     *   request fails, or when the count or dimensions of the returned vectors
     *   do not match the inputs.
     */
    async embedDocuments(texts) {
        this.assertConfigured();
        if (texts.length === 0) {
            return { model: this.config.model, dimensions: 0, embeddings: [] };
        }
        const response = await this.request("/embeddings", {
            method: "POST",
            body: JSON.stringify({
                model: this.config.model,
                input: texts
            })
        });
        const items = Array.isArray(response?.data) ? response.data : [];
        const embeddings = items
            .slice()
            .sort((left, right) => (left?.index ?? 0) - (right?.index ?? 0))
            .map((item) => item?.embedding)
            .filter((embedding) => Array.isArray(embedding));
        if (embeddings.length !== texts.length) {
            throw new EmbeddingProviderUnavailableError(`Embedding provider returned ${embeddings.length} embeddings for ${texts.length} inputs.`);
        }
        const dimensions = embeddings[0]?.length ?? 0;
        if (dimensions === 0 || embeddings.some((embedding) => embedding.length !== dimensions)) {
            throw new EmbeddingProviderUnavailableError("Embedding provider returned empty or inconsistent dimensions.");
        }
        return {
            model: response?.model ?? this.config.model,
            dimensions,
            embeddings
        };
    }

    /**
     * Embeds a single query string.
     *
     * @param {string} text - Query text.
     * @returns {Promise<number[]>} The embedding vector.
     */
    async embedQuery(text) {
        return (await this.embedDocuments([text])).embeddings[0] ?? [];
    }

    /**
     * Embeds a fixed probe string to discover the model's vector size.
     *
     * @returns {Promise<number>} Number of dimensions of the returned vector.
     */
    async probeDimensions() {
        return (await this.embedDocuments(["where was i dimension probe"])).dimensions;
    }

    /**
     * @throws {EmbeddingProviderUnavailableError} When base URL or model is missing.
     */
    assertConfigured() {
        this.assertBaseUrlConfigured();
        if (!this.config.model) {
            throw new EmbeddingProviderUnavailableError(EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE);
        }
    }

    /**
     * @throws {EmbeddingProviderUnavailableError} When the base URL is missing.
     */
    assertBaseUrlConfigured() {
        if (!this.config.baseUrl) {
            throw new EmbeddingProviderUnavailableError(EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE);
        }
    }

    /**
     * Sends a JSON request to `baseUrl + path` and returns the decoded body.
     *
     * The timeout covers both the response headers and the body download, so a
     * service that stalls mid-body cannot hang the caller.
     *
     * @param {string} path - Path appended to the base URL (e.g. `"/models"`).
     * @param {object} [init] - Extra `fetch` options; `headers` are merged.
     * @returns {Promise<*>} Parsed JSON, the raw text when it is not JSON, or
     *   `null` for an empty body.
     * @throws {EmbeddingProviderUnavailableError} On network failure, timeout or
     *   a non-2xx status.
     */
    async request(path, init = {}) {
        const headers = new Headers(init.headers);
        headers.set("content-type", "application/json");
        if (this.config.apiKey) {
            headers.set("authorization", `Bearer ${this.config.apiKey}`);
        }
        const controller = new AbortController();
        const timeout = setTimeout(() => controller.abort(), this.config.timeoutMs);
        let response;
        let text;
        try {
            response = await fetch(`${this.config.baseUrl}${path}`, {
                ...init,
                headers,
                signal: controller.signal
            });
            // Read the body while the abort timer is still armed.
            text = await response.text();
        }
        catch (error) {
            const isTimeout = error?.name === "AbortError";
            throw new EmbeddingProviderUnavailableError(isTimeout
                ? `Embedding provider request timed out after ${this.config.timeoutMs}ms at ${this.config.baseUrl}.`
                : `Embedding provider is unavailable at ${this.config.baseUrl}: ${error instanceof Error ? error.message : String(error)}`, { cause: error });
        }
        finally {
            clearTimeout(timeout);
        }
        const body = text.length > 0 ? parseJson(text) : null;
        if (!response.ok) {
            throw new EmbeddingProviderUnavailableError(`Embedding provider request failed: ${response.status} ${response.statusText} ${typeof body === "string" ? body : JSON.stringify(body)}`);
        }
        return body;
    }
}
|
|
105
|
+
/**
 * Removes every `/` at the end of a string.
 *
 * @param {string} input - Text to trim.
 * @returns {string} `input` without trailing slashes.
 */
function trimTrailingSlash(input) {
    let end = input.length;
    while (end > 0 && input[end - 1] === "/") {
        end -= 1;
    }
    return input.slice(0, end);
}
|
|
108
|
+
/**
 * Tells whether an embedding configuration has both a base URL and a model.
 *
 * @param {{baseUrl?: string, model?: string}} config - Provider configuration.
 * @returns {boolean} `true` only when both values are truthy.
 */
export function isEmbeddingProviderConfigured(config) {
    if (!config.baseUrl) {
        return false;
    }
    return Boolean(config.model);
}
|
|
111
|
+
/**
 * Decodes JSON text, falling back to the input when it is not valid JSON.
 *
 * @param {string} text - Response text to decode.
 * @returns {*} The parsed value, or `text` unchanged when parsing fails.
 */
function parseJson(text) {
    let decoded = text;
    try {
        decoded = JSON.parse(text);
    }
    catch {
        // Not JSON: the caller receives the raw text.
    }
    return decoded;
}
|