@kyleparrott/where-was-i 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +89 -0
- package/README.md +167 -0
- package/dist/src/cli.js +423 -0
- package/dist/src/core/codex.js +303 -0
- package/dist/src/core/config.js +168 -0
- package/dist/src/core/database.js +432 -0
- package/dist/src/core/doctor.js +113 -0
- package/dist/src/core/embeddings.js +118 -0
- package/dist/src/core/indexer.js +60 -0
- package/dist/src/core/paths.js +20 -0
- package/dist/src/core/reset.js +18 -0
- package/dist/src/core/search-mode.js +17 -0
- package/dist/src/core/search.js +562 -0
- package/dist/src/core/semantic.js +220 -0
- package/dist/src/core/types.js +1 -0
- package/dist/src/core/vector.js +311 -0
- package/dist/src/mcp.js +345 -0
- package/dist/src/web-client.js +61 -0
- package/dist/src/web-settings.js +157 -0
- package/dist/src/web-style.js +797 -0
- package/dist/src/web-utils.js +81 -0
- package/dist/src/web-views.js +389 -0
- package/dist/src/web.js +512 -0
- package/docs/assets/web-ui.png +0 -0
- package/package.json +64 -0
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import { SessionSearchDb } from "./database.js";
|
|
3
|
+
import { EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE, EmbeddingProviderUnavailableError, OpenAICompatibleEmbeddingProvider, isEmbeddingProviderConfigured } from "./embeddings.js";
|
|
4
|
+
import { clearEmbeddings, countChunksMissingEmbeddings, embeddingCoverage, ensureVectorStore, listChunksMissingEmbeddings, upsertChunkEmbedding } from "./vector.js";
|
|
5
|
+
/**
 * Embed chunks that have no stored vector for the configured provider and
 * write the resulting vectors to the session database.
 *
 * Progress is reported through `options.onProgress` with the phases
 * "probing", "start", "batch", "progress", "complete" and "unavailable".
 *
 * @param {object} options
 * @param {*} options.dbPath - Passed to `SessionSearchDb`.
 * @param {*} options.embedding - Passed to `OpenAICompatibleEmbeddingProvider`.
 * @param {number} [options.batchSize=32] - Chunks per embedding request, clamped to 1..128.
 * @param {number} [options.maxChunks] - Upper bound on chunks embedded in this run (at least 1).
 * @param {Array} [options.sessionIds] - Restricts the run to these sessions; empty means no filter.
 * @param {Function} [options.onProgress] - Receives one event object per phase.
 * @returns {Promise<object>} `{ available, providerId, model, dimensions, indexed, skipped, failed }`.
 *   `skipped` is the size of the last batch when the provider returned no
 *   usable embedding for any chunk in it, otherwise 0.
 * @throws Rethrows every error except `EmbeddingProviderUnavailableError`,
 *   which is converted into an `available: false` summary.
 */
export async function indexSemanticChunks(options) {
    const provider = new OpenAICompatibleEmbeddingProvider(options.embedding);
    const batchSize = Math.min(Math.max(options.batchSize ?? 32, 1), 128);
    const maxChunks = Math.max(options.maxChunks ?? Number.MAX_SAFE_INTEGER, 1);
    const sessionIds = options.sessionIds ?? [];
    const db = new SessionSearchDb(options.dbPath);
    const notify = options.onProgress;
    let indexed = 0;
    // Single place that shapes progress events so every phase carries the same
    // leading fields in the same order.
    const report = (phase, dimensions, total, extra = {}) => {
        notify?.({
            phase,
            providerId: provider.config.id,
            model: provider.config.model,
            dimensions,
            indexed,
            total,
            ...extra
        });
    };
    try {
        report("probing", null, null);
        // A provider-unavailable failure here is handled by the catch below;
        // `indexed` is still 0 at this point.
        const dimensions = await provider.probeDimensions();
        ensureVectorStore(db.db, provider.config, dimensions);
        const totalMissing = countChunksMissingEmbeddings(db.db, provider.config, dimensions, sessionIds);
        const total = Math.min(totalMissing, maxChunks);
        report("start", dimensions, total);
        let skipped = 0;
        while (indexed < total) {
            const remaining = total - indexed;
            const chunks = listChunksMissingEmbeddings(db.db, provider.config, dimensions, Math.min(batchSize, remaining), sessionIds);
            if (chunks.length === 0) {
                break;
            }
            report("batch", dimensions, total, { batchSize: chunks.length });
            const batch = await provider.embedDocuments(chunks.map((chunk) => chunk.text));
            const indexedBeforeBatch = indexed;
            chunks.forEach((chunk, index) => {
                const embedding = batch.embeddings[index];
                if (chunk && embedding) {
                    upsertChunkEmbedding(db.db, provider.config, dimensions, chunk, embedding);
                    indexed += 1;
                }
            });
            report("progress", dimensions, total, { batchSize: chunks.length });
            if (indexed === indexedBeforeBatch) {
                // Nothing was stored, so the next query would return the very
                // same chunks again. Stop instead of looping forever.
                skipped = chunks.length;
                break;
            }
        }
        report("complete", dimensions, total);
        return {
            available: true,
            providerId: provider.config.id,
            model: provider.config.model,
            dimensions,
            indexed,
            skipped,
            failed: null
        };
    }
    catch (error) {
        if (error instanceof EmbeddingProviderUnavailableError) {
            const summary = unavailableSummary(provider, error, indexed);
            report("unavailable", null, null, { failed: summary.failed });
            return summary;
        }
        throw error;
    }
    finally {
        db.close();
    }
}
|
|
142
|
+
/**
 * Open the session database at `dbPath`, remove all stored embeddings via
 * `clearEmbeddings`, and close the database again.
 *
 * @param {*} dbPath - Passed to `SessionSearchDb`.
 * @returns {*} Whatever `clearEmbeddings` returns.
 */
export function clearSemanticEmbeddings(dbPath) {
    const store = new SessionSearchDb(dbPath);
    let outcome;
    try {
        outcome = clearEmbeddings(store.db);
    }
    finally {
        // Always release the handle, even when clearing throws.
        store.close();
    }
    return outcome;
}
|
|
151
|
+
/**
 * Report how complete the stored embeddings are for the configured provider.
 *
 * The provider is only contacted when it is configured and
 * `options.probeProvider` is truthy; otherwise `providerAvailable` stays
 * `null` (or `false` when the provider is not configured).
 *
 * @param {object} options
 * @param {*} options.embedding - Passed to `OpenAICompatibleEmbeddingProvider`.
 * @param {*} options.dbPath - Database location; a missing file yields an empty coverage report.
 * @param {number|null} [options.dimensions] - Known dimension count, used unless a probe replaces it.
 * @param {boolean} [options.probeProvider] - Whether to call `probeDimensions()`.
 * @returns {Promise<object>} The coverage fields plus `providerAvailable` and `providerError`.
 */
export async function semanticFreshness(options) {
    const provider = new OpenAICompatibleEmbeddingProvider(options.embedding);
    let dimensions = options.dimensions ?? null;
    let providerAvailable = null;
    let providerError = null;
    const configured = isEmbeddingProviderConfigured(provider.config);
    if (configured) {
        if (options.probeProvider) {
            try {
                dimensions = await provider.probeDimensions();
                providerAvailable = true;
            }
            catch (error) {
                // A failed probe is reported, not thrown.
                providerAvailable = false;
                providerError = error instanceof Error ? error.message : String(error);
            }
        }
    }
    else {
        providerAvailable = false;
        providerError = EMBEDDING_PROVIDER_UNCONFIGURED_MESSAGE;
    }
    // Dimensions are ignored entirely while the provider is unconfigured.
    const effectiveDimensions = configured ? dimensions : null;
    const decorate = (coverage, stale) => ({
        ...coverage,
        staleForConfiguredProvider: stale,
        recommendation: configured ? coverage.recommendation : "Configure semantic embeddings before running semantic search.",
        providerAvailable,
        providerError
    });
    if (!fs.existsSync(options.dbPath)) {
        return decorate(emptyEmbeddingCoverage(provider.config, effectiveDimensions), false);
    }
    const db = new SessionSearchDb(options.dbPath);
    try {
        const coverage = embeddingCoverage(db.db, provider.config, effectiveDimensions);
        return decorate(coverage, configured ? coverage.staleForConfiguredProvider : false);
    }
    finally {
        db.close();
    }
}
|
|
196
|
+
/**
 * Build the coverage report used when there is no database file: every count
 * is zero and nothing is stale.
 *
 * @param {{id: *, model: *}} provider - Provider config supplying `id` and `model`.
 * @param {number|null} dimensions - Echoed back; when falsy, `missingChunks` is `null`.
 * @returns {object} Coverage record with the same keys as `embeddingCoverage`.
 */
function emptyEmbeddingCoverage(provider, dimensions) {
    const { id: providerId, model } = provider;
    const missingChunks = dimensions ? 0 : null;
    return {
        providerId,
        model,
        dimensions,
        totalEligibleChunks: 0,
        indexedChunks: 0,
        missingChunks,
        lastIndexedAt: null,
        incompatibleStoredVectors: 0,
        staleForConfiguredProvider: false,
        recommendation: null
    };
}
|
|
210
|
+
/**
 * Build the indexing summary returned when the embedding provider cannot be
 * reached.
 *
 * @param {{config: {id: *, model: *}}} provider - Provider whose config identifies the run.
 * @param {{message: string}} error - Its `message` becomes the `failed` field.
 * @param {number} [indexed=0] - Chunks stored before the failure.
 * @returns {object} Summary with `available: false` and `dimensions: null`.
 */
function unavailableSummary(provider, error, indexed = 0) {
    const { id: providerId, model } = provider.config;
    const failed = error.message;
    return {
        available: false,
        providerId,
        model,
        dimensions: null,
        indexed,
        skipped: 0,
        failed
    };
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
/**
 * Make sure the vector table exists with the requested dimensions, that no
 * stored vector belongs to a different provider/model/dimension, and record
 * the provider in `embedding_providers`.
 *
 * @param {object} db - Database handle exposing `prepare` and `exec`.
 * @param {{id: *, baseUrl: *, model: *}} provider - Provider config to record.
 * @param {number} dimensions - Must be a positive integer.
 * @throws {Error} When `dimensions` is not a positive integer, or when one of
 *   the checks it delegates to fails.
 */
export function ensureVectorStore(db, provider, dimensions) {
    const dimensionsValid = Number.isInteger(dimensions) && dimensions > 0;
    if (!dimensionsValid) {
        throw new Error(`Vector dimensions must be a positive integer. Received ${dimensions}.`);
    }
    ensureVectorTable(db, dimensions);
    assertNoIncompatibleStoredVectors(db, provider, dimensions);
    const upsertProvider = db.prepare(`INSERT INTO embedding_providers (id, base_url, model, dimensions, updated_at)
       VALUES (@id, @baseUrl, @model, @dimensions, @updatedAt)
       ON CONFLICT(id) DO UPDATE SET
         base_url = excluded.base_url,
         model = excluded.model,
         dimensions = excluded.dimensions,
         updated_at = excluded.updated_at`);
    const { id, baseUrl, model } = provider;
    upsertProvider.run({
        id,
        baseUrl,
        model,
        dimensions,
        updatedAt: new Date().toISOString()
    });
}
|
|
22
|
+
/**
 * Store (or replace) the embedding for one chunk.
 *
 * The previous vector for the chunk is deleted, a metadata row is inserted,
 * and the vector is inserted under the metadata row's rowid. These three
 * writes run inside one transaction so a failure part-way through cannot
 * leave a metadata row without its vector.
 *
 * @param {object} db - Database handle exposing `prepare` and `transaction`.
 * @param {{id: *, model: *}} provider - Provider config recorded with the vector.
 * @param {number} dimensions - Expected length of `embedding`.
 * @param {{id: *, messageId: *, sessionId: *, text: *}} chunk - Chunk being embedded.
 * @param {ArrayLike<number>} embedding - Vector values.
 * @throws {Error} When `embedding.length` differs from `dimensions`, or when
 *   the vector store checks or any write fails.
 */
export function upsertChunkEmbedding(db, provider, dimensions, chunk, embedding) {
    if (embedding.length !== dimensions) {
        throw new Error(`Embedding dimension mismatch for chunk ${chunk.id}: expected ${dimensions}, got ${embedding.length}.`);
    }
    ensureVectorStore(db, provider, dimensions);
    const replaceEmbedding = db.transaction(() => {
        deleteChunkEmbedding(db, chunk.id);
        const indexedAt = new Date().toISOString();
        const result = db
            .prepare(`INSERT INTO vector_chunks_metadata
         (chunk_id, message_id, session_id, provider_id, model, dimensions, indexed_at, source_fingerprint)
       VALUES
         (@chunkId, @messageId, @sessionId, @providerId, @model, @dimensions, @indexedAt, @sourceFingerprint)`)
            .run({
            chunkId: chunk.id,
            messageId: chunk.messageId,
            sessionId: chunk.sessionId,
            providerId: provider.id,
            model: provider.model,
            dimensions,
            indexedAt,
            sourceFingerprint: sourceFingerprint(chunk)
        });
        // The vector row reuses the metadata rowid so the two tables can be joined.
        const vectorRowid = BigInt(result.lastInsertRowid);
        db.prepare("INSERT INTO vector_chunks(rowid, embedding) VALUES (?, ?)").run(vectorRowid, vectorToBuffer(embedding));
    });
    replaceEmbedding();
}
|
|
47
|
+
/**
 * Remove the stored vector and its metadata row for one chunk.
 *
 * Does nothing when the `vector_chunks` table does not exist or the chunk has
 * no metadata row. Both deletes run in one transaction so the vector and its
 * metadata are removed together or not at all.
 *
 * @param {object} db - Database handle exposing `prepare` and `transaction`.
 * @param {*} chunkId - Value matched against `vector_chunks_metadata.chunk_id`.
 */
export function deleteChunkEmbedding(db, chunkId) {
    if (!tableExists(db, "vector_chunks")) {
        return;
    }
    const row = db
        .prepare("SELECT vector_rowid AS vectorRowid FROM vector_chunks_metadata WHERE chunk_id = ?")
        .get(chunkId);
    if (!row) {
        return;
    }
    const removeBoth = db.transaction(() => {
        db.prepare("DELETE FROM vector_chunks WHERE rowid = ?").run(BigInt(row.vectorRowid));
        db.prepare("DELETE FROM vector_chunks_metadata WHERE vector_rowid = ?").run(row.vectorRowid);
    });
    removeBoth();
}
|
|
60
|
+
/**
 * Query `vector_chunks` with `embedding MATCH ? AND k = ?` and return the
 * matching rows joined with their metadata, ordered by distance.
 *
 * Returns an empty array when the `vector_chunks` table does not exist.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Only rows recorded for this provider/model are returned.
 * @param {ArrayLike<number>} embedding - Query vector; its length is used as the dimension count.
 * @param {number} limit - Used for both `k` and `LIMIT`; values below 1 are raised to 1.
 * @returns {object[]} Rows with `vectorRowid`, `distance` and the metadata columns.
 * @throws {Error} When the table dimensions or stored vectors do not match the query.
 */
export function searchVectorChunks(db, provider, embedding, limit) {
    if (!tableExists(db, "vector_chunks")) {
        return [];
    }
    const queryDimensions = embedding.length;
    assertVectorTableDimensions(db, queryDimensions);
    assertNoIncompatibleStoredVectors(db, provider, queryDimensions);
    const statement = db.prepare(`SELECT
         vector_chunks.rowid AS vectorRowid,
         vector_chunks.distance AS distance,
         m.chunk_id AS chunkId,
         m.message_id AS messageId,
         m.session_id AS sessionId,
         m.provider_id AS providerId,
         m.model AS model,
         m.dimensions AS dimensions,
         m.indexed_at AS indexedAt,
         m.source_fingerprint AS sourceFingerprint
       FROM vector_chunks
       JOIN vector_chunks_metadata m ON m.vector_rowid = vector_chunks.rowid
       WHERE embedding MATCH ? AND k = ?
         AND m.provider_id = ?
         AND m.model = ?
         AND m.dimensions = ?
       ORDER BY distance
       LIMIT ?`);
    const queryVector = vectorToBuffer(embedding);
    const neighbours = Math.max(limit, 1);
    return statement.all(queryVector, neighbours, provider.id, provider.model, queryDimensions, neighbours);
}
|
|
89
|
+
/**
 * List chunks that have no metadata row for the given provider, model and
 * dimension count.
 *
 * Only chunks with role 'user' or 'assistant' and non-null message,
 * conversation and turn ids are considered. Results are ordered by `ordinal`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Provider config to match against.
 * @param {number} dimensions - Dimension count to match against.
 * @param {number} [limit=500] - Maximum rows; values below 1 are raised to 1.
 * @param {Array} [sessionIds=[]] - When non-empty, restricts results to these sessions.
 * @returns {object[]} Chunk rows with `metadataJson` parsed into `metadata`.
 */
export function listChunksMissingEmbeddings(db, provider, dimensions, limit = 500, sessionIds = []) {
    let sessionFilter = "";
    if (sessionIds.length > 0) {
        const placeholders = sessionIds.map(() => "?").join(", ");
        sessionFilter = `AND c.session_id IN (${placeholders})`;
    }
    const statement = db.prepare(`SELECT
         c.id,
         c.message_id AS messageId,
         c.session_id AS sessionId,
         c.conversation_id AS conversationId,
         c.turn_id AS turnId,
         c.source,
         c.source_path AS sourcePath,
         c.ordinal,
         c.chunk_index AS chunkIndex,
         c.role,
         c.kind,
         c.timestamp,
         c.text,
         c.line_start AS lineStart,
         c.line_end AS lineEnd,
         c.metadata_json AS metadataJson
       FROM chunks c
       LEFT JOIN vector_chunks_metadata v
         ON v.chunk_id = c.id
         AND v.provider_id = ?
         AND v.model = ?
         AND v.dimensions = ?
       WHERE c.role IN ('user', 'assistant')
         AND c.message_id IS NOT NULL
         AND c.conversation_id IS NOT NULL
         AND c.turn_id IS NOT NULL
         ${sessionFilter}
         AND v.chunk_id IS NULL
       ORDER BY c.ordinal ASC
       LIMIT ?`);
    // Bind order mirrors the placeholders: join filters, session ids, limit.
    const bindings = [provider.id, provider.model, dimensions, ...sessionIds, Math.max(limit, 1)];
    const rows = statement.all(...bindings);
    return rows.map(({ metadataJson, ...columns }) => ({
        ...columns,
        metadata: JSON.parse(metadataJson)
    }));
}
|
|
132
|
+
/**
 * Count chunks that have no metadata row for the given provider, model and
 * dimension count, using the same eligibility filters as
 * `listChunksMissingEmbeddings`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Provider config to match against.
 * @param {number} dimensions - Dimension count to match against.
 * @param {Array} [sessionIds=[]] - When non-empty, restricts the count to these sessions.
 * @returns {number} The `count` column of the query result.
 */
export function countChunksMissingEmbeddings(db, provider, dimensions, sessionIds = []) {
    let sessionFilter = "";
    if (sessionIds.length > 0) {
        const placeholders = sessionIds.map(() => "?").join(", ");
        sessionFilter = `AND c.session_id IN (${placeholders})`;
    }
    const statement = db.prepare(`SELECT COUNT(*) AS count
       FROM chunks c
       LEFT JOIN vector_chunks_metadata v
         ON v.chunk_id = c.id
         AND v.provider_id = ?
         AND v.model = ?
         AND v.dimensions = ?
       WHERE c.role IN ('user', 'assistant')
         AND c.message_id IS NOT NULL
         AND c.conversation_id IS NOT NULL
         AND c.turn_id IS NOT NULL
         ${sessionFilter}
         AND v.chunk_id IS NULL`);
    const bindings = [provider.id, provider.model, dimensions, ...sessionIds];
    const { count } = statement.get(...bindings);
    return count;
}
|
|
151
|
+
/**
 * Convert a numeric vector to a `Buffer` of 32-bit floats.
 *
 * @param {Iterable<number>|ArrayLike<number>} vector - Values to encode.
 * @returns {Buffer} Buffer over the `Float32Array` copy of `vector` (4 bytes per value).
 */
export function vectorToBuffer(vector) {
    const floats = Float32Array.from(vector);
    const { buffer: bytes } = floats;
    return Buffer.from(bytes);
}
|
|
154
|
+
/**
 * Remove all stored embeddings: drop `vector_chunks` and empty
 * `vector_chunks_metadata` and `embedding_providers`, inside one transaction.
 *
 * Row counts are read before the transaction runs; tables that do not exist
 * count as zero and are skipped.
 *
 * @param {object} db - Database handle exposing `prepare`, `exec` and `transaction`.
 * @returns {{vectorsDeleted: number, providersDeleted: number}} Rows present before clearing.
 */
export function clearEmbeddings(db) {
    const rowCount = (table) => {
        if (!tableExists(db, table)) {
            return 0;
        }
        return db.prepare(`SELECT COUNT(*) AS count FROM ${table}`).get().count ?? 0;
    };
    const vectorsDeleted = rowCount("vector_chunks_metadata");
    const providersDeleted = rowCount("embedding_providers");
    const wipe = db.transaction(() => {
        if (tableExists(db, "vector_chunks")) {
            db.exec("DROP TABLE vector_chunks");
        }
        for (const table of ["vector_chunks_metadata", "embedding_providers"]) {
            if (tableExists(db, table)) {
                db.prepare(`DELETE FROM ${table}`).run();
            }
        }
    });
    wipe();
    return { vectorsDeleted, providersDeleted };
}
|
|
175
|
+
/**
 * Summarise how many eligible chunks have a stored vector for the given
 * provider.
 *
 * When `dimensions` is null/undefined the value recorded in
 * `embedding_providers` is used. If no dimension count is known, the indexed
 * count is reported as 0 and `missingChunks` as `null`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Provider config to report on.
 * @param {number|null|undefined} dimensions - Dimension count, if already known.
 * @returns {object} Coverage record including `staleForConfiguredProvider` and a
 *   `recommendation` string (or `null` when nothing needs doing).
 */
export function embeddingCoverage(db, provider, dimensions) {
    const storedDimensions = dimensions ?? storedProviderDimensions(db, provider);
    const totalEligibleChunks = countEligibleChunks(db);
    const incompatibleStoredVectors = countIncompatibleStoredVectors(db, provider, storedDimensions);
    const lastIndexedAt = latestEmbeddingIndexedAt(db, provider, storedDimensions);
    const dimensionsKnown = Boolean(storedDimensions);
    let indexedChunks = 0;
    if (dimensionsKnown && tableExists(db, "vector_chunks_metadata")) {
        indexedChunks = db
            .prepare(`SELECT COUNT(*) AS count
           FROM vector_chunks_metadata
           WHERE provider_id = ? AND model = ? AND dimensions = ?`)
            .get(provider.id, provider.model, storedDimensions).count;
    }
    const missingChunks = dimensionsKnown ? Math.max(totalEligibleChunks - indexedChunks, 0) : null;
    // Without known dimensions every eligible chunk still needs embedding.
    const workPending = dimensionsKnown ? missingChunks > 0 : totalEligibleChunks > 0;
    let recommendation = null;
    if (incompatibleStoredVectors > 0) {
        recommendation = "Run `wwi embeddings clear` before switching embedding models or dimensions.";
    }
    else if (workPending) {
        recommendation = dimensionsKnown
            ? "Run `wwi index --semantic` to refresh embeddings for the configured provider."
            : "Run `wwi index --semantic` to build embeddings for the configured provider.";
    }
    return {
        providerId: provider.id,
        model: provider.model,
        dimensions: dimensionsKnown ? storedDimensions : null,
        totalEligibleChunks,
        indexedChunks,
        missingChunks,
        lastIndexedAt,
        incompatibleStoredVectors,
        staleForConfiguredProvider: workPending || incompatibleStoredVectors > 0,
        recommendation
    };
}
|
|
223
|
+
/**
 * Return the newest `indexed_at` value stored for the provider and model,
 * additionally filtered by dimension count when one is given.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Provider config to match against.
 * @param {number|null} dimensions - Optional dimension filter; falsy means no filter.
 * @returns {*} `MAX(indexed_at)` from the query, or `null` when the metadata table is absent.
 */
function latestEmbeddingIndexedAt(db, provider, dimensions) {
    if (!tableExists(db, "vector_chunks_metadata")) {
        return null;
    }
    const conditions = ["provider_id = ?", "model = ?"];
    const bindings = [provider.id, provider.model];
    if (dimensions) {
        conditions.push("dimensions = ?");
        bindings.push(dimensions);
    }
    const statement = db.prepare(`SELECT MAX(indexed_at) AS lastIndexedAt
         FROM vector_chunks_metadata
         WHERE ${conditions.join(" AND ")}`);
    const { lastIndexedAt } = statement.get(...bindings);
    return lastIndexedAt;
}
|
|
240
|
+
/**
 * Count metadata rows whose provider, model or dimension count differs from
 * the given configuration.
 *
 * With no dimension count, every stored row is counted. Returns 0 when the
 * metadata table does not exist.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Provider config to compare against.
 * @param {number|null} dimensions - Dimension count to compare against.
 * @returns {number} Number of rows considered incompatible.
 */
function countIncompatibleStoredVectors(db, provider, dimensions) {
    if (!tableExists(db, "vector_chunks_metadata")) {
        return 0;
    }
    let result;
    if (dimensions) {
        result = db
            .prepare(`SELECT COUNT(*) AS count
         FROM vector_chunks_metadata
         WHERE provider_id != ? OR model != ? OR dimensions != ?`)
            .get(provider.id, provider.model, dimensions);
    }
    else {
        result = db.prepare("SELECT COUNT(*) AS count FROM vector_chunks_metadata").get();
    }
    return result.count;
}
|
|
253
|
+
/**
 * Throw when the store holds vectors for a different provider, model or
 * dimension count than the one given.
 *
 * @param {object} db - Database handle.
 * @param {{id: *, model: *}} provider - Provider config to compare against.
 * @param {number|null} dimensions - Dimension count to compare against.
 * @throws {Error} When at least one incompatible vector is stored.
 */
function assertNoIncompatibleStoredVectors(db, provider, dimensions) {
    const mismatched = countIncompatibleStoredVectors(db, provider, dimensions);
    if (mismatched > 0) {
        const message = `Found ${mismatched} stored vector(s) for a different embedding model or dimension count. Run \`wwi embeddings clear\` and rebuild semantic embeddings before switching embedding settings.`;
        throw new Error(message);
    }
}
|
|
259
|
+
/**
 * Create the `vector_chunks` virtual table when it is missing, or verify that
 * the existing table's SQL declares the expected `float[N]` column.
 *
 * @param {object} db - Database handle exposing `prepare` and `exec`.
 * @param {number} dimensions - Expected vector length.
 * @throws {Error} When the table exists but its SQL lacks `float[dimensions]`.
 */
function ensureVectorTable(db, dimensions) {
    const expected = `float[${dimensions}]`;
    const definition = db
        .prepare("SELECT sql FROM sqlite_master WHERE name = 'vector_chunks'")
        .get();
    if (!definition) {
        db.exec(`CREATE VIRTUAL TABLE vector_chunks USING vec0(embedding float[${dimensions}])`);
        return;
    }
    const matches = definition.sql.includes(expected);
    if (!matches) {
        throw new Error(`Existing vector_chunks table does not match ${expected}. Run \`wwi embeddings clear\` and rebuild semantic embeddings. Table SQL: ${definition.sql}`);
    }
}
|
|
272
|
+
/**
 * Throw when the existing vector table declares a dimension count different
 * from `dimensions`. A missing or unparseable table definition passes.
 *
 * @param {object} db - Database handle.
 * @param {number} dimensions - Dimension count the caller intends to use.
 * @throws {Error} On a dimension mismatch.
 */
function assertVectorTableDimensions(db, dimensions) {
    const existingDimensions = vectorTableDimensions(db);
    if (existingDimensions === null) {
        return;
    }
    if (existingDimensions !== dimensions) {
        throw new Error(`Existing vector store dimensions (${existingDimensions}) do not match configured embedding dimensions (${dimensions}). Run \`wwi embeddings clear\` and rebuild semantic embeddings.`);
    }
}
|
|
278
|
+
/**
 * Read the dimension count from the `float[N]` declaration in the stored SQL
 * of `vector_chunks`.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @returns {number|null} The parsed N, or `null` when the table is absent or
 *   its SQL contains no `float[N]`.
 */
function vectorTableDimensions(db) {
    const definition = db
        .prepare("SELECT sql FROM sqlite_master WHERE name = 'vector_chunks'")
        .get();
    if (definition === undefined || definition === null) {
        return null;
    }
    const found = definition.sql.match(/float\[(\d+)\]/);
    if (!found) {
        return null;
    }
    return Number(found[1]);
}
|
|
285
|
+
/**
 * Look up the dimension count recorded in `embedding_providers` for the given
 * provider id and model.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {{id: *, model: *}} provider - Provider config to look up.
 * @returns {number|null} Recorded dimensions, or `null` when the table or row is missing.
 */
function storedProviderDimensions(db, provider) {
    if (!tableExists(db, "embedding_providers")) {
        return null;
    }
    const lookup = db.prepare("SELECT dimensions FROM embedding_providers WHERE id = ? AND model = ?");
    const record = lookup.get(provider.id, provider.model);
    return record?.dimensions ?? null;
}
|
|
294
|
+
/**
 * Count chunks that qualify for embedding: role 'user' or 'assistant' with
 * non-null message, conversation and turn ids.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @returns {number} The `count` column of the query result.
 */
function countEligibleChunks(db) {
    const statement = db.prepare(`SELECT COUNT(*) AS count
       FROM chunks
       WHERE role IN ('user', 'assistant')
         AND message_id IS NOT NULL
         AND conversation_id IS NOT NULL
         AND turn_id IS NOT NULL`);
    const { count } = statement.get();
    return count;
}
|
|
305
|
+
/**
 * Build a fingerprint for a chunk: its id, a colon, and the hex SHA-1 of its text.
 *
 * @param {{id: *, text: string}} chunk - Chunk to fingerprint.
 * @returns {string} `"<id>:<sha1 hex>"`.
 */
function sourceFingerprint(chunk) {
    const hasher = crypto.createHash("sha1");
    hasher.update(chunk.text);
    const textDigest = hasher.digest("hex");
    return `${chunk.id}:${textDigest}`;
}
|
|
308
|
+
/**
 * Report whether `sqlite_master` has an entry with the given name.
 *
 * @param {object} db - Database handle exposing `prepare`.
 * @param {string} table - Name to look up.
 * @returns {boolean} `true` when the lookup returns a row.
 */
function tableExists(db, table) {
    const lookup = db.prepare("SELECT 1 FROM sqlite_master WHERE name = ?");
    const match = lookup.get(table);
    return Boolean(match);
}
|