@gmickel/gno 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +256 -0
- package/assets/skill/SKILL.md +112 -0
- package/assets/skill/cli-reference.md +327 -0
- package/assets/skill/examples.md +234 -0
- package/assets/skill/mcp-reference.md +159 -0
- package/package.json +90 -0
- package/src/app/constants.ts +313 -0
- package/src/cli/colors.ts +65 -0
- package/src/cli/commands/ask.ts +545 -0
- package/src/cli/commands/cleanup.ts +105 -0
- package/src/cli/commands/collection/add.ts +120 -0
- package/src/cli/commands/collection/index.ts +10 -0
- package/src/cli/commands/collection/list.ts +108 -0
- package/src/cli/commands/collection/remove.ts +64 -0
- package/src/cli/commands/collection/rename.ts +95 -0
- package/src/cli/commands/context/add.ts +67 -0
- package/src/cli/commands/context/check.ts +153 -0
- package/src/cli/commands/context/index.ts +10 -0
- package/src/cli/commands/context/list.ts +109 -0
- package/src/cli/commands/context/rm.ts +52 -0
- package/src/cli/commands/doctor.ts +393 -0
- package/src/cli/commands/embed.ts +462 -0
- package/src/cli/commands/get.ts +356 -0
- package/src/cli/commands/index-cmd.ts +119 -0
- package/src/cli/commands/index.ts +102 -0
- package/src/cli/commands/init.ts +328 -0
- package/src/cli/commands/ls.ts +217 -0
- package/src/cli/commands/mcp/config.ts +300 -0
- package/src/cli/commands/mcp/index.ts +24 -0
- package/src/cli/commands/mcp/install.ts +203 -0
- package/src/cli/commands/mcp/paths.ts +470 -0
- package/src/cli/commands/mcp/status.ts +222 -0
- package/src/cli/commands/mcp/uninstall.ts +158 -0
- package/src/cli/commands/mcp.ts +20 -0
- package/src/cli/commands/models/clear.ts +103 -0
- package/src/cli/commands/models/index.ts +32 -0
- package/src/cli/commands/models/list.ts +214 -0
- package/src/cli/commands/models/path.ts +51 -0
- package/src/cli/commands/models/pull.ts +199 -0
- package/src/cli/commands/models/use.ts +85 -0
- package/src/cli/commands/multi-get.ts +400 -0
- package/src/cli/commands/query.ts +220 -0
- package/src/cli/commands/ref-parser.ts +108 -0
- package/src/cli/commands/reset.ts +191 -0
- package/src/cli/commands/search.ts +136 -0
- package/src/cli/commands/shared.ts +156 -0
- package/src/cli/commands/skill/index.ts +19 -0
- package/src/cli/commands/skill/install.ts +197 -0
- package/src/cli/commands/skill/paths-cmd.ts +81 -0
- package/src/cli/commands/skill/paths.ts +191 -0
- package/src/cli/commands/skill/show.ts +73 -0
- package/src/cli/commands/skill/uninstall.ts +141 -0
- package/src/cli/commands/status.ts +205 -0
- package/src/cli/commands/update.ts +68 -0
- package/src/cli/commands/vsearch.ts +188 -0
- package/src/cli/context.ts +64 -0
- package/src/cli/errors.ts +64 -0
- package/src/cli/format/search-results.ts +211 -0
- package/src/cli/options.ts +183 -0
- package/src/cli/program.ts +1330 -0
- package/src/cli/run.ts +213 -0
- package/src/cli/ui.ts +92 -0
- package/src/config/defaults.ts +20 -0
- package/src/config/index.ts +55 -0
- package/src/config/loader.ts +161 -0
- package/src/config/paths.ts +87 -0
- package/src/config/saver.ts +153 -0
- package/src/config/types.ts +280 -0
- package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
- package/src/converters/adapters/officeparser/adapter.ts +126 -0
- package/src/converters/canonicalize.ts +89 -0
- package/src/converters/errors.ts +218 -0
- package/src/converters/index.ts +51 -0
- package/src/converters/mime.ts +163 -0
- package/src/converters/native/markdown.ts +115 -0
- package/src/converters/native/plaintext.ts +56 -0
- package/src/converters/path.ts +48 -0
- package/src/converters/pipeline.ts +159 -0
- package/src/converters/registry.ts +74 -0
- package/src/converters/types.ts +123 -0
- package/src/converters/versions.ts +24 -0
- package/src/index.ts +27 -0
- package/src/ingestion/chunker.ts +238 -0
- package/src/ingestion/index.ts +32 -0
- package/src/ingestion/language.ts +276 -0
- package/src/ingestion/sync.ts +671 -0
- package/src/ingestion/types.ts +219 -0
- package/src/ingestion/walker.ts +235 -0
- package/src/llm/cache.ts +467 -0
- package/src/llm/errors.ts +191 -0
- package/src/llm/index.ts +58 -0
- package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
- package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
- package/src/llm/nodeLlamaCpp/generation.ts +88 -0
- package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
- package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
- package/src/llm/registry.ts +86 -0
- package/src/llm/types.ts +129 -0
- package/src/mcp/resources/index.ts +151 -0
- package/src/mcp/server.ts +229 -0
- package/src/mcp/tools/get.ts +220 -0
- package/src/mcp/tools/index.ts +160 -0
- package/src/mcp/tools/multi-get.ts +263 -0
- package/src/mcp/tools/query.ts +226 -0
- package/src/mcp/tools/search.ts +119 -0
- package/src/mcp/tools/status.ts +81 -0
- package/src/mcp/tools/vsearch.ts +198 -0
- package/src/pipeline/chunk-lookup.ts +44 -0
- package/src/pipeline/expansion.ts +256 -0
- package/src/pipeline/explain.ts +115 -0
- package/src/pipeline/fusion.ts +185 -0
- package/src/pipeline/hybrid.ts +535 -0
- package/src/pipeline/index.ts +64 -0
- package/src/pipeline/query-language.ts +118 -0
- package/src/pipeline/rerank.ts +223 -0
- package/src/pipeline/search.ts +261 -0
- package/src/pipeline/types.ts +328 -0
- package/src/pipeline/vsearch.ts +348 -0
- package/src/store/index.ts +41 -0
- package/src/store/migrations/001-initial.ts +196 -0
- package/src/store/migrations/index.ts +20 -0
- package/src/store/migrations/runner.ts +187 -0
- package/src/store/sqlite/adapter.ts +1242 -0
- package/src/store/sqlite/index.ts +7 -0
- package/src/store/sqlite/setup.ts +129 -0
- package/src/store/sqlite/types.ts +28 -0
- package/src/store/types.ts +506 -0
- package/src/store/vector/index.ts +13 -0
- package/src/store/vector/sqlite-vec.ts +373 -0
- package/src/store/vector/stats.ts +152 -0
- package/src/store/vector/types.ts +115 -0
|
@@ -0,0 +1,373 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* sqlite-vec adapter for vector search acceleration.
|
|
3
|
+
* Per-model vec tables to avoid dimension/collision issues.
|
|
4
|
+
*
|
|
5
|
+
 * @module src/store/vector/sqlite-vec
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Database } from 'bun:sqlite';
|
|
9
|
+
import { createHash } from 'node:crypto';
|
|
10
|
+
import type { StoreResult } from '../types';
|
|
11
|
+
import { err, ok } from '../types';
|
|
12
|
+
import type { VectorIndexPort, VectorRow, VectorSearchResult } from './types';
|
|
13
|
+
|
|
14
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
15
|
+
// BLOB Encoding Helpers (avoid Buffer.buffer footgun)
|
|
16
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
17
|
+
|
|
18
|
+
/**
 * Serialize an embedding into raw bytes suitable for a SQLite BLOB column.
 *
 * Only the bytes covered by the view are taken (its offset and length are honoured),
 * and they are copied into a fresh buffer, so the result never aliases the caller's
 * memory.
 *
 * @param f32 - Embedding to serialize.
 * @returns A byte copy that is `f32.byteLength` bytes long.
 */
export function encodeEmbedding(f32: Float32Array): Uint8Array {
  const { buffer, byteOffset, byteLength } = f32;
  // slice() copies the covered range out of the (possibly larger) backing buffer.
  const ownedBytes = buffer.slice(byteOffset, byteOffset + byteLength);
  return new Uint8Array(ownedBytes);
}
|
|
27
|
+
|
|
28
|
+
/**
 * Rebuild an embedding from the raw bytes of a SQLite BLOB.
 *
 * The bytes are copied into a newly allocated Float32Array, so the result owns its
 * memory and starts at byte offset 0 regardless of how `blob` is positioned inside
 * its own backing buffer.
 *
 * @param blob - Raw embedding bytes; the length must be a multiple of 4.
 * @returns A Float32Array holding `blob.byteLength / 4` values.
 * @throws Error if the blob length is not aligned to 4 bytes.
 */
export function decodeEmbedding(blob: Uint8Array): Float32Array {
  const byteCount = blob.byteLength;
  if (byteCount % 4 !== 0) {
    throw new Error(
      `Invalid embedding blob: length ${blob.byteLength} is not aligned to 4 bytes`
    );
  }

  // Allocate the destination first, then copy byte-wise through a Uint8Array view.
  const floats = new Float32Array(byteCount / 4);
  new Uint8Array(floats.buffer).set(blob);
  return floats;
}
|
|
42
|
+
|
|
43
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
44
|
+
// Helpers
|
|
45
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
46
|
+
|
|
47
|
+
/**
 * Derive the per-model vec table name from a model URI.
 *
 * The name is `vec_` followed by the first 8 hex characters of the URI's SHA-256
 * digest, so the same URI always maps to the same table.
 *
 * @param modelUri - Model URI to hash.
 * @returns Table name of the form `vec_xxxxxxxx`.
 */
function modelTableName(modelUri: string): string {
  const digest = createHash('sha256').update(modelUri).digest('hex');
  return 'vec_' + digest.substring(0, 8);
}
|
|
55
|
+
|
|
56
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
57
|
+
// Factory
|
|
58
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/** Settings that bind a vector index to one embedding model. */
export interface VectorIndexOptions {
  /** Model URI; it is hashed to derive the name of the per-model vec table. */
  model: string;
  /** Number of float components per embedding (the width of the `FLOAT[n]` column). */
  dimensions: number;
  /** Distance function for the vec table; `'cosine'` is used when omitted. */
  distanceMetric?: 'l2' | 'cosine';
}
|
|
65
|
+
|
|
66
|
+
/**
 * Create a VectorIndexPort bound to one embedding model.
 *
 * Embeddings are always written to the `content_vectors` table. When the optional
 * sqlite-vec extension loads and the per-model vec0 table can be created, the port
 * additionally maintains that table and offers KNN search. Otherwise it degrades to
 * storage-only mode (`searchAvailable === false`) and records the reason in
 * `loadError`.
 *
 * @param db - Open database handle; the sqlite-vec extension is loaded into it.
 * @param options - Model URI, embedding dimensions and optional distance metric
 *   (defaults to `'cosine'`).
 * @returns An `ok` result wrapping the port. Extension or vec-table problems are
 *   reported through `searchAvailable` / `loadError`, not as an error result.
 *   Preparing the `content_vectors` statements is not guarded, so a failure there
 *   rejects the returned promise.
 */
export async function createVectorIndexPort(
  db: Database,
  options: VectorIndexOptions
): Promise<StoreResult<VectorIndexPort>> {
  const { model, dimensions, distanceMetric = 'cosine' } = options;
  const tableName = modelTableName(model);

  /** Human-readable message for an arbitrary thrown value. */
  const describeError = (e: unknown): string =>
    e instanceof Error ? e.message : String(e);

  /** Primary key used in the vec0 table: `<mirrorHash>:<seq>`. */
  const chunkIdOf = (mirrorHash: string, seq: number): string =>
    `${mirrorHash}:${seq}`;

  /** DDL for the per-model vec0 table; shared by initial creation and rebuild. */
  const vecTableDdl = (ifNotExists: boolean): string => `
    CREATE VIRTUAL TABLE ${ifNotExists ? 'IF NOT EXISTS ' : ''}${tableName} USING vec0(
      chunk_id TEXT PRIMARY KEY,
      embedding FLOAT[${dimensions}] distance_metric=${distanceMetric}
    );
  `;

  // Try loading the sqlite-vec extension (ESM dynamic import).
  let searchAvailable = false;
  let loadError: string | undefined;
  try {
    const sqliteVec = await import('sqlite-vec');
    sqliteVec.load(db);
    searchAvailable = true;
  } catch (e) {
    // sqlite-vec not available - storage still works, search disabled.
    loadError = describeError(e);
  }

  // Create the per-model vec0 table if the extension is available.
  // Graceful degradation: if this fails, storage still works.
  if (searchAvailable) {
    // `dimensions` and `distanceMetric` are spliced into DDL that runs through
    // db.exec (which accepts multiple statements), so verify them at runtime
    // instead of trusting the static types.
    if (!Number.isInteger(dimensions) || dimensions <= 0) {
      searchAvailable = false;
      loadError = `Invalid embedding dimensions for vec table: ${String(dimensions)}`;
    } else if (distanceMetric !== 'cosine' && distanceMetric !== 'l2') {
      searchAvailable = false;
      loadError = `Unsupported distance metric for vec table: ${String(distanceMetric)}`;
    } else {
      try {
        db.exec(vecTableDdl(true));
      } catch (e) {
        // Vec table creation failed - degrade to storage-only mode.
        searchAvailable = false;
        loadError = describeError(e);
      }
    }
  }

  // Prepared statements for the content_vectors table (always available).
  const upsertVectorStmt = db.prepare(`
    INSERT OR REPLACE INTO content_vectors (mirror_hash, seq, model, embedding, embedded_at)
    VALUES (?, ?, ?, ?, datetime('now'))
  `);

  const deleteVectorStmt = db.prepare(`
    DELETE FROM content_vectors WHERE mirror_hash = ? AND model = ?
  `);

  // Prepared statements for the vec0 table (null in storage-only mode).
  const upsertVecStmt = searchAvailable
    ? db.prepare(
        `INSERT OR REPLACE INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)`
      )
    : null;

  const searchStmt = searchAvailable
    ? db.prepare(`
        SELECT chunk_id, distance
        FROM ${tableName}
        WHERE embedding MATCH ?
          AND k = ?
      `)
    : null;

  // NOTE(review): the mirror hash is used as a LIKE prefix, so `%` or `_` inside it
  // would act as wildcards. Presumably hashes are hex strings - confirm.
  const deleteVecStmt = searchAvailable
    ? db.prepare(`DELETE FROM ${tableName} WHERE chunk_id LIKE ? || ':%'`)
    : null;

  return ok({
    searchAvailable,
    model,
    dimensions,
    loadError,

    /**
     * Store embeddings in `content_vectors`, then mirror them into the vec0 table.
     *
     * The `content_vectors` write is the critical path and runs in one transaction;
     * its failure yields `VECTOR_WRITE_FAILED`. The vec0 write is best-effort and
     * its failure is ignored. `embedded_at` is stamped by the database
     * (`datetime('now')`); `row.embeddedAt` is not read here.
     */
    async upsertVectors(rows: VectorRow[]): Promise<StoreResult<void>> {
      // 1. Always store in content_vectors first (critical path).
      try {
        db.transaction(() => {
          for (const row of rows) {
            upsertVectorStmt.run(
              row.mirrorHash,
              row.seq,
              row.model,
              encodeEmbedding(row.embedding)
            );
          }
        })();
      } catch (e) {
        return err(
          'VECTOR_WRITE_FAILED',
          `Vector write failed: ${describeError(e)}`
        );
      }

      // 2. Best-effort update of vec0 (graceful degradation).
      if (upsertVecStmt) {
        // The vec table belongs to this port's model. Rows stored under another
        // model must not be indexed here: syncVecIndex would treat them as orphans,
        // and a dimension mismatch on one of them would abort the whole batch.
        const indexable = rows.filter((row) => row.model === model);
        if (indexable.length > 0) {
          try {
            db.transaction(() => {
              for (const row of indexable) {
                upsertVecStmt.run(
                  chunkIdOf(row.mirrorHash, row.seq),
                  encodeEmbedding(row.embedding)
                );
              }
            })();
          } catch {
            // Vec0 write failed - storage succeeded, search may be degraded until
            // the next syncVecIndex/rebuildVecIndex. Deliberately not fatal.
          }
        }
      }

      return ok(undefined);
    },

    /**
     * Delete this model's vectors for a mirror hash from `content_vectors`
     * (failure yields `VECTOR_DELETE_FAILED`), then best-effort from the vec0 table.
     */
    async deleteVectorsForMirror(
      mirrorHash: string
    ): Promise<StoreResult<void>> {
      // 1. Always delete from content_vectors first.
      try {
        deleteVectorStmt.run(mirrorHash, model);
      } catch (e) {
        return err(
          'VECTOR_DELETE_FAILED',
          `Vector delete failed: ${describeError(e)}`
        );
      }

      // 2. Best-effort delete from vec0.
      if (deleteVecStmt) {
        try {
          deleteVecStmt.run(mirrorHash);
        } catch {
          // Vec0 delete failed - not critical.
        }
      }

      return ok(undefined);
    },

    /**
     * Return up to `k` nearest chunks for `embedding` from the vec0 table.
     *
     * Yields `VEC_SEARCH_UNAVAILABLE` in storage-only mode and `VEC_SEARCH_FAILED`
     * if the query throws. When `minScore` is given, hits are kept only if
     * `1 - distance >= minScore`.
     */
    async searchNearest(
      embedding: Float32Array,
      k: number,
      searchOptions?: { minScore?: number }
    ): Promise<StoreResult<VectorSearchResult[]>> {
      if (!searchAvailable || !searchStmt) {
        return err(
          'VEC_SEARCH_UNAVAILABLE',
          'Vector search requires sqlite-vec. Embeddings stored but KNN search disabled.'
        );
      }

      try {
        const hits = searchStmt.all(encodeEmbedding(embedding), k) as {
          chunk_id: string;
          distance: number;
        }[];

        // Filter by minScore if provided.
        // For cosine distance: similarity = 1 - distance, keep if >= minScore.
        // NOTE(review): the same formula is applied when distanceMetric is 'l2',
        // where 1 - distance is not a similarity - confirm the intended behavior.
        const minScore = searchOptions?.minScore;
        const kept =
          minScore === undefined
            ? hits
            : hits.filter((hit) => 1 - hit.distance >= minScore);

        return ok(
          kept.map((hit) => {
            // Ids are written as `<mirrorHash>:<seq>`; split at the LAST colon so
            // a hash that itself contains ':' still round-trips.
            const sep = hit.chunk_id.lastIndexOf(':');
            const mirrorHash =
              sep === -1 ? hit.chunk_id : hit.chunk_id.slice(0, sep);
            const seqStr = sep === -1 ? '0' : hit.chunk_id.slice(sep + 1);
            return {
              mirrorHash,
              seq: Number.parseInt(seqStr, 10),
              distance: hit.distance,
            };
          })
        );
      } catch (e) {
        return err(
          'VEC_SEARCH_FAILED',
          `Vector search failed: ${describeError(e)}`
        );
      }
    },

    /**
     * Drop the vec0 table, recreate it and repopulate it from this model's rows in
     * `content_vectors`. No-op in storage-only mode; failures yield
     * `VEC_REBUILD_FAILED`.
     *
     * The DROP/CREATE run outside the insert transaction, so if repopulation fails
     * the table exists but is empty until the next rebuild or sync.
     */
    async rebuildVecIndex(): Promise<StoreResult<void>> {
      if (!searchAvailable) {
        return ok(undefined); // No-op if no vec support
      }

      try {
        // Drop and recreate the vec table.
        db.exec(`DROP TABLE IF EXISTS ${tableName}`);
        db.exec(vecTableDdl(false));

        // Repopulate from content_vectors.
        const stored = db
          .prepare(
            'SELECT mirror_hash, seq, embedding FROM content_vectors WHERE model = ?'
          )
          .all(model) as {
          mirror_hash: string;
          seq: number;
          embedding: Uint8Array;
        }[];

        const insertStmt = db.prepare(`
          INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)
        `);

        db.transaction(() => {
          for (const row of stored) {
            insertStmt.run(chunkIdOf(row.mirror_hash, row.seq), row.embedding);
          }
        })();

        return ok(undefined);
      } catch (e) {
        return err('VEC_REBUILD_FAILED', `Vec rebuild failed: ${describeError(e)}`);
      }
    },

    /**
     * Reconcile the vec0 table with `content_vectors` for this model: delete vec
     * rows with no matching stored vector, then insert stored vectors missing from
     * the vec table. Returns the counts; failures yield `VEC_SYNC_FAILED`.
     */
    async syncVecIndex(): Promise<
      StoreResult<{ added: number; removed: number }>
    > {
      if (!searchAvailable) {
        return ok({ added: 0, removed: 0 });
      }

      try {
        // 1. Remove orphans from vec table (not in content_vectors for this model).
        const orphanResult = db
          .prepare(
            `
            DELETE FROM ${tableName}
            WHERE chunk_id NOT IN (
              SELECT mirror_hash || ':' || seq
              FROM content_vectors
              WHERE model = ?
            )
          `
          )
          .run(model);
        const removed = orphanResult.changes;

        // 2. Add missing entries (in content_vectors but not in vec table).
        const missing = db
          .prepare(
            `
            SELECT cv.mirror_hash, cv.seq, cv.embedding
            FROM content_vectors cv
            WHERE cv.model = ?
              AND (cv.mirror_hash || ':' || cv.seq) NOT IN (
                SELECT chunk_id FROM ${tableName}
              )
          `
          )
          .all(model) as {
          mirror_hash: string;
          seq: number;
          embedding: Uint8Array;
        }[];

        if (missing.length > 0) {
          const insertStmt = db.prepare(`
            INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)
          `);
          db.transaction(() => {
            for (const row of missing) {
              insertStmt.run(chunkIdOf(row.mirror_hash, row.seq), row.embedding);
            }
          })();
        }

        return ok({ added: missing.length, removed });
      } catch (e) {
        return err('VEC_SYNC_FAILED', `Vec sync failed: ${describeError(e)}`);
      }
    },
  });
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* VectorStatsPort implementation for backlog/stats queries.
|
|
3
|
+
* Works without sqlite-vec.
|
|
4
|
+
*
|
|
5
|
+
* @module src/store/vector/stats
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { Database } from 'bun:sqlite';
|
|
9
|
+
import type { StoreResult } from '../types';
|
|
10
|
+
import { err, ok } from '../types';
|
|
11
|
+
import type { BacklogItem, VectorStatsPort } from './types';
|
|
12
|
+
|
|
13
|
+
/**
 * Create a VectorStatsPort for backlog detection and vector stats.
 *
 * All queries run against `content_vectors`, `content_chunks` and `documents`, so
 * the port works without sqlite-vec. Backlog queries use EXISTS-based predicates to
 * avoid duplicates when multiple documents share a mirror_hash.
 *
 * @param db - Open database handle used for every query.
 * @returns The stats port; each method resolves to `QUERY_FAILED` if its query throws.
 */
export function createVectorStatsPort(db: Database): VectorStatsPort {
  /** Human-readable message for an arbitrary thrown value. */
  const describeError = (e: unknown): string =>
    e instanceof Error ? e.message : String(e);

  // Chunk `c` belongs to at least one active document.
  const activeChunkSql = `EXISTS (
      SELECT 1 FROM documents d
      WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
    )`;

  // Chunk `c` has no vector for the bound model that is at least as recent as the
  // chunk itself. Consumes ONE bind parameter: the model.
  const needsEmbeddingSql = `NOT EXISTS (
      SELECT 1 FROM content_vectors v
      WHERE v.mirror_hash = c.mirror_hash
        AND v.seq = c.seq
        AND v.model = ?
        AND v.embedded_at >= c.created_at
    )`;

  return {
    /** Count rows stored in `content_vectors` for `model`. */
    async countVectors(model: string): Promise<StoreResult<number>> {
      try {
        const result = db
          .prepare('SELECT COUNT(*) as count FROM content_vectors WHERE model = ?')
          .get(model) as { count: number };
        return ok(result.count);
      } catch (e) {
        return err('QUERY_FAILED', `Failed to count vectors: ${describeError(e)}`);
      }
    },

    /** Count chunks of active documents that still need an embedding for `model`. */
    async countBacklog(model: string): Promise<StoreResult<number>> {
      try {
        // Count chunks needing embedding (fast for progress display).
        const result = db
          .prepare(
            `
            SELECT COUNT(*) as count
            FROM content_chunks c
            WHERE ${activeChunkSql}
              AND ${needsEmbeddingSql}
          `
          )
          .get(model) as { count: number };
        return ok(result.count);
      } catch (e) {
        return err('QUERY_FAILED', `Failed to count backlog: ${describeError(e)}`);
      }
    },

    /**
     * List chunks needing embedding for `model`, ordered by (mirror_hash, seq).
     *
     * `reason` is `'new'` when no vector exists at all for the chunk and model, and
     * `'changed'` otherwise. `options.limit` defaults to 1000; `options.after`
     * restricts the result to chunks strictly after that cursor.
     */
    async getBacklog(
      model: string,
      options?: { limit?: number; after?: { mirrorHash: string; seq: number } }
    ): Promise<StoreResult<BacklogItem[]>> {
      try {
        const limit = options?.limit ?? 1000;
        const after = options?.after;

        // Seek pagination: use a cursor to avoid skipping items as the backlog
        // shrinks. The cursor predicate is the only difference between the first
        // page and later pages, so it is the only conditional piece of the query.
        const cursorSql = after
          ? 'AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))'
          : '';

        const sql = `
          SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
            CASE
              WHEN NOT EXISTS (
                SELECT 1 FROM content_vectors v
                WHERE v.mirror_hash = c.mirror_hash
                  AND v.seq = c.seq
                  AND v.model = ?
              ) THEN 'new'
              ELSE 'changed'
            END as reason
          FROM content_chunks c
          WHERE ${activeChunkSql}
            AND ${needsEmbeddingSql}
            ${cursorSql}
          ORDER BY c.mirror_hash, c.seq
          LIMIT ?
        `;

        // Bind order follows placeholder order: CASE model, NOT EXISTS model,
        // optional cursor triple, then the limit.
        const params = after
          ? [model, model, after.mirrorHash, after.mirrorHash, after.seq, limit]
          : [model, model, limit];

        const results = db.prepare(sql).all(...params) as BacklogItem[];
        return ok(results);
      } catch (e) {
        return err('QUERY_FAILED', `Failed to get backlog: ${describeError(e)}`);
      }
    },
  };
}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Vector index types and interfaces.
|
|
3
|
+
* Defines VectorIndexPort and VectorStatsPort for embedding storage/search.
|
|
4
|
+
*
|
|
5
|
+
* @module src/store/vector/types
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import type { StoreResult } from '../types';
|
|
9
|
+
|
|
10
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
11
|
+
// Row Types
|
|
12
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
13
|
+
|
|
14
|
+
/** One embedding to persist for a chunk under a given model. */
export interface VectorRow {
  /** Mirror hash of the chunk's content; first half of the chunk identity. */
  mirrorHash: string;
  /** Chunk sequence number; together with `mirrorHash` it identifies the chunk. */
  seq: number;
  /** Model URI the embedding was produced with. */
  model: string;
  /** The embedding values. */
  embedding: Float32Array;
  /**
   * Timestamp of when the embedding was produced.
   * NOTE(review): the sqlite-vec adapter's upsert stamps `embedded_at` with the
   * database's `datetime('now')` and does not read this field - confirm intent.
   */
  embeddedAt: string;
}
|
|
22
|
+
|
|
23
|
+
/** A single hit returned by a nearest-neighbour search. */
export interface VectorSearchResult {
  /** Mirror hash of the matched chunk. */
  mirrorHash: string;
  /** Sequence number of the matched chunk. */
  seq: number;
  /** Distance between the query and the chunk as reported by the vec table. */
  distance: number;
}
|
|
29
|
+
|
|
30
|
+
/**
 * Position marker for seek-based backlog pagination: the backlog query returns
 * only chunks ordered strictly after this (mirrorHash, seq) pair.
 */
export interface BacklogCursor {
  /** Mirror hash of the last chunk already seen. */
  mirrorHash: string;
  /** Sequence number of the last chunk already seen. */
  seq: number;
}
|
|
35
|
+
|
|
36
|
+
/** A chunk that still needs an embedding for some model. */
export interface BacklogItem {
  /** Mirror hash of the chunk's content. */
  mirrorHash: string;
  /** Sequence number of the chunk. */
  seq: number;
  /** Chunk text to embed. */
  text: string;
  /**
   * Why the chunk is in the backlog. The stats port's backlog query reports `'new'`
   * when no vector exists for the chunk and model, and `'changed'` otherwise;
   * `'force'` is not produced by that query.
   */
  reason: 'force' | 'changed' | 'new';
}
|
|
43
|
+
|
|
44
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
45
|
+
// VectorIndexPort
|
|
46
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
47
|
+
|
|
48
|
+
/**
 * Port for storing embeddings and, when sqlite-vec is available, searching them.
 *
 * Storage ALWAYS goes through the `content_vectors` table and works without
 * sqlite-vec. KNN search and vec-index maintenance are added on top when the
 * extension is usable.
 */
export interface VectorIndexPort {
  /**
   * True when KNN search is usable, i.e. sqlite-vec loaded and the per-model vec
   * table could be created.
   */
  readonly searchAvailable: boolean;
  /** Model URI this index is configured for. */
  readonly model: string;
  /** Vector dimensions. */
  readonly dimensions: number;
  /**
   * Diagnostic message explaining why search is unavailable (extension load or
   * vec-table creation failure); undefined when nothing went wrong.
   */
  readonly loadError?: string;

  // ─────────────────────────────────────────────────────────────────────────
  // Storage (always works, uses content_vectors table)
  // ─────────────────────────────────────────────────────────────────────────

  /** Upsert vectors into storage and, when available, the vec index. */
  upsertVectors(rows: VectorRow[]): Promise<StoreResult<void>>;

  /** Delete all vectors for a mirror hash (for this model). */
  deleteVectorsForMirror(mirrorHash: string): Promise<StoreResult<void>>;

  // ─────────────────────────────────────────────────────────────────────────
  // Search (requires sqlite-vec)
  // ─────────────────────────────────────────────────────────────────────────

  /**
   * Find the `k` nearest neighbours of `embedding`.
   * `options.minScore`, when given, drops hits whose score is below it.
   */
  searchNearest(
    embedding: Float32Array,
    k: number,
    options?: { minScore?: number }
  ): Promise<StoreResult<VectorSearchResult[]>>;

  // ─────────────────────────────────────────────────────────────────────────
  // Index maintenance
  // ─────────────────────────────────────────────────────────────────────────

  /** Drop the vec index and rebuild it from content_vectors. */
  rebuildVecIndex(): Promise<StoreResult<void>>;

  /** Sync the vec index with content_vectors (add missing, remove orphans). */
  syncVecIndex(): Promise<StoreResult<{ added: number; removed: number }>>;
}
|
|
94
|
+
|
|
95
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
96
|
+
// VectorStatsPort
|
|
97
|
+
// ─────────────────────────────────────────────────────────────────────────────
|
|
98
|
+
|
|
99
|
+
/**
 * Port for model-aware vector statistics and embedding-backlog queries.
 * Does not depend on sqlite-vec.
 */
export interface VectorStatsPort {
  /** Count the vectors stored for `model`. */
  countVectors(model: string): Promise<StoreResult<number>>;

  /** Count the chunks that still need an embedding for `model`. */
  countBacklog(model: string): Promise<StoreResult<number>>;

  /**
   * List chunks that still need an embedding for `model`, using seek pagination:
   * `options.after` is the cursor to continue from and `options.limit` caps the
   * page size.
   */
  getBacklog(
    model: string,
    options?: { limit?: number; after?: BacklogCursor }
  ): Promise<StoreResult<BacklogItem[]>>;
}
|