@gmickel/gno 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +256 -0
  2. package/assets/skill/SKILL.md +112 -0
  3. package/assets/skill/cli-reference.md +327 -0
  4. package/assets/skill/examples.md +234 -0
  5. package/assets/skill/mcp-reference.md +159 -0
  6. package/package.json +90 -0
  7. package/src/app/constants.ts +313 -0
  8. package/src/cli/colors.ts +65 -0
  9. package/src/cli/commands/ask.ts +545 -0
  10. package/src/cli/commands/cleanup.ts +105 -0
  11. package/src/cli/commands/collection/add.ts +120 -0
  12. package/src/cli/commands/collection/index.ts +10 -0
  13. package/src/cli/commands/collection/list.ts +108 -0
  14. package/src/cli/commands/collection/remove.ts +64 -0
  15. package/src/cli/commands/collection/rename.ts +95 -0
  16. package/src/cli/commands/context/add.ts +67 -0
  17. package/src/cli/commands/context/check.ts +153 -0
  18. package/src/cli/commands/context/index.ts +10 -0
  19. package/src/cli/commands/context/list.ts +109 -0
  20. package/src/cli/commands/context/rm.ts +52 -0
  21. package/src/cli/commands/doctor.ts +393 -0
  22. package/src/cli/commands/embed.ts +462 -0
  23. package/src/cli/commands/get.ts +356 -0
  24. package/src/cli/commands/index-cmd.ts +119 -0
  25. package/src/cli/commands/index.ts +102 -0
  26. package/src/cli/commands/init.ts +328 -0
  27. package/src/cli/commands/ls.ts +217 -0
  28. package/src/cli/commands/mcp/config.ts +300 -0
  29. package/src/cli/commands/mcp/index.ts +24 -0
  30. package/src/cli/commands/mcp/install.ts +203 -0
  31. package/src/cli/commands/mcp/paths.ts +470 -0
  32. package/src/cli/commands/mcp/status.ts +222 -0
  33. package/src/cli/commands/mcp/uninstall.ts +158 -0
  34. package/src/cli/commands/mcp.ts +20 -0
  35. package/src/cli/commands/models/clear.ts +103 -0
  36. package/src/cli/commands/models/index.ts +32 -0
  37. package/src/cli/commands/models/list.ts +214 -0
  38. package/src/cli/commands/models/path.ts +51 -0
  39. package/src/cli/commands/models/pull.ts +199 -0
  40. package/src/cli/commands/models/use.ts +85 -0
  41. package/src/cli/commands/multi-get.ts +400 -0
  42. package/src/cli/commands/query.ts +220 -0
  43. package/src/cli/commands/ref-parser.ts +108 -0
  44. package/src/cli/commands/reset.ts +191 -0
  45. package/src/cli/commands/search.ts +136 -0
  46. package/src/cli/commands/shared.ts +156 -0
  47. package/src/cli/commands/skill/index.ts +19 -0
  48. package/src/cli/commands/skill/install.ts +197 -0
  49. package/src/cli/commands/skill/paths-cmd.ts +81 -0
  50. package/src/cli/commands/skill/paths.ts +191 -0
  51. package/src/cli/commands/skill/show.ts +73 -0
  52. package/src/cli/commands/skill/uninstall.ts +141 -0
  53. package/src/cli/commands/status.ts +205 -0
  54. package/src/cli/commands/update.ts +68 -0
  55. package/src/cli/commands/vsearch.ts +188 -0
  56. package/src/cli/context.ts +64 -0
  57. package/src/cli/errors.ts +64 -0
  58. package/src/cli/format/search-results.ts +211 -0
  59. package/src/cli/options.ts +183 -0
  60. package/src/cli/program.ts +1330 -0
  61. package/src/cli/run.ts +213 -0
  62. package/src/cli/ui.ts +92 -0
  63. package/src/config/defaults.ts +20 -0
  64. package/src/config/index.ts +55 -0
  65. package/src/config/loader.ts +161 -0
  66. package/src/config/paths.ts +87 -0
  67. package/src/config/saver.ts +153 -0
  68. package/src/config/types.ts +280 -0
  69. package/src/converters/adapters/markitdownTs/adapter.ts +140 -0
  70. package/src/converters/adapters/officeparser/adapter.ts +126 -0
  71. package/src/converters/canonicalize.ts +89 -0
  72. package/src/converters/errors.ts +218 -0
  73. package/src/converters/index.ts +51 -0
  74. package/src/converters/mime.ts +163 -0
  75. package/src/converters/native/markdown.ts +115 -0
  76. package/src/converters/native/plaintext.ts +56 -0
  77. package/src/converters/path.ts +48 -0
  78. package/src/converters/pipeline.ts +159 -0
  79. package/src/converters/registry.ts +74 -0
  80. package/src/converters/types.ts +123 -0
  81. package/src/converters/versions.ts +24 -0
  82. package/src/index.ts +27 -0
  83. package/src/ingestion/chunker.ts +238 -0
  84. package/src/ingestion/index.ts +32 -0
  85. package/src/ingestion/language.ts +276 -0
  86. package/src/ingestion/sync.ts +671 -0
  87. package/src/ingestion/types.ts +219 -0
  88. package/src/ingestion/walker.ts +235 -0
  89. package/src/llm/cache.ts +467 -0
  90. package/src/llm/errors.ts +191 -0
  91. package/src/llm/index.ts +58 -0
  92. package/src/llm/nodeLlamaCpp/adapter.ts +133 -0
  93. package/src/llm/nodeLlamaCpp/embedding.ts +165 -0
  94. package/src/llm/nodeLlamaCpp/generation.ts +88 -0
  95. package/src/llm/nodeLlamaCpp/lifecycle.ts +317 -0
  96. package/src/llm/nodeLlamaCpp/rerank.ts +94 -0
  97. package/src/llm/registry.ts +86 -0
  98. package/src/llm/types.ts +129 -0
  99. package/src/mcp/resources/index.ts +151 -0
  100. package/src/mcp/server.ts +229 -0
  101. package/src/mcp/tools/get.ts +220 -0
  102. package/src/mcp/tools/index.ts +160 -0
  103. package/src/mcp/tools/multi-get.ts +263 -0
  104. package/src/mcp/tools/query.ts +226 -0
  105. package/src/mcp/tools/search.ts +119 -0
  106. package/src/mcp/tools/status.ts +81 -0
  107. package/src/mcp/tools/vsearch.ts +198 -0
  108. package/src/pipeline/chunk-lookup.ts +44 -0
  109. package/src/pipeline/expansion.ts +256 -0
  110. package/src/pipeline/explain.ts +115 -0
  111. package/src/pipeline/fusion.ts +185 -0
  112. package/src/pipeline/hybrid.ts +535 -0
  113. package/src/pipeline/index.ts +64 -0
  114. package/src/pipeline/query-language.ts +118 -0
  115. package/src/pipeline/rerank.ts +223 -0
  116. package/src/pipeline/search.ts +261 -0
  117. package/src/pipeline/types.ts +328 -0
  118. package/src/pipeline/vsearch.ts +348 -0
  119. package/src/store/index.ts +41 -0
  120. package/src/store/migrations/001-initial.ts +196 -0
  121. package/src/store/migrations/index.ts +20 -0
  122. package/src/store/migrations/runner.ts +187 -0
  123. package/src/store/sqlite/adapter.ts +1242 -0
  124. package/src/store/sqlite/index.ts +7 -0
  125. package/src/store/sqlite/setup.ts +129 -0
  126. package/src/store/sqlite/types.ts +28 -0
  127. package/src/store/types.ts +506 -0
  128. package/src/store/vector/index.ts +13 -0
  129. package/src/store/vector/sqlite-vec.ts +373 -0
  130. package/src/store/vector/stats.ts +152 -0
  131. package/src/store/vector/types.ts +115 -0
@@ -0,0 +1,373 @@
1
+ /**
2
+ * sqlite-vec adapter for vector search acceleration.
3
+ * Per-model vec tables to avoid dimension/collision issues.
4
+ *
5
+ * @module src/store/vector/sqliteVec
6
+ */
7
+
8
+ import type { Database } from 'bun:sqlite';
9
+ import { createHash } from 'node:crypto';
10
+ import type { StoreResult } from '../types';
11
+ import { err, ok } from '../types';
12
+ import type { VectorIndexPort, VectorRow, VectorSearchResult } from './types';
13
+
14
+ // ─────────────────────────────────────────────────────────────────────────────
15
+ // BLOB Encoding Helpers (avoid Buffer.buffer footgun)
16
+ // ─────────────────────────────────────────────────────────────────────────────
17
+
18
/**
 * Serialize an embedding into raw bytes suitable for a SQLite BLOB parameter.
 *
 * Only the window of the underlying buffer that `f32` actually views is
 * copied, so the result owns its memory and is unaffected by later writes to
 * the source array (and vice versa).
 *
 * @param f32 - Embedding values; may be a view into a larger buffer.
 * @returns A fresh byte array of length `f32.byteLength`.
 */
export function encodeEmbedding(f32: Float32Array): Uint8Array {
  const start = f32.byteOffset;
  const end = start + f32.byteLength;
  // slice() copies; wrapping f32.buffer directly would expose the whole
  // backing buffer, not just this view.
  const ownedBytes = f32.buffer.slice(start, end);
  return new Uint8Array(ownedBytes);
}
27
+
28
/**
 * Rebuild a Float32Array from the bytes of a SQLite BLOB.
 *
 * The bytes are copied into a newly allocated array, so the result never
 * aliases the input and always starts at byte offset 0 of its own buffer.
 *
 * @param blob - Raw embedding bytes; length must be a multiple of 4.
 * @returns A new Float32Array with `blob.byteLength / 4` elements.
 * @throws Error if blob length is not aligned to 4 bytes
 */
export function decodeEmbedding(blob: Uint8Array): Float32Array {
  const byteCount = blob.byteLength;
  if (byteCount % 4 !== 0) {
    throw new Error(
      `Invalid embedding blob: length ${byteCount} is not aligned to 4 bytes`
    );
  }

  // Allocate the destination first, then copy the bytes into its buffer.
  const floats = new Float32Array(byteCount / 4);
  new Uint8Array(floats.buffer).set(blob);
  return floats;
}
42
+
43
+ // ─────────────────────────────────────────────────────────────────────────────
44
+ // Helpers
45
+ // ─────────────────────────────────────────────────────────────────────────────
46
+
47
/**
 * Derive the per-model vec table name from a model URI.
 *
 * The name is `vec_` followed by the first 8 hex characters of the URI's
 * SHA-256 digest, so the same URI always maps to the same table.
 */
function modelTableName(modelUri: string): string {
  const digest = createHash('sha256').update(modelUri).digest('hex');
  const shortHash = digest.substring(0, 8);
  return `vec_${shortHash}`;
}
55
+
56
+ // ─────────────────────────────────────────────────────────────────────────────
57
+ // Factory
58
+ // ─────────────────────────────────────────────────────────────────────────────
59
+
60
/** Settings accepted by `createVectorIndexPort`. */
export interface VectorIndexOptions {
  /** Model URI; hashed to pick the per-model vec table. */
  model: string;
  /** Embedding width, used as the `FLOAT[n]` size of the vec table column. */
  dimensions: number;
  /** Distance metric for the vec table; `'cosine'` when omitted. */
  distanceMetric?: 'cosine' | 'l2';
}
65
+
66
/**
 * Create a VectorIndexPort bound to a single embedding model.
 *
 * Embeddings are always persisted in the `content_vectors` table. When the
 * optional `sqlite-vec` extension can be loaded, a per-model `vec0` virtual
 * table (named by `modelTableName(model)`) is maintained next to it for KNN
 * search. If the extension cannot be loaded, or the vec table cannot be
 * created, the port still stores vectors: `searchAvailable` is false and
 * `loadError` carries the failure message.
 *
 * @param db - Open database handle. Statements against `content_vectors` are
 *   prepared immediately, so that table is expected to exist already.
 * @param options - Model URI, embedding dimensions and optional distance
 *   metric (defaults to `'cosine'`).
 * @returns An `ok` result wrapping the port. Extension/table failures are
 *   reported through the port's fields rather than as an `err` result.
 *   Errors thrown while preparing statements are not caught here and reject
 *   the returned promise.
 */
export async function createVectorIndexPort(
  db: Database,
  options: VectorIndexOptions
): Promise<StoreResult<VectorIndexPort>> {
  const { model, dimensions, distanceMetric = 'cosine' } = options;
  const tableName = modelTableName(model);

  const describeError = (e: unknown): string =>
    e instanceof Error ? e.message : String(e);

  // Column list shared by the initial CREATE and by rebuildVecIndex().
  const vecColumnsSql = `
      chunk_id TEXT PRIMARY KEY,
      embedding FLOAT[${dimensions}] distance_metric=${distanceMetric}
  `;

  // Try loading sqlite-vec extension (ESM dynamic import)
  let searchAvailable = false;
  let loadError: string | undefined;
  try {
    const sqliteVec = await import('sqlite-vec');
    sqliteVec.load(db);
    searchAvailable = true;
  } catch (e) {
    // sqlite-vec not available - storage still works, search disabled
    loadError = describeError(e);
  }

  // Create per-model vec0 table if extension available.
  // Graceful degradation: if table creation fails, storage still works.
  if (searchAvailable) {
    try {
      db.exec(`
        CREATE VIRTUAL TABLE IF NOT EXISTS ${tableName} USING vec0(${vecColumnsSql});
      `);
    } catch (e) {
      // Vec table creation failed - degrade to storage-only mode
      searchAvailable = false;
      loadError = describeError(e);
    }
  }

  // Prepared statements for content_vectors table.
  // embedded_at is always stamped by SQLite (`datetime('now')`);
  // VectorRow.embeddedAt is not written.
  const upsertVectorStmt = db.prepare(`
    INSERT OR REPLACE INTO content_vectors (mirror_hash, seq, model, embedding, embedded_at)
    VALUES (?, ?, ?, ?, datetime('now'))
  `);

  const deleteVectorStmt = db.prepare(`
    DELETE FROM content_vectors WHERE mirror_hash = ? AND model = ?
  `);

  // Prepared statements for vec0 table (if available).
  // vec0 upserts are done as an explicit DELETE followed by INSERT: the vec0
  // virtual table does not reliably apply `INSERT OR REPLACE` conflict
  // resolution, so re-embedding an existing chunk_id would otherwise fail on
  // the primary key and leave the old vector in the index.
  const deleteVecByIdStmt = searchAvailable
    ? db.prepare(`DELETE FROM ${tableName} WHERE chunk_id = ?`)
    : null;

  const insertVecStmt = searchAvailable
    ? db.prepare(`INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)`)
    : null;

  const searchStmt = searchAvailable
    ? db.prepare(`
        SELECT chunk_id, distance
        FROM ${tableName}
        WHERE embedding MATCH ?
          AND k = ?
      `)
    : null;

  // chunk_id is "<mirrorHash>:<seq>", so a prefix match removes every chunk
  // of one mirror.
  const deleteVecStmt = searchAvailable
    ? db.prepare(`DELETE FROM ${tableName} WHERE chunk_id LIKE ? || ':%'`)
    : null;

  return ok({
    searchAvailable,
    model,
    dimensions,
    loadError,

    /**
     * Store embeddings in content_vectors and, best-effort, in the vec table.
     * Returns VECTOR_WRITE_FAILED only when the content_vectors write fails;
     * vec table failures are swallowed.
     */
    upsertVectors(rows: VectorRow[]): Promise<StoreResult<void>> {
      // 1. Always store in content_vectors first (critical path)
      try {
        db.transaction(() => {
          for (const row of rows) {
            upsertVectorStmt.run(
              row.mirrorHash,
              row.seq,
              row.model,
              encodeEmbedding(row.embedding)
            );
          }
        })();
      } catch (e) {
        return Promise.resolve(
          err('VECTOR_WRITE_FAILED', `Vector write failed: ${describeError(e)}`)
        );
      }

      // 2. Best-effort update vec0 (graceful degradation)
      if (deleteVecByIdStmt && insertVecStmt) {
        try {
          db.transaction(() => {
            for (const row of rows) {
              const chunkId = `${row.mirrorHash}:${row.seq}`;
              // Replace semantics: drop any previous vector for this chunk
              // before inserting the new one.
              deleteVecByIdStmt.run(chunkId);
              insertVecStmt.run(chunkId, encodeEmbedding(row.embedding));
            }
          })();
        } catch {
          // Vec0 write failed - storage succeeded, search may be degraded.
          // This is expected when dimensions mismatch or vec extension issues.
          // syncVecIndex()/rebuildVecIndex() can repair the index later.
        }
      }

      return Promise.resolve(ok(undefined));
    },

    /**
     * Remove this model's vectors for one mirror hash from content_vectors
     * and, best-effort, from the vec table.
     */
    deleteVectorsForMirror(mirrorHash: string): Promise<StoreResult<void>> {
      // 1. Always delete from content_vectors first
      try {
        deleteVectorStmt.run(mirrorHash, model);
      } catch (e) {
        return Promise.resolve(
          err(
            'VECTOR_DELETE_FAILED',
            `Vector delete failed: ${describeError(e)}`
          )
        );
      }

      // 2. Best-effort delete from vec0
      if (deleteVecStmt) {
        try {
          deleteVecStmt.run(mirrorHash);
        } catch {
          // Vec0 delete failed - not critical
        }
      }

      return Promise.resolve(ok(undefined));
    },

    /**
     * KNN query against the vec table.
     * Returns VEC_SEARCH_UNAVAILABLE when sqlite-vec is not usable and
     * VEC_SEARCH_FAILED when the query itself throws.
     */
    searchNearest(
      embedding: Float32Array,
      k: number,
      searchOptions?: { minScore?: number }
    ): Promise<StoreResult<VectorSearchResult[]>> {
      if (!(searchAvailable && searchStmt)) {
        return Promise.resolve(
          err(
            'VEC_SEARCH_UNAVAILABLE',
            'Vector search requires sqlite-vec. Embeddings stored but KNN search disabled.'
          )
        );
      }

      try {
        const results = searchStmt.all(encodeEmbedding(embedding), k) as {
          chunk_id: string;
          distance: number;
        }[];

        // Filter by minScore if provided.
        // For cosine distance: similarity = 1 - distance, keep if >= minScore.
        // NOTE(review): the same formula is applied when distanceMetric is
        // 'l2', where 1 - distance is not a bounded similarity - confirm
        // callers only pass minScore with cosine indexes.
        const minScore = searchOptions?.minScore;
        const filtered =
          minScore !== undefined
            ? results.filter((r) => 1 - r.distance >= minScore)
            : results;

        return Promise.resolve(
          ok(
            filtered.map((r) => {
              // chunk_id was written as "<mirrorHash>:<seq>"
              const parts = r.chunk_id.split(':');
              const mirrorHash = parts[0] ?? '';
              const seqStr = parts[1] ?? '0';
              return {
                mirrorHash,
                seq: Number.parseInt(seqStr, 10),
                distance: r.distance,
              };
            })
          )
        );
      } catch (e) {
        return Promise.resolve(
          err('VEC_SEARCH_FAILED', `Vector search failed: ${describeError(e)}`)
        );
      }
    },

    /**
     * Drop the vec table and repopulate it from content_vectors for this
     * model. No-op when sqlite-vec is unavailable.
     */
    rebuildVecIndex(): Promise<StoreResult<void>> {
      if (!searchAvailable) {
        return Promise.resolve(ok(undefined)); // No-op if no vec support
      }

      try {
        // Drop and recreate vec table from content_vectors
        db.exec(`DROP TABLE IF EXISTS ${tableName}`);
        db.exec(`
          CREATE VIRTUAL TABLE ${tableName} USING vec0(${vecColumnsSql});
        `);

        // Repopulate from content_vectors
        const rows = db
          .prepare(
            'SELECT mirror_hash, seq, embedding FROM content_vectors WHERE model = ?'
          )
          .all(model) as {
          mirror_hash: string;
          seq: number;
          embedding: Uint8Array;
        }[];

        const insertStmt = db.prepare(`
          INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)
        `);

        db.transaction(() => {
          for (const row of rows) {
            const chunkId = `${row.mirror_hash}:${row.seq}`;
            // Stored BLOBs are passed through as-is (already encoded bytes).
            insertStmt.run(chunkId, row.embedding);
          }
        })();

        return Promise.resolve(ok(undefined));
      } catch (e) {
        return Promise.resolve(
          err('VEC_REBUILD_FAILED', `Vec rebuild failed: ${describeError(e)}`)
        );
      }
    },

    /**
     * Reconcile the vec table with content_vectors for this model: delete
     * vec rows with no matching stored vector, then insert stored vectors
     * missing from the vec table. Reports how many rows were added/removed.
     */
    syncVecIndex(): Promise<StoreResult<{ added: number; removed: number }>> {
      if (!searchAvailable) {
        return Promise.resolve(ok({ added: 0, removed: 0 }));
      }

      try {
        let added = 0;
        let removed = 0;

        // 1. Remove orphans from vec table (not in content_vectors for this model)
        const orphanResult = db
          .prepare(
            `
            DELETE FROM ${tableName}
            WHERE chunk_id NOT IN (
              SELECT mirror_hash || ':' || seq
              FROM content_vectors
              WHERE model = ?
            )
          `
          )
          .run(model);
        removed = orphanResult.changes;

        // 2. Add missing entries (in content_vectors but not in vec table)
        const missing = db
          .prepare(
            `
            SELECT cv.mirror_hash, cv.seq, cv.embedding
            FROM content_vectors cv
            WHERE cv.model = ?
              AND (cv.mirror_hash || ':' || cv.seq) NOT IN (
                SELECT chunk_id FROM ${tableName}
              )
          `
          )
          .all(model) as {
          mirror_hash: string;
          seq: number;
          embedding: Uint8Array;
        }[];

        if (missing.length > 0) {
          const insertStmt = db.prepare(`
            INSERT INTO ${tableName} (chunk_id, embedding) VALUES (?, ?)
          `);
          db.transaction(() => {
            for (const row of missing) {
              const chunkId = `${row.mirror_hash}:${row.seq}`;
              insertStmt.run(chunkId, row.embedding);
            }
          })();
          added = missing.length;
        }

        return Promise.resolve(ok({ added, removed }));
      } catch (e) {
        return Promise.resolve(
          err('VEC_SYNC_FAILED', `Vec sync failed: ${describeError(e)}`)
        );
      }
    },
  });
}
@@ -0,0 +1,152 @@
1
+ /**
2
+ * VectorStatsPort implementation for backlog/stats queries.
3
+ * Works without sqlite-vec.
4
+ *
5
+ * @module src/store/vector/stats
6
+ */
7
+
8
+ import type { Database } from 'bun:sqlite';
9
+ import type { StoreResult } from '../types';
10
+ import { err, ok } from '../types';
11
+ import type { BacklogItem, VectorStatsPort } from './types';
12
+
13
/**
 * Create a VectorStatsPort for backlog detection and vector stats.
 *
 * All queries run against plain tables (`content_vectors`, `content_chunks`,
 * `documents`), so this port works without sqlite-vec. Backlog queries use
 * EXISTS checks rather than joins so a chunk is reported once even when
 * several documents share its mirror_hash.
 *
 * Every method resolves to an `err('QUERY_FAILED', ...)` result instead of
 * throwing when the underlying query fails.
 *
 * @param db - Open database handle.
 */
export function createVectorStatsPort(db: Database): VectorStatsPort {
  const describeError = (e: unknown): string =>
    e instanceof Error ? e.message : String(e);

  // A chunk `c` needs embedding when at least one active document references
  // its mirror_hash and no vector for (mirror_hash, seq, model) is at least as
  // new as the chunk. Binds one parameter: the model.
  // Shared by countBacklog and getBacklog so the count and the listing always
  // apply the same rule.
  const needsEmbeddingSql = `
    EXISTS (
      SELECT 1 FROM documents d
      WHERE d.mirror_hash = c.mirror_hash AND d.active = 1
    )
    AND NOT EXISTS (
      SELECT 1 FROM content_vectors v
      WHERE v.mirror_hash = c.mirror_hash
        AND v.seq = c.seq
        AND v.model = ?
        AND v.embedded_at >= c.created_at
    )
  `;

  // 'new' when no vector exists at all for the chunk/model, otherwise
  // 'changed' (a vector exists but is older than the chunk). Binds one
  // parameter: the model.
  const reasonSql = `
    CASE
      WHEN NOT EXISTS (
        SELECT 1 FROM content_vectors v
        WHERE v.mirror_hash = c.mirror_hash
          AND v.seq = c.seq
          AND v.model = ?
      ) THEN 'new'
      ELSE 'changed'
    END
  `;

  return {
    /** Count stored vectors for `model`. */
    countVectors(model: string): Promise<StoreResult<number>> {
      try {
        const result = db
          .prepare(
            'SELECT COUNT(*) as count FROM content_vectors WHERE model = ?'
          )
          .get(model) as { count: number };
        return Promise.resolve(ok(result.count));
      } catch (e) {
        return Promise.resolve(
          err('QUERY_FAILED', `Failed to count vectors: ${describeError(e)}`)
        );
      }
    },

    /** Count chunks needing embedding for `model` (fast, for progress display). */
    countBacklog(model: string): Promise<StoreResult<number>> {
      try {
        const result = db
          .prepare(
            `
            SELECT COUNT(*) as count
            FROM content_chunks c
            WHERE ${needsEmbeddingSql}
          `
          )
          .get(model) as { count: number };
        return Promise.resolve(ok(result.count));
      } catch (e) {
        return Promise.resolve(
          err('QUERY_FAILED', `Failed to count backlog: ${describeError(e)}`)
        );
      }
    },

    /**
     * List chunks needing embedding for `model`, ordered by
     * (mirror_hash, seq).
     *
     * @param options.limit - Maximum rows to return; 1000 when omitted.
     * @param options.after - Seek cursor: only rows strictly after this
     *   (mirrorHash, seq) pair are returned.
     */
    getBacklog(
      model: string,
      options?: { limit?: number; after?: { mirrorHash: string; seq: number } }
    ): Promise<StoreResult<BacklogItem[]>> {
      try {
        const limit = options?.limit ?? 1000;
        const after = options?.after;

        // Seek pagination: use cursor to avoid skipping items as backlog
        // shrinks. The cursor clause is only present when a cursor was given.
        const cursorSql = after
          ? 'AND (c.mirror_hash > ? OR (c.mirror_hash = ? AND c.seq > ?))'
          : '';

        const sql = `
          SELECT c.mirror_hash as mirrorHash, c.seq, c.text,
            ${reasonSql} as reason
          FROM content_chunks c
          WHERE ${needsEmbeddingSql}
          ${cursorSql}
          ORDER BY c.mirror_hash, c.seq
          LIMIT ?
        `;

        // Bind order follows placeholder order: reason CASE, predicate,
        // optional cursor triple, limit.
        const params: (string | number)[] = [model, model];
        if (after) {
          params.push(after.mirrorHash, after.mirrorHash, after.seq);
        }
        params.push(limit);

        const results = db.prepare(sql).all(...params) as BacklogItem[];
        return Promise.resolve(ok(results));
      } catch (e) {
        return Promise.resolve(
          err('QUERY_FAILED', `Failed to get backlog: ${describeError(e)}`)
        );
      }
    },
  };
}
@@ -0,0 +1,115 @@
1
+ /**
2
+ * Vector index types and interfaces.
3
+ * Defines VectorIndexPort and VectorStatsPort for embedding storage/search.
4
+ *
5
+ * @module src/store/vector/types
6
+ */
7
+
8
+ import type { StoreResult } from '../types';
9
+
10
+ // ─────────────────────────────────────────────────────────────────────────────
11
+ // Row Types
12
+ // ─────────────────────────────────────────────────────────────────────────────
13
+
14
/** One chunk embedding, as passed to `VectorIndexPort.upsertVectors`. */
export interface VectorRow {
  /** Mirror hash the chunk belongs to. */
  mirrorHash: string;
  /** Chunk sequence number within that mirror. */
  seq: number;
  /** Model URI that produced the embedding. */
  model: string;
  /** Embedding values. */
  embedding: Float32Array;
  /** Timestamp of embedding (string form; exact format not defined here). */
  embeddedAt: string;
}
22
+
23
/** A single hit returned by `VectorIndexPort.searchNearest`. */
export interface VectorSearchResult {
  /** Mirror hash of the matched chunk. */
  mirrorHash: string;
  /** Sequence number of the matched chunk within its mirror. */
  seq: number;
  /** Distance between the query embedding and the matched chunk. */
  distance: number;
}
29
+
30
/**
 * Seek-pagination cursor for backlog queries: identifies the last
 * (mirrorHash, seq) pair already seen.
 */
export interface BacklogCursor {
  /** Mirror hash of the last item seen. */
  mirrorHash: string;
  /** Sequence number of the last item seen. */
  seq: number;
}
35
+
36
/** A chunk that still needs an embedding for some model. */
export interface BacklogItem {
  /** Mirror hash the chunk belongs to. */
  mirrorHash: string;
  /** Chunk sequence number within that mirror. */
  seq: number;
  /** Chunk text to embed. */
  text: string;
  /**
   * Why the chunk is in the backlog: `'new'` (no vector yet) or `'changed'`
   * (existing vector is stale). `'force'` is presumably set by callers that
   * re-embed unconditionally - confirm against usage.
   */
  reason: 'new' | 'changed' | 'force';
}
43
+
44
+ // ─────────────────────────────────────────────────────────────────────────────
45
+ // VectorIndexPort
46
+ // ─────────────────────────────────────────────────────────────────────────────
47
+
48
/**
 * Port for embedding storage plus optional KNN acceleration via sqlite-vec.
 *
 * Embeddings are ALWAYS written to the content_vectors table, which works
 * without sqlite-vec. When sqlite-vec is usable, the port additionally
 * maintains a vec index and exposes nearest-neighbour search over it.
 */
export interface VectorIndexPort {
  /** True when sqlite-vec loaded and the vec index is usable for search */
  readonly searchAvailable: boolean;
  /** Model URI this index is configured for */
  readonly model: string;
  /** Vector dimensions */
  readonly dimensions: number;
  /** Failure message when sqlite-vec could not be used (for diagnostics) */
  readonly loadError?: string;

  // ─────────────────────────────────────────────────────────────────────────
  // Storage (always works, uses content_vectors table)
  // ─────────────────────────────────────────────────────────────────────────

  /** Insert or replace vectors in storage and in the vec index */
  upsertVectors(rows: VectorRow[]): Promise<StoreResult<void>>;

  /** Remove every vector of one mirror hash (for this port's model) */
  deleteVectorsForMirror(mirrorHash: string): Promise<StoreResult<void>>;

  // ─────────────────────────────────────────────────────────────────────────
  // Search (requires sqlite-vec)
  // ─────────────────────────────────────────────────────────────────────────

  /**
   * Find the k nearest neighbours of `embedding`.
   * `options.minScore`, when given, drops hits scoring below it.
   */
  searchNearest(
    embedding: Float32Array,
    k: number,
    options?: { minScore?: number }
  ): Promise<StoreResult<VectorSearchResult[]>>;

  // ─────────────────────────────────────────────────────────────────────────
  // Index maintenance
  // ─────────────────────────────────────────────────────────────────────────

  /** Drop the vec index and rebuild it from content_vectors */
  rebuildVecIndex(): Promise<StoreResult<void>>;

  /**
   * Reconcile the vec index with content_vectors (add missing, remove
   * orphans) and report how many entries were added and removed.
   */
  syncVecIndex(): Promise<StoreResult<{ added: number; removed: number }>>;
}
94
+
95
+ // ─────────────────────────────────────────────────────────────────────────────
96
+ // VectorStatsPort
97
+ // ─────────────────────────────────────────────────────────────────────────────
98
+
99
/**
 * Port for model-aware vector statistics and embedding-backlog queries.
 * Does not depend on sqlite-vec.
 */
export interface VectorStatsPort {
  /** Number of stored vectors for `model` */
  countVectors(model: string): Promise<StoreResult<number>>;

  /** Number of chunks that still need embedding for `model` */
  countBacklog(model: string): Promise<StoreResult<number>>;

  /**
   * Chunks that still need embedding for `model`.
   * Uses seek pagination: pass the last item seen as `options.after` to
   * fetch the next page; `options.limit` caps the page size.
   */
  getBacklog(
    model: string,
    options?: { limit?: number; after?: BacklogCursor }
  ): Promise<StoreResult<BacklogItem[]>>;
}