@qearlyao/familiar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/.env.example +31 -0
  2. package/HEARTBEAT.md +23 -0
  3. package/LICENSE +21 -0
  4. package/MEMORY.md +1 -0
  5. package/README.md +245 -0
  6. package/SOUL.md +13 -0
  7. package/USER.md +13 -0
  8. package/config.example.toml +221 -0
  9. package/dist/agent-events.js +167 -0
  10. package/dist/agent.js +590 -0
  11. package/dist/browser-tools.js +638 -0
  12. package/dist/chat-log.js +130 -0
  13. package/dist/cli.js +168 -0
  14. package/dist/config.js +804 -0
  15. package/dist/data-retention.js +54 -0
  16. package/dist/discord.js +1203 -0
  17. package/dist/generated-media.js +86 -0
  18. package/dist/image-derivatives.js +102 -0
  19. package/dist/image-gen.js +440 -0
  20. package/dist/inbound-attachments.js +266 -0
  21. package/dist/index.js +10 -0
  22. package/dist/media-understanding.js +120 -0
  23. package/dist/memory/diary/ambient-injector.js +180 -0
  24. package/dist/memory/diary/ambient.js +124 -0
  25. package/dist/memory/diary/chunks.js +231 -0
  26. package/dist/memory/diary/index.js +3 -0
  27. package/dist/memory/diary/indexer.js +93 -0
  28. package/dist/memory/doctor.js +250 -0
  29. package/dist/memory/index/chunk-indexer.js +151 -0
  30. package/dist/memory/index/embedding-provider.js +119 -0
  31. package/dist/memory/index/fts-query.js +18 -0
  32. package/dist/memory/index/retrieval.js +246 -0
  33. package/dist/memory/index/schema.js +157 -0
  34. package/dist/memory/index/store.js +513 -0
  35. package/dist/memory/index/vec.js +72 -0
  36. package/dist/memory/index/vector-codec.js +27 -0
  37. package/dist/memory/lcm/backfill.js +247 -0
  38. package/dist/memory/lcm/condense.js +146 -0
  39. package/dist/memory/lcm/context-transformer.js +662 -0
  40. package/dist/memory/lcm/context.js +421 -0
  41. package/dist/memory/lcm/eviction-score.js +38 -0
  42. package/dist/memory/lcm/index.js +6 -0
  43. package/dist/memory/lcm/indexer.js +200 -0
  44. package/dist/memory/lcm/normalize.js +235 -0
  45. package/dist/memory/lcm/schema.js +188 -0
  46. package/dist/memory/lcm/segment-manager.js +136 -0
  47. package/dist/memory/lcm/store.js +722 -0
  48. package/dist/memory/lcm/summarizer.js +258 -0
  49. package/dist/memory/lcm/types.js +1 -0
  50. package/dist/memory/operator.js +477 -0
  51. package/dist/memory/service.js +202 -0
  52. package/dist/memory/tools.js +205 -0
  53. package/dist/models.js +165 -0
  54. package/dist/persona.js +54 -0
  55. package/dist/runtime.js +493 -0
  56. package/dist/scheduler.js +200 -0
  57. package/dist/settings.js +116 -0
  58. package/dist/skills.js +38 -0
  59. package/dist/tts.js +143 -0
  60. package/dist/web-auth.js +105 -0
  61. package/dist/web-events.js +114 -0
  62. package/dist/web-http.js +29 -0
  63. package/dist/web-static.js +106 -0
  64. package/dist/web-tools.js +940 -0
  65. package/dist/web-types.js +2 -0
  66. package/dist/web.js +844 -0
  67. package/package.json +60 -0
  68. package/web/dist/assets/index-ClgkMgaq.css +2 -0
  69. package/web/dist/assets/index-Cu2QquuR.js +59 -0
  70. package/web/dist/favicon.svg +1 -0
  71. package/web/dist/icons.svg +24 -0
  72. package/web/dist/index.html +20 -0
@@ -0,0 +1,513 @@
1
+ import { createHash } from "node:crypto";
2
+ import { mkdirSync } from "node:fs";
3
+ import { dirname, resolve } from "node:path";
4
+ import Database from "better-sqlite3";
5
+ import { normalizeFtsMatchQuery } from "./fts-query.js";
6
+ import { readMeta, runMemoryIndexMigrations } from "./schema.js";
7
+ import { cosineDistance, decodeVector, encodeVector } from "./vector-codec.js";
8
/**
 * SQLite-backed index of memory chunks.
 *
 * Chunk rows live in `memory_chunks` and are deduplicated by content hash. Every chunk is
 * mirrored into the `memory_fts` full-text table and, when the database metadata reports the
 * "sqlite-vec" capability, into the `memory_vec` vector table. `memory_index_sources` maps a
 * chunk back to the (corpus, sourceId, chunkIndex) locations that produced it, and
 * `memory_index_source_state` records per-source bookkeeping (mtime, size).
 */
export class MemoryIndexStore {
    db;
    ownsDb;
    embeddingProvider;
    embeddingModel;
    embeddingDimensions;
    /**
     * @param options Either `db` (an existing connection, left open by `close()`) or `path`
     *   (a database file this instance opens and owns), plus `embeddingProvider`,
     *   `embeddingModel` and `embeddingDimensions`.
     * @throws {Error} When neither `db` nor `path` is given, or when migrations fail.
     */
    constructor(options) {
        if (!options.db && !options.path)
            throw new Error("MemoryIndexStore requires a db or path");
        if (options.db) {
            this.db = options.db;
            this.ownsDb = false;
        }
        else {
            const path = options.path;
            mkdirSync(dirname(path), { recursive: true });
            this.db = new Database(path);
            this.ownsDb = true;
        }
        this.embeddingProvider = options.embeddingProvider;
        this.embeddingModel = options.embeddingModel;
        this.embeddingDimensions = options.embeddingDimensions;
        try {
            runMemoryIndexMigrations(this.db, {
                embeddingProvider: this.embeddingProvider,
                embeddingModel: this.embeddingModel,
                embeddingDimensions: this.embeddingDimensions,
            });
        }
        catch (error) {
            // The caller never receives the instance when the constructor throws, so a
            // connection opened here would otherwise stay open with nobody able to close it.
            if (this.ownsDb)
                this.db.close();
            throw error;
        }
    }
    /** Opens `memory.sqlite` inside `config.memory.indexDir` using the configured embedding settings. */
    static open(config) {
        return new MemoryIndexStore({
            path: resolve(config.memory.indexDir, "memory.sqlite"),
            embeddingProvider: config.memory.embedding.provider,
            embeddingModel: config.memory.embedding.model,
            embeddingDimensions: config.memory.embedding.dimensions,
        });
    }
    /** Closes the connection, but only when this instance opened it. */
    close() {
        if (this.ownsDb)
            this.db.close();
    }
    /** Returns the embedding provider/model/dimensions this store was constructed with. */
    embeddingConfig() {
        return {
            provider: this.embeddingProvider,
            model: this.embeddingModel,
            dimensions: this.embeddingDimensions,
        };
    }
    /** Inserts one chunk and returns its row id (see `insertChunks`). */
    insertChunk(input) {
        return this.insertChunks([input])[0];
    }
    /**
     * Inserts several chunks in a single immediate transaction.
     * @returns Chunk ids in input order; an input whose content hash already exists yields
     *   the existing id.
     * @throws {Error} When an input has the wrong embedding length or blank text.
     */
    insertChunks(inputs) {
        if (inputs.length === 0)
            return [];
        const rows = inputs.map((input) => this.normalizeInput(input));
        const out = [];
        const insert = this.db.transaction((items) => {
            for (const item of items)
                out.push(this.insertNormalized(item));
        });
        insert.immediate(rows);
        return out;
    }
    /**
     * Records source mappings for inputs whose chunk is already stored. Inputs whose content
     * hash is not present are skipped; no chunk rows are created.
     */
    recordSourceMappings(inputs) {
        if (inputs.length === 0)
            return;
        const rows = inputs.map((input) => this.normalizeInput(input));
        this.db
            .transaction((items) => {
            // Prepared once and reused for every item instead of re-preparing per iteration.
            const findByHash = this.db.prepare("SELECT id FROM memory_chunks WHERE content_hash = ?");
            for (const item of items) {
                const existing = findByHash.get(item.contentHash);
                if (existing)
                    this.insertSourceMapping(existing.id, item);
            }
        })
            .immediate(rows);
    }
    /**
     * Atomically drops everything recorded for (corpus, sourceId) and inserts `inputs` in its
     * place. `corpus` and `sourceId` override whatever the inputs carry.
     * @returns Chunk ids in input order.
     */
    replaceSource(corpus, sourceId, inputs) {
        const rows = inputs.map((input) => this.normalizeInput({ ...input, corpus, sourceId }));
        const out = [];
        const replace = this.db.transaction(() => {
            this.deleteBySourceInternal(corpus, sourceId);
            for (const item of rows)
                out.push(this.insertNormalized(item));
        });
        replace.immediate();
        return out;
    }
    /**
     * Looks up which of the given content hashes are already stored.
     * @returns Map from content hash to chunk id, containing only the hashes that exist.
     */
    whichHashesPresent(hashes) {
        const present = new Map();
        if (hashes.length === 0)
            return present;
        // Query in batches so the number of bound parameters per statement stays bounded.
        const chunkSize = 256;
        for (let index = 0; index < hashes.length; index += chunkSize) {
            const chunk = hashes.slice(index, index + chunkSize);
            const placeholders = chunk.map(() => "?").join(",");
            const rows = this.db
                .prepare(`SELECT content_hash, id FROM memory_chunks WHERE content_hash IN (${placeholders})`)
                .all(...chunk);
            for (const row of rows)
                present.set(row.content_hash, row.id);
        }
        return present;
    }
    /** Returns the chunk with the given id (including its source mappings), or null. */
    getChunk(id) {
        const row = this.db
            .prepare(`SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c WHERE c.id = ?`)
            .get(id);
        return row ? rowToChunk(row) : null;
    }
    /**
     * Full-text search over `memory_fts`.
     * @param query Raw query text; it is passed through `normalizeFtsMatchQuery` and an empty
     *   result from that yields no hits.
     * @param options A numeric limit, or `{ limit, corpus }` (limit defaults to 10).
     * @returns Hits ordered by the full-text `rank` column, which is also reported as `score`.
     */
    searchLexical(query, options = {}) {
        const normalized = normalizeSearchOptions(options);
        const matchQuery = normalizeFtsMatchQuery(query);
        if (!matchQuery)
            return [];
        const params = [matchQuery];
        const corpusFilter = normalized.corpus ? "AND c.corpus = ?" : "";
        if (normalized.corpus)
            params.push(normalized.corpus);
        params.push(normalized.limit);
        const rows = this.db
            .prepare(`SELECT c.*, f.rank AS score, ${sourcesJsonSelect("c.id")}
                FROM memory_fts f
                JOIN memory_chunks c ON c.id = f.rowid
                WHERE memory_fts MATCH ?
                ${corpusFilter}
                ORDER BY f.rank
                LIMIT ?`)
            .all(...params);
        return rows.map((row) => ({ id: row.id, score: row.score, chunk: rowToChunk(row) }));
    }
    /**
     * Nearest-neighbour search for an embedding vector.
     * @param query Query embedding; its length must equal the configured dimensions.
     * @param options A numeric limit, or `{ limit, corpus }` (limit defaults to 10).
     * @returns Hits ordered by ascending distance (`score`).
     * @throws {Error} When the query vector has the wrong length.
     */
    searchSemantic(query, options = {}) {
        const normalized = normalizeSearchOptions(options);
        if (query.length !== this.embeddingDimensions) {
            throw new Error(`Query vector dimension mismatch: expected ${this.embeddingDimensions}, got ${query.length}`);
        }
        // memory_vec does not carry corpus metadata, so sqlite-vec cannot prefilter
        // corpus-scoped KNN. Use the linear path to keep scoped nearest neighbors exact.
        if (this.vectorCapability() === "sqlite-vec" && !normalized.corpus)
            return this.searchSemanticVec(query, normalized);
        return this.searchSemanticLinear(query, normalized);
    }
    /**
     * KNN search through the `memory_vec` table. When `normalized.corpus` is set the filter is
     * applied after the k nearest rows are selected, so fewer than `limit` hits may come back;
     * `searchSemantic` avoids that by only calling this method for unscoped searches.
     */
    searchSemanticVec(query, normalized) {
        // Bind order: query vector, k, optional corpus, then the outer LIMIT.
        const params = [encodeVector(query), normalized.limit];
        if (normalized.corpus)
            params.push(normalized.corpus);
        const corpusFilter = normalized.corpus ? "WHERE c.corpus = ?" : "";
        const rows = this.db
            .prepare(`SELECT c.*, v.distance AS score, ${sourcesJsonSelect("c.id")}
                FROM (
                    SELECT rowid AS chunk_id, distance
                    FROM memory_vec
                    WHERE embedding MATCH ? AND k = ?
                ) v
                JOIN memory_chunks c ON c.id = v.chunk_id
                ${corpusFilter}
                ORDER BY v.distance
                LIMIT ?`)
            .all(...params, normalized.limit);
        return rows.map((row) => ({ id: row.id, score: row.score, chunk: rowToChunk(row) }));
    }
    /**
     * Exact search that loads every candidate chunk (optionally restricted to one corpus),
     * computes the cosine distance in JavaScript and keeps the `limit` closest.
     */
    searchSemanticLinear(query, normalized) {
        const rows = this.db
            .prepare(normalized.corpus
            ? `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c WHERE c.corpus = ?`
            : `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c`)
            .all(...(normalized.corpus ? [normalized.corpus] : []));
        return rows
            .map((row) => ({
            id: row.id,
            score: cosineDistance(query, decodeVector(row.embedding, row.embedding_dimensions)),
            chunk: rowToChunk(row),
        }))
            .sort((a, b) => a.score - b.score)
            .slice(0, normalized.limit);
    }
    /** Deletes one chunk together with its full-text and vector index rows. */
    deleteChunk(id) {
        const remove = this.db.transaction(() => {
            this.deleteFtsRow(id);
            this.deleteVecRow(id);
            this.db.prepare("DELETE FROM memory_chunks WHERE id = ?").run(id);
        });
        remove.immediate();
    }
    /** Removes all mappings and state for a source, plus chunks left without any mapping. */
    deleteBySource(corpus, sourceId) {
        this.db.transaction(() => this.deleteBySourceInternal(corpus, sourceId)).immediate();
    }
    /** Caller already owns the index DB write transaction. */
    deleteBySourceUnsafe(corpus, sourceId) {
        this.deleteBySourceInternal(corpus, sourceId);
    }
    /**
     * Reads the bookkeeping row for a source.
     * @returns The recorded state plus `hasMappings` (whether any source mapping exists for
     *   it), or null when no state row exists.
     */
    getSourceState(corpus, sourceId) {
        const row = this.db
            .prepare(`SELECT
                    st.corpus,
                    st.source_id,
                    st.source_ref,
                    st.mtime_ms,
                    st.size_bytes,
                    st.updated_at,
                    EXISTS(
                        SELECT 1 FROM memory_index_sources s
                        WHERE s.corpus = st.corpus AND s.source_id = st.source_id
                    ) AS has_mappings
                FROM memory_index_source_state st
                WHERE st.corpus = ? AND st.source_id = ?`)
            .get(corpus, sourceId);
        return row
            ? {
                corpus: row.corpus,
                sourceId: row.source_id,
                sourceRef: row.source_ref,
                mtimeMs: row.mtime_ms,
                sizeBytes: row.size_bytes,
                updatedAt: row.updated_at,
                hasMappings: row.has_mappings === 1,
            }
            : null;
    }
    /** Inserts or refreshes the bookkeeping row for a source; `mtimeMs` is floored to an integer. */
    upsertSourceState(input) {
        this.db
            .prepare(`INSERT INTO memory_index_source_state(corpus, source_id, source_ref, mtime_ms, size_bytes)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(corpus, source_id) DO UPDATE SET
                    source_ref = excluded.source_ref,
                    mtime_ms = excluded.mtime_ms,
                    size_bytes = excluded.size_bytes,
                    updated_at = unixepoch()`)
            .run(input.corpus, input.sourceId, input.sourceRef ?? null, Math.floor(input.mtimeMs), input.sizeBytes);
    }
    /** Like `deleteBySourceExceptMappings`, keeping every mapping whose content hash is listed. */
    deleteBySourceExceptHashes(corpus, sourceId, contentHashes) {
        this.deleteBySourceExceptMappings(corpus, sourceId, [...new Set(contentHashes)].map((contentHash) => ({ contentHash, chunkIndex: null })));
    }
    /**
     * Deletes the mappings of a source that are not listed in `kept`, then removes chunks that
     * no longer have any mapping.
     * @param kept Entries of `{ contentHash, chunkIndex }`; a `chunkIndex` of null matches any
     *   index. An empty list deletes the whole source (including its state row).
     */
    deleteBySourceExceptMappings(corpus, sourceId, kept) {
        if (kept.length === 0) {
            this.deleteBySource(corpus, sourceId);
            return;
        }
        this.db
            .transaction(() => {
            const rows = this.db
                .prepare(`SELECT s.chunk_id AS id, c.content_hash, s.chunk_index
                    FROM memory_index_sources s
                    JOIN memory_chunks c ON c.id = s.chunk_id
                    WHERE s.corpus = ? AND s.source_id = ?`)
                .all(corpus, sourceId);
            // Prepared once and reused for every stale mapping.
            const removeMapping = this.db.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ? AND chunk_index = ?");
            for (const row of rows) {
                const isKept = kept.some((item) => item.contentHash === row.content_hash &&
                    (item.chunkIndex === null || item.chunkIndex === row.chunk_index));
                if (isKept)
                    continue;
                removeMapping.run(corpus, sourceId, row.chunk_index);
                this.deleteOrphanChunk(row.id);
            }
        })
            .immediate();
    }
    /** Empties the full-text index, the vector table (when present), mappings, state and chunks. */
    clearAll() {
        this.db
            .transaction(() => {
            this.db.prepare("INSERT INTO memory_fts(memory_fts) VALUES ('delete-all')").run();
            if (this.vectorCapability() === "sqlite-vec")
                this.db.prepare("DELETE FROM memory_vec").run();
            this.db.prepare("DELETE FROM memory_index_sources").run();
            this.db.prepare("DELETE FROM memory_index_source_state").run();
            this.db.prepare("DELETE FROM memory_chunks").run();
        })
            .immediate();
    }
    /**
     * Drops every source mapping for which `exists` returns a falsy value, and removes chunks
     * that end up without mappings.
     * @param exists Predicate called with `{ corpus, sourceId, sourceRef, chunkIndex }`.
     */
    reconcileSources(exists) {
        const sources = this.db
            .prepare("SELECT chunk_id, corpus, source_id, source_ref, chunk_index FROM memory_index_sources")
            .all();
        if (sources.length === 0)
            return;
        this.db
            .transaction(() => {
            // Prepared once and reused for every stale mapping.
            const removeMapping = this.db.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ? AND chunk_index = ?");
            for (const row of sources) {
                const source = {
                    corpus: row.corpus,
                    sourceId: row.source_id,
                    sourceRef: row.source_ref,
                    chunkIndex: row.chunk_index,
                };
                if (exists(source))
                    continue;
                removeMapping.run(source.corpus, source.sourceId, source.chunkIndex);
                this.deleteOrphanChunk(row.chunk_id);
            }
        })
            .immediate();
    }
    /**
     * Collects row counts, database size and the embedding/vector settings recorded in the
     * database metadata. Without sqlite-vec, `vectorRows` equals the chunk count because the
     * embeddings are stored on the chunk rows themselves.
     */
    stats() {
        const indexed = this.db.prepare("SELECT COUNT(*) AS n FROM memory_chunks").get().n;
        const ftsRows = this.db.prepare("SELECT COUNT(*) AS n FROM memory_fts").get().n;
        const size = this.db
            .prepare("SELECT page_count * page_size AS bytes FROM pragma_page_count(), pragma_page_size()")
            .get();
        const dimensionsRaw = readMeta(this.db, "embedding_dimensions");
        const vectorCapability = this.vectorCapability();
        const vectorRows = vectorCapability === "sqlite-vec" ? this.vectorRowCount() : indexed;
        return {
            indexed,
            ftsRows,
            vectorRows,
            vectorAvailable: vectorCapability === "sqlite-vec",
            vectorCapability,
            requiresReindex: readMeta(this.db, "requires_reindex") === "1",
            embeddingProvider: readMeta(this.db, "embedding_provider"),
            embeddingModel: readMeta(this.db, "embedding_model"),
            embeddingDimensions: dimensionsRaw ? Number(dimensionsRaw) : null,
            dbSizeBytes: size.bytes,
        };
    }
    /** Returns "sqlite-vec" when the database metadata says so, otherwise "blob-js". */
    vectorCapability() {
        return readMeta(this.db, "vector_capability") === "sqlite-vec" ? "sqlite-vec" : "blob-js";
    }
    /** Counts rows in `memory_vec`; reports 0 when the table cannot be queried. */
    vectorRowCount() {
        try {
            const row = this.db.prepare("SELECT COUNT(*) AS n FROM memory_vec").get();
            return row.n;
        }
        catch {
            return 0;
        }
    }
    /**
     * Validates an insert request and converts it to the internal row shape: trimmed text, a
     * snippet (the first 280 characters of the text when none is supplied), serialized
     * metadata and the content hash.
     * @throws {Error} When the embedding length is wrong or the text is blank.
     */
    normalizeInput(input) {
        if (input.embedding.length !== this.embeddingDimensions) {
            throw new Error(`Embedding dimension mismatch: expected ${this.embeddingDimensions}, got ${input.embedding.length}`);
        }
        const text = input.text.trim();
        if (!text)
            throw new Error("Memory chunk text must not be empty");
        const chunkIndex = input.chunkIndex ?? 0;
        // `||` on purpose: a blank snippet falls back to the text prefix as well.
        const snippet = input.snippet?.trim() || text.slice(0, 280);
        return {
            corpus: input.corpus,
            sourceId: input.sourceId ?? null,
            sourceRef: input.sourceRef ?? null,
            chunkIndex,
            text,
            snippet,
            tokenCount: input.tokenCount ?? null,
            metadataJson: input.metadata ? JSON.stringify(input.metadata) : null,
            embedding: input.embedding,
            contentHash: createMemoryContentHash({
                corpus: input.corpus,
                text,
                embeddingModel: this.embeddingModel,
                embeddingDimensions: this.embeddingDimensions,
            }),
        };
    }
    /**
     * Stores a normalized row. When a chunk with the same content hash exists only the source
     * mapping is recorded; otherwise the chunk, its full-text row and (with sqlite-vec) its
     * vector row are written. Expects to run inside a transaction owned by the caller.
     * @returns The id of the existing or newly created chunk.
     */
    insertNormalized(item) {
        const existing = this.db.prepare("SELECT id FROM memory_chunks WHERE content_hash = ?").get(item.contentHash);
        if (existing) {
            this.insertSourceMapping(existing.id, item);
            return existing.id;
        }
        // Encoded once and reused for both the chunk row and the vector table.
        const encoded = encodeVector(item.embedding);
        const result = this.db
            .prepare(`INSERT INTO memory_chunks (
                    content_hash, corpus, text_full, snippet, token_count, metadata_json, embedding_model,
                    embedding_dimensions, embedding
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
            .run(item.contentHash, item.corpus, item.text, item.snippet, item.tokenCount, item.metadataJson, this.embeddingModel, this.embeddingDimensions, encoded);
        const id = Number(result.lastInsertRowid);
        this.db
            .prepare("INSERT INTO memory_fts(rowid, text_full, snippet) VALUES (?, ?, ?)")
            .run(id, item.text, item.snippet);
        if (this.vectorCapability() === "sqlite-vec") {
            this.db
                .prepare("INSERT INTO memory_vec(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)")
                .run(id, encoded);
        }
        this.insertSourceMapping(id, item);
        return id;
    }
    /** Transaction-less body of `deleteBySource`; the caller provides the transaction. */
    deleteBySourceInternal(corpus, sourceId) {
        // Collect the affected chunks first: once the mappings are gone they cannot be found.
        const rows = this.db
            .prepare("SELECT chunk_id AS id FROM memory_index_sources WHERE corpus = ? AND source_id = ?")
            .all(corpus, sourceId);
        this.db.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ?").run(corpus, sourceId);
        this.db.prepare("DELETE FROM memory_index_source_state WHERE corpus = ? AND source_id = ?").run(corpus, sourceId);
        for (const row of rows)
            this.deleteOrphanChunk(row.id);
    }
    /** Records (or replaces) the mapping from a chunk to its source; no-op without a sourceId. */
    insertSourceMapping(chunkId, item) {
        if (!item.sourceId)
            return;
        this.db
            .prepare(`INSERT OR REPLACE INTO memory_index_sources(chunk_id, corpus, source_id, source_ref, chunk_index)
                VALUES (?, ?, ?, ?, ?)`)
            .run(chunkId, item.corpus, item.sourceId, item.sourceRef, item.chunkIndex);
    }
    /** Deletes a chunk and its index rows unless some source mapping still references it. */
    deleteOrphanChunk(id) {
        const remaining = this.db
            .prepare("SELECT 1 AS ok FROM memory_index_sources WHERE chunk_id = ? LIMIT 1")
            .get(id);
        if (remaining)
            return;
        this.deleteFtsRow(id);
        this.deleteVecRow(id);
        this.db.prepare("DELETE FROM memory_chunks WHERE id = ?").run(id);
    }
    /** Removes the full-text row of a chunk. */
    deleteFtsRow(id) {
        this.db.prepare("DELETE FROM memory_fts WHERE rowid = ?").run(id);
    }
    /**
     * Removes the vector row of a chunk when the sqlite-vec table is in use. Mirrors the
     * insert in `insertNormalized`: without it a deleted chunk would leave a stale vector
     * behind that takes up a KNN slot and can collide with a later chunk reusing the rowid.
     * NOTE(review): assumes no schema trigger already does this; the delete is a no-op if one does.
     */
    deleteVecRow(id) {
        if (this.vectorCapability() !== "sqlite-vec")
            return;
        this.db.prepare("DELETE FROM memory_vec WHERE rowid = CAST(? AS INTEGER)").run(id);
    }
}
419
/**
 * Computes the deduplication key of a memory chunk.
 *
 * The key is the hex SHA-256 digest of a JSON document holding, in this order, the corpus,
 * the text, the embedding model and the embedding dimensions. Other properties of `input`
 * do not influence the result.
 *
 * @param input Object with `corpus`, `text`, `embeddingModel` and `embeddingDimensions`.
 * @returns {string} 64-character lowercase hex digest.
 */
export function createMemoryContentHash(input) {
    const { corpus, text, embeddingModel, embeddingDimensions } = input;
    // Property order is part of the hash input, so it must stay exactly as listed here.
    const fingerprint = JSON.stringify({ corpus, text, embeddingModel, embeddingDimensions });
    const hasher = createHash("sha256");
    hasher.update(fingerprint);
    return hasher.digest("hex");
}
429
/**
 * Accepts the two supported ways of passing search options and returns one shape.
 *
 * @param options Either a bare number (used as the limit) or an object with optional
 *   `limit` and `corpus`.
 * @returns `{ limit }` for a numeric argument, otherwise `{ limit, corpus }` where a
 *   null/undefined limit becomes 10.
 */
function normalizeSearchOptions(options) {
    const isBareLimit = typeof options === "number";
    return isBareLimit
        ? { limit: options }
        : { limit: options.limit ?? 10, corpus: options.corpus };
}
437
/**
 * Builds the SQL select-list fragment that aggregates a chunk's source mappings into a JSON
 * array exposed as the `sources_json` column.
 *
 * @param {string} chunkIdExpr SQL expression for the chunk id, spliced into the SQL verbatim
 *   (so it must be a trusted expression such as "c.id", never user input).
 * @returns {string} A parenthesized subquery followed by `AS sources_json`.
 */
function sourcesJsonSelect(chunkIdExpr) {
    const subquery = `(SELECT json_group_array(json_object(
'corpus', s.corpus,
'sourceId', s.source_id,
'sourceRef', s.source_ref,
'chunkIndex', s.chunk_index
)) FROM memory_index_sources s WHERE s.chunk_id = ${chunkIdExpr})`;
    return `${subquery} AS sources_json`;
}
445
/**
 * Converts a `memory_chunks` result row (optionally carrying `sources_json`) into the chunk
 * object handed to callers.
 *
 * The top-level `sourceId` / `sourceRef` / `chunkIndex` come from the first source mapping;
 * when the row has no mappings they fall back to the row's own columns (or null / 0).
 *
 * @param row Result row with snake_case columns.
 * @returns Chunk object with camelCase fields, the full `sources` list and parsed metadata.
 */
function rowToChunk(row) {
    const sources = sourceRefsFromRow(row);
    let primary = sources[0];
    if (primary === undefined || primary === null) {
        primary = {
            corpus: row.corpus,
            sourceId: row.source_id ?? null,
            sourceRef: row.source_ref ?? null,
            chunkIndex: row.chunk_index ?? 0,
        };
    }
    const { sourceId, sourceRef, chunkIndex } = primary;
    return {
        id: row.id,
        contentHash: row.content_hash,
        corpus: row.corpus,
        sourceId,
        sourceRef,
        chunkIndex,
        sources,
        text: row.text_full,
        snippet: row.snippet,
        tokenCount: row.token_count,
        metadata: parseMetadata(row.metadata_json),
        embeddingModel: row.embedding_model,
        embeddingDimensions: row.embedding_dimensions,
        createdAt: row.created_at,
        updatedAt: row.updated_at,
    };
}
471
/**
 * Extracts the list of source references attached to a result row.
 *
 * Preference order:
 *   1. a non-empty string `sources_json` column that parses to an array: its entries that
 *      pass `isSourceRef`;
 *   2. `sources_json` that fails to parse: an empty list;
 *   3. otherwise (column missing, empty, or parsed to a non-array): a single reference built
 *      from the row's own `source_id` columns, or an empty list when `source_id` is absent.
 *
 * @param row Result row.
 * @returns {Array} Source references `{ corpus, sourceId, sourceRef, chunkIndex }`.
 */
function sourceRefsFromRow(row) {
    const hasJsonColumn = "sources_json" in row && typeof row.sources_json === "string" && row.sources_json;
    if (hasJsonColumn) {
        try {
            const decoded = JSON.parse(row.sources_json);
            if (Array.isArray(decoded)) {
                return decoded.filter(isSourceRef);
            }
        }
        catch {
            return [];
        }
    }
    const inlineSourceId = row.source_id ?? null;
    if (!inlineSourceId) {
        return [];
    }
    return [
        {
            corpus: row.corpus,
            sourceId: inlineSourceId,
            sourceRef: row.source_ref ?? null,
            chunkIndex: row.chunk_index ?? 0,
        },
    ];
}
495
/**
 * Type guard for entries decoded from `sources_json`.
 *
 * @param value Any decoded JSON value.
 * @returns {boolean} True when `value` is a non-null object whose `corpus` and `sourceId`
 *   are both strings.
 */
function isSourceRef(value) {
    const isObjectLike = Boolean(value) && typeof value === "object";
    if (!isObjectLike) {
        return false;
    }
    if (typeof value.corpus !== "string") {
        return false;
    }
    return typeof value.sourceId === "string";
}
501
/**
 * Decodes the `metadata_json` column.
 *
 * @param value JSON text, or a falsy value when no metadata was stored.
 * @returns The decoded value when it is a plain (non-array) object; null for falsy input,
 *   invalid JSON, arrays, primitives and JSON `null`.
 */
function parseMetadata(value) {
    if (!value) {
        return null;
    }
    let decoded;
    try {
        decoded = JSON.parse(value);
    }
    catch {
        return null;
    }
    const isRecord = decoded && typeof decoded === "object" && !Array.isArray(decoded);
    return isRecord ? decoded : null;
}
@@ -0,0 +1,72 @@
1
+ import { createRequire } from "node:module";
2
// CommonJS-style `require` bound to this module's URL; used to load the optional
// "sqlite-vec" package on demand instead of through a static import.
const requireOptional = createRequire(import.meta.url);
// Set once the "unavailable" notice has been printed, so it is logged a single time.
let loggedUnavailable = false;
// Loader for the sqlite-vec module; reassigned through __memoryVecTest.setLoader.
let loadModule = () => requireOptional("sqlite-vec");
// Database handles on which the extension was registered successfully.
let loadedDbs = new WeakSet();
6
/**
 * Registers the sqlite-vec extension on a database handle, at most once per handle.
 *
 * After registering, `vec_version()` is queried to confirm the extension actually works.
 * Any failure (module missing, registration error, probe failure) is reported through a
 * one-time console notice and turned into an "unavailable" result instead of an exception.
 *
 * @param db Database handle exposing `prepare()`.
 * @returns The shared available-state object on success, otherwise `{ available: false }`.
 */
export function loadSqliteVec(db) {
    if (!loadedDbs.has(db)) {
        try {
            registerModuleOnDb(loadModule(), db);
            const probe = db.prepare("SELECT vec_version() AS version").get();
            if (!probe?.version) {
                throw new Error("sqlite-vec loaded but vec_version() is unavailable");
            }
            loadedDbs.add(db);
        }
        catch {
            logUnavailableOnce();
            return { available: false };
        }
    }
    return availableState;
}
23
/**
 * Reports whether `loadSqliteVec` has already succeeded for this database handle.
 *
 * @param db Database handle.
 * @returns {boolean} True when the handle is in the set of successfully loaded databases.
 */
export function isSqliteVecLoadedForDb(db) {
    const alreadyLoaded = loadedDbs.has(db);
    return alreadyLoaded;
}
26
/**
 * Attaches the sqlite-vec module to a database handle using whichever entry point the
 * module exposes, tried in this order:
 *   1. `mod.load(db)`
 *   2. `mod.default.load(db)`
 *   3. `db.loadExtension(mod.loadablePathFor())`
 *
 * @param mod The loaded sqlite-vec module object.
 * @param db Database handle.
 * @throws {Error} When the module offers none of the entry points, or when only
 *   `loadablePathFor` is offered and the handle has no `loadExtension` function.
 */
function registerModuleOnDb(mod, db) {
    if (typeof mod.load === "function") {
        mod.load(db);
        return;
    }
    const defaultExport = mod.default;
    if (typeof defaultExport?.load === "function") {
        defaultExport.load(db);
        return;
    }
    if (typeof mod.loadablePathFor !== "function") {
        throw new Error("sqlite-vec module does not expose load(db) or loadablePathFor()");
    }
    const attachExtension = db.loadExtension;
    if (typeof attachExtension !== "function") {
        throw new Error("better-sqlite3 does not support loadExtension");
    }
    // Invoke with the handle as receiver, since the method was read off the handle.
    attachExtension.call(db, mod.loadablePathFor());
}
44
/**
 * Prints the "sqlite-vec unavailable" notice the first time it is called and does nothing
 * on later calls (until the flag is reset through the test hooks).
 */
function logUnavailableOnce() {
    if (!loggedUnavailable) {
        loggedUnavailable = true;
        console.info("sqlite-vec module unavailable; using JS linear scan over stored embeddings");
    }
}
50
/**
 * Result object returned by `loadSqliteVec` when the extension is usable. The same object
 * is shared by every successful call.
 */
const availableState = {
    available: true,
    /**
     * Ensures the extension is registered on another database handle.
     * @throws {Error} When loading fails for that handle.
     */
    registerOnDb(db) {
        if (!loadSqliteVec(db).available) {
            throw new Error("sqlite-vec unavailable");
        }
    },
};
58
/** Hooks for tests: swap the module loader and probe for the real package. */
export const __memoryVecTest = {
    /**
     * Installs a replacement loader (or restores the default one when `loader` is
     * null/undefined) and resets the loaded-database set and the one-time log flag.
     */
    setLoader(loader) {
        const defaultLoader = () => requireOptional("sqlite-vec");
        loadModule = loader ?? defaultLoader;
        loadedDbs = new WeakSet();
        loggedUnavailable = false;
    },
    /** Returns the sqlite-vec package if it can be required, otherwise null. */
    probePackage() {
        let pkg;
        try {
            pkg = requireOptional("sqlite-vec");
        }
        catch {
            pkg = null;
        }
        return pkg;
    },
};
@@ -0,0 +1,27 @@
1
/**
 * Wraps a typed-array vector in a Buffer suitable for binding as a BLOB.
 *
 * The Buffer is a view over the same memory as `vector` (nothing is copied), limited to
 * the vector's own byte range, so subarrays encode only their own elements.
 *
 * @param vector Typed array exposing `buffer`, `byteOffset` and `byteLength`.
 * @returns {Buffer} View over the vector's bytes.
 */
export function encodeVector(vector) {
    const { buffer, byteOffset, byteLength } = vector;
    return Buffer.from(buffer, byteOffset, byteLength);
}
4
/**
 * Turns a stored BLOB back into a Float32Array.
 *
 * The bytes are copied into a fresh ArrayBuffer, so the result is independent of the
 * source buffer and works regardless of the source's byte offset.
 *
 * @param buffer Buffer (or other view) holding the raw float32 bytes.
 * @param {number} dimensions Expected number of elements.
 * @returns {Float32Array} Decoded vector with `dimensions` elements.
 * @throws {Error} When the byte length does not match `dimensions` float32 values.
 */
export function decodeVector(buffer, dimensions) {
    const expectedBytes = dimensions * Float32Array.BYTES_PER_ELEMENT;
    if (buffer.byteLength !== expectedBytes) {
        throw new Error(`Vector blob dimension mismatch: expected ${dimensions}, got ${buffer.byteLength / 4}`);
    }
    const start = buffer.byteOffset;
    const end = start + buffer.byteLength;
    return new Float32Array(buffer.buffer.slice(start, end));
}
11
/**
 * Cosine distance between two equally sized vectors: `1 - cos(angle)`.
 *
 * @param a First vector (array or typed array of numbers).
 * @param b Second vector, same length as `a`.
 * @returns {number} 0 for vectors pointing the same way, 1 for orthogonal vectors, 2 for
 *   opposite ones; 1 when either vector has zero magnitude.
 * @throws {Error} When the lengths differ.
 */
export function cosineDistance(a, b) {
    const size = a.length;
    if (size !== b.length) {
        throw new Error(`Vector dimension mismatch: ${a.length} !== ${b.length}`);
    }
    let dotProduct = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    for (let i = 0; i < size; i += 1) {
        // Missing entries (e.g. holes in a plain array) count as 0.
        const x = a[i] ?? 0;
        const y = b[i] ?? 0;
        dotProduct += x * y;
        sumSquaresA += x * x;
        sumSquaresB += y * y;
    }
    // A zero-magnitude vector has no direction; report the neutral distance.
    const hasZeroVector = sumSquaresA === 0 || sumSquaresB === 0;
    return hasZeroVector ? 1 : 1 - dotProduct / (Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB));
}