@qearlyao/familiar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +31 -0
- package/HEARTBEAT.md +23 -0
- package/LICENSE +21 -0
- package/MEMORY.md +1 -0
- package/README.md +245 -0
- package/SOUL.md +13 -0
- package/USER.md +13 -0
- package/config.example.toml +221 -0
- package/dist/agent-events.js +167 -0
- package/dist/agent.js +590 -0
- package/dist/browser-tools.js +638 -0
- package/dist/chat-log.js +130 -0
- package/dist/cli.js +168 -0
- package/dist/config.js +804 -0
- package/dist/data-retention.js +54 -0
- package/dist/discord.js +1203 -0
- package/dist/generated-media.js +86 -0
- package/dist/image-derivatives.js +102 -0
- package/dist/image-gen.js +440 -0
- package/dist/inbound-attachments.js +266 -0
- package/dist/index.js +10 -0
- package/dist/media-understanding.js +120 -0
- package/dist/memory/diary/ambient-injector.js +180 -0
- package/dist/memory/diary/ambient.js +124 -0
- package/dist/memory/diary/chunks.js +231 -0
- package/dist/memory/diary/index.js +3 -0
- package/dist/memory/diary/indexer.js +93 -0
- package/dist/memory/doctor.js +250 -0
- package/dist/memory/index/chunk-indexer.js +151 -0
- package/dist/memory/index/embedding-provider.js +119 -0
- package/dist/memory/index/fts-query.js +18 -0
- package/dist/memory/index/retrieval.js +246 -0
- package/dist/memory/index/schema.js +157 -0
- package/dist/memory/index/store.js +513 -0
- package/dist/memory/index/vec.js +72 -0
- package/dist/memory/index/vector-codec.js +27 -0
- package/dist/memory/lcm/backfill.js +247 -0
- package/dist/memory/lcm/condense.js +146 -0
- package/dist/memory/lcm/context-transformer.js +662 -0
- package/dist/memory/lcm/context.js +421 -0
- package/dist/memory/lcm/eviction-score.js +38 -0
- package/dist/memory/lcm/index.js +6 -0
- package/dist/memory/lcm/indexer.js +200 -0
- package/dist/memory/lcm/normalize.js +235 -0
- package/dist/memory/lcm/schema.js +188 -0
- package/dist/memory/lcm/segment-manager.js +136 -0
- package/dist/memory/lcm/store.js +722 -0
- package/dist/memory/lcm/summarizer.js +258 -0
- package/dist/memory/lcm/types.js +1 -0
- package/dist/memory/operator.js +477 -0
- package/dist/memory/service.js +202 -0
- package/dist/memory/tools.js +205 -0
- package/dist/models.js +165 -0
- package/dist/persona.js +54 -0
- package/dist/runtime.js +493 -0
- package/dist/scheduler.js +200 -0
- package/dist/settings.js +116 -0
- package/dist/skills.js +38 -0
- package/dist/tts.js +143 -0
- package/dist/web-auth.js +105 -0
- package/dist/web-events.js +114 -0
- package/dist/web-http.js +29 -0
- package/dist/web-static.js +106 -0
- package/dist/web-tools.js +940 -0
- package/dist/web-types.js +2 -0
- package/dist/web.js +844 -0
- package/package.json +60 -0
- package/web/dist/assets/index-ClgkMgaq.css +2 -0
- package/web/dist/assets/index-Cu2QquuR.js +59 -0
- package/web/dist/favicon.svg +1 -0
- package/web/dist/icons.svg +24 -0
- package/web/dist/index.html +20 -0
|
@@ -0,0 +1,513 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { mkdirSync } from "node:fs";
|
|
3
|
+
import { dirname, resolve } from "node:path";
|
|
4
|
+
import Database from "better-sqlite3";
|
|
5
|
+
import { normalizeFtsMatchQuery } from "./fts-query.js";
|
|
6
|
+
import { readMeta, runMemoryIndexMigrations } from "./schema.js";
|
|
7
|
+
import { cosineDistance, decodeVector, encodeVector } from "./vector-codec.js";
|
|
8
|
+
/**
 * SQLite-backed store for embedded memory chunks.
 *
 * Tables touched by this class: `memory_chunks` (chunk text + embedding blob),
 * `memory_fts` (full-text index keyed by chunk rowid), `memory_vec` (sqlite-vec
 * index, only when the `vector_capability` meta value is "sqlite-vec"),
 * `memory_index_sources` (chunk -> source mappings) and
 * `memory_index_source_state` (per-source bookkeeping).
 *
 * NOTE(review): the schema lives in ./schema.js and is not visible from here.
 * The FTS and vector rows are inserted by hand in `insertNormalized`, so this
 * class also removes them by hand whenever a chunk row is deleted.
 */
export class MemoryIndexStore {
    /** better-sqlite3 database handle used for every query. */
    db;
    /** True when the constructor opened `db` itself; only then does `close()` close it. */
    ownsDb;
    embeddingProvider;
    embeddingModel;
    embeddingDimensions;
    /**
     * @param options Either `db` (an existing handle, never closed by this store) or
     *   `path` (a file to open; parent directories are created), plus
     *   `embeddingProvider`, `embeddingModel` and `embeddingDimensions`.
     * @throws {Error} When neither `db` nor `path` is given, or when migrations fail.
     */
    constructor(options) {
        if (!options.db && !options.path)
            throw new Error("MemoryIndexStore requires a db or path");
        if (options.db) {
            this.db = options.db;
            this.ownsDb = false;
        }
        else {
            const path = options.path;
            mkdirSync(dirname(path), { recursive: true });
            this.db = new Database(path);
            this.ownsDb = true;
        }
        this.embeddingProvider = options.embeddingProvider;
        this.embeddingModel = options.embeddingModel;
        this.embeddingDimensions = options.embeddingDimensions;
        try {
            runMemoryIndexMigrations(this.db, {
                embeddingProvider: this.embeddingProvider,
                embeddingModel: this.embeddingModel,
                embeddingDimensions: this.embeddingDimensions,
            });
        }
        catch (error) {
            // The caller never receives the instance when the constructor throws,
            // so a handle opened here would otherwise stay open with no owner.
            if (this.ownsDb) {
                try {
                    this.db.close();
                }
                catch {
                    // Keep the migration failure as the error the caller sees.
                }
            }
            throw error;
        }
    }
    /** Opens `<config.memory.indexDir>/memory.sqlite` using the embedding settings in `config.memory.embedding`. */
    static open(config) {
        return new MemoryIndexStore({
            path: resolve(config.memory.indexDir, "memory.sqlite"),
            embeddingProvider: config.memory.embedding.provider,
            embeddingModel: config.memory.embedding.model,
            embeddingDimensions: config.memory.embedding.dimensions,
        });
    }
    /** Closes the database only if this store opened it. */
    close() {
        if (this.ownsDb)
            this.db.close();
    }
    /** Returns the provider / model / dimensions this store was constructed with. */
    embeddingConfig() {
        return {
            provider: this.embeddingProvider,
            model: this.embeddingModel,
            dimensions: this.embeddingDimensions,
        };
    }
    /** Inserts one chunk and returns its id (see `insertChunks`). */
    insertChunk(input) {
        return this.insertChunks([input])[0];
    }
    /**
     * Validates and inserts chunks inside one IMMEDIATE transaction.
     * @returns Chunk ids in input order; an input whose content hash already
     *   exists yields the existing chunk's id.
     * @throws {Error} On embedding dimension mismatch or empty text (before any write).
     */
    insertChunks(inputs) {
        if (inputs.length === 0)
            return [];
        const rows = inputs.map((input) => this.normalizeInput(input));
        const out = [];
        const insert = this.db.transaction((items) => {
            for (const item of items)
                out.push(this.insertNormalized(item));
        });
        insert.immediate(rows);
        return out;
    }
    /**
     * Records source mappings for inputs whose chunk (matched by content hash)
     * is already stored; inputs with no stored chunk are ignored.
     */
    recordSourceMappings(inputs) {
        if (inputs.length === 0)
            return;
        const rows = inputs.map((input) => this.normalizeInput(input));
        this.db
            .transaction((items) => {
            // Prepared once and reused for every item.
            const findByHash = this.db.prepare("SELECT id FROM memory_chunks WHERE content_hash = ?");
            for (const item of items) {
                const existing = findByHash.get(item.contentHash);
                if (existing)
                    this.insertSourceMapping(existing.id, item);
            }
        })
            .immediate(rows);
    }
    /**
     * Atomically replaces everything indexed for `(corpus, sourceId)` with `inputs`.
     * `corpus` and `sourceId` override whatever the inputs carry.
     * @returns Chunk ids in input order.
     */
    replaceSource(corpus, sourceId, inputs) {
        const rows = inputs.map((input) => this.normalizeInput({ ...input, corpus, sourceId }));
        const out = [];
        const replace = this.db.transaction(() => {
            this.deleteBySourceInternal(corpus, sourceId);
            for (const item of rows)
                out.push(this.insertNormalized(item));
        });
        replace.immediate();
        return out;
    }
    /**
     * Looks up which content hashes are already stored.
     * @returns Map from content hash to chunk id, containing only hashes that exist.
     */
    whichHashesPresent(hashes) {
        const present = new Map();
        if (hashes.length === 0)
            return present;
        // Query in batches so the number of bound parameters per statement stays bounded.
        const chunkSize = 256;
        for (let index = 0; index < hashes.length; index += chunkSize) {
            const chunk = hashes.slice(index, index + chunkSize);
            const placeholders = chunk.map(() => "?").join(",");
            const rows = this.db
                .prepare(`SELECT content_hash, id FROM memory_chunks WHERE content_hash IN (${placeholders})`)
                .all(...chunk);
            for (const row of rows)
                present.set(row.content_hash, row.id);
        }
        return present;
    }
    /** Returns the chunk with the given id (including its source mappings), or null. */
    getChunk(id) {
        const row = this.db
            .prepare(`SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c WHERE c.id = ?`)
            .get(id);
        return row ? rowToChunk(row) : null;
    }
    /**
     * Full-text search ordered by FTS rank.
     * @param query Raw user query; normalized by `normalizeFtsMatchQuery`.
     * @param options A numeric limit, or `{ limit, corpus }`.
     * @returns `{ id, score, chunk }` entries; empty when the normalized query is empty.
     */
    searchLexical(query, options = {}) {
        const normalized = normalizeSearchOptions(options);
        const matchQuery = normalizeFtsMatchQuery(query);
        if (!matchQuery)
            return [];
        const params = [matchQuery];
        const corpusFilter = normalized.corpus ? "AND c.corpus = ?" : "";
        if (normalized.corpus)
            params.push(normalized.corpus);
        params.push(normalized.limit);
        const rows = this.db
            .prepare(`SELECT c.*, f.rank AS score, ${sourcesJsonSelect("c.id")}
                FROM memory_fts f
                JOIN memory_chunks c ON c.id = f.rowid
                WHERE memory_fts MATCH ?
                ${corpusFilter}
                ORDER BY f.rank
                LIMIT ?`)
            .all(...params);
        return rows.map((row) => ({ id: row.id, score: row.score, chunk: rowToChunk(row) }));
    }
    /**
     * Nearest-neighbour search for an embedding vector; lower score is closer.
     * @throws {Error} When `query.length` differs from the configured dimensions.
     */
    searchSemantic(query, options = {}) {
        const normalized = normalizeSearchOptions(options);
        if (query.length !== this.embeddingDimensions) {
            throw new Error(`Query vector dimension mismatch: expected ${this.embeddingDimensions}, got ${query.length}`);
        }
        // memory_vec does not carry corpus metadata, so sqlite-vec cannot prefilter
        // corpus-scoped KNN. Use the linear path to keep scoped nearest neighbors exact.
        if (this.vectorCapability() === "sqlite-vec" && !normalized.corpus)
            return this.searchSemanticVec(query, normalized);
        return this.searchSemanticLinear(query, normalized);
    }
    /** KNN through the `memory_vec` virtual table; `score` is the distance it reports. */
    searchSemanticVec(query, normalized) {
        // Bind order: MATCH vector, k, optional corpus, LIMIT.
        const params = [encodeVector(query), normalized.limit];
        if (normalized.corpus)
            params.push(normalized.corpus);
        const corpusFilter = normalized.corpus ? "WHERE c.corpus = ?" : "";
        const rows = this.db
            .prepare(`SELECT c.*, v.distance AS score, ${sourcesJsonSelect("c.id")}
                FROM (
                    SELECT rowid AS chunk_id, distance
                    FROM memory_vec
                    WHERE embedding MATCH ? AND k = ?
                ) v
                JOIN memory_chunks c ON c.id = v.chunk_id
                ${corpusFilter}
                ORDER BY v.distance
                LIMIT ?`)
            .all(...params, normalized.limit);
        return rows.map((row) => ({ id: row.id, score: row.score, chunk: rowToChunk(row) }));
    }
    /** Exact scan: loads every candidate chunk and ranks by cosine distance in JS. */
    searchSemanticLinear(query, normalized) {
        const rows = this.db
            .prepare(normalized.corpus
            ? `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c WHERE c.corpus = ?`
            : `SELECT c.*, ${sourcesJsonSelect("c.id")} FROM memory_chunks c`)
            .all(...(normalized.corpus ? [normalized.corpus] : []));
        return rows
            .map((row) => ({
            id: row.id,
            score: cosineDistance(query, decodeVector(row.embedding, row.embedding_dimensions)),
            chunk: rowToChunk(row),
        }))
            .sort((a, b) => a.score - b.score)
            .slice(0, normalized.limit);
    }
    /**
     * Deletes one chunk together with its FTS row, vector row and source mappings.
     */
    deleteChunk(id) {
        const remove = this.db.transaction(() => {
            // Index rows first, the chunk row last: the FTS delete runs while the
            // chunk text is still present (required if memory_fts is an external
            // content table - presumably; confirm in schema.js).
            this.deleteFtsRow(id);
            this.deleteVecRow(id);
            // Idempotent if the schema already cascades; otherwise this prevents
            // mappings that point at a chunk that no longer exists.
            this.db.prepare("DELETE FROM memory_index_sources WHERE chunk_id = ?").run(id);
            this.db.prepare("DELETE FROM memory_chunks WHERE id = ?").run(id);
        });
        remove.immediate();
    }
    /** Removes everything indexed for `(corpus, sourceId)` in its own IMMEDIATE transaction. */
    deleteBySource(corpus, sourceId) {
        this.db.transaction(() => this.deleteBySourceInternal(corpus, sourceId)).immediate();
    }
    /** Caller already owns the index DB write transaction. */
    deleteBySourceUnsafe(corpus, sourceId) {
        this.deleteBySourceInternal(corpus, sourceId);
    }
    /**
     * Reads the bookkeeping row for a source.
     * @returns The state plus `hasMappings` (whether any chunk mapping exists), or null.
     */
    getSourceState(corpus, sourceId) {
        const row = this.db
            .prepare(`SELECT
                    st.corpus,
                    st.source_id,
                    st.source_ref,
                    st.mtime_ms,
                    st.size_bytes,
                    st.updated_at,
                    EXISTS(
                        SELECT 1 FROM memory_index_sources s
                        WHERE s.corpus = st.corpus AND s.source_id = st.source_id
                    ) AS has_mappings
                FROM memory_index_source_state st
                WHERE st.corpus = ? AND st.source_id = ?`)
            .get(corpus, sourceId);
        return row
            ? {
                corpus: row.corpus,
                sourceId: row.source_id,
                sourceRef: row.source_ref,
                mtimeMs: row.mtime_ms,
                sizeBytes: row.size_bytes,
                updatedAt: row.updated_at,
                hasMappings: row.has_mappings === 1,
            }
            : null;
    }
    /** Inserts or updates the bookkeeping row for a source; `mtimeMs` is floored to an integer. */
    upsertSourceState(input) {
        this.db
            .prepare(`INSERT INTO memory_index_source_state(corpus, source_id, source_ref, mtime_ms, size_bytes)
                VALUES (?, ?, ?, ?, ?)
                ON CONFLICT(corpus, source_id) DO UPDATE SET
                    source_ref = excluded.source_ref,
                    mtime_ms = excluded.mtime_ms,
                    size_bytes = excluded.size_bytes,
                    updated_at = unixepoch()`)
            .run(input.corpus, input.sourceId, input.sourceRef ?? null, Math.floor(input.mtimeMs), input.sizeBytes);
    }
    /** Like `deleteBySourceExceptMappings`, keeping every mapping whose content hash is listed. */
    deleteBySourceExceptHashes(corpus, sourceId, contentHashes) {
        this.deleteBySourceExceptMappings(corpus, sourceId, [...new Set(contentHashes)].map((contentHash) => ({ contentHash, chunkIndex: null })));
    }
    /**
     * Removes the source's mappings that are not listed in `kept`, deleting chunks
     * that end up with no mapping at all.
     * @param kept Entries `{ contentHash, chunkIndex }`; `chunkIndex: null` keeps the
     *   hash at any index. An empty list removes the whole source (state row included).
     */
    deleteBySourceExceptMappings(corpus, sourceId, kept) {
        if (kept.length === 0) {
            this.deleteBySource(corpus, sourceId);
            return;
        }
        // Index the keep-list once instead of scanning it for every stored mapping.
        const keptAtAnyIndex = new Set();
        const keptIndexesByHash = new Map();
        for (const item of kept) {
            if (item.chunkIndex === null) {
                keptAtAnyIndex.add(item.contentHash);
                continue;
            }
            let indexes = keptIndexesByHash.get(item.contentHash);
            if (!indexes) {
                indexes = new Set();
                keptIndexesByHash.set(item.contentHash, indexes);
            }
            indexes.add(item.chunkIndex);
        }
        const isKept = (row) => keptAtAnyIndex.has(row.content_hash) ||
            keptIndexesByHash.get(row.content_hash)?.has(row.chunk_index) === true;
        this.db
            .transaction(() => {
            const rows = this.db
                .prepare(`SELECT s.chunk_id AS id, c.content_hash, s.chunk_index
                    FROM memory_index_sources s
                    JOIN memory_chunks c ON c.id = s.chunk_id
                    WHERE s.corpus = ? AND s.source_id = ?`)
                .all(corpus, sourceId);
            const deleteMapping = this.db.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ? AND chunk_index = ?");
            for (const row of rows) {
                if (isKept(row))
                    continue;
                deleteMapping.run(corpus, sourceId, row.chunk_index);
                this.deleteOrphanChunk(row.id);
            }
        })
            .immediate();
    }
    /** Empties every index table (FTS, vectors when enabled, mappings, source state, chunks). */
    clearAll() {
        this.db
            .transaction(() => {
            this.db.prepare("INSERT INTO memory_fts(memory_fts) VALUES ('delete-all')").run();
            if (this.vectorCapability() === "sqlite-vec")
                this.db.prepare("DELETE FROM memory_vec").run();
            this.db.prepare("DELETE FROM memory_index_sources").run();
            this.db.prepare("DELETE FROM memory_index_source_state").run();
            this.db.prepare("DELETE FROM memory_chunks").run();
        })
            .immediate();
    }
    /**
     * Drops mappings whose source no longer exists, then any chunk left without mappings.
     * @param exists Predicate called with `{ corpus, sourceId, sourceRef, chunkIndex }`;
     *   return true to keep the mapping.
     */
    reconcileSources(exists) {
        const sources = this.db
            .prepare("SELECT chunk_id, corpus, source_id, source_ref, chunk_index FROM memory_index_sources")
            .all();
        if (sources.length === 0)
            return;
        this.db
            .transaction(() => {
            const deleteMapping = this.db.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ? AND chunk_index = ?");
            for (const row of sources) {
                const source = {
                    corpus: row.corpus,
                    sourceId: row.source_id,
                    sourceRef: row.source_ref,
                    chunkIndex: row.chunk_index,
                };
                if (exists(source))
                    continue;
                deleteMapping.run(source.corpus, source.sourceId, source.chunkIndex);
                this.deleteOrphanChunk(row.chunk_id);
            }
        })
            .immediate();
    }
    /**
     * Collects row counts, database size and the embedding/vector meta values.
     * Without sqlite-vec, `vectorRows` is reported as the chunk count because the
     * embeddings live on the chunk rows themselves.
     */
    stats() {
        const indexed = this.db.prepare("SELECT COUNT(*) AS n FROM memory_chunks").get().n;
        const ftsRows = this.db.prepare("SELECT COUNT(*) AS n FROM memory_fts").get().n;
        const size = this.db
            .prepare("SELECT page_count * page_size AS bytes FROM pragma_page_count(), pragma_page_size()")
            .get();
        const dimensionsRaw = readMeta(this.db, "embedding_dimensions");
        const vectorCapability = this.vectorCapability();
        const vectorRows = vectorCapability === "sqlite-vec" ? this.vectorRowCount() : indexed;
        return {
            indexed,
            ftsRows,
            vectorRows,
            vectorAvailable: vectorCapability === "sqlite-vec",
            vectorCapability,
            requiresReindex: readMeta(this.db, "requires_reindex") === "1",
            embeddingProvider: readMeta(this.db, "embedding_provider"),
            embeddingModel: readMeta(this.db, "embedding_model"),
            embeddingDimensions: dimensionsRaw ? Number(dimensionsRaw) : null,
            dbSizeBytes: size.bytes,
        };
    }
    /** Returns "sqlite-vec" when the `vector_capability` meta value says so, otherwise "blob-js". */
    vectorCapability() {
        return readMeta(this.db, "vector_capability") === "sqlite-vec" ? "sqlite-vec" : "blob-js";
    }
    /** Row count of `memory_vec`, or 0 when the table cannot be queried. */
    vectorRowCount() {
        try {
            const row = this.db.prepare("SELECT COUNT(*) AS n FROM memory_vec").get();
            return row.n;
        }
        catch {
            return 0;
        }
    }
    /**
     * Validates a chunk input and converts it to the internal row shape.
     * Text is trimmed; the snippet defaults to the first 280 characters of the text;
     * `chunkIndex` defaults to 0.
     * @throws {Error} On embedding dimension mismatch or empty text.
     */
    normalizeInput(input) {
        if (input.embedding.length !== this.embeddingDimensions) {
            throw new Error(`Embedding dimension mismatch: expected ${this.embeddingDimensions}, got ${input.embedding.length}`);
        }
        const text = input.text.trim();
        if (!text)
            throw new Error("Memory chunk text must not be empty");
        const chunkIndex = input.chunkIndex ?? 0;
        const snippet = input.snippet?.trim() || text.slice(0, 280);
        return {
            corpus: input.corpus,
            sourceId: input.sourceId ?? null,
            sourceRef: input.sourceRef ?? null,
            chunkIndex,
            text,
            snippet,
            tokenCount: input.tokenCount ?? null,
            metadataJson: input.metadata ? JSON.stringify(input.metadata) : null,
            embedding: input.embedding,
            contentHash: createMemoryContentHash({
                corpus: input.corpus,
                text,
                embeddingModel: this.embeddingModel,
                embeddingDimensions: this.embeddingDimensions,
            }),
        };
    }
    /**
     * Inserts a normalized chunk, or reuses the chunk with the same content hash,
     * and records the source mapping. Must run inside a write transaction.
     * @returns The chunk id.
     */
    insertNormalized(item) {
        const existing = this.db.prepare("SELECT id FROM memory_chunks WHERE content_hash = ?").get(item.contentHash);
        if (existing) {
            this.insertSourceMapping(existing.id, item);
            return existing.id;
        }
        const result = this.db
            .prepare(`INSERT INTO memory_chunks (
                    content_hash, corpus, text_full, snippet, token_count, metadata_json, embedding_model,
                    embedding_dimensions, embedding
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
            .run(item.contentHash, item.corpus, item.text, item.snippet, item.tokenCount, item.metadataJson, this.embeddingModel, this.embeddingDimensions, encodeVector(item.embedding));
        const id = Number(result.lastInsertRowid);
        this.db
            .prepare("INSERT INTO memory_fts(rowid, text_full, snippet) VALUES (?, ?, ?)")
            .run(id, item.text, item.snippet);
        if (this.vectorCapability() === "sqlite-vec") {
            this.db
                .prepare("INSERT INTO memory_vec(rowid, embedding) VALUES (CAST(? AS INTEGER), ?)")
                .run(id, encodeVector(item.embedding));
        }
        this.insertSourceMapping(id, item);
        return id;
    }
    /** Removes a source's mappings and state row, then any chunk left without mappings. No transaction of its own. */
    deleteBySourceInternal(corpus, sourceId) {
        const rows = this.db
            .prepare("SELECT chunk_id AS id FROM memory_index_sources WHERE corpus = ? AND source_id = ?")
            .all(corpus, sourceId);
        this.db.prepare("DELETE FROM memory_index_sources WHERE corpus = ? AND source_id = ?").run(corpus, sourceId);
        this.db.prepare("DELETE FROM memory_index_source_state WHERE corpus = ? AND source_id = ?").run(corpus, sourceId);
        for (const row of rows)
            this.deleteOrphanChunk(row.id);
    }
    /** Records (or replaces) the mapping from a chunk to its source; no-op when the item has no `sourceId`. */
    insertSourceMapping(chunkId, item) {
        if (!item.sourceId)
            return;
        this.db
            .prepare(`INSERT OR REPLACE INTO memory_index_sources(chunk_id, corpus, source_id, source_ref, chunk_index)
                VALUES (?, ?, ?, ?, ?)`)
            .run(chunkId, item.corpus, item.sourceId, item.sourceRef, item.chunkIndex);
    }
    /** Deletes the chunk (and its FTS / vector rows) unless some source mapping still references it. */
    deleteOrphanChunk(id) {
        const remaining = this.db
            .prepare("SELECT 1 AS ok FROM memory_index_sources WHERE chunk_id = ? LIMIT 1")
            .get(id);
        if (remaining)
            return;
        this.deleteFtsRow(id);
        this.deleteVecRow(id);
        this.db.prepare("DELETE FROM memory_chunks WHERE id = ?").run(id);
    }
    /** Removes the chunk's row from the full-text index. */
    deleteFtsRow(id) {
        this.db.prepare("DELETE FROM memory_fts WHERE rowid = ?").run(id);
    }
    /**
     * Removes the chunk's row from `memory_vec` when the sqlite-vec index is in use.
     * The CAST mirrors the insert in `insertNormalized`. Deleting a rowid that is
     * not present is a no-op, so this is safe even if the schema already cleans up.
     */
    deleteVecRow(id) {
        if (this.vectorCapability() !== "sqlite-vec")
            return;
        this.db.prepare("DELETE FROM memory_vec WHERE rowid = CAST(? AS INTEGER)").run(id);
    }
}
|
|
419
|
+
/**
 * Computes the identity hash of a memory chunk.
 *
 * The digest is the hex SHA-256 of a JSON object holding, in this order,
 * `corpus`, `text`, `embeddingModel` and `embeddingDimensions`; any other
 * property on `input` is ignored.
 *
 * @param input Object carrying the four fields above.
 * @returns {string} Lowercase hex digest.
 */
export function createMemoryContentHash(input) {
    const { corpus, text, embeddingModel, embeddingDimensions } = input;
    // Key order is part of the hash, so the payload is built explicitly.
    const payload = JSON.stringify({ corpus, text, embeddingModel, embeddingDimensions });
    const hasher = createHash("sha256");
    hasher.update(payload);
    return hasher.digest("hex");
}
|
|
429
|
+
/**
 * Normalizes the `options` argument of the search methods.
 *
 * Accepts either a bare numeric limit or an object `{ limit, corpus }`.
 * The limit is sanitized because it is bound to SQL `LIMIT` / KNN `k` and also
 * used with `Array.prototype.slice`, which disagree on odd values (a negative
 * `LIMIT` means "no limit" in SQLite, while a negative slice end drops items):
 * - missing or non-finite limit -> 10 (the default),
 * - fractional limit -> truncated toward zero,
 * - negative limit -> 0.
 *
 * @param options Number, options object, or null/undefined.
 * @returns `{ limit }` for a numeric argument, otherwise `{ limit, corpus }`.
 */
function normalizeSearchOptions(options) {
    const defaultLimit = 10;
    const sanitizeLimit = (value) => typeof value === "number" && Number.isFinite(value)
        ? Math.max(0, Math.trunc(value))
        : defaultLimit;
    if (typeof options === "number")
        return { limit: sanitizeLimit(options) };
    // A null options value behaves like an empty options object.
    const source = options ?? {};
    return {
        limit: sanitizeLimit(source.limit),
        corpus: source.corpus,
    };
}
|
|
437
|
+
/**
 * Builds the correlated sub-select that aggregates a chunk's source mappings
 * into a JSON array exposed as the `sources_json` column.
 *
 * @param {string} chunkIdExpr SQL expression for the chunk id in the outer query
 *   (interpolated verbatim, so it must be trusted SQL such as "c.id").
 * @returns {string} SQL fragment usable in a SELECT column list.
 */
function sourcesJsonSelect(chunkIdExpr) {
    const fragmentLines = [
        "(SELECT json_group_array(json_object(",
        "'corpus', s.corpus,",
        "'sourceId', s.source_id,",
        "'sourceRef', s.source_ref,",
        "'chunkIndex', s.chunk_index",
        `)) FROM memory_index_sources s WHERE s.chunk_id = ${chunkIdExpr}) AS sources_json`,
    ];
    return fragmentLines.join("\n");
}
|
|
445
|
+
/**
 * Converts a `memory_chunks` row (optionally carrying `sources_json`) into the
 * chunk object returned to callers.
 *
 * The top-level `sourceId` / `sourceRef` / `chunkIndex` come from the first
 * source mapping; when the row has no mappings they fall back to same-named
 * columns on the row itself (null / null / 0 when absent).
 */
function rowToChunk(row) {
    const sources = sourceRefsFromRow(row);
    const [firstSource] = sources;
    const primary = firstSource ?? {
        corpus: row.corpus,
        sourceId: row.source_id ?? null,
        sourceRef: row.source_ref ?? null,
        chunkIndex: row.chunk_index ?? 0,
    };
    const { sourceId, sourceRef, chunkIndex } = primary;
    return {
        id: row.id,
        contentHash: row.content_hash,
        corpus: row.corpus,
        sourceId,
        sourceRef,
        chunkIndex,
        sources,
        text: row.text_full,
        snippet: row.snippet,
        tokenCount: row.token_count,
        metadata: parseMetadata(row.metadata_json),
        embeddingModel: row.embedding_model,
        embeddingDimensions: row.embedding_dimensions,
        createdAt: row.created_at,
        updatedAt: row.updated_at,
    };
}
|
|
471
|
+
/**
 * Extracts the source mappings attached to a row.
 *
 * Preference order:
 * 1. A non-empty string `sources_json` that parses to an array: its entries
 *    that pass `isSourceRef`. Unparseable JSON yields an empty list.
 * 2. Otherwise a single mapping built from `source_id` / `source_ref` /
 *    `chunk_index` columns on the row, when `source_id` is truthy.
 * 3. Otherwise an empty list.
 */
function sourceRefsFromRow(row) {
    const encoded = "sources_json" in row ? row.sources_json : undefined;
    if (typeof encoded === "string" && encoded) {
        try {
            const decoded = JSON.parse(encoded);
            if (Array.isArray(decoded)) {
                return decoded.filter(isSourceRef);
            }
            // Valid JSON that is not an array falls through to the column fallback.
        }
        catch {
            return [];
        }
    }
    const sourceId = row.source_id ?? null;
    if (!sourceId) {
        return [];
    }
    const fallback = {
        corpus: row.corpus,
        sourceId,
        sourceRef: row.source_ref ?? null,
        chunkIndex: row.chunk_index ?? 0,
    };
    return [fallback];
}
|
|
495
|
+
/**
 * Type guard for a decoded source mapping: a non-null object whose `corpus`
 * and `sourceId` are both strings. Other properties are not inspected.
 *
 * @returns {boolean}
 */
function isSourceRef(value) {
    const isObject = Boolean(value) && typeof value === "object";
    return isObject && typeof value.corpus === "string" && typeof value.sourceId === "string";
}
|
|
501
|
+
/**
 * Decodes the `metadata_json` column.
 *
 * @param value JSON text, or a falsy value when no metadata was stored.
 * @returns The decoded value when it is a non-array object; null for falsy
 *   input, invalid JSON, arrays and primitives.
 */
function parseMetadata(value) {
    if (!value) {
        return null;
    }
    let decoded;
    try {
        decoded = JSON.parse(value);
    }
    catch {
        return null;
    }
    const isRecord = decoded !== null && typeof decoded === "object" && !Array.isArray(decoded);
    return isRecord ? decoded : null;
}
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { createRequire } from "node:module";
|
|
2
|
+
// CommonJS-style `require` anchored at this module; used to load the optional
// "sqlite-vec" package on demand instead of importing it statically.
const requireOptional = createRequire(import.meta.url);
// Becomes true after the "unavailable" notice was printed, so it is logged only once.
let loggedUnavailable = false;
// Loader indirection; `__memoryVecTest.setLoader` swaps it out.
let loadModule = () => requireOptional("sqlite-vec");
// Database handles on which sqlite-vec was registered and `vec_version()` answered.
let loadedDbs = new WeakSet();
|
|
6
|
+
/**
 * Ensures the sqlite-vec extension is registered on `db`.
 *
 * A handle is registered at most once; success is confirmed by querying
 * `vec_version()`. Any failure (module missing, registration error, missing
 * version) is reported through a one-time info log and never thrown.
 *
 * @param db better-sqlite3 database handle.
 * @returns The shared available-state object on success, otherwise a fresh
 *   `{ available: false }`.
 */
export function loadSqliteVec(db) {
    if (!loadedDbs.has(db)) {
        try {
            registerModuleOnDb(loadModule(), db);
            const probe = db.prepare("SELECT vec_version() AS version").get();
            if (!probe?.version) {
                throw new Error("sqlite-vec loaded but vec_version() is unavailable");
            }
            loadedDbs.add(db);
        }
        catch {
            logUnavailableOnce();
            return { available: false };
        }
    }
    return availableState;
}
|
|
23
|
+
/**
 * Reports whether `loadSqliteVec` has already succeeded for this handle.
 * Does not attempt to load anything.
 *
 * @returns {boolean}
 */
export function isSqliteVecLoadedForDb(db) {
    const alreadyLoaded = loadedDbs.has(db);
    return alreadyLoaded;
}
|
|
26
|
+
/**
 * Registers the sqlite-vec extension on a database handle, whatever shape the
 * loaded module has.
 *
 * Tried in order:
 * 1. `mod.load(db)`
 * 2. `mod.default.load(db)`
 * 3. a path helper (`loadablePathFor` or `getLoadablePath`, on the module or
 *    its default export) whose result is passed to `db.loadExtension`.
 *
 * `getLoadablePath` is accepted in addition to the originally supported
 * `loadablePathFor` because that is the helper name the sqlite-vec npm package
 * exposes (NOTE(review): confirm against the installed package version).
 *
 * @param mod The loaded sqlite-vec module object.
 * @param db better-sqlite3 database handle.
 * @throws {Error} When the module exposes none of the entry points, or when a
 *   path helper is available but `db.loadExtension` is not a function.
 */
function registerModuleOnDb(mod, db) {
    if (typeof mod.load === "function") {
        mod.load(db);
        return;
    }
    if (typeof mod.default?.load === "function") {
        mod.default.load(db);
        return;
    }
    // Original precedence first (module-level loadablePathFor), then the wider set.
    const pathHelpers = [
        [mod, "loadablePathFor"],
        [mod, "getLoadablePath"],
        [mod.default, "loadablePathFor"],
        [mod.default, "getLoadablePath"],
    ];
    for (const [owner, helperName] of pathHelpers) {
        if (typeof owner?.[helperName] !== "function")
            continue;
        const loadExtension = db.loadExtension;
        if (typeof loadExtension !== "function")
            throw new Error("better-sqlite3 does not support loadExtension");
        loadExtension.call(db, owner[helperName]());
        return;
    }
    throw new Error("sqlite-vec module does not expose load(db), loadablePathFor() or getLoadablePath()");
}
|
|
44
|
+
/**
 * Prints the "sqlite-vec unavailable" notice the first time it is called and
 * stays silent afterwards (until the flag is reset).
 */
function logUnavailableOnce() {
    if (!loggedUnavailable) {
        // Flip the flag before logging so the notice can never repeat.
        loggedUnavailable = true;
        console.info("sqlite-vec module unavailable; using JS linear scan over stored embeddings");
    }
}
|
|
50
|
+
/**
 * Shared result object returned by `loadSqliteVec` when the extension is usable.
 * `registerOnDb(db)` loads the extension on another handle and throws if that fails.
 */
const availableState = {
    available: true,
    registerOnDb(db) {
        const { available } = loadSqliteVec(db);
        if (available) {
            return;
        }
        throw new Error("sqlite-vec unavailable");
    },
};
|
|
58
|
+
/**
 * Test hooks for this module.
 * - `setLoader(loader)`: installs a replacement module loader (or restores the
 *   default when `loader` is null/undefined) and resets the loaded-handle set
 *   and the log-once flag.
 * - `probePackage()`: returns the real "sqlite-vec" module, or null when it
 *   cannot be required.
 */
export const __memoryVecTest = {
    setLoader(loader) {
        const defaultLoader = () => requireOptional("sqlite-vec");
        loadModule = loader ?? defaultLoader;
        loadedDbs = new WeakSet();
        loggedUnavailable = false;
    },
    probePackage() {
        let resolved = null;
        try {
            resolved = requireOptional("sqlite-vec");
        }
        catch {
            resolved = null;
        }
        return resolved;
    },
};
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/**
 * Encodes an embedding as the raw float32 bytes stored in the database.
 *
 * A `Float32Array` is wrapped without copying (the returned Buffer shares its
 * memory, exactly as before). Any other numeric sequence - a plain array or a
 * different typed array - is first converted to float32, so the blob always
 * holds 4 bytes per element and round-trips through `decodeVector`. Previously
 * a plain array failed inside `Buffer.from` and other typed arrays were stored
 * with the wrong element width.
 *
 * @param vector Float32Array, or an array-like/iterable of numbers.
 * @returns {Buffer} `vector.length * 4` bytes in platform byte order.
 */
export function encodeVector(vector) {
    const floats = vector instanceof Float32Array ? vector : Float32Array.from(vector);
    return Buffer.from(floats.buffer, floats.byteOffset, floats.byteLength);
}
|
|
4
|
+
/**
 * Decodes a stored embedding blob into a `Float32Array`.
 *
 * The bytes are copied, so the result does not alias the input buffer.
 *
 * @param buffer Buffer (or other view exposing `buffer` / `byteOffset` /
 *   `byteLength`) holding raw float32 values.
 * @param {number} dimensions Expected number of elements.
 * @returns {Float32Array} A new array of `dimensions` elements.
 * @throws {Error} When the byte length is not `dimensions * 4`.
 */
export function decodeVector(buffer, dimensions) {
    const expectedBytes = dimensions * Float32Array.BYTES_PER_ELEMENT;
    const actualBytes = buffer.byteLength;
    if (actualBytes !== expectedBytes) {
        throw new Error(`Vector blob dimension mismatch: expected ${dimensions}, got ${actualBytes / 4}`);
    }
    const start = buffer.byteOffset;
    // slice() copies into a fresh ArrayBuffer that starts at offset 0.
    const copy = buffer.buffer.slice(start, start + actualBytes);
    return new Float32Array(copy);
}
|
|
11
|
+
/**
 * Cosine distance between two equally long numeric vectors:
 * `1 - (a . b) / (|a| * |b|)`. Missing (null/undefined) elements count as 0.
 *
 * @param a First vector (array or typed array).
 * @param b Second vector, same length as `a`.
 * @returns {number} 0 for same direction, 1 for orthogonal, 2 for opposite;
 *   1 when either vector has zero magnitude.
 * @throws {Error} When the lengths differ.
 */
export function cosineDistance(a, b) {
    const size = a.length;
    if (size !== b.length) {
        throw new Error(`Vector dimension mismatch: ${a.length} !== ${b.length}`);
    }
    let dotProduct = 0;
    let squaredA = 0;
    let squaredB = 0;
    for (let i = 0; i < size; i += 1) {
        const left = a[i] ?? 0;
        const right = b[i] ?? 0;
        dotProduct += left * right;
        squaredA += left * left;
        squaredB += right * right;
    }
    // A zero-magnitude vector has no direction; treat it as maximally unrelated.
    const hasZeroVector = squaredA === 0 || squaredB === 0;
    if (hasZeroVector) {
        return 1;
    }
    const magnitudeProduct = Math.sqrt(squaredA) * Math.sqrt(squaredB);
    return 1 - dotProduct / magnitudeProduct;
}
|