mnueron 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,5 @@
1
1
  import Database from 'better-sqlite3';
2
+ import { RECALL_EVENTS_DDL, buildRecallEvent, approximateTokens, } from '../savings/recall-event.js';
2
3
  import { randomUUID } from 'node:crypto';
3
4
  import { mkdirSync } from 'node:fs';
4
5
  import { dirname } from 'node:path';
@@ -216,44 +217,44 @@ export class LocalProvider {
216
217
  preload();
217
218
  }
218
219
  migrate() {
219
- this.db.exec(`
220
- CREATE TABLE IF NOT EXISTS memories (
221
- id TEXT PRIMARY KEY,
222
- namespace TEXT NOT NULL DEFAULT 'default',
223
- content TEXT NOT NULL,
224
- tags_json TEXT NOT NULL DEFAULT '[]',
225
- source TEXT NOT NULL DEFAULT 'manual',
226
- source_ref TEXT,
227
- meta_json TEXT,
228
- created_at INTEGER NOT NULL,
229
- updated_at INTEGER NOT NULL
230
- );
231
-
232
- CREATE INDEX IF NOT EXISTS idx_memories_namespace
233
- ON memories(namespace);
234
- CREATE INDEX IF NOT EXISTS idx_memories_created
235
- ON memories(created_at DESC);
236
- CREATE INDEX IF NOT EXISTS idx_memories_source
237
- ON memories(source);
238
- CREATE INDEX IF NOT EXISTS idx_memories_source_ref
239
- ON memories(source_ref);
240
-
241
- CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
242
- USING fts5(content, tags, namespace UNINDEXED, content_id UNINDEXED);
243
-
244
- -- Keep FTS in sync. We do this manually rather than via triggers so
245
- -- the FTS row's content column holds raw text (FTS can't reach
246
- -- inside JSON for tags otherwise).
220
+ this.db.exec(`
221
+ CREATE TABLE IF NOT EXISTS memories (
222
+ id TEXT PRIMARY KEY,
223
+ namespace TEXT NOT NULL DEFAULT 'default',
224
+ content TEXT NOT NULL,
225
+ tags_json TEXT NOT NULL DEFAULT '[]',
226
+ source TEXT NOT NULL DEFAULT 'manual',
227
+ source_ref TEXT,
228
+ meta_json TEXT,
229
+ created_at INTEGER NOT NULL,
230
+ updated_at INTEGER NOT NULL
231
+ );
232
+
233
+ CREATE INDEX IF NOT EXISTS idx_memories_namespace
234
+ ON memories(namespace);
235
+ CREATE INDEX IF NOT EXISTS idx_memories_created
236
+ ON memories(created_at DESC);
237
+ CREATE INDEX IF NOT EXISTS idx_memories_source
238
+ ON memories(source);
239
+ CREATE INDEX IF NOT EXISTS idx_memories_source_ref
240
+ ON memories(source_ref);
241
+
242
+ CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
243
+ USING fts5(content, tags, namespace UNINDEXED, content_id UNINDEXED);
244
+
245
+ -- Keep FTS in sync. We do this manually rather than via triggers so
246
+ -- the FTS row's content column holds raw text (FTS can't reach
247
+ -- inside JSON for tags otherwise).
247
248
  `);
248
249
  if (this.vecAvailable) {
249
250
  // vec0 virtual table. Each row carries the memory_id as an auxiliary
250
251
  // column so we can JOIN back to memories without managing rowids.
251
- this.db.exec(`
252
- CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec
253
- USING vec0(
254
- memory_id TEXT PRIMARY KEY,
255
- embedding float[${EMBEDDING_DIM}]
256
- );
252
+ this.db.exec(`
253
+ CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec
254
+ USING vec0(
255
+ memory_id TEXT PRIMARY KEY,
256
+ embedding float[${EMBEDDING_DIM}]
257
+ );
257
258
  `);
258
259
  }
259
260
  // ── P2.3 — Entity resolution tables ──────────────────────────────────
@@ -266,73 +267,78 @@ export class LocalProvider {
266
267
  // (e.g., "Johnny" → resolved to canonical "John Doe"); `confidence`
267
268
  // ranges in [0, 1] from exact match (1.0) down through embedding
268
269
  // similarity and LLM tiebreak picks (0.65-0.85).
269
- this.db.exec(`
270
- CREATE TABLE IF NOT EXISTS entities (
271
- id TEXT PRIMARY KEY,
272
- display_name TEXT NOT NULL,
273
- entity_type TEXT NOT NULL,
274
- aliases_json TEXT NOT NULL DEFAULT '[]',
275
- mention_count INTEGER NOT NULL DEFAULT 0,
276
- first_seen_at INTEGER NOT NULL,
277
- last_seen_at INTEGER NOT NULL
278
- );
279
- CREATE INDEX IF NOT EXISTS idx_entities_type
280
- ON entities(entity_type);
281
- CREATE INDEX IF NOT EXISTS idx_entities_last_seen
282
- ON entities(last_seen_at DESC);
283
-
284
- CREATE TABLE IF NOT EXISTS memory_entities (
285
- memory_id TEXT NOT NULL,
286
- entity_id TEXT NOT NULL,
287
- surface_form TEXT NOT NULL,
288
- confidence REAL NOT NULL,
289
- PRIMARY KEY (memory_id, entity_id)
290
- );
291
- CREATE INDEX IF NOT EXISTS idx_memory_entities_entity
292
- ON memory_entities(entity_id);
293
-
294
- -- P3 — Knowledge-graph edges. Each row is a triple (from, predicate,
295
- -- to) plus provenance (memory_id) + confidence. P4 forward-looking
296
- -- columns (valid_from / valid_to) are added now so bi-temporal
297
- -- queries don't require a schema migration later.
298
- CREATE TABLE IF NOT EXISTS relations (
299
- id TEXT PRIMARY KEY,
300
- from_entity_id TEXT NOT NULL,
301
- to_entity_id TEXT NOT NULL,
302
- predicate TEXT NOT NULL,
303
- memory_id TEXT NOT NULL,
304
- confidence REAL NOT NULL,
305
- valid_from INTEGER,
306
- valid_to INTEGER,
307
- recorded_at INTEGER NOT NULL
308
- );
309
- CREATE INDEX IF NOT EXISTS idx_relations_from
310
- ON relations(from_entity_id);
311
- CREATE INDEX IF NOT EXISTS idx_relations_to
312
- ON relations(to_entity_id);
313
- CREATE INDEX IF NOT EXISTS idx_relations_predicate
314
- ON relations(predicate);
315
- CREATE INDEX IF NOT EXISTS idx_relations_memory
316
- ON relations(memory_id);
317
- CREATE INDEX IF NOT EXISTS idx_relations_valid_to
318
- ON relations(valid_to);
270
+ this.db.exec(`
271
+ CREATE TABLE IF NOT EXISTS entities (
272
+ id TEXT PRIMARY KEY,
273
+ display_name TEXT NOT NULL,
274
+ entity_type TEXT NOT NULL,
275
+ aliases_json TEXT NOT NULL DEFAULT '[]',
276
+ mention_count INTEGER NOT NULL DEFAULT 0,
277
+ first_seen_at INTEGER NOT NULL,
278
+ last_seen_at INTEGER NOT NULL
279
+ );
280
+ CREATE INDEX IF NOT EXISTS idx_entities_type
281
+ ON entities(entity_type);
282
+ CREATE INDEX IF NOT EXISTS idx_entities_last_seen
283
+ ON entities(last_seen_at DESC);
284
+
285
+ CREATE TABLE IF NOT EXISTS memory_entities (
286
+ memory_id TEXT NOT NULL,
287
+ entity_id TEXT NOT NULL,
288
+ surface_form TEXT NOT NULL,
289
+ confidence REAL NOT NULL,
290
+ PRIMARY KEY (memory_id, entity_id)
291
+ );
292
+ CREATE INDEX IF NOT EXISTS idx_memory_entities_entity
293
+ ON memory_entities(entity_id);
294
+
295
+ -- P3 — Knowledge-graph edges. Each row is a triple (from, predicate,
296
+ -- to) plus provenance (memory_id) + confidence. P4 forward-looking
297
+ -- columns (valid_from / valid_to) are added now so bi-temporal
298
+ -- queries don't require a schema migration later.
299
+ CREATE TABLE IF NOT EXISTS relations (
300
+ id TEXT PRIMARY KEY,
301
+ from_entity_id TEXT NOT NULL,
302
+ to_entity_id TEXT NOT NULL,
303
+ predicate TEXT NOT NULL,
304
+ memory_id TEXT NOT NULL,
305
+ confidence REAL NOT NULL,
306
+ valid_from INTEGER,
307
+ valid_to INTEGER,
308
+ recorded_at INTEGER NOT NULL
309
+ );
310
+ CREATE INDEX IF NOT EXISTS idx_relations_from
311
+ ON relations(from_entity_id);
312
+ CREATE INDEX IF NOT EXISTS idx_relations_to
313
+ ON relations(to_entity_id);
314
+ CREATE INDEX IF NOT EXISTS idx_relations_predicate
315
+ ON relations(predicate);
316
+ CREATE INDEX IF NOT EXISTS idx_relations_memory
317
+ ON relations(memory_id);
318
+ CREATE INDEX IF NOT EXISTS idx_relations_valid_to
319
+ ON relations(valid_to);
319
320
  `);
320
321
  if (this.vecAvailable) {
321
322
  // Embedding index for entity name+context strings. Used by the
322
323
  // resolver's vector-similarity stage when finding candidate matches
323
324
  // for a freshly extracted entity.
324
- this.db.exec(`
325
- CREATE VIRTUAL TABLE IF NOT EXISTS entities_vec
326
- USING vec0(
327
- entity_id TEXT PRIMARY KEY,
328
- embedding float[${EMBEDDING_DIM}]
329
- );
325
+ this.db.exec(`
326
+ CREATE VIRTUAL TABLE IF NOT EXISTS entities_vec
327
+ USING vec0(
328
+ entity_id TEXT PRIMARY KEY,
329
+ embedding float[${EMBEDDING_DIM}]
330
+ );
330
331
  `);
331
332
  }
332
333
  // P5 — Consolidation proposal table (idempotent).
333
334
  ensureConsolidationSchema(this.db);
334
335
  // Procedural memory table (idempotent). Mem0 leapfrog feature.
335
336
  ensureProceduralSchema(this.db);
337
+ // ── Recall savings (v0.6) ────────────────────────────────────────
338
+ // Logs every search() for the savings dashboard widget.
339
+ // DDL lives in src/savings/recall-event.ts so the savings module
340
+ // owns its own schema.
341
+ this.db.exec(RECALL_EVENTS_DDL);
336
342
  }
337
343
  // ─── write path ──────────────────────────────────────────────────────────
338
344
  async save(input) {
@@ -391,17 +397,17 @@ export class LocalProvider {
391
397
  // Failure here is non-fatal — we just skip the vec insert.
392
398
  const vector = this.vecAvailable ? await embed(input.content) : null;
393
399
  const tx = this.db.transaction(() => {
394
- this.db.prepare(`
395
- INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
396
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
400
+ this.db.prepare(`
401
+ INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
402
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
397
403
  `).run(id, ns, input.content, JSON.stringify(tags), input.source ?? 'manual', input.source_ref ?? null, input.metadata ? JSON.stringify(input.metadata) : null, now, now);
398
- this.db.prepare(`
399
- INSERT INTO memories_fts (content, tags, namespace, content_id)
400
- VALUES (?, ?, ?, ?)
404
+ this.db.prepare(`
405
+ INSERT INTO memories_fts (content, tags, namespace, content_id)
406
+ VALUES (?, ?, ?, ?)
401
407
  `).run(input.content, tags.join(' '), ns, id);
402
408
  if (vector && this.vecAvailable) {
403
- this.db.prepare(`
404
- INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)
409
+ this.db.prepare(`
410
+ INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)
405
411
  `).run(id, Buffer.from(vector.buffer));
406
412
  }
407
413
  });
@@ -457,9 +463,9 @@ export class LocalProvider {
457
463
  ? meta.byok_anthropic_key : undefined;
458
464
  const relations = await extractRelations(input.content, resolvedForRelations, { anthropicKey: byokAnthropic });
459
465
  if (relations.length > 0) {
460
- const insertRel = this.db.prepare(`INSERT INTO relations
461
- (id, from_entity_id, to_entity_id, predicate, memory_id,
462
- confidence, valid_from, valid_to, recorded_at)
466
+ const insertRel = this.db.prepare(`INSERT INTO relations
467
+ (id, from_entity_id, to_entity_id, predicate, memory_id,
468
+ confidence, valid_from, valid_to, recorded_at)
463
469
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`);
464
470
  const tx2 = this.db.transaction(() => {
465
471
  for (const r of relations) {
@@ -528,13 +534,13 @@ export class LocalProvider {
528
534
  async bulkSaveOne(inputs) {
529
535
  const vectors = this.vecAvailable ? await embedBatch(inputs.map(i => i.content)) : inputs.map(() => null);
530
536
  const out = [];
531
- const insertMem = this.db.prepare(`
532
- INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
533
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
537
+ const insertMem = this.db.prepare(`
538
+ INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
539
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
534
540
  `);
535
- const insertFts = this.db.prepare(`
536
- INSERT INTO memories_fts (content, tags, namespace, content_id)
537
- VALUES (?, ?, ?, ?)
541
+ const insertFts = this.db.prepare(`
542
+ INSERT INTO memories_fts (content, tags, namespace, content_id)
543
+ VALUES (?, ?, ?, ?)
538
544
  `);
539
545
  const insertVec = this.vecAvailable
540
546
  ? this.db.prepare(`INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)`)
@@ -609,13 +615,13 @@ export class LocalProvider {
609
615
  const vectors = this.vecAvailable
610
616
  ? await embedBatch(expanded.map(i => i.content))
611
617
  : expanded.map(() => null);
612
- const insertMem = this.db.prepare(`
613
- INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
614
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
618
+ const insertMem = this.db.prepare(`
619
+ INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
620
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
615
621
  `);
616
- const insertFts = this.db.prepare(`
617
- INSERT INTO memories_fts (content, tags, namespace, content_id)
618
- VALUES (?, ?, ?, ?)
622
+ const insertFts = this.db.prepare(`
623
+ INSERT INTO memories_fts (content, tags, namespace, content_id)
624
+ VALUES (?, ?, ?, ?)
619
625
  `);
620
626
  const insertVec = this.vecAvailable
621
627
  ? this.db.prepare(`INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)`)
@@ -653,12 +659,12 @@ export class LocalProvider {
653
659
  const ftsRanks = new Map(); // id → 1-based rank
654
660
  if (safeQuery) {
655
661
  const filter = buildFilterFragment(input, 'm');
656
- let sql = `
657
- SELECT m.id
658
- FROM memories_fts f
659
- JOIN memories m ON m.id = f.content_id
660
- WHERE memories_fts MATCH ?
661
- AND ${filter.sql}
662
+ let sql = `
663
+ SELECT m.id
664
+ FROM memories_fts f
665
+ JOIN memories m ON m.id = f.content_id
666
+ WHERE memories_fts MATCH ?
667
+ AND ${filter.sql}
662
668
  `;
663
669
  const params = [safeQuery, ...filter.params];
664
670
  sql += ` ORDER BY bm25(memories_fts) LIMIT ?`;
@@ -677,12 +683,12 @@ export class LocalProvider {
677
683
  const qvec = await embed(input.query);
678
684
  if (qvec) {
679
685
  try {
680
- const rows = this.db.prepare(`
681
- SELECT memory_id AS id, distance
682
- FROM memories_vec
683
- WHERE embedding MATCH ?
684
- AND k = ?
685
- ORDER BY distance
686
+ const rows = this.db.prepare(`
687
+ SELECT memory_id AS id, distance
688
+ FROM memories_vec
689
+ WHERE embedding MATCH ?
690
+ AND k = ?
691
+ ORDER BY distance
686
692
  `).all(Buffer.from(qvec.buffer), candidateLimit);
687
693
  let candidates = rows.map(r => r.id);
688
694
  // Namespace filter (after the KNN — sqlite-vec doesn't let us
@@ -727,8 +733,59 @@ export class LocalProvider {
727
733
  const wanted = new Set(input.tags);
728
734
  memories = memories.filter(m => m.tags.some(t => wanted.has(t)));
729
735
  }
736
+ // Recall-event capture moved to the MCP-server tool handler
737
+ // (src/index.ts) so it's provider-agnostic — fires for both local and
738
+ // hosted modes. recordRecallEvent() below is kept for any direct
739
+ // LocalProvider callers (e.g. the benchmark adapter) that still want
740
+ // the inline capture, but it's no longer invoked from search().
730
741
  return memories;
731
742
  }
743
+ /**
744
+ * Logs one row to recall_events for the savings dashboard. Sums every
745
+ * memory in the namespace once to get the baseline 'what would I have
746
+ * had to send' figure (cheap — LENGTH() over content text). Fail-open.
747
+ */
748
+ recordRecallEvent(input, returned) {
749
+ try {
750
+ const tokens_returned = returned.reduce((sum, m) => sum + approximateTokens(m.content), 0);
751
+ const ns = input.namespace ?? null;
752
+ let baseline_chars = 0;
753
+ if (ns) {
754
+ const row = this.db
755
+ .prepare(`SELECT COALESCE(SUM(LENGTH(content)), 0) AS chars
756
+ FROM memories
757
+ WHERE namespace = ?`)
758
+ .get(ns);
759
+ baseline_chars = row?.chars ?? 0;
760
+ }
761
+ else {
762
+ const row = this.db
763
+ .prepare(`SELECT COALESCE(SUM(LENGTH(content)), 0) AS chars FROM memories`)
764
+ .get();
765
+ baseline_chars = row?.chars ?? 0;
766
+ }
767
+ const tokens_baseline_namespace = Math.ceil(baseline_chars / 4);
768
+ const ev = buildRecallEvent({
769
+ namespace: ns,
770
+ query: input.query,
771
+ tokens_returned,
772
+ tokens_baseline_namespace,
773
+ model_id: input.model_id ?? null,
774
+ client: input.client ?? null,
775
+ });
776
+ this.db
777
+ .prepare(`INSERT INTO recall_events
778
+ (id, created_at, namespace, query_hash, tokens_returned,
779
+ tokens_baseline_namespace, tokens_baseline_capped, model_id,
780
+ context_limit, client)
781
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
782
+ .run(ev.id, ev.created_at, ev.namespace, ev.query_hash, ev.tokens_returned, ev.tokens_baseline_namespace, ev.tokens_baseline_capped, ev.model_id, ev.context_limit, ev.client);
783
+ }
784
+ catch (e) {
785
+ // Fail-open by design — never let recall observability break recall.
786
+ console.warn('[mnueron/savings] recall-event capture failed:', e instanceof Error ? e.message : e);
787
+ }
788
+ }
732
789
  async list(input) {
733
790
  // v0.2.1 + v0.2.4: full filter support via shared helper.
734
791
  // Note: 'm' alias is omitted here because list() doesn't join other
@@ -822,12 +879,12 @@ export class LocalProvider {
822
879
  const nextTags = patch.tags
823
880
  ?? JSON.parse((existing.tags_json ?? existing.tags) ?? '[]');
824
881
  const now = Date.now();
825
- this.db.prepare(`UPDATE memories
826
- SET content = ?,
827
- namespace = ?,
828
- tags_json = ?,
829
- meta_json = ?,
830
- updated_at = ?
882
+ this.db.prepare(`UPDATE memories
883
+ SET content = ?,
884
+ namespace = ?,
885
+ tags_json = ?,
886
+ meta_json = ?,
887
+ updated_at = ?
831
888
  WHERE id = ?`).run(nextContent, nextNs, JSON.stringify(nextTags), JSON.stringify(merged), now, id);
832
889
  // If content changed, re-index FTS + (optionally) re-embed.
833
890
  if (contentChanged) {
@@ -865,13 +922,13 @@ export class LocalProvider {
865
922
  return tx();
866
923
  }
867
924
  async namespaces() {
868
- const rows = this.db.prepare(`
869
- SELECT namespace AS name,
870
- COUNT(*) AS count,
871
- MAX(updated_at) AS last_updated
872
- FROM memories
873
- GROUP BY namespace
874
- ORDER BY last_updated DESC
925
+ const rows = this.db.prepare(`
926
+ SELECT namespace AS name,
927
+ COUNT(*) AS count,
928
+ MAX(updated_at) AS last_updated
929
+ FROM memories
930
+ GROUP BY namespace
931
+ ORDER BY last_updated DESC
875
932
  `).all();
876
933
  return rows.map(r => ({
877
934
  name: r.name,
@@ -895,12 +952,12 @@ export class LocalProvider {
895
952
  }
896
953
  if (input.q && input.q.trim()) {
897
954
  // Match display_name OR any alias (case-insensitive substring).
898
- parts.push(`(
899
- lower(display_name) LIKE lower('%' || ? || '%')
900
- OR EXISTS (
901
- SELECT 1 FROM json_each(aliases_json) AS a
902
- WHERE lower(a.value) LIKE lower('%' || ? || '%')
903
- )
955
+ parts.push(`(
956
+ lower(display_name) LIKE lower('%' || ? || '%')
957
+ OR EXISTS (
958
+ SELECT 1 FROM json_each(aliases_json) AS a
959
+ WHERE lower(a.value) LIKE lower('%' || ? || '%')
960
+ )
904
961
  )`);
905
962
  params.push(input.q.trim(), input.q.trim());
906
963
  }
@@ -932,13 +989,13 @@ export class LocalProvider {
932
989
  async getEntityMemories(id, limit = 100) {
933
990
  const cap = clampLimit(limit, 500);
934
991
  const rows = this.db
935
- .prepare(`SELECT m.id, m.namespace, m.content, m.tags_json, m.source, m.source_ref,
936
- m.meta_json, m.created_at, m.updated_at,
937
- me.surface_form, me.confidence
938
- FROM memory_entities me
939
- JOIN memories m ON m.id = me.memory_id
940
- WHERE me.entity_id = ?
941
- ORDER BY m.created_at DESC
992
+ .prepare(`SELECT m.id, m.namespace, m.content, m.tags_json, m.source, m.source_ref,
993
+ m.meta_json, m.created_at, m.updated_at,
994
+ me.surface_form, me.confidence
995
+ FROM memory_entities me
996
+ JOIN memories m ON m.id = me.memory_id
997
+ WHERE me.entity_id = ?
998
+ ORDER BY m.created_at DESC
942
999
  LIMIT ?`)
943
1000
  .all(id, cap);
944
1001
  return rows.map((r) => ({
@@ -983,18 +1040,18 @@ export class LocalProvider {
983
1040
  // Repoint edges. INSERT-OR-IGNORE then DELETE-old, with confidence MAX
984
1041
  // fold to preserve the strongest edge if both winner and loser shared
985
1042
  // a memory.
986
- this.db.prepare(`INSERT INTO memory_entities (memory_id, entity_id, surface_form, confidence)
987
- SELECT memory_id, ?, surface_form, confidence
988
- FROM memory_entities WHERE entity_id = ?
989
- ON CONFLICT(memory_id, entity_id) DO UPDATE SET
1043
+ this.db.prepare(`INSERT INTO memory_entities (memory_id, entity_id, surface_form, confidence)
1044
+ SELECT memory_id, ?, surface_form, confidence
1045
+ FROM memory_entities WHERE entity_id = ?
1046
+ ON CONFLICT(memory_id, entity_id) DO UPDATE SET
990
1047
  confidence = MAX(memory_entities.confidence, excluded.confidence)`).run(winnerId, loserId);
991
1048
  this.db.prepare(`DELETE FROM memory_entities WHERE entity_id = ?`).run(loserId);
992
1049
  // Update winner aggregate.
993
- this.db.prepare(`UPDATE entities SET
994
- aliases_json = ?,
995
- mention_count = mention_count + ?,
996
- first_seen_at = MIN(first_seen_at, ?),
997
- last_seen_at = MAX(last_seen_at, ?)
1050
+ this.db.prepare(`UPDATE entities SET
1051
+ aliases_json = ?,
1052
+ mention_count = mention_count + ?,
1053
+ first_seen_at = MIN(first_seen_at, ?),
1054
+ last_seen_at = MAX(last_seen_at, ?)
998
1055
  WHERE id = ?`).run(JSON.stringify(mergedAliases), loser.mention_count, loser.first_seen_at, loser.last_seen_at, winnerId);
999
1056
  // Delete loser everywhere.
1000
1057
  if (this.vecAvailable) {
@@ -1056,11 +1113,11 @@ export class LocalProvider {
1056
1113
  }
1057
1114
  const limit = clampLimit(input.limit ?? 200, 1000);
1058
1115
  const rows = this.db
1059
- .prepare(`SELECT id, from_entity_id, to_entity_id, predicate, memory_id,
1060
- confidence, valid_from, valid_to, recorded_at
1061
- FROM relations
1062
- WHERE ${parts.join(' AND ')}
1063
- ORDER BY recorded_at DESC
1116
+ .prepare(`SELECT id, from_entity_id, to_entity_id, predicate, memory_id,
1117
+ confidence, valid_from, valid_to, recorded_at
1118
+ FROM relations
1119
+ WHERE ${parts.join(' AND ')}
1120
+ ORDER BY recorded_at DESC
1064
1121
  LIMIT ?`)
1065
1122
  .all(...params, limit);
1066
1123
  return rows;
@@ -1168,10 +1225,10 @@ export class LocalProvider {
1168
1225
  countMissingEmbeddings() {
1169
1226
  if (!this.vecAvailable)
1170
1227
  return 0;
1171
- const r = this.db.prepare(`
1172
- SELECT COUNT(*) AS c
1173
- FROM memories
1174
- WHERE id NOT IN (SELECT memory_id FROM memories_vec)
1228
+ const r = this.db.prepare(`
1229
+ SELECT COUNT(*) AS c
1230
+ FROM memories
1231
+ WHERE id NOT IN (SELECT memory_id FROM memories_vec)
1175
1232
  `).get();
1176
1233
  return r?.c ?? 0;
1177
1234
  }
@@ -1183,18 +1240,18 @@ export class LocalProvider {
1183
1240
  async rebuildEmbeddings(onProgress) {
1184
1241
  if (!this.vecAvailable)
1185
1242
  return { updated: 0, skipped: 0, errors: 0 };
1186
- const rows = this.db.prepare(`
1187
- SELECT id, content
1188
- FROM memories
1189
- WHERE id NOT IN (SELECT memory_id FROM memories_vec)
1190
- ORDER BY created_at ASC
1243
+ const rows = this.db.prepare(`
1244
+ SELECT id, content
1245
+ FROM memories
1246
+ WHERE id NOT IN (SELECT memory_id FROM memories_vec)
1247
+ ORDER BY created_at ASC
1191
1248
  `).all();
1192
1249
  const total = rows.length;
1193
1250
  let updated = 0, skipped = 0, errors = 0;
1194
1251
  // Embed in batches of 16 for throughput without spiking memory.
1195
1252
  const BATCH = 16;
1196
- const insertVec = this.db.prepare(`
1197
- INSERT OR REPLACE INTO memories_vec (memory_id, embedding) VALUES (?, ?)
1253
+ const insertVec = this.db.prepare(`
1254
+ INSERT OR REPLACE INTO memories_vec (memory_id, embedding) VALUES (?, ?)
1198
1255
  `);
1199
1256
  for (let i = 0; i < rows.length; i += BATCH) {
1200
1257
  const chunk = rows.slice(i, i + BATCH);
@@ -1258,20 +1315,20 @@ export class LocalProvider {
1258
1315
  }
1259
1316
  // Now fetch every memory whose metadata.parent_ref equals ref.
1260
1317
  // JSON field path syntax: json_extract(meta_json, '$.parent_ref')
1261
- const rows = this.db.prepare(`
1262
- SELECT *
1263
- FROM memories
1264
- WHERE json_extract(meta_json, '$.parent_ref') = ?
1265
- ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
1318
+ const rows = this.db.prepare(`
1319
+ SELECT *
1320
+ FROM memories
1321
+ WHERE json_extract(meta_json, '$.parent_ref') = ?
1322
+ ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
1266
1323
  `).all(ref);
1267
1324
  // Also try a fallback against source_ref for memories chunked via
1268
1325
  // source_ref-as-parent_ref (this is the common case for backfills).
1269
1326
  if (rows.length === 0) {
1270
- const alt = this.db.prepare(`
1271
- SELECT *
1272
- FROM memories
1273
- WHERE source_ref = ?
1274
- ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
1327
+ const alt = this.db.prepare(`
1328
+ SELECT *
1329
+ FROM memories
1330
+ WHERE source_ref = ?
1331
+ ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
1275
1332
  `).all(ref);
1276
1333
  return alt.map(r => this.rowToMemory(r));
1277
1334
  }
@@ -1292,37 +1349,37 @@ export class LocalProvider {
1292
1349
  const offset = opts.offset ?? 0;
1293
1350
  // We use COALESCE(parent_ref-from-metadata, id) as the bucket key so
1294
1351
  // standalone (non-chunked) memories show up as single-row threads too.
1295
- const sql = `
1296
- WITH grouped AS (
1297
- SELECT
1298
- COALESCE(json_extract(meta_json, '$.parent_ref'), id) AS pref,
1299
- namespace,
1300
- COUNT(*) AS cnt,
1301
- MIN(created_at) AS first_at,
1302
- MAX(updated_at) AS last_at,
1303
- SUM(CASE WHEN json_extract(meta_json, '$.chunk_index') IS NOT NULL THEN 1 ELSE 0 END) AS chunked_n
1304
- FROM memories
1305
- ${opts.namespace ? 'WHERE namespace = ?' : ''}
1306
- GROUP BY pref, namespace
1307
- )
1308
- SELECT
1309
- g.pref AS parent_ref,
1310
- g.namespace,
1311
- g.cnt AS count,
1312
- g.first_at,
1313
- g.last_at,
1314
- g.chunked_n > 0 AS has_chunks,
1315
- (
1316
- SELECT m.content
1317
- FROM memories m
1318
- WHERE COALESCE(json_extract(m.meta_json, '$.parent_ref'), m.id) = g.pref
1319
- AND m.namespace = g.namespace
1320
- ORDER BY COALESCE(json_extract(m.meta_json, '$.chunk_index'), 0) ASC, m.created_at ASC
1321
- LIMIT 1
1322
- ) AS title_source
1323
- FROM grouped g
1324
- ORDER BY g.last_at DESC
1325
- LIMIT ? OFFSET ?
1352
+ const sql = `
1353
+ WITH grouped AS (
1354
+ SELECT
1355
+ COALESCE(json_extract(meta_json, '$.parent_ref'), id) AS pref,
1356
+ namespace,
1357
+ COUNT(*) AS cnt,
1358
+ MIN(created_at) AS first_at,
1359
+ MAX(updated_at) AS last_at,
1360
+ SUM(CASE WHEN json_extract(meta_json, '$.chunk_index') IS NOT NULL THEN 1 ELSE 0 END) AS chunked_n
1361
+ FROM memories
1362
+ ${opts.namespace ? 'WHERE namespace = ?' : ''}
1363
+ GROUP BY pref, namespace
1364
+ )
1365
+ SELECT
1366
+ g.pref AS parent_ref,
1367
+ g.namespace,
1368
+ g.cnt AS count,
1369
+ g.first_at,
1370
+ g.last_at,
1371
+ g.chunked_n > 0 AS has_chunks,
1372
+ (
1373
+ SELECT m.content
1374
+ FROM memories m
1375
+ WHERE COALESCE(json_extract(m.meta_json, '$.parent_ref'), m.id) = g.pref
1376
+ AND m.namespace = g.namespace
1377
+ ORDER BY COALESCE(json_extract(m.meta_json, '$.chunk_index'), 0) ASC, m.created_at ASC
1378
+ LIMIT 1
1379
+ ) AS title_source
1380
+ FROM grouped g
1381
+ ORDER BY g.last_at DESC
1382
+ LIMIT ? OFFSET ?
1326
1383
  `;
1327
1384
  const params = opts.namespace ? [opts.namespace, limit, offset] : [limit, offset];
1328
1385
  const rows = this.db.prepare(sql).all(...params);
@@ -1341,15 +1398,15 @@ export class LocalProvider {
1341
1398
  * predate chunking. Used by `mnueron rechunk` to backfill the new shape.
1342
1399
  */
1343
1400
  findOversizedMemories(threshold = DEFAULT_CHUNK_THRESHOLD) {
1344
- return this.db.prepare(`
1345
- SELECT id, content, namespace, tags_json, source, source_ref, meta_json, created_at
1346
- FROM memories
1347
- WHERE LENGTH(content) > ?
1348
- AND (
1349
- meta_json IS NULL
1350
- OR json_extract(meta_json, '$.chunk_index') IS NULL
1351
- )
1352
- ORDER BY LENGTH(content) DESC
1401
+ return this.db.prepare(`
1402
+ SELECT id, content, namespace, tags_json, source, source_ref, meta_json, created_at
1403
+ FROM memories
1404
+ WHERE LENGTH(content) > ?
1405
+ AND (
1406
+ meta_json IS NULL
1407
+ OR json_extract(meta_json, '$.chunk_index') IS NULL
1408
+ )
1409
+ ORDER BY LENGTH(content) DESC
1353
1410
  `).all(threshold);
1354
1411
  }
1355
1412
  rowToMemory(row, score) {