mnueron 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,7 +5,88 @@ import { dirname } from 'node:path';
5
5
  import * as sqliteVec from 'sqlite-vec';
6
6
  import { embed, embedBatch, EMBEDDING_DIM, preload } from './embeddings.js';
7
7
  import { chunkContent, shouldChunk, DEFAULT_CHUNK_THRESHOLD } from './chunking.js';
8
+ import { extractEntities, shouldExtractEntities } from './entity-extractor.js';
9
+ import { resolveEntitiesForMemory } from './entity-resolver.js';
10
+ import { extractRelations, shouldExtractRelations } from './relation-extractor.js';
11
+ import { ensureConsolidationSchema, detectDuplicates, listProposals, reviewProposal, } from './consolidator.js';
12
+ import { ensureProceduralSchema, saveProcedural, getProceduralByName, listProcedural, recallProcedural, deleteProcedural, } from './procedural.js';
8
13
  import { redact } from './redactor.js';
14
/**
 * Build a SQL fragment + params for the shared filter shape used by
 * search() and list(). Clauses are joined by AND and always begin with
 * `1=1`, so callers can splice the fragment directly after `WHERE` or `AND`.
 *
 * Column references are prefixed with `alias.` (default `m`). Callers that
 * query the memories table without an alias pass `''` to drop the prefix.
 *
 * @param {object} f - Filter input. Recognized keys: `namespace`,
 *   `created_after`, `created_before`, `updated_after`, `updated_before`,
 *   `metadata_filter`. Unknown keys are ignored.
 * @param {string} [alias='m'] - Table alias for column references.
 * @returns {{ sql: string, params: Array<string|number> }} The WHERE
 *   fragment and its positional bind values, in matching order.
 */
function buildFilterFragment(f, alias = 'm') {
  const prefix = alias ? `${alias}.` : '';
  const parts = ['1=1'];
  const params = [];
  if (f.namespace) {
    parts.push(`${prefix}namespace = ?`);
    params.push(f.namespace);
  }
  // Inclusive range bounds. `!= null` (not truthiness) keeps a bound of 0.
  const ranges = [
    ['created_after', 'created_at', '>='],
    ['created_before', 'created_at', '<='],
    ['updated_after', 'updated_at', '>='],
    ['updated_before', 'updated_at', '<='],
  ];
  for (const [key, column, op] of ranges) {
    if (f[key] != null) {
      parts.push(`${prefix}${column} ${op} ?`);
      params.push(f[key]);
    }
  }
  // metadata_filter: SQLite has no native @> operator, but we can match
  // every top-level k=v pair via json_extract. Only strings, numbers, and
  // booleans are supported on the RHS — other value types are skipped.
  if (f.metadata_filter && typeof f.metadata_filter === 'object') {
    for (const [k, v] of Object.entries(f.metadata_filter)) {
      const isBool = typeof v === 'boolean';
      if (typeof v === 'string' || typeof v === 'number' || isBool) {
        // The JSON column is `meta_json` — the name every other statement
        // in this file reads and writes.
        parts.push(`json_extract(${prefix}meta_json, '$.' || ?) = ?`);
        // json_extract yields 1/0 for JSON true/false, so bind booleans as
        // integers rather than handing a JS boolean to the driver.
        params.push(k, isBool ? (v ? 1 : 0) : v);
      }
    }
  }
  return { sql: parts.join(' AND '), params };
}
62
/**
 * Clamp a caller-supplied LIMIT to a sensible max — prevents accidental
 * `LIMIT 999999` exhausting memory on large stores.
 *
 * @param {number} want - Requested row count. Non-finite values, and values
 *   that floor to zero or below, fall back to the default of 100.
 * @param {number} max - Upper bound; also caps the default.
 * @returns {number} A whole number in [1, max] for any positive `max`.
 */
function clampLimit(want, max) {
  // Floor BEFORE the positivity check so a fraction such as 0.5 takes the
  // default instead of silently becoming `LIMIT 0`.
  const whole = Number.isFinite(want) ? Math.floor(want) : 0;
  if (whole <= 0) {
    return Math.min(100, max);
  }
  return Math.min(whole, max);
}
69
/**
 * Convert a raw `entities` row into the public Entity shape.
 *
 * `aliases_json` is decoded leniently: anything that fails to parse, or
 * parses to a non-array, yields an empty alias list, and non-string
 * members are dropped — a bad row never throws from here.
 */
function rowToEntity(row) {
  const decodeAliases = (raw) => {
    try {
      const decoded = JSON.parse(raw);
      return Array.isArray(decoded)
        ? decoded.filter((item) => typeof item === 'string')
        : [];
    } catch {
      // Malformed JSON: fall back to no aliases.
      return [];
    }
  };
  return {
    id: row.id,
    display_name: row.display_name,
    entity_type: row.entity_type,
    aliases: decodeAliases(row.aliases_json),
    mention_count: row.mention_count,
    first_seen_at: row.first_seen_at,
    last_seen_at: row.last_seen_at,
  };
}
9
90
  /**
10
91
  * Run pre-save transforms in fixed order:
11
92
  * 1. Redact secrets — never store API keys / JWTs / etc.
@@ -43,19 +124,36 @@ const FTS_STOP_WORDS = new Set([
43
124
  ]);
44
125
  /**
45
126
  * Translate a natural-language query into an FTS5 MATCH expression.
46
- * - strips FTS5 control characters
127
+ * - strips FTS5 control characters AND user-facing punctuation that
128
+ * trips FTS5's parser (., /, ', etc.). FTS5's grammar treats `.` and
129
+ * apostrophes as separators between identifiers — `"redeploy.sh"` is
130
+ * parsed as "redeploy" "." "sh" and chokes. We replace all of these
131
+ * with spaces BEFORE tokenizing so the resulting tokens are pure
132
+ * alphanumeric-plus-underscore.
47
133
  * - lowercases
48
134
  * - drops stop words and 1-character tokens
49
135
  * - prefix-matches each surviving token (`token*`) so "stores" matches "stored"
50
136
  * - ORs the tokens — any one is enough, BM25 ranks multi-hit rows higher
137
+ *
138
+ * Regex characters we strip:
139
+ * " ( ) * : ^ ~ — FTS5 grammar
140
+ * . , ; / \ ' ` — punctuation that breaks FTS5 token boundaries
141
+ * ! ? & | = + - # @ $ — user-typed but unsafe in MATCH
142
+ * This is permissive: any non-[a-z0-9_] char is replaced with a space
143
+ * inside `buildFtsQuery`, so we don't have to enumerate every case.
51
144
  */
52
145
  function buildFtsQuery(raw) {
53
- const cleaned = raw.replace(/["()*:^~]/g, ' ').toLowerCase().trim();
146
+ // First: collapse anything that isn't a word-char into a space. This is
147
+ // safer than maintaining a denylist — FTS5 only consumes word tokens
148
+ // anyway, so we lose nothing by pre-flattening punctuation.
149
+ const cleaned = raw
150
+ .toLowerCase()
151
+ .replace(/[^a-z0-9_]+/g, ' ')
152
+ .trim();
54
153
  if (!cleaned)
55
154
  return '';
56
155
  const tokens = cleaned
57
156
  .split(/\s+/)
58
- .map(t => t.replace(/^[^a-z0-9_]+|[^a-z0-9_]+$/g, ''))
59
157
  .filter(t => t.length >= 2 && !FTS_STOP_WORDS.has(t));
60
158
  if (tokens.length === 0)
61
159
  return '';
@@ -158,6 +256,83 @@ export class LocalProvider {
158
256
  );
159
257
  `);
160
258
  }
259
+ // ── P2.3 — Entity resolution tables ──────────────────────────────────
260
+ // Canonical entities: one row per unique entity (person/org/project/...)
261
+ // resolved across all memories. `mention_count` and `last_seen_at` make
262
+ // it trivial to show "who/what is most active in your store right now".
263
+ //
264
+ // memory_entities: many-to-many join — one row per (memory, canonical
265
+ // entity) pair. `surface_form` is what the memory text actually said
266
+ // (e.g., "Johnny" → resolved to canonical "John Doe"); `confidence`
267
+ // ranges in [0, 1] from exact match (1.0) down through embedding
268
+ // similarity and LLM tiebreak picks (0.65-0.85).
269
+ this.db.exec(`
270
+ CREATE TABLE IF NOT EXISTS entities (
271
+ id TEXT PRIMARY KEY,
272
+ display_name TEXT NOT NULL,
273
+ entity_type TEXT NOT NULL,
274
+ aliases_json TEXT NOT NULL DEFAULT '[]',
275
+ mention_count INTEGER NOT NULL DEFAULT 0,
276
+ first_seen_at INTEGER NOT NULL,
277
+ last_seen_at INTEGER NOT NULL
278
+ );
279
+ CREATE INDEX IF NOT EXISTS idx_entities_type
280
+ ON entities(entity_type);
281
+ CREATE INDEX IF NOT EXISTS idx_entities_last_seen
282
+ ON entities(last_seen_at DESC);
283
+
284
+ CREATE TABLE IF NOT EXISTS memory_entities (
285
+ memory_id TEXT NOT NULL,
286
+ entity_id TEXT NOT NULL,
287
+ surface_form TEXT NOT NULL,
288
+ confidence REAL NOT NULL,
289
+ PRIMARY KEY (memory_id, entity_id)
290
+ );
291
+ CREATE INDEX IF NOT EXISTS idx_memory_entities_entity
292
+ ON memory_entities(entity_id);
293
+
294
+ -- P3 — Knowledge-graph edges. Each row is a triple (from, predicate,
295
+ -- to) plus provenance (memory_id) + confidence. P4 forward-looking
296
+ -- columns (valid_from / valid_to) are added now so bi-temporal
297
+ -- queries don't require a schema migration later.
298
+ CREATE TABLE IF NOT EXISTS relations (
299
+ id TEXT PRIMARY KEY,
300
+ from_entity_id TEXT NOT NULL,
301
+ to_entity_id TEXT NOT NULL,
302
+ predicate TEXT NOT NULL,
303
+ memory_id TEXT NOT NULL,
304
+ confidence REAL NOT NULL,
305
+ valid_from INTEGER,
306
+ valid_to INTEGER,
307
+ recorded_at INTEGER NOT NULL
308
+ );
309
+ CREATE INDEX IF NOT EXISTS idx_relations_from
310
+ ON relations(from_entity_id);
311
+ CREATE INDEX IF NOT EXISTS idx_relations_to
312
+ ON relations(to_entity_id);
313
+ CREATE INDEX IF NOT EXISTS idx_relations_predicate
314
+ ON relations(predicate);
315
+ CREATE INDEX IF NOT EXISTS idx_relations_memory
316
+ ON relations(memory_id);
317
+ CREATE INDEX IF NOT EXISTS idx_relations_valid_to
318
+ ON relations(valid_to);
319
+ `);
320
+ if (this.vecAvailable) {
321
+ // Embedding index for entity name+context strings. Used by the
322
+ // resolver's vector-similarity stage when finding candidate matches
323
+ // for a freshly extracted entity.
324
+ this.db.exec(`
325
+ CREATE VIRTUAL TABLE IF NOT EXISTS entities_vec
326
+ USING vec0(
327
+ entity_id TEXT PRIMARY KEY,
328
+ embedding float[${EMBEDDING_DIM}]
329
+ );
330
+ `);
331
+ }
332
+ // P5 — Consolidation proposal table (idempotent).
333
+ ensureConsolidationSchema(this.db);
334
+ // Procedural memory table (idempotent). Mem0 leapfrog feature.
335
+ ensureProceduralSchema(this.db);
161
336
  }
162
337
  // ─── write path ──────────────────────────────────────────────────────────
163
338
  async save(input) {
@@ -165,7 +340,39 @@ export class LocalProvider {
165
340
  // boundaries can't slip through. Single source of truth for what
166
341
  // hits SQLite.
167
342
  const transformed = preSaveTransform(input);
168
- // 2. Long content gets auto-chunked into multiple memories. Each chunk
343
+ // 2. P1 entity extraction. SECURITY-CRITICAL: capture and strip BYOK
344
+ // keys from metadata BEFORE the gate check, mirroring the hosted
345
+ // backend's ordering. Short-content saves with BYOK keys still get
346
+ // keys scrubbed even when extraction is skipped.
347
+ const meta = transformed.metadata ?? {};
348
+ const byokAnthropic = typeof meta.byok_anthropic_key === 'string'
349
+ ? meta.byok_anthropic_key : undefined;
350
+ const byokOpenAI = typeof meta.byok_openai_key === 'string'
351
+ ? meta.byok_openai_key : undefined;
352
+ if (byokAnthropic)
353
+ delete meta.byok_anthropic_key;
354
+ if (byokOpenAI)
355
+ delete meta.byok_openai_key;
356
+ transformed.metadata = meta;
357
+ if (shouldExtractEntities(transformed.content.length, transformed.metadata)) {
358
+ // Explicit opt-in (metadata.extract_entities or BYOK) bypasses the
359
+ // 200-char min-length floor. Otherwise (env-var default path) the
360
+ // floor still applies as a guardrail against burning money on
361
+ // one-liner autosaves.
362
+ const meta = transformed.metadata;
363
+ const explicit = meta?.extract_entities === true ||
364
+ (typeof byokAnthropic === 'string' && byokAnthropic.length > 0) ||
365
+ (typeof byokOpenAI === 'string' && byokOpenAI.length > 0);
366
+ const entities = await extractEntities(transformed.content, {
367
+ anthropicKey: byokAnthropic,
368
+ openaiKey: byokOpenAI,
369
+ ...(explicit ? { minChars: 1 } : {}),
370
+ });
371
+ if (entities.length > 0) {
372
+ transformed.metadata = { ...(transformed.metadata ?? {}), entities };
373
+ }
374
+ }
375
+ // 3. Long content gets auto-chunked into multiple memories. Each chunk
169
376
  // becomes a searchable atomic memory; the original conversation is
170
377
  // linkable via `parent_ref` (= source_ref + chunk_index in metadata).
171
378
  if (shouldChunk(transformed.content)) {
@@ -199,12 +406,84 @@ export class LocalProvider {
199
406
  }
200
407
  });
201
408
  tx();
409
+ // ── P2.3 — Entity resolution ────────────────────────────────────────
410
+ // If P1 extraction stamped `metadata.entities`, resolve each one to a
411
+ // canonical entity (reuse OR create), insert memory_entities edges,
412
+ // and write the resolved canonical_ids back onto the stored metadata.
413
+ //
414
+ // This runs AFTER the memory row exists so the resolver has a valid
415
+ // memory_id to link to. It also runs OUTSIDE the transaction because
416
+ // embeddings + LLM tiebreak are async; if those fail mid-way, the
417
+ // memory still saved successfully (fail-open contract).
418
+ let finalMetadata = input.metadata;
419
+ const extractedEntities = Array.isArray(input.metadata?.entities)
420
+ ? input.metadata.entities
421
+ : [];
422
+ if (extractedEntities.length > 0) {
423
+ try {
424
+ const meta = input.metadata;
425
+ const byokAnthropic = typeof meta.byok_anthropic_key === 'string'
426
+ ? meta.byok_anthropic_key : undefined;
427
+ const resolutions = await resolveEntitiesForMemory(this.db, id, extractedEntities.map((e) => ({
428
+ name: e.name,
429
+ type: e.type,
430
+ context: e.context,
431
+ })), this.vecAvailable, { anthropicKey: byokAnthropic });
432
+ // Stamp canonical_id back onto each entity in the stored metadata.
433
+ const entitiesWithIds = extractedEntities.map((e, i) => ({
434
+ ...e,
435
+ canonical_id: resolutions[i]?.canonical_id ?? null,
436
+ }));
437
+ finalMetadata = { ...(input.metadata ?? {}), entities: entitiesWithIds };
438
+ this.db.prepare(`UPDATE memories SET meta_json = ?, updated_at = ? WHERE id = ?`)
439
+ .run(JSON.stringify(finalMetadata), now, id);
440
+ // ── P3 — Relationship extraction ────────────────────────────────
441
+ // Once we have resolved canonical entities, ask the LLM what
442
+ // relationships exist between them. This populates `relations`
443
+ // edges that form the knowledge-graph layer. Gated separately
444
+ // from entity extraction so users can have entities-only without
445
+ // paying the second Haiku call.
446
+ const resolvedForRelations = extractedEntities
447
+ .map((e, i) => ({
448
+ canonical_id: resolutions[i]?.canonical_id ?? null,
449
+ name: e.name,
450
+ type: e.type,
451
+ }))
452
+ .filter((e) => !!e.canonical_id);
453
+ if (shouldExtractRelations(input.content.length, resolvedForRelations.length, input.metadata)) {
454
+ try {
455
+ const meta = input.metadata;
456
+ const byokAnthropic = typeof meta.byok_anthropic_key === 'string'
457
+ ? meta.byok_anthropic_key : undefined;
458
+ const relations = await extractRelations(input.content, resolvedForRelations, { anthropicKey: byokAnthropic });
459
+ if (relations.length > 0) {
460
+ const insertRel = this.db.prepare(`INSERT INTO relations
461
+ (id, from_entity_id, to_entity_id, predicate, memory_id,
462
+ confidence, valid_from, valid_to, recorded_at)
463
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`);
464
+ const tx2 = this.db.transaction(() => {
465
+ for (const r of relations) {
466
+ insertRel.run(randomUUID(), r.from_canonical_id, r.to_canonical_id, r.predicate, id, r.confidence, r.valid_from, r.valid_to, now);
467
+ }
468
+ });
469
+ tx2();
470
+ }
471
+ }
472
+ catch (e) {
473
+ console.warn('[mnueron/local] relation extraction failed (memory + entities saved):', e instanceof Error ? e.message : e);
474
+ }
475
+ }
476
+ }
477
+ catch (e) {
478
+ console.warn('[mnueron/local] entity resolution failed (memory saved without canonical_ids):', e instanceof Error ? e.message : e);
479
+ }
480
+ }
202
481
  return this.rowToMemory({
203
482
  id, namespace: ns, content: input.content,
204
483
  tags_json: JSON.stringify(tags),
205
484
  source: input.source ?? 'manual',
206
485
  source_ref: input.source_ref ?? null,
207
- meta_json: input.metadata ? JSON.stringify(input.metadata) : null,
486
+ meta_json: finalMetadata ? JSON.stringify(finalMetadata) : null,
208
487
  created_at: now, updated_at: now,
209
488
  });
210
489
  }
@@ -368,21 +647,19 @@ export class LocalProvider {
368
647
  // ─── read path: hybrid keyword + vector with RRF ─────────────────────────
369
648
  async search(input) {
370
649
  const k = input.k ?? 10;
371
- // FTS5 leg
650
+ // FTS5 leg — now honors all MemoryFilters (date range + metadata filter).
372
651
  const safeQuery = buildFtsQuery(input.query);
373
652
  const ftsRanks = new Map(); // id → 1-based rank
374
653
  if (safeQuery) {
654
+ const filter = buildFilterFragment(input, 'm');
375
655
  let sql = `
376
656
  SELECT m.id
377
657
  FROM memories_fts f
378
658
  JOIN memories m ON m.id = f.content_id
379
659
  WHERE memories_fts MATCH ?
660
+ AND ${filter.sql}
380
661
  `;
381
- const params = [safeQuery];
382
- if (input.namespace) {
383
- sql += ` AND m.namespace = ?`;
384
- params.push(input.namespace);
385
- }
662
+ const params = [safeQuery, ...filter.params];
386
663
  sql += ` ORDER BY bm25(memories_fts) LIMIT 50`;
387
664
  const rows = this.db.prepare(sql).all(...params);
388
665
  rows.forEach((r, i) => ftsRanks.set(r.id, i + 1));
@@ -452,18 +729,23 @@ export class LocalProvider {
452
729
  return memories;
453
730
  }
454
731
  async list(input) {
455
- let sql = `SELECT * FROM memories WHERE 1=1`;
456
- const params = [];
457
- if (input.namespace) {
458
- sql += ` AND namespace = ?`;
459
- params.push(input.namespace);
460
- }
732
+ // v0.2.1 + v0.2.4: full filter support via shared helper.
733
+ // Note: 'm' alias is omitted here because list() doesn't join other
734
+ // tables, so we pass alias = '' to skip the prefix.
735
+ const filter = buildFilterFragment(input, '');
736
+ let sql = `SELECT * FROM memories WHERE ${filter.sql}`;
737
+ const params = [...filter.params];
738
+ // Keep legacy `before` cursor working for older SDK callers.
461
739
  if (input.before) {
462
740
  sql += ` AND created_at < ?`;
463
741
  params.push(input.before);
464
742
  }
465
743
  sql += ` ORDER BY created_at DESC LIMIT ?`;
466
744
  params.push(input.limit ?? 50);
745
+ if (input.offset && input.offset > 0) {
746
+ sql += ` OFFSET ?`;
747
+ params.push(input.offset);
748
+ }
467
749
  const rows = this.db.prepare(sql).all(...params);
468
750
  let memories = rows.map(r => this.rowToMemory(r));
469
751
  if (input.tags && input.tags.length > 0) {
@@ -472,6 +754,100 @@ export class LocalProvider {
472
754
  }
473
755
  return memories;
474
756
  }
757
+ /**
758
+ * v0.2.3 — bulk search: same scope, multiple queries, one call.
759
+ * SQLite is single-threaded; the only saving here is the function-call
760
+ * overhead. The hosted version's savings are larger (one HTTP RTT). For
761
+ * API parity, exposed under the same Provider method either way.
762
+ */
763
+ async bulkSearch(input) {
764
+ const k = input.k ?? 5;
765
+ const out = [];
766
+ for (const q of input.queries) {
767
+ const hits = await this.search({
768
+ query: q, k,
769
+ namespace: input.namespace,
770
+ tags: input.tags,
771
+ created_after: input.created_after,
772
+ created_before: input.created_before,
773
+ updated_after: input.updated_after,
774
+ updated_before: input.updated_before,
775
+ metadata_filter: input.metadata_filter,
776
+ });
777
+ out.push({ query: q, hits });
778
+ }
779
+ return out;
780
+ }
781
+ /**
782
+ * v0.2.2 — partial update. Re-runs redaction + chunking-aware embed for
783
+ * the content path. Logs change to metadata.history so the same audit
784
+ * trail works against local and hosted.
785
+ *
786
+ * Returns the updated Memory or null if id wasn't found.
787
+ */
788
+ async update(id, patch) {
789
+ const existing = this.db.prepare(`SELECT * FROM memories WHERE id = ?`).get(id);
790
+ if (!existing)
791
+ return null;
792
+ // Build merged metadata + history entry. Note: the column is `meta_json`,
793
+ // not `metadata` (same schema mismatch that broke updates before P2.3
794
+ // backfill ran).
795
+ const metaCol = existing.meta_json ?? existing.metadata;
796
+ const priorMeta = typeof metaCol === 'string'
797
+ ? JSON.parse(metaCol || '{}')
798
+ : (metaCol ?? {});
799
+ const merged = { ...priorMeta };
800
+ if (patch.metadata && typeof patch.metadata === 'object') {
801
+ for (const [k, v] of Object.entries(patch.metadata)) {
802
+ if (v === null)
803
+ delete merged[k];
804
+ else
805
+ merged[k] = v;
806
+ }
807
+ }
808
+ const nextContent = patch.content != null ? redact(patch.content).content : existing.content;
809
+ const contentChanged = nextContent !== existing.content;
810
+ if (contentChanged) {
811
+ const history = Array.isArray(merged.history)
812
+ ? merged.history.slice(0)
813
+ : [];
814
+ history.push({
815
+ at: Date.now(),
816
+ prev_content_len: typeof existing.content === 'string' ? existing.content.length : 0,
817
+ });
818
+ merged.history = history;
819
+ }
820
+ const nextNs = patch.namespace ?? existing.namespace;
821
+ const nextTags = patch.tags
822
+ ?? JSON.parse((existing.tags_json ?? existing.tags) ?? '[]');
823
+ const now = Date.now();
824
+ this.db.prepare(`UPDATE memories
825
+ SET content = ?,
826
+ namespace = ?,
827
+ tags_json = ?,
828
+ meta_json = ?,
829
+ updated_at = ?
830
+ WHERE id = ?`).run(nextContent, nextNs, JSON.stringify(nextTags), JSON.stringify(merged), now, id);
831
+ // If content changed, re-index FTS + (optionally) re-embed.
832
+ if (contentChanged) {
833
+ this.db.prepare(`DELETE FROM memories_fts WHERE content_id = ?`).run(id);
834
+ this.db.prepare(`INSERT INTO memories_fts (content_id, content) VALUES (?, ?)`).run(id, nextContent);
835
+ if (this.vecAvailable) {
836
+ try {
837
+ const v = await embed(nextContent);
838
+ if (v) {
839
+ this.db.prepare(`DELETE FROM memories_vec WHERE memory_id = ?`).run(id);
840
+ this.db.prepare(`INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)`).run(id, Buffer.from(v.buffer));
841
+ }
842
+ }
843
+ catch (e) {
844
+ process.stderr.write(`[mnueron] re-embed on update failed: ${e.message}\n`);
845
+ }
846
+ }
847
+ }
848
+ const fresh = this.db.prepare(`SELECT * FROM memories WHERE id = ?`).get(id);
849
+ return fresh ? this.rowToMemory(fresh) : null;
850
+ }
475
851
  async get(id) {
476
852
  const row = this.db.prepare(`SELECT * FROM memories WHERE id = ?`).get(id);
477
853
  return row ? this.rowToMemory(row) : null;
@@ -502,6 +878,279 @@ export class LocalProvider {
502
878
  last_updated: r.last_updated ?? 0,
503
879
  }));
504
880
  }
881
+ // ─── P2.3 — Entity API ──────────────────────────────────────────────────
882
+ /**
883
+ * List canonical entities with optional type filter, free-text query
884
+ * against display_name + aliases, and sort. Default sort: most-recently-seen.
885
+ */
886
+ async listEntities(input = {}) {
887
+ const limit = clampLimit(input.limit ?? 100, 500);
888
+ const offset = Math.max(0, input.offset ?? 0);
889
+ const parts = ['1=1'];
890
+ const params = [];
891
+ if (input.type) {
892
+ parts.push('entity_type = ?');
893
+ params.push(input.type);
894
+ }
895
+ if (input.q && input.q.trim()) {
896
+ // Match display_name OR any alias (case-insensitive substring).
897
+ parts.push(`(
898
+ lower(display_name) LIKE lower('%' || ? || '%')
899
+ OR EXISTS (
900
+ SELECT 1 FROM json_each(aliases_json) AS a
901
+ WHERE lower(a.value) LIKE lower('%' || ? || '%')
902
+ )
903
+ )`);
904
+ params.push(input.q.trim(), input.q.trim());
905
+ }
906
+ const orderBy = (() => {
907
+ switch (input.sort) {
908
+ case 'mentions': return 'mention_count DESC, last_seen_at DESC';
909
+ case 'alpha': return 'lower(display_name) ASC';
910
+ default: return 'last_seen_at DESC'; // 'recent'
911
+ }
912
+ })();
913
+ const rows = this.db
914
+ .prepare(`SELECT * FROM entities WHERE ${parts.join(' AND ')} ORDER BY ${orderBy} LIMIT ? OFFSET ?`)
915
+ .all(...params, limit, offset);
916
+ return rows.map(rowToEntity);
917
+ }
918
+ /** Single canonical entity by id, or null if not found. */
919
+ async getEntity(id) {
920
+ const row = this.db
921
+ .prepare(`SELECT * FROM entities WHERE id = ? LIMIT 1`)
922
+ .get(id);
923
+ return row ? rowToEntity(row) : null;
924
+ }
925
+ /**
926
+ * All memories linked to a canonical entity, most recent first. Includes
927
+ * the original surface_form so callers can render "John (mentioned as
928
+ * 'Johnny')". Caps at `limit` (default 100, max 500) — entity histories
929
+ * can get long.
930
+ */
931
+ async getEntityMemories(id, limit = 100) {
932
+ const cap = clampLimit(limit, 500);
933
+ const rows = this.db
934
+ .prepare(`SELECT m.id, m.namespace, m.content, m.tags_json, m.source, m.source_ref,
935
+ m.meta_json, m.created_at, m.updated_at,
936
+ me.surface_form, me.confidence
937
+ FROM memory_entities me
938
+ JOIN memories m ON m.id = me.memory_id
939
+ WHERE me.entity_id = ?
940
+ ORDER BY m.created_at DESC
941
+ LIMIT ?`)
942
+ .all(id, cap);
943
+ return rows.map((r) => ({
944
+ ...this.rowToMemory(r),
945
+ surface_form: r.surface_form,
946
+ confidence: r.confidence,
947
+ }));
948
+ }
949
+ /**
950
+ * Merge two canonical entities. After merge:
951
+ * • loserId is hard-deleted from `entities` + `entities_vec`.
952
+ * • All memory_entities rows pointing at loserId are repointed at winnerId.
953
+ * If the winner already has an edge to the same memory, we keep the
954
+ * stronger-confidence one and drop the duplicate.
955
+ * • Aliases from loser are absorbed into winner (deduped).
956
+ * • mention_count is summed; first_seen_at = min, last_seen_at = max.
957
+ *
958
+ * Returns the merged winner row, or null if either id is missing.
959
+ *
960
+ * This runs in a single SQL transaction. Future enhancement: emit a
961
+ * `entity_merge_log` row so merges are auditable / reversible.
962
+ */
963
+ async mergeEntities(winnerId, loserId) {
964
+ if (winnerId === loserId)
965
+ return this.getEntity(winnerId);
966
+ const winner = this.db.prepare(`SELECT * FROM entities WHERE id = ?`).get(winnerId);
967
+ const loser = this.db.prepare(`SELECT * FROM entities WHERE id = ?`).get(loserId);
968
+ if (!winner || !loser)
969
+ return null;
970
+ let winnerAliases = [];
971
+ let loserAliases = [];
972
+ try {
973
+ winnerAliases = JSON.parse(winner.aliases_json);
974
+ }
975
+ catch { /* */ }
976
+ try {
977
+ loserAliases = JSON.parse(loser.aliases_json);
978
+ }
979
+ catch { /* */ }
980
+ const mergedAliases = Array.from(new Set([...winnerAliases, ...loserAliases, loser.display_name]));
981
+ const tx = this.db.transaction(() => {
982
+ // Repoint edges. INSERT-OR-IGNORE then DELETE-old, with confidence MAX
983
+ // fold to preserve the strongest edge if both winner and loser shared
984
+ // a memory.
985
+ this.db.prepare(`INSERT INTO memory_entities (memory_id, entity_id, surface_form, confidence)
986
+ SELECT memory_id, ?, surface_form, confidence
987
+ FROM memory_entities WHERE entity_id = ?
988
+ ON CONFLICT(memory_id, entity_id) DO UPDATE SET
989
+ confidence = MAX(memory_entities.confidence, excluded.confidence)`).run(winnerId, loserId);
990
+ this.db.prepare(`DELETE FROM memory_entities WHERE entity_id = ?`).run(loserId);
991
+ // Update winner aggregate.
992
+ this.db.prepare(`UPDATE entities SET
993
+ aliases_json = ?,
994
+ mention_count = mention_count + ?,
995
+ first_seen_at = MIN(first_seen_at, ?),
996
+ last_seen_at = MAX(last_seen_at, ?)
997
+ WHERE id = ?`).run(JSON.stringify(mergedAliases), loser.mention_count, loser.first_seen_at, loser.last_seen_at, winnerId);
998
+ // Delete loser everywhere.
999
+ if (this.vecAvailable) {
1000
+ try {
1001
+ this.db.prepare(`DELETE FROM entities_vec WHERE entity_id = ?`).run(loserId);
1002
+ }
1003
+ catch { /* vec0 sometimes lacks DELETE; non-fatal */ }
1004
+ }
1005
+ this.db.prepare(`DELETE FROM entities WHERE id = ?`).run(loserId);
1006
+ });
1007
+ tx();
1008
+ return this.getEntity(winnerId);
1009
+ }
1010
+ /**
1011
+ * P2.3 backfill — run the resolver against entities that already exist
1012
+ * in a saved memory's metadata.entities. Used by
1013
+ * `mnueron entities backfill` to retro-fit canonical IDs onto memories
1014
+ * saved before the resolver shipped. Returns the resolutions parallel
1015
+ * to `extracted` so the caller can update metadata.
1016
+ */
1017
+ async backfillResolveMemory(memoryId, extracted, opts = {}) {
1018
+ if (extracted.length === 0)
1019
+ return [];
1020
+ const res = await resolveEntitiesForMemory(this.db, memoryId, extracted, this.vecAvailable, { anthropicKey: opts.anthropicKey });
1021
+ return res;
1022
+ }
1023
+ // ─── P3 + P4 — Knowledge graph API ──────────────────────────────────────
1024
+ /**
1025
+ * Fetch relation edges. All filters compose with AND. Uses indexed
1026
+ * lookups when from/to/predicate is set; otherwise sorted by most-recent.
1027
+ *
1028
+ * The P4 `asOf` filter implements bi-temporal recall: only edges whose
1029
+ * validity window contains `asOf` (and edges with no temporal info,
1030
+ * which are treated as "always valid") are returned. This is what
1031
+ * powers queries like "what did John think about X in January?"
1032
+ */
1033
+ async getRelations(input) {
1034
+ const parts = ['1=1'];
1035
+ const params = [];
1036
+ if (input.fromEntityId) {
1037
+ parts.push('from_entity_id = ?');
1038
+ params.push(input.fromEntityId);
1039
+ }
1040
+ if (input.toEntityId) {
1041
+ parts.push('to_entity_id = ?');
1042
+ params.push(input.toEntityId);
1043
+ }
1044
+ if (input.predicate) {
1045
+ parts.push('predicate = ?');
1046
+ params.push(input.predicate);
1047
+ }
1048
+ if (typeof input.asOf === 'number') {
1049
+ // Match if (valid_from IS NULL OR valid_from <= asOf)
1050
+ // AND (valid_to IS NULL OR valid_to > asOf)
1051
+ // Edges with no temporal info pass both clauses.
1052
+ parts.push('(valid_from IS NULL OR valid_from <= ?)');
1053
+ parts.push('(valid_to IS NULL OR valid_to > ?)');
1054
+ params.push(input.asOf, input.asOf);
1055
+ }
1056
+ const limit = clampLimit(input.limit ?? 200, 1000);
1057
+ const rows = this.db
1058
+ .prepare(`SELECT id, from_entity_id, to_entity_id, predicate, memory_id,
1059
+ confidence, valid_from, valid_to, recorded_at
1060
+ FROM relations
1061
+ WHERE ${parts.join(' AND ')}
1062
+ ORDER BY recorded_at DESC
1063
+ LIMIT ?`)
1064
+ .all(...params, limit);
1065
+ return rows;
1066
+ }
1067
+ /**
1068
+ * BFS traversal from a seed entity. Visits up to `depth` hops, following
1069
+ * BOTH outgoing and incoming edges so the user sees a complete
1070
+ * neighborhood. Each hop carries the edge that led to it.
1071
+ *
1072
+ * Respects the P4 `asOf` filter — only edges valid at that point in
1073
+ * time are followed.
1074
+ *
1075
+ * Bounds: depth is capped to 5 to keep dense graphs sane. Within each
1076
+ * hop we cap at 50 edges to avoid pathological star-graph blowups
1077
+ * (one super-node with thousands of mentions).
1078
+ */
1079
+ async traverseGraph(seedEntityId, opts = {}) {
1080
+ const depth = Math.max(0, Math.min(opts.depth ?? 2, 5));
1081
+ const seed = await this.getEntity(seedEntityId);
1082
+ if (!seed)
1083
+ return [];
1084
+ const visited = new Map();
1085
+ const queue = [
1086
+ { entityId: seedEntityId, depth: 0, via: null, direction: null },
1087
+ ];
1088
+ while (queue.length > 0) {
1089
+ const { entityId, depth: d, via, direction } = queue.shift();
1090
+ if (visited.has(entityId))
1091
+ continue;
1092
+ const e = entityId === seedEntityId ? seed : await this.getEntity(entityId);
1093
+ if (!e)
1094
+ continue;
1095
+ visited.set(entityId, { entity: e, via, direction, depth: d });
1096
+ if (d >= depth)
1097
+ continue;
1098
+ // Expand outgoing.
1099
+ const outgoing = await this.getRelations({
1100
+ fromEntityId: entityId,
1101
+ asOf: opts.asOf,
1102
+ limit: 50,
1103
+ });
1104
+ for (const rel of outgoing) {
1105
+ if (!visited.has(rel.to_entity_id)) {
1106
+ queue.push({ entityId: rel.to_entity_id, depth: d + 1, via: rel, direction: 'out' });
1107
+ }
1108
+ }
1109
+ // Expand incoming.
1110
+ const incoming = await this.getRelations({
1111
+ toEntityId: entityId,
1112
+ asOf: opts.asOf,
1113
+ limit: 50,
1114
+ });
1115
+ for (const rel of incoming) {
1116
+ if (!visited.has(rel.from_entity_id)) {
1117
+ queue.push({ entityId: rel.from_entity_id, depth: d + 1, via: rel, direction: 'in' });
1118
+ }
1119
+ }
1120
+ }
1121
+ // Stable order: depth ASC, then alpha for readable output.
1122
+ return Array.from(visited.values()).sort((a, b) => {
1123
+ if (a.depth !== b.depth)
1124
+ return a.depth - b.depth;
1125
+ return a.entity.display_name.localeCompare(b.entity.display_name);
1126
+ });
1127
+ }
1128
+ // ─── P5 — Self-revising memory (5a detection) ───────────────────────────
1129
+ async detectConsolidation(opts = {}) {
1130
+ return detectDuplicates(this.db, this.vecAvailable, opts);
1131
+ }
1132
+ async proposalsList(opts = {}) {
1133
+ return listProposals(this.db, opts);
1134
+ }
1135
+ async proposalReview(id, decision) {
1136
+ return reviewProposal(this.db, id, decision);
1137
+ }
1138
+ // ─── Procedural memory ──────────────────────────────────────────────────
1139
+ async saveProcedural(input) {
1140
+ return saveProcedural(this.db, input);
1141
+ }
1142
+ async getProcedural(name, namespace) {
1143
+ return getProceduralByName(this.db, name, namespace);
1144
+ }
1145
+ async listProcedural(opts = {}) {
1146
+ return listProcedural(this.db, opts);
1147
+ }
1148
+ async recallProcedural(name, namespace) {
1149
+ return recallProcedural(this.db, name, namespace);
1150
+ }
1151
+ async deleteProcedural(id) {
1152
+ return deleteProcedural(this.db, id);
1153
+ }
505
1154
  async close() {
506
1155
  this.db.close();
507
1156
  }