mnueron 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +123 -1
- package/dashboard/index.html +38 -0
- package/dist/cli.js +1187 -1
- package/dist/cli.js.map +1 -1
- package/dist/dashboard/server.js +95 -0
- package/dist/dashboard/server.js.map +1 -1
- package/dist/detectors/claude_desktop.js +79 -22
- package/dist/detectors/claude_desktop.js.map +1 -1
- package/dist/import/claude_cowork.js +359 -0
- package/dist/import/claude_cowork.js.map +1 -0
- package/dist/import/claude_desktop.js +196 -0
- package/dist/import/claude_desktop.js.map +1 -0
- package/dist/store/consolidator.js +168 -0
- package/dist/store/consolidator.js.map +1 -0
- package/dist/store/entity-extractor.js +283 -0
- package/dist/store/entity-extractor.js.map +1 -0
- package/dist/store/entity-resolver.js +378 -0
- package/dist/store/entity-resolver.js.map +1 -0
- package/dist/store/local.js +522 -14
- package/dist/store/local.js.map +1 -1
- package/dist/store/procedural.js +328 -0
- package/dist/store/procedural.js.map +1 -0
- package/dist/store/relation-extractor.js +292 -0
- package/dist/store/relation-extractor.js.map +1 -0
- package/dist/store/remote.js +182 -20
- package/dist/store/remote.js.map +1 -1
- package/dist/tools.js +84 -0
- package/dist/tools.js.map +1 -1
- package/dist/watch/cowork.js +137 -0
- package/dist/watch/cowork.js.map +1 -0
- package/package.json +1 -1
package/dist/store/local.js
CHANGED
|
@@ -5,6 +5,11 @@ import { dirname } from 'node:path';
|
|
|
5
5
|
import * as sqliteVec from 'sqlite-vec';
|
|
6
6
|
import { embed, embedBatch, EMBEDDING_DIM, preload } from './embeddings.js';
|
|
7
7
|
import { chunkContent, shouldChunk, DEFAULT_CHUNK_THRESHOLD } from './chunking.js';
|
|
8
|
+
import { extractEntities, shouldExtractEntities } from './entity-extractor.js';
|
|
9
|
+
import { resolveEntitiesForMemory } from './entity-resolver.js';
|
|
10
|
+
import { extractRelations, shouldExtractRelations } from './relation-extractor.js';
|
|
11
|
+
import { ensureConsolidationSchema, detectDuplicates, listProposals, reviewProposal, } from './consolidator.js';
|
|
12
|
+
import { ensureProceduralSchema, saveProcedural, getProceduralByName, listProcedural, recallProcedural, deleteProcedural, } from './procedural.js';
|
|
8
13
|
import { redact } from './redactor.js';
|
|
9
14
|
/**
|
|
10
15
|
* Build a SQL fragment + params for the shared filter shape used by
|
|
@@ -54,6 +59,34 @@ function buildFilterFragment(f, alias = 'm') {
|
|
|
54
59
|
}
|
|
55
60
|
return { sql: parts.join(' AND '), params };
|
|
56
61
|
}
|
|
62
|
+
/**
 * Clamp a caller-supplied LIMIT to a sensible max — prevents accidental
 * `LIMIT 999999` exhausting memory on large stores.
 *
 * The request is floored to a whole number BEFORE it is validated, so a
 * fractional value in (0, 1) is treated as invalid rather than silently
 * turning into `LIMIT 0` (which would return no rows at all).
 *
 * @param {number} want - Requested row count. Anything that is not a finite
 *   number >= 1 after flooring falls back to the default of 100.
 * @param {number} max - Upper bound for the returned limit.
 * @returns {number} `min(floor(want), max)` for a valid request, otherwise
 *   `min(100, max)`.
 */
function clampLimit(want, max) {
    const DEFAULT_LIMIT = 100;
    // Number.isFinite does not coerce, so strings / null / undefined land in
    // the fallback branch exactly like NaN and ±Infinity do.
    const requested = Number.isFinite(want) ? Math.floor(want) : 0;
    if (requested < 1)
        return Math.min(DEFAULT_LIMIT, max);
    return Math.min(requested, max);
}
|
|
69
|
+
/**
 * Convert a raw `entities` table row into the public Entity object.
 *
 * `aliases_json` is decoded defensively: if it is not valid JSON, or does
 * not decode to an array, the entity is returned with an empty alias list
 * instead of throwing. Non-string array members are dropped.
 *
 * @param {object} row - Row with id, display_name, entity_type,
 *   aliases_json, mention_count, first_seen_at and last_seen_at columns.
 * @returns {object} Entity with the same scalar fields plus `aliases`
 *   (array of strings).
 */
function rowToEntity(row) {
    const aliases = [];
    try {
        const decoded = JSON.parse(row.aliases_json);
        if (Array.isArray(decoded)) {
            for (const candidate of decoded) {
                if (typeof candidate === 'string')
                    aliases.push(candidate);
            }
        }
    }
    catch {
        // Malformed alias JSON: fall through with the empty list.
    }
    const { id, display_name, entity_type, mention_count, first_seen_at, last_seen_at } = row;
    return {
        id,
        display_name,
        entity_type,
        aliases,
        mention_count,
        first_seen_at,
        last_seen_at,
    };
}
|
|
57
90
|
/**
|
|
58
91
|
* Run pre-save transforms in fixed order:
|
|
59
92
|
* 1. Redact secrets — never store API keys / JWTs / etc.
|
|
@@ -91,19 +124,36 @@ const FTS_STOP_WORDS = new Set([
|
|
|
91
124
|
]);
|
|
92
125
|
/**
|
|
93
126
|
* Translate a natural-language query into an FTS5 MATCH expression.
|
|
94
|
-
* - strips FTS5 control characters
|
|
127
|
+
* - strips FTS5 control characters AND user-facing punctuation that
|
|
128
|
+
* trips FTS5's parser (., /, ', etc.). FTS5's grammar treats `.` and
|
|
129
|
+
* apostrophes as separators between identifiers — `"redeploy.sh"` is
|
|
130
|
+
* parsed as "redeploy" "." "sh" and chokes. We replace all of these
|
|
131
|
+
* with spaces BEFORE tokenizing so the resulting tokens are pure
|
|
132
|
+
* alphanumeric-plus-underscore.
|
|
95
133
|
* - lowercases
|
|
96
134
|
* - drops stop words and 1-character tokens
|
|
97
135
|
* - prefix-matches each surviving token (`token*`) so "stores" matches "stored"
|
|
98
136
|
* - ORs the tokens — any one is enough, BM25 ranks multi-hit rows higher
|
|
137
|
+
*
|
|
138
|
+
* Regex characters we strip:
|
|
139
|
+
* " ( ) * : ^ ~ — FTS5 grammar
|
|
140
|
+
* . , ; / \ ' ` — punctuation that breaks FTS5 token boundaries
|
|
141
|
+
* ! ? & | = + - # @ $ — user-typed but unsafe in MATCH
|
|
142
|
+
* This is permissive: any non-[a-z0-9_] char is replaced with a space
|
|
143
|
+
* inside `buildFtsQuery`, so we don't have to enumerate every case.
|
|
99
144
|
*/
|
|
100
145
|
function buildFtsQuery(raw) {
|
|
101
|
-
|
|
146
|
+
// First: collapse anything that isn't a word-char into a space. This is
|
|
147
|
+
// safer than maintaining a denylist — FTS5 only consumes word tokens
|
|
148
|
+
// anyway, so we lose nothing by pre-flattening punctuation.
|
|
149
|
+
const cleaned = raw
|
|
150
|
+
.toLowerCase()
|
|
151
|
+
.replace(/[^a-z0-9_]+/g, ' ')
|
|
152
|
+
.trim();
|
|
102
153
|
if (!cleaned)
|
|
103
154
|
return '';
|
|
104
155
|
const tokens = cleaned
|
|
105
156
|
.split(/\s+/)
|
|
106
|
-
.map(t => t.replace(/^[^a-z0-9_]+|[^a-z0-9_]+$/g, ''))
|
|
107
157
|
.filter(t => t.length >= 2 && !FTS_STOP_WORDS.has(t));
|
|
108
158
|
if (tokens.length === 0)
|
|
109
159
|
return '';
|
|
@@ -206,6 +256,83 @@ export class LocalProvider {
|
|
|
206
256
|
);
|
|
207
257
|
`);
|
|
208
258
|
}
|
|
259
|
+
// ── P2.3 — Entity resolution tables ──────────────────────────────────
|
|
260
|
+
// Canonical entities: one row per unique entity (person/org/project/...)
|
|
261
|
+
// resolved across all memories. `mention_count` and `last_seen_at` make
|
|
262
|
+
// it trivial to show "who/what is most active in your store right now".
|
|
263
|
+
//
|
|
264
|
+
// memory_entities: many-to-many join — one row per (memory, canonical
|
|
265
|
+
// entity) pair. `surface_form` is what the memory text actually said
|
|
266
|
+
// (e.g., "Johnny" → resolved to canonical "John Doe"); `confidence`
|
|
267
|
+
// ranges in [0, 1] from exact match (1.0) down through embedding
|
|
268
|
+
// similarity and LLM tiebreak picks (0.65-0.85).
|
|
269
|
+
this.db.exec(`
|
|
270
|
+
CREATE TABLE IF NOT EXISTS entities (
|
|
271
|
+
id TEXT PRIMARY KEY,
|
|
272
|
+
display_name TEXT NOT NULL,
|
|
273
|
+
entity_type TEXT NOT NULL,
|
|
274
|
+
aliases_json TEXT NOT NULL DEFAULT '[]',
|
|
275
|
+
mention_count INTEGER NOT NULL DEFAULT 0,
|
|
276
|
+
first_seen_at INTEGER NOT NULL,
|
|
277
|
+
last_seen_at INTEGER NOT NULL
|
|
278
|
+
);
|
|
279
|
+
CREATE INDEX IF NOT EXISTS idx_entities_type
|
|
280
|
+
ON entities(entity_type);
|
|
281
|
+
CREATE INDEX IF NOT EXISTS idx_entities_last_seen
|
|
282
|
+
ON entities(last_seen_at DESC);
|
|
283
|
+
|
|
284
|
+
CREATE TABLE IF NOT EXISTS memory_entities (
|
|
285
|
+
memory_id TEXT NOT NULL,
|
|
286
|
+
entity_id TEXT NOT NULL,
|
|
287
|
+
surface_form TEXT NOT NULL,
|
|
288
|
+
confidence REAL NOT NULL,
|
|
289
|
+
PRIMARY KEY (memory_id, entity_id)
|
|
290
|
+
);
|
|
291
|
+
CREATE INDEX IF NOT EXISTS idx_memory_entities_entity
|
|
292
|
+
ON memory_entities(entity_id);
|
|
293
|
+
|
|
294
|
+
-- P3 — Knowledge-graph edges. Each row is a triple (from, predicate,
|
|
295
|
+
-- to) plus provenance (memory_id) + confidence. P4 forward-looking
|
|
296
|
+
-- columns (valid_from / valid_to) are added now so bi-temporal
|
|
297
|
+
-- queries don't require a schema migration later.
|
|
298
|
+
CREATE TABLE IF NOT EXISTS relations (
|
|
299
|
+
id TEXT PRIMARY KEY,
|
|
300
|
+
from_entity_id TEXT NOT NULL,
|
|
301
|
+
to_entity_id TEXT NOT NULL,
|
|
302
|
+
predicate TEXT NOT NULL,
|
|
303
|
+
memory_id TEXT NOT NULL,
|
|
304
|
+
confidence REAL NOT NULL,
|
|
305
|
+
valid_from INTEGER,
|
|
306
|
+
valid_to INTEGER,
|
|
307
|
+
recorded_at INTEGER NOT NULL
|
|
308
|
+
);
|
|
309
|
+
CREATE INDEX IF NOT EXISTS idx_relations_from
|
|
310
|
+
ON relations(from_entity_id);
|
|
311
|
+
CREATE INDEX IF NOT EXISTS idx_relations_to
|
|
312
|
+
ON relations(to_entity_id);
|
|
313
|
+
CREATE INDEX IF NOT EXISTS idx_relations_predicate
|
|
314
|
+
ON relations(predicate);
|
|
315
|
+
CREATE INDEX IF NOT EXISTS idx_relations_memory
|
|
316
|
+
ON relations(memory_id);
|
|
317
|
+
CREATE INDEX IF NOT EXISTS idx_relations_valid_to
|
|
318
|
+
ON relations(valid_to);
|
|
319
|
+
`);
|
|
320
|
+
if (this.vecAvailable) {
|
|
321
|
+
// Embedding index for entity name+context strings. Used by the
|
|
322
|
+
// resolver's vector-similarity stage when finding candidate matches
|
|
323
|
+
// for a freshly extracted entity.
|
|
324
|
+
this.db.exec(`
|
|
325
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS entities_vec
|
|
326
|
+
USING vec0(
|
|
327
|
+
entity_id TEXT PRIMARY KEY,
|
|
328
|
+
embedding float[${EMBEDDING_DIM}]
|
|
329
|
+
);
|
|
330
|
+
`);
|
|
331
|
+
}
|
|
332
|
+
// P5 — Consolidation proposal table (idempotent).
|
|
333
|
+
ensureConsolidationSchema(this.db);
|
|
334
|
+
// Procedural memory table (idempotent). Mem0 leapfrog feature.
|
|
335
|
+
ensureProceduralSchema(this.db);
|
|
209
336
|
}
|
|
210
337
|
// ─── write path ──────────────────────────────────────────────────────────
|
|
211
338
|
async save(input) {
|
|
@@ -213,7 +340,39 @@ export class LocalProvider {
|
|
|
213
340
|
// boundaries can't slip through. Single source of truth for what
|
|
214
341
|
// hits SQLite.
|
|
215
342
|
const transformed = preSaveTransform(input);
|
|
216
|
-
// 2.
|
|
343
|
+
// 2. P1 — entity extraction. SECURITY-CRITICAL: capture and strip BYOK
|
|
344
|
+
// keys from metadata BEFORE the gate check, mirroring the hosted
|
|
345
|
+
// backend's ordering. Short-content saves with BYOK keys still get
|
|
346
|
+
// keys scrubbed even when extraction is skipped.
|
|
347
|
+
const meta = transformed.metadata ?? {};
|
|
348
|
+
const byokAnthropic = typeof meta.byok_anthropic_key === 'string'
|
|
349
|
+
? meta.byok_anthropic_key : undefined;
|
|
350
|
+
const byokOpenAI = typeof meta.byok_openai_key === 'string'
|
|
351
|
+
? meta.byok_openai_key : undefined;
|
|
352
|
+
if (byokAnthropic)
|
|
353
|
+
delete meta.byok_anthropic_key;
|
|
354
|
+
if (byokOpenAI)
|
|
355
|
+
delete meta.byok_openai_key;
|
|
356
|
+
transformed.metadata = meta;
|
|
357
|
+
if (shouldExtractEntities(transformed.content.length, transformed.metadata)) {
|
|
358
|
+
// Explicit opt-in (metadata.extract_entities or BYOK) bypasses the
|
|
359
|
+
// 200-char min-length floor. Otherwise (env-var default path) the
|
|
360
|
+
// floor still applies as a guardrail against burning money on
|
|
361
|
+
// one-liner autosaves.
|
|
362
|
+
const meta = transformed.metadata;
|
|
363
|
+
const explicit = meta?.extract_entities === true ||
|
|
364
|
+
(typeof byokAnthropic === 'string' && byokAnthropic.length > 0) ||
|
|
365
|
+
(typeof byokOpenAI === 'string' && byokOpenAI.length > 0);
|
|
366
|
+
const entities = await extractEntities(transformed.content, {
|
|
367
|
+
anthropicKey: byokAnthropic,
|
|
368
|
+
openaiKey: byokOpenAI,
|
|
369
|
+
...(explicit ? { minChars: 1 } : {}),
|
|
370
|
+
});
|
|
371
|
+
if (entities.length > 0) {
|
|
372
|
+
transformed.metadata = { ...(transformed.metadata ?? {}), entities };
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
// 3. Long content gets auto-chunked into multiple memories. Each chunk
|
|
217
376
|
// becomes a searchable atomic memory; the original conversation is
|
|
218
377
|
// linkable via `parent_ref` (= source_ref + chunk_index in metadata).
|
|
219
378
|
if (shouldChunk(transformed.content)) {
|
|
@@ -247,12 +406,84 @@ export class LocalProvider {
|
|
|
247
406
|
}
|
|
248
407
|
});
|
|
249
408
|
tx();
|
|
409
|
+
// ── P2.3 — Entity resolution ────────────────────────────────────────
|
|
410
|
+
// If P1 extraction stamped `metadata.entities`, resolve each one to a
|
|
411
|
+
// canonical entity (reuse OR create), insert memory_entities edges,
|
|
412
|
+
// and write the resolved canonical_ids back onto the stored metadata.
|
|
413
|
+
//
|
|
414
|
+
// This runs AFTER the memory row exists so the resolver has a valid
|
|
415
|
+
// memory_id to link to. It also runs OUTSIDE the transaction because
|
|
416
|
+
// embeddings + LLM tiebreak are async; if those fail mid-way, the
|
|
417
|
+
// memory still saved successfully (fail-open contract).
|
|
418
|
+
let finalMetadata = input.metadata;
|
|
419
|
+
const extractedEntities = Array.isArray(input.metadata?.entities)
|
|
420
|
+
? input.metadata.entities
|
|
421
|
+
: [];
|
|
422
|
+
if (extractedEntities.length > 0) {
|
|
423
|
+
try {
|
|
424
|
+
const meta = input.metadata;
|
|
425
|
+
const byokAnthropic = typeof meta.byok_anthropic_key === 'string'
|
|
426
|
+
? meta.byok_anthropic_key : undefined;
|
|
427
|
+
const resolutions = await resolveEntitiesForMemory(this.db, id, extractedEntities.map((e) => ({
|
|
428
|
+
name: e.name,
|
|
429
|
+
type: e.type,
|
|
430
|
+
context: e.context,
|
|
431
|
+
})), this.vecAvailable, { anthropicKey: byokAnthropic });
|
|
432
|
+
// Stamp canonical_id back onto each entity in the stored metadata.
|
|
433
|
+
const entitiesWithIds = extractedEntities.map((e, i) => ({
|
|
434
|
+
...e,
|
|
435
|
+
canonical_id: resolutions[i]?.canonical_id ?? null,
|
|
436
|
+
}));
|
|
437
|
+
finalMetadata = { ...(input.metadata ?? {}), entities: entitiesWithIds };
|
|
438
|
+
this.db.prepare(`UPDATE memories SET meta_json = ?, updated_at = ? WHERE id = ?`)
|
|
439
|
+
.run(JSON.stringify(finalMetadata), now, id);
|
|
440
|
+
// ── P3 — Relationship extraction ────────────────────────────────
|
|
441
|
+
// Once we have resolved canonical entities, ask the LLM what
|
|
442
|
+
// relationships exist between them. This populates `relations`
|
|
443
|
+
// edges that form the knowledge-graph layer. Gated separately
|
|
444
|
+
// from entity extraction so users can have entities-only without
|
|
445
|
+
// paying the second Haiku call.
|
|
446
|
+
const resolvedForRelations = extractedEntities
|
|
447
|
+
.map((e, i) => ({
|
|
448
|
+
canonical_id: resolutions[i]?.canonical_id ?? null,
|
|
449
|
+
name: e.name,
|
|
450
|
+
type: e.type,
|
|
451
|
+
}))
|
|
452
|
+
.filter((e) => !!e.canonical_id);
|
|
453
|
+
if (shouldExtractRelations(input.content.length, resolvedForRelations.length, input.metadata)) {
|
|
454
|
+
try {
|
|
455
|
+
const meta = input.metadata;
|
|
456
|
+
const byokAnthropic = typeof meta.byok_anthropic_key === 'string'
|
|
457
|
+
? meta.byok_anthropic_key : undefined;
|
|
458
|
+
const relations = await extractRelations(input.content, resolvedForRelations, { anthropicKey: byokAnthropic });
|
|
459
|
+
if (relations.length > 0) {
|
|
460
|
+
const insertRel = this.db.prepare(`INSERT INTO relations
|
|
461
|
+
(id, from_entity_id, to_entity_id, predicate, memory_id,
|
|
462
|
+
confidence, valid_from, valid_to, recorded_at)
|
|
463
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
464
|
+
const tx2 = this.db.transaction(() => {
|
|
465
|
+
for (const r of relations) {
|
|
466
|
+
insertRel.run(randomUUID(), r.from_canonical_id, r.to_canonical_id, r.predicate, id, r.confidence, r.valid_from, r.valid_to, now);
|
|
467
|
+
}
|
|
468
|
+
});
|
|
469
|
+
tx2();
|
|
470
|
+
}
|
|
471
|
+
}
|
|
472
|
+
catch (e) {
|
|
473
|
+
console.warn('[mnueron/local] relation extraction failed (memory + entities saved):', e instanceof Error ? e.message : e);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
catch (e) {
|
|
478
|
+
console.warn('[mnueron/local] entity resolution failed (memory saved without canonical_ids):', e instanceof Error ? e.message : e);
|
|
479
|
+
}
|
|
480
|
+
}
|
|
250
481
|
return this.rowToMemory({
|
|
251
482
|
id, namespace: ns, content: input.content,
|
|
252
483
|
tags_json: JSON.stringify(tags),
|
|
253
484
|
source: input.source ?? 'manual',
|
|
254
485
|
source_ref: input.source_ref ?? null,
|
|
255
|
-
meta_json:
|
|
486
|
+
meta_json: finalMetadata ? JSON.stringify(finalMetadata) : null,
|
|
256
487
|
created_at: now, updated_at: now,
|
|
257
488
|
});
|
|
258
489
|
}
|
|
@@ -558,10 +789,13 @@ export class LocalProvider {
|
|
|
558
789
|
const existing = this.db.prepare(`SELECT * FROM memories WHERE id = ?`).get(id);
|
|
559
790
|
if (!existing)
|
|
560
791
|
return null;
|
|
561
|
-
// Build merged metadata + history entry
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
792
|
+
// Build merged metadata + history entry. Note: the column is `meta_json`,
|
|
793
|
+
// not `metadata` (same schema mismatch that broke updates before P2.3
|
|
794
|
+
// backfill ran).
|
|
795
|
+
const metaCol = existing.meta_json ?? existing.metadata;
|
|
796
|
+
const priorMeta = typeof metaCol === 'string'
|
|
797
|
+
? JSON.parse(metaCol || '{}')
|
|
798
|
+
: (metaCol ?? {});
|
|
565
799
|
const merged = { ...priorMeta };
|
|
566
800
|
if (patch.metadata && typeof patch.metadata === 'object') {
|
|
567
801
|
for (const [k, v] of Object.entries(patch.metadata)) {
|
|
@@ -584,13 +818,14 @@ export class LocalProvider {
|
|
|
584
818
|
merged.history = history;
|
|
585
819
|
}
|
|
586
820
|
const nextNs = patch.namespace ?? existing.namespace;
|
|
587
|
-
const nextTags = patch.tags
|
|
821
|
+
const nextTags = patch.tags
|
|
822
|
+
?? JSON.parse((existing.tags_json ?? existing.tags) ?? '[]');
|
|
588
823
|
const now = Date.now();
|
|
589
824
|
this.db.prepare(`UPDATE memories
|
|
590
|
-
SET content
|
|
591
|
-
namespace
|
|
592
|
-
|
|
593
|
-
|
|
825
|
+
SET content = ?,
|
|
826
|
+
namespace = ?,
|
|
827
|
+
tags_json = ?,
|
|
828
|
+
meta_json = ?,
|
|
594
829
|
updated_at = ?
|
|
595
830
|
WHERE id = ?`).run(nextContent, nextNs, JSON.stringify(nextTags), JSON.stringify(merged), now, id);
|
|
596
831
|
// If content changed, re-index FTS + (optionally) re-embed.
|
|
@@ -643,6 +878,279 @@ export class LocalProvider {
|
|
|
643
878
|
last_updated: r.last_updated ?? 0,
|
|
644
879
|
}));
|
|
645
880
|
}
|
|
881
|
+
// ─── P2.3 — Entity API ──────────────────────────────────────────────────
|
|
882
|
+
/**
|
|
883
|
+
* List canonical entities with optional type filter, free-text query
|
|
884
|
+
* against display_name + aliases, and sort. Default sort: most-recently-seen.
|
|
885
|
+
*/
|
|
886
|
+
async listEntities(input = {}) {
|
|
887
|
+
const limit = clampLimit(input.limit ?? 100, 500);
|
|
888
|
+
const offset = Math.max(0, input.offset ?? 0);
|
|
889
|
+
const parts = ['1=1'];
|
|
890
|
+
const params = [];
|
|
891
|
+
if (input.type) {
|
|
892
|
+
parts.push('entity_type = ?');
|
|
893
|
+
params.push(input.type);
|
|
894
|
+
}
|
|
895
|
+
if (input.q && input.q.trim()) {
|
|
896
|
+
// Match display_name OR any alias (case-insensitive substring).
|
|
897
|
+
parts.push(`(
|
|
898
|
+
lower(display_name) LIKE lower('%' || ? || '%')
|
|
899
|
+
OR EXISTS (
|
|
900
|
+
SELECT 1 FROM json_each(aliases_json) AS a
|
|
901
|
+
WHERE lower(a.value) LIKE lower('%' || ? || '%')
|
|
902
|
+
)
|
|
903
|
+
)`);
|
|
904
|
+
params.push(input.q.trim(), input.q.trim());
|
|
905
|
+
}
|
|
906
|
+
const orderBy = (() => {
|
|
907
|
+
switch (input.sort) {
|
|
908
|
+
case 'mentions': return 'mention_count DESC, last_seen_at DESC';
|
|
909
|
+
case 'alpha': return 'lower(display_name) ASC';
|
|
910
|
+
default: return 'last_seen_at DESC'; // 'recent'
|
|
911
|
+
}
|
|
912
|
+
})();
|
|
913
|
+
const rows = this.db
|
|
914
|
+
.prepare(`SELECT * FROM entities WHERE ${parts.join(' AND ')} ORDER BY ${orderBy} LIMIT ? OFFSET ?`)
|
|
915
|
+
.all(...params, limit, offset);
|
|
916
|
+
return rows.map(rowToEntity);
|
|
917
|
+
}
|
|
918
|
+
/** Single canonical entity by id, or null if not found. */
|
|
919
|
+
async getEntity(id) {
|
|
920
|
+
const row = this.db
|
|
921
|
+
.prepare(`SELECT * FROM entities WHERE id = ? LIMIT 1`)
|
|
922
|
+
.get(id);
|
|
923
|
+
return row ? rowToEntity(row) : null;
|
|
924
|
+
}
|
|
925
|
+
/**
|
|
926
|
+
* All memories linked to a canonical entity, most recent first. Includes
|
|
927
|
+
* the original surface_form so callers can render "John (mentioned as
|
|
928
|
+
* 'Johnny')". Caps at `limit` (default 100, max 500) — entity histories
|
|
929
|
+
* can get long.
|
|
930
|
+
*/
|
|
931
|
+
async getEntityMemories(id, limit = 100) {
|
|
932
|
+
const cap = clampLimit(limit, 500);
|
|
933
|
+
const rows = this.db
|
|
934
|
+
.prepare(`SELECT m.id, m.namespace, m.content, m.tags_json, m.source, m.source_ref,
|
|
935
|
+
m.meta_json, m.created_at, m.updated_at,
|
|
936
|
+
me.surface_form, me.confidence
|
|
937
|
+
FROM memory_entities me
|
|
938
|
+
JOIN memories m ON m.id = me.memory_id
|
|
939
|
+
WHERE me.entity_id = ?
|
|
940
|
+
ORDER BY m.created_at DESC
|
|
941
|
+
LIMIT ?`)
|
|
942
|
+
.all(id, cap);
|
|
943
|
+
return rows.map((r) => ({
|
|
944
|
+
...this.rowToMemory(r),
|
|
945
|
+
surface_form: r.surface_form,
|
|
946
|
+
confidence: r.confidence,
|
|
947
|
+
}));
|
|
948
|
+
}
|
|
949
|
+
/**
|
|
950
|
+
* Merge two canonical entities. After merge:
|
|
951
|
+
* • loserId is hard-deleted from `entities` + `entities_vec`.
|
|
952
|
+
* • All memory_entities rows pointing at loserId are repointed at winnerId.
|
|
953
|
+
* If the winner already has an edge to the same memory, we keep the
|
|
954
|
+
* stronger-confidence one and drop the duplicate.
|
|
955
|
+
* • Aliases from loser are absorbed into winner (deduped).
|
|
956
|
+
* • mention_count is summed; first_seen_at = min, last_seen_at = max.
|
|
957
|
+
*
|
|
958
|
+
* Returns the merged winner row, or null if either id is missing.
|
|
959
|
+
*
|
|
960
|
+
* This runs in a single SQL transaction. Future enhancement: emit a
|
|
961
|
+
* `entity_merge_log` row so merges are auditable / reversible.
|
|
962
|
+
*/
|
|
963
|
+
async mergeEntities(winnerId, loserId) {
|
|
964
|
+
if (winnerId === loserId)
|
|
965
|
+
return this.getEntity(winnerId);
|
|
966
|
+
const winner = this.db.prepare(`SELECT * FROM entities WHERE id = ?`).get(winnerId);
|
|
967
|
+
const loser = this.db.prepare(`SELECT * FROM entities WHERE id = ?`).get(loserId);
|
|
968
|
+
if (!winner || !loser)
|
|
969
|
+
return null;
|
|
970
|
+
let winnerAliases = [];
|
|
971
|
+
let loserAliases = [];
|
|
972
|
+
try {
|
|
973
|
+
winnerAliases = JSON.parse(winner.aliases_json);
|
|
974
|
+
}
|
|
975
|
+
catch { /* */ }
|
|
976
|
+
try {
|
|
977
|
+
loserAliases = JSON.parse(loser.aliases_json);
|
|
978
|
+
}
|
|
979
|
+
catch { /* */ }
|
|
980
|
+
const mergedAliases = Array.from(new Set([...winnerAliases, ...loserAliases, loser.display_name]));
|
|
981
|
+
const tx = this.db.transaction(() => {
|
|
982
|
+
// Repoint edges. INSERT-OR-IGNORE then DELETE-old, with confidence MAX
|
|
983
|
+
// fold to preserve the strongest edge if both winner and loser shared
|
|
984
|
+
// a memory.
|
|
985
|
+
this.db.prepare(`INSERT INTO memory_entities (memory_id, entity_id, surface_form, confidence)
|
|
986
|
+
SELECT memory_id, ?, surface_form, confidence
|
|
987
|
+
FROM memory_entities WHERE entity_id = ?
|
|
988
|
+
ON CONFLICT(memory_id, entity_id) DO UPDATE SET
|
|
989
|
+
confidence = MAX(memory_entities.confidence, excluded.confidence)`).run(winnerId, loserId);
|
|
990
|
+
this.db.prepare(`DELETE FROM memory_entities WHERE entity_id = ?`).run(loserId);
|
|
991
|
+
// Update winner aggregate.
|
|
992
|
+
this.db.prepare(`UPDATE entities SET
|
|
993
|
+
aliases_json = ?,
|
|
994
|
+
mention_count = mention_count + ?,
|
|
995
|
+
first_seen_at = MIN(first_seen_at, ?),
|
|
996
|
+
last_seen_at = MAX(last_seen_at, ?)
|
|
997
|
+
WHERE id = ?`).run(JSON.stringify(mergedAliases), loser.mention_count, loser.first_seen_at, loser.last_seen_at, winnerId);
|
|
998
|
+
// Delete loser everywhere.
|
|
999
|
+
if (this.vecAvailable) {
|
|
1000
|
+
try {
|
|
1001
|
+
this.db.prepare(`DELETE FROM entities_vec WHERE entity_id = ?`).run(loserId);
|
|
1002
|
+
}
|
|
1003
|
+
catch { /* vec0 sometimes lacks DELETE; non-fatal */ }
|
|
1004
|
+
}
|
|
1005
|
+
this.db.prepare(`DELETE FROM entities WHERE id = ?`).run(loserId);
|
|
1006
|
+
});
|
|
1007
|
+
tx();
|
|
1008
|
+
return this.getEntity(winnerId);
|
|
1009
|
+
}
|
|
1010
|
+
/**
|
|
1011
|
+
* P2.3 backfill — run the resolver against entities that already exist
|
|
1012
|
+
* in a saved memory's metadata.entities. Used by
|
|
1013
|
+
* `mnueron entities backfill` to retro-fit canonical IDs onto memories
|
|
1014
|
+
* saved before the resolver shipped. Returns the resolutions parallel
|
|
1015
|
+
* to `extracted` so the caller can update metadata.
|
|
1016
|
+
*/
|
|
1017
|
+
async backfillResolveMemory(memoryId, extracted, opts = {}) {
|
|
1018
|
+
if (extracted.length === 0)
|
|
1019
|
+
return [];
|
|
1020
|
+
const res = await resolveEntitiesForMemory(this.db, memoryId, extracted, this.vecAvailable, { anthropicKey: opts.anthropicKey });
|
|
1021
|
+
return res;
|
|
1022
|
+
}
|
|
1023
|
+
// ─── P3 + P4 — Knowledge graph API ──────────────────────────────────────
|
|
1024
|
+
/**
|
|
1025
|
+
* Fetch relation edges. All filters compose with AND. Uses indexed
|
|
1026
|
+
* lookups when from/to/predicate is set; otherwise sorted by most-recent.
|
|
1027
|
+
*
|
|
1028
|
+
* The P4 `asOf` filter implements bi-temporal recall: only edges whose
|
|
1029
|
+
* validity window contains `asOf` (and edges with no temporal info,
|
|
1030
|
+
* which are treated as "always valid") are returned. This is what
|
|
1031
|
+
* powers queries like "what did John think about X in January?"
|
|
1032
|
+
*/
|
|
1033
|
+
async getRelations(input) {
|
|
1034
|
+
const parts = ['1=1'];
|
|
1035
|
+
const params = [];
|
|
1036
|
+
if (input.fromEntityId) {
|
|
1037
|
+
parts.push('from_entity_id = ?');
|
|
1038
|
+
params.push(input.fromEntityId);
|
|
1039
|
+
}
|
|
1040
|
+
if (input.toEntityId) {
|
|
1041
|
+
parts.push('to_entity_id = ?');
|
|
1042
|
+
params.push(input.toEntityId);
|
|
1043
|
+
}
|
|
1044
|
+
if (input.predicate) {
|
|
1045
|
+
parts.push('predicate = ?');
|
|
1046
|
+
params.push(input.predicate);
|
|
1047
|
+
}
|
|
1048
|
+
if (typeof input.asOf === 'number') {
|
|
1049
|
+
// Match if (valid_from IS NULL OR valid_from <= asOf)
|
|
1050
|
+
// AND (valid_to IS NULL OR valid_to > asOf)
|
|
1051
|
+
// Edges with no temporal info pass both clauses.
|
|
1052
|
+
parts.push('(valid_from IS NULL OR valid_from <= ?)');
|
|
1053
|
+
parts.push('(valid_to IS NULL OR valid_to > ?)');
|
|
1054
|
+
params.push(input.asOf, input.asOf);
|
|
1055
|
+
}
|
|
1056
|
+
const limit = clampLimit(input.limit ?? 200, 1000);
|
|
1057
|
+
const rows = this.db
|
|
1058
|
+
.prepare(`SELECT id, from_entity_id, to_entity_id, predicate, memory_id,
|
|
1059
|
+
confidence, valid_from, valid_to, recorded_at
|
|
1060
|
+
FROM relations
|
|
1061
|
+
WHERE ${parts.join(' AND ')}
|
|
1062
|
+
ORDER BY recorded_at DESC
|
|
1063
|
+
LIMIT ?`)
|
|
1064
|
+
.all(...params, limit);
|
|
1065
|
+
return rows;
|
|
1066
|
+
}
|
|
1067
|
+
/**
|
|
1068
|
+
* BFS traversal from a seed entity. Visits up to `depth` hops, following
|
|
1069
|
+
* BOTH outgoing and incoming edges so the user sees a complete
|
|
1070
|
+
* neighborhood. Each hop carries the edge that led to it.
|
|
1071
|
+
*
|
|
1072
|
+
* Respects the P4 `asOf` filter — only edges valid at that point in
|
|
1073
|
+
* time are followed.
|
|
1074
|
+
*
|
|
1075
|
+
* Bounds: depth is capped to 5 to keep dense graphs sane. Within each
|
|
1076
|
+
* hop we cap at 50 edges to avoid pathological star-graph blowups
|
|
1077
|
+
* (one super-node with thousands of mentions).
|
|
1078
|
+
*/
|
|
1079
|
+
async traverseGraph(seedEntityId, opts = {}) {
|
|
1080
|
+
const depth = Math.max(0, Math.min(opts.depth ?? 2, 5));
|
|
1081
|
+
const seed = await this.getEntity(seedEntityId);
|
|
1082
|
+
if (!seed)
|
|
1083
|
+
return [];
|
|
1084
|
+
const visited = new Map();
|
|
1085
|
+
const queue = [
|
|
1086
|
+
{ entityId: seedEntityId, depth: 0, via: null, direction: null },
|
|
1087
|
+
];
|
|
1088
|
+
while (queue.length > 0) {
|
|
1089
|
+
const { entityId, depth: d, via, direction } = queue.shift();
|
|
1090
|
+
if (visited.has(entityId))
|
|
1091
|
+
continue;
|
|
1092
|
+
const e = entityId === seedEntityId ? seed : await this.getEntity(entityId);
|
|
1093
|
+
if (!e)
|
|
1094
|
+
continue;
|
|
1095
|
+
visited.set(entityId, { entity: e, via, direction, depth: d });
|
|
1096
|
+
if (d >= depth)
|
|
1097
|
+
continue;
|
|
1098
|
+
// Expand outgoing.
|
|
1099
|
+
const outgoing = await this.getRelations({
|
|
1100
|
+
fromEntityId: entityId,
|
|
1101
|
+
asOf: opts.asOf,
|
|
1102
|
+
limit: 50,
|
|
1103
|
+
});
|
|
1104
|
+
for (const rel of outgoing) {
|
|
1105
|
+
if (!visited.has(rel.to_entity_id)) {
|
|
1106
|
+
queue.push({ entityId: rel.to_entity_id, depth: d + 1, via: rel, direction: 'out' });
|
|
1107
|
+
}
|
|
1108
|
+
}
|
|
1109
|
+
// Expand incoming.
|
|
1110
|
+
const incoming = await this.getRelations({
|
|
1111
|
+
toEntityId: entityId,
|
|
1112
|
+
asOf: opts.asOf,
|
|
1113
|
+
limit: 50,
|
|
1114
|
+
});
|
|
1115
|
+
for (const rel of incoming) {
|
|
1116
|
+
if (!visited.has(rel.from_entity_id)) {
|
|
1117
|
+
queue.push({ entityId: rel.from_entity_id, depth: d + 1, via: rel, direction: 'in' });
|
|
1118
|
+
}
|
|
1119
|
+
}
|
|
1120
|
+
}
|
|
1121
|
+
// Stable order: depth ASC, then alpha for readable output.
|
|
1122
|
+
return Array.from(visited.values()).sort((a, b) => {
|
|
1123
|
+
if (a.depth !== b.depth)
|
|
1124
|
+
return a.depth - b.depth;
|
|
1125
|
+
return a.entity.display_name.localeCompare(b.entity.display_name);
|
|
1126
|
+
});
|
|
1127
|
+
}
|
|
1128
|
+
// ─── P5 — Self-revising memory (5a detection) ───────────────────────────
|
|
1129
|
+
async detectConsolidation(opts = {}) {
|
|
1130
|
+
return detectDuplicates(this.db, this.vecAvailable, opts);
|
|
1131
|
+
}
|
|
1132
|
+
async proposalsList(opts = {}) {
|
|
1133
|
+
return listProposals(this.db, opts);
|
|
1134
|
+
}
|
|
1135
|
+
async proposalReview(id, decision) {
|
|
1136
|
+
return reviewProposal(this.db, id, decision);
|
|
1137
|
+
}
|
|
1138
|
+
// ─── Procedural memory ──────────────────────────────────────────────────
|
|
1139
|
+
async saveProcedural(input) {
|
|
1140
|
+
return saveProcedural(this.db, input);
|
|
1141
|
+
}
|
|
1142
|
+
async getProcedural(name, namespace) {
|
|
1143
|
+
return getProceduralByName(this.db, name, namespace);
|
|
1144
|
+
}
|
|
1145
|
+
async listProcedural(opts = {}) {
|
|
1146
|
+
return listProcedural(this.db, opts);
|
|
1147
|
+
}
|
|
1148
|
+
async recallProcedural(name, namespace) {
|
|
1149
|
+
return recallProcedural(this.db, name, namespace);
|
|
1150
|
+
}
|
|
1151
|
+
async deleteProcedural(id) {
|
|
1152
|
+
return deleteProcedural(this.db, id);
|
|
1153
|
+
}
|
|
646
1154
|
async close() {
|
|
647
1155
|
this.db.close();
|
|
648
1156
|
}
|