mnueron 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE-OVERVIEW.md +138 -0
- package/dist/cli.js +58 -1
- package/dist/cli.js.map +1 -1
- package/dist/dashboard/server.js +29 -0
- package/dist/dashboard/server.js.map +1 -1
- package/dist/import/file.js +103 -0
- package/dist/import/file.js.map +1 -0
- package/dist/index.js +36 -2
- package/dist/index.js.map +1 -1
- package/dist/plugins/loader.js +2 -0
- package/dist/plugins/loader.js.map +1 -1
- package/dist/savings/pricing.js +75 -0
- package/dist/savings/pricing.js.map +1 -0
- package/dist/savings/recall-event.js +82 -0
- package/dist/savings/recall-event.js.map +1 -0
- package/dist/savings/recall-logger.js +148 -0
- package/dist/savings/recall-logger.js.map +1 -0
- package/dist/savings/summary.js +118 -0
- package/dist/savings/summary.js.map +1 -0
- package/dist/store/local.js +367 -254
- package/dist/store/local.js.map +1 -1
- package/dist/store/provider.js +1 -0
- package/dist/store/provider.js.map +1 -1
- package/dist/store/remote.js +73 -0
- package/dist/store/remote.js.map +1 -1
- package/dist/tools.js +285 -7
- package/dist/tools.js.map +1 -1
- package/package.json +67 -63
package/dist/store/local.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import Database from 'better-sqlite3';
|
|
2
|
+
import { RECALL_EVENTS_DDL, buildRecallEvent, approximateTokens, } from '../savings/recall-event.js';
|
|
2
3
|
import { randomUUID } from 'node:crypto';
|
|
3
4
|
import { mkdirSync } from 'node:fs';
|
|
4
5
|
import { dirname } from 'node:path';
|
|
@@ -216,44 +217,44 @@ export class LocalProvider {
|
|
|
216
217
|
preload();
|
|
217
218
|
}
|
|
218
219
|
migrate() {
|
|
219
|
-
this.db.exec(`
|
|
220
|
-
CREATE TABLE IF NOT EXISTS memories (
|
|
221
|
-
id TEXT PRIMARY KEY,
|
|
222
|
-
namespace TEXT NOT NULL DEFAULT 'default',
|
|
223
|
-
content TEXT NOT NULL,
|
|
224
|
-
tags_json TEXT NOT NULL DEFAULT '[]',
|
|
225
|
-
source TEXT NOT NULL DEFAULT 'manual',
|
|
226
|
-
source_ref TEXT,
|
|
227
|
-
meta_json TEXT,
|
|
228
|
-
created_at INTEGER NOT NULL,
|
|
229
|
-
updated_at INTEGER NOT NULL
|
|
230
|
-
);
|
|
231
|
-
|
|
232
|
-
CREATE INDEX IF NOT EXISTS idx_memories_namespace
|
|
233
|
-
ON memories(namespace);
|
|
234
|
-
CREATE INDEX IF NOT EXISTS idx_memories_created
|
|
235
|
-
ON memories(created_at DESC);
|
|
236
|
-
CREATE INDEX IF NOT EXISTS idx_memories_source
|
|
237
|
-
ON memories(source);
|
|
238
|
-
CREATE INDEX IF NOT EXISTS idx_memories_source_ref
|
|
239
|
-
ON memories(source_ref);
|
|
240
|
-
|
|
241
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
|
|
242
|
-
USING fts5(content, tags, namespace UNINDEXED, content_id UNINDEXED);
|
|
243
|
-
|
|
244
|
-
-- Keep FTS in sync. We do this manually rather than via triggers so
|
|
245
|
-
-- the FTS row's content column holds raw text (FTS can't reach
|
|
246
|
-
-- inside JSON for tags otherwise).
|
|
220
|
+
this.db.exec(`
|
|
221
|
+
CREATE TABLE IF NOT EXISTS memories (
|
|
222
|
+
id TEXT PRIMARY KEY,
|
|
223
|
+
namespace TEXT NOT NULL DEFAULT 'default',
|
|
224
|
+
content TEXT NOT NULL,
|
|
225
|
+
tags_json TEXT NOT NULL DEFAULT '[]',
|
|
226
|
+
source TEXT NOT NULL DEFAULT 'manual',
|
|
227
|
+
source_ref TEXT,
|
|
228
|
+
meta_json TEXT,
|
|
229
|
+
created_at INTEGER NOT NULL,
|
|
230
|
+
updated_at INTEGER NOT NULL
|
|
231
|
+
);
|
|
232
|
+
|
|
233
|
+
CREATE INDEX IF NOT EXISTS idx_memories_namespace
|
|
234
|
+
ON memories(namespace);
|
|
235
|
+
CREATE INDEX IF NOT EXISTS idx_memories_created
|
|
236
|
+
ON memories(created_at DESC);
|
|
237
|
+
CREATE INDEX IF NOT EXISTS idx_memories_source
|
|
238
|
+
ON memories(source);
|
|
239
|
+
CREATE INDEX IF NOT EXISTS idx_memories_source_ref
|
|
240
|
+
ON memories(source_ref);
|
|
241
|
+
|
|
242
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts
|
|
243
|
+
USING fts5(content, tags, namespace UNINDEXED, content_id UNINDEXED);
|
|
244
|
+
|
|
245
|
+
-- Keep FTS in sync. We do this manually rather than via triggers so
|
|
246
|
+
-- the FTS row's content column holds raw text (FTS can't reach
|
|
247
|
+
-- inside JSON for tags otherwise).
|
|
247
248
|
`);
|
|
248
249
|
if (this.vecAvailable) {
|
|
249
250
|
// vec0 virtual table. Each row carries the memory_id as an auxiliary
|
|
250
251
|
// column so we can JOIN back to memories without managing rowids.
|
|
251
|
-
this.db.exec(`
|
|
252
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec
|
|
253
|
-
USING vec0(
|
|
254
|
-
memory_id TEXT PRIMARY KEY,
|
|
255
|
-
embedding float[${EMBEDDING_DIM}]
|
|
256
|
-
);
|
|
252
|
+
this.db.exec(`
|
|
253
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS memories_vec
|
|
254
|
+
USING vec0(
|
|
255
|
+
memory_id TEXT PRIMARY KEY,
|
|
256
|
+
embedding float[${EMBEDDING_DIM}]
|
|
257
|
+
);
|
|
257
258
|
`);
|
|
258
259
|
}
|
|
259
260
|
// ── P2.3 — Entity resolution tables ──────────────────────────────────
|
|
@@ -266,73 +267,78 @@ export class LocalProvider {
|
|
|
266
267
|
// (e.g., "Johnny" → resolved to canonical "John Doe"); `confidence`
|
|
267
268
|
// ranges in [0, 1] from exact match (1.0) down through embedding
|
|
268
269
|
// similarity and LLM tiebreak picks (0.65-0.85).
|
|
269
|
-
this.db.exec(`
|
|
270
|
-
CREATE TABLE IF NOT EXISTS entities (
|
|
271
|
-
id TEXT PRIMARY KEY,
|
|
272
|
-
display_name TEXT NOT NULL,
|
|
273
|
-
entity_type TEXT NOT NULL,
|
|
274
|
-
aliases_json TEXT NOT NULL DEFAULT '[]',
|
|
275
|
-
mention_count INTEGER NOT NULL DEFAULT 0,
|
|
276
|
-
first_seen_at INTEGER NOT NULL,
|
|
277
|
-
last_seen_at INTEGER NOT NULL
|
|
278
|
-
);
|
|
279
|
-
CREATE INDEX IF NOT EXISTS idx_entities_type
|
|
280
|
-
ON entities(entity_type);
|
|
281
|
-
CREATE INDEX IF NOT EXISTS idx_entities_last_seen
|
|
282
|
-
ON entities(last_seen_at DESC);
|
|
283
|
-
|
|
284
|
-
CREATE TABLE IF NOT EXISTS memory_entities (
|
|
285
|
-
memory_id TEXT NOT NULL,
|
|
286
|
-
entity_id TEXT NOT NULL,
|
|
287
|
-
surface_form TEXT NOT NULL,
|
|
288
|
-
confidence REAL NOT NULL,
|
|
289
|
-
PRIMARY KEY (memory_id, entity_id)
|
|
290
|
-
);
|
|
291
|
-
CREATE INDEX IF NOT EXISTS idx_memory_entities_entity
|
|
292
|
-
ON memory_entities(entity_id);
|
|
293
|
-
|
|
294
|
-
-- P3 — Knowledge-graph edges. Each row is a triple (from, predicate,
|
|
295
|
-
-- to) plus provenance (memory_id) + confidence. P4 forward-looking
|
|
296
|
-
-- columns (valid_from / valid_to) are added now so bi-temporal
|
|
297
|
-
-- queries don't require a schema migration later.
|
|
298
|
-
CREATE TABLE IF NOT EXISTS relations (
|
|
299
|
-
id TEXT PRIMARY KEY,
|
|
300
|
-
from_entity_id TEXT NOT NULL,
|
|
301
|
-
to_entity_id TEXT NOT NULL,
|
|
302
|
-
predicate TEXT NOT NULL,
|
|
303
|
-
memory_id TEXT NOT NULL,
|
|
304
|
-
confidence REAL NOT NULL,
|
|
305
|
-
valid_from INTEGER,
|
|
306
|
-
valid_to INTEGER,
|
|
307
|
-
recorded_at INTEGER NOT NULL
|
|
308
|
-
);
|
|
309
|
-
CREATE INDEX IF NOT EXISTS idx_relations_from
|
|
310
|
-
ON relations(from_entity_id);
|
|
311
|
-
CREATE INDEX IF NOT EXISTS idx_relations_to
|
|
312
|
-
ON relations(to_entity_id);
|
|
313
|
-
CREATE INDEX IF NOT EXISTS idx_relations_predicate
|
|
314
|
-
ON relations(predicate);
|
|
315
|
-
CREATE INDEX IF NOT EXISTS idx_relations_memory
|
|
316
|
-
ON relations(memory_id);
|
|
317
|
-
CREATE INDEX IF NOT EXISTS idx_relations_valid_to
|
|
318
|
-
ON relations(valid_to);
|
|
270
|
+
this.db.exec(`
|
|
271
|
+
CREATE TABLE IF NOT EXISTS entities (
|
|
272
|
+
id TEXT PRIMARY KEY,
|
|
273
|
+
display_name TEXT NOT NULL,
|
|
274
|
+
entity_type TEXT NOT NULL,
|
|
275
|
+
aliases_json TEXT NOT NULL DEFAULT '[]',
|
|
276
|
+
mention_count INTEGER NOT NULL DEFAULT 0,
|
|
277
|
+
first_seen_at INTEGER NOT NULL,
|
|
278
|
+
last_seen_at INTEGER NOT NULL
|
|
279
|
+
);
|
|
280
|
+
CREATE INDEX IF NOT EXISTS idx_entities_type
|
|
281
|
+
ON entities(entity_type);
|
|
282
|
+
CREATE INDEX IF NOT EXISTS idx_entities_last_seen
|
|
283
|
+
ON entities(last_seen_at DESC);
|
|
284
|
+
|
|
285
|
+
CREATE TABLE IF NOT EXISTS memory_entities (
|
|
286
|
+
memory_id TEXT NOT NULL,
|
|
287
|
+
entity_id TEXT NOT NULL,
|
|
288
|
+
surface_form TEXT NOT NULL,
|
|
289
|
+
confidence REAL NOT NULL,
|
|
290
|
+
PRIMARY KEY (memory_id, entity_id)
|
|
291
|
+
);
|
|
292
|
+
CREATE INDEX IF NOT EXISTS idx_memory_entities_entity
|
|
293
|
+
ON memory_entities(entity_id);
|
|
294
|
+
|
|
295
|
+
-- P3 — Knowledge-graph edges. Each row is a triple (from, predicate,
|
|
296
|
+
-- to) plus provenance (memory_id) + confidence. P4 forward-looking
|
|
297
|
+
-- columns (valid_from / valid_to) are added now so bi-temporal
|
|
298
|
+
-- queries don't require a schema migration later.
|
|
299
|
+
CREATE TABLE IF NOT EXISTS relations (
|
|
300
|
+
id TEXT PRIMARY KEY,
|
|
301
|
+
from_entity_id TEXT NOT NULL,
|
|
302
|
+
to_entity_id TEXT NOT NULL,
|
|
303
|
+
predicate TEXT NOT NULL,
|
|
304
|
+
memory_id TEXT NOT NULL,
|
|
305
|
+
confidence REAL NOT NULL,
|
|
306
|
+
valid_from INTEGER,
|
|
307
|
+
valid_to INTEGER,
|
|
308
|
+
recorded_at INTEGER NOT NULL
|
|
309
|
+
);
|
|
310
|
+
CREATE INDEX IF NOT EXISTS idx_relations_from
|
|
311
|
+
ON relations(from_entity_id);
|
|
312
|
+
CREATE INDEX IF NOT EXISTS idx_relations_to
|
|
313
|
+
ON relations(to_entity_id);
|
|
314
|
+
CREATE INDEX IF NOT EXISTS idx_relations_predicate
|
|
315
|
+
ON relations(predicate);
|
|
316
|
+
CREATE INDEX IF NOT EXISTS idx_relations_memory
|
|
317
|
+
ON relations(memory_id);
|
|
318
|
+
CREATE INDEX IF NOT EXISTS idx_relations_valid_to
|
|
319
|
+
ON relations(valid_to);
|
|
319
320
|
`);
|
|
320
321
|
if (this.vecAvailable) {
|
|
321
322
|
// Embedding index for entity name+context strings. Used by the
|
|
322
323
|
// resolver's vector-similarity stage when finding candidate matches
|
|
323
324
|
// for a freshly extracted entity.
|
|
324
|
-
this.db.exec(`
|
|
325
|
-
CREATE VIRTUAL TABLE IF NOT EXISTS entities_vec
|
|
326
|
-
USING vec0(
|
|
327
|
-
entity_id TEXT PRIMARY KEY,
|
|
328
|
-
embedding float[${EMBEDDING_DIM}]
|
|
329
|
-
);
|
|
325
|
+
this.db.exec(`
|
|
326
|
+
CREATE VIRTUAL TABLE IF NOT EXISTS entities_vec
|
|
327
|
+
USING vec0(
|
|
328
|
+
entity_id TEXT PRIMARY KEY,
|
|
329
|
+
embedding float[${EMBEDDING_DIM}]
|
|
330
|
+
);
|
|
330
331
|
`);
|
|
331
332
|
}
|
|
332
333
|
// P5 — Consolidation proposal table (idempotent).
|
|
333
334
|
ensureConsolidationSchema(this.db);
|
|
334
335
|
// Procedural memory table (idempotent). Mem0 leapfrog feature.
|
|
335
336
|
ensureProceduralSchema(this.db);
|
|
337
|
+
// ── Recall savings (v0.6) ────────────────────────────────────────
|
|
338
|
+
// Logs every search() for the savings dashboard widget.
|
|
339
|
+
// DDL lives in src/savings/recall-event.ts so the savings module
|
|
340
|
+
// owns its own schema.
|
|
341
|
+
this.db.exec(RECALL_EVENTS_DDL);
|
|
336
342
|
}
|
|
337
343
|
// ─── write path ──────────────────────────────────────────────────────────
|
|
338
344
|
async save(input) {
|
|
@@ -391,17 +397,17 @@ export class LocalProvider {
|
|
|
391
397
|
// Failure here is non-fatal — we just skip the vec insert.
|
|
392
398
|
const vector = this.vecAvailable ? await embed(input.content) : null;
|
|
393
399
|
const tx = this.db.transaction(() => {
|
|
394
|
-
this.db.prepare(`
|
|
395
|
-
INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
|
|
396
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
400
|
+
this.db.prepare(`
|
|
401
|
+
INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
|
|
402
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
397
403
|
`).run(id, ns, input.content, JSON.stringify(tags), input.source ?? 'manual', input.source_ref ?? null, input.metadata ? JSON.stringify(input.metadata) : null, now, now);
|
|
398
|
-
this.db.prepare(`
|
|
399
|
-
INSERT INTO memories_fts (content, tags, namespace, content_id)
|
|
400
|
-
VALUES (?, ?, ?, ?)
|
|
404
|
+
this.db.prepare(`
|
|
405
|
+
INSERT INTO memories_fts (content, tags, namespace, content_id)
|
|
406
|
+
VALUES (?, ?, ?, ?)
|
|
401
407
|
`).run(input.content, tags.join(' '), ns, id);
|
|
402
408
|
if (vector && this.vecAvailable) {
|
|
403
|
-
this.db.prepare(`
|
|
404
|
-
INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)
|
|
409
|
+
this.db.prepare(`
|
|
410
|
+
INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)
|
|
405
411
|
`).run(id, Buffer.from(vector.buffer));
|
|
406
412
|
}
|
|
407
413
|
});
|
|
@@ -457,9 +463,9 @@ export class LocalProvider {
|
|
|
457
463
|
? meta.byok_anthropic_key : undefined;
|
|
458
464
|
const relations = await extractRelations(input.content, resolvedForRelations, { anthropicKey: byokAnthropic });
|
|
459
465
|
if (relations.length > 0) {
|
|
460
|
-
const insertRel = this.db.prepare(`INSERT INTO relations
|
|
461
|
-
(id, from_entity_id, to_entity_id, predicate, memory_id,
|
|
462
|
-
confidence, valid_from, valid_to, recorded_at)
|
|
466
|
+
const insertRel = this.db.prepare(`INSERT INTO relations
|
|
467
|
+
(id, from_entity_id, to_entity_id, predicate, memory_id,
|
|
468
|
+
confidence, valid_from, valid_to, recorded_at)
|
|
463
469
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`);
|
|
464
470
|
const tx2 = this.db.transaction(() => {
|
|
465
471
|
for (const r of relations) {
|
|
@@ -528,13 +534,13 @@ export class LocalProvider {
|
|
|
528
534
|
async bulkSaveOne(inputs) {
|
|
529
535
|
const vectors = this.vecAvailable ? await embedBatch(inputs.map(i => i.content)) : inputs.map(() => null);
|
|
530
536
|
const out = [];
|
|
531
|
-
const insertMem = this.db.prepare(`
|
|
532
|
-
INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
|
|
533
|
-
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
537
|
+
const insertMem = this.db.prepare(`
|
|
538
|
+
INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
|
|
539
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
534
540
|
`);
|
|
535
|
-
const insertFts = this.db.prepare(`
|
|
536
|
-
INSERT INTO memories_fts (content, tags, namespace, content_id)
|
|
537
|
-
VALUES (?, ?, ?, ?)
|
|
541
|
+
const insertFts = this.db.prepare(`
|
|
542
|
+
INSERT INTO memories_fts (content, tags, namespace, content_id)
|
|
543
|
+
VALUES (?, ?, ?, ?)
|
|
538
544
|
`);
|
|
539
545
|
const insertVec = this.vecAvailable
|
|
540
546
|
? this.db.prepare(`INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)`)
|
|
@@ -567,7 +573,7 @@ export class LocalProvider {
|
|
|
567
573
|
return out;
|
|
568
574
|
}
|
|
569
575
|
async bulkSave(inputs) {
|
|
570
|
-
let saved = 0, errors = 0;
|
|
576
|
+
let saved = 0, errors = 0, skipped = 0;
|
|
571
577
|
// 1. Redact secrets up front, same as save().
|
|
572
578
|
const redactedInputs = inputs.map(preSaveTransform);
|
|
573
579
|
// 2. Expand long inputs into per-chunk memories before we save. A backfill
|
|
@@ -603,46 +609,102 @@ export class LocalProvider {
|
|
|
603
609
|
}
|
|
604
610
|
expanded.push(input);
|
|
605
611
|
}
|
|
606
|
-
//
|
|
607
|
-
//
|
|
608
|
-
//
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
612
|
+
// Persist in per-source_ref groups so progress is durable and re-runs are
|
|
613
|
+
// idempotent. The previous implementation embedded the ENTIRE expanded set
|
|
614
|
+
// in one forward pass and committed a single transaction at the very end.
|
|
615
|
+
// For large imports (e.g. many big Cowork transcripts → thousands of
|
|
616
|
+
// chunks) that one embedding pass exceeded the caller's request timeout,
|
|
617
|
+
// and because the only DB write was the final transaction, a timed-out
|
|
618
|
+
// call committed nothing — so every retry restarted from zero and could
|
|
619
|
+
// never make progress. We now (a) embed in bounded sub-batches, (b) commit
|
|
620
|
+
// each source group in its own transaction, and (c) skip groups whose
|
|
621
|
+
// source_ref is already present (the upsert-by-source_ref the import path
|
|
622
|
+
// always advertised but never actually performed here).
|
|
623
|
+
const insertMem = this.db.prepare(`
|
|
624
|
+
INSERT INTO memories (id, namespace, content, tags_json, source, source_ref, meta_json, created_at, updated_at)
|
|
625
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
615
626
|
`);
|
|
616
|
-
const insertFts = this.db.prepare(`
|
|
617
|
-
INSERT INTO memories_fts (content, tags, namespace, content_id)
|
|
618
|
-
VALUES (?, ?, ?, ?)
|
|
627
|
+
const insertFts = this.db.prepare(`
|
|
628
|
+
INSERT INTO memories_fts (content, tags, namespace, content_id)
|
|
629
|
+
VALUES (?, ?, ?, ?)
|
|
619
630
|
`);
|
|
620
631
|
const insertVec = this.vecAvailable
|
|
621
632
|
? this.db.prepare(`INSERT INTO memories_vec (memory_id, embedding) VALUES (?, ?)`)
|
|
622
633
|
: null;
|
|
623
|
-
const
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
|
|
634
|
+
const existsByRef = this.db.prepare(`SELECT 1 FROM memories WHERE source_ref = ? LIMIT 1`);
|
|
635
|
+
// Group expanded chunks by source_ref. All chunks of one source (e.g. a
|
|
636
|
+
// single Cowork session) share parent_ref == source_ref, so a group maps
|
|
637
|
+
// 1:1 to an importable unit that we can dedup and commit atomically.
|
|
638
|
+
const groups = new Map();
|
|
639
|
+
const ungrouped = [];
|
|
640
|
+
for (const item of expanded) {
|
|
641
|
+
const ref = item.source_ref ?? null;
|
|
642
|
+
if (ref) {
|
|
643
|
+
const g = groups.get(ref);
|
|
644
|
+
if (g)
|
|
645
|
+
g.push(item);
|
|
646
|
+
else
|
|
647
|
+
groups.set(ref, [item]);
|
|
648
|
+
}
|
|
649
|
+
else {
|
|
650
|
+
ungrouped.push(item);
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
// Keep each embedding forward-pass small and bounded regardless of how
|
|
654
|
+
// large a single source is.
|
|
655
|
+
const EMBED_BATCH = 32;
|
|
656
|
+
const persistGroup = async (items, dedupRef) => {
|
|
657
|
+
// Idempotency: a present source_ref means this source already imported.
|
|
658
|
+
// Groups commit atomically, so there are never partial sources to repair.
|
|
659
|
+
if (dedupRef && existsByRef.get(dedupRef)) {
|
|
660
|
+
skipped += items.length;
|
|
661
|
+
return;
|
|
662
|
+
}
|
|
663
|
+
const vectors = [];
|
|
664
|
+
if (this.vecAvailable) {
|
|
665
|
+
for (let i = 0; i < items.length; i += EMBED_BATCH) {
|
|
666
|
+
const slice = items.slice(i, i + EMBED_BATCH);
|
|
667
|
+
const vs = await embedBatch(slice.map(s => s.content));
|
|
668
|
+
for (const v of vs)
|
|
669
|
+
vectors.push(v);
|
|
641
670
|
}
|
|
642
671
|
}
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
672
|
+
else {
|
|
673
|
+
for (let i = 0; i < items.length; i++)
|
|
674
|
+
vectors.push(null);
|
|
675
|
+
}
|
|
676
|
+
const tx = this.db.transaction((rows) => {
|
|
677
|
+
for (let i = 0; i < rows.length; i++) {
|
|
678
|
+
const input = rows[i];
|
|
679
|
+
try {
|
|
680
|
+
const id = randomUUID();
|
|
681
|
+
const now = Date.now();
|
|
682
|
+
const ns = input.namespace ?? 'default';
|
|
683
|
+
const tags = input.tags ?? [];
|
|
684
|
+
insertMem.run(id, ns, input.content, JSON.stringify(tags), input.source ?? 'manual', input.source_ref ?? null, input.metadata ? JSON.stringify(input.metadata) : null, now, now);
|
|
685
|
+
insertFts.run(input.content, tags.join(' '), ns, id);
|
|
686
|
+
const vec = vectors[i];
|
|
687
|
+
if (vec && insertVec) {
|
|
688
|
+
insertVec.run(id, Buffer.from(vec.buffer));
|
|
689
|
+
}
|
|
690
|
+
saved++;
|
|
691
|
+
}
|
|
692
|
+
catch (e) {
|
|
693
|
+
errors++;
|
|
694
|
+
}
|
|
695
|
+
}
|
|
696
|
+
});
|
|
697
|
+
tx(items);
|
|
698
|
+
};
|
|
699
|
+
for (const [ref, items] of groups) {
|
|
700
|
+
await persistGroup(items, ref);
|
|
701
|
+
}
|
|
702
|
+
// Items without a source_ref can't be deduped; still persist them in
|
|
703
|
+
// bounded batches so one giant pass can't blow the timeout.
|
|
704
|
+
for (let i = 0; i < ungrouped.length; i += EMBED_BATCH) {
|
|
705
|
+
await persistGroup(ungrouped.slice(i, i + EMBED_BATCH), null);
|
|
706
|
+
}
|
|
707
|
+
return { saved, errors, skipped };
|
|
646
708
|
}
|
|
647
709
|
// ─── read path: hybrid keyword + vector with RRF ─────────────────────────
|
|
648
710
|
async search(input) {
|
|
@@ -653,12 +715,12 @@ export class LocalProvider {
|
|
|
653
715
|
const ftsRanks = new Map(); // id → 1-based rank
|
|
654
716
|
if (safeQuery) {
|
|
655
717
|
const filter = buildFilterFragment(input, 'm');
|
|
656
|
-
let sql = `
|
|
657
|
-
SELECT m.id
|
|
658
|
-
FROM memories_fts f
|
|
659
|
-
JOIN memories m ON m.id = f.content_id
|
|
660
|
-
WHERE memories_fts MATCH ?
|
|
661
|
-
AND ${filter.sql}
|
|
718
|
+
let sql = `
|
|
719
|
+
SELECT m.id
|
|
720
|
+
FROM memories_fts f
|
|
721
|
+
JOIN memories m ON m.id = f.content_id
|
|
722
|
+
WHERE memories_fts MATCH ?
|
|
723
|
+
AND ${filter.sql}
|
|
662
724
|
`;
|
|
663
725
|
const params = [safeQuery, ...filter.params];
|
|
664
726
|
sql += ` ORDER BY bm25(memories_fts) LIMIT ?`;
|
|
@@ -677,12 +739,12 @@ export class LocalProvider {
|
|
|
677
739
|
const qvec = await embed(input.query);
|
|
678
740
|
if (qvec) {
|
|
679
741
|
try {
|
|
680
|
-
const rows = this.db.prepare(`
|
|
681
|
-
SELECT memory_id AS id, distance
|
|
682
|
-
FROM memories_vec
|
|
683
|
-
WHERE embedding MATCH ?
|
|
684
|
-
AND k = ?
|
|
685
|
-
ORDER BY distance
|
|
742
|
+
const rows = this.db.prepare(`
|
|
743
|
+
SELECT memory_id AS id, distance
|
|
744
|
+
FROM memories_vec
|
|
745
|
+
WHERE embedding MATCH ?
|
|
746
|
+
AND k = ?
|
|
747
|
+
ORDER BY distance
|
|
686
748
|
`).all(Buffer.from(qvec.buffer), candidateLimit);
|
|
687
749
|
let candidates = rows.map(r => r.id);
|
|
688
750
|
// Namespace filter (after the KNN — sqlite-vec doesn't let us
|
|
@@ -727,8 +789,59 @@ export class LocalProvider {
|
|
|
727
789
|
const wanted = new Set(input.tags);
|
|
728
790
|
memories = memories.filter(m => m.tags.some(t => wanted.has(t)));
|
|
729
791
|
}
|
|
792
|
+
// Recall-event capture moved to the MCP-server tool handler
|
|
793
|
+
// (src/index.ts) so it's provider-agnostic — fires for both local and
|
|
794
|
+
// hosted modes. recordRecallEvent() below is kept for any direct
|
|
795
|
+
// LocalProvider callers (e.g. the benchmark adapter) that still want
|
|
796
|
+
// the inline capture, but it's no longer invoked from search().
|
|
730
797
|
return memories;
|
|
731
798
|
}
|
|
799
|
+
/**
|
|
800
|
+
* Logs one row to recall_events for the savings dashboard. Sums every
|
|
801
|
+
* memory in the namespace once to get the baseline 'what would I have
|
|
802
|
+
* had to send' figure (cheap — LENGTH() over content text). Fail-open.
|
|
803
|
+
*/
|
|
804
|
+
recordRecallEvent(input, returned) {
|
|
805
|
+
try {
|
|
806
|
+
const tokens_returned = returned.reduce((sum, m) => sum + approximateTokens(m.content), 0);
|
|
807
|
+
const ns = input.namespace ?? null;
|
|
808
|
+
let baseline_chars = 0;
|
|
809
|
+
if (ns) {
|
|
810
|
+
const row = this.db
|
|
811
|
+
.prepare(`SELECT COALESCE(SUM(LENGTH(content)), 0) AS chars
|
|
812
|
+
FROM memories
|
|
813
|
+
WHERE namespace = ?`)
|
|
814
|
+
.get(ns);
|
|
815
|
+
baseline_chars = row?.chars ?? 0;
|
|
816
|
+
}
|
|
817
|
+
else {
|
|
818
|
+
const row = this.db
|
|
819
|
+
.prepare(`SELECT COALESCE(SUM(LENGTH(content)), 0) AS chars FROM memories`)
|
|
820
|
+
.get();
|
|
821
|
+
baseline_chars = row?.chars ?? 0;
|
|
822
|
+
}
|
|
823
|
+
const tokens_baseline_namespace = Math.ceil(baseline_chars / 4);
|
|
824
|
+
const ev = buildRecallEvent({
|
|
825
|
+
namespace: ns,
|
|
826
|
+
query: input.query,
|
|
827
|
+
tokens_returned,
|
|
828
|
+
tokens_baseline_namespace,
|
|
829
|
+
model_id: input.model_id ?? null,
|
|
830
|
+
client: input.client ?? null,
|
|
831
|
+
});
|
|
832
|
+
this.db
|
|
833
|
+
.prepare(`INSERT INTO recall_events
|
|
834
|
+
(id, created_at, namespace, query_hash, tokens_returned,
|
|
835
|
+
tokens_baseline_namespace, tokens_baseline_capped, model_id,
|
|
836
|
+
context_limit, client)
|
|
837
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
|
|
838
|
+
.run(ev.id, ev.created_at, ev.namespace, ev.query_hash, ev.tokens_returned, ev.tokens_baseline_namespace, ev.tokens_baseline_capped, ev.model_id, ev.context_limit, ev.client);
|
|
839
|
+
}
|
|
840
|
+
catch (e) {
|
|
841
|
+
// Fail-open by design — never let recall observability break recall.
|
|
842
|
+
console.warn('[mnueron/savings] recall-event capture failed:', e instanceof Error ? e.message : e);
|
|
843
|
+
}
|
|
844
|
+
}
|
|
732
845
|
async list(input) {
|
|
733
846
|
// v0.2.1 + v0.2.4: full filter support via shared helper.
|
|
734
847
|
// Note: 'm' alias is omitted here because list() doesn't join other
|
|
@@ -822,12 +935,12 @@ export class LocalProvider {
|
|
|
822
935
|
const nextTags = patch.tags
|
|
823
936
|
?? JSON.parse((existing.tags_json ?? existing.tags) ?? '[]');
|
|
824
937
|
const now = Date.now();
|
|
825
|
-
this.db.prepare(`UPDATE memories
|
|
826
|
-
SET content = ?,
|
|
827
|
-
namespace = ?,
|
|
828
|
-
tags_json = ?,
|
|
829
|
-
meta_json = ?,
|
|
830
|
-
updated_at = ?
|
|
938
|
+
this.db.prepare(`UPDATE memories
|
|
939
|
+
SET content = ?,
|
|
940
|
+
namespace = ?,
|
|
941
|
+
tags_json = ?,
|
|
942
|
+
meta_json = ?,
|
|
943
|
+
updated_at = ?
|
|
831
944
|
WHERE id = ?`).run(nextContent, nextNs, JSON.stringify(nextTags), JSON.stringify(merged), now, id);
|
|
832
945
|
// If content changed, re-index FTS + (optionally) re-embed.
|
|
833
946
|
if (contentChanged) {
|
|
@@ -865,13 +978,13 @@ export class LocalProvider {
|
|
|
865
978
|
return tx();
|
|
866
979
|
}
|
|
867
980
|
async namespaces() {
|
|
868
|
-
const rows = this.db.prepare(`
|
|
869
|
-
SELECT namespace AS name,
|
|
870
|
-
COUNT(*) AS count,
|
|
871
|
-
MAX(updated_at) AS last_updated
|
|
872
|
-
FROM memories
|
|
873
|
-
GROUP BY namespace
|
|
874
|
-
ORDER BY last_updated DESC
|
|
981
|
+
const rows = this.db.prepare(`
|
|
982
|
+
SELECT namespace AS name,
|
|
983
|
+
COUNT(*) AS count,
|
|
984
|
+
MAX(updated_at) AS last_updated
|
|
985
|
+
FROM memories
|
|
986
|
+
GROUP BY namespace
|
|
987
|
+
ORDER BY last_updated DESC
|
|
875
988
|
`).all();
|
|
876
989
|
return rows.map(r => ({
|
|
877
990
|
name: r.name,
|
|
@@ -895,12 +1008,12 @@ export class LocalProvider {
|
|
|
895
1008
|
}
|
|
896
1009
|
if (input.q && input.q.trim()) {
|
|
897
1010
|
// Match display_name OR any alias (case-insensitive substring).
|
|
898
|
-
parts.push(`(
|
|
899
|
-
lower(display_name) LIKE lower('%' || ? || '%')
|
|
900
|
-
OR EXISTS (
|
|
901
|
-
SELECT 1 FROM json_each(aliases_json) AS a
|
|
902
|
-
WHERE lower(a.value) LIKE lower('%' || ? || '%')
|
|
903
|
-
)
|
|
1011
|
+
parts.push(`(
|
|
1012
|
+
lower(display_name) LIKE lower('%' || ? || '%')
|
|
1013
|
+
OR EXISTS (
|
|
1014
|
+
SELECT 1 FROM json_each(aliases_json) AS a
|
|
1015
|
+
WHERE lower(a.value) LIKE lower('%' || ? || '%')
|
|
1016
|
+
)
|
|
904
1017
|
)`);
|
|
905
1018
|
params.push(input.q.trim(), input.q.trim());
|
|
906
1019
|
}
|
|
@@ -932,13 +1045,13 @@ export class LocalProvider {
|
|
|
932
1045
|
async getEntityMemories(id, limit = 100) {
|
|
933
1046
|
const cap = clampLimit(limit, 500);
|
|
934
1047
|
const rows = this.db
|
|
935
|
-
.prepare(`SELECT m.id, m.namespace, m.content, m.tags_json, m.source, m.source_ref,
|
|
936
|
-
m.meta_json, m.created_at, m.updated_at,
|
|
937
|
-
me.surface_form, me.confidence
|
|
938
|
-
FROM memory_entities me
|
|
939
|
-
JOIN memories m ON m.id = me.memory_id
|
|
940
|
-
WHERE me.entity_id = ?
|
|
941
|
-
ORDER BY m.created_at DESC
|
|
1048
|
+
.prepare(`SELECT m.id, m.namespace, m.content, m.tags_json, m.source, m.source_ref,
|
|
1049
|
+
m.meta_json, m.created_at, m.updated_at,
|
|
1050
|
+
me.surface_form, me.confidence
|
|
1051
|
+
FROM memory_entities me
|
|
1052
|
+
JOIN memories m ON m.id = me.memory_id
|
|
1053
|
+
WHERE me.entity_id = ?
|
|
1054
|
+
ORDER BY m.created_at DESC
|
|
942
1055
|
LIMIT ?`)
|
|
943
1056
|
.all(id, cap);
|
|
944
1057
|
return rows.map((r) => ({
|
|
@@ -983,18 +1096,18 @@ export class LocalProvider {
|
|
|
983
1096
|
// Repoint edges. INSERT-OR-IGNORE then DELETE-old, with confidence MAX
|
|
984
1097
|
// fold to preserve the strongest edge if both winner and loser shared
|
|
985
1098
|
// a memory.
|
|
986
|
-
this.db.prepare(`INSERT INTO memory_entities (memory_id, entity_id, surface_form, confidence)
|
|
987
|
-
SELECT memory_id, ?, surface_form, confidence
|
|
988
|
-
FROM memory_entities WHERE entity_id = ?
|
|
989
|
-
ON CONFLICT(memory_id, entity_id) DO UPDATE SET
|
|
1099
|
+
this.db.prepare(`INSERT INTO memory_entities (memory_id, entity_id, surface_form, confidence)
|
|
1100
|
+
SELECT memory_id, ?, surface_form, confidence
|
|
1101
|
+
FROM memory_entities WHERE entity_id = ?
|
|
1102
|
+
ON CONFLICT(memory_id, entity_id) DO UPDATE SET
|
|
990
1103
|
confidence = MAX(memory_entities.confidence, excluded.confidence)`).run(winnerId, loserId);
|
|
991
1104
|
this.db.prepare(`DELETE FROM memory_entities WHERE entity_id = ?`).run(loserId);
|
|
992
1105
|
// Update winner aggregate.
|
|
993
|
-
this.db.prepare(`UPDATE entities SET
|
|
994
|
-
aliases_json = ?,
|
|
995
|
-
mention_count = mention_count + ?,
|
|
996
|
-
first_seen_at = MIN(first_seen_at, ?),
|
|
997
|
-
last_seen_at = MAX(last_seen_at, ?)
|
|
1106
|
+
this.db.prepare(`UPDATE entities SET
|
|
1107
|
+
aliases_json = ?,
|
|
1108
|
+
mention_count = mention_count + ?,
|
|
1109
|
+
first_seen_at = MIN(first_seen_at, ?),
|
|
1110
|
+
last_seen_at = MAX(last_seen_at, ?)
|
|
998
1111
|
WHERE id = ?`).run(JSON.stringify(mergedAliases), loser.mention_count, loser.first_seen_at, loser.last_seen_at, winnerId);
|
|
999
1112
|
// Delete loser everywhere.
|
|
1000
1113
|
if (this.vecAvailable) {
|
|
@@ -1056,11 +1169,11 @@ export class LocalProvider {
|
|
|
1056
1169
|
}
|
|
1057
1170
|
const limit = clampLimit(input.limit ?? 200, 1000);
|
|
1058
1171
|
const rows = this.db
|
|
1059
|
-
.prepare(`SELECT id, from_entity_id, to_entity_id, predicate, memory_id,
|
|
1060
|
-
confidence, valid_from, valid_to, recorded_at
|
|
1061
|
-
FROM relations
|
|
1062
|
-
WHERE ${parts.join(' AND ')}
|
|
1063
|
-
ORDER BY recorded_at DESC
|
|
1172
|
+
.prepare(`SELECT id, from_entity_id, to_entity_id, predicate, memory_id,
|
|
1173
|
+
confidence, valid_from, valid_to, recorded_at
|
|
1174
|
+
FROM relations
|
|
1175
|
+
WHERE ${parts.join(' AND ')}
|
|
1176
|
+
ORDER BY recorded_at DESC
|
|
1064
1177
|
LIMIT ?`)
|
|
1065
1178
|
.all(...params, limit);
|
|
1066
1179
|
return rows;
|
|
@@ -1168,10 +1281,10 @@ export class LocalProvider {
|
|
|
1168
1281
|
countMissingEmbeddings() {
|
|
1169
1282
|
if (!this.vecAvailable)
|
|
1170
1283
|
return 0;
|
|
1171
|
-
const r = this.db.prepare(`
|
|
1172
|
-
SELECT COUNT(*) AS c
|
|
1173
|
-
FROM memories
|
|
1174
|
-
WHERE id NOT IN (SELECT memory_id FROM memories_vec)
|
|
1284
|
+
const r = this.db.prepare(`
|
|
1285
|
+
SELECT COUNT(*) AS c
|
|
1286
|
+
FROM memories
|
|
1287
|
+
WHERE id NOT IN (SELECT memory_id FROM memories_vec)
|
|
1175
1288
|
`).get();
|
|
1176
1289
|
return r?.c ?? 0;
|
|
1177
1290
|
}
|
|
@@ -1183,18 +1296,18 @@ export class LocalProvider {
|
|
|
1183
1296
|
async rebuildEmbeddings(onProgress) {
|
|
1184
1297
|
if (!this.vecAvailable)
|
|
1185
1298
|
return { updated: 0, skipped: 0, errors: 0 };
|
|
1186
|
-
const rows = this.db.prepare(`
|
|
1187
|
-
SELECT id, content
|
|
1188
|
-
FROM memories
|
|
1189
|
-
WHERE id NOT IN (SELECT memory_id FROM memories_vec)
|
|
1190
|
-
ORDER BY created_at ASC
|
|
1299
|
+
const rows = this.db.prepare(`
|
|
1300
|
+
SELECT id, content
|
|
1301
|
+
FROM memories
|
|
1302
|
+
WHERE id NOT IN (SELECT memory_id FROM memories_vec)
|
|
1303
|
+
ORDER BY created_at ASC
|
|
1191
1304
|
`).all();
|
|
1192
1305
|
const total = rows.length;
|
|
1193
1306
|
let updated = 0, skipped = 0, errors = 0;
|
|
1194
1307
|
// Embed in batches of 16 for throughput without spiking memory.
|
|
1195
1308
|
const BATCH = 16;
|
|
1196
|
-
const insertVec = this.db.prepare(`
|
|
1197
|
-
INSERT OR REPLACE INTO memories_vec (memory_id, embedding) VALUES (?, ?)
|
|
1309
|
+
const insertVec = this.db.prepare(`
|
|
1310
|
+
INSERT OR REPLACE INTO memories_vec (memory_id, embedding) VALUES (?, ?)
|
|
1198
1311
|
`);
|
|
1199
1312
|
for (let i = 0; i < rows.length; i += BATCH) {
|
|
1200
1313
|
const chunk = rows.slice(i, i + BATCH);
|
|
@@ -1258,20 +1371,20 @@ export class LocalProvider {
|
|
|
1258
1371
|
}
|
|
1259
1372
|
// Now fetch every memory whose metadata.parent_ref equals ref.
|
|
1260
1373
|
// JSON field path syntax: json_extract(meta_json, '$.parent_ref')
|
|
1261
|
-
const rows = this.db.prepare(`
|
|
1262
|
-
SELECT *
|
|
1263
|
-
FROM memories
|
|
1264
|
-
WHERE json_extract(meta_json, '$.parent_ref') = ?
|
|
1265
|
-
ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
|
|
1374
|
+
const rows = this.db.prepare(`
|
|
1375
|
+
SELECT *
|
|
1376
|
+
FROM memories
|
|
1377
|
+
WHERE json_extract(meta_json, '$.parent_ref') = ?
|
|
1378
|
+
ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
|
|
1266
1379
|
`).all(ref);
|
|
1267
1380
|
// Also try a fallback against source_ref for memories chunked via
|
|
1268
1381
|
// source_ref-as-parent_ref (this is the common case for backfills).
|
|
1269
1382
|
if (rows.length === 0) {
|
|
1270
|
-
const alt = this.db.prepare(`
|
|
1271
|
-
SELECT *
|
|
1272
|
-
FROM memories
|
|
1273
|
-
WHERE source_ref = ?
|
|
1274
|
-
ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
|
|
1383
|
+
const alt = this.db.prepare(`
|
|
1384
|
+
SELECT *
|
|
1385
|
+
FROM memories
|
|
1386
|
+
WHERE source_ref = ?
|
|
1387
|
+
ORDER BY COALESCE(json_extract(meta_json, '$.chunk_index'), 0) ASC, created_at ASC
|
|
1275
1388
|
`).all(ref);
|
|
1276
1389
|
return alt.map(r => this.rowToMemory(r));
|
|
1277
1390
|
}
|
|
@@ -1292,37 +1405,37 @@ export class LocalProvider {
|
|
|
1292
1405
|
const offset = opts.offset ?? 0;
|
|
1293
1406
|
// We use COALESCE(parent_ref-from-metadata, id) as the bucket key so
|
|
1294
1407
|
// standalone (non-chunked) memories show up as single-row threads too.
|
|
1295
|
-
const sql = `
|
|
1296
|
-
WITH grouped AS (
|
|
1297
|
-
SELECT
|
|
1298
|
-
COALESCE(json_extract(meta_json, '$.parent_ref'), id) AS pref,
|
|
1299
|
-
namespace,
|
|
1300
|
-
COUNT(*) AS cnt,
|
|
1301
|
-
MIN(created_at) AS first_at,
|
|
1302
|
-
MAX(updated_at) AS last_at,
|
|
1303
|
-
SUM(CASE WHEN json_extract(meta_json, '$.chunk_index') IS NOT NULL THEN 1 ELSE 0 END) AS chunked_n
|
|
1304
|
-
FROM memories
|
|
1305
|
-
${opts.namespace ? 'WHERE namespace = ?' : ''}
|
|
1306
|
-
GROUP BY pref, namespace
|
|
1307
|
-
)
|
|
1308
|
-
SELECT
|
|
1309
|
-
g.pref AS parent_ref,
|
|
1310
|
-
g.namespace,
|
|
1311
|
-
g.cnt AS count,
|
|
1312
|
-
g.first_at,
|
|
1313
|
-
g.last_at,
|
|
1314
|
-
g.chunked_n > 0 AS has_chunks,
|
|
1315
|
-
(
|
|
1316
|
-
SELECT m.content
|
|
1317
|
-
FROM memories m
|
|
1318
|
-
WHERE COALESCE(json_extract(m.meta_json, '$.parent_ref'), m.id) = g.pref
|
|
1319
|
-
AND m.namespace = g.namespace
|
|
1320
|
-
ORDER BY COALESCE(json_extract(m.meta_json, '$.chunk_index'), 0) ASC, m.created_at ASC
|
|
1321
|
-
LIMIT 1
|
|
1322
|
-
) AS title_source
|
|
1323
|
-
FROM grouped g
|
|
1324
|
-
ORDER BY g.last_at DESC
|
|
1325
|
-
LIMIT ? OFFSET ?
|
|
1408
|
+
const sql = `
|
|
1409
|
+
WITH grouped AS (
|
|
1410
|
+
SELECT
|
|
1411
|
+
COALESCE(json_extract(meta_json, '$.parent_ref'), id) AS pref,
|
|
1412
|
+
namespace,
|
|
1413
|
+
COUNT(*) AS cnt,
|
|
1414
|
+
MIN(created_at) AS first_at,
|
|
1415
|
+
MAX(updated_at) AS last_at,
|
|
1416
|
+
SUM(CASE WHEN json_extract(meta_json, '$.chunk_index') IS NOT NULL THEN 1 ELSE 0 END) AS chunked_n
|
|
1417
|
+
FROM memories
|
|
1418
|
+
${opts.namespace ? 'WHERE namespace = ?' : ''}
|
|
1419
|
+
GROUP BY pref, namespace
|
|
1420
|
+
)
|
|
1421
|
+
SELECT
|
|
1422
|
+
g.pref AS parent_ref,
|
|
1423
|
+
g.namespace,
|
|
1424
|
+
g.cnt AS count,
|
|
1425
|
+
g.first_at,
|
|
1426
|
+
g.last_at,
|
|
1427
|
+
g.chunked_n > 0 AS has_chunks,
|
|
1428
|
+
(
|
|
1429
|
+
SELECT m.content
|
|
1430
|
+
FROM memories m
|
|
1431
|
+
WHERE COALESCE(json_extract(m.meta_json, '$.parent_ref'), m.id) = g.pref
|
|
1432
|
+
AND m.namespace = g.namespace
|
|
1433
|
+
ORDER BY COALESCE(json_extract(m.meta_json, '$.chunk_index'), 0) ASC, m.created_at ASC
|
|
1434
|
+
LIMIT 1
|
|
1435
|
+
) AS title_source
|
|
1436
|
+
FROM grouped g
|
|
1437
|
+
ORDER BY g.last_at DESC
|
|
1438
|
+
LIMIT ? OFFSET ?
|
|
1326
1439
|
`;
|
|
1327
1440
|
const params = opts.namespace ? [opts.namespace, limit, offset] : [limit, offset];
|
|
1328
1441
|
const rows = this.db.prepare(sql).all(...params);
|
|
@@ -1341,15 +1454,15 @@ export class LocalProvider {
|
|
|
1341
1454
|
* predate chunking. Used by `mnueron rechunk` to backfill the new shape.
|
|
1342
1455
|
*/
|
|
1343
1456
|
findOversizedMemories(threshold = DEFAULT_CHUNK_THRESHOLD) {
|
|
1344
|
-
return this.db.prepare(`
|
|
1345
|
-
SELECT id, content, namespace, tags_json, source, source_ref, meta_json, created_at
|
|
1346
|
-
FROM memories
|
|
1347
|
-
WHERE LENGTH(content) > ?
|
|
1348
|
-
AND (
|
|
1349
|
-
meta_json IS NULL
|
|
1350
|
-
OR json_extract(meta_json, '$.chunk_index') IS NULL
|
|
1351
|
-
)
|
|
1352
|
-
ORDER BY LENGTH(content) DESC
|
|
1457
|
+
return this.db.prepare(`
|
|
1458
|
+
SELECT id, content, namespace, tags_json, source, source_ref, meta_json, created_at
|
|
1459
|
+
FROM memories
|
|
1460
|
+
WHERE LENGTH(content) > ?
|
|
1461
|
+
AND (
|
|
1462
|
+
meta_json IS NULL
|
|
1463
|
+
OR json_extract(meta_json, '$.chunk_index') IS NULL
|
|
1464
|
+
)
|
|
1465
|
+
ORDER BY LENGTH(content) DESC
|
|
1353
1466
|
`).all(threshold);
|
|
1354
1467
|
}
|
|
1355
1468
|
rowToMemory(row, score) {
|