clementine-agent 1.2.2 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. package/dist/agent/assistant.js +12 -0
  2. package/dist/cli/dashboard.js +3034 -734
  3. package/dist/cli/static/LICENSE-NOTICES.md +12 -0
  4. package/dist/cli/static/drawflow.min.css +1 -0
  5. package/dist/cli/static/drawflow.min.js +1 -0
  6. package/dist/config.d.ts +11 -0
  7. package/dist/config.js +16 -0
  8. package/dist/dashboard/builder/dry-run.d.ts +31 -0
  9. package/dist/dashboard/builder/dry-run.js +138 -0
  10. package/dist/dashboard/builder/events.d.ts +23 -0
  11. package/dist/dashboard/builder/events.js +28 -0
  12. package/dist/dashboard/builder/mcp-invoke.d.ts +25 -0
  13. package/dist/dashboard/builder/mcp-invoke.js +143 -0
  14. package/dist/dashboard/builder/runner.d.ts +68 -0
  15. package/dist/dashboard/builder/runner.js +418 -0
  16. package/dist/dashboard/builder/serializer.d.ts +79 -0
  17. package/dist/dashboard/builder/serializer.js +547 -0
  18. package/dist/dashboard/builder/snapshots.d.ts +32 -0
  19. package/dist/dashboard/builder/snapshots.js +138 -0
  20. package/dist/dashboard/builder/validation.d.ts +26 -0
  21. package/dist/dashboard/builder/validation.js +183 -0
  22. package/dist/gateway/router.js +31 -2
  23. package/dist/index.js +38 -0
  24. package/dist/memory/chunker.js +13 -2
  25. package/dist/memory/hot-cache.d.ts +38 -0
  26. package/dist/memory/hot-cache.js +73 -0
  27. package/dist/memory/integrity.d.ts +28 -0
  28. package/dist/memory/integrity.js +119 -0
  29. package/dist/memory/maintenance.d.ts +23 -2
  30. package/dist/memory/maintenance.js +140 -3
  31. package/dist/memory/store.d.ts +259 -2
  32. package/dist/memory/store.js +751 -21
  33. package/dist/memory/write-queue.d.ts +96 -0
  34. package/dist/memory/write-queue.js +165 -0
  35. package/dist/tools/builder-tools.d.ts +13 -0
  36. package/dist/tools/builder-tools.js +437 -0
  37. package/dist/tools/mcp-server.js +2 -0
  38. package/dist/tools/memory-tools.js +38 -1
  39. package/dist/types.d.ts +56 -2
  40. package/package.json +2 -2
  41. package/vault/00-System/skills/builder-canvas.md +126 -0
@@ -19,6 +19,8 @@ import * as embeddingsModule from './embeddings.js';
19
19
  import { chunkFile } from './chunker.js';
20
20
  import { mmrRerank } from './mmr.js';
21
21
  import { deduplicateResults } from './search.js';
22
+ import { HotCache } from './hot-cache.js';
23
+ import { WriteQueue } from './write-queue.js';
22
24
  const WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
23
25
  export class MemoryStore {
24
26
  dbPath;
@@ -28,6 +30,14 @@ export class MemoryStore {
28
30
  _stmtChunkCount = null;
29
31
  _stmtInsertTranscript = null;
30
32
  _stmtInsertUsage = null;
33
+ // In-process LRU for chunk-row reads. Hit on every retrieval that calls
34
+ // getChunksByIds; invalidated on softDeleteChunk, restoreChunk, setPinned,
35
+ // updateFile, and bulk fullSync. Capacity tunable; default 1000 chunks.
36
+ chunkRowCache = new HotCache(1000);
37
+ // Async write-behind queue for non-critical writes (transcripts, recall
38
+ // traces, outcomes, access log). Null = sync mode (default; tests rely on
39
+ // immediate persistence). Enabled via enableWriteQueue() at daemon boot.
40
+ writeQueue = null;
31
41
  constructor(dbPath, vaultDir) {
32
42
  this.dbPath = dbPath;
33
43
  this.vaultDir = vaultDir;
@@ -782,6 +792,16 @@ export class MemoryStore {
782
792
  );
783
793
  CREATE UNIQUE INDEX IF NOT EXISTS idx_user_model_slot_agent
784
794
  ON user_model_blocks(slot, COALESCE(agent_slug, ''));
795
+ `);
796
+ // Persistent key/value scratch for the memory janitor — last vacuum
797
+ // timestamp, last janitor run, etc. Survives daemon restarts so we
798
+ // don't VACUUM on every boot.
799
+ this.conn.exec(`
800
+ CREATE TABLE IF NOT EXISTS maintenance_meta (
801
+ key TEXT PRIMARY KEY,
802
+ value TEXT NOT NULL,
803
+ updated_at TEXT NOT NULL DEFAULT (datetime('now'))
804
+ );
785
805
  `);
786
806
  }
787
807
  // ── Skill usage telemetry ─────────────────────────────────────────
@@ -866,6 +886,7 @@ export class MemoryStore {
866
886
  }
867
887
  /** Toggle the manual pin flag on a chunk. Pinned chunks get a 2x score boost in recall. */
868
888
  setPinned(chunkId, pinned) {
889
+ this.chunkRowCache.delete(chunkId);
869
890
  try {
870
891
  const result = this.conn.prepare('UPDATE chunks SET pinned = ? WHERE id = ?')
871
892
  .run(pinned ? 1 : 0, chunkId);
@@ -966,7 +987,10 @@ export class MemoryStore {
966
987
  VALUES ('delete', ?, ?, ?, ?)`).run(row.id, row.source_file, row.section, row.content);
967
988
  return true;
968
989
  });
969
- return tx(chunkId);
990
+ const ok = tx(chunkId);
991
+ if (ok)
992
+ this.chunkRowCache.delete(chunkId);
993
+ return ok;
970
994
  }
971
995
  /** Restore a soft-deleted chunk. Removes the chunk_soft_deletes row and
972
996
  * re-inserts the content into the FTS index. */
@@ -984,7 +1008,10 @@ export class MemoryStore {
984
1008
  VALUES (?, ?, ?, ?)`).run(row.id, row.source_file, row.section, row.content);
985
1009
  return true;
986
1010
  });
987
- return tx(chunkId);
1011
+ const ok = tx(chunkId);
1012
+ if (ok)
1013
+ this.chunkRowCache.delete(chunkId);
1014
+ return ok;
988
1015
  }
989
1016
  /** Recent edit history for a chunk (newest first). */
990
1017
  getChunkHistory(chunkId, limit = 20) {
@@ -1469,6 +1496,114 @@ export class MemoryStore {
1469
1496
  return rows.map(mapRow);
1470
1497
  }
1471
1498
  // ── Search: Context (Layer 3) ─────────────────────────────────────
1499
+ /**
1500
+ * 1-hop wikilink expansion: for each seed chunk's source_file, find files
1501
+ * that link to it or that it links to, and pull their top chunks. Returns
1502
+ * SearchResult-shaped rows with a fractional boost so they enter the
1503
+ * candidate pool below the seed scores but above pure noise.
1504
+ *
1505
+ * Pattern: 2026-frontier agent memory uses graph expansion (Mem0g, Zep
1506
+ * Graphiti) to surface chunks that share an entity but miss the lexical
1507
+ * match. Wikilinks are the cheapest available edge — Clementine already
1508
+ * extracts them on every vault sync. The richer FalkorDB graph adds
1509
+ * temporal validity and entity types but isn't required for this lift.
1510
+ */
1511
+ expandViaWikilinks(seeds, opts = {}) {
1512
+ const boost = opts.boost ?? 0.7;
1513
+ const limitPerFile = opts.limitPerFile ?? 1;
1514
+ const maxNeighbors = opts.maxNeighbors ?? 10;
1515
+ if (seeds.length === 0)
1516
+ return [];
1517
+ const seedFiles = new Set(seeds.map((s) => s.sourceFile));
1518
+ const seedFilesArr = [...seedFiles];
1519
+ if (seedFilesArr.length === 0)
1520
+ return [];
1521
+ // Wikilinks store the raw bracket text (e.g. "hub" from "[[hub]]"), not
1522
+ // the resolved path "hub.md". Match against both forms when looking up
1523
+ // backlinks so a seed of "hub.md" finds rows where target_file = "hub".
1524
+ const seedBasenames = seedFilesArr.map((f) => path.basename(f, '.md'));
1525
+ const fwdMatchSet = new Set([...seedFilesArr]);
1526
+ const backMatchSet = new Set([...seedFilesArr, ...seedBasenames]);
1527
+ const fwdArr = [...fwdMatchSet];
1528
+ const backArr = [...backMatchSet];
1529
+ const fwdPh = fwdArr.map(() => '?').join(',');
1530
+ const backPh = backArr.map(() => '?').join(',');
1531
+ const neighborFiles = new Set();
1532
+ try {
1533
+ const fwdRows = this.conn
1534
+ .prepare(`SELECT DISTINCT target_file FROM wikilinks WHERE source_file IN (${fwdPh})`)
1535
+ .all(...fwdArr);
1536
+ for (const r of fwdRows) {
1537
+ // target_file may be a basename — try both with and without .md.
1538
+ const candidates = [r.target_file, `${r.target_file}.md`];
1539
+ for (const c of candidates) {
1540
+ if (!seedFiles.has(c))
1541
+ neighborFiles.add(c);
1542
+ }
1543
+ }
1544
+ const backRows = this.conn
1545
+ .prepare(`SELECT DISTINCT source_file FROM wikilinks WHERE target_file IN (${backPh})`)
1546
+ .all(...backArr);
1547
+ for (const r of backRows) {
1548
+ if (!seedFiles.has(r.source_file))
1549
+ neighborFiles.add(r.source_file);
1550
+ }
1551
+ }
1552
+ catch {
1553
+ // wikilinks lookup failure — graph expansion is optional.
1554
+ return [];
1555
+ }
1556
+ if (neighborFiles.size === 0)
1557
+ return [];
1558
+ const neighborFilesArr = [...neighborFiles].slice(0, maxNeighbors * 2);
1559
+ const filePh = neighborFilesArr.map(() => '?').join(',');
1560
+ const args = [...neighborFilesArr];
1561
+ let agentClause = '';
1562
+ if (opts.agentSlug && opts.strict) {
1563
+ agentClause = 'AND (c.agent_slug = ? OR c.agent_slug IS NULL)';
1564
+ args.push(opts.agentSlug);
1565
+ }
1566
+ const chunkRows = this.conn
1567
+ .prepare(`SELECT c.id, c.source_file, c.section, c.content, c.chunk_type,
1568
+ c.salience, c.agent_slug, c.category, c.topic, c.updated_at,
1569
+ c.last_outcome_score, c.pinned
1570
+ FROM chunks c
1571
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
1572
+ WHERE c.source_file IN (${filePh})
1573
+ AND c.chunk_type != 'frontmatter'
1574
+ AND sd.chunk_id IS NULL
1575
+ ${agentClause}
1576
+ ORDER BY c.salience DESC, c.updated_at DESC`)
1577
+ .all(...args);
1578
+ const seedScoreMax = Math.max(...seeds.map((s) => s.score), 1);
1579
+ const perFile = new Map();
1580
+ const out = [];
1581
+ for (const r of chunkRows) {
1582
+ const count = perFile.get(r.source_file) ?? 0;
1583
+ if (count >= limitPerFile)
1584
+ continue;
1585
+ perFile.set(r.source_file, count + 1);
1586
+ out.push({
1587
+ sourceFile: r.source_file,
1588
+ section: r.section,
1589
+ content: r.content,
1590
+ score: seedScoreMax * boost * (1 + (r.salience ?? 0)),
1591
+ chunkType: r.chunk_type,
1592
+ matchType: 'graph',
1593
+ lastUpdated: r.updated_at,
1594
+ chunkId: r.id,
1595
+ salience: r.salience ?? 0,
1596
+ lastOutcomeScore: r.last_outcome_score ?? 0,
1597
+ agentSlug: r.agent_slug ?? undefined,
1598
+ category: r.category,
1599
+ topic: r.topic,
1600
+ pinned: !!r.pinned,
1601
+ });
1602
+ if (out.length >= maxNeighbors)
1603
+ break;
1604
+ }
1605
+ return out;
1606
+ }
1472
1607
  /**
1473
1608
  * Combined FTS5 relevance + recency search for context injection.
1474
1609
  *
@@ -1477,6 +1612,7 @@ export class MemoryStore {
1477
1612
  * 2. Recency fetch -> N most recent chunks
1478
1613
  * 3. Deduplicate by (source_file, section)
1479
1614
  * 4. Apply salience boost to FTS results
1615
+ * 5. Wikilink graph expansion -> 1-hop neighbors of top seeds (boost 0.7×)
1480
1616
  */
1481
1617
  searchContext(query, limitOrOpts = 3, recencyLimitArg = 5) {
1482
1618
  let limit;
@@ -1573,8 +1709,23 @@ export class MemoryStore {
1573
1709
  }
1574
1710
  // 3. Recency
1575
1711
  const recentResults = this.getRecentChunks(recencyLimit, agentSlug, tagFilters, strict);
1576
- // 4. Merge and deduplicate (FTS results first, then vector, then recency)
1577
- const merged = [...ftsResults, ...vectorResults, ...recentResults];
1712
+ // 3b. 1-hop wikilink expansion. Only run when we have seed candidates so
1713
+ // we don't blow up on empty queries; bounded to keep MMR cost flat.
1714
+ let graphResults = [];
1715
+ try {
1716
+ const seeds = [...ftsResults.slice(0, 5), ...vectorResults.slice(0, 5)];
1717
+ if (seeds.length > 0) {
1718
+ graphResults = this.expandViaWikilinks(seeds, {
1719
+ agentSlug,
1720
+ strict,
1721
+ maxNeighbors: 5,
1722
+ limitPerFile: 1,
1723
+ });
1724
+ }
1725
+ }
1726
+ catch { /* graph expansion is optional — never fail the whole retrieval */ }
1727
+ // 4. Merge and deduplicate (FTS first, then vector, then graph, then recency)
1728
+ const merged = [...ftsResults, ...vectorResults, ...graphResults, ...recentResults];
1578
1729
  const finalResults = mmrRerank(deduplicateResults(merged), 0.7, limit + recencyLimit);
1579
1730
  // 5. Log recall trace if session context provided. Skipped for internal
1580
1731
  // calls (e.g. consolidation, dedup checks) by passing skipTrace=true.
@@ -1596,6 +1747,24 @@ export class MemoryStore {
1596
1747
  * Non-fatal: errors are swallowed so retrieval never fails on logging issues.
1597
1748
  */
1598
1749
  logRecallTrace(opts) {
1750
+ if (opts.chunkIds.length === 0)
1751
+ return;
1752
+ if (this.writeQueue) {
1753
+ this.writeQueue.enqueue({
1754
+ kind: 'recall',
1755
+ sessionKey: opts.sessionKey,
1756
+ messageId: opts.messageId ?? null,
1757
+ query: opts.query,
1758
+ chunkIds: [...opts.chunkIds],
1759
+ scores: [...opts.scores],
1760
+ agentSlug: opts.agentSlug ?? null,
1761
+ });
1762
+ return;
1763
+ }
1764
+ this._logRecallTraceSync(opts);
1765
+ }
1766
+ /** Internal sync recall_trace insert. Called by the WriteQueue. */
1767
+ _logRecallTraceSync(opts) {
1599
1768
  if (opts.chunkIds.length === 0)
1600
1769
  return;
1601
1770
  try {
@@ -1661,23 +1830,75 @@ export class MemoryStore {
1661
1830
  getChunksByIds(chunkIds) {
1662
1831
  if (chunkIds.length === 0)
1663
1832
  return [];
1664
- const placeholders = chunkIds.map(() => '?').join(',');
1833
+ // Hot-cache pass — split into hits (return as-is) and misses (need SQL).
1834
+ const out = [];
1835
+ const misses = [];
1836
+ for (const id of chunkIds) {
1837
+ const cached = this.chunkRowCache.get(id);
1838
+ if (cached)
1839
+ out.push(cached);
1840
+ else
1841
+ misses.push(id);
1842
+ }
1843
+ if (misses.length === 0)
1844
+ return out;
1845
+ const placeholders = misses.map(() => '?').join(',');
1665
1846
  const rows = this.conn.prepare(`SELECT id, source_file, section, content, chunk_type, agent_slug,
1666
1847
  pinned, consolidated, derived_from, salience, updated_at
1667
- FROM chunks WHERE id IN (${placeholders})`).all(...chunkIds);
1668
- return rows.map((r) => ({
1669
- id: r.id,
1670
- sourceFile: r.source_file,
1671
- section: r.section,
1672
- content: r.content,
1673
- chunkType: r.chunk_type,
1674
- agentSlug: r.agent_slug,
1675
- pinned: !!r.pinned,
1676
- consolidated: !!r.consolidated,
1677
- derivedFrom: r.derived_from ? this._parseJsonArray(r.derived_from) : null,
1678
- salience: r.salience,
1679
- updatedAt: r.updated_at,
1680
- }));
1848
+ FROM chunks WHERE id IN (${placeholders})`).all(...misses);
1849
+ for (const r of rows) {
1850
+ const shaped = {
1851
+ id: r.id,
1852
+ sourceFile: r.source_file,
1853
+ section: r.section,
1854
+ content: r.content,
1855
+ chunkType: r.chunk_type,
1856
+ agentSlug: r.agent_slug,
1857
+ pinned: !!r.pinned,
1858
+ consolidated: !!r.consolidated,
1859
+ derivedFrom: r.derived_from ? this._parseJsonArray(r.derived_from) : null,
1860
+ salience: r.salience,
1861
+ updatedAt: r.updated_at,
1862
+ };
1863
+ this.chunkRowCache.set(r.id, shaped);
1864
+ out.push(shaped);
1865
+ }
1866
+ return out;
1867
+ }
1868
+ /** Cache stats for the dashboard / debugging. */
1869
+ getChunkCacheStats() {
1870
+ return this.chunkRowCache.stats();
1871
+ }
1872
+ // ── Async write queue lifecycle ─────────────────────────────────
1873
+ /**
1874
+ * Enable the write-behind queue. After this call, saveTurn / recordAccess /
1875
+ * recordOutcome / logRecallTrace enqueue instead of running SQL on the
1876
+ * caller's thread. Idempotent. Tests leave this off and rely on the sync path.
1877
+ */
1878
+ enableWriteQueue(opts = {}) {
1879
+ if (this.writeQueue)
1880
+ return;
1881
+ this.writeQueue = new WriteQueue(this, opts);
1882
+ this.writeQueue.start();
1883
+ }
1884
+ /** Drain and stop the write queue. Call on graceful shutdown. */
1885
+ async flushWrites() {
1886
+ if (!this.writeQueue)
1887
+ return;
1888
+ await this.writeQueue.drain();
1889
+ this.writeQueue = null;
1890
+ }
1891
+ /** Stats for the dashboard / debugging. Returns null when queue disabled. */
1892
+ getWriteQueueStats() {
1893
+ return this.writeQueue ? this.writeQueue.stats() : null;
1894
+ }
1895
+ /** Drop a single cache entry — called from mutations that touch a chunk. */
1896
+ invalidateChunkCache(chunkId) {
1897
+ this.chunkRowCache.delete(chunkId);
1898
+ }
1899
+ /** Drop the whole cache — fullSync and similar bulk operations call this. */
1900
+ clearChunkCache() {
1901
+ this.chunkRowCache.clear();
1681
1902
  }
1682
1903
  _parseJsonArray(json) {
1683
1904
  try {
@@ -1941,6 +2162,51 @@ export class MemoryStore {
1941
2162
  }
1942
2163
  return scored.sort((a, b) => b.score - a.score).slice(0, limit);
1943
2164
  }
2165
+ /**
2166
+ * Pre-embed the top N most-cited chunks at startup. Eliminates cold-start
2167
+ * latency for the chunks the agent is most likely to retrieve next. Skips
2168
+ * chunks that already have a current-model dense embedding.
2169
+ *
2170
+ * Ranking: by outcome citation count in the last 30d (chunks the agent
2171
+ * actually used), tiebroken by recency. Soft-deleted excluded.
2172
+ */
2173
+ async warmDenseEmbeddings(topN = 200) {
2174
+ const currentModel = embeddingsModule.currentDenseModel();
2175
+ const candidates = this.conn.prepare(`SELECT c.id, c.content, COUNT(o.id) AS refs
2176
+ FROM chunks c
2177
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
2178
+ LEFT JOIN outcomes o ON o.chunk_id = c.id
2179
+ AND o.referenced = 1
2180
+ AND o.created_at > datetime('now', '-30 days')
2181
+ WHERE sd.chunk_id IS NULL
2182
+ AND length(c.content) >= 1
2183
+ AND (c.embedding_dense IS NULL OR c.embedding_dense_model IS NULL OR c.embedding_dense_model != ?)
2184
+ GROUP BY c.id
2185
+ HAVING refs > 0
2186
+ ORDER BY refs DESC, c.updated_at DESC
2187
+ LIMIT ?`).all(currentModel, topN);
2188
+ let warmed = 0;
2189
+ let failed = 0;
2190
+ if (candidates.length === 0)
2191
+ return { warmed: 0, skipped: 0, failed: 0 };
2192
+ const updateStmt = this.conn.prepare(`UPDATE chunks SET embedding_dense = ?, embedding_dense_model = ? WHERE id = ?`);
2193
+ for (const c of candidates) {
2194
+ try {
2195
+ const vec = await embeddingsModule.embedDense(c.content, false);
2196
+ if (vec) {
2197
+ updateStmt.run(embeddingsModule.serializeEmbedding(vec), currentModel, c.id);
2198
+ warmed++;
2199
+ }
2200
+ else {
2201
+ failed++;
2202
+ }
2203
+ }
2204
+ catch {
2205
+ failed++;
2206
+ }
2207
+ }
2208
+ return { warmed, skipped: 0, failed };
2209
+ }
1944
2210
  /**
1945
2211
  * Backfill dense embeddings on chunks that don't yet have one (or that
1946
2212
  * were embedded by an older model). Async because the dense model itself
@@ -2052,9 +2318,18 @@ export class MemoryStore {
2052
2318
  }
2053
2319
  // ── Transcripts ───────────────────────────────────────────────────
2054
2320
  /**
2055
- * Save a conversation turn to the transcripts table.
2321
+ * Save a conversation turn to the transcripts table. Routes through the
2322
+ * write queue when enabled so the request thread doesn't block on SQL.
2056
2323
  */
2057
2324
  saveTurn(sessionKey, role, content, model = '') {
2325
+ if (this.writeQueue) {
2326
+ this.writeQueue.enqueue({ kind: 'transcript-turn', sessionKey, role, content, model });
2327
+ return;
2328
+ }
2329
+ this._saveTurnSync(sessionKey, role, content, model);
2330
+ }
2331
+ /** Internal sync transcript insert. Called directly by the WriteQueue. */
2332
+ _saveTurnSync(sessionKey, role, content, model) {
2058
2333
  if (!this._stmtInsertTranscript) {
2059
2334
  this._stmtInsertTranscript = this.conn.prepare('INSERT INTO transcripts (session_key, role, content, model) VALUES (?, ?, ?, ?)');
2060
2335
  }
@@ -2151,9 +2426,20 @@ export class MemoryStore {
2151
2426
  }
2152
2427
  // ── Salience Tracking ─────────────────────────────────────────────
2153
2428
  /**
2154
- * Record that chunks were accessed (retrieved/displayed).
2429
+ * Record that chunks were accessed (retrieved/displayed). Routes through
2430
+ * the write queue when enabled.
2155
2431
  */
2156
2432
  recordAccess(chunkIds, accessType = 'retrieval') {
2433
+ if (chunkIds.length === 0)
2434
+ return;
2435
+ if (this.writeQueue) {
2436
+ this.writeQueue.enqueue({ kind: 'access', chunkIds: [...chunkIds], accessType });
2437
+ return;
2438
+ }
2439
+ this._recordAccessSync(chunkIds, accessType);
2440
+ }
2441
+ /** Internal sync access log insert. Called directly by the WriteQueue. */
2442
+ _recordAccessSync(chunkIds, accessType) {
2157
2443
  if (chunkIds.length === 0)
2158
2444
  return;
2159
2445
  const insertStmt = this.conn.prepare('INSERT INTO access_log (chunk_id, access_type) VALUES (?, ?)');
@@ -2207,6 +2493,20 @@ export class MemoryStore {
2207
2493
  * can't dominate salience + BM25 + vector score.
2208
2494
  */
2209
2495
  recordOutcome(outcomes, sessionKey) {
2496
+ if (outcomes.length === 0)
2497
+ return;
2498
+ if (this.writeQueue) {
2499
+ this.writeQueue.enqueue({
2500
+ kind: 'outcome',
2501
+ outcomes: outcomes.map((o) => ({ ...o })),
2502
+ sessionKey: sessionKey ?? null,
2503
+ });
2504
+ return;
2505
+ }
2506
+ this._recordOutcomeSync(outcomes, sessionKey);
2507
+ }
2508
+ /** Internal sync outcome insert + EMA update. Called by the WriteQueue. */
2509
+ _recordOutcomeSync(outcomes, sessionKey) {
2210
2510
  if (outcomes.length === 0)
2211
2511
  return;
2212
2512
  const alpha = 0.3; // EMA weight on the new observation
@@ -2785,6 +3085,324 @@ export class MemoryStore {
2785
3085
  recallTracesPruned,
2786
3086
  };
2787
3087
  }
3088
+ // ── Staleness detection ─────────────────────────────────────────
3089
+ /**
3090
+ * User-model slots whose `updated_at` is older than maxAgeDays. These are
3091
+ * candidates for the "verify or refresh" nudge — high-relevance memories
3092
+ * that may have become silently wrong (Mem0 2026 calls this out as an
3093
+ * open problem; we surface it via observability rather than auto-decay).
3094
+ *
3095
+ * Empty content is skipped (an empty slot has no claim to verify).
3096
+ */
3097
+ findStaleUserModelSlots(opts = {}) {
3098
+ const maxAge = opts.maxAgeDays ?? 90;
3099
+ const rows = this.conn
3100
+ .prepare(`SELECT slot, agent_slug,
3101
+ CAST(strftime('%s', 'now') - strftime('%s', updated_at) AS INTEGER) AS age_seconds
3102
+ FROM user_model_blocks
3103
+ WHERE length(content) > 0
3104
+ AND updated_at < datetime('now', ?)
3105
+ AND COALESCE(agent_slug, '') = COALESCE(?, '')`)
3106
+ .all(`-${maxAge} days`, opts.agentSlug ?? null);
3107
+ return rows.map((r) => ({
3108
+ slot: r.slot,
3109
+ agentSlug: r.agent_slug,
3110
+ ageDays: Math.round(r.age_seconds / 86400),
3111
+ }));
3112
+ }
3113
+ /**
3114
+ * High-salience chunks whose outcome EMA has drifted negative — i.e., we
3115
+ * keep ranking them high but the agent stopped citing them. Strong signal
3116
+ * that the chunk is stale or wrong even though salience hasn't decayed.
3117
+ *
3118
+ * Conservative threshold: salience > 0.8 AND last_outcome_score < 0.
3119
+ * Soft-deleted excluded.
3120
+ */
3121
+ findStaleHighSalienceChunks(opts = {}) {
3122
+ const salienceFloor = opts.salienceFloor ?? 0.8;
3123
+ const outcomeCeiling = opts.outcomeCeiling ?? 0;
3124
+ const limit = opts.limit ?? 25;
3125
+ const rows = this.conn
3126
+ .prepare(`SELECT c.id, c.source_file, c.section, c.salience, c.last_outcome_score
3127
+ FROM chunks c
3128
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
3129
+ WHERE sd.chunk_id IS NULL
3130
+ AND c.salience > ?
3131
+ AND COALESCE(c.last_outcome_score, 0) < ?
3132
+ ORDER BY c.salience DESC
3133
+ LIMIT ?`)
3134
+ .all(salienceFloor, outcomeCeiling, limit);
3135
+ return rows.map((r) => ({
3136
+ chunkId: r.id,
3137
+ sourceFile: r.source_file,
3138
+ section: r.section,
3139
+ salience: r.salience,
3140
+ lastOutcomeScore: r.last_outcome_score ?? 0,
3141
+ }));
3142
+ }
3143
+ /**
3144
+ * Format staleness findings into ready-to-inject prompt text. Heartbeat
3145
+ * builders can drop this into the system prompt verbatim. Returns null
3146
+ * if there's nothing to nudge about — caller should not inject empty text.
3147
+ */
3148
+ getStalenessNudges(opts = {}) {
3149
+ const stale = this.findStaleUserModelSlots({
3150
+ maxAgeDays: opts.maxSlotAgeDays,
3151
+ agentSlug: opts.agentSlug,
3152
+ });
3153
+ if (stale.length === 0)
3154
+ return null;
3155
+ const lines = stale.map((s) => `- \`${s.slot}\` is ${s.ageDays}d old`);
3156
+ return [
3157
+ 'User-model maintenance:',
3158
+ ...lines,
3159
+ 'Verify or refresh these during the next natural turn — do not force a check-in.',
3160
+ ].join('\n');
3161
+ }
3162
+ // ── Procedural memory ───────────────────────────────────────────
3163
+ /**
3164
+ * Find procedure chunks whose frontmatter `triggers` overlap with words
3165
+ * in the query. Used to surface learned workflows ("how Nate ships a
3166
+ * release", "how to handle inbound replies") above generic facts when
3167
+ * the user's intent matches.
3168
+ *
3169
+ * Match rule: case-insensitive substring of any trigger phrase appears
3170
+ * in the query. Empty result if no procedure chunks exist or no triggers
3171
+ * match — caller should treat this as additive context, not the whole
3172
+ * answer.
3173
+ */
3174
+ findRelevantProcedures(query, opts = {}) {
3175
+ const limit = opts.limit ?? 5;
3176
+ const q = query.toLowerCase();
3177
+ // Exclude the frontmatter chunk — chunker emits one per file (a key:val
3178
+ // dump) which inherits category=procedure from the parent. Only the body
3179
+ // chunks contain the actual procedure content the agent wants to recall.
3180
+ const rows = this.conn
3181
+ .prepare(`SELECT c.id, c.source_file, c.section, c.content, c.frontmatter_json
3182
+ FROM chunks c
3183
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
3184
+ WHERE c.category = 'procedure'
3185
+ AND c.chunk_type != 'frontmatter'
3186
+ AND sd.chunk_id IS NULL
3187
+ AND (c.agent_slug IS NULL OR c.agent_slug = COALESCE(?, c.agent_slug))`)
3188
+ .all(opts.agentSlug ?? null);
3189
+ const matches = [];
3190
+ for (const row of rows) {
3191
+ let triggers = [];
3192
+ try {
3193
+ const fm = row.frontmatter_json ? JSON.parse(row.frontmatter_json) : {};
3194
+ if (Array.isArray(fm.triggers)) {
3195
+ triggers = fm.triggers.map((t) => String(t).toLowerCase()).filter(Boolean);
3196
+ }
3197
+ }
3198
+ catch { /* malformed frontmatter — skip */ }
3199
+ if (triggers.length === 0)
3200
+ continue;
3201
+ const matched = triggers.filter((t) => q.includes(t));
3202
+ if (matched.length === 0)
3203
+ continue;
3204
+ matches.push({
3205
+ id: row.id,
3206
+ sourceFile: row.source_file,
3207
+ section: row.section,
3208
+ content: row.content,
3209
+ triggers,
3210
+ matched,
3211
+ });
3212
+ }
3213
+ // Most-matched first, then most-specific (longest matched trigger).
3214
+ matches.sort((a, b) => {
3215
+ if (b.matched.length !== a.matched.length)
3216
+ return b.matched.length - a.matched.length;
3217
+ const aMax = Math.max(...a.matched.map((m) => m.length));
3218
+ const bMax = Math.max(...b.matched.map((m) => m.length));
3219
+ return bMax - aMax;
3220
+ });
3221
+ return matches.slice(0, limit);
3222
+ }
3223
+ // ── Janitor: bounded growth ─────────────────────────────────────
3224
+ /** Persistent key/value for janitor state (last vacuum, etc.). */
3225
+ getMaintenanceMeta(key) {
3226
+ const row = this.conn
3227
+ .prepare('SELECT value FROM maintenance_meta WHERE key = ?')
3228
+ .get(key);
3229
+ return row?.value ?? null;
3230
+ }
3231
+ setMaintenanceMeta(key, value) {
3232
+ this.conn
3233
+ .prepare(`INSERT INTO maintenance_meta (key, value) VALUES (?, ?)
3234
+ ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = datetime('now')`)
3235
+ .run(key, value);
3236
+ }
3237
+ /**
3238
+ * Two-phase delete for consolidated, low-salience, unused chunks.
3239
+ *
3240
+ * Phase 1: soft-delete chunks where consolidated=1, not pinned, salience
3241
+ * below floor, and never accessed (or last access older than
3242
+ * expireDays).
3243
+ * Phase 2: physically delete chunks that have been in chunk_soft_deletes
3244
+ * for graceDays. Cascades to access_log, outcomes, chunk_history
3245
+ * for the same chunk_id.
3246
+ *
3247
+ * Summary chunks whose `derived_from` references the deleted IDs are
3248
+ * intentionally NOT propagate-deleted — the summary still encodes signal.
3249
+ */
3250
+ expireConsolidated(opts = {}) {
3251
+ const expireDays = opts.expireDays ?? 60;
3252
+ const salienceFloor = opts.salienceFloor ?? 0.2;
3253
+ const graceDays = opts.graceDays ?? 14;
3254
+ // Phase 1 — soft-delete candidates.
3255
+ const candidates = this.conn
3256
+ .prepare(`SELECT c.id
3257
+ FROM chunks c
3258
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
3259
+ LEFT JOIN (
3260
+ SELECT chunk_id, MAX(accessed_at) as last_access
3261
+ FROM access_log GROUP BY chunk_id
3262
+ ) a ON a.chunk_id = c.id
3263
+ WHERE c.consolidated = 1
3264
+ AND COALESCE(c.pinned, 0) = 0
3265
+ AND COALESCE(c.salience, 0) < ?
3266
+ AND sd.chunk_id IS NULL
3267
+ AND (a.last_access IS NULL OR a.last_access < datetime('now', ?))
3268
+ AND c.created_at < datetime('now', ?)`)
3269
+ .all(salienceFloor, `-${expireDays} days`, `-${expireDays} days`);
3270
+ let softDeleted = 0;
3271
+ for (const row of candidates) {
3272
+ if (this.softDeleteChunk(row.id, 'janitor'))
3273
+ softDeleted++;
3274
+ }
3275
+ // Phase 2 — physical delete after grace period.
3276
+ const stale = this.conn
3277
+ .prepare(`SELECT chunk_id FROM chunk_soft_deletes
3278
+ WHERE deleted_at < datetime('now', ?)`)
3279
+ .all(`-${graceDays} days`);
3280
+ let physicallyDeleted = 0;
3281
+ if (stale.length > 0) {
3282
+ // softDeleteChunk removed these rows from chunks_fts. The chunks_ad
3283
+ // trigger fires on DELETE FROM chunks and tries to remove them again —
3284
+ // FTS5 contentless tables corrupt ("database disk image is malformed")
3285
+ // when you delete a docid that's already gone. Re-add the row to FTS
3286
+ // first so the trigger can do a clean delete.
3287
+ const fetchRow = this.conn.prepare(`SELECT id, source_file, section, content FROM chunks WHERE id = ?`);
3288
+ const reAddFts = this.conn.prepare(`INSERT INTO chunks_fts (rowid, source_file, section, content) VALUES (?, ?, ?, ?)`);
3289
+ const delChunk = this.conn.prepare('DELETE FROM chunks WHERE id = ?');
3290
+ const delSoft = this.conn.prepare('DELETE FROM chunk_soft_deletes WHERE chunk_id = ?');
3291
+ const delAccess = this.conn.prepare('DELETE FROM access_log WHERE chunk_id = ?');
3292
+ const delOutcomes = this.conn.prepare('DELETE FROM outcomes WHERE chunk_id = ?');
3293
+ const delHistory = this.conn.prepare('DELETE FROM chunk_history WHERE chunk_id = ?');
3294
+ const tx = this.conn.transaction((rows) => {
3295
+ for (const r of rows) {
3296
+ delAccess.run(r.chunk_id);
3297
+ delOutcomes.run(r.chunk_id);
3298
+ delHistory.run(r.chunk_id);
3299
+ delSoft.run(r.chunk_id);
3300
+ const chunkRow = fetchRow.get(r.chunk_id);
3301
+ if (chunkRow) {
3302
+ try {
3303
+ reAddFts.run(chunkRow.id, chunkRow.source_file, chunkRow.section, chunkRow.content);
3304
+ }
3305
+ catch {
3306
+ // Already in FTS (chunk was never soft-removed from FTS) — fine.
3307
+ }
3308
+ const result = delChunk.run(r.chunk_id);
3309
+ if (result.changes > 0)
3310
+ physicallyDeleted++;
3311
+ }
3312
+ }
3313
+ });
3314
+ tx(stale);
3315
+ // Invalidate cache for all physically-deleted ids.
3316
+ for (const r of stale)
3317
+ this.chunkRowCache.delete(r.chunk_id);
3318
+ }
3319
+ return { softDeleted, physicallyDeleted };
3320
+ }
3321
+ /** Trim outcomes table to a rolling window. Append-only, can grow fast. */
3322
+ pruneOutcomes(retentionDays = 30) {
3323
+ const result = this.conn
3324
+ .prepare(`DELETE FROM outcomes WHERE created_at < datetime('now', ?)`)
3325
+ .run(`-${retentionDays} days`);
3326
+ return result.changes;
3327
+ }
3328
+ /**
3329
+ * Cap memory_extractions to maxRows. Deletes oldest non-active rows first;
3330
+ * 'active' extractions are preserved regardless of count to protect the
3331
+ * audit trail for in-flight work.
3332
+ */
3333
+ capExtractions(maxRows = 50000) {
3334
+ let count;
3335
+ try {
3336
+ count = this.conn.prepare('SELECT COUNT(*) as c FROM memory_extractions').get().c;
3337
+ }
3338
+ catch {
3339
+ return 0; // table missing on first boot
3340
+ }
3341
+ if (count <= maxRows)
3342
+ return 0;
3343
+ const overflow = count - maxRows;
3344
+ const result = this.conn
3345
+ .prepare(`DELETE FROM memory_extractions
3346
+ WHERE id IN (
3347
+ SELECT id FROM memory_extractions
3348
+ WHERE status != 'active'
3349
+ ORDER BY extracted_at ASC
3350
+ LIMIT ?
3351
+ )`)
3352
+ .run(overflow);
3353
+ return result.changes;
3354
+ }
3355
+ /** Approximate SQLite database file size on disk, in bytes. */
3356
+ dbSizeBytes() {
3357
+ try {
3358
+ return statSync(this.dbPath).size;
3359
+ }
3360
+ catch {
3361
+ return 0;
3362
+ }
3363
+ }
3364
+ /**
3365
+ * VACUUM the database. Reclaims space from deleted rows. Holds an
3366
+ * exclusive lock for the duration — caller is expected to gate on
3367
+ * idleness (see lastActivityAt).
3368
+ */
3369
+ vacuum() {
3370
+ const sizeBeforeBytes = this.dbSizeBytes();
3371
+ const start = Date.now();
3372
+ this.conn.exec('VACUUM');
3373
+ return {
3374
+ sizeBeforeBytes,
3375
+ sizeAfterBytes: this.dbSizeBytes(),
3376
+ durationMs: Date.now() - start,
3377
+ };
3378
+ }
3379
+ /**
3380
+ * Most recent timestamp across the high-write activity tables, as a Unix
3381
+ * milliseconds value. Returns null if all tables are empty. Used by the
3382
+ * janitor's idle gate.
3383
+ *
3384
+ * Implementation note: SQLite's datetime() returns "YYYY-MM-DD HH:MM:SS"
3385
+ * in UTC with no timezone marker — JS Date.parse interprets that as local
3386
+ * time and skews by the offset. We compute the unix epoch in SQL to avoid
3387
+ * the bug entirely.
3388
+ */
3389
+ lastActivityAt() {
3390
+ try {
3391
+ const row = this.conn
3392
+ .prepare(`SELECT MAX(unix_t) as last FROM (
3393
+ SELECT CAST(strftime('%s', retrieved_at) AS INTEGER) as unix_t FROM recall_traces
3394
+ UNION ALL
3395
+ SELECT CAST(strftime('%s', accessed_at) AS INTEGER) as unix_t FROM access_log
3396
+ UNION ALL
3397
+ SELECT CAST(strftime('%s', created_at) AS INTEGER) as unix_t FROM transcripts
3398
+ )`)
3399
+ .get();
3400
+ return row.last !== null && row.last !== undefined ? row.last * 1000 : null;
3401
+ }
3402
+ catch {
3403
+ return null;
3404
+ }
3405
+ }
2788
3406
  // ── Timeline Query ─────────────────────────────────────────────
2789
3407
  /**
2790
3408
  * Get chunks within a date range, ordered chronologically.
@@ -3364,6 +3982,112 @@ export class MemoryStore {
3364
3982
  WHERE id IN (${placeholders})`)
3365
3983
  .run(...chunkIds);
3366
3984
  }
3985
+ /**
3986
+ * Aggregate memory-health snapshot for the dashboard.
3987
+ *
3988
+ * Single-pass queries over each table; cheap enough to call on every
3989
+ * dashboard tab visit without caching. Adds graph stats only if a
3990
+ * graphStore is supplied and reachable.
3991
+ */
3992
+ getMemoryHealth(opts = {}) {
3993
+ const topLimit = opts.topCitedLimit ?? 10;
3994
+ const chunkAgg = this.conn
3995
+ .prepare(`SELECT
3996
+ COUNT(*) AS total,
3997
+ COALESCE(SUM(CASE WHEN consolidated = 1 THEN 1 ELSE 0 END), 0) AS consolidated,
3998
+ COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned
3999
+ FROM chunks`)
4000
+ .get();
4001
+ const softDeletedRow = this.conn
4002
+ .prepare('SELECT COUNT(*) AS c FROM chunk_soft_deletes')
4003
+ .get();
4004
+ // Zombies: consolidated AND no access in last 30d (or never accessed at all).
4005
+ const zombieRow = this.conn
4006
+ .prepare(`SELECT COUNT(*) AS c
4007
+ FROM chunks c
4008
+ LEFT JOIN (
4009
+ SELECT chunk_id, MAX(accessed_at) AS la
4010
+ FROM access_log GROUP BY chunk_id
4011
+ ) a ON a.chunk_id = c.id
4012
+ LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
4013
+ WHERE c.consolidated = 1
4014
+ AND sd.chunk_id IS NULL
4015
+ AND (a.la IS NULL OR a.la < datetime('now', '-30 days'))`)
4016
+ .get();
4017
+ const byCategory = this.conn
4018
+ .prepare(`SELECT COALESCE(category, '(uncategorized)') AS category, COUNT(*) AS count
4019
+ FROM chunks GROUP BY category ORDER BY count DESC`)
4020
+ .all();
4021
+ // Row counts for the high-write tables (cheap COUNT(*) per table).
4022
+ const trackedTables = [
4023
+ 'chunks',
4024
+ 'chunks_fts',
4025
+ 'recall_traces',
4026
+ 'access_log',
4027
+ 'outcomes',
4028
+ 'transcripts',
4029
+ 'session_summaries',
4030
+ 'memory_extractions',
4031
+ 'chunk_soft_deletes',
4032
+ 'chunk_history',
4033
+ 'sdk_session_entries',
4034
+ 'wikilinks',
4035
+ ];
4036
+ const tableRowCounts = {};
4037
+ for (const t of trackedTables) {
4038
+ try {
4039
+ const row = this.conn.prepare(`SELECT COUNT(*) AS c FROM ${t}`).get();
4040
+ tableRowCounts[t] = row.c;
4041
+ }
4042
+ catch {
4043
+ tableRowCounts[t] = -1;
4044
+ }
4045
+ }
4046
+ const topCited = this.conn
4047
+ .prepare(`SELECT o.chunk_id, c.source_file, c.section, COUNT(*) AS ref_count
4048
+ FROM outcomes o
4049
+ JOIN chunks c ON c.id = o.chunk_id
4050
+ WHERE o.referenced = 1
4051
+ AND o.created_at > datetime('now', '-30 days')
4052
+ GROUP BY o.chunk_id
4053
+ ORDER BY ref_count DESC
4054
+ LIMIT ?`)
4055
+ .all(topLimit);
4056
+ return {
4057
+ chunks: {
4058
+ total: chunkAgg.total,
4059
+ consolidated: chunkAgg.consolidated,
4060
+ pinned: chunkAgg.pinned,
4061
+ softDeleted: softDeletedRow.c,
4062
+ zombieCount: zombieRow.c,
4063
+ },
4064
+ chunksByCategory: byCategory,
4065
+ tableRowCounts,
4066
+ topCitedLast30d: topCited.map((r) => ({
4067
+ chunkId: r.chunk_id,
4068
+ sourceFile: r.source_file,
4069
+ section: r.section,
4070
+ refCount: r.ref_count,
4071
+ })),
4072
+ staleUserModelSlots: this.findStaleUserModelSlots(),
4073
+ staleHighSalienceChunks: this.findStaleHighSalienceChunks({ limit: 10 }),
4074
+ chunkCacheStats: this.chunkRowCache.stats(),
4075
+ writeQueue: this.getWriteQueueStats(),
4076
+ lastIntegrityReport: (() => {
4077
+ const raw = this.getMaintenanceMeta('last_integrity_report');
4078
+ if (!raw)
4079
+ return null;
4080
+ try {
4081
+ return JSON.parse(raw);
4082
+ }
4083
+ catch {
4084
+ return null;
4085
+ }
4086
+ })(),
4087
+ dbSizeBytes: this.dbSizeBytes(),
4088
+ lastVacuumAt: this.getMaintenanceMeta('last_vacuum_at'),
4089
+ };
4090
+ }
3367
4091
  /**
3368
4092
  * Get consolidation stats for monitoring.
3369
4093
  */
@@ -3753,6 +4477,10 @@ export class MemoryStore {
3753
4477
  * Delete all chunks, wikilinks, file hash, and access log for a given file.
3754
4478
  */
3755
4479
  deleteFileChunks(relPath) {
4480
+ // Capture chunk ids first so we can invalidate the LRU after the deletes.
4481
+ const ids = this.conn
4482
+ .prepare('SELECT id FROM chunks WHERE source_file = ?')
4483
+ .all(relPath);
3756
4484
  // Delete access_log entries for chunks being removed (prevent orphans)
3757
4485
  this.conn
3758
4486
  .prepare('DELETE FROM access_log WHERE chunk_id IN (SELECT id FROM chunks WHERE source_file = ?)')
@@ -3760,6 +4488,8 @@ export class MemoryStore {
3760
4488
  this.conn.prepare('DELETE FROM chunks WHERE source_file = ?').run(relPath);
3761
4489
  this.conn.prepare('DELETE FROM wikilinks WHERE source_file = ?').run(relPath);
3762
4490
  this.conn.prepare('DELETE FROM file_hashes WHERE rel_path = ?').run(relPath);
4491
+ for (const r of ids)
4492
+ this.chunkRowCache.delete(r.id);
3763
4493
  }
3764
4494
  /**
3765
4495
  * Sanitize a query for FTS5 syntax.