clementine-agent 1.2.2 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agent/assistant.js +12 -0
- package/dist/cli/dashboard.js +3034 -734
- package/dist/cli/static/LICENSE-NOTICES.md +12 -0
- package/dist/cli/static/drawflow.min.css +1 -0
- package/dist/cli/static/drawflow.min.js +1 -0
- package/dist/config.d.ts +11 -0
- package/dist/config.js +16 -0
- package/dist/dashboard/builder/dry-run.d.ts +31 -0
- package/dist/dashboard/builder/dry-run.js +138 -0
- package/dist/dashboard/builder/events.d.ts +23 -0
- package/dist/dashboard/builder/events.js +28 -0
- package/dist/dashboard/builder/mcp-invoke.d.ts +25 -0
- package/dist/dashboard/builder/mcp-invoke.js +143 -0
- package/dist/dashboard/builder/runner.d.ts +68 -0
- package/dist/dashboard/builder/runner.js +418 -0
- package/dist/dashboard/builder/serializer.d.ts +79 -0
- package/dist/dashboard/builder/serializer.js +547 -0
- package/dist/dashboard/builder/snapshots.d.ts +32 -0
- package/dist/dashboard/builder/snapshots.js +138 -0
- package/dist/dashboard/builder/validation.d.ts +26 -0
- package/dist/dashboard/builder/validation.js +183 -0
- package/dist/gateway/router.js +31 -2
- package/dist/index.js +38 -0
- package/dist/memory/chunker.js +13 -2
- package/dist/memory/hot-cache.d.ts +38 -0
- package/dist/memory/hot-cache.js +73 -0
- package/dist/memory/integrity.d.ts +28 -0
- package/dist/memory/integrity.js +119 -0
- package/dist/memory/maintenance.d.ts +23 -2
- package/dist/memory/maintenance.js +140 -3
- package/dist/memory/store.d.ts +259 -2
- package/dist/memory/store.js +751 -21
- package/dist/memory/write-queue.d.ts +96 -0
- package/dist/memory/write-queue.js +165 -0
- package/dist/tools/builder-tools.d.ts +13 -0
- package/dist/tools/builder-tools.js +437 -0
- package/dist/tools/mcp-server.js +2 -0
- package/dist/tools/memory-tools.js +38 -1
- package/dist/types.d.ts +56 -2
- package/package.json +2 -2
- package/vault/00-System/skills/builder-canvas.md +126 -0
package/dist/memory/store.js
CHANGED
|
@@ -19,6 +19,8 @@ import * as embeddingsModule from './embeddings.js';
|
|
|
19
19
|
import { chunkFile } from './chunker.js';
|
|
20
20
|
import { mmrRerank } from './mmr.js';
|
|
21
21
|
import { deduplicateResults } from './search.js';
|
|
22
|
+
import { HotCache } from './hot-cache.js';
|
|
23
|
+
import { WriteQueue } from './write-queue.js';
|
|
22
24
|
const WIKILINK_RE = /\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g;
|
|
23
25
|
export class MemoryStore {
|
|
24
26
|
dbPath;
|
|
@@ -28,6 +30,14 @@ export class MemoryStore {
|
|
|
28
30
|
_stmtChunkCount = null;
|
|
29
31
|
_stmtInsertTranscript = null;
|
|
30
32
|
_stmtInsertUsage = null;
|
|
33
|
+
// In-process LRU for chunk-row reads. Hit on every retrieval that calls
|
|
34
|
+
// getChunksByIds; invalidated on softDeleteChunk, restoreChunk, setPinned,
|
|
35
|
+
// updateFile, and bulk fullSync. Capacity tunable; default 1000 chunks.
|
|
36
|
+
chunkRowCache = new HotCache(1000);
|
|
37
|
+
// Async write-behind queue for non-critical writes (transcripts, recall
|
|
38
|
+
// traces, outcomes, access log). Null = sync mode (default; tests rely on
|
|
39
|
+
// immediate persistence). Enabled via enableWriteQueue() at daemon boot.
|
|
40
|
+
writeQueue = null;
|
|
31
41
|
constructor(dbPath, vaultDir) {
|
|
32
42
|
this.dbPath = dbPath;
|
|
33
43
|
this.vaultDir = vaultDir;
|
|
@@ -782,6 +792,16 @@ export class MemoryStore {
|
|
|
782
792
|
);
|
|
783
793
|
CREATE UNIQUE INDEX IF NOT EXISTS idx_user_model_slot_agent
|
|
784
794
|
ON user_model_blocks(slot, COALESCE(agent_slug, ''));
|
|
795
|
+
`);
|
|
796
|
+
// Persistent key/value scratch for the memory janitor — last vacuum
|
|
797
|
+
// timestamp, last janitor run, etc. Survives daemon restarts so we
|
|
798
|
+
// don't VACUUM on every boot.
|
|
799
|
+
this.conn.exec(`
|
|
800
|
+
CREATE TABLE IF NOT EXISTS maintenance_meta (
|
|
801
|
+
key TEXT PRIMARY KEY,
|
|
802
|
+
value TEXT NOT NULL,
|
|
803
|
+
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
|
|
804
|
+
);
|
|
785
805
|
`);
|
|
786
806
|
}
|
|
787
807
|
// ── Skill usage telemetry ─────────────────────────────────────────
|
|
@@ -866,6 +886,7 @@ export class MemoryStore {
|
|
|
866
886
|
}
|
|
867
887
|
/** Toggle the manual pin flag on a chunk. Pinned chunks get a 2x score boost in recall. */
|
|
868
888
|
setPinned(chunkId, pinned) {
|
|
889
|
+
this.chunkRowCache.delete(chunkId);
|
|
869
890
|
try {
|
|
870
891
|
const result = this.conn.prepare('UPDATE chunks SET pinned = ? WHERE id = ?')
|
|
871
892
|
.run(pinned ? 1 : 0, chunkId);
|
|
@@ -966,7 +987,10 @@ export class MemoryStore {
|
|
|
966
987
|
VALUES ('delete', ?, ?, ?, ?)`).run(row.id, row.source_file, row.section, row.content);
|
|
967
988
|
return true;
|
|
968
989
|
});
|
|
969
|
-
|
|
990
|
+
const ok = tx(chunkId);
|
|
991
|
+
if (ok)
|
|
992
|
+
this.chunkRowCache.delete(chunkId);
|
|
993
|
+
return ok;
|
|
970
994
|
}
|
|
971
995
|
/** Restore a soft-deleted chunk. Removes the chunk_soft_deletes row and
|
|
972
996
|
* re-inserts the content into the FTS index. */
|
|
@@ -984,7 +1008,10 @@ export class MemoryStore {
|
|
|
984
1008
|
VALUES (?, ?, ?, ?)`).run(row.id, row.source_file, row.section, row.content);
|
|
985
1009
|
return true;
|
|
986
1010
|
});
|
|
987
|
-
|
|
1011
|
+
const ok = tx(chunkId);
|
|
1012
|
+
if (ok)
|
|
1013
|
+
this.chunkRowCache.delete(chunkId);
|
|
1014
|
+
return ok;
|
|
988
1015
|
}
|
|
989
1016
|
/** Recent edit history for a chunk (newest first). */
|
|
990
1017
|
getChunkHistory(chunkId, limit = 20) {
|
|
@@ -1469,6 +1496,114 @@ export class MemoryStore {
|
|
|
1469
1496
|
return rows.map(mapRow);
|
|
1470
1497
|
}
|
|
1471
1498
|
// ── Search: Context (Layer 3) ─────────────────────────────────────
|
|
1499
|
+
/**
|
|
1500
|
+
* 1-hop wikilink expansion: for each seed chunk's source_file, find files
|
|
1501
|
+
* that link to it or that it links to, and pull their top chunks. Returns
|
|
1502
|
+
* SearchResult-shaped rows with a fractional boost so they enter the
|
|
1503
|
+
* candidate pool below the seed scores but above pure noise.
|
|
1504
|
+
*
|
|
1505
|
+
* Pattern: 2026-frontier agent memory uses graph expansion (Mem0g, Zep
|
|
1506
|
+
* Graphiti) to surface chunks that share an entity but miss the lexical
|
|
1507
|
+
* match. Wikilinks are the cheapest available edge — Clementine already
|
|
1508
|
+
* extracts them on every vault sync. The richer FalkorDB graph adds
|
|
1509
|
+
* temporal validity and entity types but isn't required for this lift.
|
|
1510
|
+
*/
|
|
1511
|
+
expandViaWikilinks(seeds, opts = {}) {
|
|
1512
|
+
const boost = opts.boost ?? 0.7;
|
|
1513
|
+
const limitPerFile = opts.limitPerFile ?? 1;
|
|
1514
|
+
const maxNeighbors = opts.maxNeighbors ?? 10;
|
|
1515
|
+
if (seeds.length === 0)
|
|
1516
|
+
return [];
|
|
1517
|
+
const seedFiles = new Set(seeds.map((s) => s.sourceFile));
|
|
1518
|
+
const seedFilesArr = [...seedFiles];
|
|
1519
|
+
if (seedFilesArr.length === 0)
|
|
1520
|
+
return [];
|
|
1521
|
+
// Wikilinks store the raw bracket text (e.g. "hub" from "[[hub]]"), not
|
|
1522
|
+
// the resolved path "hub.md". Match against both forms when looking up
|
|
1523
|
+
// backlinks so a seed of "hub.md" finds rows where target_file = "hub".
|
|
1524
|
+
const seedBasenames = seedFilesArr.map((f) => path.basename(f, '.md'));
|
|
1525
|
+
const fwdMatchSet = new Set([...seedFilesArr]);
|
|
1526
|
+
const backMatchSet = new Set([...seedFilesArr, ...seedBasenames]);
|
|
1527
|
+
const fwdArr = [...fwdMatchSet];
|
|
1528
|
+
const backArr = [...backMatchSet];
|
|
1529
|
+
const fwdPh = fwdArr.map(() => '?').join(',');
|
|
1530
|
+
const backPh = backArr.map(() => '?').join(',');
|
|
1531
|
+
const neighborFiles = new Set();
|
|
1532
|
+
try {
|
|
1533
|
+
const fwdRows = this.conn
|
|
1534
|
+
.prepare(`SELECT DISTINCT target_file FROM wikilinks WHERE source_file IN (${fwdPh})`)
|
|
1535
|
+
.all(...fwdArr);
|
|
1536
|
+
for (const r of fwdRows) {
|
|
1537
|
+
// target_file may be a basename — try both with and without .md.
|
|
1538
|
+
const candidates = [r.target_file, `${r.target_file}.md`];
|
|
1539
|
+
for (const c of candidates) {
|
|
1540
|
+
if (!seedFiles.has(c))
|
|
1541
|
+
neighborFiles.add(c);
|
|
1542
|
+
}
|
|
1543
|
+
}
|
|
1544
|
+
const backRows = this.conn
|
|
1545
|
+
.prepare(`SELECT DISTINCT source_file FROM wikilinks WHERE target_file IN (${backPh})`)
|
|
1546
|
+
.all(...backArr);
|
|
1547
|
+
for (const r of backRows) {
|
|
1548
|
+
if (!seedFiles.has(r.source_file))
|
|
1549
|
+
neighborFiles.add(r.source_file);
|
|
1550
|
+
}
|
|
1551
|
+
}
|
|
1552
|
+
catch {
|
|
1553
|
+
// wikilinks lookup failure — graph expansion is optional.
|
|
1554
|
+
return [];
|
|
1555
|
+
}
|
|
1556
|
+
if (neighborFiles.size === 0)
|
|
1557
|
+
return [];
|
|
1558
|
+
const neighborFilesArr = [...neighborFiles].slice(0, maxNeighbors * 2);
|
|
1559
|
+
const filePh = neighborFilesArr.map(() => '?').join(',');
|
|
1560
|
+
const args = [...neighborFilesArr];
|
|
1561
|
+
let agentClause = '';
|
|
1562
|
+
if (opts.agentSlug && opts.strict) {
|
|
1563
|
+
agentClause = 'AND (c.agent_slug = ? OR c.agent_slug IS NULL)';
|
|
1564
|
+
args.push(opts.agentSlug);
|
|
1565
|
+
}
|
|
1566
|
+
const chunkRows = this.conn
|
|
1567
|
+
.prepare(`SELECT c.id, c.source_file, c.section, c.content, c.chunk_type,
|
|
1568
|
+
c.salience, c.agent_slug, c.category, c.topic, c.updated_at,
|
|
1569
|
+
c.last_outcome_score, c.pinned
|
|
1570
|
+
FROM chunks c
|
|
1571
|
+
LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
|
|
1572
|
+
WHERE c.source_file IN (${filePh})
|
|
1573
|
+
AND c.chunk_type != 'frontmatter'
|
|
1574
|
+
AND sd.chunk_id IS NULL
|
|
1575
|
+
${agentClause}
|
|
1576
|
+
ORDER BY c.salience DESC, c.updated_at DESC`)
|
|
1577
|
+
.all(...args);
|
|
1578
|
+
const seedScoreMax = Math.max(...seeds.map((s) => s.score), 1);
|
|
1579
|
+
const perFile = new Map();
|
|
1580
|
+
const out = [];
|
|
1581
|
+
for (const r of chunkRows) {
|
|
1582
|
+
const count = perFile.get(r.source_file) ?? 0;
|
|
1583
|
+
if (count >= limitPerFile)
|
|
1584
|
+
continue;
|
|
1585
|
+
perFile.set(r.source_file, count + 1);
|
|
1586
|
+
out.push({
|
|
1587
|
+
sourceFile: r.source_file,
|
|
1588
|
+
section: r.section,
|
|
1589
|
+
content: r.content,
|
|
1590
|
+
score: seedScoreMax * boost * (1 + (r.salience ?? 0)),
|
|
1591
|
+
chunkType: r.chunk_type,
|
|
1592
|
+
matchType: 'graph',
|
|
1593
|
+
lastUpdated: r.updated_at,
|
|
1594
|
+
chunkId: r.id,
|
|
1595
|
+
salience: r.salience ?? 0,
|
|
1596
|
+
lastOutcomeScore: r.last_outcome_score ?? 0,
|
|
1597
|
+
agentSlug: r.agent_slug ?? undefined,
|
|
1598
|
+
category: r.category,
|
|
1599
|
+
topic: r.topic,
|
|
1600
|
+
pinned: !!r.pinned,
|
|
1601
|
+
});
|
|
1602
|
+
if (out.length >= maxNeighbors)
|
|
1603
|
+
break;
|
|
1604
|
+
}
|
|
1605
|
+
return out;
|
|
1606
|
+
}
|
|
1472
1607
|
/**
|
|
1473
1608
|
* Combined FTS5 relevance + recency search for context injection.
|
|
1474
1609
|
*
|
|
@@ -1477,6 +1612,7 @@ export class MemoryStore {
|
|
|
1477
1612
|
* 2. Recency fetch -> N most recent chunks
|
|
1478
1613
|
* 3. Deduplicate by (source_file, section)
|
|
1479
1614
|
* 4. Apply salience boost to FTS results
|
|
1615
|
+
* 5. Wikilink graph expansion -> 1-hop neighbors of top seeds (boost 0.7×)
|
|
1480
1616
|
*/
|
|
1481
1617
|
searchContext(query, limitOrOpts = 3, recencyLimitArg = 5) {
|
|
1482
1618
|
let limit;
|
|
@@ -1573,8 +1709,23 @@ export class MemoryStore {
|
|
|
1573
1709
|
}
|
|
1574
1710
|
// 3. Recency
|
|
1575
1711
|
const recentResults = this.getRecentChunks(recencyLimit, agentSlug, tagFilters, strict);
|
|
1576
|
-
//
|
|
1577
|
-
|
|
1712
|
+
// 3b. 1-hop wikilink expansion. Only run when we have seed candidates so
|
|
1713
|
+
// we don't blow up on empty queries; bounded to keep MMR cost flat.
|
|
1714
|
+
let graphResults = [];
|
|
1715
|
+
try {
|
|
1716
|
+
const seeds = [...ftsResults.slice(0, 5), ...vectorResults.slice(0, 5)];
|
|
1717
|
+
if (seeds.length > 0) {
|
|
1718
|
+
graphResults = this.expandViaWikilinks(seeds, {
|
|
1719
|
+
agentSlug,
|
|
1720
|
+
strict,
|
|
1721
|
+
maxNeighbors: 5,
|
|
1722
|
+
limitPerFile: 1,
|
|
1723
|
+
});
|
|
1724
|
+
}
|
|
1725
|
+
}
|
|
1726
|
+
catch { /* graph expansion is optional — never fail the whole retrieval */ }
|
|
1727
|
+
// 4. Merge and deduplicate (FTS first, then vector, then graph, then recency)
|
|
1728
|
+
const merged = [...ftsResults, ...vectorResults, ...graphResults, ...recentResults];
|
|
1578
1729
|
const finalResults = mmrRerank(deduplicateResults(merged), 0.7, limit + recencyLimit);
|
|
1579
1730
|
// 5. Log recall trace if session context provided. Skipped for internal
|
|
1580
1731
|
// calls (e.g. consolidation, dedup checks) by passing skipTrace=true.
|
|
@@ -1596,6 +1747,24 @@ export class MemoryStore {
|
|
|
1596
1747
|
* Non-fatal: errors are swallowed so retrieval never fails on logging issues.
|
|
1597
1748
|
*/
|
|
1598
1749
|
logRecallTrace(opts) {
|
|
1750
|
+
if (opts.chunkIds.length === 0)
|
|
1751
|
+
return;
|
|
1752
|
+
if (this.writeQueue) {
|
|
1753
|
+
this.writeQueue.enqueue({
|
|
1754
|
+
kind: 'recall',
|
|
1755
|
+
sessionKey: opts.sessionKey,
|
|
1756
|
+
messageId: opts.messageId ?? null,
|
|
1757
|
+
query: opts.query,
|
|
1758
|
+
chunkIds: [...opts.chunkIds],
|
|
1759
|
+
scores: [...opts.scores],
|
|
1760
|
+
agentSlug: opts.agentSlug ?? null,
|
|
1761
|
+
});
|
|
1762
|
+
return;
|
|
1763
|
+
}
|
|
1764
|
+
this._logRecallTraceSync(opts);
|
|
1765
|
+
}
|
|
1766
|
+
/** Internal sync recall_trace insert. Called by the WriteQueue. */
|
|
1767
|
+
_logRecallTraceSync(opts) {
|
|
1599
1768
|
if (opts.chunkIds.length === 0)
|
|
1600
1769
|
return;
|
|
1601
1770
|
try {
|
|
@@ -1661,23 +1830,75 @@ export class MemoryStore {
|
|
|
1661
1830
|
getChunksByIds(chunkIds) {
|
|
1662
1831
|
if (chunkIds.length === 0)
|
|
1663
1832
|
return [];
|
|
1664
|
-
|
|
1833
|
+
// Hot-cache pass — split into hits (return as-is) and misses (need SQL).
|
|
1834
|
+
const out = [];
|
|
1835
|
+
const misses = [];
|
|
1836
|
+
for (const id of chunkIds) {
|
|
1837
|
+
const cached = this.chunkRowCache.get(id);
|
|
1838
|
+
if (cached)
|
|
1839
|
+
out.push(cached);
|
|
1840
|
+
else
|
|
1841
|
+
misses.push(id);
|
|
1842
|
+
}
|
|
1843
|
+
if (misses.length === 0)
|
|
1844
|
+
return out;
|
|
1845
|
+
const placeholders = misses.map(() => '?').join(',');
|
|
1665
1846
|
const rows = this.conn.prepare(`SELECT id, source_file, section, content, chunk_type, agent_slug,
|
|
1666
1847
|
pinned, consolidated, derived_from, salience, updated_at
|
|
1667
|
-
FROM chunks WHERE id IN (${placeholders})`).all(...
|
|
1668
|
-
|
|
1669
|
-
|
|
1670
|
-
|
|
1671
|
-
|
|
1672
|
-
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1680
|
-
|
|
1848
|
+
FROM chunks WHERE id IN (${placeholders})`).all(...misses);
|
|
1849
|
+
for (const r of rows) {
|
|
1850
|
+
const shaped = {
|
|
1851
|
+
id: r.id,
|
|
1852
|
+
sourceFile: r.source_file,
|
|
1853
|
+
section: r.section,
|
|
1854
|
+
content: r.content,
|
|
1855
|
+
chunkType: r.chunk_type,
|
|
1856
|
+
agentSlug: r.agent_slug,
|
|
1857
|
+
pinned: !!r.pinned,
|
|
1858
|
+
consolidated: !!r.consolidated,
|
|
1859
|
+
derivedFrom: r.derived_from ? this._parseJsonArray(r.derived_from) : null,
|
|
1860
|
+
salience: r.salience,
|
|
1861
|
+
updatedAt: r.updated_at,
|
|
1862
|
+
};
|
|
1863
|
+
this.chunkRowCache.set(r.id, shaped);
|
|
1864
|
+
out.push(shaped);
|
|
1865
|
+
}
|
|
1866
|
+
return out;
|
|
1867
|
+
}
|
|
1868
|
+
/** Cache stats for the dashboard / debugging. */
|
|
1869
|
+
getChunkCacheStats() {
|
|
1870
|
+
return this.chunkRowCache.stats();
|
|
1871
|
+
}
|
|
1872
|
+
// ── Async write queue lifecycle ─────────────────────────────────
|
|
1873
|
+
/**
|
|
1874
|
+
* Enable the write-behind queue. After this call, saveTurn / recordAccess /
|
|
1875
|
+
* recordOutcome / logRecallTrace enqueue instead of running SQL on the
|
|
1876
|
+
* caller's thread. Idempotent. Tests leave this off and rely on the sync path.
|
|
1877
|
+
*/
|
|
1878
|
+
enableWriteQueue(opts = {}) {
|
|
1879
|
+
if (this.writeQueue)
|
|
1880
|
+
return;
|
|
1881
|
+
this.writeQueue = new WriteQueue(this, opts);
|
|
1882
|
+
this.writeQueue.start();
|
|
1883
|
+
}
|
|
1884
|
+
/** Drain and stop the write queue. Call on graceful shutdown. */
|
|
1885
|
+
async flushWrites() {
|
|
1886
|
+
if (!this.writeQueue)
|
|
1887
|
+
return;
|
|
1888
|
+
await this.writeQueue.drain();
|
|
1889
|
+
this.writeQueue = null;
|
|
1890
|
+
}
|
|
1891
|
+
/** Stats for the dashboard / debugging. Returns null when queue disabled. */
|
|
1892
|
+
getWriteQueueStats() {
|
|
1893
|
+
return this.writeQueue ? this.writeQueue.stats() : null;
|
|
1894
|
+
}
|
|
1895
|
+
/** Drop a single cache entry — called from mutations that touch a chunk. */
|
|
1896
|
+
invalidateChunkCache(chunkId) {
|
|
1897
|
+
this.chunkRowCache.delete(chunkId);
|
|
1898
|
+
}
|
|
1899
|
+
/** Drop the whole cache — fullSync and similar bulk operations call this. */
|
|
1900
|
+
clearChunkCache() {
|
|
1901
|
+
this.chunkRowCache.clear();
|
|
1681
1902
|
}
|
|
1682
1903
|
_parseJsonArray(json) {
|
|
1683
1904
|
try {
|
|
@@ -1941,6 +2162,51 @@ export class MemoryStore {
|
|
|
1941
2162
|
}
|
|
1942
2163
|
return scored.sort((a, b) => b.score - a.score).slice(0, limit);
|
|
1943
2164
|
}
|
|
2165
|
+
/**
|
|
2166
|
+
* Pre-embed the top N most-cited chunks at startup. Eliminates cold-start
|
|
2167
|
+
* latency for the chunks the agent is most likely to retrieve next. Skips
|
|
2168
|
+
* chunks that already have a current-model dense embedding.
|
|
2169
|
+
*
|
|
2170
|
+
* Ranking: by outcome citation count in the last 30d (chunks the agent
|
|
2171
|
+
* actually used), tiebroken by recency. Soft-deleted excluded.
|
|
2172
|
+
*/
|
|
2173
|
+
async warmDenseEmbeddings(topN = 200) {
|
|
2174
|
+
const currentModel = embeddingsModule.currentDenseModel();
|
|
2175
|
+
const candidates = this.conn.prepare(`SELECT c.id, c.content, COUNT(o.id) AS refs
|
|
2176
|
+
FROM chunks c
|
|
2177
|
+
LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
|
|
2178
|
+
LEFT JOIN outcomes o ON o.chunk_id = c.id
|
|
2179
|
+
AND o.referenced = 1
|
|
2180
|
+
AND o.created_at > datetime('now', '-30 days')
|
|
2181
|
+
WHERE sd.chunk_id IS NULL
|
|
2182
|
+
AND length(c.content) >= 1
|
|
2183
|
+
AND (c.embedding_dense IS NULL OR c.embedding_dense_model IS NULL OR c.embedding_dense_model != ?)
|
|
2184
|
+
GROUP BY c.id
|
|
2185
|
+
HAVING refs > 0
|
|
2186
|
+
ORDER BY refs DESC, c.updated_at DESC
|
|
2187
|
+
LIMIT ?`).all(currentModel, topN);
|
|
2188
|
+
let warmed = 0;
|
|
2189
|
+
let failed = 0;
|
|
2190
|
+
if (candidates.length === 0)
|
|
2191
|
+
return { warmed: 0, skipped: 0, failed: 0 };
|
|
2192
|
+
const updateStmt = this.conn.prepare(`UPDATE chunks SET embedding_dense = ?, embedding_dense_model = ? WHERE id = ?`);
|
|
2193
|
+
for (const c of candidates) {
|
|
2194
|
+
try {
|
|
2195
|
+
const vec = await embeddingsModule.embedDense(c.content, false);
|
|
2196
|
+
if (vec) {
|
|
2197
|
+
updateStmt.run(embeddingsModule.serializeEmbedding(vec), currentModel, c.id);
|
|
2198
|
+
warmed++;
|
|
2199
|
+
}
|
|
2200
|
+
else {
|
|
2201
|
+
failed++;
|
|
2202
|
+
}
|
|
2203
|
+
}
|
|
2204
|
+
catch {
|
|
2205
|
+
failed++;
|
|
2206
|
+
}
|
|
2207
|
+
}
|
|
2208
|
+
return { warmed, skipped: 0, failed };
|
|
2209
|
+
}
|
|
1944
2210
|
/**
|
|
1945
2211
|
* Backfill dense embeddings on chunks that don't yet have one (or that
|
|
1946
2212
|
* were embedded by an older model). Async because the dense model itself
|
|
@@ -2052,9 +2318,18 @@ export class MemoryStore {
|
|
|
2052
2318
|
}
|
|
2053
2319
|
// ── Transcripts ───────────────────────────────────────────────────
|
|
2054
2320
|
/**
|
|
2055
|
-
* Save a conversation turn to the transcripts table.
|
|
2321
|
+
* Save a conversation turn to the transcripts table. Routes through the
|
|
2322
|
+
* write queue when enabled so the request thread doesn't block on SQL.
|
|
2056
2323
|
*/
|
|
2057
2324
|
saveTurn(sessionKey, role, content, model = '') {
|
|
2325
|
+
if (this.writeQueue) {
|
|
2326
|
+
this.writeQueue.enqueue({ kind: 'transcript-turn', sessionKey, role, content, model });
|
|
2327
|
+
return;
|
|
2328
|
+
}
|
|
2329
|
+
this._saveTurnSync(sessionKey, role, content, model);
|
|
2330
|
+
}
|
|
2331
|
+
/** Internal sync transcript insert. Called directly by the WriteQueue. */
|
|
2332
|
+
_saveTurnSync(sessionKey, role, content, model) {
|
|
2058
2333
|
if (!this._stmtInsertTranscript) {
|
|
2059
2334
|
this._stmtInsertTranscript = this.conn.prepare('INSERT INTO transcripts (session_key, role, content, model) VALUES (?, ?, ?, ?)');
|
|
2060
2335
|
}
|
|
@@ -2151,9 +2426,20 @@ export class MemoryStore {
|
|
|
2151
2426
|
}
|
|
2152
2427
|
// ── Salience Tracking ─────────────────────────────────────────────
|
|
2153
2428
|
/**
|
|
2154
|
-
* Record that chunks were accessed (retrieved/displayed).
|
|
2429
|
+
* Record that chunks were accessed (retrieved/displayed). Routes through
|
|
2430
|
+
* the write queue when enabled.
|
|
2155
2431
|
*/
|
|
2156
2432
|
recordAccess(chunkIds, accessType = 'retrieval') {
|
|
2433
|
+
if (chunkIds.length === 0)
|
|
2434
|
+
return;
|
|
2435
|
+
if (this.writeQueue) {
|
|
2436
|
+
this.writeQueue.enqueue({ kind: 'access', chunkIds: [...chunkIds], accessType });
|
|
2437
|
+
return;
|
|
2438
|
+
}
|
|
2439
|
+
this._recordAccessSync(chunkIds, accessType);
|
|
2440
|
+
}
|
|
2441
|
+
/** Internal sync access log insert. Called directly by the WriteQueue. */
|
|
2442
|
+
_recordAccessSync(chunkIds, accessType) {
|
|
2157
2443
|
if (chunkIds.length === 0)
|
|
2158
2444
|
return;
|
|
2159
2445
|
const insertStmt = this.conn.prepare('INSERT INTO access_log (chunk_id, access_type) VALUES (?, ?)');
|
|
@@ -2207,6 +2493,20 @@ export class MemoryStore {
|
|
|
2207
2493
|
* can't dominate salience + BM25 + vector score.
|
|
2208
2494
|
*/
|
|
2209
2495
|
recordOutcome(outcomes, sessionKey) {
|
|
2496
|
+
if (outcomes.length === 0)
|
|
2497
|
+
return;
|
|
2498
|
+
if (this.writeQueue) {
|
|
2499
|
+
this.writeQueue.enqueue({
|
|
2500
|
+
kind: 'outcome',
|
|
2501
|
+
outcomes: outcomes.map((o) => ({ ...o })),
|
|
2502
|
+
sessionKey: sessionKey ?? null,
|
|
2503
|
+
});
|
|
2504
|
+
return;
|
|
2505
|
+
}
|
|
2506
|
+
this._recordOutcomeSync(outcomes, sessionKey);
|
|
2507
|
+
}
|
|
2508
|
+
/** Internal sync outcome insert + EMA update. Called by the WriteQueue. */
|
|
2509
|
+
_recordOutcomeSync(outcomes, sessionKey) {
|
|
2210
2510
|
if (outcomes.length === 0)
|
|
2211
2511
|
return;
|
|
2212
2512
|
const alpha = 0.3; // EMA weight on the new observation
|
|
@@ -2785,6 +3085,324 @@ export class MemoryStore {
|
|
|
2785
3085
|
recallTracesPruned,
|
|
2786
3086
|
};
|
|
2787
3087
|
}
|
|
3088
|
+
// ── Staleness detection ─────────────────────────────────────────
|
|
3089
|
+
/**
|
|
3090
|
+
* User-model slots whose `updated_at` is older than maxAgeDays. These are
|
|
3091
|
+
* candidates for the "verify or refresh" nudge — high-relevance memories
|
|
3092
|
+
* that may have become silently wrong (Mem0 2026 calls this out as an
|
|
3093
|
+
* open problem; we surface it via observability rather than auto-decay).
|
|
3094
|
+
*
|
|
3095
|
+
* Empty content is skipped (an empty slot has no claim to verify).
|
|
3096
|
+
*/
|
|
3097
|
+
findStaleUserModelSlots(opts = {}) {
|
|
3098
|
+
const maxAge = opts.maxAgeDays ?? 90;
|
|
3099
|
+
const rows = this.conn
|
|
3100
|
+
.prepare(`SELECT slot, agent_slug,
|
|
3101
|
+
CAST(strftime('%s', 'now') - strftime('%s', updated_at) AS INTEGER) AS age_seconds
|
|
3102
|
+
FROM user_model_blocks
|
|
3103
|
+
WHERE length(content) > 0
|
|
3104
|
+
AND updated_at < datetime('now', ?)
|
|
3105
|
+
AND COALESCE(agent_slug, '') = COALESCE(?, '')`)
|
|
3106
|
+
.all(`-${maxAge} days`, opts.agentSlug ?? null);
|
|
3107
|
+
return rows.map((r) => ({
|
|
3108
|
+
slot: r.slot,
|
|
3109
|
+
agentSlug: r.agent_slug,
|
|
3110
|
+
ageDays: Math.round(r.age_seconds / 86400),
|
|
3111
|
+
}));
|
|
3112
|
+
}
|
|
3113
|
+
/**
|
|
3114
|
+
* High-salience chunks whose outcome EMA has drifted negative — i.e., we
|
|
3115
|
+
* keep ranking them high but the agent stopped citing them. Strong signal
|
|
3116
|
+
* that the chunk is stale or wrong even though salience hasn't decayed.
|
|
3117
|
+
*
|
|
3118
|
+
* Conservative threshold: salience > 0.8 AND last_outcome_score < 0.
|
|
3119
|
+
* Soft-deleted excluded.
|
|
3120
|
+
*/
|
|
3121
|
+
findStaleHighSalienceChunks(opts = {}) {
|
|
3122
|
+
const salienceFloor = opts.salienceFloor ?? 0.8;
|
|
3123
|
+
const outcomeCeiling = opts.outcomeCeiling ?? 0;
|
|
3124
|
+
const limit = opts.limit ?? 25;
|
|
3125
|
+
const rows = this.conn
|
|
3126
|
+
.prepare(`SELECT c.id, c.source_file, c.section, c.salience, c.last_outcome_score
|
|
3127
|
+
FROM chunks c
|
|
3128
|
+
LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
|
|
3129
|
+
WHERE sd.chunk_id IS NULL
|
|
3130
|
+
AND c.salience > ?
|
|
3131
|
+
AND COALESCE(c.last_outcome_score, 0) < ?
|
|
3132
|
+
ORDER BY c.salience DESC
|
|
3133
|
+
LIMIT ?`)
|
|
3134
|
+
.all(salienceFloor, outcomeCeiling, limit);
|
|
3135
|
+
return rows.map((r) => ({
|
|
3136
|
+
chunkId: r.id,
|
|
3137
|
+
sourceFile: r.source_file,
|
|
3138
|
+
section: r.section,
|
|
3139
|
+
salience: r.salience,
|
|
3140
|
+
lastOutcomeScore: r.last_outcome_score ?? 0,
|
|
3141
|
+
}));
|
|
3142
|
+
}
|
|
3143
|
+
/**
|
|
3144
|
+
* Format staleness findings into ready-to-inject prompt text. Heartbeat
|
|
3145
|
+
* builders can drop this into the system prompt verbatim. Returns null
|
|
3146
|
+
* if there's nothing to nudge about — caller should not inject empty text.
|
|
3147
|
+
*/
|
|
3148
|
+
getStalenessNudges(opts = {}) {
|
|
3149
|
+
const stale = this.findStaleUserModelSlots({
|
|
3150
|
+
maxAgeDays: opts.maxSlotAgeDays,
|
|
3151
|
+
agentSlug: opts.agentSlug,
|
|
3152
|
+
});
|
|
3153
|
+
if (stale.length === 0)
|
|
3154
|
+
return null;
|
|
3155
|
+
const lines = stale.map((s) => `- \`${s.slot}\` is ${s.ageDays}d old`);
|
|
3156
|
+
return [
|
|
3157
|
+
'User-model maintenance:',
|
|
3158
|
+
...lines,
|
|
3159
|
+
'Verify or refresh these during the next natural turn — do not force a check-in.',
|
|
3160
|
+
].join('\n');
|
|
3161
|
+
}
|
|
3162
|
+
// ── Procedural memory ───────────────────────────────────────────
|
|
3163
|
+
/**
|
|
3164
|
+
* Find procedure chunks whose frontmatter `triggers` overlap with words
|
|
3165
|
+
* in the query. Used to surface learned workflows ("how Nate ships a
|
|
3166
|
+
* release", "how to handle inbound replies") above generic facts when
|
|
3167
|
+
* the user's intent matches.
|
|
3168
|
+
*
|
|
3169
|
+
* Match rule: case-insensitive substring of any trigger phrase appears
|
|
3170
|
+
* in the query. Empty result if no procedure chunks exist or no triggers
|
|
3171
|
+
* match — caller should treat this as additive context, not the whole
|
|
3172
|
+
* answer.
|
|
3173
|
+
*/
|
|
3174
|
+
findRelevantProcedures(query, opts = {}) {
|
|
3175
|
+
const limit = opts.limit ?? 5;
|
|
3176
|
+
const q = query.toLowerCase();
|
|
3177
|
+
// Exclude the frontmatter chunk — chunker emits one per file (a key:val
|
|
3178
|
+
// dump) which inherits category=procedure from the parent. Only the body
|
|
3179
|
+
// chunks contain the actual procedure content the agent wants to recall.
|
|
3180
|
+
const rows = this.conn
|
|
3181
|
+
.prepare(`SELECT c.id, c.source_file, c.section, c.content, c.frontmatter_json
|
|
3182
|
+
FROM chunks c
|
|
3183
|
+
LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
|
|
3184
|
+
WHERE c.category = 'procedure'
|
|
3185
|
+
AND c.chunk_type != 'frontmatter'
|
|
3186
|
+
AND sd.chunk_id IS NULL
|
|
3187
|
+
AND (c.agent_slug IS NULL OR c.agent_slug = COALESCE(?, c.agent_slug))`)
|
|
3188
|
+
.all(opts.agentSlug ?? null);
|
|
3189
|
+
const matches = [];
|
|
3190
|
+
for (const row of rows) {
|
|
3191
|
+
let triggers = [];
|
|
3192
|
+
try {
|
|
3193
|
+
const fm = row.frontmatter_json ? JSON.parse(row.frontmatter_json) : {};
|
|
3194
|
+
if (Array.isArray(fm.triggers)) {
|
|
3195
|
+
triggers = fm.triggers.map((t) => String(t).toLowerCase()).filter(Boolean);
|
|
3196
|
+
}
|
|
3197
|
+
}
|
|
3198
|
+
catch { /* malformed frontmatter — skip */ }
|
|
3199
|
+
if (triggers.length === 0)
|
|
3200
|
+
continue;
|
|
3201
|
+
const matched = triggers.filter((t) => q.includes(t));
|
|
3202
|
+
if (matched.length === 0)
|
|
3203
|
+
continue;
|
|
3204
|
+
matches.push({
|
|
3205
|
+
id: row.id,
|
|
3206
|
+
sourceFile: row.source_file,
|
|
3207
|
+
section: row.section,
|
|
3208
|
+
content: row.content,
|
|
3209
|
+
triggers,
|
|
3210
|
+
matched,
|
|
3211
|
+
});
|
|
3212
|
+
}
|
|
3213
|
+
// Most-matched first, then most-specific (longest matched trigger).
|
|
3214
|
+
matches.sort((a, b) => {
|
|
3215
|
+
if (b.matched.length !== a.matched.length)
|
|
3216
|
+
return b.matched.length - a.matched.length;
|
|
3217
|
+
const aMax = Math.max(...a.matched.map((m) => m.length));
|
|
3218
|
+
const bMax = Math.max(...b.matched.map((m) => m.length));
|
|
3219
|
+
return bMax - aMax;
|
|
3220
|
+
});
|
|
3221
|
+
return matches.slice(0, limit);
|
|
3222
|
+
}
|
|
3223
|
+
// ── Janitor: bounded growth ─────────────────────────────────────
|
|
3224
|
+
/** Persistent key/value for janitor state (last vacuum, etc.). */
|
|
3225
|
+
getMaintenanceMeta(key) {
|
|
3226
|
+
const row = this.conn
|
|
3227
|
+
.prepare('SELECT value FROM maintenance_meta WHERE key = ?')
|
|
3228
|
+
.get(key);
|
|
3229
|
+
return row?.value ?? null;
|
|
3230
|
+
}
|
|
3231
|
+
setMaintenanceMeta(key, value) {
|
|
3232
|
+
this.conn
|
|
3233
|
+
.prepare(`INSERT INTO maintenance_meta (key, value) VALUES (?, ?)
|
|
3234
|
+
ON CONFLICT(key) DO UPDATE SET value = excluded.value, updated_at = datetime('now')`)
|
|
3235
|
+
.run(key, value);
|
|
3236
|
+
}
|
|
3237
|
+
/**
 * Two-phase delete for consolidated, low-salience, unused chunks.
 *
 * Phase 1: soft-delete chunks where consolidated=1, not pinned, salience
 *          below floor, and never accessed (or last access older than
 *          expireDays).
 * Phase 2: physically delete chunks that have been in chunk_soft_deletes
 *          for graceDays. Cascades to access_log, outcomes, chunk_history
 *          for the same chunk_id.
 *
 * Summary chunks whose `derived_from` references the deleted IDs are
 * intentionally NOT propagate-deleted — the summary still encodes signal.
 *
 * @param {object} [opts]
 * @param {number} [opts.expireDays=60]     Age/idle threshold (days) before a chunk is a soft-delete candidate.
 * @param {number} [opts.salienceFloor=0.2] Chunks at or above this salience are kept.
 * @param {number} [opts.graceDays=14]      How long a soft-deleted chunk survives before physical deletion.
 * @returns {{softDeleted: number, physicallyDeleted: number}} Counts for each phase.
 */
expireConsolidated(opts = {}) {
    const expireDays = opts.expireDays ?? 60;
    const salienceFloor = opts.salienceFloor ?? 0.2;
    const graceDays = opts.graceDays ?? 14;
    // Phase 1 — soft-delete candidates. The sd join excludes chunks already
    // soft-deleted; the a subquery finds each chunk's most recent access.
    const candidates = this.conn
        .prepare(`SELECT c.id
             FROM chunks c
             LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
             LEFT JOIN (
               SELECT chunk_id, MAX(accessed_at) as last_access
               FROM access_log GROUP BY chunk_id
             ) a ON a.chunk_id = c.id
             WHERE c.consolidated = 1
               AND COALESCE(c.pinned, 0) = 0
               AND COALESCE(c.salience, 0) < ?
               AND sd.chunk_id IS NULL
               AND (a.last_access IS NULL OR a.last_access < datetime('now', ?))
               AND c.created_at < datetime('now', ?)`)
        .all(salienceFloor, `-${expireDays} days`, `-${expireDays} days`);
    let softDeleted = 0;
    for (const row of candidates) {
        // softDeleteChunk returns falsy if the chunk couldn't be soft-deleted;
        // only successful soft-deletes are counted.
        if (this.softDeleteChunk(row.id, 'janitor'))
            softDeleted++;
    }
    // Phase 2 — physical delete after grace period.
    const stale = this.conn
        .prepare(`SELECT chunk_id FROM chunk_soft_deletes
             WHERE deleted_at < datetime('now', ?)`)
        .all(`-${graceDays} days`);
    let physicallyDeleted = 0;
    if (stale.length > 0) {
        // softDeleteChunk removed these rows from chunks_fts. The chunks_ad
        // trigger fires on DELETE FROM chunks and tries to remove them again —
        // FTS5 contentless tables corrupt ("database disk image is malformed")
        // when you delete a docid that's already gone. Re-add the row to FTS
        // first so the trigger can do a clean delete. ORDER MATTERS here: the
        // reAddFts must run before delChunk within the same transaction.
        const fetchRow = this.conn.prepare(`SELECT id, source_file, section, content FROM chunks WHERE id = ?`);
        const reAddFts = this.conn.prepare(`INSERT INTO chunks_fts (rowid, source_file, section, content) VALUES (?, ?, ?, ?)`);
        const delChunk = this.conn.prepare('DELETE FROM chunks WHERE id = ?');
        const delSoft = this.conn.prepare('DELETE FROM chunk_soft_deletes WHERE chunk_id = ?');
        const delAccess = this.conn.prepare('DELETE FROM access_log WHERE chunk_id = ?');
        const delOutcomes = this.conn.prepare('DELETE FROM outcomes WHERE chunk_id = ?');
        const delHistory = this.conn.prepare('DELETE FROM chunk_history WHERE chunk_id = ?');
        // All cascaded deletes for one batch run atomically in a single transaction.
        const tx = this.conn.transaction((rows) => {
            for (const r of rows) {
                delAccess.run(r.chunk_id);
                delOutcomes.run(r.chunk_id);
                delHistory.run(r.chunk_id);
                delSoft.run(r.chunk_id);
                const chunkRow = fetchRow.get(r.chunk_id);
                // chunkRow can be missing if the chunk was already physically
                // removed elsewhere; then only the bookkeeping rows are purged.
                if (chunkRow) {
                    try {
                        reAddFts.run(chunkRow.id, chunkRow.source_file, chunkRow.section, chunkRow.content);
                    }
                    catch {
                        // Already in FTS (chunk was never soft-removed from FTS) — fine.
                    }
                    const result = delChunk.run(r.chunk_id);
                    if (result.changes > 0)
                        physicallyDeleted++;
                }
            }
        });
        tx(stale);
        // Invalidate cache for all physically-deleted ids.
        for (const r of stale)
            this.chunkRowCache.delete(r.chunk_id);
    }
    return { softDeleted, physicallyDeleted };
}
|
|
3321
|
+
/** Trim outcomes table to a rolling window. Append-only, can grow fast. */
|
|
3322
|
+
pruneOutcomes(retentionDays = 30) {
|
|
3323
|
+
const result = this.conn
|
|
3324
|
+
.prepare(`DELETE FROM outcomes WHERE created_at < datetime('now', ?)`)
|
|
3325
|
+
.run(`-${retentionDays} days`);
|
|
3326
|
+
return result.changes;
|
|
3327
|
+
}
|
|
3328
|
+
/**
|
|
3329
|
+
* Cap memory_extractions to maxRows. Deletes oldest non-active rows first;
|
|
3330
|
+
* 'active' extractions are preserved regardless of count to protect the
|
|
3331
|
+
* audit trail for in-flight work.
|
|
3332
|
+
*/
|
|
3333
|
+
capExtractions(maxRows = 50000) {
|
|
3334
|
+
let count;
|
|
3335
|
+
try {
|
|
3336
|
+
count = this.conn.prepare('SELECT COUNT(*) as c FROM memory_extractions').get().c;
|
|
3337
|
+
}
|
|
3338
|
+
catch {
|
|
3339
|
+
return 0; // table missing on first boot
|
|
3340
|
+
}
|
|
3341
|
+
if (count <= maxRows)
|
|
3342
|
+
return 0;
|
|
3343
|
+
const overflow = count - maxRows;
|
|
3344
|
+
const result = this.conn
|
|
3345
|
+
.prepare(`DELETE FROM memory_extractions
|
|
3346
|
+
WHERE id IN (
|
|
3347
|
+
SELECT id FROM memory_extractions
|
|
3348
|
+
WHERE status != 'active'
|
|
3349
|
+
ORDER BY extracted_at ASC
|
|
3350
|
+
LIMIT ?
|
|
3351
|
+
)`)
|
|
3352
|
+
.run(overflow);
|
|
3353
|
+
return result.changes;
|
|
3354
|
+
}
|
|
3355
|
+
/** Approximate SQLite database file size on disk, in bytes. */
|
|
3356
|
+
dbSizeBytes() {
|
|
3357
|
+
try {
|
|
3358
|
+
return statSync(this.dbPath).size;
|
|
3359
|
+
}
|
|
3360
|
+
catch {
|
|
3361
|
+
return 0;
|
|
3362
|
+
}
|
|
3363
|
+
}
|
|
3364
|
+
/**
|
|
3365
|
+
* VACUUM the database. Reclaims space from deleted rows. Holds an
|
|
3366
|
+
* exclusive lock for the duration — caller is expected to gate on
|
|
3367
|
+
* idleness (see lastActivityAt).
|
|
3368
|
+
*/
|
|
3369
|
+
vacuum() {
|
|
3370
|
+
const sizeBeforeBytes = this.dbSizeBytes();
|
|
3371
|
+
const start = Date.now();
|
|
3372
|
+
this.conn.exec('VACUUM');
|
|
3373
|
+
return {
|
|
3374
|
+
sizeBeforeBytes,
|
|
3375
|
+
sizeAfterBytes: this.dbSizeBytes(),
|
|
3376
|
+
durationMs: Date.now() - start,
|
|
3377
|
+
};
|
|
3378
|
+
}
|
|
3379
|
+
/**
|
|
3380
|
+
* Most recent timestamp across the high-write activity tables, as a Unix
|
|
3381
|
+
* milliseconds value. Returns null if all tables are empty. Used by the
|
|
3382
|
+
* janitor's idle gate.
|
|
3383
|
+
*
|
|
3384
|
+
* Implementation note: SQLite's datetime() returns "YYYY-MM-DD HH:MM:SS"
|
|
3385
|
+
* in UTC with no timezone marker — JS Date.parse interprets that as local
|
|
3386
|
+
* time and skews by the offset. We compute the unix epoch in SQL to avoid
|
|
3387
|
+
* the bug entirely.
|
|
3388
|
+
*/
|
|
3389
|
+
lastActivityAt() {
|
|
3390
|
+
try {
|
|
3391
|
+
const row = this.conn
|
|
3392
|
+
.prepare(`SELECT MAX(unix_t) as last FROM (
|
|
3393
|
+
SELECT CAST(strftime('%s', retrieved_at) AS INTEGER) as unix_t FROM recall_traces
|
|
3394
|
+
UNION ALL
|
|
3395
|
+
SELECT CAST(strftime('%s', accessed_at) AS INTEGER) as unix_t FROM access_log
|
|
3396
|
+
UNION ALL
|
|
3397
|
+
SELECT CAST(strftime('%s', created_at) AS INTEGER) as unix_t FROM transcripts
|
|
3398
|
+
)`)
|
|
3399
|
+
.get();
|
|
3400
|
+
return row.last !== null && row.last !== undefined ? row.last * 1000 : null;
|
|
3401
|
+
}
|
|
3402
|
+
catch {
|
|
3403
|
+
return null;
|
|
3404
|
+
}
|
|
3405
|
+
}
|
|
2788
3406
|
// ── Timeline Query ─────────────────────────────────────────────
|
|
2789
3407
|
/**
|
|
2790
3408
|
* Get chunks within a date range, ordered chronologically.
|
|
@@ -3364,6 +3982,112 @@ export class MemoryStore {
|
|
|
3364
3982
|
WHERE id IN (${placeholders})`)
|
|
3365
3983
|
.run(...chunkIds);
|
|
3366
3984
|
}
|
|
3985
|
+
/**
|
|
3986
|
+
* Aggregate memory-health snapshot for the dashboard.
|
|
3987
|
+
*
|
|
3988
|
+
* Single-pass queries over each table; cheap enough to call on every
|
|
3989
|
+
* dashboard tab visit without caching. Adds graph stats only if a
|
|
3990
|
+
* graphStore is supplied and reachable.
|
|
3991
|
+
*/
|
|
3992
|
+
getMemoryHealth(opts = {}) {
|
|
3993
|
+
const topLimit = opts.topCitedLimit ?? 10;
|
|
3994
|
+
const chunkAgg = this.conn
|
|
3995
|
+
.prepare(`SELECT
|
|
3996
|
+
COUNT(*) AS total,
|
|
3997
|
+
COALESCE(SUM(CASE WHEN consolidated = 1 THEN 1 ELSE 0 END), 0) AS consolidated,
|
|
3998
|
+
COALESCE(SUM(CASE WHEN pinned = 1 THEN 1 ELSE 0 END), 0) AS pinned
|
|
3999
|
+
FROM chunks`)
|
|
4000
|
+
.get();
|
|
4001
|
+
const softDeletedRow = this.conn
|
|
4002
|
+
.prepare('SELECT COUNT(*) AS c FROM chunk_soft_deletes')
|
|
4003
|
+
.get();
|
|
4004
|
+
// Zombies: consolidated AND no access in last 30d (or never accessed at all).
|
|
4005
|
+
const zombieRow = this.conn
|
|
4006
|
+
.prepare(`SELECT COUNT(*) AS c
|
|
4007
|
+
FROM chunks c
|
|
4008
|
+
LEFT JOIN (
|
|
4009
|
+
SELECT chunk_id, MAX(accessed_at) AS la
|
|
4010
|
+
FROM access_log GROUP BY chunk_id
|
|
4011
|
+
) a ON a.chunk_id = c.id
|
|
4012
|
+
LEFT JOIN chunk_soft_deletes sd ON sd.chunk_id = c.id
|
|
4013
|
+
WHERE c.consolidated = 1
|
|
4014
|
+
AND sd.chunk_id IS NULL
|
|
4015
|
+
AND (a.la IS NULL OR a.la < datetime('now', '-30 days'))`)
|
|
4016
|
+
.get();
|
|
4017
|
+
const byCategory = this.conn
|
|
4018
|
+
.prepare(`SELECT COALESCE(category, '(uncategorized)') AS category, COUNT(*) AS count
|
|
4019
|
+
FROM chunks GROUP BY category ORDER BY count DESC`)
|
|
4020
|
+
.all();
|
|
4021
|
+
// Row counts for the high-write tables (cheap COUNT(*) per table).
|
|
4022
|
+
const trackedTables = [
|
|
4023
|
+
'chunks',
|
|
4024
|
+
'chunks_fts',
|
|
4025
|
+
'recall_traces',
|
|
4026
|
+
'access_log',
|
|
4027
|
+
'outcomes',
|
|
4028
|
+
'transcripts',
|
|
4029
|
+
'session_summaries',
|
|
4030
|
+
'memory_extractions',
|
|
4031
|
+
'chunk_soft_deletes',
|
|
4032
|
+
'chunk_history',
|
|
4033
|
+
'sdk_session_entries',
|
|
4034
|
+
'wikilinks',
|
|
4035
|
+
];
|
|
4036
|
+
const tableRowCounts = {};
|
|
4037
|
+
for (const t of trackedTables) {
|
|
4038
|
+
try {
|
|
4039
|
+
const row = this.conn.prepare(`SELECT COUNT(*) AS c FROM ${t}`).get();
|
|
4040
|
+
tableRowCounts[t] = row.c;
|
|
4041
|
+
}
|
|
4042
|
+
catch {
|
|
4043
|
+
tableRowCounts[t] = -1;
|
|
4044
|
+
}
|
|
4045
|
+
}
|
|
4046
|
+
const topCited = this.conn
|
|
4047
|
+
.prepare(`SELECT o.chunk_id, c.source_file, c.section, COUNT(*) AS ref_count
|
|
4048
|
+
FROM outcomes o
|
|
4049
|
+
JOIN chunks c ON c.id = o.chunk_id
|
|
4050
|
+
WHERE o.referenced = 1
|
|
4051
|
+
AND o.created_at > datetime('now', '-30 days')
|
|
4052
|
+
GROUP BY o.chunk_id
|
|
4053
|
+
ORDER BY ref_count DESC
|
|
4054
|
+
LIMIT ?`)
|
|
4055
|
+
.all(topLimit);
|
|
4056
|
+
return {
|
|
4057
|
+
chunks: {
|
|
4058
|
+
total: chunkAgg.total,
|
|
4059
|
+
consolidated: chunkAgg.consolidated,
|
|
4060
|
+
pinned: chunkAgg.pinned,
|
|
4061
|
+
softDeleted: softDeletedRow.c,
|
|
4062
|
+
zombieCount: zombieRow.c,
|
|
4063
|
+
},
|
|
4064
|
+
chunksByCategory: byCategory,
|
|
4065
|
+
tableRowCounts,
|
|
4066
|
+
topCitedLast30d: topCited.map((r) => ({
|
|
4067
|
+
chunkId: r.chunk_id,
|
|
4068
|
+
sourceFile: r.source_file,
|
|
4069
|
+
section: r.section,
|
|
4070
|
+
refCount: r.ref_count,
|
|
4071
|
+
})),
|
|
4072
|
+
staleUserModelSlots: this.findStaleUserModelSlots(),
|
|
4073
|
+
staleHighSalienceChunks: this.findStaleHighSalienceChunks({ limit: 10 }),
|
|
4074
|
+
chunkCacheStats: this.chunkRowCache.stats(),
|
|
4075
|
+
writeQueue: this.getWriteQueueStats(),
|
|
4076
|
+
lastIntegrityReport: (() => {
|
|
4077
|
+
const raw = this.getMaintenanceMeta('last_integrity_report');
|
|
4078
|
+
if (!raw)
|
|
4079
|
+
return null;
|
|
4080
|
+
try {
|
|
4081
|
+
return JSON.parse(raw);
|
|
4082
|
+
}
|
|
4083
|
+
catch {
|
|
4084
|
+
return null;
|
|
4085
|
+
}
|
|
4086
|
+
})(),
|
|
4087
|
+
dbSizeBytes: this.dbSizeBytes(),
|
|
4088
|
+
lastVacuumAt: this.getMaintenanceMeta('last_vacuum_at'),
|
|
4089
|
+
};
|
|
4090
|
+
}
|
|
3367
4091
|
/**
|
|
3368
4092
|
* Get consolidation stats for monitoring.
|
|
3369
4093
|
*/
|
|
@@ -3753,6 +4477,10 @@ export class MemoryStore {
|
|
|
3753
4477
|
* Delete all chunks, wikilinks, file hash, and access log for a given file.
|
|
3754
4478
|
*/
|
|
3755
4479
|
deleteFileChunks(relPath) {
|
|
4480
|
+
// Capture chunk ids first so we can invalidate the LRU after the deletes.
|
|
4481
|
+
const ids = this.conn
|
|
4482
|
+
.prepare('SELECT id FROM chunks WHERE source_file = ?')
|
|
4483
|
+
.all(relPath);
|
|
3756
4484
|
// Delete access_log entries for chunks being removed (prevent orphans)
|
|
3757
4485
|
this.conn
|
|
3758
4486
|
.prepare('DELETE FROM access_log WHERE chunk_id IN (SELECT id FROM chunks WHERE source_file = ?)')
|
|
@@ -3760,6 +4488,8 @@ export class MemoryStore {
|
|
|
3760
4488
|
this.conn.prepare('DELETE FROM chunks WHERE source_file = ?').run(relPath);
|
|
3761
4489
|
this.conn.prepare('DELETE FROM wikilinks WHERE source_file = ?').run(relPath);
|
|
3762
4490
|
this.conn.prepare('DELETE FROM file_hashes WHERE rel_path = ?').run(relPath);
|
|
4491
|
+
for (const r of ids)
|
|
4492
|
+
this.chunkRowCache.delete(r.id);
|
|
3763
4493
|
}
|
|
3764
4494
|
/**
|
|
3765
4495
|
* Sanitize a query for FTS5 syntax.
|