claude-mem-lite 2.87.0 → 2.88.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/hook.mjs +21 -106
- package/lib/compress-core.mjs +98 -0
- package/lib/maintain-core.mjs +236 -0
- package/mem-cli.mjs +50 -248
- package/package.json +3 -1
- package/server.mjs +40 -252
- package/source-files.mjs +9 -0
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "claude-mem-lite",
|
|
13
|
-
"version": "2.
|
|
13
|
+
"version": "2.88.0",
|
|
14
14
|
"source": "./",
|
|
15
15
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. Alternative to claude-mem with 600x lower cost."
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.88.0",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. Alternative to claude-mem with 600x lower cost.",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "sdsrss"
|
package/hook.mjs
CHANGED
|
@@ -26,7 +26,7 @@ import {
|
|
|
26
26
|
truncate, inferProject, detectBashSignificance,
|
|
27
27
|
extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
|
|
28
28
|
makeEntryDesc, scrubSecrets, stripPrivate, EDIT_TOOLS, debugCatch, debugLog,
|
|
29
|
-
COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE,
|
|
29
|
+
COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, OBS_BM25,
|
|
30
30
|
computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
|
|
31
31
|
} from './utils.mjs';
|
|
32
32
|
import {
|
|
@@ -45,6 +45,8 @@ import {
|
|
|
45
45
|
} from './hook-shared.mjs';
|
|
46
46
|
import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
|
|
47
47
|
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
48
|
+
import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
|
|
49
|
+
import { cleanupBroken, decayAndMarkIdle, boostAccessed } from './lib/maintain-core.mjs';
|
|
48
50
|
import {
|
|
49
51
|
extractCitationsFromTranscript,
|
|
50
52
|
extractAllInjected,
|
|
@@ -819,65 +821,19 @@ async function handleSessionStart() {
|
|
|
819
821
|
`).run(Date.now() - 37 * 86400000);
|
|
820
822
|
if (purged.changes > 0) debugLog('DEBUG', 'auto-maintain', `purged ${purged.changes} stale observations`);
|
|
821
823
|
|
|
822
|
-
//
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
if (
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
// hook-memory.mjs bumps it when the obs is auto-injected into Claude's
|
|
836
|
-
// context. Pre-v2.56 only checked access_count, so an obs auto-injected
|
|
837
|
-
// 8x (proven contextually relevant) still got decayed/marked. Adding
|
|
838
|
-
// `injection_count = 0` treats injection as first-class engagement.
|
|
839
|
-
const decayed = db.prepare(`
|
|
840
|
-
UPDATE observations SET importance = MAX(1, COALESCE(importance, 1) - 1)
|
|
841
|
-
WHERE id IN (
|
|
842
|
-
SELECT id FROM observations
|
|
843
|
-
WHERE COALESCE(compressed_into, 0) = 0
|
|
844
|
-
AND COALESCE(importance, 1) > 1
|
|
845
|
-
AND COALESCE(access_count, 0) = 0
|
|
846
|
-
AND COALESCE(injection_count, 0) = 0
|
|
847
|
-
AND created_at_epoch < ?
|
|
848
|
-
LIMIT ${OP_CAP}
|
|
849
|
-
)
|
|
850
|
-
`).run(STALE_AGE);
|
|
851
|
-
if (decayed.changes > 0) debugLog('DEBUG', 'auto-maintain', `decayed ${decayed.changes} stale observations`);
|
|
852
|
-
|
|
853
|
-
// Mark idle: importance=1, never-accessed, never-injected, old → pending-purge
|
|
854
|
-
// (will be purged next cycle). v2.56.0 #4: injection_count protects.
|
|
855
|
-
const idleMarked = db.prepare(`
|
|
856
|
-
UPDATE observations SET compressed_into = ${COMPRESSED_PENDING_PURGE}
|
|
857
|
-
WHERE id IN (
|
|
858
|
-
SELECT id FROM observations
|
|
859
|
-
WHERE COALESCE(compressed_into, 0) = 0
|
|
860
|
-
AND COALESCE(importance, 1) = 1
|
|
861
|
-
AND COALESCE(access_count, 0) = 0
|
|
862
|
-
AND COALESCE(injection_count, 0) = 0
|
|
863
|
-
AND created_at_epoch < ?
|
|
864
|
-
LIMIT ${OP_CAP}
|
|
865
|
-
)
|
|
866
|
-
`).run(STALE_AGE);
|
|
867
|
-
if (idleMarked.changes > 0) debugLog('DEBUG', 'auto-maintain', `marked ${idleMarked.changes} idle as pending-purge`);
|
|
868
|
-
|
|
869
|
-
// Boost: increase importance of frequently-accessed observations
|
|
870
|
-
const boosted = db.prepare(`
|
|
871
|
-
UPDATE observations SET importance = MIN(3, COALESCE(importance, 1) + 1)
|
|
872
|
-
WHERE id IN (
|
|
873
|
-
SELECT id FROM observations
|
|
874
|
-
WHERE COALESCE(compressed_into, 0) = 0
|
|
875
|
-
AND COALESCE(access_count, 0) > 3
|
|
876
|
-
AND COALESCE(importance, 1) < 3
|
|
877
|
-
LIMIT ${OP_CAP}
|
|
878
|
-
)
|
|
879
|
-
`).run();
|
|
880
|
-
if (boosted.changes > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted.changes} frequently-accessed observations`);
|
|
824
|
+
// cleanup / decay+mark-idle / boost via maintain-core (shared with CLI + MCP).
|
|
825
|
+
// injection_count>0 protection lives in decayAndMarkIdle. Whole-DB, cap 500.
|
|
826
|
+
const mctx = { projectFilter: '', baseParams: [], staleAge: STALE_AGE, opCap: OP_CAP };
|
|
827
|
+
|
|
828
|
+
const cleaned = cleanupBroken(db, mctx);
|
|
829
|
+
if (cleaned > 0) debugLog('DEBUG', 'auto-maintain', `cleaned ${cleaned} broken observations`);
|
|
830
|
+
|
|
831
|
+
const { decayed, idleMarked } = decayAndMarkIdle(db, mctx);
|
|
832
|
+
if (decayed > 0) debugLog('DEBUG', 'auto-maintain', `decayed ${decayed} stale observations`);
|
|
833
|
+
if (idleMarked > 0) debugLog('DEBUG', 'auto-maintain', `marked ${idleMarked} idle as pending-purge`);
|
|
834
|
+
|
|
835
|
+
const boosted = boostAccessed(db, mctx);
|
|
836
|
+
if (boosted > 0) debugLog('DEBUG', 'auto-maintain', `boosted ${boosted} frequently-accessed observations`);
|
|
881
837
|
|
|
882
838
|
// Auto-dedup (exact): merge identical-title observations within 1h.
|
|
883
839
|
// Catches rapid duplicate writes (same hook firing twice, race conditions).
|
|
@@ -1361,57 +1317,16 @@ function handleAutoCompress() {
|
|
|
1361
1317
|
|
|
1362
1318
|
try {
|
|
1363
1319
|
const compressCutoff = Date.now() - 60 * 86400000; // 60 days
|
|
1364
|
-
const compressCandidates = db.prepare(`
|
|
1365
|
-
SELECT id, project, type, title, created_at_epoch
|
|
1366
|
-
FROM observations
|
|
1367
|
-
WHERE COALESCE(importance, 1) = 1 AND COALESCE(access_count, 0) = 0
|
|
1368
|
-
AND created_at_epoch < ?
|
|
1369
|
-
AND (compressed_into IS NULL OR compressed_into = ${COMPRESSED_AUTO})
|
|
1370
|
-
ORDER BY project, created_at_epoch
|
|
1371
|
-
`).all(compressCutoff);
|
|
1320
|
+
const compressCandidates = selectCompressionCandidates(db, { cutoff: compressCutoff, includeAutoMarked: true });
|
|
1372
1321
|
if (compressCandidates.length < 3) return;
|
|
1373
1322
|
|
|
1374
|
-
const groups = new Map();
|
|
1375
|
-
|
|
1376
|
-
|
|
1377
|
-
if (!groups.has(key)) groups.set(key, []);
|
|
1378
|
-
groups.get(key).push(c);
|
|
1379
|
-
}
|
|
1380
|
-
// Transact each group to prevent orphan summaries on crash
|
|
1381
|
-
const compressGroup = db.transaction((proj, obs) => {
|
|
1382
|
-
const types = {};
|
|
1383
|
-
for (const o of obs) types[o.type] = (types[o.type] || 0) + 1;
|
|
1384
|
-
const dominantType = Object.entries(types).sort((a, b) => b[1] - a[1])[0][0];
|
|
1385
|
-
const title = `Weekly summary: ${obs.length} ${dominantType} observations`;
|
|
1386
|
-
const narrative = obs.map(o => `- ${o.title || '(untitled)'}`).join('\n');
|
|
1387
|
-
const sortedEpochs = obs.map(o => o.created_at_epoch).sort((a, b) => a - b);
|
|
1388
|
-
const medianEpoch = sortedEpochs[Math.floor(sortedEpochs.length / 2)];
|
|
1389
|
-
const sessionId = `compress-${proj}`;
|
|
1390
|
-
const now = new Date();
|
|
1391
|
-
db.prepare(`INSERT OR IGNORE INTO sdk_sessions
|
|
1392
|
-
(content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
|
|
1393
|
-
VALUES (?,?,?,?,?,'active')`
|
|
1394
|
-
).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
|
|
1395
|
-
// Defense-in-depth: title/narrative are derived from already-stored
|
|
1396
|
-
// obs.title, but those rows pre-date the central scrub policy in some
|
|
1397
|
-
// cases. Re-scrub at the persistence boundary.
|
|
1398
|
-
const safe = scrubRecord('observations', { text: narrative, title, narrative });
|
|
1399
|
-
const summaryResult = db.prepare(`INSERT INTO observations
|
|
1400
|
-
(memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
|
|
1401
|
-
files_read, files_modified, importance, created_at, created_at_epoch)
|
|
1402
|
-
VALUES (?,?,?,?,?,'',?,'','','[]','[]',2,?,?)`
|
|
1403
|
-
).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, new Date(medianEpoch).toISOString(), medianEpoch);
|
|
1404
|
-
const summaryId = Number(summaryResult.lastInsertRowid);
|
|
1405
|
-
const obsIds = obs.map(o => o.id);
|
|
1406
|
-
db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsIds.map(() => '?').join(',')})`)
|
|
1407
|
-
.run(summaryId, ...obsIds);
|
|
1408
|
-
return obs.length;
|
|
1409
|
-
});
|
|
1323
|
+
const groups = groupByProjectWeek(compressCandidates);
|
|
1324
|
+
// Transact each group to prevent orphan summaries on crash (CLI/MCP wrap all groups in one).
|
|
1325
|
+
const compressGroupTxn = db.transaction((proj, obs) => compressGroup(db, proj, obs).compressed);
|
|
1410
1326
|
let totalCompressed = 0;
|
|
1411
1327
|
for (const [key, obs] of groups) {
|
|
1412
|
-
if (obs.length < 3) continue;
|
|
1413
1328
|
const [proj] = key.split('::');
|
|
1414
|
-
totalCompressed += compressGroup(proj, obs);
|
|
1329
|
+
totalCompressed += compressGroupTxn(proj, obs);
|
|
1415
1330
|
}
|
|
1416
1331
|
if (totalCompressed > 0) {
|
|
1417
1332
|
debugLog('DEBUG', 'auto-compress', `auto-compressed ${totalCompressed} observations into weekly summaries`);
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
// Shared "compress old low-value observations into weekly summaries" core.
|
|
2
|
+
//
|
|
3
|
+
// Single source of truth for cmdCompress (CLI), mem_compress (MCP), and
|
|
4
|
+
// handleAutoCompress (hook). Pre-extraction the candidate query, the
|
|
5
|
+
// project+ISO-week grouping, and the per-group summary INSERT + mark-compressed
|
|
6
|
+
// were copy-pasted across all three and kept in sync by hand-written "parity"
|
|
7
|
+
// comments — which is exactly how the TF-IDF-vector write drifted out of the
|
|
8
|
+
// compression path (audit ARCH-1). Call sites keep what legitimately differs:
|
|
9
|
+
// argument parsing, preview rendering, candidate-window params, and transaction
|
|
10
|
+
// granularity (CLI/MCP wrap all groups in one transaction; the hook transacts
|
|
11
|
+
// each group). They no longer re-implement the mutation.
|
|
12
|
+
//
|
|
13
|
+
// NOTE: the summary INSERT still omits the observation_vectors write, matching
|
|
14
|
+
// pre-extraction behavior. Fixing that (audit P5) is now a single change here
|
|
15
|
+
// instead of three — but it is a behavior change, intentionally NOT bundled.
|
|
16
|
+
|
|
17
|
+
import { isoWeekKey, COMPRESSED_AUTO } from '../utils.mjs';
|
|
18
|
+
import { scrubRecord } from './scrub-record.mjs';
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* Low-value compression candidates: importance=1, never accessed, older than
|
|
22
|
+
* `cutoff`, not already compressed. `includeAutoMarked` also folds in rows the
|
|
23
|
+
* hook lightweight-marked as COMPRESSED_AUTO (the hook re-summarizes those).
|
|
24
|
+
*/
|
|
25
|
+
export function selectCompressionCandidates(db, { cutoff, project = null, includeAutoMarked = false }) {
|
|
26
|
+
const compressedFilter = includeAutoMarked
|
|
27
|
+
? `AND (compressed_into IS NULL OR compressed_into = ${COMPRESSED_AUTO})`
|
|
28
|
+
: 'AND compressed_into IS NULL';
|
|
29
|
+
const projectFilter = project ? 'AND project = ?' : '';
|
|
30
|
+
const params = project ? [cutoff, project] : [cutoff];
|
|
31
|
+
return db.prepare(`
|
|
32
|
+
SELECT id, project, type, title, created_at, created_at_epoch
|
|
33
|
+
FROM observations
|
|
34
|
+
WHERE COALESCE(importance, 1) = 1
|
|
35
|
+
AND COALESCE(access_count, 0) = 0
|
|
36
|
+
AND created_at_epoch < ?
|
|
37
|
+
${compressedFilter}
|
|
38
|
+
${projectFilter}
|
|
39
|
+
ORDER BY project, created_at_epoch
|
|
40
|
+
`).all(...params);
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Group candidates by `project::isoWeek` and keep only groups worth compressing
|
|
45
|
+
* (≥ 3 observations). Returns [[key, obs[]], …] — callers split the key on '::'
|
|
46
|
+
* for the project.
|
|
47
|
+
*/
|
|
48
|
+
export function groupByProjectWeek(candidates) {
|
|
49
|
+
const groups = new Map();
|
|
50
|
+
for (const c of candidates) {
|
|
51
|
+
const key = `${c.project}::${isoWeekKey(c.created_at_epoch)}`;
|
|
52
|
+
if (!groups.has(key)) groups.set(key, []);
|
|
53
|
+
groups.get(key).push(c);
|
|
54
|
+
}
|
|
55
|
+
return [...groups.entries()].filter(([, obs]) => obs.length >= 3);
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
/**
|
|
59
|
+
* Compress one group: create a weekly-summary observation (importance 2, dominant
|
|
60
|
+
* type, median timestamp so it sits correctly in recency/timeline), then mark all
|
|
61
|
+
* sources as compressed into it. Statement-only — the CALLER owns the transaction
|
|
62
|
+
* boundary (all-groups-in-one for CLI/MCP, per-group for the hook).
|
|
63
|
+
*
|
|
64
|
+
* @returns {{ summaryId: number, compressed: number }}
|
|
65
|
+
*/
|
|
66
|
+
export function compressGroup(db, proj, obs) {
|
|
67
|
+
const types = {};
|
|
68
|
+
for (const o of obs) types[o.type] = (types[o.type] || 0) + 1;
|
|
69
|
+
const dominantType = Object.entries(types).sort((a, b) => b[1] - a[1])[0][0];
|
|
70
|
+
const title = `Weekly summary: ${obs.length} ${dominantType} observations`;
|
|
71
|
+
const narrative = obs.map((o) => `- ${o.title || '(untitled)'}`).join('\n');
|
|
72
|
+
const sessionId = `compress-${proj}`;
|
|
73
|
+
|
|
74
|
+
const sortedEpochs = obs.map((o) => o.created_at_epoch).sort((a, b) => a - b);
|
|
75
|
+
const medianEpoch = sortedEpochs[Math.floor(sortedEpochs.length / 2)];
|
|
76
|
+
const medianDate = new Date(medianEpoch);
|
|
77
|
+
|
|
78
|
+
const now = new Date();
|
|
79
|
+
db.prepare(`
|
|
80
|
+
INSERT OR IGNORE INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
|
|
81
|
+
VALUES (?, ?, ?, ?, ?, 'active')
|
|
82
|
+
`).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
|
|
83
|
+
|
|
84
|
+
// Defense-in-depth: source rows were scrubbed at ingest, but the new narrative
|
|
85
|
+
// is constructed here and re-persisted.
|
|
86
|
+
const safe = scrubRecord('observations', { text: narrative, title, narrative });
|
|
87
|
+
const summaryResult = db.prepare(`
|
|
88
|
+
INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, created_at, created_at_epoch)
|
|
89
|
+
VALUES (?, ?, ?, ?, ?, '', ?, '', '', '[]', '[]', 2, ?, ?)
|
|
90
|
+
`).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, medianDate.toISOString(), medianEpoch);
|
|
91
|
+
const summaryId = Number(summaryResult.lastInsertRowid);
|
|
92
|
+
|
|
93
|
+
const obsIds = obs.map((o) => o.id);
|
|
94
|
+
const obsPh = obsIds.map(() => '?').join(',');
|
|
95
|
+
db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsPh})`).run(summaryId, ...obsIds);
|
|
96
|
+
|
|
97
|
+
return { summaryId, compressed: obs.length };
|
|
98
|
+
}
|
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
// Shared maintenance operations — single source of truth for cmdMaintain (CLI),
|
|
2
|
+
// mem_maintain (MCP), and handleAutoMaintain (hook). Pre-extraction each
|
|
3
|
+
// operation's SQL was copy-pasted across the call sites and kept in sync by
|
|
4
|
+
// "parity" comments, which had already drifted: the CLI/hook `decay` and
|
|
5
|
+
// `mark-idle` protect injection_count>0 (v2.56.0 — an obs Claude was shown 8×
|
|
6
|
+
// is contextually proven), but the MCP copy never got that clause, so
|
|
7
|
+
// mem_maintain decayed/purged injected memories the other two paths preserve.
|
|
8
|
+
// Consolidating here UNIFIES decay/mark-idle on the protected (correct) form.
|
|
9
|
+
//
|
|
10
|
+
// Every mutation is statement-only — the CALLER owns the transaction boundary
|
|
11
|
+
// (CLI/MCP wrap the execute ops in one transaction; the hook runs them in its
|
|
12
|
+
// auto-maintain block). `ctx` carries the per-caller knobs:
|
|
13
|
+
// { projectFilter: 'AND project = ?' | '', baseParams: [project?] , staleAge, opCap }
|
|
14
|
+
|
|
15
|
+
import { COMPRESSED_PENDING_PURGE, computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity } from '../utils.mjs';
|
|
16
|
+
import { rebuildVocabulary, computeVector, _resetVocabCache } from '../tfidf.mjs';
|
|
17
|
+
|
|
18
|
+
export const STALE_AGE_MS = 30 * 86400000;
|
|
19
|
+
export const OP_CAP = 1000;
|
|
20
|
+
export const SCAN_LIMIT = 500;
|
|
21
|
+
export const DUPLICATE_LIMIT = 50;
|
|
22
|
+
export const SIMILARITY_THRESHOLD = 0.7;
|
|
23
|
+
export const MINHASH_PRE_THRESHOLD = 0.5;
|
|
24
|
+
// A memory injected this many times with zero citations is "pinned noise" that
|
|
25
|
+
// the regular decay op can't touch (decay protects injection_count>0).
|
|
26
|
+
export const PINNED_INJ_THRESHOLD = 8;
|
|
27
|
+
|
|
28
|
+
/** Delete broken observations (no title AND no narrative). Returns rows deleted. */
|
|
29
|
+
export function cleanupBroken(db, { projectFilter, baseParams, opCap = OP_CAP }) {
|
|
30
|
+
return db.prepare(`
|
|
31
|
+
DELETE FROM observations WHERE id IN (
|
|
32
|
+
SELECT id FROM observations
|
|
33
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
34
|
+
AND (title IS NULL OR title = '') AND (narrative IS NULL OR narrative = '')
|
|
35
|
+
${projectFilter} LIMIT ${opCap}
|
|
36
|
+
)
|
|
37
|
+
`).run(...baseParams).changes;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Decay importance of old, never-accessed, NEVER-INJECTED observations, then mark
|
|
42
|
+
* the importance-1 idle ones as pending-purge. injection_count>0 is protected as
|
|
43
|
+
* first-class engagement alongside access_count (unified across all three paths).
|
|
44
|
+
*/
|
|
45
|
+
export function decayAndMarkIdle(db, { projectFilter, baseParams, staleAge, opCap = OP_CAP }) {
|
|
46
|
+
const decayed = db.prepare(`
|
|
47
|
+
UPDATE observations SET importance = MAX(1, COALESCE(importance, 1) - 1)
|
|
48
|
+
WHERE id IN (
|
|
49
|
+
SELECT id FROM observations
|
|
50
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
51
|
+
AND COALESCE(importance, 1) > 1
|
|
52
|
+
AND COALESCE(access_count, 0) = 0
|
|
53
|
+
AND COALESCE(injection_count, 0) = 0
|
|
54
|
+
AND created_at_epoch < ?
|
|
55
|
+
${projectFilter} LIMIT ${opCap}
|
|
56
|
+
)
|
|
57
|
+
`).run(staleAge, ...baseParams).changes;
|
|
58
|
+
|
|
59
|
+
const idleMarked = db.prepare(`
|
|
60
|
+
UPDATE observations SET compressed_into = ${COMPRESSED_PENDING_PURGE}
|
|
61
|
+
WHERE id IN (
|
|
62
|
+
SELECT id FROM observations
|
|
63
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
64
|
+
AND COALESCE(importance, 1) = 1
|
|
65
|
+
AND COALESCE(access_count, 0) = 0
|
|
66
|
+
AND COALESCE(injection_count, 0) = 0
|
|
67
|
+
AND created_at_epoch < ?
|
|
68
|
+
${projectFilter} LIMIT ${opCap}
|
|
69
|
+
)
|
|
70
|
+
`).run(staleAge, ...baseParams).changes;
|
|
71
|
+
|
|
72
|
+
return { decayed, idleMarked };
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/** Boost importance of frequently-accessed observations. Returns rows boosted. */
|
|
76
|
+
export function boostAccessed(db, { projectFilter, baseParams, opCap = OP_CAP }) {
|
|
77
|
+
return db.prepare(`
|
|
78
|
+
UPDATE observations SET importance = MIN(3, COALESCE(importance, 1) + 1)
|
|
79
|
+
WHERE id IN (
|
|
80
|
+
SELECT id FROM observations
|
|
81
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
82
|
+
AND COALESCE(access_count, 0) > 3
|
|
83
|
+
AND COALESCE(importance, 1) < 3
|
|
84
|
+
${projectFilter} LIMIT ${opCap}
|
|
85
|
+
)
|
|
86
|
+
`).run(...baseParams).changes;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
/**
|
|
90
|
+
* Repair the citation-decay blind spot: heavy-injection + zero-citation rows that
|
|
91
|
+
* decay protects (injection_count>0) stay pinned at max importance forever. Drop
|
|
92
|
+
* them to importance 1 in one pass (injection priority is binary at >=2, so a
|
|
93
|
+
* single step would not de-rank). Floor 1, not purge.
|
|
94
|
+
*/
|
|
95
|
+
export function demotePinned(db, { projectFilter, baseParams, opCap = OP_CAP }) {
|
|
96
|
+
return db.prepare(`
|
|
97
|
+
UPDATE observations SET importance = 1
|
|
98
|
+
WHERE id IN (
|
|
99
|
+
SELECT id FROM observations
|
|
100
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
101
|
+
AND COALESCE(injection_count, 0) >= ${PINNED_INJ_THRESHOLD}
|
|
102
|
+
AND COALESCE(cited_count, 0) = 0
|
|
103
|
+
AND COALESCE(importance, 1) > 1
|
|
104
|
+
${projectFilter} LIMIT ${opCap}
|
|
105
|
+
)
|
|
106
|
+
`).run(...baseParams).changes;
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
/**
|
|
110
|
+
* Merge explicit duplicate groups: each group is [keepId, removeId, …]. Marks the
|
|
111
|
+
* removeIds compressed into keepId (only if not already compressed). Returns the
|
|
112
|
+
* number of rows merged. Callers parse their own input (CLI string / MCP array).
|
|
113
|
+
*/
|
|
114
|
+
export function mergeDuplicates(db, groups) {
|
|
115
|
+
let merged = 0;
|
|
116
|
+
const mergeStmt = db.prepare('UPDATE observations SET compressed_into = ? WHERE id = ? AND COALESCE(compressed_into, 0) = 0');
|
|
117
|
+
for (const group of groups) {
|
|
118
|
+
if (!group || group.length < 2) continue;
|
|
119
|
+
const [keepId, ...removeIds] = group;
|
|
120
|
+
for (const removeId of removeIds) merged += mergeStmt.run(keepId, removeId).changes;
|
|
121
|
+
}
|
|
122
|
+
return merged;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/** Preview pending-purge candidates older than the retain cutoff (no deletion). */
|
|
126
|
+
export function purgeStalePreview(db, { projectFilter, baseParams }, retainCutoff) {
|
|
127
|
+
return db.prepare(`
|
|
128
|
+
SELECT COUNT(*) AS candidates, MIN(created_at_epoch) AS oldest, MAX(created_at_epoch) AS newest
|
|
129
|
+
FROM observations
|
|
130
|
+
WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} AND created_at_epoch < ? ${projectFilter}
|
|
131
|
+
`).get(retainCutoff, ...baseParams);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
/** Delete pending-purge observations older than the retain cutoff. Returns rows deleted. */
|
|
135
|
+
export function purgeStale(db, { projectFilter, baseParams, opCap = OP_CAP }, retainCutoff) {
|
|
136
|
+
return db.prepare(`
|
|
137
|
+
DELETE FROM observations WHERE id IN (
|
|
138
|
+
SELECT id FROM observations
|
|
139
|
+
WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} AND created_at_epoch < ?
|
|
140
|
+
${projectFilter} LIMIT ${opCap}
|
|
141
|
+
)
|
|
142
|
+
`).run(retainCutoff, ...baseParams).changes;
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
/**
|
|
146
|
+
* Near-duplicate title detection: MinHash pre-filter → exact Jaccard. Returns
|
|
147
|
+
* [{ a:{id,title,importance}, b:{…}, similarity:'0.NN' }, …].
|
|
148
|
+
*/
|
|
149
|
+
export function findDuplicates(db, { projectFilter, baseParams, limit = SCAN_LIMIT, dupLimit = DUPLICATE_LIMIT }) {
|
|
150
|
+
const recent = db.prepare(`
|
|
151
|
+
SELECT id, title, project, importance, access_count, created_at_epoch
|
|
152
|
+
FROM observations
|
|
153
|
+
WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
|
|
154
|
+
ORDER BY created_at_epoch DESC LIMIT ${limit}
|
|
155
|
+
`).all(...baseParams);
|
|
156
|
+
|
|
157
|
+
const titles = recent.map((r) => (r.title || '').trim());
|
|
158
|
+
const minhashes = titles.map((t) => (t ? computeMinHash(t) : null));
|
|
159
|
+
const duplicates = [];
|
|
160
|
+
for (let i = 0; i < recent.length && duplicates.length < dupLimit; i++) {
|
|
161
|
+
if (!titles[i] || !minhashes[i]) continue;
|
|
162
|
+
for (let j = i + 1; j < recent.length; j++) {
|
|
163
|
+
if (!titles[j] || !minhashes[j]) continue;
|
|
164
|
+
if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PRE_THRESHOLD) continue;
|
|
165
|
+
const sim = jaccardSimilarity(titles[i], titles[j]);
|
|
166
|
+
if (sim > SIMILARITY_THRESHOLD) {
|
|
167
|
+
duplicates.push({
|
|
168
|
+
a: { id: recent[i].id, title: recent[i].title, importance: recent[i].importance },
|
|
169
|
+
b: { id: recent[j].id, title: recent[j].title, importance: recent[j].importance },
|
|
170
|
+
similarity: sim.toFixed(2),
|
|
171
|
+
});
|
|
172
|
+
}
|
|
173
|
+
if (duplicates.length >= dupLimit) break;
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
return duplicates;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
/** Single-scan maintenance counters (includes `pinned`; callers render what they show). */
|
|
180
|
+
export function maintenanceStats(db, { projectFilter, baseParams, staleAge }) {
|
|
181
|
+
const stats = db.prepare(`
|
|
182
|
+
SELECT
|
|
183
|
+
COUNT(*) as total,
|
|
184
|
+
COALESCE(SUM(CASE WHEN COALESCE(importance, 1) = 1 AND COALESCE(access_count, 0) = 0
|
|
185
|
+
AND created_at_epoch < ? THEN 1 ELSE 0 END), 0) as stale,
|
|
186
|
+
COALESCE(SUM(CASE WHEN (title IS NULL OR title = '') AND (narrative IS NULL OR narrative = '')
|
|
187
|
+
THEN 1 ELSE 0 END), 0) as broken,
|
|
188
|
+
COALESCE(SUM(CASE WHEN COALESCE(access_count, 0) > 3 AND COALESCE(importance, 1) < 3
|
|
189
|
+
THEN 1 ELSE 0 END), 0) as boostable,
|
|
190
|
+
COALESCE(SUM(CASE WHEN COALESCE(injection_count, 0) >= ${PINNED_INJ_THRESHOLD}
|
|
191
|
+
AND COALESCE(cited_count, 0) = 0 AND COALESCE(importance, 1) > 1
|
|
192
|
+
THEN 1 ELSE 0 END), 0) as pinned
|
|
193
|
+
FROM observations
|
|
194
|
+
WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
|
|
195
|
+
`).get(staleAge, ...baseParams);
|
|
196
|
+
const pendingPurge = db.prepare(
|
|
197
|
+
`SELECT COUNT(*) as count FROM observations WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} ${projectFilter}`
|
|
198
|
+
).get(...baseParams);
|
|
199
|
+
return { ...stats, pendingPurge: pendingPurge.count };
|
|
200
|
+
}
|
|
201
|
+
|
|
202
|
+
/** Rebuild the TF-IDF vocabulary + every active observation vector (own transaction). */
|
|
203
|
+
export function rebuildVectors(db) {
|
|
204
|
+
_resetVocabCache();
|
|
205
|
+
const vocab = rebuildVocabulary(db);
|
|
206
|
+
if (!vocab) return { ok: false, reason: 'no observations to build vocabulary from' };
|
|
207
|
+
const allObs = db.prepare(`
|
|
208
|
+
SELECT id, title, narrative, concepts FROM observations
|
|
209
|
+
WHERE COALESCE(compressed_into, 0) = 0 AND superseded_at IS NULL
|
|
210
|
+
`).all();
|
|
211
|
+
let updated = 0;
|
|
212
|
+
const insertStmt = db.prepare('INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)');
|
|
213
|
+
const now = Date.now();
|
|
214
|
+
db.transaction(() => {
|
|
215
|
+
db.prepare('DELETE FROM observation_vectors').run();
|
|
216
|
+
for (const obs of allObs) {
|
|
217
|
+
const text = [obs.title || '', obs.narrative || '', obs.concepts || ''].filter(Boolean).join(' ');
|
|
218
|
+
const vec = computeVector(text, vocab);
|
|
219
|
+
if (vec) {
|
|
220
|
+
insertStmt.run(obs.id, Buffer.from(vec.buffer), vocab.version, now);
|
|
221
|
+
updated++;
|
|
222
|
+
}
|
|
223
|
+
}
|
|
224
|
+
})();
|
|
225
|
+
return { ok: true, terms: vocab.terms.size, updated, total: allObs.length };
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
/** VACUUM the whole DB, reporting freelist reclaim. Must run OUTSIDE any transaction. */
|
|
229
|
+
export function vacuum(db) {
|
|
230
|
+
const pageSize = db.pragma('page_size', { simple: true });
|
|
231
|
+
const freeBefore = db.pragma('freelist_count', { simple: true });
|
|
232
|
+
db.exec('VACUUM');
|
|
233
|
+
const freeAfter = db.pragma('freelist_count', { simple: true });
|
|
234
|
+
const reclaimedMB = ((Math.max(0, freeBefore - freeAfter) * pageSize) / 1048576).toFixed(1);
|
|
235
|
+
return { reclaimedMB, freeBefore, freeAfter };
|
|
236
|
+
}
|