claude-mem-lite 2.87.0 → 2.89.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
+ // Shared "compress old low-value observations into weekly summaries" core.
2
+ //
3
+ // Single source of truth for cmdCompress (CLI), mem_compress (MCP), and
4
+ // handleAutoCompress (hook). Pre-extraction the candidate query, the
5
+ // project+ISO-week grouping, and the per-group summary INSERT + mark-compressed
6
+ // were copy-pasted across all three and kept in sync by hand-written "parity"
7
+ // comments — which is exactly how the TF-IDF-vector write drifted out of the
8
+ // compression path (audit ARCH-1). Call sites keep what legitimately differs:
9
+ // argument parsing, preview rendering, candidate-window params, and transaction
10
+ // granularity (CLI/MCP wrap all groups in one transaction; the hook transacts
11
+ // each group). They no longer re-implement the mutation.
12
+ //
13
+ // The summary INSERT also writes its TF-IDF observation_vectors row in the same
14
+ // (caller-owned) transaction — fixed once here rather than in all three call
15
+ // sites. Without it, FTS-miss queries that fall back to vector recall (CJK /
16
+ // concept / paraphrase) could never reach compressed summaries; the LLM
17
+ // smart-compress path already wrote vectors, so the deterministic path was the
18
+ // sole gap (audit P6).
19
+
20
+ import { isoWeekKey, COMPRESSED_AUTO, debugCatch } from '../utils.mjs';
21
+ import { getVocabulary, computeVector } from '../tfidf.mjs';
22
+ import { scrubRecord } from './scrub-record.mjs';
23
+
24
/**
 * Fetch the observations eligible for weekly-summary compression.
 *
 * A row qualifies when its importance is 1 (NULL counts as 1), its access count
 * is 0 (NULL counts as 0), and it was created before `cutoff`. Rows that are
 * already compressed are excluded; with `includeAutoMarked` the rows whose
 * `compressed_into` equals COMPRESSED_AUTO are admitted as well.
 *
 * @param {object} db - database handle exposing `prepare(sql).all(...params)`.
 * @param {object} options
 * @param {number} options.cutoff - exclusive upper bound on `created_at_epoch`.
 * @param {?string} [options.project=null] - restrict to this project when truthy.
 * @param {boolean} [options.includeAutoMarked=false] - also return COMPRESSED_AUTO rows.
 * @returns {object[]} matching rows, ordered by project and then creation time.
 */
export function selectCompressionCandidates(db, { cutoff, project = null, includeAutoMarked = false }) {
  let compressedFilter = 'AND compressed_into IS NULL';
  if (includeAutoMarked) {
    compressedFilter = `AND (compressed_into IS NULL OR compressed_into = ${COMPRESSED_AUTO})`;
  }

  // The cutoff placeholder always comes first; the project placeholder, when
  // present, is the last one in the statement.
  const bindings = [cutoff];
  let projectFilter = '';
  if (project) {
    projectFilter = 'AND project = ?';
    bindings.push(project);
  }

  const statement = db.prepare(`
    SELECT id, project, type, title, created_at, created_at_epoch
    FROM observations
    WHERE COALESCE(importance, 1) = 1
      AND COALESCE(access_count, 0) = 0
      AND created_at_epoch < ?
      ${compressedFilter}
      ${projectFilter}
    ORDER BY project, created_at_epoch
  `);
  return statement.all(...bindings);
}
46
+
47
/**
 * Bucket candidates under a `project::isoWeek` key and drop every bucket with
 * fewer than 3 observations.
 *
 * @param {Iterable<{project: string, created_at_epoch: number}>} candidates
 * @returns {Array<[string, object[]]>} `[key, observations]` pairs in first-seen
 *   order; the project is the part of the key before `'::'`.
 */
export function groupByProjectWeek(candidates) {
  const buckets = new Map();
  for (const candidate of candidates) {
    const bucketKey = `${candidate.project}::${isoWeekKey(candidate.created_at_epoch)}`;
    const bucket = buckets.get(bucketKey);
    if (bucket === undefined) {
      buckets.set(bucketKey, [candidate]);
    } else {
      bucket.push(candidate);
    }
  }
  return Array.from(buckets).filter(([, members]) => members.length >= 3);
}
61
+
62
/**
 * Compress one group of observations into a single weekly-summary observation.
 *
 * Steps, all issued as plain statements (the CALLER owns the transaction):
 *   1. ensure a `compress-<proj>` row exists in sdk_sessions (INSERT OR IGNORE);
 *   2. insert the summary observation (importance 2, the group's most frequent
 *      type, timestamped at the group's median `created_at_epoch`);
 *   3. best-effort: store a TF-IDF vector for the summary;
 *   4. point every source row's `compressed_into` at the summary.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {string} proj - project the group belongs to.
 * @param {Array<{id: number, type: string, title: ?string, created_at_epoch: number}>} obs
 *   non-empty list of source observations.
 * @returns {{ summaryId: number, compressed: number }}
 * @throws {TypeError} when `obs` is not a non-empty array (nothing is written).
 */
export function compressGroup(db, proj, obs) {
  // Fail before any write with a readable message instead of the opaque
  // "cannot read properties of undefined" the dominant-type lookup would raise.
  if (!Array.isArray(obs) || obs.length === 0) {
    throw new TypeError('compressGroup: obs must be a non-empty array of observations');
  }

  // Sources are marked in batches so one oversized group cannot exceed the
  // engine's bound-parameter limit.
  const MARK_CHUNK_SIZE = 500;

  const types = {};
  for (const o of obs) types[o.type] = (types[o.type] || 0) + 1;
  const dominantType = Object.entries(types).sort((a, b) => b[1] - a[1])[0][0];
  const title = `Weekly summary: ${obs.length} ${dominantType} observations`;
  const narrative = obs.map((o) => `- ${o.title || '(untitled)'}`).join('\n');
  const sessionId = `compress-${proj}`;

  // Upper median for even-sized groups.
  const sortedEpochs = obs.map((o) => o.created_at_epoch).sort((a, b) => a - b);
  const medianEpoch = sortedEpochs[Math.floor(sortedEpochs.length / 2)];
  const medianDate = new Date(medianEpoch);

  const now = new Date();
  db.prepare(`
    INSERT OR IGNORE INTO sdk_sessions (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
    VALUES (?, ?, ?, ?, ?, 'active')
  `).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());

  // Defense-in-depth: source rows were scrubbed at ingest, but the new narrative
  // is constructed here and re-persisted.
  const safe = scrubRecord('observations', { text: narrative, title, narrative });
  const summaryResult = db.prepare(`
    INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?, '', ?, '', '', '[]', '[]', 2, ?, ?)
  `).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, medianDate.toISOString(), medianEpoch);
  const summaryId = Number(summaryResult.lastInsertRowid);

  // TF-IDF vector for the summary so it is reachable by vector recall.
  // Best-effort: a failure here must not abort the compression the caller is
  // transacting, so it is reported through debugCatch and otherwise ignored.
  try {
    const vocab = getVocabulary(db);
    if (vocab) {
      const vec = computeVector(`${safe.title} ${safe.narrative}`, vocab);
      if (vec) {
        // Respect the typed array's window: `vec.buffer` alone would persist the
        // whole backing ArrayBuffer if `vec` is a view onto a larger one.
        const blob = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
        db.prepare(
          'INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)'
        ).run(summaryId, blob, vocab.version, medianEpoch);
      }
    }
  } catch (e) { debugCatch(e, 'compress-vector'); }

  const obsIds = obs.map((o) => o.id);
  for (let start = 0; start < obsIds.length; start += MARK_CHUNK_SIZE) {
    const chunk = obsIds.slice(start, start + MARK_CHUNK_SIZE);
    const placeholders = chunk.map(() => '?').join(',');
    db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${placeholders})`).run(summaryId, ...chunk);
  }

  return { summaryId, compressed: obs.length };
}
@@ -0,0 +1,35 @@
1
+ // Dedup / merge similarity thresholds — single source of truth (P10).
2
+ //
3
+ // All values are Jaccard-space (word-set overlap, 0..1) unless noted. They were
4
+ // scattered as bare literals and duplicate local consts across save-observation,
5
+ // maintain-core, hook-llm, hook-optimize, mem-cli, server, and hook; converging
6
+ // them here removes the drift risk and gives the P7 benchmark named knobs.
7
+ // Vector-side constants (VOCAB_DIM / MIN_COSINE_SIMILARITY / RRF_K) deliberately
8
+ // stay in tfidf.mjs next to the search engine that consumes them.
9
+ //
10
+ // Pure constants only — no imports, so nothing can import-cycle through this.
11
+
12
// Save-time near-duplicate cutoff (5-minute window in lib/save-observation) and
// the hook-llm tier-1 title dedup. At 0.7, restatements such as "Modified X" /
// "Fixed X" (~70% word overlap) are caught while distinct-but-related
// observations stay separate.
export const DEDUP_JACCARD_THRESHOLD = 0.7;

// High-confidence auto-merge cutoff (maintain + optimize cluster-merge, CLI/MCP
// dedup preview). A pair scoring 0.85 or more merges without an LLM
// merge-decision call.
export const AUTO_MERGE_THRESHOLD = 0.85;

// Lower bound of the LLM-review merge band [0.4, 0.85) in hook-optimize. A pair
// scoring under 0.4 is too dissimilar to justify a merge-decision call.
export const MERGE_JACCARD_LOW = 0.4;

// MinHash estimated-Jaccard pre-filter for the maintain O(n²) scan: when the
// cheap signature estimate is already under 0.5, the exact-Jaccard compare is
// skipped.
export const MINHASH_PRE_THRESHOLD = 0.5;

// MinHash pre-filter for the hook post-inject fuzzy-dedup pass. Set to 0.7,
// stricter than maintain's 0.5, to keep the inline inject path cheap (it runs in
// the hot Stop path).
export const MINHASH_PREFILTER = 0.7;

// Strict title-Jaccard cutoff (0.95) for the hook post-inject fuzzy-dedup pass:
// only near-identical titles are collapsed inline; anything softer waits for
// the maintain sweep.
export const FUZZY_DEDUP_THRESHOLD = 0.95;
@@ -0,0 +1,239 @@
1
+ // Shared maintenance operations — single source of truth for cmdMaintain (CLI),
2
+ // mem_maintain (MCP), and handleAutoMaintain (hook). Pre-extraction each
3
+ // operation's SQL was copy-pasted across the call sites and kept in sync by
4
+ // "parity" comments, which had already drifted: the CLI/hook `decay` and
5
+ // `mark-idle` protect injection_count>0 (v2.56.0 — an obs Claude was shown 8×
6
+ // is contextually proven), but the MCP copy never got that clause, so
7
+ // mem_maintain decayed/purged injected memories the other two paths preserve.
8
+ // Consolidating here UNIFIES decay/mark-idle on the protected (correct) form.
9
+ //
10
+ // Every mutation is statement-only — the CALLER owns the transaction boundary
11
+ // (CLI/MCP wrap the execute ops in one transaction; the hook runs them in its
12
+ // auto-maintain block). `ctx` carries the per-caller knobs:
13
+ // { projectFilter: 'AND project = ?' | '', baseParams: [project?] , staleAge, opCap }
14
+
15
+ import { COMPRESSED_PENDING_PURGE, computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity } from '../utils.mjs';
16
+ import { rebuildVocabulary, computeVector, _resetVocabCache } from '../tfidf.mjs';
17
+ import { DEDUP_JACCARD_THRESHOLD, MINHASH_PRE_THRESHOLD as MINHASH_PRE_THRESHOLD_SRC } from './dedup-constants.mjs';
18
+
19
// 30 days, expressed in milliseconds.
export const STALE_AGE_MS = 30 * 24 * 60 * 60 * 1000;
// Default `opCap`: the most rows a single maintenance statement may touch.
export const OP_CAP = 1000;
// Default number of most-recent rows findDuplicates scans.
export const SCAN_LIMIT = 500;
// Default number of duplicate pairs findDuplicates reports before stopping.
export const DUPLICATE_LIMIT = 50;
// Back-compat aliases: maintain-core historically exported these two names;
// their values now come from the canonical lib/dedup-constants.mjs.
export const SIMILARITY_THRESHOLD = DEDUP_JACCARD_THRESHOLD;
export const MINHASH_PRE_THRESHOLD = MINHASH_PRE_THRESHOLD_SRC;
// Injection count at which a never-cited memory counts as "pinned noise" — the
// regular decay op cannot touch it because decay protects injection_count>0.
export const PINNED_INJ_THRESHOLD = 8;
30
+
31
/**
 * Delete broken observations: active rows (compressed_into NULL or 0) that have
 * neither a title nor a narrative.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} [ctx.opCap=OP_CAP] - maximum rows deleted by this call.
 * @returns {number} rows deleted.
 */
export function cleanupBroken(db, { projectFilter, baseParams, opCap = OP_CAP }) {
  const deleteBroken = db.prepare(`
    DELETE FROM observations WHERE id IN (
      SELECT id FROM observations
      WHERE COALESCE(compressed_into, 0) = 0
        AND (title IS NULL OR title = '') AND (narrative IS NULL OR narrative = '')
        ${projectFilter} LIMIT ${opCap}
    )
  `);
  const { changes } = deleteBroken.run(...baseParams);
  return changes;
}
42
+
43
/**
 * Two-step ageing pass over active observations created before `staleAge` that
 * were never accessed and never injected:
 *   1. rows with importance above 1 lose one importance point (floor 1);
 *   2. rows at importance 1 get `compressed_into = COMPRESSED_PENDING_PURGE`.
 * Step 2 runs after step 1, so it sees the importance values step 1 wrote.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} ctx.staleAge - exclusive upper bound on `created_at_epoch`.
 * @param {number} [ctx.opCap=OP_CAP] - maximum rows touched per step.
 * @returns {{ decayed: number, idleMarked: number }} rows changed by each step.
 */
export function decayAndMarkIdle(db, { projectFilter, baseParams, staleAge, opCap = OP_CAP }) {
  // Both statements bind the age cutoff first, then the project filter values.
  const bindings = [staleAge, ...baseParams];
  const changedBy = (sql) => db.prepare(sql).run(...bindings).changes;

  const decayed = changedBy(`
    UPDATE observations SET importance = MAX(1, COALESCE(importance, 1) - 1)
    WHERE id IN (
      SELECT id FROM observations
      WHERE COALESCE(compressed_into, 0) = 0
        AND COALESCE(importance, 1) > 1
        AND COALESCE(access_count, 0) = 0
        AND COALESCE(injection_count, 0) = 0
        AND created_at_epoch < ?
        ${projectFilter} LIMIT ${opCap}
    )
  `);

  const idleMarked = changedBy(`
    UPDATE observations SET compressed_into = ${COMPRESSED_PENDING_PURGE}
    WHERE id IN (
      SELECT id FROM observations
      WHERE COALESCE(compressed_into, 0) = 0
        AND COALESCE(importance, 1) = 1
        AND COALESCE(access_count, 0) = 0
        AND COALESCE(injection_count, 0) = 0
        AND created_at_epoch < ?
        ${projectFilter} LIMIT ${opCap}
    )
  `);

  return { decayed, idleMarked };
}
77
+
78
/**
 * Raise importance by one (capped at 3) for active observations whose access
 * count exceeds 3 and whose importance is still below 3.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} [ctx.opCap=OP_CAP] - maximum rows boosted by this call.
 * @returns {number} rows boosted.
 */
export function boostAccessed(db, { projectFilter, baseParams, opCap = OP_CAP }) {
  const boost = db.prepare(`
    UPDATE observations SET importance = MIN(3, COALESCE(importance, 1) + 1)
    WHERE id IN (
      SELECT id FROM observations
      WHERE COALESCE(compressed_into, 0) = 0
        AND COALESCE(access_count, 0) > 3
        AND COALESCE(importance, 1) < 3
        ${projectFilter} LIMIT ${opCap}
    )
  `);
  const { changes } = boost.run(...baseParams);
  return changes;
}
91
+
92
/**
 * Repair the citation-decay blind spot. Active rows injected at least
 * PINNED_INJ_THRESHOLD times but never cited are skipped by decay (which
 * protects injection_count>0), so they would keep their importance forever.
 * This drops them straight to importance 1 — one pass rather than one step,
 * because injection priority is binary at >=2 and a single step would not
 * de-rank. Rows are demoted, never purged.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} [ctx.opCap=OP_CAP] - maximum rows demoted by this call.
 * @returns {number} rows demoted.
 */
export function demotePinned(db, { projectFilter, baseParams, opCap = OP_CAP }) {
  const demote = db.prepare(`
    UPDATE observations SET importance = 1
    WHERE id IN (
      SELECT id FROM observations
      WHERE COALESCE(compressed_into, 0) = 0
        AND COALESCE(injection_count, 0) >= ${PINNED_INJ_THRESHOLD}
        AND COALESCE(cited_count, 0) = 0
        AND COALESCE(importance, 1) > 1
        ${projectFilter} LIMIT ${opCap}
    )
  `);
  const { changes } = demote.run(...baseParams);
  return changes;
}
111
+
112
/**
 * Merge explicit duplicate groups. Each group is `[keepId, removeId, …]`; every
 * removeId that is still active (compressed_into NULL or 0) is marked as
 * compressed into keepId. Callers parse their own input (CLI string / MCP array).
 *
 * Skipped without touching the database:
 *   - groups that are not arrays or have fewer than two ids;
 *   - groups whose keepId is null/undefined;
 *   - removeIds that are null/undefined or equal to keepId — merging a row into
 *     itself would mark the keeper as compressed and hide the whole group.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {Iterable<Array<number|string>>} groups - duplicate groups to merge.
 * @returns {number} number of rows merged.
 */
export function mergeDuplicates(db, groups) {
  let merged = 0;
  const mergeStmt = db.prepare('UPDATE observations SET compressed_into = ? WHERE id = ? AND COALESCE(compressed_into, 0) = 0');
  for (const group of groups) {
    if (!Array.isArray(group) || group.length < 2) continue;
    const [keepId, ...removeIds] = group;
    if (keepId == null) continue;
    for (const removeId of removeIds) {
      if (removeId == null) continue;
      // Compare numerically as well so 5 and '5' count as the same row.
      if (removeId === keepId || Number(removeId) === Number(keepId)) continue;
      merged += mergeStmt.run(keepId, removeId).changes;
    }
  }
  return merged;
}
127
+
128
/**
 * Preview (without deleting) the pending-purge observations created before the
 * retain cutoff.
 *
 * @param {object} db - database handle exposing `prepare(sql).get(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} retainCutoff - exclusive upper bound on `created_at_epoch`.
 * @returns {{candidates: number, oldest: ?number, newest: ?number}} the count and
 *   the creation-time range of the matching rows, as returned by the query.
 */
export function purgeStalePreview(db, { projectFilter, baseParams }, retainCutoff) {
  const preview = db.prepare(`
    SELECT COUNT(*) AS candidates, MIN(created_at_epoch) AS oldest, MAX(created_at_epoch) AS newest
    FROM observations
    WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} AND created_at_epoch < ? ${projectFilter}
  `);
  return preview.get(retainCutoff, ...baseParams);
}
136
+
137
/**
 * Delete the pending-purge observations created before the retain cutoff.
 *
 * @param {object} db - database handle exposing `prepare(sql).run(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} [ctx.opCap=OP_CAP] - maximum rows deleted by this call.
 * @param {number} retainCutoff - exclusive upper bound on `created_at_epoch`.
 * @returns {number} rows deleted.
 */
export function purgeStale(db, { projectFilter, baseParams, opCap = OP_CAP }, retainCutoff) {
  const purge = db.prepare(`
    DELETE FROM observations WHERE id IN (
      SELECT id FROM observations
      WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} AND created_at_epoch < ?
        ${projectFilter} LIMIT ${opCap}
    )
  `);
  const { changes } = purge.run(retainCutoff, ...baseParams);
  return changes;
}
147
+
148
/**
 * Near-duplicate title detection over the most recent active observations.
 *
 * Every pair of non-empty (trimmed) titles is first screened with the MinHash
 * Jaccard estimate; pairs that pass MINHASH_PRE_THRESHOLD get an exact Jaccard
 * comparison and are reported when that exceeds SIMILARITY_THRESHOLD. Scanning
 * stops as soon as `dupLimit` pairs have been collected.
 *
 * @param {object} db - database handle exposing `prepare(sql).all(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} [ctx.limit=SCAN_LIMIT] - how many recent rows to scan.
 * @param {number} [ctx.dupLimit=DUPLICATE_LIMIT] - maximum pairs returned.
 * @returns {Array<{a: object, b: object, similarity: string}>} pairs, where `a`
 *   and `b` carry `{id, title, importance}` and `similarity` has two decimals.
 */
export function findDuplicates(db, { projectFilter, baseParams, limit = SCAN_LIMIT, dupLimit = DUPLICATE_LIMIT }) {
  const rows = db.prepare(`
    SELECT id, title, project, importance, access_count, created_at_epoch
    FROM observations
    WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
    ORDER BY created_at_epoch DESC LIMIT ${limit}
  `).all(...baseParams);

  const trimmedTitles = rows.map((row) => (row.title || '').trim());
  const signatures = trimmedTitles.map((text) => (text ? computeMinHash(text) : null));
  // A row takes part in the scan only with a non-empty title and a signature.
  const isComparable = (index) => Boolean(trimmedTitles[index]) && Boolean(signatures[index]);
  const brief = ({ id, title, importance }) => ({ id, title, importance });

  const pairs = [];
  for (let first = 0; first < rows.length && pairs.length < dupLimit; first += 1) {
    if (!isComparable(first)) continue;
    for (let second = first + 1; second < rows.length; second += 1) {
      if (!isComparable(second)) continue;
      const estimate = estimateJaccardFromMinHash(signatures[first], signatures[second]);
      if (estimate < MINHASH_PRE_THRESHOLD) continue;
      const similarity = jaccardSimilarity(trimmedTitles[first], trimmedTitles[second]);
      if (similarity > SIMILARITY_THRESHOLD) {
        pairs.push({
          a: brief(rows[first]),
          b: brief(rows[second]),
          similarity: similarity.toFixed(2),
        });
        // The list only grows here, so this is the one place the cap can be hit.
        if (pairs.length >= dupLimit) break;
      }
    }
  }
  return pairs;
}
181
+
182
/**
 * Maintenance counters. One scan over the active rows yields `total`, `stale`,
 * `broken`, `boostable` and `pinned`; a second query counts the rows already
 * marked COMPRESSED_PENDING_PURGE. Callers render whichever counters they show.
 *
 * @param {object} db - database handle exposing `prepare(sql).get(...params)`.
 * @param {object} ctx
 * @param {string} ctx.projectFilter - `'AND project = ?'` or `''`.
 * @param {Array} ctx.baseParams - bindings for `projectFilter`.
 * @param {number} ctx.staleAge - creation-time cutoff used for the `stale` counter.
 * @returns {object} the scan's counters plus `pendingPurge`.
 */
export function maintenanceStats(db, { projectFilter, baseParams, staleAge }) {
  const counters = db.prepare(`
    SELECT
      COUNT(*) as total,
      COALESCE(SUM(CASE WHEN COALESCE(importance, 1) = 1 AND COALESCE(access_count, 0) = 0
        AND created_at_epoch < ? THEN 1 ELSE 0 END), 0) as stale,
      COALESCE(SUM(CASE WHEN (title IS NULL OR title = '') AND (narrative IS NULL OR narrative = '')
        THEN 1 ELSE 0 END), 0) as broken,
      COALESCE(SUM(CASE WHEN COALESCE(access_count, 0) > 3 AND COALESCE(importance, 1) < 3
        THEN 1 ELSE 0 END), 0) as boostable,
      COALESCE(SUM(CASE WHEN COALESCE(injection_count, 0) >= ${PINNED_INJ_THRESHOLD}
        AND COALESCE(cited_count, 0) = 0 AND COALESCE(importance, 1) > 1
        THEN 1 ELSE 0 END), 0) as pinned
    FROM observations
    WHERE COALESCE(compressed_into, 0) = 0 ${projectFilter}
  `).get(staleAge, ...baseParams);

  const pendingSql = `SELECT COUNT(*) as count FROM observations WHERE compressed_into = ${COMPRESSED_PENDING_PURGE} ${projectFilter}`;
  const { count } = db.prepare(pendingSql).get(...baseParams);

  return { ...counters, pendingPurge: count };
}
204
+
205
/**
 * Rebuild the TF-IDF vocabulary and the vector of every active, non-superseded
 * observation.
 *
 * The vocabulary cache is reset and the vocabulary rebuilt first. Then, inside
 * this function's own `db.transaction`, observation_vectors is emptied and one
 * row is inserted per observation for which `computeVector` yields a vector
 * (text = title + narrative + concepts, empty parts dropped).
 *
 * @param {object} db - database handle exposing `prepare` and `transaction`.
 * @returns {{ok: false, reason: string} | {ok: true, terms: number, updated: number, total: number}}
 *   `ok: false` when no vocabulary could be built; otherwise the vocabulary
 *   size, the number of vectors written, and the number of observations read.
 */
export function rebuildVectors(db) {
  _resetVocabCache();
  const vocab = rebuildVocabulary(db);
  if (!vocab) return { ok: false, reason: 'no observations to build vocabulary from' };
  const allObs = db.prepare(`
    SELECT id, title, narrative, concepts FROM observations
    WHERE COALESCE(compressed_into, 0) = 0 AND superseded_at IS NULL
  `).all();
  let updated = 0;
  const insertStmt = db.prepare('INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)');
  const now = Date.now();
  db.transaction(() => {
    db.prepare('DELETE FROM observation_vectors').run();
    for (const obs of allObs) {
      const text = [obs.title || '', obs.narrative || '', obs.concepts || ''].filter(Boolean).join(' ');
      const vec = computeVector(text, vocab);
      if (vec) {
        // Respect the typed array's window: `vec.buffer` alone would persist the
        // whole backing ArrayBuffer if `vec` is a view onto a larger one.
        const blob = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
        insertStmt.run(obs.id, blob, vocab.version, now);
        updated++;
      }
    }
  })();
  return { ok: true, terms: vocab.terms.size, updated, total: allObs.length };
}
230
+
231
/**
 * VACUUM the whole database and report how much freelist space was reclaimed.
 * Must run OUTSIDE any transaction.
 *
 * @param {object} db - database handle exposing `pragma(name, opts)` and `exec(sql)`.
 * @returns {{reclaimedMB: string, freeBefore: number, freeAfter: number}}
 *   freelist page counts before and after, plus the reclaimed size in MiB
 *   formatted with one decimal.
 */
export function vacuum(db) {
  const BYTES_PER_MB = 1048576;
  const readPragma = (name) => db.pragma(name, { simple: true });

  const pageSize = readPragma('page_size');
  const freeBefore = readPragma('freelist_count');
  db.exec('VACUUM');
  const freeAfter = readPragma('freelist_count');

  // Never report a negative reclaim if the freelist grew.
  const reclaimedPages = Math.max(0, freeBefore - freeAfter);
  const reclaimedMB = ((reclaimedPages * pageSize) / BYTES_PER_MB).toFixed(1);
  return { reclaimedMB, freeBefore, freeAfter };
}
@@ -13,10 +13,10 @@
13
13
 
14
14
  import { jaccardSimilarity, scrubSecrets, computeMinHash, cjkBigrams, getCurrentBranch, debugCatch } from '../utils.mjs';
15
15
  import { getVocabulary, computeVector } from '../tfidf.mjs';
16
+ import { DEDUP_JACCARD_THRESHOLD } from './dedup-constants.mjs';
16
17
 
17
18
  const DEDUP_WINDOW_MS = 5 * 60 * 1000;
18
19
  const DEDUP_RECENT_LIMIT = 50;
19
- const DEDUP_JACCARD_THRESHOLD = 0.7;
20
20
 
21
21
  /**
22
22
  * Save a new observation if it isn't a near-duplicate of one saved within the