claude-mem-lite 2.31.2 → 2.32.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +50 -0
- package/README.zh-CN.md +46 -0
- package/adopt-cli.mjs +166 -0
- package/adopt-content.mjs +75 -0
- package/commands/adopt.md +44 -0
- package/commands/bug.md +57 -0
- package/commands/lesson.md +53 -0
- package/commands/unadopt.md +30 -0
- package/hook-context.mjs +12 -5
- package/hook-optimize.mjs +724 -0
- package/hook-shared.mjs +31 -0
- package/install.mjs +27 -0
- package/lib/activity.mjs +114 -0
- package/lib/doctor-benchmark.mjs +153 -0
- package/lib/git-state.mjs +38 -0
- package/lib/plan-reader.mjs +43 -0
- package/lib/startup-dashboard.mjs +113 -0
- package/lib/task-reader.mjs +149 -0
- package/mem-cli.mjs +17 -0
- package/memdir.mjs +252 -0
- package/package.json +19 -1
- package/plugin-cache-guard.mjs +77 -0
- package/scripts/user-prompt-search.js +5 -1
- package/server-internals.mjs +48 -0
- package/server.mjs +3 -36
|
@@ -0,0 +1,724 @@
|
|
|
1
|
+
// claude-mem-lite: LLM-powered database optimization
|
|
2
|
+
// Background worker for intelligent maintenance: re-enrich, normalize, cluster-merge, smart-compress
|
|
3
|
+
// Triggered from auto-maintain (24h) or manually via mem_optimize MCP tool / CLI
|
|
4
|
+
|
|
5
|
+
import { readFileSync, writeFileSync } from 'fs';
|
|
6
|
+
import { join } from 'path';
|
|
7
|
+
import {
|
|
8
|
+
truncate, debugLog, debugCatch, COMPRESSED_AUTO,
|
|
9
|
+
computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity, clampImportance, cjkBigrams,
|
|
10
|
+
notLowSignalTitleClause,
|
|
11
|
+
} from './utils.mjs';
|
|
12
|
+
import { callModelJSON } from './haiku-client.mjs';
|
|
13
|
+
import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
|
|
14
|
+
import { getVocabulary, computeVector, cosineSimilarity } from './tfidf.mjs';
|
|
15
|
+
import { DB_DIR } from './schema.mjs';
|
|
16
|
+
|
|
17
|
+
// Per-install runtime state (e.g. the normalize gate file below) lives under the DB directory.
const RUNTIME_DIR = join(DB_DIR, 'runtime');
|
|
18
|
+
|
|
19
|
+
// ─── Budget ─────────────────────────────────────────────────────────────────
|
|
20
|
+
|
|
21
|
+
/**
 * Split a total item budget across the four optimization tasks.
 *
 * Shares: re-enrich ~40%, cluster-merge ~30%, normalize a fixed 1,
 * smart-compress the remainder — each share floored at 1. If the floors
 * push the sum past `total` (possible for small totals), fall back to a
 * minimal allocation of 1 per task with re-enrich absorbing any slack.
 * Note: for total < 4 the sum still exceeds `total`, since each task
 * is guaranteed at least 1 item.
 *
 * @param {number} [total=15] total item budget
 * @returns {{reenrich: number, normalize: number, clusterMerge: number, smartCompress: number}}
 */
export function distributeBudget(total = 15) {
  const floorShare = (fraction) => Math.max(1, Math.floor(total * fraction));

  const normalize = 1;
  const reenrich = floorShare(0.4);
  const clusterMerge = floorShare(0.3);
  const smartCompress = Math.max(1, total - reenrich - normalize - clusterMerge);

  const allocated = reenrich + normalize + clusterMerge + smartCompress;
  if (allocated > total) {
    // Budget too small for proportional shares — give 1 each, slack to re-enrich.
    return { reenrich: Math.max(1, total - 3), normalize: 1, clusterMerge: 1, smartCompress: 1 };
  }
  return { reenrich, normalize, clusterMerge, smartCompress };
}
|
|
32
|
+
|
|
33
|
+
// ─── Shared Helpers ─────────────────────────────────────────────────────────
|
|
34
|
+
|
|
35
|
+
/**
 * Rebuild TF-IDF vector for an observation. Non-critical — swallows errors.
 * Exported for testing; also kept as the single source of vector-rebuild logic
 * for the optimize / re-enrich path to avoid drift with the hook-llm write path.
 *
 * @param {object} db better-sqlite3 database handle
 * @param {number} obsId observation row id the vector belongs to
 * @param {Array<string|null|undefined>} textParts text fragments (title, narrative, concepts…);
 *   falsy entries are dropped before vectorization
 */
export function rebuildVector(db, obsId, textParts) {
  try {
    // No vocabulary yet → nothing to vectorize against; silently skip.
    const vocab = getVocabulary(db);
    if (!vocab) return;
    const vec = computeVector(textParts.filter(Boolean).join(' '), vocab);
    if (vec) {
      // Bug #1 fix: column is `created_at_epoch`, not `computed_at`. Every other
      // INSERT callsite (server.mjs, hook-llm.mjs, mem-cli.mjs) uses the correct
      // name; this was the only drift, silently caught by the catch below until
      // the R-7 experiment surfaced it.
      db.prepare(`
        INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch)
        VALUES (?, ?, ?, ?)
      `).run(obsId, Buffer.from(vec.buffer), vocab.version, Date.now());
    }
  } catch (e) { debugCatch(e, 'optimize-vector'); } // best-effort: vector loss is recoverable
}
|
|
57
|
+
|
|
58
|
+
// ─── Task 1: Re-enrich ─────────────────────────────────────────────────────
|
|
59
|
+
|
|
60
|
+
/**
 * Find observations eligible for LLM re-enrichment.
 *
 * Two scopes:
 * - 'narrow' (default): fully-degraded observations — Haiku failed to extract
 *   concepts / facts / lesson / aliases. Conservative; preserves pre-R-7 behavior.
 * - 'wide' (R-7): substantive bugfix / refactor / feature / decision observations
 *   that have concepts + facts populated but are missing lesson_learned.
 *   Targets the "Haiku ran but judged 'none'" cases that dominate the library.
 *   Excludes LOW_SIGNAL titles (no source material to extract from) and
 *   thin narratives (<100 chars → nothing to rewrite into a lesson).
 *
 * Both scopes respect optimized_at (idempotent) and skip compressed/superseded rows.
 *
 * @param {object} db better-sqlite3 database handle
 * @param {number} limit max candidates to return
 * @param {{ scope?: 'narrow' | 'wide' }} [opts]
 * @returns {Array<object>} candidate rows, newest first
 */
export function findReenrichCandidates(db, limit = 10, { scope = 'narrow' } = {}) {
  if (scope === 'wide') {
    return db.prepare(`
      SELECT id, title, narrative, type, subtitle, concepts, facts
      FROM observations
      WHERE COALESCE(compressed_into, 0) = 0
        AND superseded_at IS NULL
        AND optimized_at IS NULL
        AND type IN ('bugfix','refactor','feature','decision')
        AND (lesson_learned IS NULL OR lesson_learned = '')
        AND LENGTH(COALESCE(narrative, '')) > 100
        AND ${notLowSignalTitleClause('')}
      ORDER BY
        -- Prioritize by type value: decisions first, then bugfixes, refactors, rest.
        CASE type WHEN 'decision' THEN 0 WHEN 'bugfix' THEN 1 WHEN 'refactor' THEN 2 ELSE 3 END,
        created_at_epoch DESC
      LIMIT ?
    `).all(limit);
  }
  // Narrow scope: every enrichment field empty — Haiku produced nothing usable.
  return db.prepare(`
    SELECT id, title, narrative, type, subtitle
    FROM observations
    WHERE COALESCE(compressed_into, 0) = 0
      AND (concepts IS NULL OR concepts = '')
      AND (facts IS NULL OR facts = '')
      AND lesson_learned IS NULL
      AND search_aliases IS NULL
      AND optimized_at IS NULL
    ORDER BY created_at_epoch DESC
    LIMIT ?
  `).all(limit);
}
|
|
109
|
+
|
|
110
|
+
/**
 * Re-enrich candidate observations via the LLM: improved title/narrative,
 * concepts, facts, importance, lesson_learned, search_aliases — then rebuild
 * the FTS text field, minhash signature and TF-IDF vector for each row.
 *
 * Importance 0 responses mark the row as auto-compressed instead of updating it.
 * Each candidate needs an LLM slot; slot-denied or failed candidates are skipped.
 *
 * @param {object} db better-sqlite3 handle
 * @param {number} [limit=10] max candidates to attempt
 * @param {{ scope?: 'narrow' | 'wide' }} [opts] passed through to findReenrichCandidates
 * @returns {Promise<{processed: number, skipped: number}>}
 */
export async function executeReenrich(db, limit = 10, { scope = 'narrow' } = {}) {
  const candidates = findReenrichCandidates(db, limit, { scope });
  if (candidates.length === 0) return { processed: 0, skipped: 0 };

  let processed = 0, skipped = 0;
  // Types the LLM is allowed to assign; anything else falls back to the row's current type.
  const validTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);

  for (const cand of candidates) {
    // Concurrency gate shared with the hook pipeline; skip rather than queue.
    const gotSlot = await acquireLLMSlot();
    if (!gotSlot) { skipped++; continue; }

    try {
      const prompt = `Re-enrich this observation with structured metadata. Return ONLY valid JSON, no markdown fences.

Title: ${truncate(cand.title || '(untitled)', 200)}
Narrative: ${truncate(cand.narrative || '(no narrative)', 500)}
Type: ${cand.type || 'change'}

JSON: {"type":"decision|bugfix|feature|refactor|discovery|change","title":"improved ≤120 char title","narrative":"improved 2-3 sentence narrative","concepts":["kw1","kw2"],"facts":["specific fact 1","specific fact 2"],"importance":1,"lesson_learned":"non-obvious insight or 'none' if routine","search_aliases":["alt query 1","alt query 2"]}
importance: 0=no value, 1=routine, 2=notable non-obvious insight, 3=critical. Default 1.
lesson_learned: State what was learned. If routine, write "none".
search_aliases: 2-6 alternative search terms (include CJK if applicable).`;

      const parsed = await callModelJSON(prompt, 'haiku', { timeout: 15000, maxTokens: 500 });
      if (!parsed || !parsed.title) { skipped++; continue; }

      // Importance 0 → the model judged the row worthless: tombstone it instead of enriching.
      if (parsed.importance === 0 || parsed.importance === '0') {
        db.prepare(`UPDATE observations SET compressed_into = ${COMPRESSED_AUTO}, optimized_at = ? WHERE id = ?`)
          .run(Date.now(), cand.id);
        processed++;
        continue;
      }

      // Sanitize/bound every LLM-provided field before it touches the DB.
      const type = validTypes.has(parsed.type) ? parsed.type : cand.type || 'change';
      const concepts = Array.isArray(parsed.concepts) ? parsed.concepts.slice(0, 10) : [];
      const facts = Array.isArray(parsed.facts) ? parsed.facts.slice(0, 10) : [];
      const conceptsText = concepts.join(' ');
      const factsText = facts.join(' ');
      // "none" (any case) or blank → store NULL, not the literal string.
      const lessonLearned = typeof parsed.lesson_learned === 'string'
        && parsed.lesson_learned.toLowerCase() !== 'none'
        && parsed.lesson_learned.trim().length > 0
        ? parsed.lesson_learned.slice(0, 500) : null;
      const searchAliases = Array.isArray(parsed.search_aliases)
        ? parsed.search_aliases.slice(0, 6).join(' ') : null;
      const title = truncate(parsed.title, 120);
      const narrative = truncate(parsed.narrative || cand.narrative || '', 500);
      const importance = clampImportance(parsed.importance);

      // Derived search artifacts: CJK bigrams for FTS, combined text field, minhash for dedup.
      const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
      const textField = [conceptsText, factsText, searchAliases || '', bigramText].filter(Boolean).join(' ');
      const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));

      db.prepare(`
        UPDATE observations SET type=?, title=?, narrative=?, concepts=?, facts=?,
          text=?, importance=?, lesson_learned=?, search_aliases=?, minhash_sig=?, optimized_at=?
        WHERE id = ?
      `).run(type, title, narrative, conceptsText, factsText, textField,
        importance, lessonLearned, searchAliases, minhashSig, Date.now(), cand.id);

      rebuildVector(db, cand.id, [title, narrative, conceptsText]);

      processed++;
    } catch (e) {
      debugCatch(e, 'reenrich');
      skipped++;
    } finally {
      releaseLLMSlot(); // always release, even on skip-by-throw
    }
  }

  if (processed > 0) debugLog('DEBUG', 'llm-optimize', `re-enriched ${processed} degraded observations`);
  return { processed, skipped };
}
|
|
183
|
+
|
|
184
|
+
// ─── Task 2: Normalize ─────────────────────────────────────────────────────
|
|
185
|
+
|
|
186
|
+
// Gate file recording the last normalize run; written by executeNormalize.
const NORMALIZE_GATE_FILE = join(RUNTIME_DIR, 'last-normalize.json');
const NORMALIZE_INTERVAL_MS = 7 * 86400000; // 7 days
|
|
188
|
+
|
|
189
|
+
/**
 * Time gate for the normalize task: true when the last recorded run is at
 * least NORMALIZE_INTERVAL_MS old. A missing/unreadable/malformed gate file
 * opens the gate (returns true).
 *
 * @returns {boolean}
 */
export function shouldRunNormalize() {
  try {
    const { epoch } = JSON.parse(readFileSync(NORMALIZE_GATE_FILE, 'utf8'));
    return Date.now() - epoch >= NORMALIZE_INTERVAL_MS;
  } catch {
    // First run or corrupt gate file — allow normalization.
    return true;
  }
}
|
|
197
|
+
|
|
198
|
+
/**
 * Collect distinct concept terms from recent live observations.
 *
 * Scans up to the 2000 newest non-compressed rows with a non-empty concepts
 * column, splits each on whitespace, and keeps terms of length ≥ 2.
 * Insertion order (newest rows first) is preserved.
 *
 * @param {object} db better-sqlite3 handle
 * @param {number} [limit=500] max terms to return
 * @returns {string[]} unique concept terms
 */
export function extractUniqueConcepts(db, limit = 500) {
  const rows = db.prepare(`
    SELECT concepts FROM observations
    WHERE COALESCE(compressed_into, 0) = 0
      AND concepts IS NOT NULL AND concepts != ''
    ORDER BY created_at_epoch DESC
    LIMIT 2000
  `).all();

  const unique = new Set();
  for (const { concepts } of rows) {
    for (const raw of concepts.split(/\s+/)) {
      const term = raw.trim();
      // Single characters carry no search value — drop them.
      if (term.length >= 2) unique.add(term);
    }
  }
  return [...unique].slice(0, limit);
}
|
|
216
|
+
|
|
217
|
+
/**
 * Ask the LLM to group the given concept terms into synonym clusters
 * (including cross-language English/Chinese equivalents).
 *
 * @param {string[]} concepts candidate terms (joined into the prompt)
 * @returns {Promise<Array<{canonical: string, aliases: string[]}>>} validated
 *   groups; empty array when no slot is available, the call fails, or the
 *   response is malformed.
 */
export async function identifySynonymGroups(concepts) {
  // Shares the LLM concurrency gate with the hooks; bail out rather than wait.
  const gotSlot = await acquireLLMSlot();
  if (!gotSlot) return [];

  try {
    const prompt = `Analyze these concept terms from a code memory database and identify synonym groups (terms that refer to the same concept). Include cross-language synonyms (English/Chinese). Return ONLY valid JSON.

Concepts: ${concepts.join(', ')}

JSON: {"groups":[{"canonical":"preferred term","aliases":["synonym1","synonym2"]}, ...]}

Rules:
- Only include groups where you are confident the terms are true synonyms
- canonical should be the most specific/technical term
- Include CJK ↔ English equivalents if present
- Skip terms that have no synonyms in the list`;

    // Uses 'sonnet' (not 'haiku'): synonym judgment needs the stronger model.
    const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
    if (!parsed?.groups || !Array.isArray(parsed.groups)) return [];
    // Drop structurally invalid groups so applyNormalization can trust its input.
    return parsed.groups.filter(g => g.canonical && Array.isArray(g.aliases) && g.aliases.length > 0);
  } catch (e) {
    debugCatch(e, 'normalize-identify');
    return [];
  } finally {
    releaseLLMSlot();
  }
}
|
|
244
|
+
|
|
245
|
+
/**
 * Rewrite observation concepts using the synonym groups: each alias term is
 * replaced by its canonical form, and the replaced originals are preserved in
 * search_aliases so old queries keep matching.
 *
 * Fix: alias preservation previously appended every replaced occurrence
 * verbatim, so a term appearing twice in `concepts` — or already present in
 * `search_aliases` — was duplicated in the alias list. Aliases are now merged
 * through a token Set, keeping existing aliases first.
 *
 * @param {object} db better-sqlite3 handle
 * @param {Array<{canonical: string, aliases: string[]}>} groups synonym groups
 * @returns {{updated: number}} count of rows rewritten
 */
export function applyNormalization(db, groups) {
  if (!groups || groups.length === 0) return { updated: 0 };

  // alias (lowercased) → canonical term
  const aliasMap = new Map();
  for (const g of groups) {
    for (const alias of g.aliases) {
      aliasMap.set(alias.toLowerCase(), g.canonical);
    }
  }

  const rows = db.prepare(`
    SELECT id, concepts, search_aliases FROM observations
    WHERE COALESCE(compressed_into, 0) = 0
      AND concepts IS NOT NULL AND concepts != ''
  `).all();

  let updated = 0;
  const updateStmt = db.prepare(`
    UPDATE observations SET concepts = ?, search_aliases = ?, optimized_at = ? WHERE id = ?
  `);

  for (const row of rows) {
    const terms = row.concepts.split(/\s+/);
    let changed = false;
    const newTerms = terms.map(t => {
      const canonical = aliasMap.get(t.toLowerCase());
      if (canonical && canonical !== t) { changed = true; return canonical; }
      return t;
    });

    if (changed) {
      const uniqueConcepts = [...new Set(newTerms)].join(' ');
      // Merge replaced originals into search_aliases, deduplicated:
      // existing alias tokens first, then each newly-replaced term once.
      const aliasTokens = new Set((row.search_aliases || '').split(/\s+/).filter(Boolean));
      for (const t of terms) {
        const canonical = aliasMap.get(t.toLowerCase());
        if (canonical && canonical !== t) aliasTokens.add(t);
      }
      const newAliases = [...aliasTokens].join(' ');
      updateStmt.run(uniqueConcepts, newAliases, Date.now(), row.id);
      updated++;
    }
  }

  if (updated > 0) debugLog('DEBUG', 'llm-optimize', `normalized concepts in ${updated} observations`);
  return { updated };
}
|
|
288
|
+
|
|
289
|
+
/**
 * Run the full normalize pipeline: time gate → concept extraction →
 * LLM synonym grouping → DB rewrite → gate-file refresh.
 *
 * @param {object} db better-sqlite3 handle
 * @param {boolean} [force=false] bypass the 7-day gate
 * @returns {Promise<object>} { skipped, reason } when gated or too sparse,
 *   otherwise { processed, groups }
 */
export async function executeNormalize(db, force = false) {
  const gateOpen = force || shouldRunNormalize();
  if (!gateOpen) return { skipped: true, reason: 'gate' };

  const concepts = extractUniqueConcepts(db);
  if (concepts.length < 5) return { skipped: true, reason: 'too few concepts' };

  const groups = await identifySynonymGroups(concepts);
  if (groups.length === 0) return { processed: 0, groups: 0 };

  const { updated } = applyNormalization(db, groups);

  // Record the run; failure to write the gate file is non-fatal.
  try { writeFileSync(NORMALIZE_GATE_FILE, JSON.stringify({ epoch: Date.now() })); } catch {}

  return { processed: updated, groups: groups.length };
}
|
|
304
|
+
|
|
305
|
+
// ─── Task 3: Cluster-merge ─────────────────────────────────────────────────
|
|
306
|
+
|
|
307
|
+
// Cluster-merge tuning: only observations within 30 days of each other are
// paired; title Jaccard must fall in [LOW, HIGH) — near-identical titles
// (≥ HIGH) are left to the dedup path, dissimilar ones (< LOW) are unrelated.
const MERGE_TIME_WINDOW_MS = 30 * 86400000;
const MERGE_JACCARD_LOW = 0.4;
const MERGE_JACCARD_HIGH = 0.85;
|
|
310
|
+
|
|
311
|
+
/**
 * Greedy single-pass clustering of recent observations that look mergeable:
 * same project, within the time window, title Jaccard in [LOW, HIGH).
 * MinHash estimate is used as a cheap pre-filter before exact title Jaccard.
 *
 * Fix: the SELECT now includes `type` — executeMergeCluster labels each
 * observation in its prompt with `o.type`, which was always undefined
 * (falling back to 'change') because the column was not selected.
 *
 * @param {object} db better-sqlite3 handle
 * @param {number} [maxClusters=5] stop after this many clusters
 * @returns {Array<Array<object>>} clusters of 2–5 rows each
 */
export function findMergeCandidates(db, maxClusters = 5) {
  const cutoff = Date.now() - MERGE_TIME_WINDOW_MS;
  const rows = db.prepare(`
    SELECT id, title, narrative, project, type, access_count, created_at_epoch, minhash_sig
    FROM observations
    WHERE COALESCE(compressed_into, 0) = 0
      AND optimized_at IS NULL
      AND title IS NOT NULL AND title != ''
      AND created_at_epoch > ?
    ORDER BY created_at_epoch DESC
    LIMIT 200
  `).all(cutoff);

  const used = new Set();
  const clusters = [];

  for (let i = 0; i < rows.length && clusters.length < maxClusters; i++) {
    if (used.has(rows[i].id)) continue;
    const cluster = [rows[i]];

    // Cap cluster size at 5 to keep the merge prompt bounded.
    for (let j = i + 1; j < rows.length && cluster.length < 5; j++) {
      if (used.has(rows[j].id)) continue;
      if (rows[i].project !== rows[j].project) continue;
      if (Math.abs(rows[i].created_at_epoch - rows[j].created_at_epoch) > MERGE_TIME_WINDOW_MS) continue;

      // Cheap MinHash pre-filter (20% slack below LOW) before exact Jaccard.
      if (rows[i].minhash_sig && rows[j].minhash_sig) {
        const est = estimateJaccardFromMinHash(rows[i].minhash_sig, rows[j].minhash_sig);
        if (est < MERGE_JACCARD_LOW * 0.8) continue;
      }

      const titleSim = jaccardSimilarity(rows[i].title, rows[j].title);
      if (titleSim >= MERGE_JACCARD_LOW && titleSim < MERGE_JACCARD_HIGH) {
        cluster.push(rows[j]);
        used.add(rows[j].id);
      }
    }

    // Singletons are discarded; their rows stay eligible as partners later.
    if (cluster.length >= 2) {
      used.add(rows[i].id);
      clusters.push(cluster);
    }
  }

  return clusters;
}
|
|
356
|
+
|
|
357
|
+
/**
 * Ask the LLM whether a candidate cluster should be merged; if so, rewrite
 * the most-accessed row ("keeper") with the merged content and mark the
 * remaining rows as compressed into it — atomically, in one transaction.
 *
 * @param {object} db better-sqlite3 handle
 * @param {Array<object>} cluster rows from findMergeCandidates (≥ 2)
 * @returns {Promise<{merged: boolean, keeperId?: number, mergedCount?: number}>}
 */
export async function executeMergeCluster(db, cluster) {
  if (cluster.length < 2) return { merged: false };

  const gotSlot = await acquireLLMSlot();
  if (!gotSlot) return { merged: false };

  try {
    const obsDescriptions = cluster.map((o, i) =>
      `${i + 1}. [${o.type || 'change'}] "${truncate(o.title, 200)}" — ${truncate(o.narrative || '(no narrative)', 500)}`
    ).join('\n');

    const prompt = `These observations from a code memory database may be about the same topic. Should they be merged into a single observation?

Observations:
${obsDescriptions}

Return ONLY valid JSON:
- If they should NOT be merged: {"should_merge":false}
- If they SHOULD be merged: {"should_merge":true,"merged_title":"≤120 char comprehensive title","merged_narrative":"comprehensive ≤800 char summary preserving all key details","merged_concepts":["kw1","kw2"],"merged_facts":["specific fact 1"],"merged_lesson":"synthesized non-obvious lesson or null","importance":2}`;

    const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
    if (!parsed || !parsed.should_merge) return { merged: false };

    // Keeper = the row users have actually retrieved most; it keeps its id
    // so existing references stay valid.
    const keeper = cluster.reduce((best, o) =>
      (o.access_count || 0) > (best.access_count || 0) ? o : best
    , cluster[0]);
    const others = cluster.filter(o => o.id !== keeper.id);

    // Bound/sanitize LLM output before writing.
    const concepts = Array.isArray(parsed.merged_concepts) ? parsed.merged_concepts.slice(0, 10) : [];
    const facts = Array.isArray(parsed.merged_facts) ? parsed.merged_facts.slice(0, 10) : [];
    const conceptsText = concepts.join(' ');
    const factsText = facts.join(' ');
    const title = truncate(parsed.merged_title, 120);
    const narrative = truncate(parsed.merged_narrative || '', 800);
    const lessonLearned = typeof parsed.merged_lesson === 'string'
      && parsed.merged_lesson.trim().length > 0
      ? parsed.merged_lesson.slice(0, 500) : null;

    // Rebuild derived search artifacts for the merged content.
    const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
    const textField = [conceptsText, factsText, bigramText].filter(Boolean).join(' ');
    const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
    const importance = clampImportance(parsed.importance || 2);

    // Atomic: keeper update + tombstoning of the merged rows succeed or fail together.
    db.transaction(() => {
      db.prepare(`
        UPDATE observations SET title=?, narrative=?, concepts=?, facts=?, text=?,
          importance=?, lesson_learned=?, minhash_sig=?, optimized_at=?
        WHERE id = ?
      `).run(title, narrative, conceptsText, factsText, textField,
        importance, lessonLearned, minhashSig, Date.now(), keeper.id);

      const otherIds = others.map(o => o.id);
      const ph = otherIds.map(() => '?').join(',');
      db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${ph})`)
        .run(keeper.id, ...otherIds);
    })();

    rebuildVector(db, keeper.id, [title, narrative, conceptsText]);

    debugLog('DEBUG', 'llm-optimize', `merged ${cluster.length} observations into #${keeper.id}`);
    return { merged: true, keeperId: keeper.id, mergedCount: others.length };
  } catch (e) {
    debugCatch(e, 'cluster-merge');
    return { merged: false };
  } finally {
    releaseLLMSlot();
  }
}
|
|
425
|
+
|
|
426
|
+
/**
 * Run the cluster-merge task: find candidate clusters, attempt each merge.
 *
 * @param {object} db better-sqlite3 handle
 * @param {number} [maxClusters=5] cap on clusters considered
 * @returns {Promise<{processed: number, merged: number}>}
 */
export async function executeClusterMerge(db, maxClusters = 5) {
  const clusters = findMergeCandidates(db, maxClusters);

  let merged = 0;
  for (const cluster of clusters) {
    const outcome = await executeMergeCluster(db, cluster);
    if (outcome.merged) merged += 1;
  }

  return { processed: clusters.length, merged };
}
|
|
438
|
+
|
|
439
|
+
// ─── Task 4: Smart-compress ────────────────────────────────────────────────
|
|
440
|
+
|
|
441
|
+
// Smart-compress tuning: clusters are split when they span more than 14 days;
// TF-IDF cosine similarity ≥ 0.3 groups observations as topically related.
const COMPRESS_TIME_SPLIT_MS = 14 * 86400000;
const COMPRESS_COSINE_THRESHOLD = 0.3;
|
|
443
|
+
|
|
444
|
+
/**
 * Select stale, never-accessed, routine-importance observations as
 * smart-compress input, grouped for downstream clustering by project
 * and chronological order.
 *
 * @param {object} db better-sqlite3 handle
 * @param {number} [ageDays=30] minimum age in days
 * @returns {Array<object>} candidate rows
 */
export function findSmartCompressCandidates(db, ageDays = 30) {
  const ageMs = ageDays * 86400000;
  const cutoff = Date.now() - ageMs;
  const stmt = db.prepare(`
    SELECT id, title, narrative, lesson_learned, project, type, created_at_epoch
    FROM observations
    WHERE COALESCE(compressed_into, 0) = 0
      AND COALESCE(importance, 1) = 1
      AND COALESCE(access_count, 0) = 0
      AND created_at_epoch < ?
    ORDER BY project, created_at_epoch
  `);
  return stmt.all(cutoff);
}
|
|
456
|
+
|
|
457
|
+
/**
 * Group compress candidates into clusters of ≥ 3 similar observations.
 *
 * Per project: when a TF-IDF vocabulary is available, greedy cosine-similarity
 * clustering (threshold COMPRESS_COSINE_THRESHOLD) seeded by each unused row;
 * clusters are then split on the 14-day time window. Without a vocabulary,
 * falls back to time-window grouping alone.
 *
 * NOTE(review): the fallback branch sorts `obs` in place, mutating the arrays
 * held in `byProject` (and, transitively, ordering of caller-provided rows) —
 * appears harmless here since candidates arrive pre-sorted, but confirm.
 *
 * @param {Array<object>} candidates rows from findSmartCompressCandidates
 * @param {object} db better-sqlite3 handle (for vocabulary lookup)
 * @returns {Array<{project: string, observations: Array<object>}>}
 */
export function clusterForCompression(candidates, db) {
  if (candidates.length < 3) return [];

  // Bucket by project — clusters never span projects.
  const byProject = new Map();
  for (const c of candidates) {
    if (!byProject.has(c.project)) byProject.set(c.project, []);
    byProject.get(c.project).push(c);
  }

  const clusters = [];

  for (const [project, obs] of byProject) {
    if (obs.length < 3) continue;

    // Vocabulary lookup is best-effort; absence triggers the time-only fallback.
    let vocab;
    try { vocab = getVocabulary(db); } catch {}

    if (vocab) {
      const vectors = obs.map(o => {
        const text = [o.title || '', o.narrative || ''].join(' ');
        return computeVector(text, vocab);
      });

      // Greedy clustering: each unused row seeds a cluster and absorbs all
      // later rows whose cosine similarity clears the threshold.
      const used = new Set();
      for (let i = 0; i < obs.length; i++) {
        if (used.has(i) || !vectors[i]) continue;
        const cluster = [{ obs: obs[i], idx: i }];
        used.add(i);

        for (let j = i + 1; j < obs.length; j++) {
          if (used.has(j) || !vectors[j]) continue;
          const sim = cosineSimilarity(vectors[i], vectors[j]);
          if (sim >= COMPRESS_COSINE_THRESHOLD) {
            cluster.push({ obs: obs[j], idx: j });
            used.add(j);
          }
        }

        if (cluster.length >= 3) {
          // Split chronologically: a sub-cluster closes once the next row is
          // more than COMPRESS_TIME_SPLIT_MS after the sub-cluster's first row.
          const sorted = cluster.map(c => c.obs).sort((a, b) => a.created_at_epoch - b.created_at_epoch);
          let subCluster = [sorted[0]];
          for (let k = 1; k < sorted.length; k++) {
            if (sorted[k].created_at_epoch - subCluster[0].created_at_epoch > COMPRESS_TIME_SPLIT_MS) {
              if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
              subCluster = [sorted[k]];
            } else {
              subCluster.push(sorted[k]);
            }
          }
          if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
        }
      }
    } else {
      // Fallback: group by time window only
      const sorted = obs.sort((a, b) => a.created_at_epoch - b.created_at_epoch);
      let subCluster = [sorted[0]];
      for (let k = 1; k < sorted.length; k++) {
        if (sorted[k].created_at_epoch - subCluster[0].created_at_epoch > COMPRESS_TIME_SPLIT_MS) {
          if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
          subCluster = [sorted[k]];
        } else {
          subCluster.push(sorted[k]);
        }
      }
      if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
    }
  }

  return clusters;
}
|
|
527
|
+
|
|
528
|
+
/**
 * Summarize one cluster of ≥ 3 related observations into a single new summary
 * row via the LLM, then mark the originals as compressed into it — atomically.
 *
 * The summary row is inserted under a synthetic `compress-<project>` session,
 * typed 'discovery' with importance 2, and dated at the cluster's median epoch
 * so it sorts where the originals lived.
 *
 * @param {object} db better-sqlite3 handle
 * @param {Array<object>} observations cluster members (≥ 3)
 * @param {string} project project the cluster belongs to
 * @returns {Promise<{compressed: boolean, summaryId?: number, count?: number}>}
 */
export async function executeSmartCompressCluster(db, observations, project) {
  if (observations.length < 3) return { compressed: false };

  const gotSlot = await acquireLLMSlot();
  if (!gotSlot) return { compressed: false };

  try {
    const obsDescriptions = observations.map((o, i) =>
      `${i + 1}. [${o.type || 'change'}] "${truncate(o.title || '(untitled)', 200)}" — ${truncate(o.narrative || '(no narrative)', 500)}${o.lesson_learned ? ` | Lesson: ${truncate(o.lesson_learned, 200)}` : ''}`
    ).join('\n');

    const prompt = `Summarize these related code memory observations into ONE comprehensive summary. Preserve all important decisions, lessons, and specific facts. Return ONLY valid JSON.

Observations:
${obsDescriptions}

JSON: {"title":"descriptive summary ≤120 chars","narrative":"comprehensive summary ≤800 chars preserving key decisions and lessons","concepts":["kw1","kw2"],"facts":["all specific facts preserved"],"lesson_learned":"most important synthesized lesson or 'none'","search_aliases":["alt search 1","alt search 2"]}`;

    const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
    if (!parsed || !parsed.title) return { compressed: false };

    // Bound/sanitize LLM output before writing.
    const title = truncate(parsed.title, 120);
    const narrative = truncate(parsed.narrative || '', 800);
    const concepts = Array.isArray(parsed.concepts) ? parsed.concepts.slice(0, 10) : [];
    const facts = Array.isArray(parsed.facts) ? parsed.facts.slice(0, 10) : [];
    const conceptsText = concepts.join(' ');
    const factsText = facts.join(' ');
    // "none" (any case) or blank → NULL, not the literal string.
    const lessonLearned = typeof parsed.lesson_learned === 'string'
      && parsed.lesson_learned.toLowerCase() !== 'none'
      && parsed.lesson_learned.trim().length > 0
      ? parsed.lesson_learned.slice(0, 500) : null;
    const searchAliases = Array.isArray(parsed.search_aliases)
      ? parsed.search_aliases.slice(0, 6).join(' ') : null;

    const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
    const textField = [conceptsText, factsText, searchAliases || '', bigramText].filter(Boolean).join(' ');

    // Date the summary at the median member epoch to keep chronology sensible.
    const epochs = observations.map(o => o.created_at_epoch).sort((a, b) => a - b);
    const medianEpoch = epochs[Math.floor(epochs.length / 2)];

    // Atomic: session upsert + summary insert + tombstoning of originals.
    const summaryId = db.transaction(() => {
      const sessionId = `compress-${project}`;
      const now = new Date();
      db.prepare(`INSERT OR IGNORE INTO sdk_sessions
        (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
        VALUES (?,?,?,?,?,'active')`
      ).run(sessionId, sessionId, project, now.toISOString(), now.getTime());

      const result = db.prepare(`INSERT INTO observations
        (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
         files_read, files_modified, importance, lesson_learned, search_aliases, optimized_at,
         created_at, created_at_epoch)
        VALUES (?,?,?,?,?,'',?,?,?,'[]','[]',2,?,?,?,?,?)`
      ).run(sessionId, project, textField, 'discovery', title, narrative,
        conceptsText, factsText, lessonLearned, searchAliases, Date.now(),
        new Date(medianEpoch).toISOString(), medianEpoch);

      const sId = Number(result.lastInsertRowid);

      const obsIds = observations.map(o => o.id);
      const ph = obsIds.map(() => '?').join(',');
      db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${ph})`)
        .run(sId, ...obsIds);

      return sId;
    })();

    rebuildVector(db, summaryId, [title, narrative, conceptsText]);

    debugLog('DEBUG', 'llm-optimize', `smart-compressed ${observations.length} observations into #${summaryId}`);
    return { compressed: true, summaryId, count: observations.length };
  } catch (e) {
    debugCatch(e, 'smart-compress');
    return { compressed: false };
  } finally {
    releaseLLMSlot();
  }
}
|
|
606
|
+
|
|
607
|
+
/**
 * Run the smart-compress task end to end: find stale candidates, cluster
 * them, and compress up to `maxClusters` clusters.
 *
 * @param {object} db better-sqlite3 handle
 * @param {number} [maxClusters=5] cap on clusters compressed this run
 * @returns {Promise<{processed: number, compressed: number}>}
 */
export async function executeSmartCompress(db, maxClusters = 5) {
  const nothing = { processed: 0, compressed: 0 };

  const candidates = findSmartCompressCandidates(db);
  if (candidates.length < 3) return nothing;

  const clusters = clusterForCompression(candidates, db);
  if (clusters.length === 0) return nothing;

  const selected = clusters.slice(0, maxClusters);
  let compressed = 0;
  for (const { observations, project } of selected) {
    const outcome = await executeSmartCompressCluster(db, observations, project);
    if (outcome.compressed) compressed += 1;
  }

  return { processed: selected.length, compressed };
}
|
|
623
|
+
|
|
624
|
+
// ─── Pipeline Orchestrator ──────────────────────────────────────────────────
|
|
625
|
+
|
|
626
|
+
/**
 * Dry-run report of what each optimization task would touch. Makes no writes.
 *
 * Fix: shouldRunNormalize() was called twice per preview (once for
 * normalizeReady, once for normalizeGateOpen); each call re-reads the gate
 * file from disk. The result is now computed once and reused.
 *
 * @param {object} db better-sqlite3 handle
 * @returns {{reenrich: number, reenrichWide: number, normalize: number,
 *   normalizeGateOpen: boolean, clusterMerge: number, smartCompress: number,
 *   total: number}}
 */
export function optimizePreview(db) {
  const reenrich = findReenrichCandidates(db, 1000).length;
  // R-7: also report the widened-scope candidate count so users can see how many
  // bugfix/refactor/feature/decision observations are eligible for lesson backfill.
  const reenrichWide = findReenrichCandidates(db, 5000, { scope: 'wide' }).length;

  const concepts = extractUniqueConcepts(db);
  const normalizeGateOpen = shouldRunNormalize(); // single gate-file read
  const normalizeReady = normalizeGateOpen && concepts.length >= 5;

  const mergeClusters = findMergeCandidates(db, 50);
  const clusterMerge = mergeClusters.length;

  const compressCandidates = findSmartCompressCandidates(db);
  const compressClusters = clusterForCompression(compressCandidates, db);
  const smartCompress = compressClusters.length;

  return {
    reenrich,
    reenrichWide,
    normalize: normalizeReady ? concepts.length : 0,
    normalizeGateOpen,
    clusterMerge,
    smartCompress,
    // Normalize counts as one unit of work regardless of concept count.
    total: reenrich + (normalizeReady ? 1 : 0) + clusterMerge + smartCompress,
  };
}
|
|
652
|
+
|
|
653
|
+
/**
 * Run optimization tasks against the memory DB.
 *
 * @param {object} db better-sqlite3 handle
 * @param {object} [opts]
 * @param {string[]} [opts.tasks] Subset of tasks to run (default: all). When a single
 *   task is selected, it receives the FULL maxItems budget instead of the proportional
 *   slice from distributeBudget() — otherwise explicit `--max N --task re-enrich`
 *   would silently waste 60% of the requested budget.
 * @param {number} [opts.maxItems=15] Total item budget across all selected tasks.
 * @param {boolean} [opts.force=false] Bypass time-based gates (e.g. normalize interval).
 * @param {'narrow'|'wide'} [opts.reenrichScope='narrow'] Scope for the re-enrich task.
 *   'wide' targets bugfix/refactor/feature/decision with narrative but no lesson (R-7).
 * @returns {Promise<object>} Per-task results keyed by camelCase task name
 *   (reenrich / normalize / clusterMerge / smartCompress). A failed task maps
 *   to `{ error }` under the same key. Unknown task names are ignored.
 */
export async function optimizeRun(db, { tasks, maxItems = 15, force = false, reenrichScope = 'narrow' } = {}) {
  const allTasks = ['re-enrich', 'normalize', 'cluster-merge', 'smart-compress'];
  // Canonical result key for each task, shared by the success and error paths.
  const resultKeys = {
    're-enrich': 'reenrich',
    'normalize': 'normalize',
    'cluster-merge': 'clusterMerge',
    'smart-compress': 'smartCompress',
  };
  const selectedTasks = tasks && tasks.length > 0 ? tasks : allTasks;
  // Single-task mode: give that task the full budget. Distribution only makes sense
  // when multiple tasks compete for the same pool.
  const budget = selectedTasks.length === 1
    ? { reenrich: maxItems, normalize: maxItems, clusterMerge: maxItems, smartCompress: maxItems }
    : distributeBudget(maxItems);
  const results = {};

  for (const task of selectedTasks) {
    try {
      switch (task) {
        case 're-enrich':
          results.reenrich = await executeReenrich(db, budget.reenrich, { scope: reenrichScope });
          break;
        case 'normalize':
          results.normalize = await executeNormalize(db, force);
          break;
        case 'cluster-merge':
          results.clusterMerge = await executeClusterMerge(db, budget.clusterMerge);
          break;
        case 'smart-compress':
          results.smartCompress = await executeSmartCompress(db, budget.smartCompress);
          break;
      }
    } catch (e) {
      debugCatch(e, `optimize:${task}`);
      // Bug fix: errors were previously keyed by the raw task name (e.g.
      // 'cluster-merge') while successes use camelCase ('clusterMerge'), so
      // consumers checking results.clusterMerge could never observe a failure.
      // Key the failure under the same camelCase key a success would use.
      results[resultKeys[task] ?? task] = { error: e.message };
    }
  }

  return results;
}
|
|
701
|
+
|
|
702
|
+
// Background entry point for the optimize pass: open the memory DB, run all
// optimization tasks with default settings, log a one-line summary of what
// changed, and always close the handle. Bails out silently when the DB cannot
// be opened (e.g. first run before any schema exists).
export async function handleLLMOptimize() {
  const { ensureDb } = await import('./schema.mjs');
  let db;
  try {
    db = ensureDb();
  } catch {
    return; // no usable DB — nothing to optimize
  }

  try {
    const results = await optimizeRun(db);
    // [result key, count field, summary label] rows for the debug one-liner.
    const summaryFields = [
      ['reenrich', 'processed', 're-enriched'],
      ['normalize', 'processed', 'normalized'],
      ['clusterMerge', 'merged', 'merged'],
      ['smartCompress', 'compressed', 'compressed'],
    ];
    const parts = [];
    for (const [key, field, label] of summaryFields) {
      const count = results[key]?.[field];
      if (count) parts.push(`${label}: ${count}`);
    }
    if (parts.length > 0) debugLog('DEBUG', 'llm-optimize', parts.join(', '));
  } catch (e) {
    debugCatch(e, 'llm-optimize');
  } finally {
    db.close();
  }
}
|