claude-mem-lite 2.32.1 → 2.32.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,724 @@
+ // claude-mem-lite: LLM-powered database optimization
+ // Background worker for intelligent maintenance: re-enrich, normalize, cluster-merge, smart-compress
+ // Triggered from auto-maintain (24h) or manually via mem_optimize MCP tool / CLI
+
+ import { readFileSync, writeFileSync } from 'fs';
+ import { join } from 'path';
+ import {
+   truncate, debugLog, debugCatch, COMPRESSED_AUTO,
+   computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity, clampImportance, cjkBigrams,
+   notLowSignalTitleClause,
+ } from './utils.mjs';
+ import { callModelJSON } from './haiku-client.mjs';
+ import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
+ import { getVocabulary, computeVector, cosineSimilarity } from './tfidf.mjs';
+ import { DB_DIR } from './schema.mjs';
+
+ const RUNTIME_DIR = join(DB_DIR, 'runtime');
+
+ // ─── Budget ─────────────────────────────────────────────────────────────────
+
+ export function distributeBudget(total = 15) {
+   const normalize = 1;
+   const reenrich = Math.max(1, Math.floor(total * 0.4));
+   const clusterMerge = Math.max(1, Math.floor(total * 0.3));
+   const smartCompress = Math.max(1, total - reenrich - normalize - clusterMerge);
+   // Clamp: if total is too small for 4 tasks, cap each so sum ≤ total
+   if (reenrich + normalize + clusterMerge + smartCompress > total) {
+     return { reenrich: Math.max(1, total - 3), normalize: 1, clusterMerge: 1, smartCompress: 1 };
+   }
+   return { reenrich, normalize, clusterMerge, smartCompress };
+ }
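+
+ // Example (illustrative): with the default budget of 15 items, distributeBudget(15)
+ // returns { reenrich: 6, normalize: 1, clusterMerge: 4, smartCompress: 4 }, since
+ // floor(15*0.4)=6 and floor(15*0.3)=4 and the remaining 4 go to smart-compress;
+ // the parts sum to 15, so the clamp branch is not taken.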
+
+ // ─── Shared Helpers ─────────────────────────────────────────────────────────
+
+ /**
+  * Rebuild TF-IDF vector for an observation. Non-critical — swallows errors.
+  * Exported for testing; also kept as the single source of vector-rebuild logic
+  * for the optimize / re-enrich path to avoid drift with the hook-llm write path.
+  */
+ export function rebuildVector(db, obsId, textParts) {
+   try {
+     const vocab = getVocabulary(db);
+     if (!vocab) return;
+     const vec = computeVector(textParts.filter(Boolean).join(' '), vocab);
+     if (vec) {
+       // Bug #1 fix: column is `created_at_epoch`, not `computed_at`. Every other
+       // INSERT callsite (server.mjs, hook-llm.mjs, mem-cli.mjs) uses the correct
+       // name; this was the only drift, silently caught by the catch below until
+       // the R-7 experiment surfaced it.
+       db.prepare(`
+         INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch)
+         VALUES (?, ?, ?, ?)
+       `).run(obsId, Buffer.from(vec.buffer), vocab.version, Date.now());
+     }
+   } catch (e) { debugCatch(e, 'optimize-vector'); }
+ }
+
+ // ─── Task 1: Re-enrich ─────────────────────────────────────────────────────
+
+ /**
+  * Find observations eligible for LLM re-enrichment.
+  *
+  * Two scopes:
+  * - 'narrow' (default): fully-degraded observations — Haiku failed to extract
+  *   concepts / facts / lesson / aliases. Conservative; preserves pre-R-7 behavior.
+  * - 'wide' (R-7): substantive bugfix / refactor / feature / decision observations
+  *   that have concepts + facts populated but are missing lesson_learned.
+  *   Targets the "Haiku ran but judged 'none'" cases that dominate the library.
+  *   Excludes LOW_SIGNAL titles (no source material to extract from) and
+  *   thin narratives (<100 chars → nothing to rewrite into a lesson).
+  *
+  * Both scopes respect optimized_at (idempotent) and skip compressed/superseded rows.
+  *
+  * @param {object} db better-sqlite3 database handle
+  * @param {number} limit max candidates to return
+  * @param {{ scope?: 'narrow' | 'wide' }} [opts]
+  */
+ export function findReenrichCandidates(db, limit = 10, { scope = 'narrow' } = {}) {
+   if (scope === 'wide') {
+     return db.prepare(`
+       SELECT id, title, narrative, type, subtitle, concepts, facts
+       FROM observations
+       WHERE COALESCE(compressed_into, 0) = 0
+         AND superseded_at IS NULL
+         AND optimized_at IS NULL
+         AND type IN ('bugfix','refactor','feature','decision')
+         AND (lesson_learned IS NULL OR lesson_learned = '')
+         AND LENGTH(COALESCE(narrative, '')) > 100
+         AND ${notLowSignalTitleClause('')}
+       ORDER BY
+         CASE type WHEN 'decision' THEN 0 WHEN 'bugfix' THEN 1 WHEN 'refactor' THEN 2 ELSE 3 END,
+         created_at_epoch DESC
+       LIMIT ?
+     `).all(limit);
+   }
+   return db.prepare(`
+     SELECT id, title, narrative, type, subtitle
+     FROM observations
+     WHERE COALESCE(compressed_into, 0) = 0
+       AND superseded_at IS NULL
+       AND (concepts IS NULL OR concepts = '')
+       AND (facts IS NULL OR facts = '')
+       AND lesson_learned IS NULL
+       AND search_aliases IS NULL
+       AND optimized_at IS NULL
+     ORDER BY created_at_epoch DESC
+     LIMIT ?
+   `).all(limit);
+ }
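+
+ // Usage sketch (illustrative): findReenrichCandidates(db, 10, { scope: 'wide' }) returns up to
+ // 10 substantive bugfix/refactor/feature/decision rows that still lack a lesson_learned,
+ // ordered decision → bugfix → refactor → rest, newest first within each type.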
+
+ export async function executeReenrich(db, limit = 10, { scope = 'narrow' } = {}) {
+   const candidates = findReenrichCandidates(db, limit, { scope });
+   if (candidates.length === 0) return { processed: 0, skipped: 0 };
+
+   let processed = 0, skipped = 0;
+   const validTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);
+
+   for (const cand of candidates) {
+     const gotSlot = await acquireLLMSlot();
+     if (!gotSlot) { skipped++; continue; }
+
+     try {
+       const prompt = `Re-enrich this observation with structured metadata. Return ONLY valid JSON, no markdown fences.
+
+ Title: ${truncate(cand.title || '(untitled)', 200)}
+ Narrative: ${truncate(cand.narrative || '(no narrative)', 500)}
+ Type: ${cand.type || 'change'}
+
+ JSON: {"type":"decision|bugfix|feature|refactor|discovery|change","title":"improved ≤120 char title","narrative":"improved 2-3 sentence narrative","concepts":["kw1","kw2"],"facts":["specific fact 1","specific fact 2"],"importance":1,"lesson_learned":"non-obvious insight or 'none' if routine","search_aliases":["alt query 1","alt query 2"]}
+ importance: 0=no value, 1=routine, 2=notable non-obvious insight, 3=critical. Default 1.
+ lesson_learned: State what was learned. If routine, write "none".
+ search_aliases: 2-6 alternative search terms (include CJK if applicable).`;
+
+       const parsed = await callModelJSON(prompt, 'haiku', { timeout: 15000, maxTokens: 500 });
+       if (!parsed || !parsed.title) { skipped++; continue; }
+
+       if (parsed.importance === 0 || parsed.importance === '0') {
+         db.prepare(`UPDATE observations SET compressed_into = ${COMPRESSED_AUTO}, optimized_at = ? WHERE id = ?`)
+           .run(Date.now(), cand.id);
+         processed++;
+         continue;
+       }
+
+       const type = validTypes.has(parsed.type) ? parsed.type : cand.type || 'change';
+       const concepts = Array.isArray(parsed.concepts) ? parsed.concepts.slice(0, 10) : [];
+       const facts = Array.isArray(parsed.facts) ? parsed.facts.slice(0, 10) : [];
+       const conceptsText = concepts.join(' ');
+       const factsText = facts.join(' ');
+       const lessonLearned = typeof parsed.lesson_learned === 'string'
+         && parsed.lesson_learned.toLowerCase() !== 'none'
+         && parsed.lesson_learned.trim().length > 0
+         ? parsed.lesson_learned.slice(0, 500) : null;
+       const searchAliases = Array.isArray(parsed.search_aliases)
+         ? parsed.search_aliases.slice(0, 6).join(' ') : null;
+       const title = truncate(parsed.title, 120);
+       const narrative = truncate(parsed.narrative || cand.narrative || '', 500);
+       const importance = clampImportance(parsed.importance);
+
+       const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
+       const textField = [conceptsText, factsText, searchAliases || '', bigramText].filter(Boolean).join(' ');
+       const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
+
+       db.prepare(`
+         UPDATE observations SET type=?, title=?, narrative=?, concepts=?, facts=?,
+           text=?, importance=?, lesson_learned=?, search_aliases=?, minhash_sig=?, optimized_at=?
+         WHERE id = ?
+       `).run(type, title, narrative, conceptsText, factsText, textField,
+         importance, lessonLearned, searchAliases, minhashSig, Date.now(), cand.id);
+
+       rebuildVector(db, cand.id, [title, narrative, conceptsText]);
+
+       processed++;
+     } catch (e) {
+       debugCatch(e, 'reenrich');
+       skipped++;
+     } finally {
+       releaseLLMSlot();
+     }
+   }
+
+   if (processed > 0) debugLog('DEBUG', 'llm-optimize', `re-enriched ${processed} degraded observations`);
+   return { processed, skipped };
+ }
+
+ // ─── Task 2: Normalize ─────────────────────────────────────────────────────
+
+ const NORMALIZE_GATE_FILE = join(RUNTIME_DIR, 'last-normalize.json');
+ const NORMALIZE_INTERVAL_MS = 7 * 86400000; // 7 days
+
+ export function shouldRunNormalize() {
+   try {
+     const last = JSON.parse(readFileSync(NORMALIZE_GATE_FILE, 'utf8'));
+     return Date.now() - last.epoch >= NORMALIZE_INTERVAL_MS;
+   } catch {
+     return true;
+   }
+ }
+
+ export function extractUniqueConcepts(db, limit = 500) {
+   const rows = db.prepare(`
+     SELECT concepts FROM observations
+     WHERE COALESCE(compressed_into, 0) = 0
+       AND concepts IS NOT NULL AND concepts != ''
+     ORDER BY created_at_epoch DESC
+     LIMIT 2000
+   `).all();
+
+   const conceptSet = new Set();
+   for (const row of rows) {
+     for (const c of row.concepts.split(/\s+/)) {
+       const trimmed = c.trim();
+       if (trimmed.length >= 2) conceptSet.add(trimmed);
+     }
+   }
+   return [...conceptSet].slice(0, limit);
+ }
+
+ export async function identifySynonymGroups(concepts) {
+   const gotSlot = await acquireLLMSlot();
+   if (!gotSlot) return [];
+
+   try {
+     const prompt = `Analyze these concept terms from a code memory database and identify synonym groups (terms that refer to the same concept). Include cross-language synonyms (English/Chinese). Return ONLY valid JSON.
+
+ Concepts: ${concepts.join(', ')}
+
+ JSON: {"groups":[{"canonical":"preferred term","aliases":["synonym1","synonym2"]}, ...]}
+
+ Rules:
+ - Only include groups where you are confident the terms are true synonyms
+ - canonical should be the most specific/technical term
+ - Include CJK ↔ English equivalents if present
+ - Skip terms that have no synonyms in the list`;
+
+     const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
+     if (!parsed?.groups || !Array.isArray(parsed.groups)) return [];
+     return parsed.groups.filter(g => g.canonical && Array.isArray(g.aliases) && g.aliases.length > 0);
+   } catch (e) {
+     debugCatch(e, 'normalize-identify');
+     return [];
+   } finally {
+     releaseLLMSlot();
+   }
+ }
+
+ export function applyNormalization(db, groups) {
+   if (!groups || groups.length === 0) return { updated: 0 };
+
+   const aliasMap = new Map();
+   for (const g of groups) {
+     for (const alias of g.aliases) {
+       aliasMap.set(alias.toLowerCase(), g.canonical);
+     }
+   }
+
+   const rows = db.prepare(`
+     SELECT id, concepts, search_aliases FROM observations
+     WHERE COALESCE(compressed_into, 0) = 0
+       AND concepts IS NOT NULL AND concepts != ''
+   `).all();
+
+   let updated = 0;
+   const updateStmt = db.prepare(`
+     UPDATE observations SET concepts = ?, search_aliases = ?, optimized_at = ? WHERE id = ?
+   `);
+
+   for (const row of rows) {
+     const terms = row.concepts.split(/\s+/);
+     let changed = false;
+     const newTerms = terms.map(t => {
+       const canonical = aliasMap.get(t.toLowerCase());
+       if (canonical && canonical !== t) { changed = true; return canonical; }
+       return t;
+     });
+
+     if (changed) {
+       const uniqueConcepts = [...new Set(newTerms)].join(' ');
+       const existingAliases = row.search_aliases || '';
+       const originalTerms = terms.filter(t => aliasMap.has(t.toLowerCase()) && aliasMap.get(t.toLowerCase()) !== t);
+       const newAliases = [existingAliases, ...originalTerms].filter(Boolean).join(' ');
+       updateStmt.run(uniqueConcepts, newAliases, Date.now(), row.id);
+       updated++;
+     }
+   }
+
+   if (updated > 0) debugLog('DEBUG', 'llm-optimize', `normalized concepts in ${updated} observations`);
+   return { updated };
+ }
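+
+ // Example (illustrative values): applyNormalization(db, [{ canonical: 'tf-idf', aliases: ['tfidf'] }])
+ // rewrites 'tfidf' to 'tf-idf' in each observation's concepts, de-duplicates the result, and keeps
+ // the original spelling reachable by appending it to search_aliases.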
+
+ export async function executeNormalize(db, force = false) {
+   if (!force && !shouldRunNormalize()) return { skipped: true, reason: 'gate' };
+
+   const concepts = extractUniqueConcepts(db);
+   if (concepts.length < 5) return { skipped: true, reason: 'too few concepts' };
+
+   const groups = await identifySynonymGroups(concepts);
+   if (groups.length === 0) return { processed: 0, groups: 0 };
+
+   const result = applyNormalization(db, groups);
+
+   try { writeFileSync(NORMALIZE_GATE_FILE, JSON.stringify({ epoch: Date.now() })); } catch {}
+
+   return { processed: result.updated, groups: groups.length };
+ }
+
+ // ─── Task 3: Cluster-merge ─────────────────────────────────────────────────
+
+ const MERGE_TIME_WINDOW_MS = 30 * 86400000;
+ const MERGE_JACCARD_LOW = 0.4;
+ const MERGE_JACCARD_HIGH = 0.85;
+
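+ // Cheap two-stage similarity gate: a MinHash estimate pre-screens pairs (skipped when the estimate
+ // is below 0.8 * MERGE_JACCARD_LOW), then exact title Jaccard must land in
+ // [MERGE_JACCARD_LOW, MERGE_JACCARD_HIGH) for an observation to join a cluster; clusters are capped
+ // at 5 members and kept only when at least 2 observations qualify.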
+ export function findMergeCandidates(db, maxClusters = 5) {
+   const cutoff = Date.now() - MERGE_TIME_WINDOW_MS;
+   const rows = db.prepare(`
+     SELECT id, title, narrative, type, project, access_count, created_at_epoch, minhash_sig
+     FROM observations
+     WHERE COALESCE(compressed_into, 0) = 0
+       AND optimized_at IS NULL
+       AND title IS NOT NULL AND title != ''
+       AND created_at_epoch > ?
+     ORDER BY created_at_epoch DESC
+     LIMIT 200
+   `).all(cutoff);
+
+   const used = new Set();
+   const clusters = [];
+
+   for (let i = 0; i < rows.length && clusters.length < maxClusters; i++) {
+     if (used.has(rows[i].id)) continue;
+     const cluster = [rows[i]];
+
+     for (let j = i + 1; j < rows.length && cluster.length < 5; j++) {
+       if (used.has(rows[j].id)) continue;
+       if (rows[i].project !== rows[j].project) continue;
+       if (Math.abs(rows[i].created_at_epoch - rows[j].created_at_epoch) > MERGE_TIME_WINDOW_MS) continue;
+
+       if (rows[i].minhash_sig && rows[j].minhash_sig) {
+         const est = estimateJaccardFromMinHash(rows[i].minhash_sig, rows[j].minhash_sig);
+         if (est < MERGE_JACCARD_LOW * 0.8) continue;
+       }
+
+       const titleSim = jaccardSimilarity(rows[i].title, rows[j].title);
+       if (titleSim >= MERGE_JACCARD_LOW && titleSim < MERGE_JACCARD_HIGH) {
+         cluster.push(rows[j]);
+         used.add(rows[j].id);
+       }
+     }
+
+     if (cluster.length >= 2) {
+       used.add(rows[i].id);
+       clusters.push(cluster);
+     }
+   }
+
+   return clusters;
+ }
+
+ export async function executeMergeCluster(db, cluster) {
+   if (cluster.length < 2) return { merged: false };
+
+   const gotSlot = await acquireLLMSlot();
+   if (!gotSlot) return { merged: false };
+
+   try {
+     const obsDescriptions = cluster.map((o, i) =>
+       `${i + 1}. [${o.type || 'change'}] "${truncate(o.title, 200)}" — ${truncate(o.narrative || '(no narrative)', 500)}`
+     ).join('\n');
+
+     const prompt = `These observations from a code memory database may be about the same topic. Should they be merged into a single observation?
+
+ Observations:
+ ${obsDescriptions}
+
+ Return ONLY valid JSON:
+ - If they should NOT be merged: {"should_merge":false}
+ - If they SHOULD be merged: {"should_merge":true,"merged_title":"≤120 char comprehensive title","merged_narrative":"comprehensive ≤800 char summary preserving all key details","merged_concepts":["kw1","kw2"],"merged_facts":["specific fact 1"],"merged_lesson":"synthesized non-obvious lesson or null","importance":2}`;
+
+     const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
+     if (!parsed || !parsed.should_merge) return { merged: false };
+
+     const keeper = cluster.reduce((best, o) =>
+       (o.access_count || 0) > (best.access_count || 0) ? o : best
+     , cluster[0]);
+     const others = cluster.filter(o => o.id !== keeper.id);
+
+     const concepts = Array.isArray(parsed.merged_concepts) ? parsed.merged_concepts.slice(0, 10) : [];
+     const facts = Array.isArray(parsed.merged_facts) ? parsed.merged_facts.slice(0, 10) : [];
+     const conceptsText = concepts.join(' ');
+     const factsText = facts.join(' ');
+     const title = truncate(parsed.merged_title, 120);
+     const narrative = truncate(parsed.merged_narrative || '', 800);
+     const lessonLearned = typeof parsed.merged_lesson === 'string'
+       && parsed.merged_lesson.trim().length > 0
+       ? parsed.merged_lesson.slice(0, 500) : null;
+
+     const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
+     const textField = [conceptsText, factsText, bigramText].filter(Boolean).join(' ');
+     const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
+     const importance = clampImportance(parsed.importance || 2);
+
+     db.transaction(() => {
+       db.prepare(`
+         UPDATE observations SET title=?, narrative=?, concepts=?, facts=?, text=?,
+           importance=?, lesson_learned=?, minhash_sig=?, optimized_at=?
+         WHERE id = ?
+       `).run(title, narrative, conceptsText, factsText, textField,
+         importance, lessonLearned, minhashSig, Date.now(), keeper.id);
+
+       const otherIds = others.map(o => o.id);
+       const ph = otherIds.map(() => '?').join(',');
+       db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${ph})`)
+         .run(keeper.id, ...otherIds);
+     })();
+
+     rebuildVector(db, keeper.id, [title, narrative, conceptsText]);
+
+     debugLog('DEBUG', 'llm-optimize', `merged ${cluster.length} observations into #${keeper.id}`);
+     return { merged: true, keeperId: keeper.id, mergedCount: others.length };
+   } catch (e) {
+     debugCatch(e, 'cluster-merge');
+     return { merged: false };
+   } finally {
+     releaseLLMSlot();
+   }
+ }
+
+ export async function executeClusterMerge(db, maxClusters = 5) {
+   const clusters = findMergeCandidates(db, maxClusters);
+   if (clusters.length === 0) return { processed: 0, merged: 0 };
+
+   let merged = 0;
+   for (const cluster of clusters) {
+     const result = await executeMergeCluster(db, cluster);
+     if (result.merged) merged++;
+   }
+
+   return { processed: clusters.length, merged };
+ }
+
+ // ─── Task 4: Smart-compress ────────────────────────────────────────────────
+
+ const COMPRESS_TIME_SPLIT_MS = 14 * 86400000;
+ const COMPRESS_COSINE_THRESHOLD = 0.3;
+
+ export function findSmartCompressCandidates(db, ageDays = 30) {
+   const cutoff = Date.now() - ageDays * 86400000;
+   return db.prepare(`
+     SELECT id, title, narrative, lesson_learned, project, type, created_at_epoch
+     FROM observations
+     WHERE COALESCE(compressed_into, 0) = 0
+       AND COALESCE(importance, 1) = 1
+       AND COALESCE(access_count, 0) = 0
+       AND created_at_epoch < ?
+     ORDER BY project, created_at_epoch
+   `).all(cutoff);
+ }
+
+ export function clusterForCompression(candidates, db) {
+   if (candidates.length < 3) return [];
+
+   const byProject = new Map();
+   for (const c of candidates) {
+     if (!byProject.has(c.project)) byProject.set(c.project, []);
+     byProject.get(c.project).push(c);
+   }
+
+   const clusters = [];
+
+   for (const [project, obs] of byProject) {
+     if (obs.length < 3) continue;
+
+     let vocab;
+     try { vocab = getVocabulary(db); } catch {}
+
+     if (vocab) {
+       const vectors = obs.map(o => {
+         const text = [o.title || '', o.narrative || ''].join(' ');
+         return computeVector(text, vocab);
+       });
+
+       const used = new Set();
+       for (let i = 0; i < obs.length; i++) {
+         if (used.has(i) || !vectors[i]) continue;
+         const cluster = [{ obs: obs[i], idx: i }];
+         used.add(i);
+
+         for (let j = i + 1; j < obs.length; j++) {
+           if (used.has(j) || !vectors[j]) continue;
+           const sim = cosineSimilarity(vectors[i], vectors[j]);
+           if (sim >= COMPRESS_COSINE_THRESHOLD) {
+             cluster.push({ obs: obs[j], idx: j });
+             used.add(j);
+           }
+         }
+
+         if (cluster.length >= 3) {
+           const sorted = cluster.map(c => c.obs).sort((a, b) => a.created_at_epoch - b.created_at_epoch);
+           let subCluster = [sorted[0]];
+           for (let k = 1; k < sorted.length; k++) {
+             if (sorted[k].created_at_epoch - subCluster[0].created_at_epoch > COMPRESS_TIME_SPLIT_MS) {
+               if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
+               subCluster = [sorted[k]];
+             } else {
+               subCluster.push(sorted[k]);
+             }
+           }
+           if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
+         }
+       }
+     } else {
+       // Fallback: group by time window only
+       const sorted = obs.sort((a, b) => a.created_at_epoch - b.created_at_epoch);
+       let subCluster = [sorted[0]];
+       for (let k = 1; k < sorted.length; k++) {
+         if (sorted[k].created_at_epoch - subCluster[0].created_at_epoch > COMPRESS_TIME_SPLIT_MS) {
+           if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
+           subCluster = [sorted[k]];
+         } else {
+           subCluster.push(sorted[k]);
+         }
+       }
+       if (subCluster.length >= 3) clusters.push({ project, observations: subCluster });
+     }
+   }
+
+   return clusters;
+ }
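+
+ // Summary of the clustering rules above: candidates are grouped per project, an observation joins a
+ // cluster when its TF-IDF cosine similarity to the cluster's seed is ≥ COMPRESS_COSINE_THRESHOLD
+ // (0.3), and a cluster is kept only with ≥ 3 members; clusters spanning more than
+ // COMPRESS_TIME_SPLIT_MS (14 days) are split, and when no vocabulary is available the grouping
+ // falls back to the time window alone.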
+
+ export async function executeSmartCompressCluster(db, observations, project) {
+   if (observations.length < 3) return { compressed: false };
+
+   const gotSlot = await acquireLLMSlot();
+   if (!gotSlot) return { compressed: false };
+
+   try {
+     const obsDescriptions = observations.map((o, i) =>
+       `${i + 1}. [${o.type || 'change'}] "${truncate(o.title || '(untitled)', 200)}" — ${truncate(o.narrative || '(no narrative)', 500)}${o.lesson_learned ? ` | Lesson: ${truncate(o.lesson_learned, 200)}` : ''}`
+     ).join('\n');
+
+     const prompt = `Summarize these related code memory observations into ONE comprehensive summary. Preserve all important decisions, lessons, and specific facts. Return ONLY valid JSON.
+
+ Observations:
+ ${obsDescriptions}
+
+ JSON: {"title":"descriptive summary ≤120 chars","narrative":"comprehensive summary ≤800 chars preserving key decisions and lessons","concepts":["kw1","kw2"],"facts":["all specific facts preserved"],"lesson_learned":"most important synthesized lesson or 'none'","search_aliases":["alt search 1","alt search 2"]}`;
+
+     const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
+     if (!parsed || !parsed.title) return { compressed: false };
+
+     const title = truncate(parsed.title, 120);
+     const narrative = truncate(parsed.narrative || '', 800);
+     const concepts = Array.isArray(parsed.concepts) ? parsed.concepts.slice(0, 10) : [];
+     const facts = Array.isArray(parsed.facts) ? parsed.facts.slice(0, 10) : [];
+     const conceptsText = concepts.join(' ');
+     const factsText = facts.join(' ');
+     const lessonLearned = typeof parsed.lesson_learned === 'string'
+       && parsed.lesson_learned.toLowerCase() !== 'none'
+       && parsed.lesson_learned.trim().length > 0
+       ? parsed.lesson_learned.slice(0, 500) : null;
+     const searchAliases = Array.isArray(parsed.search_aliases)
+       ? parsed.search_aliases.slice(0, 6).join(' ') : null;
+
+     const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
+     const textField = [conceptsText, factsText, searchAliases || '', bigramText].filter(Boolean).join(' ');
+
+     const epochs = observations.map(o => o.created_at_epoch).sort((a, b) => a - b);
+     const medianEpoch = epochs[Math.floor(epochs.length / 2)];
+
+     const summaryId = db.transaction(() => {
+       const sessionId = `compress-${project}`;
+       const now = new Date();
+       db.prepare(`INSERT OR IGNORE INTO sdk_sessions
+         (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
+         VALUES (?,?,?,?,?,'active')`
+       ).run(sessionId, sessionId, project, now.toISOString(), now.getTime());
+
+       const result = db.prepare(`INSERT INTO observations
+         (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
+         files_read, files_modified, importance, lesson_learned, search_aliases, optimized_at,
+         created_at, created_at_epoch)
+         VALUES (?,?,?,?,?,'',?,?,?,'[]','[]',2,?,?,?,?,?)`
+       ).run(sessionId, project, textField, 'discovery', title, narrative,
+         conceptsText, factsText, lessonLearned, searchAliases, Date.now(),
+         new Date(medianEpoch).toISOString(), medianEpoch);
+
+       const sId = Number(result.lastInsertRowid);
+
+       const obsIds = observations.map(o => o.id);
+       const ph = obsIds.map(() => '?').join(',');
+       db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${ph})`)
+         .run(sId, ...obsIds);
+
+       return sId;
+     })();
+
+     rebuildVector(db, summaryId, [title, narrative, conceptsText]);
+
+     debugLog('DEBUG', 'llm-optimize', `smart-compressed ${observations.length} observations into #${summaryId}`);
+     return { compressed: true, summaryId, count: observations.length };
+   } catch (e) {
+     debugCatch(e, 'smart-compress');
+     return { compressed: false };
+   } finally {
+     releaseLLMSlot();
+   }
+ }
+
+ export async function executeSmartCompress(db, maxClusters = 5) {
+   const candidates = findSmartCompressCandidates(db);
+   if (candidates.length < 3) return { processed: 0, compressed: 0 };
+
+   const clusters = clusterForCompression(candidates, db);
+   if (clusters.length === 0) return { processed: 0, compressed: 0 };
+
+   let compressed = 0;
+   const toProcess = clusters.slice(0, maxClusters);
+   for (const cluster of toProcess) {
+     const result = await executeSmartCompressCluster(db, cluster.observations, cluster.project);
+     if (result.compressed) compressed++;
+   }
+
+   return { processed: toProcess.length, compressed };
+ }
+
+ // ─── Pipeline Orchestrator ──────────────────────────────────────────────────
+
+ export function optimizePreview(db) {
+   const reenrich = findReenrichCandidates(db, 1000).length;
+   // R-7: also report the widened-scope candidate count so users can see how many
+   // bugfix/refactor/feature/decision observations are eligible for lesson backfill.
+   const reenrichWide = findReenrichCandidates(db, 5000, { scope: 'wide' }).length;
+
+   const concepts = extractUniqueConcepts(db);
+   const normalizeReady = shouldRunNormalize() && concepts.length >= 5;
+
+   const mergeClusters = findMergeCandidates(db, 50);
+   const clusterMerge = mergeClusters.length;
+
+   const compressCandidates = findSmartCompressCandidates(db);
+   const compressClusters = clusterForCompression(compressCandidates, db);
+   const smartCompress = compressClusters.length;
+
+   return {
+     reenrich,
+     reenrichWide,
+     normalize: normalizeReady ? concepts.length : 0,
+     normalizeGateOpen: shouldRunNormalize(),
+     clusterMerge,
+     smartCompress,
+     total: reenrich + (normalizeReady ? 1 : 0) + clusterMerge + smartCompress,
+   };
+ }
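+
+ // Note: optimizePreview() is read-only; it only counts candidates (no model calls, no writes), so
+ // callers can see what a full optimizeRun() would touch before spending any budget.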
+
+ /**
+  * Run optimization tasks against the memory DB.
+  *
+  * @param {object} db better-sqlite3 handle
+  * @param {object} [opts]
+  * @param {string[]} [opts.tasks] Subset of tasks to run (default: all). When a single
+  *   task is selected, it receives the FULL maxItems budget instead of the proportional
+  *   slice from distributeBudget() — otherwise explicit `--max N --task re-enrich`
+  *   would silently waste 60% of the requested budget.
+  * @param {number} [opts.maxItems=15] Total item budget across all selected tasks.
+  * @param {boolean} [opts.force=false] Bypass time-based gates (e.g. normalize interval).
+  * @param {'narrow'|'wide'} [opts.reenrichScope='narrow'] Scope for the re-enrich task.
+  *   'wide' targets bugfix/refactor/feature/decision with narrative but no lesson (R-7).
+  */
+ export async function optimizeRun(db, { tasks, maxItems = 15, force = false, reenrichScope = 'narrow' } = {}) {
+   const allTasks = ['re-enrich', 'normalize', 'cluster-merge', 'smart-compress'];
+   const selectedTasks = tasks && tasks.length > 0 ? tasks : allTasks;
+   // Single-task mode: give that task the full budget. Distribution only makes sense
+   // when multiple tasks compete for the same pool.
+   const budget = selectedTasks.length === 1
+     ? { reenrich: maxItems, normalize: maxItems, clusterMerge: maxItems, smartCompress: maxItems }
+     : distributeBudget(maxItems);
+   const results = {};
+
+   for (const task of selectedTasks) {
+     try {
+       switch (task) {
+         case 're-enrich':
+           results.reenrich = await executeReenrich(db, budget.reenrich, { scope: reenrichScope });
+           break;
+         case 'normalize':
+           results.normalize = await executeNormalize(db, force);
+           break;
+         case 'cluster-merge':
+           results.clusterMerge = await executeClusterMerge(db, budget.clusterMerge);
+           break;
+         case 'smart-compress':
+           results.smartCompress = await executeSmartCompress(db, budget.smartCompress);
+           break;
+       }
+     } catch (e) {
+       debugCatch(e, `optimize:${task}`);
+       results[task] = { error: e.message };
+     }
+   }
+
+   return results;
+ }
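+
+ // Usage sketch (illustrative): await optimizeRun(db, { tasks: ['re-enrich'], maxItems: 20, reenrichScope: 'wide' })
+ // runs only the re-enrich task and, per the single-task rule above, hands it the full budget of 20
+ // instead of the 40% slice it would get from distributeBudget(20).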
+
+ export async function handleLLMOptimize() {
+   const { ensureDb } = await import('./schema.mjs');
+   let db;
+   try {
+     db = ensureDb();
+   } catch {
+     return;
+   }
+
+   try {
+     const results = await optimizeRun(db);
+     const parts = [];
+     if (results.reenrich?.processed) parts.push(`re-enriched: ${results.reenrich.processed}`);
+     if (results.normalize?.processed) parts.push(`normalized: ${results.normalize.processed}`);
+     if (results.clusterMerge?.merged) parts.push(`merged: ${results.clusterMerge.merged}`);
+     if (results.smartCompress?.compressed) parts.push(`compressed: ${results.smartCompress.compressed}`);
+     if (parts.length > 0) debugLog('DEBUG', 'llm-optimize', parts.join(', '));
+   } catch (e) {
+     debugCatch(e, 'llm-optimize');
+   } finally {
+     db.close();
+   }
+ }