claude-mem-lite 2.45.0 → 2.47.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/hook-handoff.mjs +10 -3
- package/hook-llm.mjs +27 -3
- package/hook.mjs +73 -17
- package/lib/low-signal-patterns.mjs +34 -0
- package/lib/summary-extractor.mjs +112 -0
- package/package.json +2 -1
- package/schema.mjs +17 -1
- package/scoring-sql.mjs +13 -8
- package/source-files.mjs +1 -0
- package/tfidf.mjs +7 -0
package/hook-handoff.mjs
CHANGED
|
@@ -298,7 +298,7 @@ export function detectContinuationIntent(db, promptText, project, currentCcSessi
|
|
|
298
298
|
* @param {string|null} [currentCcSessionId=null] Claude Code session id for scoping
|
|
299
299
|
* @returns {string|null} Injection text or null if no handoff
|
|
300
300
|
*/
|
|
301
|
-
export function
|
|
301
|
+
export function pickHandoffToInject(db, project, currentCcSessionId = null) {
|
|
302
302
|
const now = Date.now();
|
|
303
303
|
// Fetch recent handoffs and find the most recent non-expired one.
|
|
304
304
|
// A newer but expired 'clear' handoff must not shadow a still-valid 'exit' handoff.
|
|
@@ -313,13 +313,20 @@ export function renderHandoffInjection(db, project, currentCcSessionId = null) {
|
|
|
313
313
|
SELECT * FROM session_handoffs
|
|
314
314
|
WHERE project = ? ORDER BY created_at_epoch DESC LIMIT 5
|
|
315
315
|
`).all(project);
|
|
316
|
-
|
|
316
|
+
return handoffs.find(h => {
|
|
317
317
|
const age = now - h.created_at_epoch;
|
|
318
318
|
const maxAge = h.type === 'clear' ? HANDOFF_EXPIRY_CLEAR : HANDOFF_EXPIRY_EXIT;
|
|
319
319
|
return age <= maxAge;
|
|
320
|
-
});
|
|
320
|
+
}) || null;
|
|
321
|
+
}
|
|
322
|
+
|
|
323
|
+
/**
 * Build the handoff injection for a project.
 *
 * Row selection is delegated to pickHandoffToInject and formatting to
 * renderHandoffFromRow; this wrapper only short-circuits when nothing
 * was selected.
 *
 * @param {object} db Database handle, forwarded to both helpers
 * @param {string} project Project identifier, forwarded to both helpers
 * @param {string|null} [currentCcSessionId=null] Forwarded to pickHandoffToInject
 * @returns {string|null} Whatever renderHandoffFromRow produces for the
 *   selected row (callers treat it as injection text), or null when no row
 *   was selected
 */
export function renderHandoffInjection(db, project, currentCcSessionId = null) {
  const row = pickHandoffToInject(db, project, currentCcSessionId);
  return row ? renderHandoffFromRow(row, db, project) : null;
}
|
|
322
328
|
|
|
329
|
+
function renderHandoffFromRow(handoff, db, project) {
|
|
323
330
|
const ageSec = Math.round((Date.now() - handoff.created_at_epoch) / 1000);
|
|
324
331
|
const ageStr = ageSec < 60 ? `${ageSec}s` :
|
|
325
332
|
ageSec < 3600 ? `${Math.round(ageSec / 60)}m` :
|
package/hook-llm.mjs
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
sessionFile, getSessionId, openDb, callLLM, sleep,
|
|
17
17
|
} from './hook-shared.mjs';
|
|
18
18
|
import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
|
|
19
|
-
import { isNoiseObservation } from './lib/low-signal-patterns.mjs';
|
|
19
|
+
import { isNoiseObservation, capNoiseImportance } from './lib/low-signal-patterns.mjs';
|
|
20
20
|
|
|
21
21
|
// T9: memdir-incompatible types live in the `events` table, not `observations`.
|
|
22
22
|
// Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
|
|
@@ -78,6 +78,16 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
|
|
|
78
78
|
return null;
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
// v2.47 P0-3: importance cap for LOW_SIGNAL titles that kept the drop gate
|
|
82
|
+
// open via importance>=2 but carry no lesson/facts signal. 341 rows in live
|
|
83
|
+
// DB had imp=3 under these conditions (99.4% noise). Cap to 1 so they
|
|
84
|
+
// enter the 7-day accelerated auto-compress window in hook.mjs.
|
|
85
|
+
const capped = capNoiseImportance(obs);
|
|
86
|
+
if (capped !== (obs.importance ?? 1)) {
|
|
87
|
+
debugLog('saveObservation', `capped imp ${obs.importance}→${capped}: ${truncate(obs.title || '', 60)}`);
|
|
88
|
+
obs.importance = capped;
|
|
89
|
+
}
|
|
90
|
+
|
|
81
91
|
// Three-tier dedup — returns null (not throw) for dedup hits
|
|
82
92
|
// Tier 1 (fast): 5-min Jaccard on titles
|
|
83
93
|
const fiveMinAgo = now.getTime() - DEDUP_WINDOW_MS;
|
|
@@ -853,10 +863,24 @@ key_decisions: Only decisions with lasting impact (library choices, architecture
|
|
|
853
863
|
`).get(sessionId);
|
|
854
864
|
|
|
855
865
|
if (existingFast) {
|
|
866
|
+
// Preserve structural-extractor content (completed / remaining_items written
|
|
867
|
+
// by handleStop fast-baseline from CLAUDE.md §10 markers) when Haiku returns
|
|
868
|
+
// empty for that field. Without COALESCE, a degraded Haiku pass would erase
|
|
869
|
+
// the deterministic floor — the exact regression that made 72% of prod
|
|
870
|
+
// session_summaries ship with empty remaining_items.
|
|
856
871
|
db.prepare(`
|
|
857
872
|
UPDATE session_summaries
|
|
858
|
-
SET request
|
|
859
|
-
|
|
873
|
+
SET request = COALESCE(NULLIF(?, ''), request),
|
|
874
|
+
investigated = COALESCE(NULLIF(?, ''), investigated),
|
|
875
|
+
learned = COALESCE(NULLIF(?, ''), learned),
|
|
876
|
+
completed = COALESCE(NULLIF(?, ''), completed),
|
|
877
|
+
next_steps = COALESCE(NULLIF(?, ''), next_steps),
|
|
878
|
+
remaining_items = COALESCE(NULLIF(?, ''), remaining_items),
|
|
879
|
+
lessons = COALESCE(?, lessons),
|
|
880
|
+
key_decisions = COALESCE(?, key_decisions),
|
|
881
|
+
notes = 'llm',
|
|
882
|
+
created_at = ?,
|
|
883
|
+
created_at_epoch = ?
|
|
860
884
|
WHERE id = ?
|
|
861
885
|
`).run(
|
|
862
886
|
llmParsed.request || '', llmParsed.investigated || '', llmParsed.learned || '',
|
package/hook.mjs
CHANGED
|
@@ -43,8 +43,9 @@ import {
|
|
|
43
43
|
} from './hook-shared.mjs';
|
|
44
44
|
import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
|
|
45
45
|
import { extractCitationsFromTranscript, bumpCitationAccess } from './lib/citation-tracker.mjs';
|
|
46
|
+
import { extractTailAssistantText, extractStructuredSummary } from './lib/summary-extractor.mjs';
|
|
46
47
|
import { searchRelevantMemories } from './hook-memory.mjs';
|
|
47
|
-
import { buildAndSaveHandoff, detectContinuationIntent, renderHandoffInjection, extractUnfinishedSummary } from './hook-handoff.mjs';
|
|
48
|
+
import { buildAndSaveHandoff, detectContinuationIntent, renderHandoffInjection, pickHandoffToInject, extractUnfinishedSummary } from './hook-handoff.mjs';
|
|
48
49
|
import { checkForUpdate } from './hook-update.mjs';
|
|
49
50
|
import { handleLLMOptimize } from './hook-optimize.mjs';
|
|
50
51
|
import { silentAutoAdopt, hasAutoAdoptMarker } from './adopt-cli.mjs';
|
|
@@ -442,14 +443,46 @@ async function handleStop() {
|
|
|
442
443
|
ORDER BY created_at_epoch DESC LIMIT 5
|
|
443
444
|
`).all(sessionId);
|
|
444
445
|
const fastRequest = truncate(firstPrompt?.prompt_text || '', 200);
|
|
445
|
-
const
|
|
446
|
-
|
|
446
|
+
const obsCompleted = recentObs.map(o => o.title).filter(Boolean).join('; ');
|
|
447
|
+
|
|
448
|
+
// Structural extraction from the assistant's tail message.
|
|
449
|
+
// CLAUDE.md §10 mandates Done/Not done/Failed/Uncertain markers, so the
|
|
450
|
+
// tail is deterministically parseable without Haiku. Prior baseline left
|
|
451
|
+
// remaining_items=='' for every session whose Haiku pass failed (≈66%
|
|
452
|
+
// in prod data), losing the user-visible "Not done" list.
|
|
453
|
+
let structuredCompleted = '';
|
|
454
|
+
let structuredNotDone = '';
|
|
455
|
+
let structuredNotes = '';
|
|
456
|
+
try {
|
|
457
|
+
const tail = transcriptPath ? extractTailAssistantText(transcriptPath) : null;
|
|
458
|
+
if (tail) {
|
|
459
|
+
const s = extractStructuredSummary(tail);
|
|
460
|
+
structuredCompleted = s.done;
|
|
461
|
+
structuredNotDone = s.notDone;
|
|
462
|
+
const notesParts = [];
|
|
463
|
+
if (s.failed) notesParts.push(`Failed: ${s.failed}`);
|
|
464
|
+
if (s.uncertain) notesParts.push(`Uncertain: ${s.uncertain}`);
|
|
465
|
+
structuredNotes = notesParts.join('\n');
|
|
466
|
+
}
|
|
467
|
+
} catch (e) { debugCatch(e, 'handleStop-structured-extract'); }
|
|
468
|
+
|
|
469
|
+
const finalCompleted = structuredCompleted || obsCompleted;
|
|
470
|
+
const finalRemaining = structuredNotDone;
|
|
471
|
+
const finalNotes = structuredNotes || 'fast';
|
|
472
|
+
|
|
473
|
+
if (fastRequest || finalCompleted || finalRemaining) {
|
|
447
474
|
const now = new Date();
|
|
448
475
|
db.prepare(`
|
|
449
476
|
INSERT INTO session_summaries
|
|
450
477
|
(memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
|
|
451
|
-
VALUES (?, ?, ?, '', '', ?, '',
|
|
452
|
-
`).run(
|
|
478
|
+
VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', ?, ?, ?)
|
|
479
|
+
`).run(
|
|
480
|
+
sessionId, project, fastRequest,
|
|
481
|
+
truncate(finalCompleted, 600),
|
|
482
|
+
truncate(finalRemaining, 600),
|
|
483
|
+
truncate(finalNotes, 400),
|
|
484
|
+
now.toISOString(), now.getTime()
|
|
485
|
+
);
|
|
453
486
|
}
|
|
454
487
|
}
|
|
455
488
|
} catch (e) { debugCatch(e, 'handleStop-fast-summary'); }
|
|
@@ -615,6 +648,29 @@ async function handleSessionStart() {
|
|
|
615
648
|
if (compressed.changes > 0) {
|
|
616
649
|
debugLog('DEBUG', 'session-start', `auto-compressed ${compressed.changes} old observations`);
|
|
617
650
|
}
|
|
651
|
+
|
|
652
|
+
// v2.47 P0-3: accelerated compress for LOW_SIGNAL + no-signal noise.
|
|
653
|
+
// 7-day window instead of 30. The write-side capNoiseImportance forces
|
|
654
|
+
// imp=1 on these already; this just shrinks the GC latency so the
|
|
655
|
+
// projected 32.5% corpus reduction materializes within a week on live
|
|
656
|
+
// DBs instead of bleeding into the 30-day tier.
|
|
657
|
+
const noiseCompressAge = Date.now() - 7 * 86400000;
|
|
658
|
+
const noiseCompressed = db.prepare(`
|
|
659
|
+
UPDATE observations SET compressed_into = ${COMPRESSED_AUTO}
|
|
660
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
661
|
+
AND importance = 1
|
|
662
|
+
AND (lesson_learned IS NULL OR lesson_learned = '' OR lesson_learned = 'none')
|
|
663
|
+
AND (facts IS NULL OR facts = '' OR facts = '[]')
|
|
664
|
+
AND (
|
|
665
|
+
title LIKE 'Modified %' OR title LIKE 'Worked on %'
|
|
666
|
+
OR title LIKE 'Reviewed %' OR title LIKE 'Error%'
|
|
667
|
+
)
|
|
668
|
+
AND created_at_epoch < ?
|
|
669
|
+
AND project = ?
|
|
670
|
+
`).run(noiseCompressAge, project);
|
|
671
|
+
if (noiseCompressed.changes > 0) {
|
|
672
|
+
debugLog('DEBUG', 'session-start', `auto-compressed ${noiseCompressed.changes} LOW_SIGNAL noise (7d window)`);
|
|
673
|
+
}
|
|
618
674
|
})();
|
|
619
675
|
|
|
620
676
|
// Auto-maintain: cleanup + decay + boost + purge, gated to once per 24h
|
|
@@ -963,19 +1019,19 @@ async function handleUserPrompt() {
|
|
|
963
1019
|
if (promptNumber <= 3) {
|
|
964
1020
|
try {
|
|
965
1021
|
if (detectContinuationIntent(db, promptText, project, ccSessionId)) {
|
|
966
|
-
const
|
|
967
|
-
if (
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
//
|
|
971
|
-
//
|
|
972
|
-
//
|
|
1022
|
+
const picked = pickHandoffToInject(db, project, ccSessionId);
|
|
1023
|
+
if (picked) {
|
|
1024
|
+
const injection = renderHandoffInjection(db, project, ccSessionId);
|
|
1025
|
+
if (injection) process.stdout.write(injection + '\n');
|
|
1026
|
+
// Consume ONLY the row we just injected — leave other projects' exit
|
|
1027
|
+
// handoffs intact so future sessions can still resume from them.
|
|
1028
|
+
// Pre-v2.46 wiped every exit handoff for the project on any continuation
|
|
1029
|
+
// intent, which made the DB effectively forgetful: 115 completed sessions
|
|
1030
|
+
// produced 1 persisted handoff.
|
|
973
1031
|
try {
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
db.prepare("DELETE FROM session_handoffs WHERE project = ? AND type IN ('clear','exit')").run(project);
|
|
978
|
-
}
|
|
1032
|
+
db.prepare(
|
|
1033
|
+
'DELETE FROM session_handoffs WHERE project = ? AND type = ? AND session_id = ?'
|
|
1034
|
+
).run(project, picked.type, picked.session_id);
|
|
979
1035
|
} catch {}
|
|
980
1036
|
}
|
|
981
1037
|
}
|
|
package/lib/low-signal-patterns.mjs
CHANGED
|
@@ -113,6 +113,40 @@ function _isLikelyToolOutputPassthrough(narrative) {
|
|
|
113
113
|
* @param {object} [env=process.env] Environment (injected for testability)
|
|
114
114
|
* @returns {boolean} true = noise, caller should drop
|
|
115
115
|
*/
|
|
116
|
+
// NOTE(review): this definition is inserted between the JSDoc that documents
// isNoiseObservation (its `@param env` / `@returns {boolean}` lines sit directly
// above) and the isNoiseObservation function itself, which leaves that earlier
// doc block detached from its function — confirm and move one of them.
/**
 * v2.47 P0-3: importance cap for LOW_SIGNAL titles that arrive with inflated
 * importance. Companion to isNoiseObservation: that function decides whether
 * a row is dropped; this one never drops anything, it only reports the
 * importance the row should be stored with.
 *
 * Rules, in order:
 *   1. Title does not match _LOW_SIG_RE            → importance unchanged.
 *   2. A lesson is present (lessonLearned, falling back to lesson_learned)
 *      that is non-blank and not 'none' (any case) → importance unchanged.
 *   3. `facts` is an array holding at least one
 *      non-blank string                            → importance unchanged.
 *   4. Otherwise any importance above 1 is reduced to 1.
 *
 * A missing importance is treated as 1.
 *
 * Rationale recorded with the change (production baseline 2026-04-24,
 * projects--mem DB, 3789 obs): LOW_SIGNAL title + importance=3 → 341 rows,
 * of which 1 had a lesson and 1 had facts; LOW_SIGNAL title + importance=2 →
 * 80 rows, of which 5 had a lesson and 6 had facts. Rows capped to 1 become
 * eligible for the 7-day accelerated auto-compress in hook.mjs.
 *
 * @param {object} obs { title, facts, importance, lesson_learned|lessonLearned }
 * @returns {number} 1 when the row is LOW_SIGNAL without lesson/facts and its
 *   importance exceeded 1; the original importance in every other case
 */
export function capNoiseImportance(obs) {
  const importance = obs?.importance ?? 1;
  const heading = (obs && obs.title) || '';

  // Substantive titles are trusted as-is.
  if (!_LOW_SIG_RE.test(heading)) return importance;

  // A real lesson is signal; the literal placeholder 'none' is not.
  const lessonRaw = obs.lessonLearned ?? obs.lesson_learned;
  if (lessonRaw) {
    const lessonText = String(lessonRaw).trim();
    if (lessonText && lessonText.toLowerCase() !== 'none') return importance;
  }

  // One non-blank fact string is enough to keep the original importance.
  const hasFact = Array.isArray(obs.facts)
    && obs.facts.some((fact) => typeof fact === 'string' && fact.trim().length > 0);
  if (hasFact) return importance;

  if (importance > 1) return 1;
  return importance;
}
|
|
149
|
+
|
|
116
150
|
export function isNoiseObservation(obs, env = process.env) {
|
|
117
151
|
if (env && env.CLAUDE_MEM_KEEP_LOW_SIGNAL === '1') return false;
|
|
118
152
|
const title = (obs && obs.title) || '';
|
|
package/lib/summary-extractor.mjs
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// Structured summary extractor: reads the tail assistant message from a
|
|
2
|
+
// Claude Code transcript and pulls out Done / Not done / Failed / Uncertain
|
|
3
|
+
// sections using deterministic markers. This is the non-Haiku path — the
|
|
4
|
+
// markers are enforced by CLAUDE.md §10's four-section order rule, so they
|
|
5
|
+
// appear in ~every end-of-task message.
|
|
6
|
+
//
|
|
7
|
+
// Haiku summarization remains the richer best-effort enrichment, but it
|
|
8
|
+
// silently fails ~66% of Stop events in practice, leaving session_summaries
|
|
9
|
+
// with empty remaining_items. This extractor runs synchronously in
|
|
10
|
+
// handleStop and gives a deterministic floor.
|
|
11
|
+
|
|
12
|
+
import { readFileSync, existsSync } from 'fs';
|
|
13
|
+
|
|
14
|
+
// Section-header matchers. Each tolerates leading whitespace and list/quote
// markers (●, *, >, -) before the keyword. The separator class accepts both
// the ASCII colon and the fullwidth colon (U+FF1A, written as an escape so it
// survives editors and tools that normalise punctuation): with a class that
// only contains ':', "Done:x" is not recognised and "未完成:x" leaks the
// colon into the section body.
// English headers require the colon; 中文 headers treat it as optional.
const EN_HEADER = /^[\s●*>-]*(Done|Not\s+done|Failed|Uncertain)\s*[:\uFF1A]\s*/im;
const ZH_HEADER = /^[\s●*>-]*(剩下的?|剩余|还剩|未完成|下次(?:要做|做|继续)?|待做|未做)\s*[:\uFF1A]?\s*/m;

// Recognised section keys, normalised: lower-cased, single-spaced English
// header text → field name on the extractor's result object.
const EN_KEY = { done: 'done', 'not done': 'notDone', failed: 'failed', uncertain: 'uncertain' };
// Every 中文 header keyword maps to the "not done" section.
const ZH_KEY_IS_NOTDONE = /剩下|剩余|还剩|未完成|下次|待做|未做/;
|
|
20
|
+
|
|
21
|
+
/**
 * Read the LAST assistant text from a Claude Code transcript .jsonl.
 *
 * Returns the text of every `type: 'text'` block in the last
 * `type: 'assistant'` entry that has at least one such block, joined with
 * '\n'. Returns null when the path is empty, the file is missing or
 * unreadable, or no line qualifies.
 *
 * Lines are scanned from the end of the file and the scan stops at the first
 * qualifying entry, so earlier lines are never parsed. Lines that are blank,
 * are not valid JSON, or parse to something other than an assistant entry
 * (including the JSON literal `null`) are skipped rather than thrown on.
 *
 * @param {string} transcriptPath
 * @returns {string|null}
 */
export function extractTailAssistantText(transcriptPath) {
  if (!transcriptPath || !existsSync(transcriptPath)) return null;
  let raw;
  // The file can disappear or be unreadable between the existence check and
  // the read; treat that the same as "no transcript".
  try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return null; }

  const lines = raw.split('\n');
  for (let i = lines.length - 1; i >= 0; i--) {
    const text = _assistantTextOfLine(lines[i]);
    if (text !== null) return text;
  }
  return null;
}

// Joined text blocks of one transcript line, or null when the line is not an
// assistant entry carrying at least one text block.
function _assistantTextOfLine(line) {
  if (!line.trim()) return null;
  let entry;
  try { entry = JSON.parse(line); } catch { return null; }
  // `entry?.` guards lines whose JSON value is null: JSON.parse('null') is
  // valid JSON, and a bare `entry.type` would throw a TypeError on it.
  if (entry?.type !== 'assistant' || !entry.message) return null;
  const content = entry.message.content;
  if (!Array.isArray(content)) return null;
  const texts = content
    .filter((block) => block && block.type === 'text' && typeof block.text === 'string')
    .map((block) => block.text);
  return texts.length === 0 ? null : texts.join('\n');
}
|
|
49
|
+
|
|
50
|
+
/**
 * Extract Done / Not done / Failed / Uncertain sections from a message body.
 * Returns an object with four string fields (empty when the section is absent).
 *
 * Strategy: scan line by line, recognise section headers in EN (EN_HEADER)
 * and 中文 (ZH_HEADER, always mapped to "not done"), and attribute following
 * lines to the open section until the next header or a hard boundary: a
 * blank line whose next line is non-empty and does not start like a bullet.
 * Text on the header line after the separator is kept as the section's first
 * line. Collected lines are trimmed and joined with '\n'.
 *
 * Markdown-bold headers are tolerated: for "**Done:** x" the closing "**"
 * is dropped so it does not end up in the section text.
 *
 * @param {string} text
 * @returns {{done: string, notDone: string, failed: string, uncertain: string}}
 */
export function extractStructuredSummary(text) {
  const out = { done: '', notDone: '', failed: '', uncertain: '' };
  if (!text || typeof text !== 'string') return out;

  // Split on every line terminator, not only '\n'. The header regexes carry
  // the `m` flag, so `^` also matches after a bare '\r' or U+2028/U+2029; if
  // such a character stayed inside a "line", a header could match mid-line
  // and the text captured after it would be wrong.
  const lines = text.split(/\r\n|[\r\n\u2028\u2029]/);
  const bulletStart = /^[-*•●\d]+[.)]?\s+/;
  const buffers = { done: [], notDone: [], failed: [], uncertain: [] };
  let current = null;

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const trimmed = line.trim();

    // Header detection — EN first (unambiguous), then 中文.
    const enMatch = line.match(EN_HEADER);
    if (enMatch) {
      const key = EN_KEY[enMatch[1].toLowerCase().replace(/\s+/g, ' ')];
      if (key) {
        current = key;
        const tail = _headerRemainder(line, enMatch);
        if (tail) buffers[current].push(tail);
        continue;
      }
    }
    const zhMatch = line.match(ZH_HEADER);
    if (zhMatch && ZH_KEY_IS_NOTDONE.test(zhMatch[1])) {
      current = 'notDone';
      const tail = _headerRemainder(line, zhMatch);
      if (tail) buffers.notDone.push(tail);
      continue;
    }

    // Text before the first header belongs to no section.
    if (!current) continue;

    // Paragraph-break termination: a blank line followed by a non-empty,
    // non-bullet line starts a fresh paragraph unrelated to the section.
    // The next line is trimmed before the check, so its indentation is not
    // taken into account. A blank line followed by a bullet (or by another
    // blank line / end of text) keeps the section open.
    if (!trimmed) {
      const next = (lines[i + 1] || '').trim();
      if (next && !bulletStart.test(next)) current = null;
      continue;
    }

    buffers[current].push(trimmed);
  }

  for (const k of Object.keys(buffers)) {
    out[k] = buffers[k].join('\n').trim();
  }
  return out;
}

// Text that follows a matched section header on the same line, trimmed.
// Sliced from the match's own offset rather than assuming the match starts
// at column 0.
function _headerRemainder(line, match) {
  let rest = line.slice(match.index + match[0].length).trim();
  // "**Done:** x" / "**未完成**:x": when a run of asterisks sits directly
  // before the keyword (no space, so it is emphasis rather than a "* "
  // bullet) and the remainder opens with the same run, that run is the
  // closing marker of the header — drop it, plus a separator left behind it.
  const keywordStart = match.index + match[0].indexOf(match[1]);
  const opener = /\*+$/.exec(line.slice(match.index, keywordStart));
  if (opener && rest.startsWith(opener[0])) {
    rest = rest.slice(opener[0].length).replace(/^\s*[:\uFF1A]\s*/, '').trim();
  }
  return rest;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.47.0",
|
|
4
4
|
"description": "Lightweight persistent memory system for Claude Code",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -51,6 +51,7 @@
|
|
|
51
51
|
"lib/stats-quality.mjs",
|
|
52
52
|
"lib/low-signal-patterns.mjs",
|
|
53
53
|
"lib/citation-tracker.mjs",
|
|
54
|
+
"lib/summary-extractor.mjs",
|
|
54
55
|
"lib/id-routing.mjs",
|
|
55
56
|
"lib/err-sampler.mjs",
|
|
56
57
|
"lib/metrics.mjs",
|
package/schema.mjs
CHANGED
|
@@ -21,7 +21,12 @@ export const REGISTRY_DB_PATH = join(DB_DIR, 'resource-registry.db');
|
|
|
21
21
|
// FTS delete+reinsert cycles and amplified SQLITE_CORRUPT_VTAB blast radius
|
|
22
22
|
// (project_non_obvious.md). Migration drops the old triggers once and lets
|
|
23
23
|
// ensureFTS recreate them with the scoped form.
|
|
24
|
-
|
|
24
|
+
//
|
|
25
|
+
// v28 (v2.47): observation_vectors orphan + stale-vocab cleanup. Live DBs had
|
|
26
|
+
// 2839/6429 (44%) orphaned rows (historic deletes during FK-OFF migrations)
|
|
27
|
+
// and 3282/6429 (51%) stale-vocab rows (rebuildVocabulary never pruned old
|
|
28
|
+
// versions before v2.47). Idempotent one-shot DELETE on ensureDb.
|
|
29
|
+
export const CURRENT_SCHEMA_VERSION = 28;
|
|
25
30
|
|
|
26
31
|
const CORE_SCHEMA = `
|
|
27
32
|
CREATE TABLE IF NOT EXISTS sdk_sessions (
|
|
@@ -402,6 +407,17 @@ export function initSchema(db) {
|
|
|
402
407
|
|
|
403
408
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_obs_vectors_version ON observation_vectors(vocab_version)`);
|
|
404
409
|
|
|
410
|
+
// v28 (v2.47) P0-1: one-shot cleanup of orphaned observation_vectors.
|
|
411
|
+
// Live DBs accumulated 44% orphans even with ON DELETE CASCADE because
|
|
412
|
+
// early migrations ran with `foreign_keys=OFF` and deletes skipped cascade.
|
|
413
|
+
// Idempotent (NOT IN is empty on a clean DB), runs once per ensureDb().
|
|
414
|
+
try {
|
|
415
|
+
db.prepare(`
|
|
416
|
+
DELETE FROM observation_vectors
|
|
417
|
+
WHERE observation_id NOT IN (SELECT id FROM observations)
|
|
418
|
+
`).run();
|
|
419
|
+
} catch { /* non-critical — table-missing path handled by earlier CREATE */ }
|
|
420
|
+
|
|
405
421
|
// Persisted vocabulary for stable TF-IDF vector indexing
|
|
406
422
|
db.exec(`
|
|
407
423
|
CREATE TABLE IF NOT EXISTS vocab_state (
|
package/scoring-sql.mjs
CHANGED
|
@@ -70,12 +70,17 @@ export const TYPE_QUALITY_CASE = `(
|
|
|
70
70
|
* - injection_count: bumped ONLY on UserPromptSubmit / hook-memory auto-inject
|
|
71
71
|
* - access_count: bumped on citation (c039352 P4), explicit recall, get, timeline
|
|
72
72
|
*
|
|
73
|
-
* Empirical thresholds (
|
|
74
|
-
*
|
|
75
|
-
* •
|
|
76
|
-
* •
|
|
77
|
-
*
|
|
78
|
-
*
|
|
73
|
+
* Empirical thresholds (v2.47 recalibration — 2026-04-24 live projects--mem,
|
|
74
|
+
* 3789 obs, baseline 10/20 never fired because max injection_count=9):
|
|
75
|
+
* • Legitimate heavy use (#5588 9/10=0.9, #7549 7/13=0.54): ratio≤3 ⇒ 1.0×
|
|
76
|
+
* • Early noise candidate (#3518 6/1=6.0): inj≥4 AND ratio>3 ⇒ 0.5× (tier-1)
|
|
77
|
+
* • Entrenched noise (inj≥8 AND ratio>5): 0.2× (tier-2)
|
|
78
|
+
*
|
|
79
|
+
* Old thresholds (v26→v2.46, inj≥10/≥20) were chosen as theoretical upper bounds
|
|
80
|
+
* before injection_count accumulated 2 months of data — live distribution shows
|
|
81
|
+
* 100% of rows stayed under 10 inject events. The recalibrated gates bite the
|
|
82
|
+
* moderate-noise tier (first real data band) while still sparing ratio-clean
|
|
83
|
+
* heavy-use rows (ratio gate is the primary precision signal).
|
|
79
84
|
*
|
|
80
85
|
* Applied as: BM25 × time_decay × TYPE_QUALITY × (0.5 + 0.5·importance) × NOISE_PENALTY
|
|
81
86
|
* Note: multiplicative so ORDER BY relevance ASC (negative scores) still works —
|
|
@@ -88,10 +93,10 @@ export function noisePenaltyClause(alias = 'o') {
|
|
|
88
93
|
const a = alias ? `${alias}.` : '';
|
|
89
94
|
return `(
|
|
90
95
|
CASE
|
|
91
|
-
WHEN COALESCE(${a}injection_count, 0) >=
|
|
96
|
+
WHEN COALESCE(${a}injection_count, 0) >= 8
|
|
92
97
|
AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 5
|
|
93
98
|
THEN 0.2
|
|
94
|
-
WHEN COALESCE(${a}injection_count, 0) >=
|
|
99
|
+
WHEN COALESCE(${a}injection_count, 0) >= 4
|
|
95
100
|
AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 3
|
|
96
101
|
THEN 0.5
|
|
97
102
|
ELSE 1.0
|
package/source-files.mjs
CHANGED
package/tfidf.mjs
CHANGED
|
@@ -252,6 +252,13 @@ export function rebuildVocabulary(db) {
|
|
|
252
252
|
for (const [term, entry] of vocab.terms) {
|
|
253
253
|
insertStmt.run(term, entry.index, entry.idf, vocab.version, now);
|
|
254
254
|
}
|
|
255
|
+
// v2.47 P0-1: drop observation_vectors from earlier vocab versions.
|
|
256
|
+
// Without this, rebuildVocabulary compounded the stale set on every call
|
|
257
|
+
// (live DB measured 3282/6429 = 51% stale). vectorSearch filters by
|
|
258
|
+
// vocab_version at query time, so stale rows were dead storage.
|
|
259
|
+
try {
|
|
260
|
+
db.prepare('DELETE FROM observation_vectors WHERE vocab_version != ?').run(vocab.version);
|
|
261
|
+
} catch { /* table missing on legacy DBs — non-critical */ }
|
|
255
262
|
})();
|
|
256
263
|
|
|
257
264
|
_vocabCache = vocab;
|