claude-mem-lite 2.70.0 → 2.71.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/hook-handoff.mjs +25 -7
- package/hook-llm.mjs +95 -19
- package/hook-optimize.mjs +45 -7
- package/hook-precompact.mjs +44 -0
- package/hook.mjs +71 -19
- package/hooks/hooks.json +12 -0
- package/lib/import-jsonl.mjs +225 -0
- package/lib/scrub-record.mjs +63 -0
- package/mem-cli.mjs +60 -1
- package/package.json +4 -1
- package/server.mjs +5 -1
- package/source-files.mjs +10 -1
package/hook-handoff.mjs
CHANGED
|
@@ -2,7 +2,8 @@
|
|
|
2
2
|
// Extracted for testability — hook.mjs has module-level side effects
|
|
3
3
|
|
|
4
4
|
import { basename } from 'path';
|
|
5
|
-
import { truncate, extractMatchKeywords, tokenizeHandoff, isSpecificTerm, LOW_SIGNAL_TITLE, EDIT_TOOLS, isMetaTriggerPrompt, notLowSignalTitleClause } from './utils.mjs';
|
|
5
|
+
import { truncate, extractMatchKeywords, tokenizeHandoff, isSpecificTerm, scrubSecrets, LOW_SIGNAL_TITLE, EDIT_TOOLS, isMetaTriggerPrompt, notLowSignalTitleClause } from './utils.mjs';
|
|
6
|
+
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
6
7
|
import {
|
|
7
8
|
HANDOFF_EXPIRY_CLEAR, HANDOFF_EXPIRY_EXIT, HANDOFF_ANCHOR_MAX_AGE,
|
|
8
9
|
HANDOFF_MATCH_THRESHOLD, CONTINUE_KEYWORDS,
|
|
@@ -161,6 +162,23 @@ export function buildAndSaveHandoff(db, sessionId, project, type, episodeSnapsho
|
|
|
161
162
|
// `scopeSessionId` (CC UUID) tags the row for parallel scoping; falls back to
|
|
162
163
|
// the mem-internal `sessionId` when the caller didn't supply one (tests + legacy).
|
|
163
164
|
const storedSessionId = scopeSessionId || sessionId;
|
|
165
|
+
// Defense-in-depth: aggregates are built from already-stored rows + raw
|
|
166
|
+
// session memory; scrub at the persistence boundary regardless of source.
|
|
167
|
+
// Order matters: scrub raw values BEFORE truncation, so a secret straddling
|
|
168
|
+
// the truncation boundary doesn't fall below scrubSecrets's regex length
|
|
169
|
+
// floors. JSON-stringified fields (key_files) are pre-scrubbed at the
|
|
170
|
+
// element level before stringify — letting scrubSecrets rewrite the JSON
|
|
171
|
+
// string would risk breaking downstream JSON.parse.
|
|
172
|
+
const safe = scrubRecord('session_handoffs', {
|
|
173
|
+
working_on: workingOn,
|
|
174
|
+
completed: completed.map(c => `[${c.type}] ${c.title}`).join('\n'),
|
|
175
|
+
unfinished,
|
|
176
|
+
key_decisions: decisions.map(d => d.title).join('\n'),
|
|
177
|
+
match_keywords: keywords,
|
|
178
|
+
});
|
|
179
|
+
const safeKeyFiles = JSON.stringify(
|
|
180
|
+
[...fileSet].slice(0, 20).map(f => scrubSecrets(String(f)))
|
|
181
|
+
);
|
|
164
182
|
db.prepare(`
|
|
165
183
|
INSERT INTO session_handoffs (project, type, session_id, working_on, completed, unfinished, key_files, key_decisions, match_keywords, created_at_epoch, git_sha_at_handoff)
|
|
166
184
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
@@ -175,12 +193,12 @@ export function buildAndSaveHandoff(db, sessionId, project, type, episodeSnapsho
|
|
|
175
193
|
git_sha_at_handoff = excluded.git_sha_at_handoff
|
|
176
194
|
`).run(
|
|
177
195
|
project, type, storedSessionId,
|
|
178
|
-
truncate(
|
|
179
|
-
completed
|
|
180
|
-
unfinished.length > 3000 ? unfinished.slice(0, 2999) + '…' : unfinished,
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
196
|
+
truncate(safe.working_on, 1000),
|
|
197
|
+
safe.completed,
|
|
198
|
+
safe.unfinished.length > 3000 ? safe.unfinished.slice(0, 2999) + '…' : safe.unfinished,
|
|
199
|
+
safeKeyFiles,
|
|
200
|
+
safe.key_decisions,
|
|
201
|
+
safe.match_keywords,
|
|
184
202
|
Date.now(),
|
|
185
203
|
gitShaAtHandoff,
|
|
186
204
|
);
|
package/hook-llm.mjs
CHANGED
|
@@ -10,6 +10,7 @@ import {
|
|
|
10
10
|
getCurrentBranch, notLowSignalTitleClause,
|
|
11
11
|
} from './utils.mjs';
|
|
12
12
|
import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
|
|
13
|
+
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
13
14
|
import { getVocabulary, computeVector } from './tfidf.mjs';
|
|
14
15
|
import {
|
|
15
16
|
RUNTIME_DIR, DEDUP_WINDOW_MS, RELATED_OBS_WINDOW_MS,
|
|
@@ -194,6 +195,19 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
|
|
|
194
195
|
|
|
195
196
|
const { conceptsText, factsText, textField } = buildFtsTextField(obs);
|
|
196
197
|
|
|
198
|
+
// Defense-in-depth: scrub text fields before INSERT. Source is LLM output
|
|
199
|
+
// (Haiku occasionally regurgitates input verbatim — error logs, hashes).
|
|
200
|
+
const safe = scrubRecord('observations', {
|
|
201
|
+
text: textField,
|
|
202
|
+
title: obs.title || '',
|
|
203
|
+
subtitle: obs.subtitle || '',
|
|
204
|
+
narrative: obs.narrative || '',
|
|
205
|
+
concepts: conceptsText,
|
|
206
|
+
facts: factsText,
|
|
207
|
+
lesson_learned: obs.lessonLearned || null,
|
|
208
|
+
search_aliases: obs.searchAliases || null,
|
|
209
|
+
});
|
|
210
|
+
|
|
197
211
|
// Atomic: observation INSERT + observation_files + vector in one transaction
|
|
198
212
|
const savedId = db.transaction(() => {
|
|
199
213
|
const result = db.prepare(`
|
|
@@ -201,16 +215,16 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
|
|
|
201
215
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
202
216
|
`).run(
|
|
203
217
|
sessionId, project,
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
218
|
+
safe.text, obs.type, safe.title, safe.subtitle,
|
|
219
|
+
safe.narrative,
|
|
220
|
+
safe.concepts,
|
|
221
|
+
safe.facts,
|
|
208
222
|
JSON.stringify(obs.filesRead || []),
|
|
209
223
|
JSON.stringify(obs.files || []),
|
|
210
224
|
obs.importance ?? 1,
|
|
211
225
|
minhashSig,
|
|
212
|
-
|
|
213
|
-
|
|
226
|
+
safe.lesson_learned,
|
|
227
|
+
safe.search_aliases,
|
|
214
228
|
getCurrentBranch(),
|
|
215
229
|
now.toISOString(), now.getTime()
|
|
216
230
|
);
|
|
@@ -823,19 +837,32 @@ ${actionList}`;
|
|
|
823
837
|
// so the enriched FTS text field + minhash + vector are refreshed atomically.
|
|
824
838
|
const { conceptsText, factsText, textField } = buildFtsTextField(obs);
|
|
825
839
|
const minhashSig = computeMinHash((obs.title || '') + ' ' + (obs.narrative || ''));
|
|
840
|
+
// Scrub LLM-output text fields at the UPDATE boundary, mirroring the
|
|
841
|
+
// INSERT path. type is an enum, importance is numeric, files_read is a
|
|
842
|
+
// JSON array (already scrubbed upstream), minhash_sig is hash bytes.
|
|
843
|
+
const safe = scrubRecord('observations', {
|
|
844
|
+
title: truncate(obs.title, 120),
|
|
845
|
+
subtitle: obs.subtitle || '',
|
|
846
|
+
narrative: truncate(obs.narrative || '', 500),
|
|
847
|
+
concepts: conceptsText,
|
|
848
|
+
facts: factsText,
|
|
849
|
+
text: textField,
|
|
850
|
+
lesson_learned: obs.lessonLearned || null,
|
|
851
|
+
search_aliases: obs.searchAliases || null,
|
|
852
|
+
});
|
|
826
853
|
db.prepare(`
|
|
827
854
|
UPDATE observations SET type=?, title=?, subtitle=?, narrative=?, concepts=?, facts=?,
|
|
828
855
|
text=?, importance=?, files_read=?, minhash_sig=?, lesson_learned=?, search_aliases=?
|
|
829
856
|
WHERE id = ?
|
|
830
857
|
`).run(
|
|
831
|
-
obs.type,
|
|
832
|
-
|
|
833
|
-
|
|
858
|
+
obs.type, safe.title, safe.subtitle,
|
|
859
|
+
safe.narrative,
|
|
860
|
+
safe.concepts, safe.facts, safe.text,
|
|
834
861
|
obs.importance,
|
|
835
862
|
JSON.stringify(obs.filesRead || []),
|
|
836
863
|
minhashSig,
|
|
837
|
-
|
|
838
|
-
|
|
864
|
+
safe.lesson_learned,
|
|
865
|
+
safe.search_aliases,
|
|
839
866
|
episode.savedId
|
|
840
867
|
);
|
|
841
868
|
savedId = episode.savedId;
|
|
@@ -973,6 +1000,23 @@ ${obsList}`;
|
|
|
973
1000
|
// empty for that field. Without COALESCE, a degraded Haiku pass would erase
|
|
974
1001
|
// the deterministic floor — the exact regression that made 72% of prod
|
|
975
1002
|
// session_summaries ship with empty remaining_items.
|
|
1003
|
+
//
|
|
1004
|
+
// Scrub LLM-output text fields at the UPDATE boundary. lessons /
|
|
1005
|
+
// key_decisions are JSON.stringify(array<string>); we scrub the JSON
|
|
1006
|
+
// string here to match the sibling INSERT path. scrubSecrets uses
|
|
1007
|
+
// opaque placeholders that preserve JSON structure; element-level
|
|
1008
|
+
// pre-scrub remains safer in principle but would diverge from the
|
|
1009
|
+
// merged INSERT contract.
|
|
1010
|
+
const safe = scrubRecord('session_summaries', {
|
|
1011
|
+
request: llmParsed.request || '',
|
|
1012
|
+
investigated: llmParsed.investigated || '',
|
|
1013
|
+
learned: llmParsed.learned || '',
|
|
1014
|
+
completed: llmParsed.completed || '',
|
|
1015
|
+
next_steps: llmParsed.next_steps || '',
|
|
1016
|
+
remaining_items: llmParsed.remaining_items || '',
|
|
1017
|
+
lessons: lessonsJson,
|
|
1018
|
+
key_decisions: decisionsJson,
|
|
1019
|
+
});
|
|
976
1020
|
db.prepare(`
|
|
977
1021
|
UPDATE session_summaries
|
|
978
1022
|
SET request = COALESCE(NULLIF(?, ''), request),
|
|
@@ -988,23 +1032,33 @@ ${obsList}`;
|
|
|
988
1032
|
created_at_epoch = ?
|
|
989
1033
|
WHERE id = ?
|
|
990
1034
|
`).run(
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
1035
|
+
safe.request, safe.investigated, safe.learned,
|
|
1036
|
+
safe.completed, safe.next_steps,
|
|
1037
|
+
safe.remaining_items,
|
|
1038
|
+
safe.lessons, safe.key_decisions,
|
|
995
1039
|
now.toISOString(), now.getTime(),
|
|
996
1040
|
existingFast.id
|
|
997
1041
|
);
|
|
998
1042
|
} else {
|
|
1043
|
+
const safe = scrubRecord('session_summaries', {
|
|
1044
|
+
request: llmParsed.request || '',
|
|
1045
|
+
investigated: llmParsed.investigated || '',
|
|
1046
|
+
learned: llmParsed.learned || '',
|
|
1047
|
+
completed: llmParsed.completed || '',
|
|
1048
|
+
next_steps: llmParsed.next_steps || '',
|
|
1049
|
+
remaining_items: llmParsed.remaining_items || '',
|
|
1050
|
+
lessons: lessonsJson,
|
|
1051
|
+
key_decisions: decisionsJson,
|
|
1052
|
+
});
|
|
999
1053
|
db.prepare(`
|
|
1000
1054
|
INSERT INTO session_summaries (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, lessons, key_decisions, created_at, created_at_epoch)
|
|
1001
1055
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, '[]', '[]', '', ?, ?, ?, ?)
|
|
1002
1056
|
`).run(
|
|
1003
1057
|
sessionId, project,
|
|
1004
|
-
|
|
1005
|
-
|
|
1006
|
-
|
|
1007
|
-
|
|
1058
|
+
safe.request, safe.investigated, safe.learned,
|
|
1059
|
+
safe.completed, safe.next_steps,
|
|
1060
|
+
safe.remaining_items,
|
|
1061
|
+
safe.lessons, safe.key_decisions,
|
|
1008
1062
|
now.toISOString(), now.getTime()
|
|
1009
1063
|
);
|
|
1010
1064
|
}
|
|
@@ -1013,3 +1067,25 @@ ${obsList}`;
|
|
|
1013
1067
|
db.close();
|
|
1014
1068
|
}
|
|
1015
1069
|
}
|
|
1070
|
+
|
|
1071
|
+
/**
 * Test-only helper — do not import outside tests/.
 *
 * Passes `obs` through scrubRecord('observations', …), the same call
 * saveObservation makes before its INSERT, and writes the row directly. This
 * lets the e2e leak test check that the observations INSERT path scrubs the
 * configured text fields without spinning up the full LLM dispatcher.
 *
 * The double-underscore prefix is only a naming convention; nothing in this
 * module prevents a sibling file from importing the helper into production
 * code. If that ever needs enforcing, move the helper to a tests/_helpers/
 * module that takes a db-insert callback.
 *
 * Scrubbed columns: text, title, subtitle, narrative, concepts, facts,
 * lesson_learned, search_aliases. Every other column is taken from `obs`
 * unchanged; `type` is always 'change' and both timestamps are "now".
 *
 * @param {object} db  Handle exposing `prepare(sql).run(...params)`.
 * @param {object} obs Column-named observation fields (session_id, project,
 *   text, title, …, branch).
 * @returns {void}
 */
export const __insertObservationForTest = (db, obs) => {
  const scrubbed = scrubRecord('observations', obs);
  const insertStmt = db.prepare(`INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, minhash_sig, lesson_learned, search_aliases, branch, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`);
  // Parameter order mirrors the column list above, one group per line.
  const params = [
    obs.session_id, obs.project, scrubbed.text, 'change',
    scrubbed.title, scrubbed.subtitle, scrubbed.narrative,
    scrubbed.concepts, scrubbed.facts, obs.files_read, obs.files_modified,
    obs.importance, obs.minhash_sig, scrubbed.lesson_learned, scrubbed.search_aliases,
    obs.branch, new Date().toISOString(), Date.now(),
  ];
  insertStmt.run(...params);
};
|
package/hook-optimize.mjs
CHANGED
|
@@ -11,6 +11,7 @@ import {
|
|
|
11
11
|
} from './utils.mjs';
|
|
12
12
|
import { callModelJSON } from './haiku-client.mjs';
|
|
13
13
|
import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
|
|
14
|
+
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
14
15
|
import { getVocabulary, computeVector, cosineSimilarity } from './tfidf.mjs';
|
|
15
16
|
import { DB_DIR } from './schema.mjs';
|
|
16
17
|
|
|
@@ -159,12 +160,22 @@ search_aliases: 2-6 alternative search terms (include CJK if applicable).`;
|
|
|
159
160
|
const textField = [conceptsText, factsText, searchAliases || '', bigramText].filter(Boolean).join(' ');
|
|
160
161
|
const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
|
|
161
162
|
|
|
163
|
+
// Scrub LLM-output text fields at the UPDATE boundary. type is an
|
|
164
|
+
// enum, importance is numeric, minhash_sig is hash bytes.
|
|
165
|
+
const safe = scrubRecord('observations', {
|
|
166
|
+
title, narrative,
|
|
167
|
+
concepts: conceptsText,
|
|
168
|
+
facts: factsText,
|
|
169
|
+
text: textField,
|
|
170
|
+
lesson_learned: lessonLearned,
|
|
171
|
+
search_aliases: searchAliases,
|
|
172
|
+
});
|
|
162
173
|
db.prepare(`
|
|
163
174
|
UPDATE observations SET type=?, title=?, narrative=?, concepts=?, facts=?,
|
|
164
175
|
text=?, importance=?, lesson_learned=?, search_aliases=?, minhash_sig=?, optimized_at=?
|
|
165
176
|
WHERE id = ?
|
|
166
|
-
`).run(type, title, narrative,
|
|
167
|
-
importance,
|
|
177
|
+
`).run(type, safe.title, safe.narrative, safe.concepts, safe.facts, safe.text,
|
|
178
|
+
importance, safe.lesson_learned, safe.search_aliases, minhashSig, Date.now(), cand.id);
|
|
168
179
|
|
|
169
180
|
rebuildVector(db, cand.id, [title, narrative, conceptsText]);
|
|
170
181
|
|
|
@@ -277,7 +288,14 @@ export function applyNormalization(db, groups) {
|
|
|
277
288
|
const existingAliases = row.search_aliases || '';
|
|
278
289
|
const originalTerms = terms.filter(t => aliasMap.has(t.toLowerCase()) && aliasMap.get(t.toLowerCase()) !== t);
|
|
279
290
|
const newAliases = [existingAliases, ...originalTerms].filter(Boolean).join(' ');
|
|
280
|
-
|
|
291
|
+
// Defense-in-depth scrub. Canonical concept names come from LLM output
|
|
292
|
+
// (identifySynonymGroups via Sonnet); existing values are already
|
|
293
|
+
// scrubbed but free LLM tokens can re-introduce secret-shaped strings.
|
|
294
|
+
const safe = scrubRecord('observations', {
|
|
295
|
+
concepts: uniqueConcepts,
|
|
296
|
+
search_aliases: newAliases,
|
|
297
|
+
});
|
|
298
|
+
updateStmt.run(safe.concepts, safe.search_aliases, Date.now(), row.id);
|
|
281
299
|
updated++;
|
|
282
300
|
}
|
|
283
301
|
}
|
|
@@ -397,13 +415,22 @@ Return ONLY valid JSON:
|
|
|
397
415
|
const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
|
|
398
416
|
const importance = clampImportance(parsed.importance || 2);
|
|
399
417
|
|
|
418
|
+
// Scrub LLM-output cluster-merge text fields at the UPDATE boundary.
|
|
419
|
+
// importance is numeric; minhash_sig is hash bytes.
|
|
420
|
+
const safe = scrubRecord('observations', {
|
|
421
|
+
title, narrative,
|
|
422
|
+
concepts: conceptsText,
|
|
423
|
+
facts: factsText,
|
|
424
|
+
text: textField,
|
|
425
|
+
lesson_learned: lessonLearned,
|
|
426
|
+
});
|
|
400
427
|
db.transaction(() => {
|
|
401
428
|
db.prepare(`
|
|
402
429
|
UPDATE observations SET title=?, narrative=?, concepts=?, facts=?, text=?,
|
|
403
430
|
importance=?, lesson_learned=?, minhash_sig=?, optimized_at=?
|
|
404
431
|
WHERE id = ?
|
|
405
|
-
`).run(title, narrative,
|
|
406
|
-
importance,
|
|
432
|
+
`).run(safe.title, safe.narrative, safe.concepts, safe.facts, safe.text,
|
|
433
|
+
importance, safe.lesson_learned, minhashSig, Date.now(), keeper.id);
|
|
407
434
|
|
|
408
435
|
const otherIds = others.map(o => o.id);
|
|
409
436
|
const ph = otherIds.map(() => '?').join(',');
|
|
@@ -573,13 +600,24 @@ JSON: {"title":"descriptive summary ≤120 chars","narrative":"comprehensive sum
|
|
|
573
600
|
VALUES (?,?,?,?,?,'active')`
|
|
574
601
|
).run(sessionId, sessionId, project, now.toISOString(), now.getTime());
|
|
575
602
|
|
|
603
|
+
// Defense-in-depth: title/narrative/etc. are LLM-generated compression
|
|
604
|
+
// output; scrub at the persistence boundary regardless of upstream trust.
|
|
605
|
+
const safe = scrubRecord('observations', {
|
|
606
|
+
text: textField,
|
|
607
|
+
title,
|
|
608
|
+
narrative,
|
|
609
|
+
concepts: conceptsText,
|
|
610
|
+
facts: factsText,
|
|
611
|
+
lesson_learned: lessonLearned,
|
|
612
|
+
search_aliases: searchAliases,
|
|
613
|
+
});
|
|
576
614
|
const result = db.prepare(`INSERT INTO observations
|
|
577
615
|
(memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
|
|
578
616
|
files_read, files_modified, importance, lesson_learned, search_aliases, optimized_at,
|
|
579
617
|
created_at, created_at_epoch)
|
|
580
618
|
VALUES (?,?,?,?,?,'',?,?,?,'[]','[]',2,?,?,?,?,?)`
|
|
581
|
-
).run(sessionId, project,
|
|
582
|
-
|
|
619
|
+
).run(sessionId, project, safe.text, 'discovery', safe.title, safe.narrative,
|
|
620
|
+
safe.concepts, safe.facts, safe.lesson_learned, safe.search_aliases, Date.now(),
|
|
583
621
|
new Date(medianEpoch).toISOString(), medianEpoch);
|
|
584
622
|
|
|
585
623
|
const sId = Number(result.lastInsertRowid);
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
// claude-mem-lite: PreCompact hook handler.
|
|
2
|
+
// Fires immediately before Claude Code auto-compaction begins. Emits a
|
|
3
|
+
// fresh <claude-mem-context> block on stdout so the summarizer that
|
|
4
|
+
// produces the compacted context has the most relevant memory in scope.
|
|
5
|
+
// Differs from SessionStart-on-compact (which fires AFTER compaction):
|
|
6
|
+
// PreCompact ensures memory survives the compaction step itself.
|
|
7
|
+
|
|
8
|
+
import { buildSessionContextLines } from './hook-context.mjs';
|
|
9
|
+
import { inferProject, debugCatch, debugLog } from './utils.mjs';
|
|
10
|
+
|
|
11
|
+
/**
 * Build the memory context and emit it on stdout wrapped in a
 * <claude-mem-context> block.
 *
 * This function itself issues no DB writes; it only hands `db` to
 * buildSessionContextLines. Any error is routed to debugCatch instead of
 * being rethrown, so a failure here never aborts the hook process.
 *
 * @param {object} ctx
 * @param {import('better-sqlite3').Database} ctx.db
 * @param {string} ctx.project
 * @param {string} [ctx.sessionId] Passed through as `null` when missing/empty.
 * @returns {void}
 */
export function handlePreCompact({ db, project, sessionId }) {
  try {
    const built = buildSessionContextLines(db, project, new Date(), sessionId || null);
    if (!built) return;
    // NOTE(review): the helper's name suggests it may return an array of
    // lines — confirm against hook-context.mjs. Interpolating an array into a
    // template literal would join it with commas, so join explicitly with
    // newlines; a string result is used exactly as before.
    const body = Array.isArray(built) ? built.join('\n') : String(built);
    if (body.trim() === '') return;
    process.stdout.write(`<claude-mem-context>\n${body}\n</claude-mem-context>\n`);
  } catch (e) {
    debugCatch(e, 'handlePreCompact');
  }
}
|
|
29
|
+
|
|
30
|
+
/**
 * Entry point called by the hook.mjs dispatcher for the PreCompact event.
 *
 * Resolves the project with inferProject(), reads `session_id` from the
 * parsed stdin payload (tolerating a null/undefined payload), writes one
 * DEBUG log line, and delegates to handlePreCompact. The caller owns the DB
 * handle: it is neither opened nor closed here.
 *
 * @param {import('better-sqlite3').Database} db Already-opened database.
 * @param {object} hookData Parsed JSON from hook stdin.
 * @returns {Promise<void>}
 */
export async function entry(db, hookData) {
  const project = inferProject();
  const sessionId = hookData == null ? undefined : hookData.session_id;
  const detail = `project=${project} sessionId=${sessionId || 'none'}`;
  debugLog('DEBUG', 'pre-compact', detail);
  handlePreCompact({ db, project, sessionId });
}
|
package/hook.mjs
CHANGED
|
@@ -36,6 +36,7 @@ import {
|
|
|
36
36
|
writePendingEntry, mergePendingEntries, episodeHasSignificantContent,
|
|
37
37
|
} from './hook-episode.mjs';
|
|
38
38
|
import { cleanupClaudeMdLegacyBlock, buildSessionContextLines } from './hook-context.mjs';
|
|
39
|
+
import { entry as preCompactEntry } from './hook-precompact.mjs';
|
|
39
40
|
import {
|
|
40
41
|
RUNTIME_DIR, EPISODE_BUFFER_SIZE, EPISODE_TIME_GAP_MS,
|
|
41
42
|
SESSION_EXPIRY_MS, STALE_SESSION_MS, STALE_LOCK_MS,
|
|
@@ -43,6 +44,7 @@ import {
|
|
|
43
44
|
spawnBackground, sweepOrphanEpisodeFiles,
|
|
44
45
|
} from './hook-shared.mjs';
|
|
45
46
|
import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
|
|
47
|
+
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
46
48
|
import { extractCitationsFromTranscript, bumpCitationAccess, computeCiteRecall } from './lib/citation-tracker.mjs';
|
|
47
49
|
import { extractTailAssistantText, extractStructuredSummary } from './lib/summary-extractor.mjs';
|
|
48
50
|
import { searchRelevantMemories, formatMemoryLine } from './hook-memory.mjs';
|
|
@@ -445,7 +447,10 @@ async function handleStop() {
|
|
|
445
447
|
WHERE memory_session_id = ? AND COALESCE(compressed_into, 0) = 0
|
|
446
448
|
ORDER BY created_at_epoch DESC LIMIT 5
|
|
447
449
|
`).all(sessionId);
|
|
448
|
-
|
|
450
|
+
// Raw values flow into scrubRecord below; truncation at .run() site
|
|
451
|
+
// so secrets straddling the boundary still match scrubSecrets's
|
|
452
|
+
// length floors.
|
|
453
|
+
const fastRequestRaw = firstPrompt?.prompt_text || '';
|
|
449
454
|
const obsCompleted = recentObs.map(o => o.title).filter(Boolean).join('; ');
|
|
450
455
|
|
|
451
456
|
// Structural extraction from the assistant's tail message.
|
|
@@ -473,17 +478,23 @@ async function handleStop() {
|
|
|
473
478
|
const finalRemaining = structuredNotDone;
|
|
474
479
|
const finalNotes = structuredNotes || 'fast';
|
|
475
480
|
|
|
476
|
-
if (
|
|
481
|
+
if (fastRequestRaw || finalCompleted || finalRemaining) {
|
|
477
482
|
const now = new Date();
|
|
483
|
+
const safe = scrubRecord('session_summaries', {
|
|
484
|
+
request: fastRequestRaw,
|
|
485
|
+
completed: finalCompleted,
|
|
486
|
+
remaining_items: finalRemaining,
|
|
487
|
+
notes: finalNotes,
|
|
488
|
+
});
|
|
478
489
|
db.prepare(`
|
|
479
490
|
INSERT INTO session_summaries
|
|
480
491
|
(memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
|
|
481
492
|
VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', ?, ?, ?)
|
|
482
493
|
`).run(
|
|
483
|
-
sessionId, project,
|
|
484
|
-
truncate(
|
|
485
|
-
truncate(
|
|
486
|
-
truncate(
|
|
494
|
+
sessionId, project, truncate(safe.request, 200),
|
|
495
|
+
truncate(safe.completed, 600),
|
|
496
|
+
truncate(safe.remaining_items, 600),
|
|
497
|
+
truncate(safe.notes, 400),
|
|
487
498
|
now.toISOString(), now.getTime()
|
|
488
499
|
);
|
|
489
500
|
}
|
|
@@ -947,26 +958,34 @@ async function handleSessionStart() {
|
|
|
947
958
|
ORDER BY created_at_epoch DESC LIMIT 5
|
|
948
959
|
`).all(prevSessionId);
|
|
949
960
|
|
|
950
|
-
|
|
951
|
-
|
|
961
|
+
// Raw values flow into scrubRecord; truncation deferred to .run() so
|
|
962
|
+
// secrets straddling the truncation boundary still match scrubSecrets
|
|
963
|
+
// regex length floors.
|
|
964
|
+
const fastRequestRaw = firstPrompt?.prompt_text || '';
|
|
965
|
+
const fastCompletedRaw = prevObs.map(o => o.title).filter(Boolean).join('; ');
|
|
952
966
|
|
|
953
967
|
// Infer remaining_items from handoff unfinished (already built above at line 476)
|
|
954
|
-
let
|
|
968
|
+
let fastRemainingRaw = '';
|
|
955
969
|
if (prevClearHandoff?.unfinished) {
|
|
956
|
-
|
|
970
|
+
fastRemainingRaw = extractUnfinishedSummary(prevClearHandoff.unfinished, 0);
|
|
957
971
|
}
|
|
958
972
|
// Fallback: episode errors
|
|
959
|
-
if (!
|
|
973
|
+
if (!fastRemainingRaw && episodeSnapshot?.entries) {
|
|
960
974
|
const errors = episodeSnapshot.entries.filter(e => e.isError).map(e => e.desc).filter(Boolean);
|
|
961
|
-
if (errors.length > 0)
|
|
975
|
+
if (errors.length > 0) fastRemainingRaw = errors.join('; ');
|
|
962
976
|
}
|
|
963
977
|
|
|
964
|
-
if (
|
|
978
|
+
if (fastRequestRaw || fastCompletedRaw) {
|
|
979
|
+
const safe = scrubRecord('session_summaries', {
|
|
980
|
+
request: fastRequestRaw,
|
|
981
|
+
completed: fastCompletedRaw,
|
|
982
|
+
remaining_items: fastRemainingRaw,
|
|
983
|
+
});
|
|
965
984
|
db.prepare(`
|
|
966
985
|
INSERT INTO session_summaries
|
|
967
986
|
(memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
|
|
968
987
|
VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', 'fast', ?, ?)
|
|
969
|
-
`).run(prevSessionId, prevProject || project,
|
|
988
|
+
`).run(prevSessionId, prevProject || project, truncate(safe.request, 200), truncate(safe.completed, 300), truncate(safe.remaining_items, 200), now.toISOString(), now.getTime());
|
|
970
989
|
}
|
|
971
990
|
} catch (e) { debugCatch(e, 'session-start-fast-summary'); }
|
|
972
991
|
}
|
|
@@ -1023,14 +1042,20 @@ async function handleSessionStart() {
|
|
|
1023
1042
|
ORDER BY created_at_epoch DESC LIMIT 5
|
|
1024
1043
|
`).all(recentSession.content_session_id);
|
|
1025
1044
|
|
|
1026
|
-
|
|
1027
|
-
|
|
1028
|
-
|
|
1045
|
+
// Raw values into scrubRecord; truncation at .run() preserves
|
|
1046
|
+
// straddling-secret detection (per privacy review).
|
|
1047
|
+
const frRaw = fp?.prompt_text || '';
|
|
1048
|
+
const fcRaw = po.map(o => o.title).filter(Boolean).join('; ');
|
|
1049
|
+
if (frRaw || fcRaw) {
|
|
1050
|
+
const safe = scrubRecord('session_summaries', {
|
|
1051
|
+
request: frRaw,
|
|
1052
|
+
completed: fcRaw,
|
|
1053
|
+
});
|
|
1029
1054
|
db.prepare(`
|
|
1030
1055
|
INSERT INTO session_summaries
|
|
1031
1056
|
(memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
|
|
1032
1057
|
VALUES (?, ?, ?, '', '', ?, '', '', '[]', '[]', 'fast', ?, ?)
|
|
1033
|
-
`).run(recentSession.content_session_id, project,
|
|
1058
|
+
`).run(recentSession.content_session_id, project, truncate(safe.request, 200), truncate(safe.completed, 300), now.toISOString(), now.getTime());
|
|
1034
1059
|
}
|
|
1035
1060
|
}
|
|
1036
1061
|
}
|
|
@@ -1103,6 +1128,28 @@ async function handleSessionStart() {
|
|
|
1103
1128
|
}
|
|
1104
1129
|
}
|
|
1105
1130
|
|
|
1131
|
+
// ─── PreCompact Handler ──────────────────────────────────────────────────────
|
|
1132
|
+
// Fires immediately before Claude Code auto-compaction begins. Re-emits the
|
|
1133
|
+
// memory context block on stdout so the summarizer sees it during compaction.
|
|
1134
|
+
// SessionStart's "compact" matcher fires AFTER compaction — by then the
|
|
1135
|
+
// previous-turn injection has already been collapsed. Pure read; no DB writes.
|
|
1136
|
+
|
|
1137
|
+
/**
 * Dispatcher for the `pre-compact` hook event.
 *
 * Reads and parses the hook payload from stdin, opens the database, hands
 * both to preCompactEntry, and always attempts to close the database
 * afterwards. A missing or unparseable payload is tolerated: the handler
 * then runs with an empty payload object.
 *
 * @returns {Promise<void>}
 */
async function handlePreCompactDispatch() {
  // Best-effort payload read: any failure (no stdin, bad JSON) falls back to
  // {} so the context block is still emitted with whatever can be inferred.
  const readPayload = async () => {
    try {
      const { text } = await readStdin();
      return JSON.parse(text);
    } catch {
      return {};
    }
  };
  const hookData = await readPayload();

  const db = openDb();
  if (!db) {
    return;
  }

  try {
    await preCompactEntry(db, hookData);
  } finally {
    // Closing is best-effort; a close failure must not mask the real outcome.
    try {
      db.close();
    } catch {}
  }
}
|
|
1152
|
+
|
|
1106
1153
|
// ─── UserPromptSubmit Handler ────────────────────────────────────────────────
|
|
1107
1154
|
|
|
1108
1155
|
async function handleUserPrompt() {
|
|
@@ -1272,11 +1319,15 @@ function handleAutoCompress() {
|
|
|
1272
1319
|
(content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
|
|
1273
1320
|
VALUES (?,?,?,?,?,'active')`
|
|
1274
1321
|
).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
|
|
1322
|
+
// Defense-in-depth: title/narrative are derived from already-stored
|
|
1323
|
+
// obs.title, but those rows pre-date the central scrub policy in some
|
|
1324
|
+
// cases. Re-scrub at the persistence boundary.
|
|
1325
|
+
const safe = scrubRecord('observations', { text: narrative, title, narrative });
|
|
1275
1326
|
const summaryResult = db.prepare(`INSERT INTO observations
|
|
1276
1327
|
(memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
|
|
1277
1328
|
files_read, files_modified, importance, created_at, created_at_epoch)
|
|
1278
1329
|
VALUES (?,?,?,?,?,'',?,'','','[]','[]',2,?,?)`
|
|
1279
|
-
).run(sessionId, proj,
|
|
1330
|
+
).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, new Date(medianEpoch).toISOString(), medianEpoch);
|
|
1280
1331
|
const summaryId = Number(summaryResult.lastInsertRowid);
|
|
1281
1332
|
const obsIds = obs.map(o => o.id);
|
|
1282
1333
|
db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsIds.map(() => '?').join(',')})`)
|
|
@@ -1366,6 +1417,7 @@ try {
|
|
|
1366
1417
|
switch (event) {
|
|
1367
1418
|
case 'post-tool-use': await handlePostToolUse(); break;
|
|
1368
1419
|
case 'session-start': await handleSessionStart(); break;
|
|
1420
|
+
case 'pre-compact': await handlePreCompactDispatch(); break;
|
|
1369
1421
|
case 'stop': await handleStop(); break;
|
|
1370
1422
|
case 'user-prompt': await handleUserPrompt(); break;
|
|
1371
1423
|
case 'llm-episode': await handleLLMEpisode(); break;
|
package/hooks/hooks.json
CHANGED
|
@@ -18,6 +18,18 @@
|
|
|
18
18
|
]
|
|
19
19
|
}
|
|
20
20
|
],
|
|
21
|
+
"PreCompact": [
|
|
22
|
+
{
|
|
23
|
+
"matcher": "*",
|
|
24
|
+
"hooks": [
|
|
25
|
+
{
|
|
26
|
+
"type": "command",
|
|
27
|
+
"command": "node \"${CLAUDE_PLUGIN_ROOT}/hook.mjs\" pre-compact",
|
|
28
|
+
"timeout": 5
|
|
29
|
+
}
|
|
30
|
+
]
|
|
31
|
+
}
|
|
32
|
+
],
|
|
21
33
|
"PreToolUse": [
|
|
22
34
|
{
|
|
23
35
|
"matcher": "Edit|Write|NotebookEdit|Read",
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
// claude-mem-lite: import a Claude Code JSONL transcript file into the
|
|
2
|
+
// memory DB. One transcript ≈ one Claude Code session; we map:
|
|
3
|
+
// user line -> user_prompts row
|
|
4
|
+
// tool_use+result -> observations row (matched by tool_use_id)
|
|
5
|
+
// anything else -> ignored
|
|
6
|
+
//
|
|
7
|
+
// Idempotent: re-running on the same file does not duplicate. Dedup keys
|
|
8
|
+
// are derived from full SHA-256 of the joined components. \x1f (ASCII unit
|
|
9
|
+
// separator) as join glue so adjacent components can't run together and
|
|
10
|
+
// collide, as long as no input itself contains \x1f. Truncating prompt_text would collapse rapid
|
|
11
|
+
// same-session "yes / next / 继续" replies into one observation.
|
|
12
|
+
//
|
|
13
|
+
// Orphan tool_use (truncated transcript: tool_use without matching
|
|
14
|
+
// tool_result) gets a fallback observation marked '[tool_use without
|
|
15
|
+
// result — transcript truncated]' so retrieval surfaces the truncation.
|
|
16
|
+
|
|
17
|
+
import { readFileSync, statSync } from 'fs';
|
|
18
|
+
import { createHash } from 'crypto';
|
|
19
|
+
import { scrubSecrets } from '../secret-scrub.mjs';
|
|
20
|
+
import { scrubRecord } from './scrub-record.mjs';
|
|
21
|
+
|
|
22
|
+
// Observation `type` recorded for each known Claude Code tool name; callers
// supply their own fallback for tools that are not listed.
//
// The table is a null-prototype object so a lookup keyed by an arbitrary tool
// name read from a transcript (e.g. "constructor" or "toString") yields
// undefined instead of an inherited Object.prototype member. It is frozen
// because it is a pure lookup table.
const TOOL_TO_TYPE = Object.freeze(Object.assign(Object.create(null), {
  Edit: 'change', Write: 'change', NotebookEdit: 'change',
  Read: 'discovery', Grep: 'discovery', Glob: 'discovery',
  Bash: 'change', Task: 'discovery', Agent: 'discovery',
  Skill: 'discovery', WebFetch: 'discovery', WebSearch: 'discovery',
}));
|
|
28
|
+
|
|
29
|
+
// Build a dedup key: the hex SHA-256 digest of the components glued together
// with the ASCII unit separator (\x1f).
function dedupKey(parts) {
  const glued = parts.join('\x1f');
  const hasher = createHash('sha256');
  hasher.update(glued);
  return hasher.digest('hex');
}
|
|
32
|
+
|
|
33
|
+
// Parse one transcript line as JSON; a line that is not valid JSON yields
// null instead of throwing.
function parseLine(line) {
  let parsed;
  try {
    parsed = JSON.parse(line);
  } catch {
    parsed = null;
  }
  return parsed;
}
|
|
36
|
+
|
|
37
|
+
// Register the session in sdk_sessions with status 'completed'. The statement
// is INSERT OR IGNORE, so a conflicting existing row is left untouched. The
// same id fills both content_session_id and memory_session_id, and
// started_at_epoch falls back to the current time when `ts` does not parse.
function ensureSession(db, sessionId, project, ts) {
  const insertSession = db.prepare(`
    INSERT OR IGNORE INTO sdk_sessions
      (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
    VALUES (?, ?, ?, ?, ?, 'completed')
  `);
  const startedAtEpoch = Date.parse(ts) || Date.now();
  insertSession.run(sessionId, sessionId, project, ts, startedAtEpoch);
}
|
|
44
|
+
|
|
45
|
+
// Store one transcript user event as a user_prompts row.
//
// The prompt text is the message content when it is a string, or the
// newline-joined `text` parts when it is an array. Returns false (nothing
// written) when there is no text or when the dedup key is already in
// `seenPrompts`; returns true after the insert otherwise.
function importPrompt(db, ev, project, seenPrompts) {
  const content = ev?.message?.content;
  let text = '';
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    text = content
      .filter((part) => part?.type === 'text')
      .map((part) => part.text)
      .join('\n');
  }
  if (!text) return false;

  const sessionId = ev.sessionId || 'imported';
  const ts = ev.timestamp || new Date().toISOString();
  const safe = scrubSecrets(text.slice(0, 10000));

  // The key is derived from the scrubbed text, i.e. the value that is
  // persisted, so a later run that rebuilds keys from stored rows computes
  // the same key.
  const key = dedupKey([sessionId, ts, safe]);
  if (seenPrompts.has(key)) return false;
  seenPrompts.add(key);

  ensureSession(db, sessionId, project, ts);

  // Bump the per-session counter and use the new value as the prompt number.
  const counterRow = db.prepare(
    'UPDATE sdk_sessions SET prompt_counter = COALESCE(prompt_counter, 0) + 1 WHERE content_session_id = ? RETURNING prompt_counter'
  ).get(sessionId);
  const promptNumber = counterRow?.prompt_counter || 1;

  const insertPrompt = db.prepare(`
    INSERT OR IGNORE INTO user_prompts
      (content_session_id, prompt_text, prompt_number, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?)
  `);
  insertPrompt.run(sessionId, safe, promptNumber, ts, Date.parse(ts) || Date.now());
  return true;
}
|
|
75
|
+
|
|
76
|
+
// Persist one tool_use / tool_result pair as a row in `observations`.
//
// `toolUse` is the merged assistant event + tool_use part (sessionId,
// timestamp, name, input); `toolResult` only needs a `content` field.
// The stored text is `<input>\n---\n<result>`; both halves are capped at
// 4000 characters. Text fields go through scrubRecord before the insert.
// Always returns true once the row is written.
function importToolPair(db, toolUse, toolResult, project) {
  const FIELD_LIMIT = 4000;
  const sessionId = toolUse.sessionId || 'imported';
  const ts = toolUse.timestamp || new Date().toISOString();
  ensureSession(db, sessionId, project, ts);

  const toolName = toolUse.name || 'unknown';
  // Own-property check: a tool name such as "constructor" must fall back to
  // 'change' rather than resolve to an inherited Object.prototype member.
  const type = Object.prototype.hasOwnProperty.call(TOOL_TO_TYPE, toolName)
    ? TOOL_TO_TYPE[toolName]
    : 'change';

  const input = toolUse.input;
  // typeof null is 'object'; treat a null input like a missing one instead
  // of storing the literal text "null".
  const inputJson = (input !== null && typeof input === 'object'
    ? JSON.stringify(input)
    : String(input ?? '')
  ).slice(0, FIELD_LIMIT);

  // Cap string results too, not only serialized ones, so a large tool output
  // cannot bypass the limit.
  const rawResult = toolResult?.content;
  const resultText = (typeof rawResult === 'string'
    ? rawResult
    : JSON.stringify(rawResult ?? '')
  ).slice(0, FIELD_LIMIT);

  const filePath = input?.file_path;
  const isWriteTool = toolName === 'Edit' || toolName === 'Write' || toolName === 'NotebookEdit';
  const filesModified = isWriteTool && filePath ? [filePath] : [];
  const filesRead = toolName === 'Read' && filePath ? [filePath] : [];

  const safe = scrubRecord('observations', {
    title: `${toolName}: ${(input?.command || input?.file_path || '').slice(0, 80)}`,
    subtitle: '',
    text: `${inputJson}\n---\n${resultText}`,
    narrative: '',
    concepts: '',
    facts: '',
    lesson_learned: null,
    search_aliases: null,
  });

  db.prepare(`
    INSERT INTO observations
      (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `).run(
    sessionId, project, safe.text, type, safe.title, safe.subtitle,
    safe.narrative, safe.concepts, safe.facts,
    JSON.stringify(filesRead), JSON.stringify(filesModified),
    1, ts, Date.parse(ts) || Date.now(),
  );
  return true;
}
|
|
119
|
+
|
|
120
|
+
/**
 * Import a single Claude Code JSONL transcript into the DB.
 *
 * User lines are handed to importPrompt. Each assistant `tool_use` part is
 * held until its `tool_result` arrives and is then stored via importToolPair.
 * A tool_result is accepted either as a top-level `type: "tool_result"` line
 * or as a `tool_result` part inside a user message's content array. tool_use
 * parts that never receive a result are stored with a placeholder result and
 * counted as orphans.
 *
 * All writes for the file run inside a single db.transaction() call, so the
 * paired rows and the orphan rows are committed together.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} path Absolute path to the .jsonl file
 * @param {{project: string}} opts
 * @returns {Promise<{prompts:number, observations:number, skipped:number, orphans:number}>}
 */
export async function importJsonl(db, path, { project }) {
  statSync(path);
  const lines = readFileSync(path, 'utf8').split('\n');

  // Seed both dedup sets from rows already in the DB so that re-running the
  // import on the same file is a no-op.
  const seenPrompts = new Set();
  for (const r of db.prepare('SELECT content_session_id, prompt_text, created_at FROM user_prompts').all()) {
    seenPrompts.add(dedupKey([r.content_session_id, r.created_at, r.prompt_text]));
  }
  // Observations have no tool_use_id column, so cross-run dedup keys on
  // (memory_session_id, title, created_at) as a stand-in.
  const seenObs = new Set();
  for (const r of db.prepare('SELECT memory_session_id, title, created_at FROM observations').all()) {
    seenObs.add(dedupKey([r.memory_session_id, `existing:${r.title}:${r.created_at}`]));
  }

  const pendingToolUse = new Map();
  const seenToolUseIds = new Set();
  let prompts = 0, observations = 0, skipped = 0, orphans = 0;

  // Gate in front of importToolPair: rejects a tool_use already handled in
  // this run (by id) or already stored by an earlier run (by title + ts).
  const tryImportToolPair = (useEv, resultEv) => {
    const sessionId = useEv.sessionId || 'imported';
    const useId = useEv.tool_use_id || useEv.id || '';
    const callKey = dedupKey([sessionId, useId]);
    if (seenToolUseIds.has(callKey)) return false;
    seenToolUseIds.add(callKey);

    const toolName = useEv.name || 'unknown';
    const rawTitle = `${toolName}: ${(useEv.input?.command || useEv.input?.file_path || '').slice(0, 80)}`;
    // The stored title went through scrubSecrets (via scrubRecord) before the
    // INSERT, so the lookup key must be built from the scrubbed form too;
    // otherwise any title the scrubber rewrites would never match its own row
    // and would be duplicated on every re-import.
    const storedTitle = scrubSecrets(rawTitle);
    const ts = useEv.timestamp || new Date().toISOString();
    const crossKey = dedupKey([sessionId, `existing:${storedTitle}:${ts}`]);
    if (seenObs.has(crossKey)) return false;

    return importToolPair(db, useEv, resultEv, project);
  };

  // Pair a result (top-level event or nested content part) with its pending
  // tool_use. Returns true when a pending tool_use was found and consumed.
  const pairResult = (resultEv) => {
    const useEv = pendingToolUse.get(resultEv.tool_use_id);
    if (!useEv) return false;
    if (tryImportToolPair(useEv, resultEv)) observations++;
    pendingToolUse.delete(resultEv.tool_use_id);
    return true;
  };

  const tx = db.transaction(() => {
    for (const line of lines) {
      if (!line.trim()) continue;
      const ev = parseLine(line);
      if (!ev) { skipped++; continue; }

      if (ev.type === 'user') {
        // Tool results can be delivered as parts of a user message; pair
        // those before treating the line as a prompt.
        let paired = false;
        const content = ev.message?.content;
        if (Array.isArray(content)) {
          for (const part of content) {
            if (part?.type === 'tool_result' && pairResult(part)) paired = true;
          }
        }
        if (importPrompt(db, ev, project, seenPrompts)) prompts++;
        else if (!paired) skipped++;
      } else if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
        for (const part of ev.message.content) {
          if (part?.type === 'tool_use') {
            // Merge the event envelope (sessionId, timestamp) with the part
            // (id, name, input) so importToolPair sees both.
            pendingToolUse.set(part.id, { ...ev, ...part });
          }
        }
      } else if (ev.type === 'tool_result') {
        if (!pairResult(ev)) skipped++;
      } else {
        skipped++;
      }
    }

    // Orphan fallback: tool_use events that never paired with a tool_result
    // (truncated transcript / killed Claude Code session).
    for (const [, useEv] of pendingToolUse) {
      const fauxResult = {
        content: '[tool_use without result — transcript truncated]',
        timestamp: useEv.timestamp,
      };
      if (tryImportToolPair(useEv, fauxResult)) orphans++;
    }
  });
  tx();

  return { prompts, observations, skipped, orphans };
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
// claude-mem-lite: per-table scrub helper. Applies scrubSecrets to the known
|
|
2
|
+
// text fields of a table row. Numeric / JSON-blob / id fields are passed
|
|
3
|
+
// through untouched.
|
|
4
|
+
//
|
|
5
|
+
// Failsafe policy: when the table is unknown, scrub every string field by
|
|
6
|
+
// default. Newly added tables stay safe even before TEXT_FIELDS_BY_TABLE is
|
|
7
|
+
// updated — over-scrubbing is the safe direction; under-scrubbing leaks.
|
|
8
|
+
//
|
|
9
|
+
// JSON-stringified array fields (e.g. session_handoffs.key_files,
|
|
10
|
+
// session_handoffs.match_keywords-when-array) are NOT listed here — running
|
|
11
|
+
// scrubSecrets over the JSON string can rewrite quoted values and break
|
|
12
|
+
// downstream JSON.parse. Pre-scrub each element upstream of the
|
|
13
|
+
// JSON.stringify call instead.
|
|
14
|
+
|
|
15
|
+
import { scrubSecrets } from '../secret-scrub.mjs';
|
|
16
|
+
|
|
17
|
+
// Names of the free-text columns that scrubRecord scrubs, keyed by table.
export const TEXT_FIELDS_BY_TABLE = {
  observations: [
    'title',
    'subtitle',
    'text',
    'narrative',
    'concepts',
    'facts',
    'lesson_learned',
    'search_aliases',
  ],
  session_summaries: [
    'request',
    'investigated',
    'learned',
    'completed',
    'next_steps',
    'remaining_items',
    'notes',
    'lessons',
    'key_decisions',
  ],
  session_handoffs: [
    'working_on',
    'completed',
    'unfinished',
    // Deliberately not listed:
    //   key_files      — stored as JSON.stringify(array); its elements are
    //                    expected to be pre-scrubbed at the call site.
    //   match_keywords — currently a space-joined plain string built from
    //                    tokenizeHandoff() output (alphanumeric tokens only),
    //                    so secrets cannot survive the upstream tokenizer.
    //                    Left out to avoid double work and to stay safe if a
    //                    refactor switches it to JSON.stringify.
    // key_decisions stays in the list: the call site joins it with '\n'
    // (plain string), and decision titles are LLM output that can carry
    // secrets verbatim.
    'key_decisions',
  ],
};
|
|
42
|
+
|
|
43
|
+
/**
 * Scrub the text fields of a record before INSERT.
 *
 * Returns a shallow copy in which the string-valued text fields have been
 * passed through scrubSecrets; the input object is left untouched. Non-string
 * values (numbers, null, ...) flow through unchanged.
 *
 * When `table` has an entry in TEXT_FIELDS_BY_TABLE only the listed fields
 * are scrubbed; for any other table name every string field is scrubbed
 * (failsafe: over-scrubbing is preferred to leaking).
 *
 * @param {string} table Table the row is about to be written to
 * @param {object} row   Column name -> value
 * @returns {object} Scrubbed shallow copy, or `row` itself when it is not an object
 */
export function scrubRecord(table, row) {
  if (!row || typeof row !== 'object') return row;

  const out = { ...row };
  // Own-property check: names such as "constructor" or "__proto__" must take
  // the scrub-everything path rather than resolve to an inherited
  // Object.prototype member, which is not iterable and would throw below.
  const isKnownTable = Object.prototype.hasOwnProperty.call(TEXT_FIELDS_BY_TABLE, table);
  const targets = isKnownTable ? TEXT_FIELDS_BY_TABLE[table] : Object.keys(out);

  for (const field of targets) {
    if (typeof out[field] === 'string') out[field] = scrubSecrets(out[field]);
  }
  return out;
}
|
package/mem-cli.mjs
CHANGED
|
@@ -14,6 +14,7 @@ import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-i
|
|
|
14
14
|
import { searchObservationsHybrid, findFtsAnchor } from './search-engine.mjs';
|
|
15
15
|
import { ensureRegistryDb, upsertResource } from './registry.mjs';
|
|
16
16
|
import { searchResources } from './registry-retriever.mjs';
|
|
17
|
+
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
17
18
|
import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
|
|
18
19
|
import { buildSessionContextLines } from './hook-context.mjs';
|
|
19
20
|
import { cmdAdopt, cmdUnadopt } from './adopt-cli.mjs';
|
|
@@ -1765,8 +1766,11 @@ function cmdCompress(db, args) {
|
|
|
1765
1766
|
VALUES (?, ?, ?, ?, ?, 'active')
|
|
1766
1767
|
`).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
|
|
1767
1768
|
|
|
1769
|
+
// Defense-in-depth: source rows already scrubbed at original ingest, but
|
|
1770
|
+
// the new compressed narrative is constructed here and re-persisted.
|
|
1771
|
+
const safe = scrubRecord('observations', { text: narrative, title, narrative });
|
|
1768
1772
|
const summaryResult = insertSummary.run(
|
|
1769
|
-
sessionId, proj,
|
|
1773
|
+
sessionId, proj, safe.text, dominantType, safe.title, safe.narrative,
|
|
1770
1774
|
medianDate.toISOString(), medianEpoch
|
|
1771
1775
|
);
|
|
1772
1776
|
const summaryId = Number(summaryResult.lastInsertRowid);
|
|
@@ -2414,6 +2418,9 @@ Commands:
|
|
|
2414
2418
|
remove Remove resource --name N --resource-type T
|
|
2415
2419
|
reindex Rebuild FTS5 index
|
|
2416
2420
|
|
|
2421
|
+
import-jsonl <file-or-dir> Import Claude Code JSONL transcripts (cold-start backfill)
|
|
2422
|
+
--project P Project name (default: inferred from cwd)
|
|
2423
|
+
|
|
2417
2424
|
activity <action> Non-memdir event log (v2.31) — bugfix/lesson/bug/discovery/etc.
|
|
2418
2425
|
save --type T "<title>" [--body "<text>"] [--files f1,f2] [--file path] [--importance 1-3] [--project P]
|
|
2419
2426
|
search "<query>" Search events [--type T] [--limit N] [--project P]
|
|
@@ -2499,6 +2506,57 @@ async function cmdImport(argv) {
|
|
|
2499
2506
|
}
|
|
2500
2507
|
}
|
|
2501
2508
|
|
|
2509
|
+
// ─── Import (Claude Code JSONL transcript — cold-start backfill) ─────────────
|
|
2510
|
+
|
|
2511
|
+
// CLI handler for `import-jsonl <file-or-dir> [--project <name>]`.
//
// Resolves the target path; a directory is walked recursively for *.jsonl
// files, anything else is treated as a single transcript. Each file is run
// through importJsonl and a per-file line plus a grand total are printed.
async function cmdImportJsonl(db, argv) {
  const { positional, flags } = parseArgs(argv);
  const target = positional[0];
  if (!target) {
    fail('[mem] Usage: claude-mem-lite import-jsonl <file-or-dir> [--project <name>]');
    return;
  }

  const project = flags.project || inferProject();
  const fs = await import('fs');
  const { join: pjoin, resolve } = await import('path');
  const abs = resolve(target);

  let st;
  try {
    st = fs.statSync(abs);
  } catch (e) {
    fail(`[mem] Cannot stat ${abs}: ${e.message}`);
    return;
  }

  // Depth-first listing; entries keep the order readdirSync returns them in.
  const collectJsonl = (dir) => fs.readdirSync(dir, { withFileTypes: true }).flatMap((entry) => {
    const full = pjoin(dir, entry.name);
    if (entry.isDirectory()) return collectJsonl(full);
    return entry.isFile() && full.endsWith('.jsonl') ? [full] : [];
  });
  const files = st.isDirectory() ? collectJsonl(abs) : [abs];

  if (files.length === 0) {
    out('[mem] No .jsonl files found.');
    return;
  }

  const { importJsonl } = await import('./lib/import-jsonl.mjs');
  const totals = { prompts: 0, observations: 0, skipped: 0, orphans: 0 };
  for (const file of files) {
    const r = await importJsonl(db, file, { project });
    const fileOrphans = r.orphans || 0;
    totals.prompts += r.prompts;
    totals.observations += r.observations;
    totals.skipped += r.skipped;
    totals.orphans += fileOrphans;
    out(`[mem] ${file}: +${r.prompts} prompts, +${r.observations} observations, ${fileOrphans} orphan tool_use, ${r.skipped} skipped`);
  }

  out(`[mem] Total: ${totals.prompts} prompts, ${totals.observations} observations, ${totals.orphans} orphan tool_use, ${totals.skipped} skipped from ${files.length} file(s).`);
  if (totals.prompts > 0 || totals.observations > 0) {
    out(`[mem] Try: claude-mem-lite recent 5 --project ${project}`);
  }
}
|
|
2559
|
+
|
|
2502
2560
|
// ─── Enrich ─────────────────────────────────────────────────────────────────
|
|
2503
2561
|
|
|
2504
2562
|
async function cmdEnrich(argv) {
|
|
@@ -2682,6 +2740,7 @@ export async function run(argv) {
|
|
|
2682
2740
|
case 'browse': cmdBrowse(db, cmdArgs); break;
|
|
2683
2741
|
case 'registry': cmdRegistry(db, cmdArgs); break;
|
|
2684
2742
|
case 'import': await cmdImport(cmdArgs); break;
|
|
2743
|
+
case 'import-jsonl': await cmdImportJsonl(db, cmdArgs); break;
|
|
2685
2744
|
case 'enrich': await cmdEnrich(cmdArgs); break;
|
|
2686
2745
|
case 'doctor': await cmdDoctor(db, cmdArgs); break;
|
|
2687
2746
|
case 'activity': await cmdActivity(db, cmdArgs); break;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.71.0",
|
|
4
4
|
"description": "Lightweight persistent memory system for Claude Code",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"packageManager": "npm@10.9.2",
|
|
@@ -39,6 +39,7 @@
|
|
|
39
39
|
"hook-handoff.mjs",
|
|
40
40
|
"hook-update.mjs",
|
|
41
41
|
"hook-optimize.mjs",
|
|
42
|
+
"hook-precompact.mjs",
|
|
42
43
|
"plugin-cache-guard.mjs",
|
|
43
44
|
"memdir.mjs",
|
|
44
45
|
"adopt-content.mjs",
|
|
@@ -64,6 +65,8 @@
|
|
|
64
65
|
"lib/save-observation.mjs",
|
|
65
66
|
"lib/deferred-work.mjs",
|
|
66
67
|
"lib/upgrade-banner.mjs",
|
|
68
|
+
"lib/scrub-record.mjs",
|
|
69
|
+
"lib/import-jsonl.mjs",
|
|
67
70
|
"cli/common.mjs",
|
|
68
71
|
"cli/fts-check.mjs",
|
|
69
72
|
"cli/doctor.mjs",
|
package/server.mjs
CHANGED
|
@@ -11,6 +11,7 @@ import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
|
|
|
11
11
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
12
12
|
import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
|
|
13
13
|
import { searchObservationsHybrid, findFtsAnchor } from './search-engine.mjs';
|
|
14
|
+
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
14
15
|
import { effectiveQuiet } from './hook-shared.mjs';
|
|
15
16
|
import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
|
|
16
17
|
import { memSearchSchema, memRecentSchema, memTimelineSchema, memGetSchema, memDeleteSchema, memSaveSchema, memStatsSchema, memCompressSchema, memMaintainSchema, memOptimizeSchema, memUpdateSchema, memExportSchema, memRecallSchema, memFtsCheckSchema, memRegistrySchema, memBrowseSchema, memUseSchema, memDeferSchema, memDeferListSchema, memDeferDropSchema, tools as TOOL_DEFS } from './tool-schemas.mjs';
|
|
@@ -1248,8 +1249,11 @@ server.registerTool(
|
|
|
1248
1249
|
VALUES (?, ?, ?, ?, ?, 'active')
|
|
1249
1250
|
`).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
|
|
1250
1251
|
|
|
1252
|
+
// Defense-in-depth: source rows already scrubbed at original ingest,
|
|
1253
|
+
// but the new compressed narrative is constructed here and re-persisted.
|
|
1254
|
+
const safe = scrubRecord('observations', { text: narrative, title, narrative });
|
|
1251
1255
|
const summaryResult = insertSummary.run(
|
|
1252
|
-
sessionId, proj,
|
|
1256
|
+
sessionId, proj, safe.text, dominantType, safe.title, safe.narrative,
|
|
1253
1257
|
medianDate.toISOString(), medianEpoch
|
|
1254
1258
|
);
|
|
1255
1259
|
const summaryId = Number(summaryResult.lastInsertRowid);
|
package/source-files.mjs
CHANGED
|
@@ -9,7 +9,7 @@ export const SOURCE_FILES = [
|
|
|
9
9
|
'cli.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'tool-schemas.mjs',
|
|
10
10
|
'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs', 'skip-tools.mjs',
|
|
11
11
|
'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs', 'hook-handoff.mjs',
|
|
12
|
-
'hook-update.mjs', 'hook-optimize.mjs',
|
|
12
|
+
'hook-update.mjs', 'hook-optimize.mjs', 'hook-precompact.mjs',
|
|
13
13
|
'plugin-cache-guard.mjs',
|
|
14
14
|
'haiku-client.mjs', 'utils.mjs', 'schema.mjs',
|
|
15
15
|
'package.json', 'package-lock.json', 'skill.md',
|
|
@@ -70,6 +70,15 @@ export const SOURCE_FILES = [
|
|
|
70
70
|
// module-level `process.exit(0)` side effects that abort vitest workers on
|
|
71
71
|
// direct import. Statically imported by hook.mjs SessionStart handler.
|
|
72
72
|
'lib/upgrade-banner.mjs',
|
|
73
|
+
// Per-table scrub helper for defense-in-depth at text-write INSERT paths.
|
|
74
|
+
// Statically imported by hook-llm, hook-handoff, hook-optimize, hook,
|
|
75
|
+
// mem-cli, and server; reached transitively from cli.mjs.
|
|
76
|
+
'lib/scrub-record.mjs',
|
|
77
|
+
// Cold-start backfill: parses ~/.claude/projects/<encoded>/<uuid>.jsonl
|
|
78
|
+
// transcripts into user_prompts + observations. Dynamic-imported by
|
|
79
|
+
// mem-cli.mjs::cmdImportJsonl; listed here so source-files-sync.test.mjs
|
|
80
|
+
// and the npm tarball ship it on every release.
|
|
81
|
+
'lib/import-jsonl.mjs',
|
|
73
82
|
];
|
|
74
83
|
|
|
75
84
|
/**
|