claude-mem-lite 2.70.0 → 2.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.70.0",
13
+ "version": "2.71.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.70.0",
3
+ "version": "2.71.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/hook-handoff.mjs CHANGED
@@ -2,7 +2,8 @@
2
2
  // Extracted for testability — hook.mjs has module-level side effects
3
3
 
4
4
  import { basename } from 'path';
5
- import { truncate, extractMatchKeywords, tokenizeHandoff, isSpecificTerm, LOW_SIGNAL_TITLE, EDIT_TOOLS, isMetaTriggerPrompt, notLowSignalTitleClause } from './utils.mjs';
5
+ import { truncate, extractMatchKeywords, tokenizeHandoff, isSpecificTerm, scrubSecrets, LOW_SIGNAL_TITLE, EDIT_TOOLS, isMetaTriggerPrompt, notLowSignalTitleClause } from './utils.mjs';
6
+ import { scrubRecord } from './lib/scrub-record.mjs';
6
7
  import {
7
8
  HANDOFF_EXPIRY_CLEAR, HANDOFF_EXPIRY_EXIT, HANDOFF_ANCHOR_MAX_AGE,
8
9
  HANDOFF_MATCH_THRESHOLD, CONTINUE_KEYWORDS,
@@ -161,6 +162,23 @@ export function buildAndSaveHandoff(db, sessionId, project, type, episodeSnapsho
161
162
  // `scopeSessionId` (CC UUID) tags the row for parallel scoping; falls back to
162
163
  // the mem-internal `sessionId` when the caller didn't supply one (tests + legacy).
163
164
  const storedSessionId = scopeSessionId || sessionId;
165
+ // Defense-in-depth: aggregates are built from already-stored rows + raw
166
+ // session memory; scrub at the persistence boundary regardless of source.
167
+ // Order matters: scrub raw values BEFORE truncation, so a secret straddling
168
+ // the truncation boundary doesn't fall below scrubSecrets's regex length
169
+ // floors. JSON-stringified fields (key_files) are pre-scrubbed at the
170
+ // element level before stringify — letting scrubSecrets rewrite the JSON
171
+ // string would risk breaking downstream JSON.parse.
172
+ const safe = scrubRecord('session_handoffs', {
173
+ working_on: workingOn,
174
+ completed: completed.map(c => `[${c.type}] ${c.title}`).join('\n'),
175
+ unfinished,
176
+ key_decisions: decisions.map(d => d.title).join('\n'),
177
+ match_keywords: keywords,
178
+ });
179
+ const safeKeyFiles = JSON.stringify(
180
+ [...fileSet].slice(0, 20).map(f => scrubSecrets(String(f)))
181
+ );
164
182
  db.prepare(`
165
183
  INSERT INTO session_handoffs (project, type, session_id, working_on, completed, unfinished, key_files, key_decisions, match_keywords, created_at_epoch, git_sha_at_handoff)
166
184
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
@@ -175,12 +193,12 @@ export function buildAndSaveHandoff(db, sessionId, project, type, episodeSnapsho
175
193
  git_sha_at_handoff = excluded.git_sha_at_handoff
176
194
  `).run(
177
195
  project, type, storedSessionId,
178
- truncate(workingOn, 1000),
179
- completed.map(c => `[${c.type}] ${c.title}`).join('\n'),
180
- unfinished.length > 3000 ? unfinished.slice(0, 2999) + '…' : unfinished,
181
- JSON.stringify([...fileSet].slice(0, 20)),
182
- decisions.map(d => d.title).join('\n'),
183
- keywords,
196
+ truncate(safe.working_on, 1000),
197
+ safe.completed,
198
+ safe.unfinished.length > 3000 ? safe.unfinished.slice(0, 2999) + '…' : safe.unfinished,
199
+ safeKeyFiles,
200
+ safe.key_decisions,
201
+ safe.match_keywords,
184
202
  Date.now(),
185
203
  gitShaAtHandoff,
186
204
  );
package/hook-llm.mjs CHANGED
@@ -10,6 +10,7 @@ import {
10
10
  getCurrentBranch, notLowSignalTitleClause,
11
11
  } from './utils.mjs';
12
12
  import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
13
+ import { scrubRecord } from './lib/scrub-record.mjs';
13
14
  import { getVocabulary, computeVector } from './tfidf.mjs';
14
15
  import {
15
16
  RUNTIME_DIR, DEDUP_WINDOW_MS, RELATED_OBS_WINDOW_MS,
@@ -194,6 +195,19 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
194
195
 
195
196
  const { conceptsText, factsText, textField } = buildFtsTextField(obs);
196
197
 
198
+ // Defense-in-depth: scrub text fields before INSERT. Source is LLM output
199
+ // (Haiku occasionally regurgitates input verbatim — error logs, hashes).
200
+ const safe = scrubRecord('observations', {
201
+ text: textField,
202
+ title: obs.title || '',
203
+ subtitle: obs.subtitle || '',
204
+ narrative: obs.narrative || '',
205
+ concepts: conceptsText,
206
+ facts: factsText,
207
+ lesson_learned: obs.lessonLearned || null,
208
+ search_aliases: obs.searchAliases || null,
209
+ });
210
+
197
211
  // Atomic: observation INSERT + observation_files + vector in one transaction
198
212
  const savedId = db.transaction(() => {
199
213
  const result = db.prepare(`
@@ -201,16 +215,16 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
201
215
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
202
216
  `).run(
203
217
  sessionId, project,
204
- textField, obs.type, obs.title, obs.subtitle || '',
205
- obs.narrative || '',
206
- conceptsText,
207
- factsText,
218
+ safe.text, obs.type, safe.title, safe.subtitle,
219
+ safe.narrative,
220
+ safe.concepts,
221
+ safe.facts,
208
222
  JSON.stringify(obs.filesRead || []),
209
223
  JSON.stringify(obs.files || []),
210
224
  obs.importance ?? 1,
211
225
  minhashSig,
212
- obs.lessonLearned || null,
213
- obs.searchAliases || null,
226
+ safe.lesson_learned,
227
+ safe.search_aliases,
214
228
  getCurrentBranch(),
215
229
  now.toISOString(), now.getTime()
216
230
  );
@@ -823,19 +837,32 @@ ${actionList}`;
823
837
  // so the enriched FTS text field + minhash + vector are refreshed atomically.
824
838
  const { conceptsText, factsText, textField } = buildFtsTextField(obs);
825
839
  const minhashSig = computeMinHash((obs.title || '') + ' ' + (obs.narrative || ''));
840
+ // Scrub LLM-output text fields at the UPDATE boundary, mirroring the
841
+ // INSERT path. type is an enum, importance is numeric, files_read is a
842
+ // JSON array (already scrubbed upstream), minhash_sig is hash bytes.
843
+ const safe = scrubRecord('observations', {
844
+ title: truncate(obs.title, 120),
845
+ subtitle: obs.subtitle || '',
846
+ narrative: truncate(obs.narrative || '', 500),
847
+ concepts: conceptsText,
848
+ facts: factsText,
849
+ text: textField,
850
+ lesson_learned: obs.lessonLearned || null,
851
+ search_aliases: obs.searchAliases || null,
852
+ });
826
853
  db.prepare(`
827
854
  UPDATE observations SET type=?, title=?, subtitle=?, narrative=?, concepts=?, facts=?,
828
855
  text=?, importance=?, files_read=?, minhash_sig=?, lesson_learned=?, search_aliases=?
829
856
  WHERE id = ?
830
857
  `).run(
831
- obs.type, truncate(obs.title, 120), obs.subtitle || '',
832
- truncate(obs.narrative || '', 500),
833
- conceptsText, factsText, textField,
858
+ obs.type, safe.title, safe.subtitle,
859
+ safe.narrative,
860
+ safe.concepts, safe.facts, safe.text,
834
861
  obs.importance,
835
862
  JSON.stringify(obs.filesRead || []),
836
863
  minhashSig,
837
- obs.lessonLearned || null,
838
- obs.searchAliases || null,
864
+ safe.lesson_learned,
865
+ safe.search_aliases,
839
866
  episode.savedId
840
867
  );
841
868
  savedId = episode.savedId;
@@ -973,6 +1000,23 @@ ${obsList}`;
973
1000
  // empty for that field. Without COALESCE, a degraded Haiku pass would erase
974
1001
  // the deterministic floor — the exact regression that made 72% of prod
975
1002
  // session_summaries ship with empty remaining_items.
1003
+ //
1004
+ // Scrub LLM-output text fields at the UPDATE boundary. lessons /
1005
+ // key_decisions are JSON.stringify(array<string>); we scrub the JSON
1006
+ // string here to match the sibling INSERT path. scrubSecrets uses
1007
+ // opaque placeholders that preserve JSON structure; element-level
1008
+ // pre-scrub remains safer in principle but would diverge from the
1009
+ // merged INSERT contract.
1010
+ const safe = scrubRecord('session_summaries', {
1011
+ request: llmParsed.request || '',
1012
+ investigated: llmParsed.investigated || '',
1013
+ learned: llmParsed.learned || '',
1014
+ completed: llmParsed.completed || '',
1015
+ next_steps: llmParsed.next_steps || '',
1016
+ remaining_items: llmParsed.remaining_items || '',
1017
+ lessons: lessonsJson,
1018
+ key_decisions: decisionsJson,
1019
+ });
976
1020
  db.prepare(`
977
1021
  UPDATE session_summaries
978
1022
  SET request = COALESCE(NULLIF(?, ''), request),
@@ -988,23 +1032,33 @@ ${obsList}`;
988
1032
  created_at_epoch = ?
989
1033
  WHERE id = ?
990
1034
  `).run(
991
- llmParsed.request || '', llmParsed.investigated || '', llmParsed.learned || '',
992
- llmParsed.completed || '', llmParsed.next_steps || '',
993
- llmParsed.remaining_items || '',
994
- lessonsJson, decisionsJson,
1035
+ safe.request, safe.investigated, safe.learned,
1036
+ safe.completed, safe.next_steps,
1037
+ safe.remaining_items,
1038
+ safe.lessons, safe.key_decisions,
995
1039
  now.toISOString(), now.getTime(),
996
1040
  existingFast.id
997
1041
  );
998
1042
  } else {
1043
+ const safe = scrubRecord('session_summaries', {
1044
+ request: llmParsed.request || '',
1045
+ investigated: llmParsed.investigated || '',
1046
+ learned: llmParsed.learned || '',
1047
+ completed: llmParsed.completed || '',
1048
+ next_steps: llmParsed.next_steps || '',
1049
+ remaining_items: llmParsed.remaining_items || '',
1050
+ lessons: lessonsJson,
1051
+ key_decisions: decisionsJson,
1052
+ });
999
1053
  db.prepare(`
1000
1054
  INSERT INTO session_summaries (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, lessons, key_decisions, created_at, created_at_epoch)
1001
1055
  VALUES (?, ?, ?, ?, ?, ?, ?, ?, '[]', '[]', '', ?, ?, ?, ?)
1002
1056
  `).run(
1003
1057
  sessionId, project,
1004
- llmParsed.request || '', llmParsed.investigated || '', llmParsed.learned || '',
1005
- llmParsed.completed || '', llmParsed.next_steps || '',
1006
- llmParsed.remaining_items || '',
1007
- lessonsJson, decisionsJson,
1058
+ safe.request, safe.investigated, safe.learned,
1059
+ safe.completed, safe.next_steps,
1060
+ safe.remaining_items,
1061
+ safe.lessons, safe.key_decisions,
1008
1062
  now.toISOString(), now.getTime()
1009
1063
  );
1010
1064
  }
@@ -1013,3 +1067,25 @@ ${obsList}`;
1013
1067
  db.close();
1014
1068
  }
1015
1069
  }
1070
+
1071
+ // Test-only — DO NOT import outside tests/. Underscore prefix is a
1072
+ // convention; the plugin has no `main`/`exports` field so external imports
1073
+ // are blocked at the package level, but a misguided sibling import inside
1074
+ // this repo could drag this into prod by accident. If that ever needs
1075
+ // enforcing, move the helper to a tests/_helpers/ module that takes a
1076
+ // db-insert callback.
1077
+ //
1078
+ // Exercises the same scrubRecord path used by saveObservation without
1079
+ // spinning up the full LLM dispatcher. Lets the e2e leak test verify that
1080
+ // the observations INSERT path scrubs all configured text fields.
1081
/**
 * Test-only helper: push one observation row through scrubRecord and INSERT it.
 *
 * Only the text-like columns (text, title, subtitle, narrative, concepts,
 * facts, lesson_learned, search_aliases) are read from the scrubbed copy;
 * identifiers, file lists, importance, minhash and branch are bound straight
 * from `obs`. The `type` column is always written as 'change'.
 *
 * @param {object} db  Handle exposing `prepare(sql).run(...params)`.
 * @param {object} obs Row-shaped input using snake_case column names.
 * @returns {void}
 */
export const __insertObservationForTest = (db, obs) => {
  const scrubbed = scrubRecord('observations', obs);
  const insertSql = `INSERT INTO observations (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, minhash_sig, lesson_learned, search_aliases, branch, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`;
  const params = [
    obs.session_id,
    obs.project,
    scrubbed.text,
    'change',
    scrubbed.title,
    scrubbed.subtitle,
    scrubbed.narrative,
    scrubbed.concepts,
    scrubbed.facts,
    obs.files_read,
    obs.files_modified,
    obs.importance,
    obs.minhash_sig,
    scrubbed.lesson_learned,
    scrubbed.search_aliases,
    obs.branch,
    new Date().toISOString(),
    Date.now(),
  ];
  db.prepare(insertSql).run(...params);
};
package/hook-optimize.mjs CHANGED
@@ -11,6 +11,7 @@ import {
11
11
  } from './utils.mjs';
12
12
  import { callModelJSON } from './haiku-client.mjs';
13
13
  import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
14
+ import { scrubRecord } from './lib/scrub-record.mjs';
14
15
  import { getVocabulary, computeVector, cosineSimilarity } from './tfidf.mjs';
15
16
  import { DB_DIR } from './schema.mjs';
16
17
 
@@ -159,12 +160,22 @@ search_aliases: 2-6 alternative search terms (include CJK if applicable).`;
159
160
  const textField = [conceptsText, factsText, searchAliases || '', bigramText].filter(Boolean).join(' ');
160
161
  const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
161
162
 
163
+ // Scrub LLM-output text fields at the UPDATE boundary. type is an
164
+ // enum, importance is numeric, minhash_sig is hash bytes.
165
+ const safe = scrubRecord('observations', {
166
+ title, narrative,
167
+ concepts: conceptsText,
168
+ facts: factsText,
169
+ text: textField,
170
+ lesson_learned: lessonLearned,
171
+ search_aliases: searchAliases,
172
+ });
162
173
  db.prepare(`
163
174
  UPDATE observations SET type=?, title=?, narrative=?, concepts=?, facts=?,
164
175
  text=?, importance=?, lesson_learned=?, search_aliases=?, minhash_sig=?, optimized_at=?
165
176
  WHERE id = ?
166
- `).run(type, title, narrative, conceptsText, factsText, textField,
167
- importance, lessonLearned, searchAliases, minhashSig, Date.now(), cand.id);
177
+ `).run(type, safe.title, safe.narrative, safe.concepts, safe.facts, safe.text,
178
+ importance, safe.lesson_learned, safe.search_aliases, minhashSig, Date.now(), cand.id);
168
179
 
169
180
  rebuildVector(db, cand.id, [title, narrative, conceptsText]);
170
181
 
@@ -277,7 +288,14 @@ export function applyNormalization(db, groups) {
277
288
  const existingAliases = row.search_aliases || '';
278
289
  const originalTerms = terms.filter(t => aliasMap.has(t.toLowerCase()) && aliasMap.get(t.toLowerCase()) !== t);
279
290
  const newAliases = [existingAliases, ...originalTerms].filter(Boolean).join(' ');
280
- updateStmt.run(uniqueConcepts, newAliases, Date.now(), row.id);
291
+ // Defense-in-depth scrub. Canonical concept names come from LLM output
292
+ // (identifySynonymGroups via Sonnet); existing values are already
293
+ // scrubbed but free LLM tokens can re-introduce secret-shaped strings.
294
+ const safe = scrubRecord('observations', {
295
+ concepts: uniqueConcepts,
296
+ search_aliases: newAliases,
297
+ });
298
+ updateStmt.run(safe.concepts, safe.search_aliases, Date.now(), row.id);
281
299
  updated++;
282
300
  }
283
301
  }
@@ -397,13 +415,22 @@ Return ONLY valid JSON:
397
415
  const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
398
416
  const importance = clampImportance(parsed.importance || 2);
399
417
 
418
+ // Scrub LLM-output cluster-merge text fields at the UPDATE boundary.
419
+ // importance is numeric; minhash_sig is hash bytes.
420
+ const safe = scrubRecord('observations', {
421
+ title, narrative,
422
+ concepts: conceptsText,
423
+ facts: factsText,
424
+ text: textField,
425
+ lesson_learned: lessonLearned,
426
+ });
400
427
  db.transaction(() => {
401
428
  db.prepare(`
402
429
  UPDATE observations SET title=?, narrative=?, concepts=?, facts=?, text=?,
403
430
  importance=?, lesson_learned=?, minhash_sig=?, optimized_at=?
404
431
  WHERE id = ?
405
- `).run(title, narrative, conceptsText, factsText, textField,
406
- importance, lessonLearned, minhashSig, Date.now(), keeper.id);
432
+ `).run(safe.title, safe.narrative, safe.concepts, safe.facts, safe.text,
433
+ importance, safe.lesson_learned, minhashSig, Date.now(), keeper.id);
407
434
 
408
435
  const otherIds = others.map(o => o.id);
409
436
  const ph = otherIds.map(() => '?').join(',');
@@ -573,13 +600,24 @@ JSON: {"title":"descriptive summary ≤120 chars","narrative":"comprehensive sum
573
600
  VALUES (?,?,?,?,?,'active')`
574
601
  ).run(sessionId, sessionId, project, now.toISOString(), now.getTime());
575
602
 
603
+ // Defense-in-depth: title/narrative/etc. are LLM-generated compression
604
+ // output; scrub at the persistence boundary regardless of upstream trust.
605
+ const safe = scrubRecord('observations', {
606
+ text: textField,
607
+ title,
608
+ narrative,
609
+ concepts: conceptsText,
610
+ facts: factsText,
611
+ lesson_learned: lessonLearned,
612
+ search_aliases: searchAliases,
613
+ });
576
614
  const result = db.prepare(`INSERT INTO observations
577
615
  (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
578
616
  files_read, files_modified, importance, lesson_learned, search_aliases, optimized_at,
579
617
  created_at, created_at_epoch)
580
618
  VALUES (?,?,?,?,?,'',?,?,?,'[]','[]',2,?,?,?,?,?)`
581
- ).run(sessionId, project, textField, 'discovery', title, narrative,
582
- conceptsText, factsText, lessonLearned, searchAliases, Date.now(),
619
+ ).run(sessionId, project, safe.text, 'discovery', safe.title, safe.narrative,
620
+ safe.concepts, safe.facts, safe.lesson_learned, safe.search_aliases, Date.now(),
583
621
  new Date(medianEpoch).toISOString(), medianEpoch);
584
622
 
585
623
  const sId = Number(result.lastInsertRowid);
@@ -0,0 +1,44 @@
1
+ // claude-mem-lite: PreCompact hook handler.
2
+ // Fires immediately before Claude Code auto-compaction begins. Emits a
3
+ // fresh <claude-mem-context> block on stdout so the summarizer that
4
+ // produces the compacted context has the most relevant memory in scope.
5
+ // Differs from SessionStart-on-compact (which fires AFTER compaction):
6
+ // PreCompact ensures memory survives the compaction step itself.
7
+
8
+ import { buildSessionContextLines } from './hook-context.mjs';
9
+ import { inferProject, debugCatch, debugLog } from './utils.mjs';
10
+
11
/**
 * Build the memory context and write it to stdout wrapped in a
 * <claude-mem-context> element. Nothing is written when the context is
 * missing or whitespace-only. This function issues no DB statements itself;
 * it only hands `db` to buildSessionContextLines.
 *
 * Any exception (from the builder or from the write) is routed to
 * debugCatch and not rethrown, so the hook never fails the compaction step.
 *
 * @param {object} ctx
 * @param {import('better-sqlite3').Database} ctx.db
 * @param {string} ctx.project
 * @param {string} [ctx.sessionId] Forwarded as null when absent/empty.
 * @returns {void}
 */
export function handlePreCompact({ db, project, sessionId }) {
  try {
    const body = buildSessionContextLines(db, project, new Date(), sessionId || null);
    if (!body) return;
    // NOTE(review): the builder's return type is not visible from this module.
    // Its name suggests an array of lines; interpolating an array directly
    // would comma-join it, so join explicitly with newlines. A string result
    // goes through unchanged — confirm against hook-context.mjs.
    const text = Array.isArray(body) ? body.join('\n') : String(body);
    if (text.trim() === '') return;
    process.stdout.write(`<claude-mem-context>\n${text}\n</claude-mem-context>\n`);
  } catch (e) {
    debugCatch(e, 'handlePreCompact');
  }
}
29
+
30
/**
 * Dispatcher-facing entry point for the PreCompact hook.
 *
 * Resolves the project via inferProject(), reads `session_id` from the
 * already-parsed hook payload (a missing payload is tolerated), records one
 * DEBUG log line, and delegates to handlePreCompact. The caller owns the DB
 * handle: it is neither opened nor closed here.
 *
 * @param {import('better-sqlite3').Database} db Opened database handle.
 * @param {object} [hookData] Parsed JSON from hook stdin.
 * @returns {Promise<void>}
 */
export async function entry(db, hookData) {
  const ctx = {
    db,
    project: inferProject(),
    sessionId: hookData?.session_id,
  };
  debugLog('DEBUG', 'pre-compact', `project=${ctx.project} sessionId=${ctx.sessionId || 'none'}`);
  handlePreCompact(ctx);
}
package/hook.mjs CHANGED
@@ -36,6 +36,7 @@ import {
36
36
  writePendingEntry, mergePendingEntries, episodeHasSignificantContent,
37
37
  } from './hook-episode.mjs';
38
38
  import { cleanupClaudeMdLegacyBlock, buildSessionContextLines } from './hook-context.mjs';
39
+ import { entry as preCompactEntry } from './hook-precompact.mjs';
39
40
  import {
40
41
  RUNTIME_DIR, EPISODE_BUFFER_SIZE, EPISODE_TIME_GAP_MS,
41
42
  SESSION_EXPIRY_MS, STALE_SESSION_MS, STALE_LOCK_MS,
@@ -43,6 +44,7 @@ import {
43
44
  spawnBackground, sweepOrphanEpisodeFiles,
44
45
  } from './hook-shared.mjs';
45
46
  import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
47
+ import { scrubRecord } from './lib/scrub-record.mjs';
46
48
  import { extractCitationsFromTranscript, bumpCitationAccess, computeCiteRecall } from './lib/citation-tracker.mjs';
47
49
  import { extractTailAssistantText, extractStructuredSummary } from './lib/summary-extractor.mjs';
48
50
  import { searchRelevantMemories, formatMemoryLine } from './hook-memory.mjs';
@@ -445,7 +447,10 @@ async function handleStop() {
445
447
  WHERE memory_session_id = ? AND COALESCE(compressed_into, 0) = 0
446
448
  ORDER BY created_at_epoch DESC LIMIT 5
447
449
  `).all(sessionId);
448
- const fastRequest = truncate(firstPrompt?.prompt_text || '', 200);
450
+ // Raw values flow into scrubRecord below; truncation at .run() site
451
+ // so secrets straddling the boundary still match scrubSecrets's
452
+ // length floors.
453
+ const fastRequestRaw = firstPrompt?.prompt_text || '';
449
454
  const obsCompleted = recentObs.map(o => o.title).filter(Boolean).join('; ');
450
455
 
451
456
  // Structural extraction from the assistant's tail message.
@@ -473,17 +478,23 @@ async function handleStop() {
473
478
  const finalRemaining = structuredNotDone;
474
479
  const finalNotes = structuredNotes || 'fast';
475
480
 
476
- if (fastRequest || finalCompleted || finalRemaining) {
481
+ if (fastRequestRaw || finalCompleted || finalRemaining) {
477
482
  const now = new Date();
483
+ const safe = scrubRecord('session_summaries', {
484
+ request: fastRequestRaw,
485
+ completed: finalCompleted,
486
+ remaining_items: finalRemaining,
487
+ notes: finalNotes,
488
+ });
478
489
  db.prepare(`
479
490
  INSERT INTO session_summaries
480
491
  (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
481
492
  VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', ?, ?, ?)
482
493
  `).run(
483
- sessionId, project, fastRequest,
484
- truncate(finalCompleted, 600),
485
- truncate(finalRemaining, 600),
486
- truncate(finalNotes, 400),
494
+ sessionId, project, truncate(safe.request, 200),
495
+ truncate(safe.completed, 600),
496
+ truncate(safe.remaining_items, 600),
497
+ truncate(safe.notes, 400),
487
498
  now.toISOString(), now.getTime()
488
499
  );
489
500
  }
@@ -947,26 +958,34 @@ async function handleSessionStart() {
947
958
  ORDER BY created_at_epoch DESC LIMIT 5
948
959
  `).all(prevSessionId);
949
960
 
950
- const fastRequest = truncate(firstPrompt?.prompt_text || '', 200);
951
- const fastCompleted = prevObs.map(o => o.title).filter(Boolean).join('; ');
961
+ // Raw values flow into scrubRecord; truncation deferred to .run() so
962
+ // secrets straddling the truncation boundary still match scrubSecrets
963
+ // regex length floors.
964
+ const fastRequestRaw = firstPrompt?.prompt_text || '';
965
+ const fastCompletedRaw = prevObs.map(o => o.title).filter(Boolean).join('; ');
952
966
 
953
967
  // Infer remaining_items from handoff unfinished (prevClearHandoff, already built above)
954
- let fastRemaining = '';
968
+ let fastRemainingRaw = '';
955
969
  if (prevClearHandoff?.unfinished) {
956
- fastRemaining = truncate(extractUnfinishedSummary(prevClearHandoff.unfinished, 0), 200);
970
+ fastRemainingRaw = extractUnfinishedSummary(prevClearHandoff.unfinished, 0);
957
971
  }
958
972
  // Fallback: episode errors
959
- if (!fastRemaining && episodeSnapshot?.entries) {
973
+ if (!fastRemainingRaw && episodeSnapshot?.entries) {
960
974
  const errors = episodeSnapshot.entries.filter(e => e.isError).map(e => e.desc).filter(Boolean);
961
- if (errors.length > 0) fastRemaining = truncate(errors.join('; '), 200);
975
+ if (errors.length > 0) fastRemainingRaw = errors.join('; ');
962
976
  }
963
977
 
964
- if (fastRequest || fastCompleted) {
978
+ if (fastRequestRaw || fastCompletedRaw) {
979
+ const safe = scrubRecord('session_summaries', {
980
+ request: fastRequestRaw,
981
+ completed: fastCompletedRaw,
982
+ remaining_items: fastRemainingRaw,
983
+ });
965
984
  db.prepare(`
966
985
  INSERT INTO session_summaries
967
986
  (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
968
987
  VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', 'fast', ?, ?)
969
- `).run(prevSessionId, prevProject || project, fastRequest, truncate(fastCompleted, 300), fastRemaining, now.toISOString(), now.getTime());
988
+ `).run(prevSessionId, prevProject || project, truncate(safe.request, 200), truncate(safe.completed, 300), truncate(safe.remaining_items, 200), now.toISOString(), now.getTime());
970
989
  }
971
990
  } catch (e) { debugCatch(e, 'session-start-fast-summary'); }
972
991
  }
@@ -1023,14 +1042,20 @@ async function handleSessionStart() {
1023
1042
  ORDER BY created_at_epoch DESC LIMIT 5
1024
1043
  `).all(recentSession.content_session_id);
1025
1044
 
1026
- const fr = truncate(fp?.prompt_text || '', 200);
1027
- const fc = po.map(o => o.title).filter(Boolean).join('; ');
1028
- if (fr || fc) {
1045
+ // Raw values into scrubRecord; truncation at .run() preserves
1046
+ // straddling-secret detection (per privacy review).
1047
+ const frRaw = fp?.prompt_text || '';
1048
+ const fcRaw = po.map(o => o.title).filter(Boolean).join('; ');
1049
+ if (frRaw || fcRaw) {
1050
+ const safe = scrubRecord('session_summaries', {
1051
+ request: frRaw,
1052
+ completed: fcRaw,
1053
+ });
1029
1054
  db.prepare(`
1030
1055
  INSERT INTO session_summaries
1031
1056
  (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
1032
1057
  VALUES (?, ?, ?, '', '', ?, '', '', '[]', '[]', 'fast', ?, ?)
1033
- `).run(recentSession.content_session_id, project, fr, truncate(fc, 300), now.toISOString(), now.getTime());
1058
+ `).run(recentSession.content_session_id, project, truncate(safe.request, 200), truncate(safe.completed, 300), now.toISOString(), now.getTime());
1034
1059
  }
1035
1060
  }
1036
1061
  }
@@ -1103,6 +1128,28 @@ async function handleSessionStart() {
1103
1128
  }
1104
1129
  }
1105
1130
 
1131
+ // ─── PreCompact Handler ──────────────────────────────────────────────────────
1132
+ // Fires immediately before Claude Code auto-compaction begins. Re-emits the
1133
+ // memory context block on stdout so the summarizer sees it during compaction.
1134
+ // SessionStart's "compact" matcher fires AFTER compaction — by then the
1135
+ // previous-turn injection has already been collapsed. Pure read; no DB writes.
1136
+
1137
/**
 * Hook dispatcher for the `pre-compact` event.
 *
 * Reads and parses the hook payload from stdin; if stdin cannot be read or
 * is not valid JSON the payload stays `{}` and processing continues. When
 * openDb() yields no handle the hook is a no-op. Otherwise the handle is
 * passed to preCompactEntry and always closed afterwards, even if the entry
 * rejects (the rejection still propagates to the caller).
 *
 * @returns {Promise<void>}
 */
async function handlePreCompactDispatch() {
  let payload = {};
  try {
    const { text } = await readStdin();
    payload = JSON.parse(text);
  } catch {
    // stdin unavailable or unparsable — carry on with the empty payload.
  }

  const db = openDb();
  if (db) {
    try {
      await preCompactEntry(db, payload);
    } finally {
      // Best-effort close: a failing close must not mask the entry's outcome.
      try {
        db.close();
      } catch {
        // ignored on purpose
      }
    }
  }
}
1152
+
1106
1153
  // ─── UserPromptSubmit Handler ────────────────────────────────────────────────
1107
1154
 
1108
1155
  async function handleUserPrompt() {
@@ -1272,11 +1319,15 @@ function handleAutoCompress() {
1272
1319
  (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
1273
1320
  VALUES (?,?,?,?,?,'active')`
1274
1321
  ).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
1322
+ // Defense-in-depth: title/narrative are derived from already-stored
1323
+ // obs.title, but those rows pre-date the central scrub policy in some
1324
+ // cases. Re-scrub at the persistence boundary.
1325
+ const safe = scrubRecord('observations', { text: narrative, title, narrative });
1275
1326
  const summaryResult = db.prepare(`INSERT INTO observations
1276
1327
  (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
1277
1328
  files_read, files_modified, importance, created_at, created_at_epoch)
1278
1329
  VALUES (?,?,?,?,?,'',?,'','','[]','[]',2,?,?)`
1279
- ).run(sessionId, proj, narrative, dominantType, title, narrative, new Date(medianEpoch).toISOString(), medianEpoch);
1330
+ ).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, new Date(medianEpoch).toISOString(), medianEpoch);
1280
1331
  const summaryId = Number(summaryResult.lastInsertRowid);
1281
1332
  const obsIds = obs.map(o => o.id);
1282
1333
  db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsIds.map(() => '?').join(',')})`)
@@ -1366,6 +1417,7 @@ try {
1366
1417
  switch (event) {
1367
1418
  case 'post-tool-use': await handlePostToolUse(); break;
1368
1419
  case 'session-start': await handleSessionStart(); break;
1420
+ case 'pre-compact': await handlePreCompactDispatch(); break;
1369
1421
  case 'stop': await handleStop(); break;
1370
1422
  case 'user-prompt': await handleUserPrompt(); break;
1371
1423
  case 'llm-episode': await handleLLMEpisode(); break;
package/hooks/hooks.json CHANGED
@@ -18,6 +18,18 @@
18
18
  ]
19
19
  }
20
20
  ],
21
+ "PreCompact": [
22
+ {
23
+ "matcher": "*",
24
+ "hooks": [
25
+ {
26
+ "type": "command",
27
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/hook.mjs\" pre-compact",
28
+ "timeout": 5
29
+ }
30
+ ]
31
+ }
32
+ ],
21
33
  "PreToolUse": [
22
34
  {
23
35
  "matcher": "Edit|Write|NotebookEdit|Read",
@@ -0,0 +1,225 @@
1
+ // claude-mem-lite: import a Claude Code JSONL transcript file into the
2
+ // memory DB. One transcript ≈ one Claude Code session; we map:
3
+ // user line -> user_prompts row
4
+ // tool_use+result -> observations row (matched by tool_use_id)
5
+ // anything else -> ignored
6
+ //
7
+ // Idempotent: re-running on the same file does not duplicate. Dedup keys
8
+ // are derived from full SHA-256 of the joined components. \x1f (ASCII unit
9
+ // separator) as join glue so adjacent components can't collide via inputs
10
+ // containing the separator. Truncating prompt_text would collapse rapid
11
+ // same-session "yes / next / 继续" replies into one observation.
12
+ //
13
+ // Orphan tool_use (truncated transcript: tool_use without matching
14
+ // tool_result) gets a fallback observation marked '[tool_use without
15
+ // result — transcript truncated]' so retrieval surfaces the truncation.
16
+
17
+ import { readFileSync, statSync } from 'fs';
18
+ import { createHash } from 'crypto';
19
+ import { scrubSecrets } from '../secret-scrub.mjs';
20
+ import { scrubRecord } from './scrub-record.mjs';
21
+
22
// Observation `type` recorded for each known Claude Code tool name.
const TOOL_TO_TYPE = {
  // Tools that write files.
  Edit: 'change',
  Write: 'change',
  NotebookEdit: 'change',
  // Tools that read or search.
  Read: 'discovery',
  Grep: 'discovery',
  Glob: 'discovery',
  // Shell commands are recorded as changes.
  Bash: 'change',
  // Delegation, skill, and web tools.
  Task: 'discovery',
  Agent: 'discovery',
  Skill: 'discovery',
  WebFetch: 'discovery',
  WebSearch: 'discovery',
};
28
+
29
/**
 * Derive a dedup key from an ordered list of components.
 *
 * @param {Array<unknown>} parts Components; joined with \x1f (ASCII unit
 *   separator) before hashing.
 * @returns {string} Hex-encoded SHA-256 digest of the joined components.
 */
function dedupKey(parts) {
  const joined = parts.join('\x1f');
  const hasher = createHash('sha256');
  hasher.update(joined);
  return hasher.digest('hex');
}
32
+
33
/**
 * Parse one JSONL line.
 *
 * @param {string} line Raw line text.
 * @returns {any} The parsed value, or null when the line is not valid JSON.
 */
function parseLine(line) {
  let parsed = null;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Malformed line: report it as null instead of throwing.
  }
  return parsed;
}
36
+
37
/**
 * Create the sdk_sessions row for an imported session if it does not exist.
 * The same id is written to both content_session_id and memory_session_id,
 * and the row is marked 'completed'.
 *
 * @param {object} db Database handle exposing prepare().run().
 * @param {string} sessionId Session identifier taken from the transcript.
 * @param {string} project Project name to store on the row.
 * @param {string} ts Timestamp string; the epoch falls back to "now" when it
 *   cannot be parsed.
 */
function ensureSession(db, sessionId, project, ts) {
  const insertSession = db.prepare(`
    INSERT OR IGNORE INTO sdk_sessions
      (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
    VALUES (?, ?, ?, ?, ?, 'completed')
  `);
  const startedAtEpoch = Date.parse(ts) || Date.now();
  insertSession.run(sessionId, sessionId, project, ts, startedAtEpoch);
}
44
+
45
/**
 * Persist one transcript `user` event as a user_prompts row.
 *
 * @param {object} db Database handle.
 * @param {object} ev Parsed transcript event.
 * @param {string} project Project name for the session row.
 * @param {Set<string>} seenPrompts Dedup keys already imported; mutated.
 * @returns {boolean} true when an insert was attempted, false when the event
 *   carried no text or was already seen.
 */
function importPrompt(db, ev, project, seenPrompts) {
  // Content is either a plain string or an array of parts; only `text`
  // parts contribute to the prompt body.
  const content = ev?.message?.content;
  let text = '';
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    text = content
      .filter((part) => part?.type === 'text')
      .map((part) => part.text)
      .join('\n');
  }
  if (!text) return false;

  const sessionId = ev.sessionId || 'imported';
  const ts = ev.timestamp || new Date().toISOString();
  const safe = scrubSecrets(text.slice(0, 10000));

  // The key is computed from the scrubbed text, which is also what gets
  // stored, so a later run over the same transcript derives the same key.
  const key = dedupKey([sessionId, ts, safe]);
  if (seenPrompts.has(key)) return false;
  seenPrompts.add(key);

  ensureSession(db, sessionId, project, ts);
  const counterRow = db.prepare(
    'UPDATE sdk_sessions SET prompt_counter = COALESCE(prompt_counter, 0) + 1 WHERE content_session_id = ? RETURNING prompt_counter'
  ).get(sessionId);
  const promptNumber = counterRow?.prompt_counter || 1;

  const insertPrompt = db.prepare(`
    INSERT OR IGNORE INTO user_prompts
      (content_session_id, prompt_text, prompt_number, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?)
  `);
  insertPrompt.run(sessionId, safe, promptNumber, ts, Date.parse(ts) || Date.now());
  return true;
}
75
+
76
/**
 * Persist one tool_use / tool_result pair as an observations row.
 *
 * The stored `text` is the tool input and the tool result, each capped at
 * 4000 characters, separated by a `---` line. All text fields pass through
 * scrubRecord before the INSERT.
 *
 * @param {object} db Database handle.
 * @param {object} toolUse Merged assistant event + tool_use part (reads
 *   sessionId, timestamp, name, input).
 * @param {object} toolResult Result event (or synthetic stand-in); only
 *   `content` is read.
 * @param {string} project Project name stored on the row.
 * @returns {boolean} Always true once the row has been inserted.
 */
function importToolPair(db, toolUse, toolResult, project) {
  const MAX_PART_CHARS = 4000;

  const sessionId = toolUse.sessionId || 'imported';
  const ts = toolUse.timestamp || new Date().toISOString();
  ensureSession(db, sessionId, project, ts);

  const toolName = toolUse.name || 'unknown';
  // Own-property check: a tool literally named e.g. "constructor" must not
  // resolve to an inherited Object.prototype member.
  const type = Object.prototype.hasOwnProperty.call(TOOL_TO_TYPE, toolName)
    ? TOOL_TO_TYPE[toolName]
    : 'change';

  // `typeof null === 'object'`, so exclude null explicitly; nullish input
  // becomes the empty string.
  const rawInput = toolUse.input;
  const inputJson = (rawInput !== null && typeof rawInput === 'object'
    ? JSON.stringify(rawInput)
    : String(rawInput ?? '')
  ).slice(0, MAX_PART_CHARS);

  // The cap applies to BOTH branches. Previously a string result bypassed
  // the slice and was stored at full length.
  const rawResult = toolResult?.content;
  const resultText = (typeof rawResult === 'string'
    ? rawResult
    : JSON.stringify(rawResult ?? '')
  ).slice(0, MAX_PART_CHARS);

  const filePath = toolUse.input?.file_path;
  const writesFile = toolName === 'Edit' || toolName === 'Write' || toolName === 'NotebookEdit';
  const filesModified = writesFile && filePath ? [filePath] : [];
  const filesRead = toolName === 'Read' && filePath ? [filePath] : [];

  // Title format must stay `${toolName}: ${command|path}` — the importer's
  // cross-run dedup reconstructs it from the transcript.
  const safe = scrubRecord('observations', {
    title: `${toolName}: ${(toolUse.input?.command || toolUse.input?.file_path || '').slice(0, 80)}`,
    subtitle: '',
    text: `${inputJson}\n---\n${resultText}`,
    narrative: '',
    concepts: '',
    facts: '',
    lesson_learned: null,
    search_aliases: null,
  });

  db.prepare(`
    INSERT INTO observations
      (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `).run(
    sessionId, project, safe.text, type, safe.title, safe.subtitle,
    safe.narrative, safe.concepts, safe.facts,
    JSON.stringify(filesRead), JSON.stringify(filesModified),
    1, ts, Date.parse(ts) || Date.now(),
  );
  return true;
}
119
+
120
/**
 * Import a single Claude Code JSONL transcript into the DB.
 *
 * `user` events become user_prompts rows. `tool_use` parts of `assistant`
 * events are paired with the `tool_result` event carrying the same
 * tool_use_id and become observations rows. A tool_use that never pairs is
 * persisted with a synthetic "transcript truncated" result and counted as an
 * orphan. Every other line is counted as skipped.
 *
 * Re-running on the same file is intended to be a no-op: dedup sets are
 * seeded from rows already in the DB before any line is processed.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} path Absolute path to the .jsonl file
 * @param {{project: string}} opts
 * @returns {Promise<{prompts:number, observations:number, skipped:number, orphans:number}>}
 * @throws If the file cannot be stat'ed or read.
 */
export async function importJsonl(db, path, { project }) {
  statSync(path); // fail fast with a stat error before touching the DB
  const lines = readFileSync(path, 'utf8').split('\n');

  // Cross-run prompt dedup: rebuild the keys importPrompt would compute from
  // what is already stored.
  const seenPrompts = new Set();
  for (const r of db.prepare('SELECT content_session_id, prompt_text, created_at FROM user_prompts').all()) {
    seenPrompts.add(dedupKey([r.content_session_id, r.created_at, r.prompt_text]));
  }

  // Cross-run observation dedup: observations have no tool_use_id column, so
  // (session, stored title, created_at) stands in for it.
  const seenObs = new Set();
  for (const r of db.prepare('SELECT memory_session_id, title, created_at FROM observations').all()) {
    seenObs.add(dedupKey([r.memory_session_id, `existing:${r.title}:${r.created_at}`]));
  }

  const pendingToolUse = new Map();
  let prompts = 0, observations = 0, skipped = 0;

  // Within one run, a (session, tool_use_id) pair is imported at most once.
  // This key shape never overlaps with the seenObs keys, so both checks run.
  const seenToolUseIds = new Set();
  const tryImportToolPair = (useEv, resultEv) => {
    const sessionId = useEv.sessionId || 'imported';
    const useId = useEv.tool_use_id || useEv.id || '';
    const callKey = dedupKey([sessionId, useId]);
    if (seenToolUseIds.has(callKey)) return false;
    seenToolUseIds.add(callKey);

    // Rebuild the title exactly as it was STORED. importToolPair scrubs the
    // title before inserting, so the lookup key must be built from the
    // scrubbed form too; comparing the raw title would miss every row whose
    // command/path was rewritten by the scrubber and re-insert it each run.
    const toolName = useEv.name || 'unknown';
    const rawTitle = `${toolName}: ${(useEv.input?.command || useEv.input?.file_path || '').slice(0, 80)}`;
    const storedTitle = scrubRecord('observations', { title: rawTitle }).title;
    const ts = useEv.timestamp || new Date().toISOString();
    const crossKey = dedupKey([sessionId, `existing:${storedTitle}:${ts}`]);
    if (seenObs.has(crossKey)) return false;

    return importToolPair(db, useEv, resultEv, project);
  };

  const tx = db.transaction(() => {
    for (const line of lines) {
      if (!line.trim()) continue;
      const ev = parseLine(line);
      if (!ev) { skipped++; continue; }

      if (ev.type === 'user') {
        if (importPrompt(db, ev, project, seenPrompts)) prompts++; else skipped++;
      } else if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
        // Park each tool_use (merged with its envelope for sessionId and
        // timestamp) until the matching tool_result shows up.
        for (const part of ev.message.content) {
          if (part.type === 'tool_use') {
            pendingToolUse.set(part.id, { ...ev, ...part });
          }
        }
      } else if (ev.type === 'tool_result') {
        const useEv = pendingToolUse.get(ev.tool_use_id);
        if (useEv) {
          if (tryImportToolPair(useEv, ev)) observations++;
          pendingToolUse.delete(ev.tool_use_id);
        } else {
          skipped++;
        }
      } else {
        skipped++;
      }
    }
  });
  tx();

  // Orphan tool_use fallback: persist tool_use events that never paired with
  // a tool_result (truncated transcript / killed Claude Code session).
  let orphans = 0;
  if (pendingToolUse.size > 0) {
    const tx2 = db.transaction(() => {
      for (const [, useEv] of pendingToolUse) {
        const fauxResult = {
          content: '[tool_use without result — transcript truncated]',
          timestamp: useEv.timestamp,
        };
        if (tryImportToolPair(useEv, fauxResult)) orphans++;
      }
    });
    tx2();
  }

  return { prompts, observations, skipped, orphans };
}
@@ -0,0 +1,63 @@
1
+ // claude-mem-lite: per-table scrub helper. Applies scrubSecrets to the known
2
+ // text fields of a table row. Numeric / JSON-blob / id fields are passed
3
+ // through untouched.
4
+ //
5
+ // Failsafe policy: when the table is unknown, scrub every string field by
6
+ // default. Newly added tables stay safe even before TEXT_FIELDS_BY_TABLE is
7
+ // updated — over-scrubbing is the safe direction; under-scrubbing leaks.
8
+ //
9
+ // JSON-stringified array fields (e.g. session_handoffs.key_files,
10
+ // session_handoffs.match_keywords-when-array) are NOT listed here — running
11
+ // scrubSecrets over the JSON string can rewrite quoted values and break
12
+ // downstream JSON.parse. Pre-scrub each element upstream of the
13
+ // JSON.stringify call instead.
14
+
15
+ import { scrubSecrets } from '../secret-scrub.mjs';
16
+
17
// Text columns to scrub, keyed by table name. Tables not listed here get the
// scrub-every-string fallback in scrubRecord.
export const TEXT_FIELDS_BY_TABLE = {
  observations: [
    'title', 'subtitle', 'text', 'narrative',
    'concepts', 'facts', 'lesson_learned', 'search_aliases',
  ],
  session_summaries: [
    'request', 'investigated', 'learned',
    'completed', 'next_steps', 'remaining_items', 'notes',
    'lessons', 'key_decisions',
  ],
  session_handoffs: [
    'working_on', 'completed', 'unfinished',
    // Excluded:
    //   key_files      — JSON.stringify(array); pre-scrub elements at call site
    //   match_keywords — currently a space-joined plain string; keeping it
    //                    here would scrub safely, but the value is built from
    //                    tokenizeHandoff() output (alphanumeric tokens only),
    //                    so secrets cannot survive the upstream tokenizer.
    //                    Excluded to avoid double-work + future-proof against
    //                    a refactor that switches to JSON.stringify.
    // key_decisions is kept: call site uses '\n'.join (plain string), and
    // decision titles can carry secrets verbatim (LLM output).
    'key_decisions',
  ],
};

/**
 * Scrub the text fields of a record before INSERT.
 *
 * For a table listed in TEXT_FIELDS_BY_TABLE only the listed fields are
 * scrubbed; for any other table name every string-valued field is scrubbed.
 * Non-string values (numbers, null, JSON blobs the caller has already
 * stringified into unlisted fields) flow through unchanged.
 *
 * @param {string} table Table name used to select the field list.
 * @param {object} row Record about to be persisted.
 * @returns {object} A shallow copy of `row` with text fields scrubbed; the
 *   input object is left untouched. A non-object `row` is returned as-is.
 */
export function scrubRecord(table, row) {
  if (!row || typeof row !== 'object') return row;
  const out = { ...row };

  // Array.isArray rather than truthiness: a table name such as 'constructor'
  // or '__proto__' resolves to an inherited Object.prototype member, which is
  // truthy but not iterable. Those names must take the failsafe
  // scrub-everything path like any other unknown table instead of throwing.
  const listed = TEXT_FIELDS_BY_TABLE[table];
  const targets = Array.isArray(listed) ? listed : Object.keys(out);

  for (const key of targets) {
    if (typeof out[key] === 'string') out[key] = scrubSecrets(out[key]);
  }
  return out;
}
package/mem-cli.mjs CHANGED
@@ -14,6 +14,7 @@ import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-i
14
14
  import { searchObservationsHybrid, findFtsAnchor } from './search-engine.mjs';
15
15
  import { ensureRegistryDb, upsertResource } from './registry.mjs';
16
16
  import { searchResources } from './registry-retriever.mjs';
17
+ import { scrubRecord } from './lib/scrub-record.mjs';
17
18
  import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
18
19
  import { buildSessionContextLines } from './hook-context.mjs';
19
20
  import { cmdAdopt, cmdUnadopt } from './adopt-cli.mjs';
@@ -1765,8 +1766,11 @@ function cmdCompress(db, args) {
1765
1766
  VALUES (?, ?, ?, ?, ?, 'active')
1766
1767
  `).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
1767
1768
 
1769
+ // Defense-in-depth: source rows already scrubbed at original ingest, but
1770
+ // the new compressed narrative is constructed here and re-persisted.
1771
+ const safe = scrubRecord('observations', { text: narrative, title, narrative });
1768
1772
  const summaryResult = insertSummary.run(
1769
- sessionId, proj, narrative, dominantType, title, narrative,
1773
+ sessionId, proj, safe.text, dominantType, safe.title, safe.narrative,
1770
1774
  medianDate.toISOString(), medianEpoch
1771
1775
  );
1772
1776
  const summaryId = Number(summaryResult.lastInsertRowid);
@@ -2414,6 +2418,9 @@ Commands:
2414
2418
  remove Remove resource --name N --resource-type T
2415
2419
  reindex Rebuild FTS5 index
2416
2420
 
2421
+ import-jsonl <file-or-dir> Import Claude Code JSONL transcripts (cold-start backfill)
2422
+ --project P Project name (default: inferred from cwd)
2423
+
2417
2424
  activity <action> Non-memdir event log (v2.31) — bugfix/lesson/bug/discovery/etc.
2418
2425
  save --type T "<title>" [--body "<text>"] [--files f1,f2] [--file path] [--importance 1-3] [--project P]
2419
2426
  search "<query>" Search events [--type T] [--limit N] [--project P]
@@ -2499,6 +2506,57 @@ async function cmdImport(argv) {
2499
2506
  }
2500
2507
  }
2501
2508
 
2509
+ // ─── Import (Claude Code JSONL transcript — cold-start backfill) ─────────────
2510
+
2511
/**
 * CLI handler for `import-jsonl <file-or-dir> [--project <name>]`.
 *
 * A directory target is walked recursively for `.jsonl` files; any other
 * target is imported as a single file. Per-file and total counts are printed.
 *
 * @param {object} db Open memory database handle, passed to importJsonl.
 * @param {string[]} argv Arguments following the subcommand name.
 */
async function cmdImportJsonl(db, argv) {
  const { positional, flags } = parseArgs(argv);
  const target = positional[0];
  if (!target) {
    fail('[mem] Usage: claude-mem-lite import-jsonl <file-or-dir> [--project <name>]');
    return;
  }

  const project = flags.project || inferProject();
  const fs = await import('fs');
  const { join: pjoin, resolve } = await import('path');
  const abs = resolve(target);

  let st;
  try {
    st = fs.statSync(abs);
  } catch (e) {
    fail(`[mem] Cannot stat ${abs}: ${e.message}`);
    return;
  }

  // Collect transcript paths: depth-first under a directory, or the target
  // itself when it is not a directory.
  const files = [];
  if (st.isDirectory()) {
    const collect = (dir) => {
      for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
        const full = pjoin(dir, entry.name);
        if (entry.isDirectory()) {
          collect(full);
        } else if (entry.isFile() && full.endsWith('.jsonl')) {
          files.push(full);
        }
      }
    };
    collect(abs);
  } else {
    files.push(abs);
  }

  if (files.length === 0) {
    out('[mem] No .jsonl files found.');
    return;
  }

  const { importJsonl } = await import('./lib/import-jsonl.mjs');
  const totals = { prompts: 0, observations: 0, skipped: 0, orphans: 0 };
  for (const file of files) {
    const r = await importJsonl(db, file, { project });
    totals.prompts += r.prompts;
    totals.observations += r.observations;
    totals.skipped += r.skipped;
    totals.orphans += r.orphans || 0;
    out(`[mem] ${file}: +${r.prompts} prompts, +${r.observations} observations, ${r.orphans || 0} orphan tool_use, ${r.skipped} skipped`);
  }

  out(`[mem] Total: ${totals.prompts} prompts, ${totals.observations} observations, ${totals.orphans} orphan tool_use, ${totals.skipped} skipped from ${files.length} file(s).`);
  if (totals.prompts > 0 || totals.observations > 0) {
    out(`[mem] Try: claude-mem-lite recent 5 --project ${project}`);
  }
}
2559
+
2502
2560
  // ─── Enrich ─────────────────────────────────────────────────────────────────
2503
2561
 
2504
2562
  async function cmdEnrich(argv) {
@@ -2682,6 +2740,7 @@ export async function run(argv) {
2682
2740
  case 'browse': cmdBrowse(db, cmdArgs); break;
2683
2741
  case 'registry': cmdRegistry(db, cmdArgs); break;
2684
2742
  case 'import': await cmdImport(cmdArgs); break;
2743
+ case 'import-jsonl': await cmdImportJsonl(db, cmdArgs); break;
2685
2744
  case 'enrich': await cmdEnrich(cmdArgs); break;
2686
2745
  case 'doctor': await cmdDoctor(db, cmdArgs); break;
2687
2746
  case 'activity': await cmdActivity(db, cmdArgs); break;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.70.0",
3
+ "version": "2.71.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "packageManager": "npm@10.9.2",
@@ -39,6 +39,7 @@
39
39
  "hook-handoff.mjs",
40
40
  "hook-update.mjs",
41
41
  "hook-optimize.mjs",
42
+ "hook-precompact.mjs",
42
43
  "plugin-cache-guard.mjs",
43
44
  "memdir.mjs",
44
45
  "adopt-content.mjs",
@@ -64,6 +65,8 @@
64
65
  "lib/save-observation.mjs",
65
66
  "lib/deferred-work.mjs",
66
67
  "lib/upgrade-banner.mjs",
68
+ "lib/scrub-record.mjs",
69
+ "lib/import-jsonl.mjs",
67
70
  "cli/common.mjs",
68
71
  "cli/fts-check.mjs",
69
72
  "cli/doctor.mjs",
package/server.mjs CHANGED
@@ -11,6 +11,7 @@ import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
11
11
  import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
12
12
  import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
13
13
  import { searchObservationsHybrid, findFtsAnchor } from './search-engine.mjs';
14
+ import { scrubRecord } from './lib/scrub-record.mjs';
14
15
  import { effectiveQuiet } from './hook-shared.mjs';
15
16
  import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
16
17
  import { memSearchSchema, memRecentSchema, memTimelineSchema, memGetSchema, memDeleteSchema, memSaveSchema, memStatsSchema, memCompressSchema, memMaintainSchema, memOptimizeSchema, memUpdateSchema, memExportSchema, memRecallSchema, memFtsCheckSchema, memRegistrySchema, memBrowseSchema, memUseSchema, memDeferSchema, memDeferListSchema, memDeferDropSchema, tools as TOOL_DEFS } from './tool-schemas.mjs';
@@ -1248,8 +1249,11 @@ server.registerTool(
1248
1249
  VALUES (?, ?, ?, ?, ?, 'active')
1249
1250
  `).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
1250
1251
 
1252
+ // Defense-in-depth: source rows already scrubbed at original ingest,
1253
+ // but the new compressed narrative is constructed here and re-persisted.
1254
+ const safe = scrubRecord('observations', { text: narrative, title, narrative });
1251
1255
  const summaryResult = insertSummary.run(
1252
- sessionId, proj, narrative, dominantType, title, narrative,
1256
+ sessionId, proj, safe.text, dominantType, safe.title, safe.narrative,
1253
1257
  medianDate.toISOString(), medianEpoch
1254
1258
  );
1255
1259
  const summaryId = Number(summaryResult.lastInsertRowid);
package/source-files.mjs CHANGED
@@ -9,7 +9,7 @@ export const SOURCE_FILES = [
9
9
  'cli.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'tool-schemas.mjs',
10
10
  'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs', 'skip-tools.mjs',
11
11
  'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs', 'hook-handoff.mjs',
12
- 'hook-update.mjs', 'hook-optimize.mjs',
12
+ 'hook-update.mjs', 'hook-optimize.mjs', 'hook-precompact.mjs',
13
13
  'plugin-cache-guard.mjs',
14
14
  'haiku-client.mjs', 'utils.mjs', 'schema.mjs',
15
15
  'package.json', 'package-lock.json', 'skill.md',
@@ -70,6 +70,15 @@ export const SOURCE_FILES = [
70
70
  // module-level `process.exit(0)` side effects that abort vitest workers on
71
71
  // direct import. Statically imported by hook.mjs SessionStart handler.
72
72
  'lib/upgrade-banner.mjs',
73
+ // Per-table scrub helper for defense-in-depth at text-write INSERT paths.
74
+ // Statically imported by hook-llm, hook-handoff, hook-optimize, hook,
75
+ // mem-cli, and server.mjs; reached transitively from cli.mjs.
76
+ 'lib/scrub-record.mjs',
77
+ // Cold-start backfill: parses ~/.claude/projects/<encoded>/<uuid>.jsonl
78
+ // transcripts into user_prompts + observations. Dynamic-imported by
79
+ // mem-cli.mjs::cmdImportJsonl; listed here so source-files-sync.test.mjs
80
+ // and the npm tarball ship it on every release.
81
+ 'lib/import-jsonl.mjs',
73
82
  ];
74
83
 
75
84
  /**