claude-mem-lite 2.69.0 → 2.71.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hook.mjs CHANGED
@@ -36,6 +36,7 @@ import {
36
36
  writePendingEntry, mergePendingEntries, episodeHasSignificantContent,
37
37
  } from './hook-episode.mjs';
38
38
  import { cleanupClaudeMdLegacyBlock, buildSessionContextLines } from './hook-context.mjs';
39
+ import { entry as preCompactEntry } from './hook-precompact.mjs';
39
40
  import {
40
41
  RUNTIME_DIR, EPISODE_BUFFER_SIZE, EPISODE_TIME_GAP_MS,
41
42
  SESSION_EXPIRY_MS, STALE_SESSION_MS, STALE_LOCK_MS,
@@ -43,6 +44,7 @@ import {
43
44
  spawnBackground, sweepOrphanEpisodeFiles,
44
45
  } from './hook-shared.mjs';
45
46
  import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
47
+ import { scrubRecord } from './lib/scrub-record.mjs';
46
48
  import { extractCitationsFromTranscript, bumpCitationAccess, computeCiteRecall } from './lib/citation-tracker.mjs';
47
49
  import { extractTailAssistantText, extractStructuredSummary } from './lib/summary-extractor.mjs';
48
50
  import { searchRelevantMemories, formatMemoryLine } from './hook-memory.mjs';
@@ -51,6 +53,7 @@ import { buildAndSaveHandoff, detectContinuationIntent, renderHandoffInjection,
51
53
  import { checkForUpdate } from './hook-update.mjs';
52
54
  import { handleLLMOptimize } from './hook-optimize.mjs';
53
55
  import { silentAutoAdopt, hasAutoAdoptMarker } from './adopt-cli.mjs';
56
+ import { emitV270UpgradeBanner } from './lib/upgrade-banner.mjs';
54
57
  // plugin-cache-guard.mjs loaded dynamically — pre-2.31.2 installs that auto-upgraded
55
58
  // from an older hook-update.mjs SOURCE_FILES (which did not list this module) would
56
59
  // crash on static import. Degrade gracefully to no-op when the module is absent.
@@ -444,7 +447,10 @@ async function handleStop() {
444
447
  WHERE memory_session_id = ? AND COALESCE(compressed_into, 0) = 0
445
448
  ORDER BY created_at_epoch DESC LIMIT 5
446
449
  `).all(sessionId);
447
- const fastRequest = truncate(firstPrompt?.prompt_text || '', 200);
450
+ // Raw values flow into scrubRecord below; truncation at .run() site
451
+ // so secrets straddling the boundary still match scrubSecrets's
452
+ // length floors.
453
+ const fastRequestRaw = firstPrompt?.prompt_text || '';
448
454
  const obsCompleted = recentObs.map(o => o.title).filter(Boolean).join('; ');
449
455
 
450
456
  // Structural extraction from the assistant's tail message.
@@ -472,17 +478,23 @@ async function handleStop() {
472
478
  const finalRemaining = structuredNotDone;
473
479
  const finalNotes = structuredNotes || 'fast';
474
480
 
475
- if (fastRequest || finalCompleted || finalRemaining) {
481
+ if (fastRequestRaw || finalCompleted || finalRemaining) {
476
482
  const now = new Date();
483
+ const safe = scrubRecord('session_summaries', {
484
+ request: fastRequestRaw,
485
+ completed: finalCompleted,
486
+ remaining_items: finalRemaining,
487
+ notes: finalNotes,
488
+ });
477
489
  db.prepare(`
478
490
  INSERT INTO session_summaries
479
491
  (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
480
492
  VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', ?, ?, ?)
481
493
  `).run(
482
- sessionId, project, fastRequest,
483
- truncate(finalCompleted, 600),
484
- truncate(finalRemaining, 600),
485
- truncate(finalNotes, 400),
494
+ sessionId, project, truncate(safe.request, 200),
495
+ truncate(safe.completed, 600),
496
+ truncate(safe.remaining_items, 600),
497
+ truncate(safe.notes, 400),
486
498
  now.toISOString(), now.getTime()
487
499
  );
488
500
  }
@@ -946,26 +958,34 @@ async function handleSessionStart() {
946
958
  ORDER BY created_at_epoch DESC LIMIT 5
947
959
  `).all(prevSessionId);
948
960
 
949
- const fastRequest = truncate(firstPrompt?.prompt_text || '', 200);
950
- const fastCompleted = prevObs.map(o => o.title).filter(Boolean).join('; ');
961
+ // Raw values flow into scrubRecord; truncation deferred to .run() so
962
+ // secrets straddling the truncation boundary still match scrubSecrets
963
+ // regex length floors.
964
+ const fastRequestRaw = firstPrompt?.prompt_text || '';
965
+ const fastCompletedRaw = prevObs.map(o => o.title).filter(Boolean).join('; ');
951
966
 
952
967
  // Infer remaining_items from handoff unfinished (already built above at line 476)
953
- let fastRemaining = '';
968
+ let fastRemainingRaw = '';
954
969
  if (prevClearHandoff?.unfinished) {
955
- fastRemaining = truncate(extractUnfinishedSummary(prevClearHandoff.unfinished, 0), 200);
970
+ fastRemainingRaw = extractUnfinishedSummary(prevClearHandoff.unfinished, 0);
956
971
  }
957
972
  // Fallback: episode errors
958
- if (!fastRemaining && episodeSnapshot?.entries) {
973
+ if (!fastRemainingRaw && episodeSnapshot?.entries) {
959
974
  const errors = episodeSnapshot.entries.filter(e => e.isError).map(e => e.desc).filter(Boolean);
960
- if (errors.length > 0) fastRemaining = truncate(errors.join('; '), 200);
975
+ if (errors.length > 0) fastRemainingRaw = errors.join('; ');
961
976
  }
962
977
 
963
- if (fastRequest || fastCompleted) {
978
+ if (fastRequestRaw || fastCompletedRaw) {
979
+ const safe = scrubRecord('session_summaries', {
980
+ request: fastRequestRaw,
981
+ completed: fastCompletedRaw,
982
+ remaining_items: fastRemainingRaw,
983
+ });
964
984
  db.prepare(`
965
985
  INSERT INTO session_summaries
966
986
  (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
967
987
  VALUES (?, ?, ?, '', '', ?, '', ?, '[]', '[]', 'fast', ?, ?)
968
- `).run(prevSessionId, prevProject || project, fastRequest, truncate(fastCompleted, 300), fastRemaining, now.toISOString(), now.getTime());
988
+ `).run(prevSessionId, prevProject || project, truncate(safe.request, 200), truncate(safe.completed, 300), truncate(safe.remaining_items, 200), now.toISOString(), now.getTime());
969
989
  }
970
990
  } catch (e) { debugCatch(e, 'session-start-fast-summary'); }
971
991
  }
@@ -1022,14 +1042,20 @@ async function handleSessionStart() {
1022
1042
  ORDER BY created_at_epoch DESC LIMIT 5
1023
1043
  `).all(recentSession.content_session_id);
1024
1044
 
1025
- const fr = truncate(fp?.prompt_text || '', 200);
1026
- const fc = po.map(o => o.title).filter(Boolean).join('; ');
1027
- if (fr || fc) {
1045
+ // Raw values into scrubRecord; truncation at .run() preserves
1046
+ // straddling-secret detection (per privacy review).
1047
+ const frRaw = fp?.prompt_text || '';
1048
+ const fcRaw = po.map(o => o.title).filter(Boolean).join('; ');
1049
+ if (frRaw || fcRaw) {
1050
+ const safe = scrubRecord('session_summaries', {
1051
+ request: frRaw,
1052
+ completed: fcRaw,
1053
+ });
1028
1054
  db.prepare(`
1029
1055
  INSERT INTO session_summaries
1030
1056
  (memory_session_id, project, request, investigated, learned, completed, next_steps, remaining_items, files_read, files_edited, notes, created_at, created_at_epoch)
1031
1057
  VALUES (?, ?, ?, '', '', ?, '', '', '[]', '[]', 'fast', ?, ?)
1032
- `).run(recentSession.content_session_id, project, fr, truncate(fc, 300), now.toISOString(), now.getTime());
1058
+ `).run(recentSession.content_session_id, project, truncate(safe.request, 200), truncate(safe.completed, 300), now.toISOString(), now.getTime());
1033
1059
  }
1034
1060
  }
1035
1061
  }
@@ -1073,6 +1099,13 @@ async function handleSessionStart() {
1073
1099
  // CLAUDE.md by pre-v2.30 installs. Idempotent no-op afterwards.
1074
1100
  cleanupClaudeMdLegacyBlock();
1075
1101
 
1102
+ // v2.70.0 one-shot upgrade banner: notify users on first SessionStart per
1103
+ // project that the `### Deferred Work` block now reads from the
1104
+ // deferred_work table (was: high-importance observations in v2.69.x).
1105
+ // Idempotent via marker file; subsequent SessionStarts are silent.
1106
+ try { emitV270UpgradeBanner({ project, runtimeDir: RUNTIME_DIR }); }
1107
+ catch (e) { debugCatch(e, 'session-start-v270-banner'); }
1108
+
1076
1109
  // Pre-load TF-IDF vocabulary cache for this session (from DB, ~1ms)
1077
1110
  try { getVocabulary(db); } catch (e) { debugCatch(e, 'session-start-vocab'); }
1078
1111
 
@@ -1095,6 +1128,28 @@ async function handleSessionStart() {
1095
1128
  }
1096
1129
  }
1097
1130
 
1131
+ // ─── PreCompact Handler ──────────────────────────────────────────────────────
1132
+ // Fires immediately before Claude Code auto-compaction begins. Re-emits the
1133
+ // memory context block on stdout so the summarizer sees it during compaction.
1134
+ // SessionStart's "compact" matcher fires AFTER compaction — by then the
1135
+ // previous-turn injection has already been collapsed. Pure read; no DB writes.
1136
+
1137
/**
 * Dispatch entry for the `pre-compact` hook event.
 *
 * Reads the hook payload from stdin on a best-effort basis, opens the DB and
 * hands both to `preCompactEntry`. The DB handle is always closed afterwards.
 *
 * @returns {Promise<void>} Resolves once `preCompactEntry` has finished, or
 *   right away when `openDb()` yields no handle.
 */
async function handlePreCompactDispatch() {
  let hookData = {};
  try {
    const raw = await readStdin();
    const parsed = JSON.parse(raw.text);
    // JSON.parse returns null / arrays / primitives for valid non-object JSON.
    // Only a plain object is a usable payload; anything else keeps the empty
    // default so the downstream handler never receives `null`.
    if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
      hookData = parsed;
    }
  } catch { /* stdin unavailable or not JSON — emit anyway with whatever we can infer */ }

  const db = openDb();
  if (!db) return;
  try {
    await preCompactEntry(db, hookData);
  } finally {
    // Best-effort close: a close failure must not mask the handler outcome.
    try { db.close(); } catch {}
  }
}
1152
+
1098
1153
  // ─── UserPromptSubmit Handler ────────────────────────────────────────────────
1099
1154
 
1100
1155
  async function handleUserPrompt() {
@@ -1264,11 +1319,15 @@ function handleAutoCompress() {
1264
1319
  (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
1265
1320
  VALUES (?,?,?,?,?,'active')`
1266
1321
  ).run(sessionId, sessionId, proj, now.toISOString(), now.getTime());
1322
+ // Defense-in-depth: title/narrative are derived from already-stored
1323
+ // obs.title, but those rows pre-date the central scrub policy in some
1324
+ // cases. Re-scrub at the persistence boundary.
1325
+ const safe = scrubRecord('observations', { text: narrative, title, narrative });
1267
1326
  const summaryResult = db.prepare(`INSERT INTO observations
1268
1327
  (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts,
1269
1328
  files_read, files_modified, importance, created_at, created_at_epoch)
1270
1329
  VALUES (?,?,?,?,?,'',?,'','','[]','[]',2,?,?)`
1271
- ).run(sessionId, proj, narrative, dominantType, title, narrative, new Date(medianEpoch).toISOString(), medianEpoch);
1330
+ ).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, new Date(medianEpoch).toISOString(), medianEpoch);
1272
1331
  const summaryId = Number(summaryResult.lastInsertRowid);
1273
1332
  const obsIds = obs.map(o => o.id);
1274
1333
  db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsIds.map(() => '?').join(',')})`)
@@ -1358,6 +1417,7 @@ try {
1358
1417
  switch (event) {
1359
1418
  case 'post-tool-use': await handlePostToolUse(); break;
1360
1419
  case 'session-start': await handleSessionStart(); break;
1420
+ case 'pre-compact': await handlePreCompactDispatch(); break;
1361
1421
  case 'stop': await handleStop(); break;
1362
1422
  case 'user-prompt': await handleUserPrompt(); break;
1363
1423
  case 'llm-episode': await handleLLMEpisode(); break;
package/hooks/hooks.json CHANGED
@@ -18,6 +18,18 @@
18
18
  ]
19
19
  }
20
20
  ],
21
+ "PreCompact": [
22
+ {
23
+ "matcher": "*",
24
+ "hooks": [
25
+ {
26
+ "type": "command",
27
+ "command": "node \"${CLAUDE_PLUGIN_ROOT}/hook.mjs\" pre-compact",
28
+ "timeout": 5
29
+ }
30
+ ]
31
+ }
32
+ ],
21
33
  "PreToolUse": [
22
34
  {
23
35
  "matcher": "Edit|Write|NotebookEdit|Read",
@@ -0,0 +1,171 @@
1
+ // claude-mem-lite — deferred_work data layer
2
+ // Pure-data CRUD + ordinal resolver + transactional closure helper.
3
+ // Decoupled from observations table: different lifecycle, different scoring.
4
+
5
/**
 * Insert a new open deferred_work row.
 *
 * @param {Database} db Opened DB
 * @param {object} args
 * @param {string} args.project Required project name (must not be blank)
 * @param {string} args.title Required one-line subject (must not be blank)
 * @param {number} [args.priority=2] 1=low, 2=normal, 3=urgent
 * @param {string} [args.detail] Optional longer description
 * @param {string[]} [args.files] Optional file paths; stored as a JSON array
 * @param {string} [args.source_session_id] Mem session id
 * @param {number} [args.source_prompt_id] user_prompts.id
 * @returns {{id: number}} Inserted row id
 * @throws {Error} When project/title is missing or whitespace-only, priority
 *   is not 1, 2 or 3, or `files` is given but is not an array of strings.
 */
export function insertDeferred(db, args) {
  const {
    project, title, priority = 2, detail = null, files = null,
    source_session_id = null, source_prompt_id = null,
  } = args;
  // source_session_id / source_prompt_id: forward-compat for v2.71+ defer-detector
  // hook (anchor a deferred item to the originating prompt). v1 inserts NULL.

  // Reject whitespace-only values too — a blank title would be listed as an
  // empty line. dropDeferred applies the same trim-based check to its reason.
  if (typeof project !== 'string' || project.trim().length === 0) throw new Error('project required');
  if (typeof title !== 'string' || title.trim().length === 0) throw new Error('title required');
  if (![1, 2, 3].includes(priority)) throw new Error('priority must be 1, 2, or 3');
  // Falsy `files` still means "no files" (stored as NULL). Anything truthy
  // must really be a string array, otherwise the column would hold JSON that
  // is not an array.
  if (files && !(Array.isArray(files) && files.every((f) => typeof f === 'string'))) {
    throw new Error('files must be an array of strings');
  }

  const stmt = db.prepare(`
    INSERT INTO deferred_work
      (project, title, detail, priority, status, created_at_epoch,
       source_session_id, source_prompt_id, files)
    VALUES (?, ?, ?, ?, 'open', ?, ?, ?, ?)
  `);
  const r = stmt.run(
    project, title, detail, priority, Date.now(),
    source_session_id, source_prompt_id,
    files ? JSON.stringify(files) : null,
  );
  return { id: Number(r.lastInsertRowid) };
}
39
+
40
/**
 * List open items in a project with a computed per-project ordinal.
 *
 * The ordinal is dynamic — recomputed on every call by ROW_NUMBER over the
 * project's open rows sorted (priority DESC, created_at_epoch ASC, id ASC).
 * When item-1 closes, item-2 becomes the new item-1.
 *
 * `id ASC` is the final tie-breaker: created_at_epoch is a millisecond
 * timestamp, so several items inserted in one batch can share both priority
 * and timestamp, and SQL leaves the order of equal sort keys unspecified.
 * NOTE(review): any other query that maps ordinals back to ids must use the
 * identical ordering for displayed ordinals to resolve to the same rows.
 *
 * @param {Database} db
 * @param {string} project
 * @param {number} [limit=10]
 * @returns {Array<{id, project, title, detail, priority, status, created_at_epoch, ordinal}>}
 */
export function listOpenWithOrdinal(db, project, limit = 10) {
  return db.prepare(`
    SELECT id, project, title, detail, priority, status, created_at_epoch,
           ROW_NUMBER() OVER (ORDER BY priority DESC, created_at_epoch ASC, id ASC) AS ordinal
    FROM deferred_work
    WHERE project = ? AND status = 'open'
    ORDER BY priority DESC, created_at_epoch ASC, id ASC
    LIMIT ?
  `).all(project, limit);
}
60
+
61
/**
 * Mark an open deferred_work row as dropped, recording why.
 *
 * The UPDATE is guarded by `status='open'`, so a row that is already closed
 * (or does not exist) is left untouched.
 *
 * @param {Database} db
 * @param {number} id deferred_work.id to drop
 * @param {string} reason Non-empty explanation; stored trimmed
 * @returns {{changed: number}} 1 if updated, 0 if not found or not open.
 * @throws {Error} When `reason` is not a string or is blank after trimming.
 */
export function dropDeferred(db, id, reason) {
  const trimmedReason = typeof reason === 'string' ? reason.trim() : '';
  if (trimmedReason.length === 0) {
    throw new Error('drop reason required (non-empty string)');
  }
  const update = db.prepare(`
    UPDATE deferred_work
    SET status='dropped', closed_at_epoch=?, drop_reason=?
    WHERE id=? AND status='open'
  `);
  const { changes } = update.run(Date.now(), trimmedReason, id);
  return { changed: changes };
}
76
+
77
/**
 * Resolve mixed ordinal (int) + raw-id ("D#<n>") tokens to real deferred_work
 * ids, validated against caller project + status='open'.
 *
 * - bare integer N → ordinal within the project, ranked by
 *   (priority DESC, created_at_epoch ASC, id ASC) over the open rows
 * - "D#<n>" string → raw deferred_work.id; must belong to caller project AND be open
 *
 * `id ASC` is the final tie-breaker so that rows sharing priority and a
 * millisecond timestamp (e.g. inserted in one batch) get a well-defined
 * ordinal; SQL leaves the order of equal sort keys unspecified.
 * NOTE(review): the query that displays ordinals to the user must rank with
 * the identical ordering, otherwise "item 2" here may not be the listed item 2.
 *
 * @param {Database} db
 * @param {string} project Caller project (FK guard)
 * @param {Array<number|string>} tokens Mixed input
 * @returns {number[]} Real deferred_work ids in input order
 * @throws {Error} On unresolvable input — error message names the offending token
 */
export function resolveDeferredIds(db, project, tokens) {
  if (!Array.isArray(tokens)) throw new Error('tokens must be an array');
  // Load the open list once so every ordinal in this call is resolved against
  // the same snapshot ([1, 2] resolves consistently).
  const open = db.prepare(`
    SELECT id, ROW_NUMBER() OVER (ORDER BY priority DESC, created_at_epoch ASC, id ASC) AS ordinal
    FROM deferred_work
    WHERE project = ? AND status = 'open'
  `).all(project);
  const ordinalToId = new Map(open.map((r) => [r.ordinal, r.id]));

  const getRow = db.prepare(`SELECT id, project, status FROM deferred_work WHERE id = ?`);
  const seen = new Set();
  const resolved = [];

  for (const t of tokens) {
    let id;
    if (Number.isInteger(t)) {
      // Bare integer: position in the open list.
      id = ordinalToId.get(t);
      if (id === undefined) {
        throw new Error(`ordinal ${t} has no corresponding open deferred item in project "${project}" (open count: ${open.length})`);
      }
    } else if (typeof t === 'string') {
      // "D#<n>": explicit row id, checked against project and status.
      const m = /^D#(\d+)$/.exec(t.trim());
      if (!m) throw new Error(`invalid token "${t}" — expected D#N or integer ordinal`);
      id = parseInt(m[1], 10);
      const row = getRow.get(id);
      if (!row) throw new Error(`D#${id} not found`);
      if (row.project !== project) {
        throw new Error(`D#${id} belongs to project "${row.project}", not "${project}"`);
      }
      if (row.status !== 'open') {
        throw new Error(`D#${id} status is "${row.status}", cannot close (only 'open' items)`);
      }
    } else {
      throw new Error(`invalid token type ${typeof t} — expected D#N or integer ordinal`);
    }
    // An ordinal and a D# token may name the same row; refuse rather than
    // silently returning the id twice.
    if (seen.has(id)) throw new Error(`duplicate token resolves to id ${id}`);
    seen.add(id);
    resolved.push(id);
  }
  return resolved;
}
133
+
134
/**
 * Mark a set of deferred items as done, all-or-nothing.
 *
 * Every UPDATE runs inside one `db.transaction()` wrapper; as soon as one row
 * turns out not to be open the wrapper throws, so earlier rows are rolled
 * back. Per the original author's note, better-sqlite3 nests this wrapper
 * inside a caller-managed transaction via SAVEPOINT, which lets a wider
 * closure flow (obs INSERT + closeDeferredItems) stay atomic as a whole.
 *
 * An empty or non-array `ids` is a silent no-op and is checked before
 * `closingObsId` is validated.
 *
 * @param {Database} db
 * @param {number[]} ids Already-resolved real ids (use resolveDeferredIds first)
 * @param {number} closingObsId observations.id that proves closure
 * @throws {Error} If closingObsId is not a positive integer, or any id is not
 *   currently open (lookup-based safety net)
 */
export function closeDeferredItems(db, ids, closingObsId) {
  const nothingToClose = !Array.isArray(ids) || ids.length === 0;
  if (nothingToClose) return;

  const validObsId = Number.isInteger(closingObsId) && closingObsId > 0;
  if (!validObsId) {
    throw new Error('closingObsId must be a positive integer');
  }

  // Defense-in-depth: the `status='open'` guard re-checks each row at write
  // time, because the caller may have resolved the ids earlier without
  // holding a lock.
  const markDone = db.prepare(`
    UPDATE deferred_work
    SET status='done', closed_at_epoch=?, closed_by_obs_id=?
    WHERE id=? AND status='open'
  `);
  const closedAt = Date.now();

  const closeAll = db.transaction((idList) => {
    idList.forEach((id) => {
      const { changes } = markDone.run(closedAt, closingObsId, id);
      if (changes !== 1) {
        throw new Error(`closeDeferredItems: id ${id} was not in 'open' status (changes=${changes})`);
      }
    });
  });
  closeAll(ids);
}
package/lib/git-state.mjs CHANGED
@@ -6,10 +6,25 @@ import { execFileSync } from 'child_process';
6
6
 
7
7
  const GIT_TIMEOUT_MS = 1500;
8
8
 
9
// Repository-location variables that make `git` ignore `cwd` and operate on
// some other repository, index or object store. The original four
// (GIT_DIR / GIT_INDEX_FILE / GIT_WORK_TREE / GIT_PREFIX) leak in when this
// module runs under a parent `git` process: pre-commit hooks running tests,
// hooks invoked under `git commit`, etc. The remaining three likewise
// redirect git away from `cwd`; they are part of the set git itself reports
// via `git rev-parse --local-env-vars`.
// NOTE(review): config-related entries of that set (GIT_CONFIG*) are
// deliberately left alone so caller-supplied configuration still applies —
// confirm that is the desired policy.
const GIT_REPO_ENV_VARS = [
  'GIT_DIR',
  'GIT_INDEX_FILE',
  'GIT_WORK_TREE',
  'GIT_PREFIX',
  'GIT_COMMON_DIR',
  'GIT_OBJECT_DIRECTORY',
  'GIT_ALTERNATE_OBJECT_DIRECTORIES',
];

/**
 * Build the environment for child `git` processes: a shallow copy of
 * `process.env` with the repository-location variables removed, so the child
 * resolves the repository from the requested `cwd`. Without this, values such
 * as headSha and `changed` reflect the parent's repo, not cwd's.
 *
 * `process.env` itself is never modified.
 *
 * @returns {Object<string, string>} Sanitized copy of the environment.
 */
function buildCleanEnv() {
  const env = { ...process.env };
  for (const name of GIT_REPO_ENV_VARS) {
    delete env[name];
  }
  return env;
}
22
+
9
23
  function run(cmd, args, { cwd } = {}) {
10
24
  try {
11
25
  return execFileSync(cmd, args, {
12
26
  cwd,
27
+ env: buildCleanEnv(),
13
28
  encoding: 'utf8',
14
29
  timeout: GIT_TIMEOUT_MS,
15
30
  // Suppress git's own stderr noise (e.g. "fatal: not a git repository").
@@ -0,0 +1,225 @@
1
+ // claude-mem-lite: import a Claude Code JSONL transcript file into the
2
+ // memory DB. One transcript ≈ one Claude Code session; we map:
3
+ // user line -> user_prompts row
4
+ // tool_use+result -> observations row (matched by tool_use_id)
5
+ // anything else -> ignored
6
+ //
7
+ // Idempotent: re-running on the same file does not duplicate. Dedup keys
8
+ // are derived from full SHA-256 of the joined components. \x1f (ASCII unit
9
+ // separator) as join glue so adjacent components can't collide via inputs
10
+ // containing the separator. Truncating prompt_text would collapse rapid
11
+ // same-session "yes / next / 继续" replies into one observation.
12
+ //
13
+ // Orphan tool_use (truncated transcript: tool_use without matching
14
+ // tool_result) gets a fallback observation marked '[tool_use without
15
+ // result — transcript truncated]' so retrieval surfaces the truncation.
16
+
17
+ import { readFileSync, statSync } from 'fs';
18
+ import { createHash } from 'crypto';
19
+ import { scrubSecrets } from '../secret-scrub.mjs';
20
+ import { scrubRecord } from './scrub-record.mjs';
21
+
22
// Maps a tool name found in the transcript to the observation `type` stored
// for it. Names not listed here are handled by the caller's fallback.
//
// Null-prototype + frozen: tool names come from transcript data, and on a
// plain object a lookup such as TOOL_TO_TYPE['constructor'] or ['toString']
// would return an inherited function instead of undefined.
const TOOL_TO_TYPE = Object.freeze(Object.assign(Object.create(null), {
  Edit: 'change', Write: 'change', NotebookEdit: 'change',
  Read: 'discovery', Grep: 'discovery', Glob: 'discovery',
  Bash: 'change', Task: 'discovery', Agent: 'discovery',
  Skill: 'discovery', WebFetch: 'discovery', WebSearch: 'discovery',
}));
28
+
29
/**
 * Derive a dedup key from an ordered list of components.
 *
 * The components are joined with \x1f (ASCII unit separator) and the full
 * SHA-256 of the joined string is returned as lowercase hex.
 *
 * @param {Array<*>} parts Components, in a fixed order chosen by the caller
 * @returns {string} 64-character hex digest
 */
function dedupKey(parts) {
  const joined = parts.join('\x1f');
  const hasher = createHash('sha256');
  hasher.update(joined);
  return hasher.digest('hex');
}
32
+
33
/**
 * Parse one transcript line as JSON.
 *
 * @param {string} line Raw line text
 * @returns {*} The parsed value, or null when the line is not valid JSON
 */
function parseLine(line) {
  let parsed;
  try {
    parsed = JSON.parse(line);
  } catch {
    // Malformed line: report it as null so the caller can count it as skipped.
    parsed = null;
  }
  return parsed;
}
36
+
37
/**
 * Make sure an sdk_sessions row exists for an imported session.
 *
 * Uses INSERT OR IGNORE, so an already-present session is left untouched.
 * The same id is written to both content_session_id and memory_session_id,
 * and the row is created with status 'completed'.
 *
 * @param {Database} db
 * @param {string} sessionId Session id taken from the transcript
 * @param {string} project Project name to attribute the session to
 * @param {string} ts Timestamp string stored as started_at
 * @returns {void}
 */
function ensureSession(db, sessionId, project, ts) {
  const insertSession = db.prepare(`
    INSERT OR IGNORE INTO sdk_sessions
      (content_session_id, memory_session_id, project, started_at, started_at_epoch, status)
    VALUES (?, ?, ?, ?, ?, 'completed')
  `);
  // An unparseable timestamp yields NaN (falsy) and falls back to "now".
  const parsedEpoch = Date.parse(ts);
  const startedAtEpoch = parsedEpoch || Date.now();
  insertSession.run(sessionId, sessionId, project, ts, startedAtEpoch);
}
44
+
45
/**
 * Import one `user` transcript event as a user_prompts row.
 *
 * The prompt text is the event's string content, or the newline-joined text
 * parts when the content is an array. Events with no text are ignored.
 *
 * @param {Database} db
 * @param {object} ev Parsed transcript event
 * @param {string} project Project name for the owning session
 * @param {Set<string>} seenPrompts Dedup keys already imported; mutated
 * @returns {boolean} true when a row was written, false when the event had no
 *   text or was already seen
 */
function importPrompt(db, ev, project, seenPrompts) {
  const content = ev?.message?.content;
  let text = '';
  if (typeof content === 'string') {
    text = content;
  } else if (Array.isArray(content)) {
    text = content.filter((c) => c?.type === 'text').map((c) => c.text).join('\n');
  }
  if (!text) return false;

  const sessionId = ev.sessionId || 'imported';
  const ts = ev.timestamp || new Date().toISOString();
  // Scrub first, truncate second. Cutting the raw text at 10000 chars before
  // scrubbing can slice a secret in half, leaving a prefix too short for the
  // scrubber to recognize — and that prefix would then be persisted.
  // NOTE(review): assumes scrubSecrets returns a string for string input.
  // NOTE(review): for prompts over 10000 chars that contain a secret, the
  // stored text (and so the dedup key) can differ from what the previous
  // truncate-then-scrub order produced; such rows may import once more.
  const safe = scrubSecrets(text).slice(0, 10000);
  // Dedup key uses the scrubbed text so a re-run computes the same key as the
  // first run (which persisted the scrubbed text). Keying on raw input would
  // make idempotency fragile if the scrub policy changes.
  const key = dedupKey([sessionId, ts, safe]);
  if (seenPrompts.has(key)) return false;
  seenPrompts.add(key);

  ensureSession(db, sessionId, project, ts);
  // Bump the per-session counter and use the new value as the prompt number;
  // fall back to 1 when the UPDATE returned no row.
  const bumped = db.prepare(
    'UPDATE sdk_sessions SET prompt_counter = COALESCE(prompt_counter, 0) + 1 WHERE content_session_id = ? RETURNING prompt_counter'
  ).get(sessionId);
  const promptNumber = bumped?.prompt_counter || 1;

  db.prepare(`
    INSERT OR IGNORE INTO user_prompts
      (content_session_id, prompt_text, prompt_number, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?)
  `).run(sessionId, safe, promptNumber, ts, Date.parse(ts) || Date.now());
  return true;
}
75
+
76
/**
 * Persist one tool_use / tool_result pair as an observations row.
 *
 * The stored text is `<input JSON>\n---\n<result text>`, each half capped at
 * 4000 characters; the title is `<toolName>: <command or file_path>` with the
 * target capped at 80 characters. All text fields pass through scrubRecord
 * before the INSERT.
 *
 * @param {Database} db
 * @param {object} toolUse tool_use event (reads name, input, sessionId, timestamp)
 * @param {object} toolResult Matching result; only `content` is read
 * @param {string} project Project name for the row and its session
 * @returns {boolean} Always true once the row has been inserted
 */
function importToolPair(db, toolUse, toolResult, project) {
  const sessionId = toolUse.sessionId || 'imported';
  const ts = toolUse.timestamp || new Date().toISOString();
  ensureSession(db, sessionId, project, ts);

  const toolName = toolUse.name || 'unknown';
  // Own-property check: tool names come from the transcript, and a bare
  // TOOL_TO_TYPE[toolName] on a plain object would return an inherited
  // function for names such as "constructor" or "toString".
  const type = Object.prototype.hasOwnProperty.call(TOOL_TO_TYPE, toolName)
    ? TOOL_TO_TYPE[toolName]
    : 'change';
  const inputJson = typeof toolUse.input === 'object'
    ? JSON.stringify(toolUse.input).slice(0, 4000)
    : String(toolUse.input ?? '').slice(0, 4000);
  // Cap BOTH result shapes. Previously only the non-string branch was sliced,
  // so a plain-string tool result was stored at full length.
  const rawResult = toolResult?.content;
  const resultText = (typeof rawResult === 'string'
    ? rawResult
    : JSON.stringify(rawResult ?? '')).slice(0, 4000);

  const isWriteTool = toolName === 'Edit' || toolName === 'Write' || toolName === 'NotebookEdit';
  const filesModified = isWriteTool && toolUse.input?.file_path
    ? [toolUse.input.file_path] : [];
  const filesRead = toolName === 'Read' && toolUse.input?.file_path
    ? [toolUse.input.file_path] : [];

  // String() keeps a non-string command/file_path from crashing `.slice`;
  // for string values the resulting title is unchanged.
  const target = String(toolUse.input?.command || toolUse.input?.file_path || '').slice(0, 80);
  const safe = scrubRecord('observations', {
    title: `${toolName}: ${target}`,
    subtitle: '',
    text: `${inputJson}\n---\n${resultText}`,
    narrative: '',
    concepts: '',
    facts: '',
    lesson_learned: null,
    search_aliases: null,
  });

  db.prepare(`
    INSERT INTO observations
      (memory_session_id, project, text, type, title, subtitle, narrative, concepts, facts, files_read, files_modified, importance, created_at, created_at_epoch)
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
  `).run(
    sessionId, project, safe.text, type, safe.title, safe.subtitle,
    safe.narrative, safe.concepts, safe.facts,
    JSON.stringify(filesRead), JSON.stringify(filesModified),
    1, ts, Date.parse(ts) || Date.now(),
  );
  return true;
}
119
+
120
/**
 * Import a single Claude Code JSONL transcript into the DB.
 *
 * Line handling:
 *   - `user` events      → user_prompts row via importPrompt; any
 *                          `tool_result` parts inside the message content are
 *                          paired with their pending tool_use
 *   - `assistant` events → every `tool_use` part is parked until its result
 *   - `tool_result`      → paired with the parked tool_use (top-level form)
 *   - anything else / unparseable → counted as skipped
 * tool_use entries that never receive a result are imported afterwards with a
 * placeholder result and counted as orphans.
 *
 * @param {import('better-sqlite3').Database} db
 * @param {string} path Absolute path to the .jsonl file
 * @param {{project: string}} opts
 * @returns {Promise<{prompts:number, observations:number, skipped:number, orphans:number}>}
 * @throws Propagates filesystem errors from statSync/readFileSync and any
 *   error raised while importing (the surrounding transaction wrapper rethrows).
 */
export async function importJsonl(db, path, { project }) {
  statSync(path);
  const lines = readFileSync(path, 'utf8').split('\n');
  const seenPrompts = new Set();
  const seenObs = new Set();
  // Pre-seed dedup sets from existing rows so a second run on the same file
  // is a no-op even when the in-memory `seen*` Sets start empty.
  for (const r of db.prepare('SELECT content_session_id, prompt_text, created_at FROM user_prompts').all()) {
    seenPrompts.add(dedupKey([r.content_session_id, r.created_at, r.prompt_text]));
  }
  // Observations carry no tool_use_id column, so cross-run idempotency uses
  // (memory_session_id, title, created_at) as a stand-in: the importer's title
  // format `${toolName}: ${command|path}` is stable across re-runs.
  //
  // Dual-key layering: `seenObs` holds the `existing:<title>:<ts>` form seeded
  // from the DB (cross-call); `seenToolUseIds` holds `(sessionId, tool_use_id)`
  // for this call only. The two key shapes never share a value — both checks
  // must run.
  for (const r of db.prepare('SELECT memory_session_id, title, created_at FROM observations').all()) {
    seenObs.add(dedupKey([r.memory_session_id, `existing:${r.title}:${r.created_at}`]));
  }

  const pendingToolUse = new Map();
  let prompts = 0;
  let observations = 0;
  let skipped = 0;

  const seenToolUseIds = new Set();
  // Gate in front of importToolPair applying both dedup layers.
  const tryImportToolPair = (useEv, resultEv) => {
    const sessionId = useEv.sessionId || 'imported';
    const useId = useEv.tool_use_id || useEv.id || '';
    const callKey = dedupKey([sessionId, useId]);
    if (seenToolUseIds.has(callKey)) return false;
    seenToolUseIds.add(callKey);

    // Cross-call dedup: rebuild the title the previous run STORED. Stored
    // titles have been through scrubRecord, so the preview must be scrubbed
    // the same way — comparing the raw preview would miss every row whose
    // title contained something the scrubber rewrote, duplicating it on re-run.
    // NOTE(review): assumes scrubRecord scrubs `title` independently of the
    // other fields of the record — confirm against lib/scrub-record.mjs.
    const toolName = useEv.name || 'unknown';
    const titlePreview = `${toolName}: ${(useEv.input?.command || useEv.input?.file_path || '').slice(0, 80)}`;
    const scrubbed = scrubRecord('observations', { title: titlePreview });
    const storedTitle = typeof scrubbed?.title === 'string' ? scrubbed.title : titlePreview;
    const ts = useEv.timestamp || new Date().toISOString();
    const crossKey = dedupKey([sessionId, `existing:${storedTitle}:${ts}`]);
    if (seenObs.has(crossKey)) return false;

    return importToolPair(db, useEv, resultEv, project);
  };

  // Pair a result with its parked tool_use. Returns false when no tool_use is
  // pending under that id; a pair rejected as a duplicate still counts as
  // handled (it is neither imported nor skipped).
  const pairResult = (resultEv) => {
    const useEv = pendingToolUse.get(resultEv.tool_use_id);
    if (!useEv) return false;
    if (tryImportToolPair(useEv, resultEv)) observations++;
    pendingToolUse.delete(resultEv.tool_use_id);
    return true;
  };

  const tx = db.transaction(() => {
    for (const line of lines) {
      if (!line.trim()) continue;
      const ev = parseLine(line);
      if (!ev) { skipped++; continue; }
      if (ev.type === 'user') {
        const imported = importPrompt(db, ev, project, seenPrompts);
        if (imported) prompts++;
        // NOTE(review): following the Messages API shape, tool results may
        // arrive as `tool_result` parts inside a user message rather than as
        // top-level events; handle both. Confirm against a real transcript.
        let paired = 0;
        const parts = Array.isArray(ev.message?.content) ? ev.message.content : [];
        for (const part of parts) {
          if (part?.type === 'tool_result' && pairResult({ ...part, timestamp: ev.timestamp })) {
            paired++;
          }
        }
        if (!imported && paired === 0) skipped++;
      } else if (ev.type === 'assistant' && Array.isArray(ev.message?.content)) {
        for (const part of ev.message.content) {
          if (part?.type === 'tool_use') {
            // Merge event-level fields (sessionId, timestamp) with the part's
            // own (id, name, input); the part wins on conflicts.
            pendingToolUse.set(part.id, { ...ev, ...part });
          }
        }
      } else if (ev.type === 'tool_result') {
        if (!pairResult(ev)) skipped++;
      } else {
        skipped++;
      }
    }
  });
  tx();

  // Orphan tool_use fallback: persist tool_use events that never paired with
  // a tool_result (truncated transcript / killed Claude Code session).
  let orphans = 0;
  if (pendingToolUse.size > 0) {
    const tx2 = db.transaction(() => {
      for (const [, useEv] of pendingToolUse) {
        const fauxResult = {
          content: '[tool_use without result — transcript truncated]',
          timestamp: useEv.timestamp,
        };
        if (tryImportToolPair(useEv, fauxResult)) orphans++;
      }
    });
    tx2();
  }

  return { prompts, observations, skipped, orphans };
}