claude-mem-lite 3.8.0 → 3.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/hook-llm.mjs +19 -0
- package/hook-update.mjs +19 -12
- package/hook.mjs +20 -24
- package/lib/citation-tracker.mjs +26 -14
- package/lib/dedup-constants.mjs +7 -0
- package/lib/err-sampler.mjs +18 -1
- package/lib/maintain-core.mjs +52 -1
- package/lib/search-core.mjs +4 -1
- package/mem-cli.mjs +9 -1
- package/package.json +1 -1
- package/registry.mjs +65 -5
- package/secret-scrub.mjs +28 -2
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "claude-mem-lite",
|
|
13
|
-
"version": "3.8.0",
|
|
13
|
+
"version": "3.9.1",
|
|
14
14
|
"source": "./",
|
|
15
15
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "3.8.0",
|
|
3
|
+
"version": "3.9.1",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "sdsrss"
|
package/hook-llm.mjs
CHANGED
|
@@ -20,6 +20,7 @@ import {
|
|
|
20
20
|
} from './hook-shared.mjs';
|
|
21
21
|
import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
|
|
22
22
|
import { isNoiseObservation, capNoiseImportance, isLowYieldChangeObs } from './lib/low-signal-patterns.mjs';
|
|
23
|
+
import { episodeHasSignificantContent } from './hook-episode.mjs';
|
|
23
24
|
|
|
24
25
|
// T9: memdir-incompatible types live in the `events` table, not `observations`.
|
|
25
26
|
// Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
|
|
@@ -467,6 +468,24 @@ export function buildDegradedTitle(episode) {
|
|
|
467
468
|
.trim();
|
|
468
469
|
}
|
|
469
470
|
|
|
471
|
+
// Best-effort SYNCHRONOUS persist of an episode's rule-based observation. Shared by
|
|
472
|
+
// the normal flush and the SIGTERM/SIGINT shutdown handler. The ep-flush-* file the
|
|
473
|
+
// shutdown handler writes has NO consumer (only spawnBackground-passed files are
|
|
474
|
+
// processed), so without this the in-flight episode is silently lost on abnormal
|
|
475
|
+
// termination — and spawning a detached child from a dying process is unreliable, so
|
|
476
|
+
// the save must be synchronous (audit #6). Never throws; returns the obs id or null.
|
|
477
|
+
export function saveEpisodeImmediate(episode, externalDb) {
|
|
478
|
+
try {
|
|
479
|
+
if (!episode || !Array.isArray(episode.entries) || episode.entries.length === 0) return null;
|
|
480
|
+
if (!episodeHasSignificantContent(episode)) return null;
|
|
481
|
+
const obs = buildImmediateObservation(episode);
|
|
482
|
+
return saveObservation(obs, episode.project, episode.sessionId, externalDb) || null;
|
|
483
|
+
} catch (e) {
|
|
484
|
+
debugCatch(e, 'saveEpisodeImmediate');
|
|
485
|
+
return null;
|
|
486
|
+
}
|
|
487
|
+
}
|
|
488
|
+
|
|
470
489
|
/**
|
|
471
490
|
* Build a rule-based observation from episode metadata for immediate DB persistence.
|
|
472
491
|
* Used as pre-save (before LLM) and as fallback when LLM is unavailable.
|
package/hook-update.mjs
CHANGED
|
@@ -440,20 +440,27 @@ async function fetchAssetBuffer(url) {
|
|
|
440
440
|
}
|
|
441
441
|
|
|
442
442
|
// I/O gate called from downloadAndInstall after validateExtractedTarball.
|
|
443
|
-
//
|
|
444
|
-
// embedded key
|
|
445
|
-
//
|
|
446
|
-
//
|
|
447
|
-
|
|
443
|
+
// Two regimes, switched by whether a public key is embedded:
|
|
444
|
+
// • No embedded key (the shipped default, RELEASE_PUBLIC_KEY=''): INERT —
|
|
445
|
+
// skipped-no-pubkey so an un-provisioned key can never brick auto-update.
|
|
446
|
+
// • Key embedded (signing active): FAIL CLOSED — a missing signature asset, a
|
|
447
|
+
// signature-asset fetch failure, or an invalid signature all return ok=false.
|
|
448
|
+
// Once we publish signed releases, an attacker who can publish a release or MITM
|
|
449
|
+
// the asset CDN must not bypass verification by stripping the signature assets
|
|
450
|
+
// (the tags-fallback path also sends assets:[]). A transient fetch failure only
|
|
451
|
+
// defers the install to the next ~6h poll, not a permanent brick. (audit P1 #5)
|
|
452
|
+
// The CLAUDE_MEM_SKIP_SIG_VERIFY escape hatch still forces a skip. publicKey is a
|
|
453
|
+
// param (defaulting to the embedded constant) only so tests can exercise both regimes.
|
|
454
|
+
export async function verifyReleaseAuthenticity(extractedDir, assets, publicKey = RELEASE_PUBLIC_KEY) {
|
|
448
455
|
if (process.env.CLAUDE_MEM_SKIP_SIG_VERIFY) return { ok: true, action: 'skipped-env' };
|
|
449
|
-
if (!
|
|
456
|
+
if (!publicKey) return { ok: true, action: 'skipped-no-pubkey' };
|
|
450
457
|
|
|
451
458
|
const list = Array.isArray(assets) ? assets : [];
|
|
452
459
|
const manifestAsset = list.find(a => a && a.name === MANIFEST_ASSET_NAME);
|
|
453
460
|
const sigAsset = list.find(a => a && a.name === SIGNATURE_ASSET_NAME);
|
|
454
461
|
if (!manifestAsset || !sigAsset) {
|
|
455
|
-
debugLog('WARN', 'hook-update', '
|
|
456
|
-
return { ok:
|
|
462
|
+
debugLog('WARN', 'hook-update', 'Signed-release mode: release carries no signature assets — refusing to install (possible downgrade/strip)');
|
|
463
|
+
return { ok: false, action: 'missing-signature' };
|
|
457
464
|
}
|
|
458
465
|
|
|
459
466
|
let manifestBytes, signatureB64;
|
|
@@ -461,12 +468,12 @@ export async function verifyReleaseAuthenticity(extractedDir, assets) {
|
|
|
461
468
|
manifestBytes = await fetchAssetBuffer(manifestAsset.browser_download_url);
|
|
462
469
|
signatureB64 = (await fetchAssetBuffer(sigAsset.browser_download_url)).toString('utf8').trim();
|
|
463
470
|
} catch (e) {
|
|
464
|
-
//
|
|
465
|
-
debugLog('WARN', 'hook-update', `
|
|
466
|
-
return { ok:
|
|
471
|
+
// Can't fetch the signature → can't verify → don't install this cycle (retries next poll).
|
|
472
|
+
debugLog('WARN', 'hook-update', `Signed-release mode: signature asset fetch failed (${e.message}) — refusing to install this cycle`);
|
|
473
|
+
return { ok: false, action: 'signature-fetch-failed' };
|
|
467
474
|
}
|
|
468
475
|
|
|
469
|
-
const r = verifyDownloadedRelease(extractedDir, manifestBytes, signatureB64);
|
|
476
|
+
const r = verifyDownloadedRelease(extractedDir, manifestBytes, signatureB64, publicKey);
|
|
470
477
|
if (!r.ok) return { ok: false, action: r.reason };
|
|
471
478
|
debugLog('DEBUG', 'hook-update', 'Release signature verified');
|
|
472
479
|
return { ok: true, action: 'verified' };
|
package/hook.mjs
CHANGED
|
@@ -27,7 +27,6 @@ import {
|
|
|
27
27
|
extractErrorKeywords, extractFilePaths, isRelatedToEpisode,
|
|
28
28
|
makeEntryDesc, scrubSecrets, stripPrivate, EDIT_TOOLS, debugCatch, debugLog,
|
|
29
29
|
COMPRESSED_AUTO, COMPRESSED_PENDING_PURGE, OBS_BM25,
|
|
30
|
-
computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity,
|
|
31
30
|
} from './utils.mjs';
|
|
32
31
|
import {
|
|
33
32
|
readEpisodeRaw, episodeFile,
|
|
@@ -43,11 +42,11 @@ import {
|
|
|
43
42
|
sessionFile, getSessionId, createSessionId, openDb,
|
|
44
43
|
spawnBackground, sweepOrphanEpisodeFiles,
|
|
45
44
|
} from './hook-shared.mjs';
|
|
46
|
-
import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation } from './hook-llm.mjs';
|
|
45
|
+
import { handleLLMEpisode, handleLLMSummary, saveObservation, buildImmediateObservation, saveEpisodeImmediate } from './hook-llm.mjs';
|
|
47
46
|
import { scrubRecord } from './lib/scrub-record.mjs';
|
|
48
47
|
import { formatHookError } from './lib/native-binding-hint.mjs';
|
|
49
48
|
import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
|
|
50
|
-
import { cleanupBroken, decayAndMarkIdle, boostAccessed } from './lib/maintain-core.mjs';
|
|
49
|
+
import { cleanupBroken, decayAndMarkIdle, boostAccessed, selectFuzzyDedupeIds } from './lib/maintain-core.mjs';
|
|
51
50
|
import {
|
|
52
51
|
extractCitationsFromTranscript,
|
|
53
52
|
extractAllInjected,
|
|
@@ -66,7 +65,6 @@ import { handleLLMOptimize } from './hook-optimize.mjs';
|
|
|
66
65
|
import { silentAutoAdopt, hasAutoAdoptMarker } from './adopt-cli.mjs';
|
|
67
66
|
import { emitV270UpgradeBanner } from './lib/upgrade-banner.mjs';
|
|
68
67
|
import { loadCiteBackForEpisode, extractCiteBackSignals, buildUnsavedBugfixHint, countUnsavedBugfixShape, buildCiteRecallNudge as libBuildCiteRecallNudge, nextCiteLowStreak } from './lib/cite-back-hint.mjs';
|
|
69
|
-
import { MINHASH_PREFILTER, FUZZY_DEDUP_THRESHOLD } from './lib/dedup-constants.mjs';
|
|
70
68
|
// plugin-cache-guard.mjs loaded dynamically — pre-2.31.2 installs that auto-upgraded
|
|
71
69
|
// from an older hook-update.mjs SOURCE_FILES (which did not list this module) would
|
|
72
70
|
// crash on static import. Degrade gracefully to no-op when the module is absent.
|
|
@@ -115,6 +113,10 @@ for (const sig of ['SIGTERM', 'SIGINT']) {
|
|
|
115
113
|
try {
|
|
116
114
|
const ep = readEpisodeRaw();
|
|
117
115
|
if (ep && ep.entries && ep.entries.length > 0) {
|
|
116
|
+
// Persist a rule-based observation synchronously BEFORE writing the flush
|
|
117
|
+
// file — that file has no consumer, so this is the only thing that prevents
|
|
118
|
+
// the in-flight episode being lost on abnormal termination (audit #6).
|
|
119
|
+
saveEpisodeImmediate(ep);
|
|
118
120
|
const flushFile = join(RUNTIME_DIR, `ep-flush-${Date.now()}-${randomUUID().slice(0, 8)}.json`);
|
|
119
121
|
writeFileSync(flushFile, JSON.stringify(ep));
|
|
120
122
|
try { unlinkSync(join(RUNTIME_DIR, `ep-${inferProject()}.json`)); } catch {}
|
|
@@ -551,7 +553,11 @@ async function handleStop() {
|
|
|
551
553
|
// Union closed by extractAllInjected — one integration point so the
|
|
552
554
|
// contract test in tests/citation-tracker-userprompt.test.mjs covers it.
|
|
553
555
|
try {
|
|
554
|
-
|
|
556
|
+
// mainOnly: the injected denominator must use the same thread
|
|
557
|
+
// filter as citedMain (the numerator, below) — an obs injected only
|
|
558
|
+
// inside a subagent (sidechain) would otherwise enter the denominator
|
|
559
|
+
// but never the numerator and streak-demote despite being used there.
|
|
560
|
+
const injected = extractAllInjected(transcriptPath, { mainOnly: true });
|
|
555
561
|
// P5 ①: cite-back signals — observations whose warned file the agent
|
|
556
562
|
// edited this session. Union into injected so they're resolved (they
|
|
557
563
|
// were injected via pre-tool-recall) and, below, into cited so the
|
|
@@ -788,7 +794,7 @@ function runSessionStartAutoMaintain(db) {
|
|
|
788
794
|
const SCAN_LIMIT = 500;
|
|
789
795
|
const FUZZY_MAX_MERGES = 20;
|
|
790
796
|
const recent = db.prepare(`
|
|
791
|
-
SELECT id, title, importance, created_at_epoch
|
|
797
|
+
SELECT id, title, importance, created_at_epoch, narrative, text
|
|
792
798
|
FROM observations
|
|
793
799
|
WHERE COALESCE(compressed_into, 0) = 0
|
|
794
800
|
AND superseded_at IS NULL
|
|
@@ -797,24 +803,14 @@ function runSessionStartAutoMaintain(db) {
|
|
|
797
803
|
ORDER BY created_at_epoch DESC LIMIT ${SCAN_LIMIT}
|
|
798
804
|
`).all(STALE_AGE);
|
|
799
805
|
if (recent.length >= 2) {
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
const
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < MINHASH_PREFILTER) continue;
|
|
809
|
-
if (jaccardSimilarity(titles[i], titles[j]) < FUZZY_DEDUP_THRESHOLD) continue;
|
|
810
|
-
// Keep the higher-importance row; tiebreak by older (lower id wins access history)
|
|
811
|
-
const keep = (recent[i].importance ?? 1) >= (recent[j].importance ?? 1) ? recent[i] : recent[j];
|
|
812
|
-
const remove = keep === recent[i] ? recent[j] : recent[i];
|
|
813
|
-
fuzzyRemoveIds.push(remove.id);
|
|
814
|
-
removed.add(remove.id);
|
|
815
|
-
if (fuzzyRemoveIds.length >= FUZZY_MAX_MERGES) break outer;
|
|
816
|
-
}
|
|
817
|
-
}
|
|
806
|
+
// audit #8: supersede only when title AND body match — title-only (a word-SET
|
|
807
|
+
// metric) collapsed distinct observations sharing a title token-set. The
|
|
808
|
+
// selection is the shared pure core in lib/maintain-core (unit-tested there).
|
|
809
|
+
const rows = recent.map(r => ({
|
|
810
|
+
id: r.id, title: r.title, importance: r.importance,
|
|
811
|
+
body: (r.narrative && r.narrative.trim()) || (r.text && r.text.trim()) || '',
|
|
812
|
+
}));
|
|
813
|
+
const fuzzyRemoveIds = selectFuzzyDedupeIds(rows, { maxMerges: FUZZY_MAX_MERGES });
|
|
818
814
|
if (fuzzyRemoveIds.length > 0) {
|
|
819
815
|
const ph = fuzzyRemoveIds.map(() => '?').join(',');
|
|
820
816
|
db.prepare(`UPDATE observations SET superseded_at = ?, superseded_by = 'auto-dedup-fuzzy' WHERE id IN (${ph})`)
|
package/lib/citation-tracker.mjs
CHANGED
|
@@ -182,8 +182,15 @@ function normalizeHookCommand(command) {
|
|
|
182
182
|
*
|
|
183
183
|
* @param {string|null|undefined} transcriptPath
|
|
184
184
|
* @param {(ctx: {command: string, text: string}) => void} fn
|
|
185
|
+
* @param {object} [opts]
|
|
186
|
+
* @param {boolean} [opts.mainOnly=false] If true, skip attachments on sidechain
|
|
187
|
+
* (subagent) transcript records. Mirrors extractCitationsFromTranscript's
|
|
188
|
+
* mainOnly so the citation-decay injected DENOMINATOR uses the same thread
|
|
189
|
+
* filter as the cited NUMERATOR — an obs injected only inside a subagent must
|
|
190
|
+
* not enter the denominator, else it streak-demotes despite being used there.
|
|
185
191
|
*/
|
|
186
|
-
function eachHookAttachment(transcriptPath, fn) {
|
|
192
|
+
function eachHookAttachment(transcriptPath, fn, opts = {}) {
|
|
193
|
+
const { mainOnly = false } = opts;
|
|
187
194
|
if (!transcriptPath || !existsSync(transcriptPath)) return;
|
|
188
195
|
let raw;
|
|
189
196
|
try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return; }
|
|
@@ -192,6 +199,7 @@ function eachHookAttachment(transcriptPath, fn) {
|
|
|
192
199
|
let entry;
|
|
193
200
|
try { entry = JSON.parse(line); } catch { continue; }
|
|
194
201
|
if (entry.type !== 'attachment') continue;
|
|
202
|
+
if (mainOnly && entry.isSidechain === true) continue;
|
|
195
203
|
const att = entry.attachment;
|
|
196
204
|
if (!att || att.type !== 'hook_success') continue;
|
|
197
205
|
const stdout = att.stdout || '';
|
|
@@ -217,14 +225,14 @@ function eachHookAttachment(transcriptPath, fn) {
|
|
|
217
225
|
* @param {string|null|undefined} transcriptPath
|
|
218
226
|
* @returns {Set<number>} unique injected IDs (empty set on missing path/file)
|
|
219
227
|
*/
|
|
220
|
-
export function extractInjectedFromPreToolUse(transcriptPath) {
|
|
228
|
+
export function extractInjectedFromPreToolUse(transcriptPath, opts = {}) {
|
|
221
229
|
const ids = new Set();
|
|
222
230
|
eachHookAttachment(transcriptPath, ({ command, text }) => {
|
|
223
231
|
if (!command.includes('pre-tool-recall')) return;
|
|
224
232
|
INJECTED_RE.lastIndex = 0;
|
|
225
233
|
let m;
|
|
226
234
|
while ((m = INJECTED_RE.exec(text))) addObsId(ids, m[1]);
|
|
227
|
-
});
|
|
235
|
+
}, opts);
|
|
228
236
|
return ids;
|
|
229
237
|
}
|
|
230
238
|
|
|
@@ -251,7 +259,7 @@ const UPS_COMMAND_SUFFIX = 'hook.mjs user-prompt';
|
|
|
251
259
|
* @param {string|null|undefined} transcriptPath
|
|
252
260
|
* @returns {Set<number>}
|
|
253
261
|
*/
|
|
254
|
-
export function extractInjectedFromUserPromptSubmit(transcriptPath) {
|
|
262
|
+
export function extractInjectedFromUserPromptSubmit(transcriptPath, opts = {}) {
|
|
255
263
|
const ids = new Set();
|
|
256
264
|
eachHookAttachment(transcriptPath, ({ command, text }) => {
|
|
257
265
|
if (!command.includes(UPS_COMMAND_SUFFIX)) return;
|
|
@@ -265,7 +273,7 @@ export function extractInjectedFromUserPromptSubmit(transcriptPath) {
|
|
|
265
273
|
if (matches.length === 0) continue;
|
|
266
274
|
addObsId(ids, matches[matches.length - 1][1]);
|
|
267
275
|
}
|
|
268
|
-
});
|
|
276
|
+
}, opts);
|
|
269
277
|
return ids;
|
|
270
278
|
}
|
|
271
279
|
|
|
@@ -280,7 +288,7 @@ export function extractInjectedFromUserPromptSubmit(transcriptPath) {
|
|
|
280
288
|
* @param {string|null|undefined} transcriptPath
|
|
281
289
|
* @returns {Set<number>}
|
|
282
290
|
*/
|
|
283
|
-
export function extractInjectedFromErrorRecall(transcriptPath) {
|
|
291
|
+
export function extractInjectedFromErrorRecall(transcriptPath, opts = {}) {
|
|
284
292
|
const ids = new Set();
|
|
285
293
|
eachHookAttachment(transcriptPath, ({ command, text }) => {
|
|
286
294
|
if (!command.includes('post-tool-use')) return;
|
|
@@ -290,7 +298,7 @@ export function extractInjectedFromErrorRecall(transcriptPath) {
|
|
|
290
298
|
INJECTED_RE.lastIndex = 0;
|
|
291
299
|
let m;
|
|
292
300
|
while ((m = INJECTED_RE.exec(text))) addObsId(ids, m[1]);
|
|
293
|
-
});
|
|
301
|
+
}, opts);
|
|
294
302
|
return ids;
|
|
295
303
|
}
|
|
296
304
|
|
|
@@ -311,7 +319,7 @@ const FYI_LINE_ID_RE = /^#(\d{1,7})\s/;
|
|
|
311
319
|
* @param {string|null|undefined} transcriptPath
|
|
312
320
|
* @returns {Set<number>}
|
|
313
321
|
*/
|
|
314
|
-
export function extractInjectedFromFyi(transcriptPath) {
|
|
322
|
+
export function extractInjectedFromFyi(transcriptPath, opts = {}) {
|
|
315
323
|
const ids = new Set();
|
|
316
324
|
eachHookAttachment(transcriptPath, ({ command, text }) => {
|
|
317
325
|
if (!command.includes('user-prompt-search')) return;
|
|
@@ -320,7 +328,7 @@ export function extractInjectedFromFyi(transcriptPath) {
|
|
|
320
328
|
const m = FYI_LINE_ID_RE.exec(fyiLine);
|
|
321
329
|
if (m) addObsId(ids, m[1]);
|
|
322
330
|
}
|
|
323
|
-
});
|
|
331
|
+
}, opts);
|
|
324
332
|
return ids;
|
|
325
333
|
}
|
|
326
334
|
|
|
@@ -330,14 +338,18 @@ export function extractInjectedFromFyi(transcriptPath) {
|
|
|
330
338
|
* user-prompt-search FYI block. Single integration point the Stop handler calls.
|
|
331
339
|
*
|
|
332
340
|
* @param {string|null|undefined} transcriptPath
|
|
341
|
+
* @param {object} [opts]
|
|
342
|
+
* @param {boolean} [opts.mainOnly=false] Skip sidechain-injected IDs. The
|
|
343
|
+
* citation-decay caller passes true so the injected denominator matches the
|
|
344
|
+
* mainOnly cited numerator; the P4 access-bump caller omits it (broader).
|
|
333
345
|
* @returns {Set<number>}
|
|
334
346
|
*/
|
|
335
|
-
export function extractAllInjected(transcriptPath) {
|
|
347
|
+
export function extractAllInjected(transcriptPath, opts = {}) {
|
|
336
348
|
return new Set([
|
|
337
|
-
...extractInjectedFromPreToolUse(transcriptPath),
|
|
338
|
-
...extractInjectedFromUserPromptSubmit(transcriptPath),
|
|
339
|
-
...extractInjectedFromErrorRecall(transcriptPath),
|
|
340
|
-
...extractInjectedFromFyi(transcriptPath),
|
|
349
|
+
...extractInjectedFromPreToolUse(transcriptPath, opts),
|
|
350
|
+
...extractInjectedFromUserPromptSubmit(transcriptPath, opts),
|
|
351
|
+
...extractInjectedFromErrorRecall(transcriptPath, opts),
|
|
352
|
+
...extractInjectedFromFyi(transcriptPath, opts),
|
|
341
353
|
]);
|
|
342
354
|
}
|
|
343
355
|
|
package/lib/dedup-constants.mjs
CHANGED
|
@@ -33,3 +33,10 @@ export const MINHASH_PREFILTER = 0.7;
|
|
|
33
33
|
// 0.95: strict title-Jaccard cutoff for the hook post-inject fuzzy-dedup pass — only
|
|
34
34
|
// collapse near-identical titles inline; anything softer waits for the maintain sweep.
|
|
35
35
|
export const FUZZY_DEDUP_THRESHOLD = 0.95;
|
|
36
|
+
|
|
37
|
+
// 0.5: companion BODY-Jaccard floor for the hook fuzzy-dedup pass (audit #8). Titles
|
|
38
|
+
// alone are a word-SET metric, so two distinct observations sharing a title token-set
|
|
39
|
+
// ("Fix auth bug in login.mjs" vs "Fix login.mjs auth bug") would collapse and hide
|
|
40
|
+
// one body. Requiring the narratives to also overlap means only a genuine re-save of
|
|
41
|
+
// the same event (near-identical body) supersedes; distinct bodies are kept.
|
|
42
|
+
export const FUZZY_BODY_THRESHOLD = 0.5;
|
package/lib/err-sampler.mjs
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
// Gated entirely by CLAUDE_MEM_CATCH_SAMPLE env (0..1). Default off. All
|
|
16
16
|
// failures inside the sampler are swallowed — never crash the caller.
|
|
17
17
|
|
|
18
|
-
import { appendFileSync, mkdirSync, existsSync } from 'fs';
|
|
18
|
+
import { appendFileSync, mkdirSync, existsSync, readdirSync, statSync, unlinkSync } from 'fs';
|
|
19
19
|
import { join } from 'path';
|
|
20
20
|
import { scrubSecrets } from '../secret-scrub.mjs';
|
|
21
21
|
|
|
@@ -32,6 +32,22 @@ function parseSampleRate(raw) {
|
|
|
32
32
|
return Number.isFinite(n) && n >= 0 && n <= 1 ? n : 0;
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
// Delete daily shards older than the retention window. Mirrors
|
|
36
|
+
// lib/hook-telemetry.pruneOldShards (the sibling JSONL sink). Without this the
|
|
37
|
+
// retention constant was dead and errors/ grew one shard/day forever once
|
|
38
|
+
// CLAUDE_MEM_CATCH_SAMPLE was set — a slow unbounded leak in the user data dir.
|
|
39
|
+
function pruneOldShards(dir) {
|
|
40
|
+
let entries;
|
|
41
|
+
try { entries = readdirSync(dir); } catch { return; }
|
|
42
|
+
const cutoff = Date.now() - SAMPLE_LOG_RETENTION_MS;
|
|
43
|
+
for (const f of entries) {
|
|
44
|
+
if (!/^\d{4}-\d{2}-\d{2}\.jsonl$/.test(f)) continue;
|
|
45
|
+
try {
|
|
46
|
+
if (statSync(join(dir, f)).mtimeMs < cutoff) unlinkSync(join(dir, f));
|
|
47
|
+
} catch { /* gone or unreadable — skip */ }
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
35
51
|
/**
|
|
36
52
|
* Sample one caught error into the daily JSONL log.
|
|
37
53
|
* @param {Error|unknown} e Caught error
|
|
@@ -59,6 +75,7 @@ export function maybeSampleError(e, ctx, dbDir) {
|
|
|
59
75
|
}) + '\n';
|
|
60
76
|
|
|
61
77
|
appendFileSync(join(errDir, `${today()}.jsonl`), line, { mode: 0o600 });
|
|
78
|
+
pruneOldShards(errDir);
|
|
62
79
|
} catch { /* sampler must never throw */ }
|
|
63
80
|
}
|
|
64
81
|
|
package/lib/maintain-core.mjs
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import { COMPRESSED_PENDING_PURGE, computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity } from '../utils.mjs';
|
|
16
16
|
import { rebuildVocabulary, computeVector, _resetVocabCache } from '../tfidf.mjs';
|
|
17
|
-
import { DEDUP_JACCARD_THRESHOLD, MINHASH_PRE_THRESHOLD as MINHASH_PRE_THRESHOLD_SRC } from './dedup-constants.mjs';
|
|
17
|
+
import { DEDUP_JACCARD_THRESHOLD, MINHASH_PRE_THRESHOLD as MINHASH_PRE_THRESHOLD_SRC, FUZZY_DEDUP_THRESHOLD, FUZZY_BODY_THRESHOLD, MINHASH_PREFILTER } from './dedup-constants.mjs';
|
|
18
18
|
|
|
19
19
|
export const STALE_AGE_MS = 30 * 86400000;
|
|
20
20
|
export const OP_CAP = 1000;
|
|
@@ -28,6 +28,57 @@ export const MINHASH_PRE_THRESHOLD = MINHASH_PRE_THRESHOLD_SRC;
|
|
|
28
28
|
// the regular decay op can't touch (decay protects injection_count>0).
|
|
29
29
|
export const PINNED_INJ_THRESHOLD = 8;
|
|
30
30
|
|
|
31
|
+
// Two trimmed bodies count as "the same body" when both are empty (a genuine
|
|
32
|
+
// no-body re-save) or their word-set Jaccard clears the floor. One-empty-one-not
|
|
33
|
+
// is treated as DISTINCT so a body-bearing observation is never hidden by a
|
|
34
|
+
// body-less peer that merely shares its title.
|
|
35
|
+
function bodiesSimilar(a, b, threshold) {
|
|
36
|
+
const ba = (a || '').trim();
|
|
37
|
+
const bb = (b || '').trim();
|
|
38
|
+
if (!ba && !bb) return true;
|
|
39
|
+
if (!ba || !bb) return false;
|
|
40
|
+
return jaccardSimilarity(ba, bb) >= threshold;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
/**
|
|
44
|
+
* Pick which near-duplicate observation ids to supersede in the hook fuzzy-dedup
|
|
45
|
+
* pass. Pure (no DB) so it is unit-testable. A pair must clear BOTH the title
|
|
46
|
+
* thresholds (MinHash prefilter → exact title Jaccard) AND the body Jaccard floor
|
|
47
|
+
* before the lower-importance row is marked for superseding (audit #8 — title-only
|
|
48
|
+
* matching collapsed observations with the same title token-set but different bodies).
|
|
49
|
+
* @param {Array<{id:number,title:string,body:string,importance:number}>} rows
|
|
50
|
+
* Candidate rows in scan order (caller decides ordering / recency window).
|
|
51
|
+
* @returns {number[]} ids to supersede (lower-importance member of each kept pair).
|
|
52
|
+
*/
|
|
53
|
+
export function selectFuzzyDedupeIds(rows, {
|
|
54
|
+
titleThreshold = FUZZY_DEDUP_THRESHOLD,
|
|
55
|
+
bodyThreshold = FUZZY_BODY_THRESHOLD,
|
|
56
|
+
minhashPrefilter = MINHASH_PREFILTER,
|
|
57
|
+
maxMerges = 20,
|
|
58
|
+
} = {}) {
|
|
59
|
+
const removeIds = [];
|
|
60
|
+
if (!Array.isArray(rows) || rows.length < 2) return removeIds;
|
|
61
|
+
const removed = new Set();
|
|
62
|
+
const titles = rows.map(r => (r.title || '').trim());
|
|
63
|
+
const minhashes = titles.map(t => t ? computeMinHash(t) : null);
|
|
64
|
+
outer: for (let i = 0; i < rows.length; i++) {
|
|
65
|
+
if (!minhashes[i] || removed.has(rows[i].id)) continue;
|
|
66
|
+
for (let j = i + 1; j < rows.length; j++) {
|
|
67
|
+
if (!minhashes[j] || removed.has(rows[j].id)) continue;
|
|
68
|
+
if (estimateJaccardFromMinHash(minhashes[i], minhashes[j]) < minhashPrefilter) continue;
|
|
69
|
+
if (jaccardSimilarity(titles[i], titles[j]) < titleThreshold) continue;
|
|
70
|
+
if (!bodiesSimilar(rows[i].body, rows[j].body, bodyThreshold)) continue;
|
|
71
|
+
// Keep the higher-importance row; tiebreak by earlier scan position (kept as i).
|
|
72
|
+
const keep = (rows[i].importance ?? 1) >= (rows[j].importance ?? 1) ? rows[i] : rows[j];
|
|
73
|
+
const remove = keep === rows[i] ? rows[j] : rows[i];
|
|
74
|
+
removeIds.push(remove.id);
|
|
75
|
+
removed.add(remove.id);
|
|
76
|
+
if (removeIds.length >= maxMerges) break outer;
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
return removeIds;
|
|
80
|
+
}
|
|
81
|
+
|
|
31
82
|
/** Delete broken observations (no title AND no narrative). Returns rows deleted. */
|
|
32
83
|
// Before hard-deleting observations, un-hide any rows merged INTO them. A child has
|
|
33
84
|
// compressed_into = <keeperId>; deleting that keeper (compressed_into has no FK) would
|
package/lib/search-core.mjs
CHANGED
|
@@ -428,7 +428,10 @@ export async function coreRunSearchPipeline(ctx, opts) {
|
|
|
428
428
|
const doReRank = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : !deepReranked;
|
|
429
429
|
if (doReRank) reRankWithContext(db, obsResults, rerankProject);
|
|
430
430
|
markSuperseded(obsResults);
|
|
431
|
-
|
|
431
|
+
// CLI single-source path must also re-sort when a context re-rank actually ran,
|
|
432
|
+
// else reRankWithContext's score boost mutates scores but never reorders output
|
|
433
|
+
// (audit #9). MCP branch unchanged. doReRank already implies a rerank happened.
|
|
434
|
+
const doReSort = rerankPolicy === 'mcp' ? (ftsQuery && !deepReranked) : (isCrossSource || doReRank);
|
|
432
435
|
if (doReSort) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
|
|
433
436
|
}
|
|
434
437
|
|
package/mem-cli.mjs
CHANGED
|
@@ -57,6 +57,11 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
57
57
|
return;
|
|
58
58
|
}
|
|
59
59
|
|
|
60
|
+
// Bare string flags parse to boolean `true`; without this guard `--branch` reaches
|
|
61
|
+
// the SQLite bind and crashes, while `--to`/`--project` silently change results
|
|
62
|
+
// (epoch-1 upper bound → zero rows; unscoped search). (audit P1 #3)
|
|
63
|
+
if (rejectBareStringFlags(flags, ['source', 'project', 'from', 'to', 'branch'])) return;
|
|
64
|
+
|
|
60
65
|
const limit = parseIntFlag(flags.limit, { name: '--limit', defaultValue: 20, max: 1000 });
|
|
61
66
|
const type = flags.type || null;
|
|
62
67
|
const validObsTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);
|
|
@@ -1400,8 +1405,11 @@ function cmdUpdate(db, args) {
|
|
|
1400
1405
|
`Prompts and sessions are append-only.`);
|
|
1401
1406
|
return;
|
|
1402
1407
|
}
|
|
1408
|
+
// Strict parseIdToken gate (aligned with cmdDelete): a bare parseInt fallback
|
|
1409
|
+
// truncated "3.9" → 3 and silently UPDATE'd the WRONG row #3 (no preview, no
|
|
1410
|
+
// --confirm). Require an exact obs-id token; non-matching input → usage error.
|
|
1403
1411
|
const parsed = raw ? parseIdToken(raw) : null;
|
|
1404
|
-
const id = parsed && parsed.source === null ? parsed.id :
|
|
1412
|
+
const id = parsed && parsed.source === null ? parsed.id : NaN;
|
|
1405
1413
|
if (!id || isNaN(id)) {
|
|
1406
1414
|
fail('[mem] Usage: claude-mem-lite update <id> [--title T] [--type T] [--importance N] [--lesson T] [--narrative T] [--concepts T]');
|
|
1407
1415
|
return;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "3.8.0",
|
|
3
|
+
"version": "3.9.1",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"packageManager": "npm@10.9.2",
|
package/registry.mjs
CHANGED
|
@@ -110,7 +110,7 @@ const TRIGGERS_SCHEMA = `
|
|
|
110
110
|
const INVOCATIONS_SCHEMA = `
|
|
111
111
|
CREATE TABLE IF NOT EXISTS invocations (
|
|
112
112
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
113
|
-
resource_id INTEGER NOT NULL REFERENCES resources(id),
|
|
113
|
+
resource_id INTEGER NOT NULL REFERENCES resources(id) ON DELETE CASCADE,
|
|
114
114
|
session_id TEXT,
|
|
115
115
|
trigger TEXT CHECK(trigger IN ('session_start','pre_tool_use','user_explicit','user_prompt')),
|
|
116
116
|
tier INTEGER CHECK(tier IN (1,2,3)),
|
|
@@ -195,11 +195,19 @@ export function ensureRegistryDb(dbPath) {
|
|
|
195
195
|
} catch (e) { debugCatch(e, 'resources-column-migration'); }
|
|
196
196
|
|
|
197
197
|
// Migrate: add 'github' to source CHECK constraint (required for smart import)
|
|
198
|
-
// Must disable FK checks during table recreation (RENAME triggers FK validation)
|
|
198
|
+
// Must disable FK checks during table recreation (RENAME triggers FK validation).
|
|
199
|
+
// legacy_alter_table=ON is REQUIRED: under modern SQLite (the better-sqlite3
|
|
200
|
+
// default) `ALTER TABLE resources RENAME TO resources_old` rewrites child-table FK
|
|
201
|
+
// references, so invocations.resource_id would become `REFERENCES resources_old`
|
|
202
|
+
// and the trailing DROP would leave it dangling — silently killing every future
|
|
203
|
+
// `INSERT INTO invocations` (audit P0 #1). Legacy mode keeps child FKs pointing at
|
|
204
|
+
// the original name, which the freshly-created `resources` table then satisfies.
|
|
205
|
+
let resourcesRebuilt = false;
|
|
199
206
|
try {
|
|
200
207
|
const resSchema = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='resources'`).get();
|
|
201
208
|
if (resSchema?.sql && !resSchema.sql.includes("'github'")) {
|
|
202
209
|
db.pragma('foreign_keys = OFF');
|
|
210
|
+
db.pragma('legacy_alter_table = ON');
|
|
203
211
|
try {
|
|
204
212
|
db.transaction(() => {
|
|
205
213
|
const hasOld = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='resources_old'`).get();
|
|
@@ -216,10 +224,18 @@ export function ensureRegistryDb(dbPath) {
|
|
|
216
224
|
const common = cols.filter(c => newCols.has(c)).join(', ');
|
|
217
225
|
db.exec(`INSERT INTO resources (${common}) SELECT ${common} FROM resources_old`);
|
|
218
226
|
db.exec(`DROP TABLE resources_old`);
|
|
227
|
+
// Recreate the table's indexes: the CREATE INDEX IF NOT EXISTS inside
|
|
228
|
+
// RESOURCES_SCHEMA above was SKIPPED while resources_old still held the
|
|
229
|
+
// index names, so the rebuilt table had NONE — including the UNIQUE
|
|
230
|
+
// idx_res_type_name that upsertResource's ON CONFLICT(type,name) requires
|
|
231
|
+
// (review HIGH-1; pre-existing, closed here). Names are free post-DROP.
|
|
232
|
+
db.exec(RESOURCES_SCHEMA);
|
|
219
233
|
})();
|
|
220
234
|
} finally {
|
|
235
|
+
db.pragma('legacy_alter_table = OFF');
|
|
221
236
|
db.pragma('foreign_keys = ON');
|
|
222
237
|
}
|
|
238
|
+
resourcesRebuilt = true;
|
|
223
239
|
}
|
|
224
240
|
} catch (e) { debugCatch(e, 'resources-source-check-migration'); }
|
|
225
241
|
|
|
@@ -231,6 +247,16 @@ export function ensureRegistryDb(dbPath) {
|
|
|
231
247
|
// Triggers: always ensure (IF NOT EXISTS) — fixes DBs where FTS5 was created without triggers
|
|
232
248
|
db.exec(TRIGGERS_SCHEMA);
|
|
233
249
|
|
|
250
|
+
// The source-CHECK migration replaced the `resources` content table out from under
|
|
251
|
+
// the external-content FTS index (content=resources), leaving resources_fts stale.
|
|
252
|
+
// Rebuild it so a later DELETE's res_fts_delete trigger doesn't throw "database disk
|
|
253
|
+
// image is malformed" against the mismatched index. Gated on the migration actually
|
|
254
|
+
// having run so we don't rebuild on every open.
|
|
255
|
+
if (resourcesRebuilt) {
|
|
256
|
+
try { db.exec("INSERT INTO resources_fts(resources_fts) VALUES('rebuild')"); }
|
|
257
|
+
catch (e) { debugCatch(e, 'resources-fts-rebuild-after-source-check'); }
|
|
258
|
+
}
|
|
259
|
+
|
|
234
260
|
db.exec(INVOCATIONS_SCHEMA);
|
|
235
261
|
|
|
236
262
|
// Migrate invocations CHECK constraint: add 'user_prompt' trigger value
|
|
@@ -281,10 +307,44 @@ export function ensureRegistryDb(dbPath) {
|
|
|
281
307
|
}
|
|
282
308
|
} catch (e) { debugCatch(e, 'rejection_reason-migration'); }
|
|
283
309
|
|
|
284
|
-
// Migrate:
|
|
310
|
+
// Migrate: add ON DELETE CASCADE to invocations.resource_id (audit P0 #4). Old DBs
|
|
311
|
+
// declared the FK with no ON DELETE action, so deleting a resource that had
|
|
312
|
+
// invocation history threw SQLITE_CONSTRAINT_FOREIGNKEY (registry remove /
|
|
313
|
+
// mem_registry delete) or silently no-op'd (dead-repo purge). SQLite can't ALTER an
|
|
314
|
+
// FK, so rebuild the table. Renaming the CHILD table is safe (nothing references
|
|
315
|
+
// invocations), so legacy_alter_table is not a concern here. Runs after the
|
|
316
|
+
// rejection_reason ADD COLUMN so the column exists in both old and new tables.
|
|
285
317
|
try {
|
|
286
|
-
db.
|
|
287
|
-
|
|
318
|
+
const schema = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='invocations'`).get();
|
|
319
|
+
if (schema?.sql && !/ON DELETE CASCADE/i.test(schema.sql)) {
|
|
320
|
+
db.transaction(() => {
|
|
321
|
+
const hasOld = db.prepare(`SELECT 1 FROM sqlite_master WHERE type='table' AND name='invocations_old'`).get();
|
|
322
|
+
if (hasOld) db.exec(`DROP TABLE invocations_old`);
|
|
323
|
+
db.exec(`ALTER TABLE invocations RENAME TO invocations_old`);
|
|
324
|
+
db.exec(INVOCATIONS_SCHEMA);
|
|
325
|
+
// Omit rejection_reason from the copy (matching the CHECK migrations above):
|
|
326
|
+
// it was historically a bare TEXT with NO CHECK, so an old row could hold a
|
|
327
|
+
// value outside INVOCATIONS_SCHEMA's current rejection_reason CHECK whitelist.
|
|
328
|
+
// Copying it would throw SQLITE_CONSTRAINT_CHECK → rollback → the FK is left
|
|
329
|
+
// un-cascaded forever and every retry re-fails (review HIGH-2). The column is
|
|
330
|
+
// never written at runtime, so copied rows get NULL — no data loss.
|
|
331
|
+
db.exec(`INSERT INTO invocations
|
|
332
|
+
(id, resource_id, session_id, trigger, tier, recommended, adopted, outcome, score, created_at)
|
|
333
|
+
SELECT id, resource_id, session_id, trigger, tier, recommended, adopted, outcome, score, created_at
|
|
334
|
+
FROM invocations_old`);
|
|
335
|
+
db.exec(`DROP TABLE invocations_old`);
|
|
336
|
+
// Recreate the table's indexes — the INVOCATIONS_SCHEMA CREATE INDEX above was
|
|
337
|
+
// skipped while invocations_old held the names (review HIGH-1). Free post-DROP.
|
|
338
|
+
db.exec(INVOCATIONS_SCHEMA);
|
|
339
|
+
})();
|
|
340
|
+
}
|
|
341
|
+
} catch (e) { debugCatch(e, 'invocations-ondelete-cascade-migration'); }
|
|
342
|
+
|
|
343
|
+
// (Removed the separate idx_invocations_resource_created migration — it was a column-
|
|
344
|
+
// identical duplicate of idx_inv_resource (resource_id, created_at) in INVOCATIONS_SCHEMA.
|
|
345
|
+
// It only ever survived because the rebuild migrations dropped idx_inv_resource; now that
|
|
346
|
+
// the rebuilds recreate their indexes (review HIGH-1), the duplicate is pure dead weight.
|
|
347
|
+
// Pre-existing DBs keep their old idx_invocations_resource_created; it's harmless.)
|
|
288
348
|
|
|
289
349
|
db.exec(PREINSTALLED_SCHEMA);
|
|
290
350
|
|
package/secret-scrub.mjs
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
// claude-mem-lite: Secret pattern detection and scrubbing
|
|
2
2
|
// Extracted from utils.mjs for focused responsibility
|
|
3
3
|
|
|
4
|
+
import { stripPrivate } from './lib/private-strip.mjs';
|
|
5
|
+
|
|
4
6
|
// ─── Secret Patterns ──────────────────────────────────────────────────────
|
|
5
7
|
|
|
6
8
|
export const SECRET_PATTERNS = [
|
|
@@ -28,7 +30,28 @@ export const SECRET_PATTERNS = [
|
|
|
28
30
|
// access_token / refresh_token are the canonical OAuth2 field names — they were
|
|
29
31
|
// missing from this KV list (drift vs the JSON list below). `(?:\b|_)` for the same
|
|
30
32
|
// underscore-prefix reason.
|
|
31
|
-
|
|
33
|
+
// `pgpassword|pgpass|mysql_pwd` are well-known credential ENV-VAR names whose
|
|
34
|
+
// keyword tail is unreachable via the noun list above (`PGPASSWORD`=PG+password has
|
|
35
|
+
// no \b/_ before "password"; `MYSQL_PWD` has no "password"/"token" substring). They
|
|
36
|
+
// live in THIS pattern (no prose lookbehind) so `export PGPASSWORD=x` / `env MYSQL_PWD=x`
|
|
37
|
+
// scrub — a compound credential env-var name is unambiguous config even after a word.
|
|
38
|
+
// Enumerating known names (not a blanket letter-prefix) preserves the deliberate
|
|
39
|
+
// low-FP decision that `topsecret=` / `access_token_count:` are non-credentials
|
|
40
|
+
// (#8283 + utils.test.mjs:1089-1100); bare `pwd` is omitted so `PWD=` (a path) survives.
|
|
41
|
+
[/((?:\b|_)(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token|pgpassword|pgpass|mysql_pwd)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
|
|
42
|
+
// Bare-key QUOTED values — `api_key="..."`, `password: '...'`. The unquoted KV
|
|
43
|
+
// patterns above stop at `'`/`"` (excluded from their value class), so a quoted
|
|
44
|
+
// value matched 0 chars and slipped through. Consumes the opening quote, the value,
|
|
45
|
+
// and the matching close quote (backref \2), replacing only the value. Unlike the
|
|
46
|
+
// JSON pattern below it does NOT require the KEY to be quoted, covering `key="value"`
|
|
47
|
+
// object-literal / YAML / quoted-.env shapes. Split into the SAME two patterns as the
|
|
48
|
+
// unquoted KV pairs above so prose survives — a quoted value does not turn prose into
|
|
49
|
+
// config (`the token: "x"` is still prose, must NOT scrub; #8283 / utils.test.mjs:1090).
|
|
50
|
+
// (a) bare credential nouns keep the prose lookbehind:
|
|
51
|
+
[/((?<![A-Za-z][ \t])(?:\b|_)(?:password|passwd|token|bearer|secret)\s*[=:]\s*)(['"])[^'"]{6,}\2/gi, '$1$2***$2'],
|
|
52
|
+
// (b) structured keys + named env vars are unambiguous config even after a word
|
|
53
|
+
// (`see api_key: "x"` DOES scrub, mirroring the unquoted structured-key path):
|
|
54
|
+
[/((?:\b|_)(?:pgpassword|pgpass|mysql_pwd|api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token)\s*[=:]\s*)(['"])[^'"]{6,}\2/gi, '$1$2***$2'],
|
|
32
55
|
// AWS access keys (AKIA...)
|
|
33
56
|
[/\bAKIA[A-Z0-9]{16}\b/g, '***'],
|
|
34
57
|
// OpenAI / Anthropic keys (sk-...) — specific prefixes have lower length threshold
|
|
@@ -94,12 +117,15 @@ export const SECRET_PATTERNS = [
|
|
|
94
117
|
|
|
95
118
|
/**
|
|
96
119
|
* Scrub known secret patterns (API keys, tokens, credentials) from text.
|
|
120
|
+
* Also strips user-marked `<private>...</private>` blocks first, so every
|
|
121
|
+
* persistence/log path that scrubs secrets inherits the `<private>` opt-out —
|
|
122
|
+
* previously stripPrivate ran only on the user-prompt hook, not on writes.
|
|
97
123
|
* @param {string} text Input text potentially containing secrets
|
|
98
124
|
* @returns {string} Text with secrets replaced by '***'
|
|
99
125
|
*/
|
|
100
126
|
export function scrubSecrets(text) {
|
|
101
127
|
if (!text || typeof text !== 'string') return text || '';
|
|
102
|
-
let result = text;
|
|
128
|
+
let result = stripPrivate(text);
|
|
103
129
|
for (const [pattern, replacement] of SECRET_PATTERNS) {
|
|
104
130
|
result = result.replace(pattern, replacement);
|
|
105
131
|
}
|