claude-mem-lite 2.88.0 → 2.90.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +2 -2
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +11 -9
- package/README.zh-CN.md +8 -8
- package/cli/activity.mjs +9 -5
- package/cli.mjs +11 -9
- package/haiku-client.mjs +20 -10
- package/hook-handoff.mjs +44 -12
- package/hook-llm.mjs +4 -3
- package/hook-optimize.mjs +7 -3
- package/hook-update.mjs +11 -4
- package/hook.mjs +28 -14
- package/install.mjs +46 -19
- package/lib/citation-tracker.mjs +61 -1
- package/lib/cite-back-hint.mjs +39 -1
- package/lib/cli-flags.mjs +24 -2
- package/lib/compress-core.mjs +24 -4
- package/lib/dedup-constants.mjs +35 -0
- package/lib/maintain-core.mjs +5 -2
- package/lib/save-observation.mjs +1 -1
- package/mem-cli.mjs +163 -17
- package/nlp.mjs +6 -0
- package/package.json +3 -2
- package/schema.mjs +45 -3
- package/search-engine.mjs +2 -1
- package/server.mjs +8 -2
- package/source-files.mjs +5 -0
- package/tfidf.mjs +12 -8
package/install.mjs
CHANGED
|
@@ -10,8 +10,18 @@ import { createRequire } from 'node:module';
|
|
|
10
10
|
|
|
11
11
|
const PROJECT_DIR = resolve(import.meta.dirname ?? dirname(fileURLToPath(import.meta.url)));
|
|
12
12
|
const SETTINGS_PATH = join(homedir(), '.claude', 'settings.json');
|
|
13
|
+
// Plugin CODE / install location — ALWAYS homedir-rooted. Claude Code's
|
|
14
|
+
// settings.json + MCP registration bake ABSOLUTE paths to server.mjs / hooks here,
|
|
15
|
+
// and env vars are per-shell (the MCP launcher won't reliably inherit
|
|
16
|
+
// CLAUDE_MEM_DIR), so code must NOT follow the relocation env var.
|
|
13
17
|
const DATA_DIR = join(homedir(), '.claude-mem-lite');
|
|
14
|
-
|
|
18
|
+
// User DATA location — DB, managed resources, registry DB, runtime/. Honors
|
|
19
|
+
// CLAUDE_MEM_DIR exactly like schema.mjs DB_DIR so the installer WRITES data where
|
|
20
|
+
// the runtime/data layer READS it (pre-fix: installer wrote homedir, runtime read
|
|
21
|
+
// the relocated dir → preinstalled skills silently vanished, doctor read the wrong
|
|
22
|
+
// DB). Equals DATA_DIR when CLAUDE_MEM_DIR is unset (the common case).
|
|
23
|
+
const MEM_DATA_DIR = process.env.CLAUDE_MEM_DIR || DATA_DIR;
|
|
24
|
+
const DB_PATH = join(MEM_DATA_DIR, 'claude-mem-lite.db');
|
|
15
25
|
const OLD_DATA_DIR = join(homedir(), '.claude-mem');
|
|
16
26
|
|
|
17
27
|
// Detect ephemeral context (npx) — files won't persist after exit
|
|
@@ -319,6 +329,8 @@ async function install() {
|
|
|
319
329
|
}
|
|
320
330
|
|
|
321
331
|
if (!existsSync(DATA_DIR)) mkdirSync(DATA_DIR, { recursive: true });
|
|
332
|
+
// Under relocation the DB/managed/runtime live here, not in the code dir — create it too.
|
|
333
|
+
if (!existsSync(MEM_DATA_DIR)) mkdirSync(MEM_DATA_DIR, { recursive: true });
|
|
322
334
|
|
|
323
335
|
if (IS_DEV) {
|
|
324
336
|
log('Dev mode — creating symlinks in ~/.claude-mem-lite/...');
|
|
@@ -675,7 +687,7 @@ async function install() {
|
|
|
675
687
|
// "no such column: memory_session_id". Rename to a timestamped backup
|
|
676
688
|
// so the new install creates a fresh v28 DB.
|
|
677
689
|
try {
|
|
678
|
-
const r = migrateLegacyClaudeMemData(OLD_DATA_DIR,
|
|
690
|
+
const r = migrateLegacyClaudeMemData(OLD_DATA_DIR, MEM_DATA_DIR);
|
|
679
691
|
if (r.action === 'backed-up') {
|
|
680
692
|
ok(`Legacy ~/.claude-mem/ DB backed up to ${r.backupPath}`);
|
|
681
693
|
log('New v28 DB will be created on first launch (legacy schema is incompatible).');
|
|
@@ -685,7 +697,7 @@ async function install() {
|
|
|
685
697
|
}
|
|
686
698
|
|
|
687
699
|
// 5b. Rename claude-mem.db → claude-mem-lite.db in same directory
|
|
688
|
-
const oldDbInDir = join(
|
|
700
|
+
const oldDbInDir = join(MEM_DATA_DIR, 'claude-mem.db');
|
|
689
701
|
if (existsSync(oldDbInDir) && !existsSync(DB_PATH)) {
|
|
690
702
|
renameSync(oldDbInDir, DB_PATH);
|
|
691
703
|
for (const ext of ['-wal', '-shm']) {
|
|
@@ -714,7 +726,7 @@ async function install() {
|
|
|
714
726
|
const resources = manifest.resources || [];
|
|
715
727
|
|
|
716
728
|
if (resources.length > 0) {
|
|
717
|
-
const managedDir = join(
|
|
729
|
+
const managedDir = join(MEM_DATA_DIR, 'managed');
|
|
718
730
|
|
|
719
731
|
// 6a. Git shallow clone unique repos
|
|
720
732
|
const repos = new Map();
|
|
@@ -805,7 +817,7 @@ async function install() {
|
|
|
805
817
|
|
|
806
818
|
// 6b. Init registry DB and record preinstalled entries
|
|
807
819
|
const { ensureRegistryDb } = await importFromInstall('registry.mjs');
|
|
808
|
-
const regDbPath = join(
|
|
820
|
+
const regDbPath = join(MEM_DATA_DIR, 'resource-registry.db');
|
|
809
821
|
const rdb = ensureRegistryDb(regDbPath);
|
|
810
822
|
|
|
811
823
|
const insertPre = rdb.prepare(`
|
|
@@ -853,7 +865,7 @@ async function install() {
|
|
|
853
865
|
// 6d. Scan and index resources (fallback-only, Haiku indexing deferred to first run)
|
|
854
866
|
log(' Scanning resources...');
|
|
855
867
|
const { scanAllResources, diffResources } = await importFromInstall('registry-scanner.mjs');
|
|
856
|
-
const scanned = scanAllResources({ dataDir:
|
|
868
|
+
const scanned = scanAllResources({ dataDir: MEM_DATA_DIR });
|
|
857
869
|
|
|
858
870
|
// Attach star counts and repo URLs
|
|
859
871
|
for (const s of scanned) {
|
|
@@ -1063,15 +1075,26 @@ async function uninstall() {
|
|
|
1063
1075
|
|
|
1064
1076
|
// 6. Purge data if requested
|
|
1065
1077
|
if (flags.has('--purge')) {
|
|
1066
|
-
const
|
|
1067
|
-
|
|
1078
|
+
const homeDir = join(homedir(), '.claude-mem-lite');
|
|
1079
|
+
// Always remove the homedir code/install dir (guarded to the canonical path).
|
|
1080
|
+
if (existsSync(DATA_DIR) && DATA_DIR === homeDir) {
|
|
1068
1081
|
rmSync(DATA_DIR, { recursive: true, force: true });
|
|
1069
1082
|
ok('Data purged (~/.claude-mem-lite/)');
|
|
1070
1083
|
} else if (existsSync(DATA_DIR)) {
|
|
1071
1084
|
fail('DATA_DIR path mismatch, refusing to purge for safety: ' + DATA_DIR);
|
|
1072
1085
|
}
|
|
1086
|
+
// Also remove the relocated data dir — but ONLY if it's genuinely our data dir
|
|
1087
|
+
// (contains claude-mem-lite.db), so a mistyped CLAUDE_MEM_DIR is never rm'd.
|
|
1088
|
+
if (MEM_DATA_DIR !== homeDir) {
|
|
1089
|
+
if (existsSync(join(MEM_DATA_DIR, 'claude-mem-lite.db'))) {
|
|
1090
|
+
rmSync(MEM_DATA_DIR, { recursive: true, force: true });
|
|
1091
|
+
ok(`Relocated data purged (${MEM_DATA_DIR})`);
|
|
1092
|
+
} else if (existsSync(MEM_DATA_DIR)) {
|
|
1093
|
+
warn(`CLAUDE_MEM_DIR (${MEM_DATA_DIR}) has no claude-mem-lite.db — left untouched. Remove manually if intended.`);
|
|
1094
|
+
}
|
|
1095
|
+
}
|
|
1073
1096
|
} else {
|
|
1074
|
-
log('Data preserved
|
|
1097
|
+
log('Data preserved (use --purge to remove)');
|
|
1075
1098
|
}
|
|
1076
1099
|
|
|
1077
1100
|
console.log('\n Done!\n');
|
|
@@ -1383,7 +1406,7 @@ async function doctor() {
|
|
|
1383
1406
|
|
|
1384
1407
|
// Update state
|
|
1385
1408
|
try {
|
|
1386
|
-
const stateFile = join(
|
|
1409
|
+
const stateFile = join(MEM_DATA_DIR, 'runtime', 'update-state.json');
|
|
1387
1410
|
if (existsSync(stateFile)) {
|
|
1388
1411
|
const state = JSON.parse(readFileSync(stateFile, 'utf8'));
|
|
1389
1412
|
const parts = [];
|
|
@@ -1439,11 +1462,14 @@ async function doctor() {
|
|
|
1439
1462
|
|
|
1440
1463
|
// Stale temp files
|
|
1441
1464
|
try {
|
|
1442
|
-
|
|
1465
|
+
// hook-update + the episode workers write runtime/ + staging under DB_DIR
|
|
1466
|
+
// (= MEM_DATA_DIR, env-aware), NOT the homedir code dir — scan there so doctor
|
|
1467
|
+
// sees the real residue under relocation.
|
|
1468
|
+
const runtimeDir = join(MEM_DATA_DIR, 'runtime');
|
|
1443
1469
|
let staleCount = 0;
|
|
1444
1470
|
const stalePatterns = ['.update-staging-', '.update-backup-'];
|
|
1445
|
-
if (existsSync(
|
|
1446
|
-
for (const f of readdirSync(
|
|
1471
|
+
if (existsSync(MEM_DATA_DIR)) {
|
|
1472
|
+
for (const f of readdirSync(MEM_DATA_DIR)) {
|
|
1447
1473
|
if (stalePatterns.some(p => f.startsWith(p))) staleCount++;
|
|
1448
1474
|
}
|
|
1449
1475
|
}
|
|
@@ -1712,10 +1738,11 @@ function cleanup() {
|
|
|
1712
1738
|
console.log(`\nclaude-mem-lite cleanup${dryRun ? ' (--dry-run)' : ''}\n`);
|
|
1713
1739
|
let removed = 0;
|
|
1714
1740
|
|
|
1715
|
-
// Clean .update-staging-* / .update-backup-*
|
|
1741
|
+
// Clean .update-staging-* / .update-backup-* — hook-update writes these under
|
|
1742
|
+
// DB_DIR (= MEM_DATA_DIR, env-aware), so scan the data dir, not the homedir code dir.
|
|
1716
1743
|
const stalePatterns = ['.update-staging-', '.update-backup-'];
|
|
1717
|
-
if (existsSync(
|
|
1718
|
-
for (const f of readdirSync(
|
|
1744
|
+
if (existsSync(MEM_DATA_DIR)) {
|
|
1745
|
+
for (const f of readdirSync(MEM_DATA_DIR)) {
|
|
1719
1746
|
if (stalePatterns.some(p => f.startsWith(p))) {
|
|
1720
1747
|
if (dryRun) {
|
|
1721
1748
|
ok(`Would remove: ${f}`);
|
|
@@ -1723,7 +1750,7 @@ function cleanup() {
|
|
|
1723
1750
|
continue;
|
|
1724
1751
|
}
|
|
1725
1752
|
try {
|
|
1726
|
-
rmSync(join(
|
|
1753
|
+
rmSync(join(MEM_DATA_DIR, f), { recursive: true, force: true });
|
|
1727
1754
|
ok(`Removed: ${f}`);
|
|
1728
1755
|
removed++;
|
|
1729
1756
|
} catch (e) {
|
|
@@ -1733,8 +1760,8 @@ function cleanup() {
|
|
|
1733
1760
|
}
|
|
1734
1761
|
}
|
|
1735
1762
|
|
|
1736
|
-
// Clean pending-* / ep-flush-* in runtime/
|
|
1737
|
-
const runtimeDir = join(
|
|
1763
|
+
// Clean pending-* / ep-flush-* in runtime/ (under the env-aware data dir)
|
|
1764
|
+
const runtimeDir = join(MEM_DATA_DIR, 'runtime');
|
|
1738
1765
|
if (existsSync(runtimeDir)) {
|
|
1739
1766
|
for (const f of readdirSync(runtimeDir)) {
|
|
1740
1767
|
if (f.startsWith('pending-') || f.startsWith('ep-flush-')) {
|
package/lib/citation-tracker.mjs
CHANGED
|
@@ -387,6 +387,42 @@ const IMPORTANCE_CAP = 3;
|
|
|
387
387
|
const IMPORTANCE_FLOOR = 0;
|
|
388
388
|
const UNCITED_STREAK_THRESHOLD = 3;
|
|
389
389
|
|
|
390
|
+
// Adoption-rate gate (P5 ②). A project's cite-rate is SUM(cited_count) /
|
|
391
|
+
// SUM(decay_seen_count) over its non-superseded observations: of every decay
|
|
392
|
+
// resolution this project has ever produced, what fraction were citations.
|
|
393
|
+
// Below ADOPTION_THRESHOLD with at least ADOPTION_MIN_SEEN resolutions on record,
|
|
394
|
+
// the project has demonstrably not adopted the #NN convention, so we suppress
|
|
395
|
+
// DEMOTION (never promotion) — see the construct-validity note on
|
|
396
|
+
// applyCitationDecay. MIN_SEEN keeps the gate dormant for low-data projects so
|
|
397
|
+
// the established behavior is preserved until there's enough signal to judge.
|
|
398
|
+
const ADOPTION_THRESHOLD = 0.02;
|
|
399
|
+
const ADOPTION_MIN_SEEN = 8;
|
|
400
|
+
|
|
401
|
+
/**
|
|
402
|
+
* Compute a project's citation-adoption snapshot: total citations vs total decay
|
|
403
|
+
* resolutions on record, and their ratio. Read-only; safe to call before the
|
|
404
|
+
* decay transaction (the gate decision is made on the pre-mutation snapshot).
|
|
405
|
+
*
|
|
406
|
+
* @param {import('better-sqlite3').Database} db
|
|
407
|
+
* @param {string} project
|
|
408
|
+
* @returns {{cited: number, seen: number, rate: number}}
|
|
409
|
+
*/
|
|
410
|
+
export function computeCitationAdoption(db, project) {
|
|
411
|
+
const empty = { cited: 0, seen: 0, rate: 0 };
|
|
412
|
+
if (!db || !project) return empty;
|
|
413
|
+
try {
|
|
414
|
+
const row = db.prepare(`
|
|
415
|
+
SELECT COALESCE(SUM(cited_count), 0) AS cited,
|
|
416
|
+
COALESCE(SUM(decay_seen_count), 0) AS seen
|
|
417
|
+
FROM observations
|
|
418
|
+
WHERE project = ? AND superseded_at IS NULL
|
|
419
|
+
`).get(project);
|
|
420
|
+
const cited = row?.cited || 0;
|
|
421
|
+
const seen = row?.seen || 0;
|
|
422
|
+
return { cited, seen, rate: seen > 0 ? cited / seen : 0 };
|
|
423
|
+
} catch (e) { debugCatch(e, 'computeCitationAdoption'); return empty; }
|
|
424
|
+
}
|
|
425
|
+
|
|
390
426
|
/**
|
|
391
427
|
* Apply the citation-feedback loop for one session: for each injected obs id,
|
|
392
428
|
* decide cited vs uncited and mutate importance/streak/cited_count per spec.
|
|
@@ -398,6 +434,20 @@ const UNCITED_STREAK_THRESHOLD = 3;
|
|
|
398
434
|
* - cross-project IDs are silently ignored by the WHERE clause.
|
|
399
435
|
* - MEM_DISABLE_CITATION_DECAY=1 disables all writes; returns zeros.
|
|
400
436
|
*
|
|
437
|
+
* CONSTRUCT-VALIDITY ASSUMPTION (P5): a "citation" is operationally two signals,
|
|
438
|
+
* neither of which is ground-truth behavioral impact:
|
|
439
|
+
* 1. the literal `#NN` token appears in main-thread assistant text (citedIds), and
|
|
440
|
+
* 2. (cite-back) the agent edited a file a prior lesson #NN had warned about —
|
|
441
|
+
* unioned into citedIds by the Stop handler before this call.
|
|
442
|
+
* Signal 2 was added because signal 1 alone penalizes projects that act on a
|
|
443
|
+
* lesson without typing its id. Even so, both are proxies. For a project that has
|
|
444
|
+
* never cited anything (cite-rate below ADOPTION_THRESHOLD over ≥ADOPTION_MIN_SEEN
|
|
445
|
+
* resolutions), demotion is suppressed: absent any positive signal we cannot
|
|
446
|
+
* distinguish "useless lesson" from "useful lesson in a project that doesn't use
|
|
447
|
+
* the #NN convention," and a false demotion is the costlier error. The gate trades
|
|
448
|
+
* missed demotions (stale lessons linger) for avoided false demotions. Promotion
|
|
449
|
+
* is never gated — a single citation lifts the project's rate and re-enables decay.
|
|
450
|
+
*
|
|
401
451
|
* @param {import('better-sqlite3').Database} db
|
|
402
452
|
* @param {string} project
|
|
403
453
|
* @param {Set<number>|Iterable<number>} injectedIds
|
|
@@ -413,6 +463,13 @@ export function applyCitationDecay(db, project, injectedIds, citedIds, sessionId
|
|
|
413
463
|
if (injected.size === 0) return empty;
|
|
414
464
|
const cited = citedIds instanceof Set ? citedIds : new Set(citedIds || []);
|
|
415
465
|
|
|
466
|
+
// Adoption gate (snapshot taken before any mutation this run). Suppress only
|
|
467
|
+
// demotion; promotion always proceeds. Threshold overridable via env.
|
|
468
|
+
const adoption = computeCitationAdoption(db, project);
|
|
469
|
+
const envThreshold = Number.parseFloat(process.env.CLAUDE_MEM_CITATION_ADOPTION_THRESHOLD);
|
|
470
|
+
const adoptionThreshold = Number.isFinite(envThreshold) && envThreshold >= 0 ? envThreshold : ADOPTION_THRESHOLD;
|
|
471
|
+
const suppressDemotion = adoption.seen >= ADOPTION_MIN_SEEN && adoption.rate < adoptionThreshold;
|
|
472
|
+
|
|
416
473
|
const selectStmt = db.prepare(
|
|
417
474
|
'SELECT id, importance, uncited_streak, last_decided_session_id FROM observations WHERE id = ? AND project = ?'
|
|
418
475
|
);
|
|
@@ -457,7 +514,10 @@ export function applyCitationDecay(db, project, injectedIds, citedIds, sessionId
|
|
|
457
514
|
promoted++;
|
|
458
515
|
} else {
|
|
459
516
|
const nextStreak = (row.uncited_streak || 0) + 1;
|
|
460
|
-
|
|
517
|
+
// Demote only when the streak is up AND the project has demonstrably
|
|
518
|
+
// adopted citations. A non-adopting project advances the streak (idempotent
|
|
519
|
+
// bookkeeping) but never loses importance — see construct-validity note.
|
|
520
|
+
if (nextStreak >= UNCITED_STREAK_THRESHOLD && !suppressDemotion) {
|
|
461
521
|
updateDemote.run(IMPORTANCE_FLOOR, sessionId, Date.now(), id);
|
|
462
522
|
demoted++;
|
|
463
523
|
} else {
|
package/lib/cite-back-hint.mjs
CHANGED
|
@@ -17,6 +17,11 @@ import { EDIT_TOOLS } from '../utils.mjs';
|
|
|
17
17
|
|
|
18
18
|
const MAX_FILES = 2;
|
|
19
19
|
|
|
20
|
+
// Leader literal for the cite-back hint. Shared by the builder (below) and the
|
|
21
|
+
// Stop-time signal extractor (extractCiteBackSignals) so the two can never drift
|
|
22
|
+
// — the extractor finds hint emissions by this exact prefix.
|
|
23
|
+
const CITE_BACK_HINT_LEADER = '[mem] ⚠ Cite-back:';
|
|
24
|
+
|
|
20
25
|
export function buildCiteBackHint(episode, cooldown) {
|
|
21
26
|
if (!episode || !cooldown) return null;
|
|
22
27
|
const entries = episode.entries;
|
|
@@ -48,7 +53,7 @@ export function buildCiteBackHint(episode, cooldown) {
|
|
|
48
53
|
// numeric framing is measurably harder to dismiss than a hedged hint.
|
|
49
54
|
const totalLessons = matches.reduce((sum, m) => sum + m.ids.length, 0);
|
|
50
55
|
const lines = [
|
|
51
|
-
|
|
56
|
+
`${CITE_BACK_HINT_LEADER} edited ${matches.length} file(s) with ${totalLessons} prior lesson(s) this session. Save now if any was the root cause:`,
|
|
52
57
|
];
|
|
53
58
|
for (const m of matches) {
|
|
54
59
|
const fname = basename(m.file);
|
|
@@ -242,3 +247,36 @@ export function loadCiteBackForEpisode(episode, runtimeDir) {
|
|
|
242
247
|
}
|
|
243
248
|
return buildCiteBackHint(episode, cooldown);
|
|
244
249
|
}
|
|
250
|
+
|
|
251
|
+
// ─── extractCiteBackSignals (P5 ①) ──────────────────────────────────────────
|
|
252
|
+
// Stop-time positive-citation signal. Scans the transcript for cite-back hint
|
|
253
|
+
// emissions (PostToolUse attachment.stdout carrying CITE_BACK_HINT_LEADER — the
|
|
254
|
+
// same source countUnsavedBugfixShape reads) and collects the `#NN` lesson ids
|
|
255
|
+
// they name. Each id is an observation whose warned file the agent actually
|
|
256
|
+
// EDITED this session — a behavioral citation even when the agent never typed
|
|
257
|
+
// #NN. The Stop handler unions these into the cited set passed to
|
|
258
|
+
// applyCitationDecay (lib/citation-tracker.mjs), so acting on a lesson promotes
|
|
259
|
+
// it and lifts the project's adoption rate. Returns an empty set on missing path.
|
|
260
|
+
const CITE_BACK_ID_RE = /#(\d{1,7})\b/g;
|
|
261
|
+
|
|
262
|
+
export function extractCiteBackSignals(transcriptPath) {
|
|
263
|
+
const ids = new Set();
|
|
264
|
+
if (!transcriptPath || !existsSync(transcriptPath)) return ids;
|
|
265
|
+
let raw;
|
|
266
|
+
try { raw = readFileSync(transcriptPath, 'utf8'); } catch { return ids; }
|
|
267
|
+
for (const line of raw.split('\n')) {
|
|
268
|
+
if (!line.trim()) continue;
|
|
269
|
+
let entry;
|
|
270
|
+
try { entry = JSON.parse(line); } catch { continue; }
|
|
271
|
+
if (entry.type !== 'attachment') continue;
|
|
272
|
+
const stdout = entry.attachment?.stdout || '';
|
|
273
|
+
if (!stdout.includes(CITE_BACK_HINT_LEADER)) continue;
|
|
274
|
+
CITE_BACK_ID_RE.lastIndex = 0;
|
|
275
|
+
let m;
|
|
276
|
+
while ((m = CITE_BACK_ID_RE.exec(stdout))) {
|
|
277
|
+
const id = Number(m[1]);
|
|
278
|
+
if (Number.isInteger(id) && id > 0 && id < 1e7) ids.add(id);
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
return ids;
|
|
282
|
+
}
|
package/lib/cli-flags.mjs
CHANGED
|
@@ -19,6 +19,21 @@
|
|
|
19
19
|
|
|
20
20
|
const DEFAULT_STDERR_WRITE = msg => process.stderr.write(msg);
|
|
21
21
|
|
|
22
|
+
/**
|
|
23
|
+
* True if `raw` is a clean integer or float-literal token — no trailing garbage,
|
|
24
|
+
* hex, or scientific notation. Float literals ARE accepted (callers truncate via
|
|
25
|
+
* parseInt, the deliberate #8277 decision); this only rejects shapes bare parseInt
|
|
26
|
+
* would silently coerce ("2abc"→2, "0x10"→0, "1e2"→1). Single source of the
|
|
27
|
+
* strict-shape rule shared by parseIntFlag and the reject-style numeric flags
|
|
28
|
+
* (save/update --importance, defer --priority).
|
|
29
|
+
*
|
|
30
|
+
* @param {string|number} raw Flag value as captured by parseArgs.
|
|
31
|
+
* @returns {boolean}
|
|
32
|
+
*/
|
|
33
|
+
export function isNumericToken(raw) {
|
|
34
|
+
return /^-?\d+(\.\d+)?$/.test(String(raw).trim());
|
|
35
|
+
}
|
|
36
|
+
|
|
22
37
|
/**
|
|
23
38
|
* Validate and parse a CLI numeric flag with optional bounds.
|
|
24
39
|
*
|
|
@@ -38,8 +53,15 @@ export function parseIntFlag(rawValue, opts) {
|
|
|
38
53
|
return defaultValue;
|
|
39
54
|
}
|
|
40
55
|
|
|
41
|
-
|
|
42
|
-
|
|
56
|
+
// Reject trailing-garbage / hex / scientific tokens that bare parseInt would
|
|
57
|
+
// silently coerce by stopping at the first non-digit ("2abc"→2, "0x10"→0,
|
|
58
|
+
// "1e2"→1) — those slip past the Number.isInteger gate and violate the
|
|
59
|
+
// warn+default contract above. Float literals ("3.7"→3) stay ACCEPTED via
|
|
60
|
+
// parseInt truncation: that's the deliberate #8277 decision pinned by the
|
|
61
|
+
// 'rejects floats' case in cli-flags.test.mjs, so the shape check admits them.
|
|
62
|
+
const str = String(rawValue).trim();
|
|
63
|
+
const parsed = parseInt(str, 10);
|
|
64
|
+
if (!isNumericToken(str) || !Number.isInteger(parsed) || parsed < min || parsed > max) {
|
|
43
65
|
const range = max === Number.MAX_SAFE_INTEGER ? `≥ ${min}` : `between ${min} and ${max}`;
|
|
44
66
|
warn(`[mem] Invalid ${name} "${rawValue}" (must be an integer ${range}); using default ${defaultValue}\n`);
|
|
45
67
|
return defaultValue;
|
package/lib/compress-core.mjs
CHANGED
|
@@ -10,11 +10,15 @@
|
|
|
10
10
|
// granularity (CLI/MCP wrap all groups in one transaction; the hook transacts
|
|
11
11
|
// each group). They no longer re-implement the mutation.
|
|
12
12
|
//
|
|
13
|
-
//
|
|
14
|
-
//
|
|
15
|
-
//
|
|
13
|
+
// The summary INSERT also writes its TF-IDF observation_vectors row in the same
|
|
14
|
+
// (caller-owned) transaction — fixed once here rather than in all three call
|
|
15
|
+
// sites. Without it, FTS-miss queries that fall back to vector recall (CJK /
|
|
16
|
+
// concept / paraphrase) could never reach compressed summaries; the LLM
|
|
17
|
+
// smart-compress path already wrote vectors, so the deterministic path was the
|
|
18
|
+
// sole gap (audit P6).
|
|
16
19
|
|
|
17
|
-
import { isoWeekKey, COMPRESSED_AUTO } from '../utils.mjs';
|
|
20
|
+
import { isoWeekKey, COMPRESSED_AUTO, debugCatch } from '../utils.mjs';
|
|
21
|
+
import { getVocabulary, computeVector } from '../tfidf.mjs';
|
|
18
22
|
import { scrubRecord } from './scrub-record.mjs';
|
|
19
23
|
|
|
20
24
|
/**
|
|
@@ -90,6 +94,22 @@ export function compressGroup(db, proj, obs) {
|
|
|
90
94
|
`).run(sessionId, proj, safe.text, dominantType, safe.title, safe.narrative, medianDate.toISOString(), medianEpoch);
|
|
91
95
|
const summaryId = Number(summaryResult.lastInsertRowid);
|
|
92
96
|
|
|
97
|
+
// TF-IDF vector for the summary so it is reachable by vector recall (parity
|
|
98
|
+
// with save-observation.mjs and the LLM smart-compress path). Best-effort:
|
|
99
|
+
// vocab may be uninitialized on a fresh DB — a failure here must not abort the
|
|
100
|
+
// compression the caller is transacting.
|
|
101
|
+
try {
|
|
102
|
+
const vocab = getVocabulary(db);
|
|
103
|
+
if (vocab) {
|
|
104
|
+
const vec = computeVector(`${safe.title} ${safe.narrative}`, vocab);
|
|
105
|
+
if (vec) {
|
|
106
|
+
db.prepare(
|
|
107
|
+
'INSERT OR REPLACE INTO observation_vectors (observation_id, vector, vocab_version, created_at_epoch) VALUES (?, ?, ?, ?)'
|
|
108
|
+
).run(summaryId, Buffer.from(vec.buffer), vocab.version, medianEpoch);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
} catch (e) { debugCatch(e, 'compress-vector'); }
|
|
112
|
+
|
|
93
113
|
const obsIds = obs.map((o) => o.id);
|
|
94
114
|
const obsPh = obsIds.map(() => '?').join(',');
|
|
95
115
|
db.prepare(`UPDATE observations SET compressed_into = ? WHERE id IN (${obsPh})`).run(summaryId, ...obsIds);
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
// Dedup / merge similarity thresholds — single source of truth (P10).
|
|
2
|
+
//
|
|
3
|
+
// All values are Jaccard-space (word-set overlap, 0..1) unless noted. They were
|
|
4
|
+
// scattered as bare literals and duplicate local consts across save-observation,
|
|
5
|
+
// maintain-core, hook-llm, hook-optimize, mem-cli, server, and hook; converging
|
|
6
|
+
// them here removes the drift risk and gives the P7 benchmark named knobs.
|
|
7
|
+
// Vector-side constants (VOCAB_DIM / MIN_COSINE_SIMILARITY / RRF_K) deliberately
|
|
8
|
+
// stay in tfidf.mjs next to the search engine that consumes them.
|
|
9
|
+
//
|
|
10
|
+
// Pure constants only — no imports, so nothing can import-cycle through this.
|
|
11
|
+
|
|
12
|
+
// 0.7: near-duplicate cutoff for save-time dedup (5-min window, lib/save-observation)
|
|
13
|
+
// and the hook-llm tier-1 title dedup. Catches "Modified X" / "Fixed X" restatements
|
|
14
|
+
// (~70% word overlap) without collapsing distinct-but-related observations.
|
|
15
|
+
export const DEDUP_JACCARD_THRESHOLD = 0.7;
|
|
16
|
+
|
|
17
|
+
// 0.85: high-confidence auto-merge cutoff (maintain + optimize cluster-merge, CLI/MCP
|
|
18
|
+
// dedup preview). Pairs at or above this merge without an LLM merge-decision call.
|
|
19
|
+
export const AUTO_MERGE_THRESHOLD = 0.85;
|
|
20
|
+
|
|
21
|
+
// 0.4: low bound of the LLM-review merge band [0.4, 0.85) in hook-optimize. Below it,
|
|
22
|
+
// a pair is too dissimilar to be worth a merge-decision call.
|
|
23
|
+
export const MERGE_JACCARD_LOW = 0.4;
|
|
24
|
+
|
|
25
|
+
// 0.5: MinHash estimated-Jaccard pre-filter for the maintain O(n²) scan — skip the
|
|
26
|
+
// exact-Jaccard compare when the cheap signature estimate is already below this.
|
|
27
|
+
export const MINHASH_PRE_THRESHOLD = 0.5;
|
|
28
|
+
|
|
29
|
+
// 0.7: MinHash pre-filter for the hook post-inject fuzzy-dedup pass. Stricter than
|
|
30
|
+
// maintain's 0.5 to keep the inline inject path cheap (it runs in the hot Stop path).
|
|
31
|
+
export const MINHASH_PREFILTER = 0.7;
|
|
32
|
+
|
|
33
|
+
// 0.95: strict title-Jaccard cutoff for the hook post-inject fuzzy-dedup pass — only
|
|
34
|
+
// collapse near-identical titles inline; anything softer waits for the maintain sweep.
|
|
35
|
+
export const FUZZY_DEDUP_THRESHOLD = 0.95;
|
package/lib/maintain-core.mjs
CHANGED
|
@@ -14,13 +14,16 @@
|
|
|
14
14
|
|
|
15
15
|
import { COMPRESSED_PENDING_PURGE, computeMinHash, estimateJaccardFromMinHash, jaccardSimilarity } from '../utils.mjs';
|
|
16
16
|
import { rebuildVocabulary, computeVector, _resetVocabCache } from '../tfidf.mjs';
|
|
17
|
+
import { DEDUP_JACCARD_THRESHOLD, MINHASH_PRE_THRESHOLD as MINHASH_PRE_THRESHOLD_SRC } from './dedup-constants.mjs';
|
|
17
18
|
|
|
18
19
|
export const STALE_AGE_MS = 30 * 86400000;
|
|
19
20
|
export const OP_CAP = 1000;
|
|
20
21
|
export const SCAN_LIMIT = 500;
|
|
21
22
|
export const DUPLICATE_LIMIT = 50;
|
|
22
|
-
|
|
23
|
-
|
|
23
|
+
// Back-compat: maintain-core historically exported these names; both now source
|
|
24
|
+
// their value from the single canonical lib/dedup-constants.mjs.
|
|
25
|
+
export const SIMILARITY_THRESHOLD = DEDUP_JACCARD_THRESHOLD;
|
|
26
|
+
export const MINHASH_PRE_THRESHOLD = MINHASH_PRE_THRESHOLD_SRC;
|
|
24
27
|
// A memory injected this many times with zero citations is "pinned noise" that
|
|
25
28
|
// the regular decay op can't touch (decay protects injection_count>0).
|
|
26
29
|
export const PINNED_INJ_THRESHOLD = 8;
|
package/lib/save-observation.mjs
CHANGED
|
@@ -13,10 +13,10 @@
|
|
|
13
13
|
|
|
14
14
|
import { jaccardSimilarity, scrubSecrets, computeMinHash, cjkBigrams, getCurrentBranch, debugCatch } from '../utils.mjs';
|
|
15
15
|
import { getVocabulary, computeVector } from '../tfidf.mjs';
|
|
16
|
+
import { DEDUP_JACCARD_THRESHOLD } from './dedup-constants.mjs';
|
|
16
17
|
|
|
17
18
|
const DEDUP_WINDOW_MS = 5 * 60 * 1000;
|
|
18
19
|
const DEDUP_RECENT_LIMIT = 50;
|
|
19
|
-
const DEDUP_JACCARD_THRESHOLD = 0.7;
|
|
20
20
|
|
|
21
21
|
/**
|
|
22
22
|
* Save a new observation if it isn't a near-duplicate of one saved within the
|