claude-mem-lite 2.92.0 → 2.94.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/adopt-cli.mjs +19 -9
- package/bash-utils.mjs +45 -5
- package/cli/activity.mjs +12 -4
- package/cli/common.mjs +23 -0
- package/format-utils.mjs +12 -1
- package/hook-handoff.mjs +20 -2
- package/hook-llm.mjs +6 -1
- package/hook-optimize.mjs +23 -8
- package/hook-update.mjs +16 -5
- package/hook.mjs +8 -1
- package/lib/maintain-core.mjs +10 -3
- package/mem-cli.mjs +36 -26
- package/memdir.mjs +36 -11
- package/nlp.mjs +20 -3
- package/package.json +1 -1
- package/project-utils.mjs +6 -0
- package/registry-importer.mjs +8 -3
- package/registry-retriever.mjs +10 -6
- package/schema.mjs +15 -8
- package/search-engine.mjs +5 -1
- package/secret-scrub.mjs +33 -3
- package/server.mjs +9 -1
- package/synonyms.mjs +7 -0
- package/tier.mjs +5 -2
- package/utils.mjs +40 -3
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "claude-mem-lite",
|
|
13
|
-
"version": "2.92.0",
|
|
13
|
+
"version": "2.94.0",
|
|
14
14
|
"source": "./",
|
|
15
15
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.92.0",
|
|
3
|
+
"version": "2.94.0",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "sdsrss"
|
package/adopt-cli.mjs
CHANGED
|
@@ -15,7 +15,7 @@ import { join } from 'path';
|
|
|
15
15
|
import {
|
|
16
16
|
memdirPath, writePluginSection, removePluginSection,
|
|
17
17
|
writePluginDoc, removePluginDoc,
|
|
18
|
-
isAdopted, readMemoryIndex,
|
|
18
|
+
isAdopted, hasPluginState, readMemoryIndex,
|
|
19
19
|
UserEditedError, BudgetExceededError,
|
|
20
20
|
} from './memdir.mjs';
|
|
21
21
|
import {
|
|
@@ -325,6 +325,7 @@ export function cmdUnadopt(args = []) {
|
|
|
325
325
|
|
|
326
326
|
const all = hasFlag(args, '--all');
|
|
327
327
|
const dryRun = hasFlag(args, '--dry-run');
|
|
328
|
+
const force = hasFlag(args, '--force');
|
|
328
329
|
const targets = all
|
|
329
330
|
? listAllMemdirs().map((m) => m.memdir)
|
|
330
331
|
: [memdirPath(detectCwd())];
|
|
@@ -334,23 +335,32 @@ export function cmdUnadopt(args = []) {
|
|
|
334
335
|
return;
|
|
335
336
|
}
|
|
336
337
|
|
|
337
|
-
let removed = 0, absent = 0;
|
|
338
|
+
let removed = 0, absent = 0, skipped = 0;
|
|
338
339
|
for (const memdir of targets) {
|
|
339
340
|
if (dryRun) {
|
|
340
|
-
|
|
341
|
-
|
|
341
|
+
// Mirror the live foreign-content guard: a sentinel with no state sidecar would be
|
|
342
|
+
// skipped (not removed) unless --force, so dry-run must report it the same way.
|
|
343
|
+
const action = !isAdopted(memdir, PLUGIN_SLUG) ? 'absent'
|
|
344
|
+
: (hasPluginState(memdir, PLUGIN_SLUG) || force) ? 'would-remove'
|
|
345
|
+
: 'would-skip-foreign';
|
|
342
346
|
log(`[unadopt --dry-run] ${memdir} → ${action}`);
|
|
343
|
-
if (
|
|
347
|
+
if (action === 'would-remove') removed++;
|
|
348
|
+
else if (action === 'would-skip-foreign') skipped++;
|
|
349
|
+
else absent++;
|
|
344
350
|
continue;
|
|
345
351
|
}
|
|
346
|
-
const r = removePluginSection(memdir, PLUGIN_SLUG);
|
|
347
|
-
removePluginDoc(memdir, PLUGIN_SLUG);
|
|
348
|
-
if (r.action === '
|
|
352
|
+
const r = removePluginSection(memdir, PLUGIN_SLUG, { force });
|
|
353
|
+
if (r.action === 'removed') { removePluginDoc(memdir, PLUGIN_SLUG); removed++; }
|
|
354
|
+
else if (r.action === 'skipped-foreign') skipped++;
|
|
349
355
|
else absent++;
|
|
350
356
|
log(`[unadopt] ${memdir} → ${r.action}`);
|
|
351
357
|
}
|
|
352
358
|
|
|
359
|
+
if (skipped > 0) {
|
|
360
|
+
log('[unadopt] skipped-foreign = a sentinel block with no plugin state file (not proven plugin-written).');
|
|
361
|
+
log('[unadopt] pass --force to remove it anyway.');
|
|
362
|
+
}
|
|
353
363
|
log('');
|
|
354
364
|
const verb = dryRun ? 'would remove' : 'removed';
|
|
355
|
-
log(`[unadopt${dryRun ? ' --dry-run' : ''}] ${targets.length} target(s): ${removed} ${verb}, ${absent} absent`);
|
|
365
|
+
log(`[unadopt${dryRun ? ' --dry-run' : ''}] ${targets.length} target(s): ${removed} ${verb}, ${skipped} skipped-foreign, ${absent} absent`);
|
|
356
366
|
}
|
package/bash-utils.mjs
CHANGED
|
@@ -3,6 +3,38 @@
|
|
|
3
3
|
|
|
4
4
|
import { basename } from 'path';
|
|
5
5
|
|
|
6
|
+
// Read/search commands whose output legitimately contains "error"-like keywords without
|
|
7
|
+
// being a failure. Matched against the PRIMARY command (see isReadOnlyCommand).
|
|
8
|
+
const SEARCH_VERBS = new Set([
|
|
9
|
+
'grep', 'rg', 'ag', 'ack', 'cat', 'head', 'tail', 'less', 'more', 'find', 'locate', 'wc', 'file', 'which', 'type',
|
|
10
|
+
]);
|
|
11
|
+
// Command prefixes that wrap the real command (env-assignments handled separately).
|
|
12
|
+
const CMD_WRAPPERS = new Set(['sudo', 'doas', 'env', 'time', 'command', 'nice', 'nohup', 'stdbuf', 'xargs']);
|
|
13
|
+
// git read subcommands whose output contains commit/log/match text, not failures.
|
|
14
|
+
const GIT_READ_SUBCMDS = new Set(['grep', 'log', 'show', 'diff', 'blame', 'ls-files', 'cat-file', 'whatchanged', 'shortlog', 'reflog', 'status']);
|
|
15
|
+
|
|
16
|
+
// True when the command's PRIMARY operation (left of the first pipe, past any
|
|
17
|
+
// env-assignments / wrapper like `sudo`/`env`/`time`) is a read/search — including
|
|
18
|
+
// `git grep`/`git log`. Anchoring on the primary command (not "search verb appears
|
|
19
|
+
// anywhere") is what lets `npm run build 2>&1 | tail` stay an error while `sudo grep`,
|
|
20
|
+
// `git grep`, `cat f | head` are correctly exempt.
|
|
21
|
+
function isReadOnlyCommand(cmd) {
|
|
22
|
+
const primary = cmd.split('|')[0];
|
|
23
|
+
const toks = primary.trim().split(/\s+/).filter(Boolean);
|
|
24
|
+
let i = 0;
|
|
25
|
+
while (i < toks.length && (/^\w+=/.test(toks[i]) || CMD_WRAPPERS.has(toks[i]))) i++;
|
|
26
|
+
const first = toks[i];
|
|
27
|
+
if (!first) return false;
|
|
28
|
+
if (SEARCH_VERBS.has(first)) return true;
|
|
29
|
+
return first === 'git' && GIT_READ_SUBCMDS.has(toks[i + 1]);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
// Paths excluded from observation capture (ephemeral / virtual filesystems) — applied
|
|
33
|
+
// uniformly to both command-parsed paths and direct file_path/path/filePath fields.
|
|
34
|
+
function isExcludedPath(p) {
|
|
35
|
+
return p.startsWith('/dev/') || p.startsWith('/proc/') || p.startsWith('/tmp/');
|
|
36
|
+
}
|
|
37
|
+
|
|
6
38
|
/**
|
|
7
39
|
* Detect significance signals in a Bash command and its response.
|
|
8
40
|
* Checks for errors, test runs, builds, git operations, and deployments.
|
|
@@ -12,9 +44,12 @@ import { basename } from 'path';
|
|
|
12
44
|
*/
|
|
13
45
|
export function detectBashSignificance(input, response) {
|
|
14
46
|
const cmd = (input.command || '').toLowerCase();
|
|
15
|
-
// Skip error keyword matching when the command is a read/search
|
|
16
|
-
//
|
|
17
|
-
|
|
47
|
+
// Skip error keyword matching only when the PRIMARY command is a read/search op (its
|
|
48
|
+
// output naturally contains "error"-like keywords that aren't failures). Anchored on the
|
|
49
|
+
// primary command — NOT "search verb appears anywhere" — so `npm run build 2>&1 | tail`
|
|
50
|
+
// stays a real failure while `sudo grep`, `git grep`, `git log --grep`, `cat f | head`
|
|
51
|
+
// remain exempt and `run-cat-tests` doesn't trip a substring match.
|
|
52
|
+
const isSearchCmd = isReadOnlyCommand(cmd);
|
|
18
53
|
const looksLikeError = !isSearchCmd
|
|
19
54
|
&& /\berror\b|\bERR!|fail(ed|ure)?|exception|panic|traceback|errno|enoent|command not found/i.test(response)
|
|
20
55
|
&& response.length > 15;
|
|
@@ -38,7 +73,9 @@ export function detectBashSignificance(input, response) {
|
|
|
38
73
|
const isTest = /\b(npm\s+test|npm\s+run\s+test|yarn\s+test|pnpm\s+test|pnpm\s+run\s+test|bun\s+test|go\s+test|cargo\s+test)\b/i.test(cmd)
|
|
39
74
|
|| /\b(jest|pytest|vitest|mocha|cypress|playwright)\b/i.test(cmd);
|
|
40
75
|
const isBuild = /\b(build|compile|tsc|webpack|vite|rollup|esbuild|make|cargo)\b/i.test(cmd);
|
|
41
|
-
|
|
76
|
+
// Allow intervening global git options (`-C <path>`, `-c k=v`, `--no-pager`, …) between
|
|
77
|
+
// `git` and the subcommand — `git -C /repo push` is the standard multi-repo/scripted form.
|
|
78
|
+
const isGit = /\bgit\s+(?:(?:-[cC]\s+\S+|--?[\w-]+(?:=\S+)?)\s+)*(commit|merge|rebase|cherry-pick|push)\b/i.test(cmd);
|
|
42
79
|
const isDeploy = /\b(deploy|docker|kubectl|terraform)\b/i.test(cmd);
|
|
43
80
|
return {
|
|
44
81
|
isError, isTest, isBuild, isGit, isDeploy,
|
|
@@ -92,6 +129,9 @@ export function extractErrorKeywords(cmd, response) {
|
|
|
92
129
|
*/
|
|
93
130
|
export function extractFilePaths(input) {
|
|
94
131
|
const paths = [];
|
|
132
|
+
// Direct fields (Edit/Write file_path) are kept unconditionally — an explicit edit to a
|
|
133
|
+
// /tmp path is real work the user chose to make, unlike a /tmp path that merely appears as
|
|
134
|
+
// a transient argument inside a Bash command (excluded as noise in the command branch below).
|
|
95
135
|
if (input.file_path) paths.push(input.file_path);
|
|
96
136
|
if (input.path) paths.push(input.path);
|
|
97
137
|
if (input.filePath) paths.push(input.filePath);
|
|
@@ -101,7 +141,7 @@ export function extractFilePaths(input) {
|
|
|
101
141
|
if (match) {
|
|
102
142
|
for (const m of match) {
|
|
103
143
|
const p = m.trim();
|
|
104
|
-
if (!
|
|
144
|
+
if (!isExcludedPath(p)
|
|
105
145
|
// Skip single-component paths like /exit, /clear — likely slash commands, not files
|
|
106
146
|
&& (p.indexOf('/', 1) !== -1 || /\.\w+$/.test(p))) {
|
|
107
147
|
paths.push(p);
|
package/cli/activity.mjs
CHANGED
|
@@ -10,8 +10,8 @@
|
|
|
10
10
|
|
|
11
11
|
import { inferProject } from '../utils.mjs';
|
|
12
12
|
import { resolveProject } from '../project-utils.mjs';
|
|
13
|
-
import { parseArgs, out, fail } from './common.mjs';
|
|
14
|
-
import { parseIntFlag } from '../lib/cli-flags.mjs';
|
|
13
|
+
import { parseArgs, out, fail, rejectBareStringFlags } from './common.mjs';
|
|
14
|
+
import { parseIntFlag, isNumericToken } from '../lib/cli-flags.mjs';
|
|
15
15
|
|
|
16
16
|
function formatActivityResults(rows) {
|
|
17
17
|
if (!rows || rows.length === 0) return '(no events)';
|
|
@@ -31,6 +31,9 @@ export async function cmdActivity(db, args) {
|
|
|
31
31
|
const project = flags.project ? resolveProject(db, flags.project) : inferProject();
|
|
32
32
|
|
|
33
33
|
if (sub === 'save') {
|
|
34
|
+
// Reject value-less string flags before they reach saveEvent as a boolean `true`
|
|
35
|
+
// (#8470): bare --body / --title crashed with a raw "SQLite3 can only bind ..." error.
|
|
36
|
+
if (rejectBareStringFlags(flags, ['type', 'title', 'body', 'files', 'file', 'project'])) return;
|
|
34
37
|
const type = flags.type || 'observation';
|
|
35
38
|
if (!VALID_EVENT_TYPES.has(type)) {
|
|
36
39
|
fail(`[mem] activity save: invalid --type "${type}". Valid: ${[...VALID_EVENT_TYPES].join(', ')}`);
|
|
@@ -51,7 +54,9 @@ export async function cmdActivity(db, args) {
|
|
|
51
54
|
const file_paths_merged = [...filesFromSingular, ...filesFromPlural];
|
|
52
55
|
const file_paths = file_paths_merged.length > 0 ? file_paths_merged : null;
|
|
53
56
|
const rawImp = flags.importance !== undefined ? parseInt(flags.importance, 10) : 2;
|
|
54
|
-
|
|
57
|
+
// isNumericToken first (mirrors cmdSave): bare parseInt coerces "3xyz"→3 and would
|
|
58
|
+
// persist a wrong importance that silently skews ranking. Float literals truncate (#8277).
|
|
59
|
+
if (flags.importance !== undefined && (!isNumericToken(flags.importance) || isNaN(rawImp) || rawImp < 1 || rawImp > 3)) {
|
|
55
60
|
fail(`[mem] Invalid importance "${flags.importance}". Must be 1, 2, or 3.`);
|
|
56
61
|
return;
|
|
57
62
|
}
|
|
@@ -112,7 +117,10 @@ export async function cmdActivity(db, args) {
|
|
|
112
117
|
if (row) {
|
|
113
118
|
out(JSON.stringify(row, null, 2));
|
|
114
119
|
} else {
|
|
115
|
-
|
|
120
|
+
// fail() (stderr + exit 1), matching the not-found contract of sibling commands
|
|
121
|
+
// (`get`, `activity delete`, `update`); previously stdout + exit 0, so scripts
|
|
122
|
+
// couldn't detect a missing event from the exit code.
|
|
123
|
+
fail(`[mem] activity show: event #${id} not found`);
|
|
116
124
|
}
|
|
117
125
|
return;
|
|
118
126
|
}
|
package/cli/common.mjs
CHANGED
|
@@ -54,6 +54,29 @@ export function fail(text) {
|
|
|
54
54
|
process.exitCode = 1;
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
/**
|
|
58
|
+
* Reject value-less `--flag` for string-valued flags. A bare trailing flag (or one
|
|
59
|
+
* immediately followed by another `--flag`) parses to boolean `true` (parseArgs above);
|
|
60
|
+
* that `true` then slips into code expecting a string and surfaces a raw
|
|
61
|
+
* `flags.x.split is not a function` / `SQLite3 can only bind ...` stacktrace (#8470).
|
|
62
|
+
* Returns true (and emits a clean `fail()`) when any listed key is a bare flag — the
|
|
63
|
+
* caller should `return` on true. Single source of the guard the update/registry paths
|
|
64
|
+
* previously inlined, so new string-flag commands stay consistent.
|
|
65
|
+
*
|
|
66
|
+
* @param {object} flags Parsed flags from parseArgs.
|
|
67
|
+
* @param {string[]} keys String-valued flag names to guard (without leading dashes).
|
|
68
|
+
* @returns {boolean} true if a bare flag was found and rejected.
|
|
69
|
+
*/
|
|
70
|
+
export function rejectBareStringFlags(flags, keys) {
|
|
71
|
+
for (const key of keys) {
|
|
72
|
+
if (flags[key] === true) {
|
|
73
|
+
fail(`[mem] --${key} requires a value (received a bare flag with no value).`);
|
|
74
|
+
return true;
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
|
|
57
80
|
// ─── Time Formatting ─────────────────────────────────────────────────────────
|
|
58
81
|
|
|
59
82
|
/** "just now" / "5m ago" / "3h ago" / "2d ago" relative to now. */
|
package/format-utils.mjs
CHANGED
|
@@ -9,8 +9,19 @@
|
|
|
9
9
|
*/
|
|
10
10
|
export function truncate(str, max = 80) {
|
|
11
11
|
if (!str) return '';
|
|
12
|
+
// Defense-in-depth: a non-string (e.g. an LLM that returned title as an array/number)
|
|
13
|
+
// would throw `str.replace is not a function` and abort the caller. Coerce to '' rather
|
|
14
|
+
// than crash; the real type-guarding happens at the call site.
|
|
15
|
+
if (typeof str !== 'string') return '';
|
|
12
16
|
str = str.replace(/\n/g, ' ').trim();
|
|
13
|
-
|
|
17
|
+
if (str.length <= max) return str;
|
|
18
|
+
// Never split a UTF-16 surrogate pair: slicing between the high and low half emits a
|
|
19
|
+
// lone surrogate (invalid UTF-16) that then gets persisted to the DB. If the last kept
|
|
20
|
+
// code unit is a high surrogate, drop it so we cut on a code-point boundary.
|
|
21
|
+
let end = max - 1;
|
|
22
|
+
const last = str.charCodeAt(end - 1);
|
|
23
|
+
if (last >= 0xD800 && last <= 0xDBFF) end--;
|
|
24
|
+
return str.slice(0, end) + '\u2026';
|
|
14
25
|
}
|
|
15
26
|
|
|
16
27
|
/**
|
package/hook-handoff.mjs
CHANGED
|
@@ -446,13 +446,31 @@ function renderHandoffFromRow(handoff, db, project) {
|
|
|
446
446
|
|
|
447
447
|
lines.push('</session-handoff>');
|
|
448
448
|
|
|
449
|
-
// Append session summary if available (long-gap enrichment)
|
|
449
|
+
// Append session summary if available (long-gap enrichment).
|
|
450
|
+
// session_summaries is keyed by the mem-internal memory_session_id, but in production
|
|
451
|
+
// session_handoffs.session_id holds the Claude Code UUID (the scope tag) — the two id
|
|
452
|
+
// namespaces never match, so the exact lookup returned nothing and this block was always
|
|
453
|
+
// dropped on a real resume. There is no bridge column (the CC-UUID lives on user_prompts,
|
|
454
|
+
// not on sdk_sessions/session_summaries), so: try the exact id match first (correct when
|
|
455
|
+
// ids align — legacy rows + tests), then fall back to the most-recent summary for the
|
|
456
|
+
// project, which at resume time is the summary from the session that wrote this handoff.
|
|
450
457
|
try {
|
|
451
|
-
|
|
458
|
+
let summary = db.prepare(`
|
|
452
459
|
SELECT completed, next_steps, remaining_items FROM session_summaries
|
|
453
460
|
WHERE memory_session_id = ? AND project = ?
|
|
454
461
|
ORDER BY created_at_epoch DESC LIMIT 1
|
|
455
462
|
`).get(handoff.session_id, project);
|
|
463
|
+
if (!summary) {
|
|
464
|
+
// Pick the project summary CLOSEST IN TIME to this handoff, not merely the newest:
|
|
465
|
+
// a handoff and its own session's summary are written within ms of each other at
|
|
466
|
+
// session end, so nearest-timestamp recovers the right session even when a different
|
|
467
|
+
// session later wrote a newer summary for the same project (concurrent/interleaved use).
|
|
468
|
+
summary = db.prepare(`
|
|
469
|
+
SELECT completed, next_steps, remaining_items FROM session_summaries
|
|
470
|
+
WHERE project = ?
|
|
471
|
+
ORDER BY ABS(created_at_epoch - ?) ASC LIMIT 1
|
|
472
|
+
`).get(project, handoff.created_at_epoch ?? 0);
|
|
473
|
+
}
|
|
456
474
|
if (summary && (summary.completed || summary.next_steps || summary.remaining_items)) {
|
|
457
475
|
lines.push('');
|
|
458
476
|
lines.push('<session-summary source="haiku">');
|
package/hook-llm.mjs
CHANGED
|
@@ -657,7 +657,12 @@ ${actionList}`;
|
|
|
657
657
|
releaseLLMSlot();
|
|
658
658
|
}
|
|
659
659
|
|
|
660
|
-
|
|
660
|
+
// Require a STRING title: a truthy non-string (LLM returned title as an array/number/
|
|
661
|
+
// object) would pass a bare `parsed.title` check, then crash truncate() downstream,
|
|
662
|
+
// aborting the worker before tmpFile cleanup (leak) and leaving the obs degraded.
|
|
663
|
+
if (parsed && typeof parsed.title === 'string' && parsed.title) {
|
|
664
|
+
// Normalize narrative to a string too — same non-string crash risk in truncate().
|
|
665
|
+
if (typeof parsed.narrative !== 'string') parsed.narrative = '';
|
|
661
666
|
// Discard if LLM judges observation has no learning value
|
|
662
667
|
if (parsed.importance === 0 || parsed.importance === '0') {
|
|
663
668
|
debugLog('DEBUG', 'llm-episode', `Discarded low-value observation: ${parsed.title}`);
|
package/hook-optimize.mjs
CHANGED
|
@@ -262,7 +262,7 @@ Rules:
|
|
|
262
262
|
}
|
|
263
263
|
}
|
|
264
264
|
|
|
265
|
-
export function applyNormalization(db, groups) {
|
|
265
|
+
export function applyNormalization(db, groups, { project = null } = {}) {
|
|
266
266
|
if (!groups || groups.length === 0) return { updated: 0 };
|
|
267
267
|
|
|
268
268
|
const aliasMap = new Map();
|
|
@@ -272,11 +272,17 @@ export function applyNormalization(db, groups) {
|
|
|
272
272
|
}
|
|
273
273
|
}
|
|
274
274
|
|
|
275
|
+
// Scope the mutation to `project` when normalize was scoped (v2.72.0 --project).
|
|
276
|
+
// Without this, synonym groups derived from ONE project's concepts rewrote the
|
|
277
|
+
// concepts/search_aliases of EVERY project's observations — the exact cross-project
|
|
278
|
+
// contamination the --project flag was added to prevent. NULL → all projects (legacy
|
|
279
|
+
// unscoped run), matching the search-engine `(? IS NULL OR project = ?)` idiom.
|
|
275
280
|
const rows = db.prepare(`
|
|
276
281
|
SELECT id, concepts, search_aliases FROM observations
|
|
277
282
|
WHERE COALESCE(compressed_into, 0) = 0
|
|
278
283
|
AND concepts IS NOT NULL AND concepts != ''
|
|
279
|
-
|
|
284
|
+
AND (? IS NULL OR project = ?)
|
|
285
|
+
`).all(project, project);
|
|
280
286
|
|
|
281
287
|
let updated = 0;
|
|
282
288
|
const updateStmt = db.prepare(`
|
|
@@ -322,7 +328,7 @@ export async function executeNormalize(db, force = false, { project } = {}) {
|
|
|
322
328
|
const groups = await identifySynonymGroups(concepts);
|
|
323
329
|
if (groups.length === 0) return { processed: 0, groups: 0 };
|
|
324
330
|
|
|
325
|
-
const result = applyNormalization(db, groups);
|
|
331
|
+
const result = applyNormalization(db, groups, { project });
|
|
326
332
|
|
|
327
333
|
try { writeFileSync(NORMALIZE_GATE_FILE, JSON.stringify({ epoch: Date.now() })); } catch {}
|
|
328
334
|
|
|
@@ -340,7 +346,7 @@ export function findMergeCandidates(db, maxClusters = 5, { project } = {}) {
|
|
|
340
346
|
const cutoff = Date.now() - MERGE_TIME_WINDOW_MS;
|
|
341
347
|
const projectClause = project ? 'AND project = ?' : '';
|
|
342
348
|
const stmt = db.prepare(`
|
|
343
|
-
SELECT id, title, narrative, project, type, access_count, created_at_epoch, minhash_sig
|
|
349
|
+
SELECT id, title, narrative, project, type, access_count, importance, created_at_epoch, minhash_sig
|
|
344
350
|
FROM observations
|
|
345
351
|
WHERE COALESCE(compressed_into, 0) = 0
|
|
346
352
|
AND optimized_at IS NULL
|
|
@@ -410,10 +416,19 @@ Return ONLY valid JSON:
|
|
|
410
416
|
const parsed = await callModelJSON(prompt, 'sonnet', { timeout: 20000, maxTokens: 1000 });
|
|
411
417
|
if (!parsed || !parsed.should_merge) return { merged: false };
|
|
412
418
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
419
|
+
// Keeper = highest importance, then highest access_count. Previously access_count
|
|
420
|
+
// alone, so a critical (importance=3) but never-accessed observation lost the keeper
|
|
421
|
+
// role to a trivial (importance=1) accessed one and was compressed away.
|
|
422
|
+
const keeper = cluster.reduce((best, o) => {
|
|
423
|
+
const oi = o.importance || 1, bi = best.importance || 1;
|
|
424
|
+
if (oi !== bi) return oi > bi ? o : best;
|
|
425
|
+
return (o.access_count || 0) > (best.access_count || 0) ? o : best;
|
|
426
|
+
}, cluster[0]);
|
|
416
427
|
const others = cluster.filter(o => o.id !== keeper.id);
|
|
428
|
+
// Floor the merged importance at the cluster max — merging must never silently
|
|
429
|
+
// downgrade the ranking of the most-important member (the LLM default is 2). The keeper
|
|
430
|
+
// is selected by importance-first, so keeper.importance IS the cluster max by construction.
|
|
431
|
+
const maxClusterImportance = keeper.importance || 1;
|
|
417
432
|
|
|
418
433
|
const concepts = Array.isArray(parsed.merged_concepts) ? parsed.merged_concepts.slice(0, 10) : [];
|
|
419
434
|
const facts = Array.isArray(parsed.merged_facts) ? parsed.merged_facts.slice(0, 10) : [];
|
|
@@ -428,7 +443,7 @@ Return ONLY valid JSON:
|
|
|
428
443
|
const bigramText = cjkBigrams((title || '') + ' ' + (narrative || ''));
|
|
429
444
|
const textField = [conceptsText, factsText, bigramText].filter(Boolean).join(' ');
|
|
430
445
|
const minhashSig = computeMinHash((title || '') + ' ' + (narrative || ''));
|
|
431
|
-
const importance = clampImportance(parsed.importance || 2);
|
|
446
|
+
const importance = Math.max(clampImportance(parsed.importance || 2), maxClusterImportance);
|
|
432
447
|
|
|
433
448
|
// Scrub LLM-output cluster-merge text fields at the UPDATE boundary.
|
|
434
449
|
// importance is numeric; minhash_sig is hash bytes.
|
package/hook-update.mjs
CHANGED
|
@@ -27,7 +27,10 @@ const STATE_DIR = DB_DIR;
|
|
|
27
27
|
const STATE_FILE = join(STATE_DIR, 'runtime', 'update-state.json');
|
|
28
28
|
const CHECK_INTERVAL_MS = 24 * 60 * 60 * 1000; // 24 hours
|
|
29
29
|
const FETCH_TIMEOUT_MS = 3000; // 3s network timeout
|
|
30
|
-
|
|
30
|
+
// When rate-limited we got NO release data, so re-check sooner than the normal 24h
|
|
31
|
+
// cadence (GitHub's unauthenticated rate-limit window resets within the hour). 6h × ≤2
|
|
32
|
+
// requests = 4 polls/day, far under the 60/hr limit, so this is a faster retry, not a hammer.
|
|
33
|
+
const RATE_LIMIT_INTERVAL_MS = 6 * 60 * 60 * 1000; // 6h retry when rate-limited
|
|
31
34
|
const NPM_INSTALL_CMD = 'npm install --omit=dev --no-audit --no-fund';
|
|
32
35
|
|
|
33
36
|
// ── Main Entry ─────────────────────────────────────────────
|
|
@@ -57,7 +60,12 @@ export async function checkForUpdate(options = {}) {
|
|
|
57
60
|
|
|
58
61
|
const latest = await fetchLatestRelease();
|
|
59
62
|
if (!latest) {
|
|
60
|
-
|
|
63
|
+
// Re-read from disk: a 403 inside fetchWithTimeout just persisted rateLimited:true.
|
|
64
|
+
// Spreading the stale in-memory `state` (captured above with rateLimited:false) would
|
|
65
|
+
// clobber that flag back to false, so shouldCheck never honors the backoff and the
|
|
66
|
+
// rate-limit mechanism is dead. Re-reading preserves the freshly-written flag.
|
|
67
|
+
const fresh = readState();
|
|
68
|
+
saveState({ ...fresh, lastCheck: new Date().toISOString() });
|
|
61
69
|
return null;
|
|
62
70
|
}
|
|
63
71
|
|
|
@@ -174,7 +182,10 @@ async function fetchLatestRelease() {
|
|
|
174
182
|
headers,
|
|
175
183
|
);
|
|
176
184
|
if (result === 'rate-limited') return null;
|
|
177
|
-
|
|
185
|
+
// Guard tag_name: a 200-OK with a malformed body ({} / {tag_name:null}) would throw
|
|
186
|
+
// `Cannot read properties of undefined (reading 'replace')`. Caught upstream, but it
|
|
187
|
+
// poisons lastError and blocks the tags fallback below — fall through instead.
|
|
188
|
+
if (result && typeof result.tag_name === 'string') {
|
|
178
189
|
return {
|
|
179
190
|
version: result.tag_name.replace(/^v/, ''),
|
|
180
191
|
tarballUrl: result.tarball_url,
|
|
@@ -188,7 +199,7 @@ async function fetchLatestRelease() {
|
|
|
188
199
|
headers,
|
|
189
200
|
);
|
|
190
201
|
if (tags === 'rate-limited') return null;
|
|
191
|
-
if (Array.isArray(tags) && tags.length > 0) {
|
|
202
|
+
if (Array.isArray(tags) && tags.length > 0 && typeof tags[0]?.name === 'string') {
|
|
192
203
|
const tag = tags[0];
|
|
193
204
|
return {
|
|
194
205
|
version: tag.name.replace(/^v/, ''),
|
|
@@ -208,7 +219,7 @@ async function fetchWithTimeout(url, headers) {
|
|
|
208
219
|
if (res.status === 403) {
|
|
209
220
|
const state = readState();
|
|
210
221
|
saveState({ ...state, rateLimited: true });
|
|
211
|
-
debugLog('DEBUG', 'hook-update', 'GitHub API rate limited
|
|
222
|
+
debugLog('DEBUG', 'hook-update', 'GitHub API rate limited; will retry on the 6h rate-limit cadence');
|
|
212
223
|
return 'rate-limited';
|
|
213
224
|
}
|
|
214
225
|
if (!res.ok) return null;
|
package/hook.mjs
CHANGED
|
@@ -202,13 +202,20 @@ function flushEpisode(episode, hookEventName = 'PostToolUse') {
|
|
|
202
202
|
// bugfix-shape nudge above and may co-fire.
|
|
203
203
|
const citeBack = loadCiteBackForEpisode(episode, RUNTIME_DIR);
|
|
204
204
|
if (citeBack) lines.push(citeBack);
|
|
205
|
+
// Trailing newline is REQUIRED: when this receipt flushes at SessionStart
|
|
206
|
+
// (leftover episode after /clear or /compact), the startup dashboard writes a
|
|
207
|
+
// second hookSpecificOutput object right after. Without the '\n' the two land
|
|
208
|
+
// back-to-back as `}{` on one line and Claude Code's line-based JSON parser
|
|
209
|
+
// drops both — losing the episode-flush / cite-back context exactly at the
|
|
210
|
+
// session boundary. Every other hookSpecificOutput write appends '\n'; this
|
|
211
|
+
// was the lone exception.
|
|
205
212
|
process.stdout.write(JSON.stringify({
|
|
206
213
|
suppressOutput: true,
|
|
207
214
|
hookSpecificOutput: {
|
|
208
215
|
hookEventName,
|
|
209
216
|
additionalContext: lines.join('\n'),
|
|
210
217
|
},
|
|
211
|
-
}));
|
|
218
|
+
}) + '\n');
|
|
212
219
|
} catch { /* never block on receipt */ }
|
|
213
220
|
}
|
|
214
221
|
} else {
|
package/lib/maintain-core.mjs
CHANGED
|
@@ -33,11 +33,18 @@ export const PINNED_INJ_THRESHOLD = 8;
|
|
|
33
33
|
// compressed_into = <keeperId>; deleting that keeper (compressed_into has no FK) would
|
|
34
34
|
// leave the child dangling behind a now-missing parent — hidden from every
|
|
35
35
|
// COALESCE(compressed_into,0)=0 view and unrecoverable. Recovery = resurface the child
|
|
36
|
-
// as live (NULL) rather than lose it silently. Shared by
|
|
37
|
-
|
|
36
|
+
// as live (NULL) rather than lose it silently. Shared by every hard-delete path:
|
|
37
|
+
// maintain (cleanupBroken/purgeStale) AND the interactive `delete` / MCP mem_delete.
|
|
38
|
+
export function recoverChildrenOf(db, ids) {
|
|
38
39
|
if (!ids.length) return 0;
|
|
39
40
|
const ph = ids.map(() => '?').join(',');
|
|
40
|
-
|
|
41
|
+
// `AND id NOT IN (...)`: never "recover" a row that is itself being deleted in the same
|
|
42
|
+
// call (e.g. `delete 1,2` where #2 was merged into #1). Without it, #2 is un-hidden and
|
|
43
|
+
// then immediately deleted, inflating the reported recovery count with a row that did not
|
|
44
|
+
// survive. Recovery should count only children that actually stay live.
|
|
45
|
+
return db.prepare(
|
|
46
|
+
`UPDATE observations SET compressed_into = NULL WHERE compressed_into IN (${ph}) AND id NOT IN (${ph})`
|
|
47
|
+
).run(...ids, ...ids).changes;
|
|
41
48
|
}
|
|
42
49
|
|
|
43
50
|
export function cleanupBroken(db, { projectFilter, baseParams, opCap = OP_CAP }) {
|
package/mem-cli.mjs
CHANGED
|
@@ -18,6 +18,7 @@ import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from '
|
|
|
18
18
|
import {
|
|
19
19
|
cleanupBroken, decayAndMarkIdle, boostAccessed, demotePinned, mergeDuplicates,
|
|
20
20
|
purgeStale, purgeStalePreview, findDuplicates, maintenanceStats, rebuildVectors, vacuum,
|
|
21
|
+
recoverChildrenOf,
|
|
21
22
|
OP_CAP, STALE_AGE_MS, PINNED_INJ_THRESHOLD,
|
|
22
23
|
} from './lib/maintain-core.mjs';
|
|
23
24
|
import { optimizePreview, optimizeRun } from './hook-optimize.mjs';
|
|
@@ -32,7 +33,7 @@ import { readFileSync, existsSync, readdirSync } from 'fs';
|
|
|
32
33
|
// v2.41: shared CLI helpers extracted to cli/common.mjs. Keep this file as the
|
|
33
34
|
// router + remaining-command bodies during the incremental split. Future work:
|
|
34
35
|
// move each cmdXxx into its own cli/<cmd>.mjs; mem-cli.mjs becomes pure dispatch.
|
|
35
|
-
import { parseArgs, out, fail, relativeTime, fmtDateShort, parseIdToken, formatProbeHints } from './cli/common.mjs';
|
|
36
|
+
import { parseArgs, out, fail, relativeTime, fmtDateShort, parseIdToken, formatProbeHints, rejectBareStringFlags } from './cli/common.mjs';
|
|
36
37
|
import { saveObservation } from './lib/save-observation.mjs';
|
|
37
38
|
import { AUTO_MERGE_THRESHOLD } from './lib/dedup-constants.mjs';
|
|
38
39
|
import { countRecentHookErrors } from './lib/hook-telemetry.mjs';
|
|
@@ -667,6 +668,7 @@ function cmdGet(db, args) {
|
|
|
667
668
|
}
|
|
668
669
|
|
|
669
670
|
// Validate --fields against obs schema (only meaningful for obs rows).
|
|
671
|
+
if (rejectBareStringFlags(flags, ['fields', 'source'])) return;
|
|
670
672
|
let requestedFields = null;
|
|
671
673
|
if (flags.fields) {
|
|
672
674
|
const allRequested = flags.fields.split(',').map(s => s.trim());
|
|
@@ -713,6 +715,10 @@ function cmdGet(db, args) {
|
|
|
713
715
|
|
|
714
716
|
function cmdTimeline(db, args) {
|
|
715
717
|
const { positional, flags } = parseArgs(args);
|
|
718
|
+
// Bare `--query` parses to boolean true and crashed downstream in sanitizeFtsQuery
|
|
719
|
+
// (nlp.mjs string ops on a boolean). No sensible default for a search anchor — reject
|
|
720
|
+
// cleanly (#8470). (`--project` bare is absorbed by resolveProject's non-string guard.)
|
|
721
|
+
if (rejectBareStringFlags(flags, ['query'])) return;
|
|
716
722
|
// parseInt('-5') === -5 is truthy, so `|| 5` doesn't rescue negative input.
|
|
717
723
|
// Match cmdSearch's warn-then-default pattern for consistency across CLI flags.
|
|
718
724
|
const parseWindow = (label, raw) => {
|
|
@@ -944,6 +950,10 @@ function cmdSave(db, args) {
|
|
|
944
950
|
return;
|
|
945
951
|
}
|
|
946
952
|
|
|
953
|
+
// Reject value-less string flags before they reach .split()/saveObservation as a
|
|
954
|
+
// boolean `true` (#8470): bare --files/--title/--lesson crashed with a raw stacktrace.
|
|
955
|
+
if (rejectBareStringFlags(flags, ['title', 'files', 'lesson', 'lesson-learned', 'project', 'type'])) return;
|
|
956
|
+
|
|
947
957
|
const type = flags.type || 'discovery';
|
|
948
958
|
const validTypes = new Set(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);
|
|
949
959
|
if (!validTypes.has(type)) {
|
|
@@ -1070,6 +1080,8 @@ function cmdDeferAdd(db, args) {
|
|
|
1070
1080
|
fail(`[mem] defer add: title too long (${title.length} chars, max 200). Move detail to --detail "<text>".`);
|
|
1071
1081
|
return;
|
|
1072
1082
|
}
|
|
1083
|
+
// Reject bare --files/--detail/--project before .split()/bind sees a boolean true (#8470).
|
|
1084
|
+
if (rejectBareStringFlags(flags, ['files', 'detail', 'project'])) return;
|
|
1073
1085
|
const priority = flags.priority !== undefined ? parseInt(flags.priority, 10) : 2;
|
|
1074
1086
|
// isNumericToken first: bare parseInt would coerce "3xyz"→3 and silently escalate a
|
|
1075
1087
|
// deferred item's urgency. Float literals still truncate (#8277).
|
|
@@ -1614,11 +1626,19 @@ function cmdDelete(db, args) {
|
|
|
1614
1626
|
db.prepare('UPDATE observations SET related_ids = ? WHERE id = ?').run(JSON.stringify(filtered), r.id);
|
|
1615
1627
|
}
|
|
1616
1628
|
}
|
|
1617
|
-
|
|
1629
|
+
// Resurface any rows merged/compressed INTO the doomed keepers before deleting,
|
|
1630
|
+
// else they dangle behind a missing parent (compressed_into has no FK) — invisible
|
|
1631
|
+
// to every COALESCE(compressed_into,0)=0 view and unrecoverable. Same guard the
|
|
1632
|
+
// maintain hard-delete paths use (recoverChildrenOf); the interactive delete path
|
|
1633
|
+
// was missing it. Returned in the result so the user sees the recovery count.
|
|
1634
|
+
const recovered = recoverChildrenOf(db, ids);
|
|
1635
|
+
const deleted = db.prepare(`DELETE FROM observations WHERE id IN (${placeholders})`).run(...ids);
|
|
1636
|
+
return { changes: deleted.changes, recovered };
|
|
1618
1637
|
});
|
|
1619
1638
|
const result = deleteTx();
|
|
1620
1639
|
const missing = ids.filter(id => !rows.some(r => r.id === id));
|
|
1621
|
-
|
|
1640
|
+
const recoveredNote = result.recovered > 0 ? ` Recovered ${result.recovered} merged/compressed child observation(s) to live.` : '';
|
|
1641
|
+
out(`[mem] Deleted ${result.changes} observation(s).${recoveredNote}${missing.length > 0 ? ` Note: ID(s) ${missing.join(', ')} not found.` : ''}`);
|
|
1622
1642
|
}
|
|
1623
1643
|
|
|
1624
1644
|
// ─── Update ──────────────────────────────────────────────────────────────────
|
|
@@ -1644,18 +1664,10 @@ function cmdUpdate(db, args) {
|
|
|
1644
1664
|
return;
|
|
1645
1665
|
}
|
|
1646
1666
|
|
|
1647
|
-
// A value-less `--flag`
|
|
1648
|
-
//
|
|
1649
|
-
//
|
|
1650
|
-
|
|
1651
|
-
// — the same accidental shell-strip class the empty-title guard (#8470) catches.
|
|
1652
|
-
// Reject it cleanly for every string-valued update flag.
|
|
1653
|
-
for (const key of ['title', 'narrative', 'lesson', 'lesson-learned', 'concepts']) {
|
|
1654
|
-
if (flags[key] === true) {
|
|
1655
|
-
fail(`[mem] --${key} requires a value (received a bare flag with no value).`);
|
|
1656
|
-
return;
|
|
1657
|
-
}
|
|
1658
|
-
}
|
|
1667
|
+
// A value-less `--flag` parses to boolean `true` (cli/common.mjs parseArgs); for string
|
|
1668
|
+
// fields that would reach the SQLite bind as a raw "TypeError: SQLite3 can only bind ..."
|
|
1669
|
+
// (#8470). Reject cleanly via the shared guard — single source with the other commands.
|
|
1670
|
+
if (rejectBareStringFlags(flags, ['title', 'narrative', 'lesson', 'lesson-learned', 'concepts'])) return;
|
|
1659
1671
|
|
|
1660
1672
|
const updates = [];
|
|
1661
1673
|
const params = [];
|
|
@@ -2172,6 +2184,9 @@ function cmdRegistry(_memDb, args) {
|
|
|
2172
2184
|
|
|
2173
2185
|
try {
|
|
2174
2186
|
if (action === 'search') {
|
|
2187
|
+
// Bare `--query` parses to boolean true; `true || ...` would search for the literal
|
|
2188
|
+
// string "true". Reject it cleanly (#8470) before it becomes a confusing no-match.
|
|
2189
|
+
if (rejectBareStringFlags(flags, ['query', 'category', 'quality'])) return;
|
|
2175
2190
|
const query = flags.query || positional.slice(1).join(' ');
|
|
2176
2191
|
if (!query) { fail('[mem] Usage: claude-mem-lite registry search <query> [--type skill|agent] [--category C] [--quality Q]'); return; }
|
|
2177
2192
|
let results = searchResources(rdb, query, {
|
|
@@ -2260,12 +2275,9 @@ function cmdRegistry(_memDb, args) {
|
|
|
2260
2275
|
}
|
|
2261
2276
|
|
|
2262
2277
|
if (action === 'import') {
|
|
2263
|
-
//
|
|
2264
|
-
//
|
|
2265
|
-
|
|
2266
|
-
for (const key of ['name', 'resource-type', 'invocation-name', 'source', 'repo-url', 'local-path', 'intent-tags', 'domain-tags', 'trigger-patterns', 'capability-summary', 'keywords', 'tech-stack', 'use-cases']) {
|
|
2267
|
-
if (flags[key] === true) { fail(`[mem] --${key} requires a value (received a bare flag with no value).`); return; }
|
|
2268
|
-
}
|
|
2278
|
+
// Bare value-less flags → boolean true → SQLite-bind crash in upsertResource (#8470).
|
|
2279
|
+
// Shared guard — single source with update/remove/the other commands.
|
|
2280
|
+
if (rejectBareStringFlags(flags, ['name', 'resource-type', 'invocation-name', 'source', 'repo-url', 'local-path', 'intent-tags', 'domain-tags', 'trigger-patterns', 'capability-summary', 'keywords', 'tech-stack', 'use-cases'])) return;
|
|
2269
2281
|
const name = flags.name;
|
|
2270
2282
|
const resourceType = flags['resource-type'];
|
|
2271
2283
|
if (!name || !resourceType) { fail('[mem] Usage: claude-mem-lite registry import --name N --resource-type skill|agent [--invocation-name I] [--capability-summary S]'); return; }
|
|
@@ -2287,11 +2299,9 @@ function cmdRegistry(_memDb, args) {
|
|
|
2287
2299
|
}
|
|
2288
2300
|
|
|
2289
2301
|
if (action === 'remove') {
|
|
2290
|
-
// Bare value-less --name / --resource-type → boolean true → SQLite-bind crash
|
|
2291
|
-
//
|
|
2292
|
-
|
|
2293
|
-
if (flags[key] === true) { fail(`[mem] --${key} requires a value (received a bare flag with no value).`); return; }
|
|
2294
|
-
}
|
|
2302
|
+
// Bare value-less --name / --resource-type → boolean true → SQLite-bind crash on
|
|
2303
|
+
// the DELETE below; shared guard, single source with import/update.
|
|
2304
|
+
if (rejectBareStringFlags(flags, ['name', 'resource-type'])) return;
|
|
2295
2305
|
const name = flags.name;
|
|
2296
2306
|
const resourceType = flags['resource-type'];
|
|
2297
2307
|
if (!name || !resourceType) { fail('[mem] Usage: claude-mem-lite registry remove --name N --resource-type skill|agent'); return; }
|
package/memdir.mjs
CHANGED
|
@@ -219,33 +219,58 @@ export function writePluginSection(memdir, { slug, version, contentLine, force =
|
|
|
219
219
|
/**
 * Remove the plugin's sentinel block plus its state sidecar. External content
 * in MEMORY.md is preserved.
 *
 * Foreign-content guard (symmetric with writePluginSection): a sentinel block with
 * NO state sidecar is content we cannot prove the plugin authored — the user may have
 * pasted plugin docs or quoted a sentinel example. Without `force`, such a block is
 * LEFT IN PLACE (action 'skipped-foreign') instead of being silently deleted.
 *
 * Whitespace handling: only the run of newlines that straddles the removal seam is
 * collapsed (to at most one blank line). Blank-line runs elsewhere in the file are
 * user-authored structure and are left untouched. Leading whitespace is stripped only
 * when the removed block was the very first content of the file.
 *
 * @param {string} memdir
 * @param {string} slug
 * @param {{force?: boolean}} [opts] force=true removes even a no-state (foreign) block.
 * @returns {{action: 'removed'|'absent'|'skipped-foreign'}}
 */
export function removePluginSection(memdir, slug, { force = false } = {}) {
  const path = memoryFile(memdir);
  // No file / no sentinel: nothing to remove, but still drop any orphaned sidecar.
  if (!existsSync(path)) { clearState(memdir, slug); return { action: 'absent' }; }
  const raw = readFileSync(path, 'utf8');
  const match = raw.match(sentinelRegex(slug));
  if (!match) { clearState(memdir, slug); return { action: 'absent' }; }

  // Only remove a block we have a state sidecar for (proof we wrote it), unless forced.
  if (!readState(memdir, slug) && !force) {
    return { action: 'skipped-foreign' };
  }
  clearState(memdir, slug);

  // Delete the match plus a trailing newline + a preceding blank line so we
  // don't leave a stranded paragraph gap.
  const blockAtStart = match.index === 0;
  let start = match.index;
  let end = match.index + match[0].length;
  if (raw[end] === '\n') end++;
  if (start > 0 && raw.slice(0, start).endsWith('\n\n')) start--;

  // Collapse ONLY the newline run that crosses the seam. A file-wide
  // `/\n{3,}/g` replace would also rewrite blank-line runs the user authored
  // far away from our block.
  const head = raw.slice(0, start);
  const tail = raw.slice(end);
  const headCore = head.replace(/\n+$/, '');
  const tailCore = tail.replace(/^\n+/, '');
  const seamNewlines = (head.length - headCore.length) + (tail.length - tailCore.length);
  let next = headCore + '\n'.repeat(Math.min(seamNewlines, 2)) + tailCore;

  // Strip leading whitespace only when OUR block was the file's first content.
  if (blockAtStart) next = next.replace(/^\s+/, '');
  atomicWrite(path, next);
  return { action: 'removed' };
}
|
|
248
265
|
|
|
266
|
+
/**
 * Whether a plugin state sidecar exists for this memdir — i.e. the plugin can prove it
 * wrote the sentinel. Used by unadopt's dry-run to predict the foreign-content skip.
 *
 * Uses the same truthiness test as removePluginSection's guard (`!readState(...)`), so
 * the prediction cannot diverge from the real removal if readState ever yields a
 * falsy value other than `null` (e.g. `undefined`) for a missing sidecar.
 *
 * @param {string} memdir
 * @param {string} slug
 * @returns {boolean} true when readState returns a truthy state for (memdir, slug).
 */
export function hasPluginState(memdir, slug) {
  return Boolean(readState(memdir, slug));
}
|
|
273
|
+
|
|
249
274
|
/**
|
|
250
275
|
* Whether this memdir has our sentinel. Body edits don't demote the adoption —
|
|
251
276
|
* users who hand-tweak the contract line still count as adopted.
|
package/nlp.mjs
CHANGED
|
@@ -11,6 +11,23 @@ export { SYNONYM_MAP, CJK_COMPOUNDS };
|
|
|
11
11
|
|
|
12
12
|
const FTS5_KEYWORDS = new Set(['AND', 'OR', 'NOT', 'NEAR']);
|
|
13
13
|
|
|
14
|
+
/**
 * Decide whether a CJK bigram is grammatical noise that must stay out of an FTS query
 * and out of the precision gate's `required` set.
 *
 * CJK_STOP_WORDS mixes single-char particles (的/了/是…) with a few whole multi-char
 * fillers (什么/怎么…). A bare `CJK_STOP_WORDS.has(bg)` only catches the whole-filler
 * case, so a particle pair such as `的了` / `了是` would slip through, force an
 * unsatisfiable AND term, and leave an all-particle query with a non-empty `required`
 * set that rejects every candidate.
 *
 * A bigram is treated as noise when:
 *   - it is itself an entry of CJK_STOP_WORDS, or
 *   - it is exactly two characters long and BOTH characters are entries.
 *
 * A bigram with only ONE stop char (有效, 目的) is deliberately kept — those are real
 * compounds, and telling a boundary-straddle (的全) from a genuine compound needs a
 * dictionary/recall benchmark (deferred).
 *
 * @param {string} bg Candidate bigram (callers filter out empty strings first).
 * @returns {boolean} true when the bigram should be dropped.
 */
function isCjkNoiseBigram(bg) {
  // Whole-filler hit (什么, 怎么, …) or a lone particle passed through as-is.
  if (CJK_STOP_WORDS.has(bg)) {
    return true;
  }
  // Only a two-character token can be a particle pair.
  if (bg.length !== 2) {
    return false;
  }
  const [lead, trail] = [bg[0], bg[1]];
  return CJK_STOP_WORDS.has(lead) && CJK_STOP_WORDS.has(trail);
}
|
|
30
|
+
|
|
14
31
|
// Sort by length descending for greedy matching
|
|
15
32
|
const CJK_SORTED = [...CJK_COMPOUNDS].sort((a, b) => b.length - a.length);
|
|
16
33
|
|
|
@@ -177,7 +194,7 @@ export function cjkPrecisionOk(query, text, threshold) {
|
|
|
177
194
|
const keywords = extractCjkKeywords(query);
|
|
178
195
|
const required = keywords.length > 0
|
|
179
196
|
? keywords
|
|
180
|
-
: cjkBigrams(query).split(' ').filter(b => b && !
|
|
197
|
+
: cjkBigrams(query).split(' ').filter(b => b && !isCjkNoiseBigram(b));
|
|
181
198
|
if (required.length === 0) return true;
|
|
182
199
|
const hit = required.filter(w => text.includes(w)).length;
|
|
183
200
|
return (hit / required.length) >= threshold;
|
|
@@ -254,7 +271,7 @@ export function sanitizeFtsQuery(query) {
|
|
|
254
271
|
const gapBigrams = cjkBigrams(remainder);
|
|
255
272
|
if (gapBigrams) {
|
|
256
273
|
for (const bg of gapBigrams.split(' ')) {
|
|
257
|
-
if (bg && !
|
|
274
|
+
if (bg && !isCjkNoiseBigram(bg) && !matched.has(bg)) expandedTokens.push(bg);
|
|
258
275
|
}
|
|
259
276
|
}
|
|
260
277
|
continue;
|
|
@@ -278,7 +295,7 @@ export function sanitizeFtsQuery(query) {
|
|
|
278
295
|
);
|
|
279
296
|
if (pureCjkTokens.length > 0) bigrams = cjkBigrams(pureCjkTokens.join(' '));
|
|
280
297
|
}
|
|
281
|
-
const bigramSet = new Set(bigrams ? bigrams.split(' ').filter(b => b && !
|
|
298
|
+
const bigramSet = new Set(bigrams ? bigrams.split(' ').filter(b => b && !isCjkNoiseBigram(b)) : []);
|
|
282
299
|
const hasBigrams = bigramSet.size > 0;
|
|
283
300
|
const finalTokens = [];
|
|
284
301
|
const seen = new Set();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.94.0",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"packageManager": "npm@10.9.2",
|
package/project-utils.mjs
CHANGED
|
@@ -14,6 +14,12 @@ const _cache = new Map();
|
|
|
14
14
|
*/
|
|
15
15
|
export function resolveProject(db, name) {
|
|
16
16
|
if (!name) return name;
|
|
17
|
+
// Defense-in-depth: a bare `--project` CLI flag parses to boolean `true` (and a
|
|
18
|
+
// malformed MCP/hook caller could pass any non-string). `true.includes('--')` below
|
|
19
|
+
// throws a raw TypeError that crashed search/recent/timeline/stats/export/defer-list.
|
|
20
|
+
// Treat any non-string as "no project filter" (null) — the degradation every caller
|
|
21
|
+
// already handles for an absent --project — instead of crashing at the root helper.
|
|
22
|
+
if (typeof name !== 'string') return null;
|
|
17
23
|
if (_cache.has(name)) return _cache.get(name);
|
|
18
24
|
// Already a canonical name (contains "--")? Use as-is.
|
|
19
25
|
if (name.includes('--')) { _cache.set(name, name); return name; }
|
package/registry-importer.mjs
CHANGED
|
@@ -336,10 +336,15 @@ export async function importFromGitHub(db, url, opts = {}) {
|
|
|
336
336
|
indexed_at: new Date().toISOString(),
|
|
337
337
|
});
|
|
338
338
|
|
|
339
|
-
// 5g. Update repo_forks and repo_updated_at (not in upsert SQL)
|
|
339
|
+
// 5g. Update repo_forks and repo_updated_at (not in upsert SQL).
|
|
340
|
+
// Do NOT touch quality_tier here: UPSERT_SQL never writes it, so a first insert
|
|
341
|
+
// gets the column DEFAULT 'community' and a re-import preserves whatever tier the
|
|
342
|
+
// row reached. Re-stamping 'community' downgraded enrichment-promoted tiers
|
|
343
|
+
// (verified/installed → community) on every content re-import, silently lowering
|
|
344
|
+
// the resource's composite search rank (quality_tier earns a ranking bonus in registry-retriever's COMPOSITE_EXPR).
|
|
340
345
|
db.prepare(
|
|
341
|
-
'UPDATE resources SET repo_forks = ?, repo_updated_at =
|
|
342
|
-
).run(repoForks, repoUpdatedAt,
|
|
346
|
+
'UPDATE resources SET repo_forks = ?, repo_updated_at = ? WHERE id = ?'
|
|
347
|
+
).run(repoForks, repoUpdatedAt, resourceId);
|
|
343
348
|
|
|
344
349
|
results.push({ name, type: item.type, id: resourceId });
|
|
345
350
|
debugLog('INFO', 'importer', `Imported ${item.type}:${name} (id=${resourceId})`);
|
package/registry-retriever.mjs
CHANGED
|
@@ -284,6 +284,10 @@ export function filterByProjectDomain(results, projectDomains) {
|
|
|
284
284
|
//
|
|
285
285
|
// Composite ranking formula:
|
|
286
286
|
// 40% BM25 text relevance
|
|
287
|
+
// Quality-tier bonus: bounded additive (installed -0.15, verified -0.075). Was a
|
|
288
|
+
// MULTIPLIER on the BM25 term, which scaled the magnitude of a variable, unbounded,
|
|
289
|
+
// NEGATIVE signal — letting a weakly-matching installed resource (×3) outrank a
|
|
290
|
+
// strongly-matching community one. Additive keeps tier a promotion, not an override.
|
|
287
291
|
// 15% Star popularity (saturation normalization — diminishing returns after ~500 stars)
|
|
288
292
|
// 15% Success rate (Laplace smoothing — Beta prior α=1, β=1 for small-sample robustness)
|
|
289
293
|
// 10% Adoption rate (Laplace smoothing)
|
|
@@ -301,10 +305,10 @@ export function filterByProjectDomain(results, projectDomains) {
|
|
|
301
305
|
// Sign convention: more negative = better. BM25 is negative, behavioral signals are subtracted.
|
|
302
306
|
const COMPOSITE_EXPR = `(
|
|
303
307
|
bm25(resources_fts, 3.0, 3.0, 3.0, 2.0, 2.0, 1.0, 1.0, 1.0) * 0.4
|
|
304
|
-
|
|
305
|
-
WHEN 'installed' THEN
|
|
306
|
-
WHEN 'verified' THEN
|
|
307
|
-
ELSE
|
|
308
|
+
- CASE COALESCE(r.quality_tier, 'community')
|
|
309
|
+
WHEN 'installed' THEN 0.15
|
|
310
|
+
WHEN 'verified' THEN 0.075
|
|
311
|
+
ELSE 0
|
|
308
312
|
END
|
|
309
313
|
- COALESCE(r.repo_stars * 1.0 / (r.repo_stars + 100.0), 0) * 0.15
|
|
310
314
|
- (
|
|
@@ -347,7 +351,7 @@ const SEARCH_SQL = `
|
|
|
347
351
|
WHERE resources_fts MATCH ?
|
|
348
352
|
AND r.status = 'active'
|
|
349
353
|
) sub
|
|
350
|
-
ORDER BY composite_score ASC
|
|
354
|
+
ORDER BY composite_score ASC, id ASC
|
|
351
355
|
LIMIT ?
|
|
352
356
|
`;
|
|
353
357
|
|
|
@@ -362,7 +366,7 @@ const SEARCH_BY_TYPE_SQL = `
|
|
|
362
366
|
AND r.status = 'active'
|
|
363
367
|
AND r.type = ?
|
|
364
368
|
) sub
|
|
365
|
-
ORDER BY composite_score ASC
|
|
369
|
+
ORDER BY composite_score ASC, id ASC
|
|
366
370
|
LIMIT ?
|
|
367
371
|
`;
|
|
368
372
|
|
package/schema.mjs
CHANGED
|
@@ -382,6 +382,7 @@ export function initSchema(db) {
|
|
|
382
382
|
|
|
383
383
|
// FTS5 migration: recreate observations_fts when columns are missing (one-time)
|
|
384
384
|
// Detect old FTS5 table missing lesson_learned or search_aliases and recreate with full column set
|
|
385
|
+
let obsFtsRecreated = false;
|
|
385
386
|
try {
|
|
386
387
|
const ftsDdl = db.prepare(`SELECT sql FROM sqlite_master WHERE type='table' AND name='observations_fts'`).get();
|
|
387
388
|
if (ftsDdl && (!ftsDdl.sql.includes('lesson_learned') || !ftsDdl.sql.includes('search_aliases'))) {
|
|
@@ -389,6 +390,7 @@ export function initSchema(db) {
|
|
|
389
390
|
db.exec(`DROP TRIGGER IF EXISTS observations_ad`);
|
|
390
391
|
db.exec(`DROP TRIGGER IF EXISTS observations_au`);
|
|
391
392
|
db.exec(`DROP TABLE IF EXISTS observations_fts`);
|
|
393
|
+
obsFtsRecreated = true;
|
|
392
394
|
}
|
|
393
395
|
} catch { /* non-critical — ensureFTS will create if missing */ }
|
|
394
396
|
|
|
@@ -416,14 +418,19 @@ export function initSchema(db) {
|
|
|
416
418
|
ensureFTS(db, 'session_summaries_fts', 'session_summaries', ['request', 'investigated', 'learned', 'completed', 'next_steps', 'notes', 'remaining_items']);
|
|
417
419
|
ensureFTS(db, 'user_prompts_fts', 'user_prompts', ['prompt_text']);
|
|
418
420
|
|
|
419
|
-
// Rebuild FTS5 if we just recreated it (
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
421
|
+
// Rebuild FTS5 if we just recreated it above (the new index is empty and must be
|
|
422
|
+
// populated from the content table). The old emptiness probe — `SELECT COUNT(*) FROM
|
|
423
|
+
// observations_fts` — was DEAD: for an external-content FTS5 table, COUNT reads the
|
|
424
|
+
// CONTENT table (observations), not the index, so `ftsCount === 0` was only ever true
|
|
425
|
+
// on an empty DB (where needsRebuild>0 is false). The rebuild therefore never fired and
|
|
426
|
+
// full-text search silently returned 0 rows after the column-mismatch migration. Gate
|
|
427
|
+
// on the recreation flag instead, which is the only path that leaves the index empty.
|
|
428
|
+
if (obsFtsRecreated) {
|
|
429
|
+
try {
|
|
430
|
+
const cnt = db.prepare(`SELECT COUNT(*) as cnt FROM observations`).get();
|
|
431
|
+
if (cnt.cnt > 0) db.exec(`INSERT INTO observations_fts(observations_fts) VALUES('rebuild')`);
|
|
432
|
+
} catch { /* non-critical */ }
|
|
433
|
+
}
|
|
427
434
|
|
|
428
435
|
// v36 migration: narrow events_fts_au like the v27 fix above. The events FTS
|
|
429
436
|
// triggers were hand-written inline (below) rather than via ensureFTS, so
|
package/search-engine.mjs
CHANGED
|
@@ -179,7 +179,11 @@ export function countSearchTotal(db, {
|
|
|
179
179
|
export function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
|
|
180
180
|
return {
|
|
181
181
|
source: 'obs', id: r.id, type: r.type, title: r.title, subtitle: r.subtitle,
|
|
182
|
-
|
|
182
|
+
// `date` is the legacy key the MCP paired-search path reads; `created_at` aligns the
|
|
183
|
+
// obs row shape with the session/prompt rows the CLI interleaves in the same results
|
|
184
|
+
// array (cmdSearch reads r.created_at uniformly) and with recent/recall output. Both
|
|
185
|
+
// hold the same ISO string — keep both so neither consumer breaks.
|
|
186
|
+
project: r.project, date: r.created_at, created_at: r.created_at, created_at_epoch: r.created_at_epoch,
|
|
183
187
|
score: scoreMultiplier ? r.score * scoreMultiplier : r.score,
|
|
184
188
|
files_modified: r.files_modified, importance: r.importance, lesson_learned: r.lesson_learned,
|
|
185
189
|
snippet: snippet ? (r.match_snippet || '') : '',
|
package/secret-scrub.mjs
CHANGED
|
@@ -18,8 +18,17 @@ export const SECRET_PATTERNS = [
|
|
|
18
18
|
// 2. Structured keys (api_key, auth_token, …) keep the original behavior —
|
|
19
19
|
// a separator/compound key is unambiguous config syntax even when
|
|
20
20
|
// preceded by prose ("see auth_token: shhhhhh").
|
|
21
|
-
|
|
22
|
-
|
|
21
|
+
// `(?:\b|_)` before the keyword: a plain word-boundary misses the single most
|
|
22
|
+
// common credential shape — underscore-cased env vars (DB_PASSWORD, GH_TOKEN,
|
|
23
|
+
// MY_AUTH_TOKEN) — because `_` is a \w char, so there is NO \b between it and the
|
|
24
|
+
// keyword. Allowing a leading `_` catches those while the prose lookbehind still
|
|
25
|
+
// excludes "Marker token: …". `secret` added so a bare SECRET=… with a mixed-alnum
|
|
26
|
+
// value is covered (the hex-only assignment pattern below misses non-hex values).
|
|
27
|
+
[/((?<![A-Za-z][ \t])(?:\b|_)(?:password|passwd|token|bearer|secret)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
|
|
28
|
+
// access_token / refresh_token are the canonical OAuth2 field names — they were
|
|
29
|
+
// missing from this KV list (drift vs the JSON list below). `(?:\b|_)` for the same
|
|
30
|
+
// underscore-prefix reason.
|
|
31
|
+
[/((?:\b|_)(?:api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|access[_-]?token|refresh[_-]?token)\s*[=:]\s*)(?!process\.env\.)(?!new\s)(?!\w+\()(?!(?:null|undefined|true|false|None|nil|empty|""|''|0)\b)[^\s,;'"}\]]{6,}/gi, '$1***'],
|
|
23
32
|
// AWS access keys (AKIA...)
|
|
24
33
|
[/\bAKIA[A-Z0-9]{16}\b/g, '***'],
|
|
25
34
|
// OpenAI / Anthropic keys (sk-...) — specific prefixes have lower length threshold
|
|
@@ -52,14 +61,35 @@ export const SECRET_PATTERNS = [
|
|
|
52
61
|
[/\bnpm_[a-zA-Z0-9]{36,}\b/g, '***'],
|
|
53
62
|
// Stripe keys (sk_live_, rk_live_, pk_live_, sk_test_, pk_test_)
|
|
54
63
|
[/\b[srp]k_(?:live|test)_[a-zA-Z0-9]{20,}\b/g, '***'],
|
|
64
|
+
// SendGrid API keys: SG.<22>.<43> — two dots at fixed offsets make this
|
|
65
|
+
// structurally unmistakable; near-zero false-positive risk.
|
|
66
|
+
[/\bSG\.[A-Za-z0-9_-]{22}\.[A-Za-z0-9_-]{43}\b/g, '***'],
|
|
67
|
+
// Twilio identifiers: Account SID (AC…) + API Key SID (SK…), each = prefix
|
|
68
|
+
// + exactly 32 hex. The 2-letter prefix + 32-hex shape is specific: an MD5
|
|
69
|
+
// is 32 hex (no AC/SK prefix → no match) and a 40-hex git SHA has no internal
|
|
70
|
+
// \b so the trailing \b can't land mid-string. We deliberately do NOT scrub
|
|
71
|
+
// the bare-hex Twilio *auth token* — see comment block at end re: SHA collision.
|
|
72
|
+
[/\b(?:AC|SK)[0-9a-f]{32}\b/g, '***'],
|
|
73
|
+
// Mailgun private API keys: key-<32 hex>. Prefix-anchored for the same reason;
|
|
74
|
+
// bare 32-hex (no `key-`) is intentionally left alone to avoid hashing FPs.
|
|
75
|
+
[/\bkey-[0-9a-f]{32}\b/g, '***'],
|
|
55
76
|
// JSON-quoted secrets — error payloads / API responses commonly carry creds
|
|
56
77
|
// as `{"api_key": "..."}`. The base key=value pattern stops at quotes, so
|
|
57
78
|
// these slip through. Match the value-quoted form explicitly. Length floor
|
|
58
79
|
// (6) avoids tripping on intentional placeholder shorts under six characters ("...", "xxx").
|
|
59
|
-
[/("(?:password|passwd|token|api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|private[_-]?key|client[_-]?secret|auth[_-]?token|bearer|refresh[_-]?token|session[_-]?id|sessionid)"\s*:\s*")[^"]{6,}(")/gi, '$1***$2'],
|
|
80
|
+
[/("(?:password|passwd|token|api[_-]?key|api[_-]?secret|secret[_-]?key|access[_-]?key|access[_-]?token|private[_-]?key|client[_-]?secret|auth[_-]?token|bearer|refresh[_-]?token|session[_-]?id|sessionid)"\s*:\s*")[^"]{6,}(")/gi, '$1***$2'],
|
|
60
81
|
// Session cookies in headers / urlencoded bodies (sessionid=, session_id=, JSESSIONID=, PHPSESSID=).
|
|
61
82
|
// 16+ chars filters out short test fixtures like sessionid=abc.
|
|
62
83
|
[/\b((?:session[_-]?id|sessionid|jsessionid|phpsessid)\s*[=:]\s*)[^\s,;'"}\]]{16,}/gi, '$1***'],
|
|
84
|
+
// ── DELIBERATELY NOT COVERED: bare high-entropy / "raw N-char" tokens ──────
|
|
85
|
+
// A generic `[A-Fa-f0-9]{40}` / high-entropy regex would scrub this repo's own
|
|
86
|
+
// legitimate data: 40-hex git SHAs, 32-hex MD5s, 64-hex SHA256s, and stored
|
|
87
|
+
// `minhash_sig` values. In a hash-heavy codebase the false-positive cost
|
|
88
|
+
// (silent `***` over real content, lost recall) exceeds the marginal catch —
|
|
89
|
+
// and an entropy gate doesn't help because git SHAs are themselves high-entropy.
|
|
90
|
+
// The contextual forms (token=…, Authorization: Bearer …, "api_key":"…") above
|
|
91
|
+
// already cover the dangerous *labelled* shapes. If you are tempted to add a
|
|
92
|
+
// bare-token pattern here: don't — anchor it to a provider prefix instead.
|
|
63
93
|
];
|
|
64
94
|
|
|
65
95
|
/**
|
package/server.mjs
CHANGED
|
@@ -15,6 +15,7 @@ import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from '
|
|
|
15
15
|
import {
|
|
16
16
|
cleanupBroken, decayAndMarkIdle, boostAccessed, demotePinned, mergeDuplicates,
|
|
17
17
|
purgeStale, purgeStalePreview, findDuplicates, maintenanceStats, rebuildVectors, vacuum,
|
|
18
|
+
recoverChildrenOf,
|
|
18
19
|
OP_CAP, STALE_AGE_MS,
|
|
19
20
|
} from './lib/maintain-core.mjs';
|
|
20
21
|
import { effectiveQuiet, RUNTIME_DIR } from './hook-shared.mjs';
|
|
@@ -926,13 +927,20 @@ server.registerTool(
|
|
|
926
927
|
db.prepare('UPDATE observations SET related_ids = ? WHERE id = ?').run(JSON.stringify(filtered), r.id);
|
|
927
928
|
}
|
|
928
929
|
}
|
|
930
|
+
// Resurface rows merged/compressed INTO the doomed keepers before deleting, else
|
|
931
|
+
// they dangle behind a now-missing parent (compressed_into has no FK) — invisible
|
|
932
|
+
// to every COALESCE(compressed_into,0)=0 view and unrecoverable. Mirrors the CLI
|
|
933
|
+
// delete path + the maintain hard-delete guard (recoverChildrenOf).
|
|
934
|
+
const recovered = recoverChildrenOf(db, args.ids);
|
|
929
935
|
// Execute deletion (FTS5 cleanup handled by observations_ad trigger)
|
|
930
|
-
|
|
936
|
+
const deleted = db.prepare(`DELETE FROM observations WHERE id IN (${placeholders})`).run(...args.ids);
|
|
937
|
+
return { changes: deleted.changes, recovered };
|
|
931
938
|
});
|
|
932
939
|
const result = deleteTx();
|
|
933
940
|
|
|
934
941
|
const missing = args.ids.filter(id => !rows.some(r => r.id === id));
|
|
935
942
|
const msg = [`Deleted ${result.changes} observation(s).`];
|
|
943
|
+
if (result.recovered > 0) msg.push(`Recovered ${result.recovered} merged/compressed child observation(s) to live.`);
|
|
936
944
|
if (missing.length > 0) msg.push(`Note: ID(s) ${missing.join(', ')} not found.`);
|
|
937
945
|
return { content: [{ type: 'text', text: msg.join(' ') }] };
|
|
938
946
|
})
|
package/synonyms.mjs
CHANGED
|
@@ -265,6 +265,13 @@ export const CJK_COMPOUNDS = new Set([
|
|
|
265
265
|
// architecture
|
|
266
266
|
'架构', '设计', '方案', '规划', '文档', '注释', '版本', '分支', '依赖',
|
|
267
267
|
'性能', '安全', '漏洞', '补丁', '系统', '算法',
|
|
268
|
+
// common task/dev vocab — mined from the zero-dict-keyword prompt slice
|
|
269
|
+
// (benchmark/cjk-straddle-prevalence.mjs). These ubiquitous words were absent
|
|
270
|
+
// from the dictionary, so ~15% of real CJK queries fell through to all-bigram
|
|
271
|
+
// noise. Adding real words is monotonically safe: greedy longest-match only
|
|
272
|
+
// improves, and real compounds cannot create boundary-straddle bigrams.
|
|
273
|
+
'工作', '用户', '完成', '计划', '命令', '工具', '插件', '实施', '处理',
|
|
274
|
+
'清理', '显示', '本地', '改动', '确认', '直接', '开始',
|
|
268
275
|
]);
|
|
269
276
|
|
|
270
277
|
// ─── Dispatch Synonyms (unidirectional, broader groupings) ──────────────────
|
package/tier.mjs
CHANGED
|
@@ -44,9 +44,12 @@ export function computeTier(obs, ctx) {
|
|
|
44
44
|
return 'working';
|
|
45
45
|
}
|
|
46
46
|
|
|
47
|
-
// Rule 5: Active if within type-specific window
|
|
47
|
+
// Rule 5: Active if within type-specific window. Use `<=` so the exact-millisecond
|
|
48
|
+
// window edge matches TIER_CASE_SQL (`created_at_epoch >= now - window`, i.e. inclusive).
|
|
49
|
+
// The strict `<` here disagreed with the SQL classifier by one tier at the boundary,
|
|
50
|
+
// despite both being documented as the same classifier.
|
|
48
51
|
const activeWindow = ACTIVE_WINDOWS[obs.type] ?? DEFAULT_ACTIVE_WINDOW_MS;
|
|
49
|
-
if (now - obs.created_at_epoch
|
|
52
|
+
if (now - obs.created_at_epoch <= activeWindow) return 'active';
|
|
50
53
|
|
|
51
54
|
// Rule 6: Archive (fallback)
|
|
52
55
|
return 'archive';
|
package/utils.mjs
CHANGED
|
@@ -77,8 +77,11 @@ export function estimateTokens(text) {
|
|
|
77
77
|
* @returns {number} Clamped integer importance (1, 2, or 3)
|
|
78
78
|
*/
|
|
79
79
|
export function clampImportance(val) {
  // Numbers pass through; numeric strings are coerced so an LLM emitting
  // "importance":"2" (quoted) keeps its signal. Every other type falls back to 1.
  let numeric;
  switch (typeof val) {
    case 'number':
      numeric = val;
      break;
    case 'string':
      numeric = Number(val);
      break;
    default:
      return 1;
  }
  // NaN (non-numeric strings) and ±Infinity fall back to the lowest importance.
  if (!Number.isFinite(numeric)) return 1;
  const rounded = Math.round(numeric);
  if (rounded < 1) return 1;
  if (rounded > 3) return 3;
  return rounded;
}
|
|
83
86
|
|
|
84
87
|
/**
|
|
@@ -267,9 +270,39 @@ export function debugCatch(e, context) {
|
|
|
267
270
|
|
|
268
271
|
// ─── JSON Parsing ────────────────────────────────────────────────────────────
|
|
269
272
|
|
|
273
|
+
/**
 * Extract the first balanced JSON object or array substring from text that actually
 * parses as JSON. Candidates are anchored on each `{` / `[` opener in order of
 * appearance; the scan honors strings and escapes so brackets inside string values
 * don't throw off the depth count.
 *
 * Used to recover a payload when the LLM wrapped it in prose that ALSO contains
 * braces or brackets. Anchoring only on the very first opener is defeated by leading
 * prose such as `use {braces}` or `see [docs]`, so later openers are retried until a
 * span parses.
 *
 * @param {string} text Raw text to scan
 * @returns {string|null} The first balanced span that is valid JSON. When no candidate
 *   parses, the balanced span of the first opener (not valid JSON), or null when there
 *   is no opener or the first opener never closes.
 */
function firstBalancedJsonObject(text) {
  // Cap the retries so opener-heavy garbage ("[[[[[…") cannot trigger a quadratic scan.
  const MAX_CANDIDATES = 32;

  // Index of the earliest `{` or `[` at or after `from`, or -1 when neither occurs.
  const nextOpener = (from) => {
    const braceAt = text.indexOf('{', from);
    const brackAt = text.indexOf('[', from);
    if (braceAt === -1) return brackAt;
    if (brackAt === -1) return braceAt;
    return Math.min(braceAt, brackAt);
  };

  // Balanced span beginning at the opener at `start`, or null when it never closes.
  // Only the opener's own bracket kind is depth-counted; string contents are skipped.
  const balancedFrom = (start) => {
    const open = text[start];
    const close = open === '[' ? ']' : '}';
    let depth = 0;
    let inStr = false;
    let esc = false;
    for (let i = start; i < text.length; i++) {
      const c = text[i];
      if (inStr) {
        if (esc) esc = false;
        else if (c === '\\') esc = true;
        else if (c === '"') inStr = false;
      } else if (c === '"') {
        inStr = true;
      } else if (c === open) {
        depth++;
      } else if (c === close && --depth === 0) {
        return text.slice(start, i + 1);
      }
    }
    return null;
  };

  // Result for the very first opener; returned unchanged when no candidate parses.
  let fallback = null;
  let start = nextOpener(0);
  for (let attempt = 0; start !== -1 && attempt < MAX_CANDIDATES; attempt++) {
    const span = balancedFrom(start);
    if (attempt === 0) fallback = span;
    if (span !== null) {
      try {
        // Validation only: the caller parses the returned span itself.
        JSON.parse(span);
        return span;
      } catch {
        // Not JSON (prose braces, markdown link text, …): try the next opener.
      }
    }
    start = nextOpener(start + 1);
  }
  return fallback;
}
|
|
302
|
+
|
|
270
303
|
/**
|
|
271
304
|
* Parse JSON from LLM output, handling markdown fences and embedded objects.
|
|
272
|
-
* Tries: direct parse → fenced code block →
|
|
305
|
+
* Tries: direct parse → fenced code block → first balanced object or array → greedy `{…}` regex.
|
|
273
306
|
* @param {string} text Raw LLM output text
|
|
274
307
|
* @returns {object|null} Parsed JSON object or null on failure
|
|
275
308
|
*/
|
|
@@ -278,6 +311,10 @@ export function parseJsonFromLLM(text) {
|
|
|
278
311
|
try { return JSON.parse(text); } catch {}
|
|
279
312
|
const fenced = text.match(/```(?:json)?\s*([\s\S]*?)\s*```/);
|
|
280
313
|
if (fenced) try { return JSON.parse(fenced[1]); } catch {}
|
|
314
|
+
// First balanced object — survives unfenced output wrapped in brace-containing prose.
|
|
315
|
+
const balanced = firstBalancedJsonObject(text);
|
|
316
|
+
if (balanced) try { return JSON.parse(balanced); } catch {}
|
|
317
|
+
// Last-resort greedy span (handles a payload that isn't the FIRST balanced object).
|
|
281
318
|
const obj = text.match(/\{[\s\S]*\}/);
|
|
282
319
|
if (obj) try { return JSON.parse(obj[0]); } catch {}
|
|
283
320
|
return null;
|