claude-mem-lite 2.34.2 → 2.34.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -9,12 +9,23 @@ const CONFIRM_RE = /^(y(es)?|no?|ok|done|go|sure|lgtm|thanks?|ty|继续|确认|
|
|
|
9
9
|
const SLASH_CMD_RE = /^\//;
|
|
10
10
|
const PURE_OP_RE = /^(git\s+(commit|push|merge)|npm\s+(publish|deploy))\b/i;
|
|
11
11
|
|
|
12
|
+
/**
 * CJK-weighted effective length of a prompt.
 *
 * CJK characters (CJK Unified Ideographs main block U+4E00–U+9FFF plus
 * Extension A U+3400–U+4DBF) carry ~3x the semantic token density of Latin
 * characters — a 5-char Chinese phrase like "优化数据库" encodes roughly the
 * same information as a 15-char English equivalent. Each such character
 * therefore counts as 3 units; every other UTF-16 code unit counts as 1.
 * Length gates calibrated on Latin text (thresholds 8 / 15) compare against
 * this value so they don't falsely reject substantive CJK prompts.
 *
 * @param {string} text - Prompt text to measure. Not trimmed here; callers
 *   that want surrounding whitespace ignored must trim first.
 * @returns {number} Weighted length; 0 for empty, null/undefined, or any
 *   non-string input (previously a truthy non-string threw a TypeError
 *   from `.match`).
 */
export function computeEffectiveLen(text) {
  // Non-strings have no meaningful length: report 0 so length gates treat
  // them as "too short" instead of crashing on a missing `.match` method.
  if (typeof text !== 'string' || text.length === 0) return 0;

  const cjkMatches = text.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g);
  const cjkCount = cjkMatches === null ? 0 : cjkMatches.length;

  // Every char already contributes 1 via text.length; CJK chars add 2 more
  // for a total weight of 3. Equivalent to (length - cjk) + cjk * 3.
  return text.length + cjkCount * 2;
}
|
|
25
|
+
|
|
12
26
|
export function shouldSkip(text) {
|
|
13
27
|
if (!text) return true;
|
|
14
|
-
|
|
15
|
-
const cjkCount = (text.match(/[\u4e00-\u9fff\u3400-\u4dbf]/g) || []).length;
|
|
16
|
-
const effectiveLen = (text.length - cjkCount) + cjkCount * 3;
|
|
17
|
-
if (effectiveLen < 8) return true;
|
|
28
|
+
if (computeEffectiveLen(text) < 8) return true;
|
|
18
29
|
const trimmed = text.trim();
|
|
19
30
|
if (CONFIRM_RE.test(trimmed)) return true;
|
|
20
31
|
if (SLASH_CMD_RE.test(trimmed)) return true;
|
|
@@ -8,7 +8,7 @@ import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject,
|
|
|
8
8
|
import { writeFileSync, readFileSync, existsSync, renameSync } from 'fs';
|
|
9
9
|
import { join } from 'path';
|
|
10
10
|
import Database from 'better-sqlite3';
|
|
11
|
-
import { shouldSkip, detectIntent, shouldSkipByDedup, extractFiles, extractErrorSignature, DEDUP_STALE_MS, matchRegistrySkillName } from './prompt-search-utils.mjs';
|
|
11
|
+
import { shouldSkip, computeEffectiveLen, detectIntent, shouldSkipByDedup, extractFiles, extractErrorSignature, DEDUP_STALE_MS, matchRegistrySkillName } from './prompt-search-utils.mjs';
|
|
12
12
|
|
|
13
13
|
// ─── Constants ──────────────────────────────────────────────────────────────
|
|
14
14
|
|
|
@@ -16,22 +16,26 @@ const INJECTED_IDS_FILE = join(DB_DIR, 'runtime', `.claude-mem-injected-${inferP
|
|
|
16
16
|
const MAX_RESULTS = 5;
|
|
17
17
|
const LOOKBACK_MS = 60 * 86400000; // 60 days
|
|
18
18
|
|
|
19
|
-
// T3 (v2.31): BM25 magnitude
|
|
20
|
-
// raw bm25() value
|
|
21
|
-
//
|
|
22
|
-
//
|
|
23
|
-
// larger magnitude, so we compare against `Math.abs(relevance)`.
|
|
19
|
+
// T3 (v2.31): per-row BM25 magnitude floor. OBS_BM25 (in scoring-sql.mjs)
|
|
20
|
+
// returns the raw bm25() value — negative, smaller = better. Multiplied by
|
|
21
|
+
// decay × type-quality × (0.5+0.5·importance), sign stays negative. We
|
|
22
|
+
// compare against Math.abs(relevance).
|
|
24
23
|
//
|
|
25
|
-
//
|
|
26
|
-
//
|
|
27
|
-
//
|
|
28
|
-
//
|
|
29
|
-
//
|
|
30
|
-
//
|
|
24
|
+
// v2.34.3 note: the historic comment claimed |rel| falls in 3e-6..5e-5 range.
|
|
25
|
+
// Re-measured against real data (see v2.34.3 CHANGELOG probe), actual scores
|
|
26
|
+
// span ~6..133 across SIGNAL / META / NOISE prompts — the scoring expression
|
|
27
|
+
// was revised in later versions and this constant was never retuned. 1e-5 now
|
|
28
|
+
// acts as a NULL-rel guard, not a real noise filter. The primary noise gate
|
|
29
|
+
// is TOP_REL_FLOOR below, which drops the whole FTS set when the best match
|
|
30
|
+
// is weak.
|
|
31
31
|
// Per-row |relevance| floor; overridable via CLAUDE_MEM_UPS_BM25_MIN.
// A non-numeric override would parse to NaN, and `|rel| >= NaN` is false for
// every row, silently discarding all FTS hits — fall back to the default.
const BM25_MIN_SCORE = (() => {
  const parsed = Number(process.env.CLAUDE_MEM_UPS_BM25_MIN || 1e-5);
  return Number.isFinite(parsed) ? parsed : 1e-5;
})();
|
|
32
|
-
//
|
|
33
|
-
//
|
|
34
|
-
//
|
|
32
|
+
// CJK-weighted minimum length for the prompt. Catches medium-short Latin
|
|
33
|
+
// prompts ("run tests", "fix bug now") that survive `shouldSkip`'s weaker 8-unit
|
|
34
|
+
// floor but carry too few tokens to justify an FTS lookup.
|
|
35
|
+
// v2.34.4: applied to `computeEffectiveLen(prompt)`, not raw char count — a
|
|
36
|
+
// 14-char CJK prompt ("优化 hook 性能降低延迟") scores 30 effective units and
|
|
37
|
+
// now reaches FTS, matching shouldSkip's CJK-weighted gate rather than silently
|
|
38
|
+
// failing the raw-char one.
|
|
35
39
|
const PROMPT_MIN_LENGTH = 15;
|
|
36
40
|
|
|
37
41
|
// v2.33.1: follow-up prompts ("前面那个", "继续 X", "再看看 Y") are short by
|
|
@@ -41,6 +45,27 @@ const PROMPT_MIN_LENGTH = 15;
|
|
|
41
45
|
const FOLLOWUP_PROMPT_MIN_LENGTH = 8;
|
|
42
46
|
// Per-row |relevance| floor used for follow-up sessions; overridable via
// CLAUDE_MEM_UPS_BM25_MIN_FOLLOWUP. A non-numeric override would parse to
// NaN and make every `|rel| >= floor` check false — fall back to the default.
const FOLLOWUP_BM25_MIN_SCORE = (() => {
  const parsed = Number(process.env.CLAUDE_MEM_UPS_BM25_MIN_FOLLOWUP || 5e-6);
  return Number.isFinite(parsed) ? parsed : 5e-6;
})();
|
|
43
47
|
|
|
48
|
+
// v2.34.3: top-|rel| sanity gate. BM25_MIN_SCORE filters per-row; this floor
|
|
49
|
+
// gates the entire FTS set. Noise prompts ("today's date", "current time")
|
|
50
|
+
// produce OR-fallback leakage where every hit shares one tangential stem and
|
|
51
|
+
// per-row filtering lets all of them through. When the best match scores
|
|
52
|
+
// below this floor, the whole FTS result set is dropped.
|
|
53
|
+
//
|
|
54
|
+
// Empirical distribution (v2.34.3 probe, 12 prompts):
|
|
55
|
+
// SIGNAL top-|rel| 60..133
|
|
56
|
+
// NOISE top-|rel| 25..48
|
|
57
|
+
// WEAK-META 6.86..33
|
|
58
|
+
// Default 50 sits in the clean 48→60 gap. Env override for project tuning.
|
|
59
|
+
// Error-signature hits (sigRows) and file-recall (fileRows) bypass this gate —
|
|
60
|
+
// both are precision passes with independent relevance signal.
|
|
61
|
+
//
|
|
62
|
+
// Note: no follow-up halving (unlike PROMPT_MIN_LENGTH / BM25_MIN_SCORE).
|
|
63
|
+
// Those lower the length/per-row bar to let short context-dependent prompts
|
|
64
|
+
// through, but the top-|rel| gap is an absolute distribution separator —
|
|
65
|
+
// lowering it in follow-up mode re-admits the 37..48 noise band that the
|
|
66
|
+
// gate exists to drop.
|
|
67
|
+
// Minimum |relevance| the best FTS hit must reach; overridable via
// CLAUDE_MEM_UPS_TOP_MIN. A non-numeric override would parse to NaN, and
// `|rel| < NaN` is always false, which would silently disable the gate —
// fall back to the default instead.
const TOP_REL_FLOOR = (() => {
  const parsed = Number(process.env.CLAUDE_MEM_UPS_TOP_MIN || 50);
  return Number.isFinite(parsed) ? parsed : 50;
})();
|
|
68
|
+
|
|
44
69
|
function isFollowUpSession() {
|
|
45
70
|
try {
|
|
46
71
|
const raw = readFileSync(INJECTED_IDS_FILE, 'utf8');
|
|
@@ -275,7 +300,7 @@ async function main() {
|
|
|
275
300
|
// short continuations ("前面那个?", "does it work?") depend on prior context.
|
|
276
301
|
const followUp = isFollowUpSession();
|
|
277
302
|
const promptMinLen = followUp ? FOLLOWUP_PROMPT_MIN_LENGTH : PROMPT_MIN_LENGTH;
|
|
278
|
-
if (promptText.trim()
|
|
303
|
+
if (computeEffectiveLen(promptText.trim()) < promptMinLen) return;
|
|
279
304
|
const bm25Floor = followUp ? FOLLOWUP_BM25_MIN_SCORE : BM25_MIN_SCORE;
|
|
280
305
|
|
|
281
306
|
let db;
|
|
@@ -323,6 +348,16 @@ async function main() {
|
|
|
323
348
|
typeof r.relevance === 'number' && Math.abs(r.relevance) >= bm25Floor
|
|
324
349
|
);
|
|
325
350
|
|
|
351
|
+
// v2.34.3: top-|rel| sanity gate. Per-row filtering above leaves noise
|
|
352
|
+
// prompts intact when many rows share a weak stem (all in 25..48 range).
|
|
353
|
+
// If the best remaining FTS match is below the top floor, drop the
|
|
354
|
+
// whole FTS set — noise prompts should produce no FTS injection.
|
|
355
|
+
// Query orders by `relevance` ASC; negative values → ftsRows[0] has the
|
|
356
|
+
// largest magnitude (strongest match) in this scoring expression.
|
|
357
|
+
if (ftsRows.length > 0 && Math.abs(ftsRows[0].relevance) < TOP_REL_FLOOR) {
|
|
358
|
+
ftsRows = [];
|
|
359
|
+
}
|
|
360
|
+
|
|
326
361
|
// Merge: FTS results first, then file results, deduplicated
|
|
327
362
|
const seen = new Set(ftsRows.map(r => r.id));
|
|
328
363
|
rows = [...ftsRows];
|