claude-mem-lite 2.49.0 → 2.50.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/hook-llm.mjs +5 -1
- package/mem-cli.mjs +21 -5
- package/nlp.mjs +52 -0
- package/package.json +1 -1
- package/scripts/user-prompt-search.js +10 -1
- package/server.mjs +14 -4
- package/tool-schemas.mjs +3 -3
package/hook-llm.mjs
CHANGED
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
jaccardSimilarity, truncate, clampImportance, computeRuleImportance,
|
|
8
8
|
inferProject, parseJsonFromLLM,
|
|
9
9
|
computeMinHash, estimateJaccardFromMinHash, cjkBigrams, EDIT_TOOLS, LOW_SIGNAL_TITLE, debugCatch, debugLog, OBS_BM25,
|
|
10
|
-
getCurrentBranch,
|
|
10
|
+
getCurrentBranch, notLowSignalTitleClause,
|
|
11
11
|
} from './utils.mjs';
|
|
12
12
|
import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
|
|
13
13
|
import { getVocabulary, computeVector } from './tfidf.mjs';
|
|
@@ -802,10 +802,14 @@ export async function handleLLMSummary() {
|
|
|
802
802
|
const sessionId = process.argv[3] || getSessionId();
|
|
803
803
|
const project = process.argv[4] || inferProject();
|
|
804
804
|
|
|
805
|
+
// Exclude LOW_SIGNAL hook-llm fallback titles ("Error: files +2 more: ...",
|
|
806
|
+
// "Modified X", "Worked on X", etc.) from the Haiku summary input — they
|
|
807
|
+
// pollute the `completed` field and mislead session-resume context.
|
|
805
808
|
const recentObs = db.prepare(`
|
|
806
809
|
SELECT id, type, title, narrative
|
|
807
810
|
FROM observations
|
|
808
811
|
WHERE memory_session_id = ?
|
|
812
|
+
AND ${notLowSignalTitleClause('')}
|
|
809
813
|
ORDER BY created_at_epoch DESC
|
|
810
814
|
LIMIT 30
|
|
811
815
|
`).all(sessionId);
|
package/mem-cli.mjs
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
import { homedir } from 'os';
|
|
6
6
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
7
7
|
import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
|
|
8
|
+
import { cjkPrecisionOk } from './nlp.mjs';
|
|
8
9
|
import { extractCjkLikePatterns } from './nlp.mjs';
|
|
9
10
|
import { resolveProject } from './project-utils.mjs';
|
|
10
11
|
import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
|
|
@@ -240,9 +241,14 @@ function cmdSearch(db, args) {
|
|
|
240
241
|
ORDER BY score
|
|
241
242
|
LIMIT ? OFFSET ?
|
|
242
243
|
`).all(...promptParams);
|
|
243
|
-
|
|
244
|
+
// CJK precision filter (read-path parity with server.mjs): unicode61
|
|
245
|
+
// degrades bigram queries to single-char AND, letting common-char
|
|
246
|
+
// Chinese prose leak through. Drop rows matching < 20% of query
|
|
247
|
+
// bigrams/keywords as contiguous substrings.
|
|
248
|
+
const keptPromptRows = promptRows.filter(r => cjkPrecisionOk(query, r.prompt_text));
|
|
249
|
+
for (const r of keptPromptRows) results.push({ ...r, _source: 'prompt' });
|
|
244
250
|
// CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
|
|
245
|
-
if (
|
|
251
|
+
if (keptPromptRows.length === 0) {
|
|
246
252
|
const cjkPatterns = extractCjkLikePatterns(query);
|
|
247
253
|
if (cjkPatterns.length > 0) {
|
|
248
254
|
const likeConds = cjkPatterns.map(() => 'p.prompt_text LIKE ?');
|
|
@@ -263,7 +269,12 @@ function cmdSearch(db, args) {
|
|
|
263
269
|
ORDER BY p.created_at_epoch DESC
|
|
264
270
|
LIMIT ? OFFSET ?
|
|
265
271
|
`).all(...likeParams);
|
|
266
|
-
|
|
272
|
+
// CJK precision filter applies here too: the LIKE fallback is just
|
|
273
|
+
// OR'd substring bigrams; without the precision gate it re-admits
|
|
274
|
+
// the same common-char noise the FTS path dropped (this was the
|
|
275
|
+
// actual leak source — FTS returned 0, fallback filled 20).
|
|
276
|
+
const keptFallback = fallbackRows.filter(r => cjkPrecisionOk(query, r.prompt_text));
|
|
277
|
+
for (const r of keptFallback) results.push({ ...r, _source: 'prompt', score: 0 });
|
|
267
278
|
}
|
|
268
279
|
}
|
|
269
280
|
} catch { /* prompt FTS may not exist in older DBs */ }
|
|
@@ -450,8 +461,13 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
|
|
|
450
461
|
|
|
451
462
|
function cmdRecent(db, args) {
|
|
452
463
|
const { positional, flags } = parseArgs(args);
|
|
453
|
-
const
|
|
454
|
-
const
|
|
464
|
+
const rawArg = positional[0];
|
|
465
|
+
const rawLimit = parseInt(rawArg, 10);
|
|
466
|
+
const isValid = Number.isInteger(rawLimit) && rawLimit > 0;
|
|
467
|
+
if (rawArg !== undefined && !isValid) {
|
|
468
|
+
process.stderr.write(`[mem] Invalid count "${rawArg}" (must be a positive integer); using default 10\n`);
|
|
469
|
+
}
|
|
470
|
+
const limit = isValid ? rawLimit : 10;
|
|
455
471
|
const project = flags.project ? resolveProject(db, flags.project) : inferProject();
|
|
456
472
|
|
|
457
473
|
const params = [];
|
package/nlp.mjs
CHANGED
|
@@ -124,6 +124,58 @@ export function extractCjkLikePatterns(query) {
|
|
|
124
124
|
return [...new Set([...keywords, ...bigrams])];
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
+
/**
|
|
128
|
+
* Post-FTS precision filter for CJK queries.
|
|
129
|
+
*
|
|
130
|
+
* Background: FTS5 unicode61 tokenizer splits every CJK character into its
|
|
131
|
+
* own token. An application-layer bigram query like "我是" then reduces to
|
|
132
|
+
* (我 AND 是) at match time — matching any document that happens to contain
|
|
133
|
+
* both chars anywhere, which is extremely permissive in Chinese prose.
|
|
134
|
+
*
|
|
135
|
+
* Precision check: given the raw query and a candidate result's full text,
|
|
136
|
+
* require that at least `threshold` fraction of the query's CJK bigrams
|
|
137
|
+
* (or dictionary words, if any matched) appear as contiguous substrings in
|
|
138
|
+
* the result. Non-CJK queries bypass this filter entirely.
|
|
139
|
+
*
|
|
140
|
+
* Applied only to the prompts/user-prompt path — observations have richer
|
|
141
|
+
* rerank + low-signal filtering that already control noise there. Also,
|
|
142
|
+
* obs-side synonym expansion ("查询"→"(查询 OR query OR search)") is a
|
|
143
|
+
* legitimate recall mechanism that this filter would break.
|
|
144
|
+
*
|
|
145
|
+
* Threshold default 0.2 is tunable via `CLAUDE_MEM_CJK_PREC_MIN` env var.
|
|
146
|
+
* Explicit threshold arg still overrides the env value — tests and in-code
|
|
147
|
+
* callers with domain context stay authoritative.
|
|
148
|
+
*
|
|
149
|
+
* Default was tuned from 0.3 → 0.2 after a 20-query production-DB fixture
|
|
150
|
+
* showed 0.3 over-rejected legitimate multi-bigram queries whose dict-
|
|
151
|
+
* keyword coverage was incomplete (e.g. "同义词扩展" — neither compound
|
|
152
|
+
* is in CJK_COMPOUNDS → 4 bigrams required, single-keyword match only
|
|
153
|
+
* 25% < 30% rejected 19/20 real hits). At 0.2, pure-noise reduction stays
|
|
154
|
+
* ≥85% on noise fixture while SIG-6 recall recovered to 100%.
|
|
155
|
+
*
|
|
156
|
+
* @param {string} query Raw query text
|
|
157
|
+
* @param {string} text Candidate result text
|
|
158
|
+
* @param {number} [threshold] Fraction of patterns that must match. If
|
|
159
|
+
* omitted, reads CLAUDE_MEM_CJK_PREC_MIN (default 0.2).
|
|
160
|
+
* @returns {boolean}
|
|
161
|
+
*/
|
|
162
|
+
export function cjkPrecisionOk(query, text, threshold) {
|
|
163
|
+
if (threshold === undefined) {
|
|
164
|
+
const envVal = process.env.CLAUDE_MEM_CJK_PREC_MIN;
|
|
165
|
+
const parsed = envVal ? parseFloat(envVal) : NaN;
|
|
166
|
+
threshold = Number.isFinite(parsed) && parsed >= 0 && parsed <= 1 ? parsed : 0.2;
|
|
167
|
+
}
|
|
168
|
+
if (!query || !text) return true;
|
|
169
|
+
if (!/[一-鿿㐀-䶿]{2,}/.test(query)) return true;
|
|
170
|
+
const keywords = extractCjkKeywords(query);
|
|
171
|
+
const required = keywords.length > 0
|
|
172
|
+
? keywords
|
|
173
|
+
: cjkBigrams(query).split(' ').filter(b => b && !CJK_STOP_WORDS.has(b));
|
|
174
|
+
if (required.length === 0) return true;
|
|
175
|
+
const hit = required.filter(w => text.includes(w)).length;
|
|
176
|
+
return (hit / required.length) >= threshold;
|
|
177
|
+
}
|
|
178
|
+
|
|
127
179
|
// ─── FTS5 Token Formatting ──────────────────────────────────────────────────
|
|
128
180
|
|
|
129
181
|
// Format a term for FTS5: quote if it contains spaces, hyphens, or special chars
|
package/package.json
CHANGED
package/scripts/user-prompt-search.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import { ensureDb, DB_DIR, REGISTRY_DB_PATH } from '../schema.mjs';
|
|
7
7
|
import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause, noisePenaltyClause } from '../utils.mjs';
|
|
8
|
+
import { cjkPrecisionOk } from '../nlp.mjs';
|
|
8
9
|
import { writeFileSync, readFileSync, existsSync, renameSync } from 'fs';
|
|
9
10
|
import { join } from 'path';
|
|
10
11
|
import Database from 'better-sqlite3';
|
|
@@ -216,6 +217,9 @@ function searchByUserPrompts(db, queryText, project, limit) {
|
|
|
216
217
|
if (!ftsQuery) return [];
|
|
217
218
|
|
|
218
219
|
const cutoff = Date.now() - LOOKBACK_MS;
|
|
220
|
+
// Exclude <task-notification> internal protocol messages — parity with
|
|
221
|
+
// server.mjs mem_search + mem-cli.mjs search (see lesson #8139: read-path
|
|
222
|
+
// parity across paths querying the same table).
|
|
219
223
|
const sql = `
|
|
220
224
|
SELECT up.id, up.prompt_text, up.created_at_epoch,
|
|
221
225
|
bm25(user_prompts_fts) as relevance
|
|
@@ -225,6 +229,7 @@ function searchByUserPrompts(db, queryText, project, limit) {
|
|
|
225
229
|
WHERE user_prompts_fts MATCH ?
|
|
226
230
|
AND s.project = ?
|
|
227
231
|
AND up.created_at_epoch > ?
|
|
232
|
+
AND up.prompt_text NOT LIKE '<task-notification>%'
|
|
228
233
|
ORDER BY relevance
|
|
229
234
|
LIMIT ?
|
|
230
235
|
`;
|
|
@@ -238,7 +243,11 @@ function searchByUserPrompts(db, queryText, project, limit) {
|
|
|
238
243
|
}
|
|
239
244
|
}
|
|
240
245
|
|
|
241
|
-
|
|
246
|
+
// CJK precision filter (parity with server.mjs + mem-cli.mjs): unicode61
|
|
247
|
+
// FTS degrades CJK bigram queries to single-char AND, letting any prose
|
|
248
|
+
// sharing common chars leak through. Drop rows matching < 20% of query
|
|
249
|
+
// bigrams/keywords as contiguous substrings. Non-CJK queries bypass.
|
|
250
|
+
return rows.filter(r => cjkPrecisionOk(queryText, r.prompt_text));
|
|
242
251
|
}
|
|
243
252
|
|
|
244
253
|
function searchRecent(db, project, limit) {
|
package/server.mjs
CHANGED
|
@@ -6,7 +6,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
|
6
6
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
7
7
|
import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
8
8
|
import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
|
|
9
|
-
import { extractCjkLikePatterns } from './nlp.mjs';
|
|
9
|
+
import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
|
|
10
10
|
import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
|
|
11
11
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
12
12
|
import { reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
|
|
@@ -463,11 +463,15 @@ function searchPrompts(ctx) {
|
|
|
463
463
|
epochTo, epochTo,
|
|
464
464
|
perSourceLimit, perSourceOffset
|
|
465
465
|
);
|
|
466
|
-
|
|
466
|
+
// CJK precision filter: unicode61 FTS degrades CJK bigram queries to
|
|
467
|
+
// single-char AND, letting any prose sharing common chars leak through.
|
|
468
|
+
// Require ≥20% (default) of query's CJK bigrams/keywords as contiguous substrings.
|
|
469
|
+
const keptRows = args.query ? rows.filter(r => cjkPrecisionOk(args.query, r.prompt_text)) : rows;
|
|
470
|
+
for (const r of keptRows) {
|
|
467
471
|
results.push({ source: 'prompt', id: r.id, text: r.prompt_text, session: r.content_session_id, date: r.created_at, created_at_epoch: r.created_at_epoch, score: r.score });
|
|
468
472
|
}
|
|
469
473
|
// CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
|
|
470
|
-
if (
|
|
474
|
+
if (keptRows.length === 0 && args.query) {
|
|
471
475
|
const cjkPatterns = extractCjkLikePatterns(args.query);
|
|
472
476
|
if (cjkPatterns.length > 0) {
|
|
473
477
|
const likeConds = cjkPatterns.map(() => 'p.prompt_text LIKE ?');
|
|
@@ -490,7 +494,13 @@ function searchPrompts(ctx) {
|
|
|
490
494
|
epochTo, epochTo,
|
|
491
495
|
perSourceLimit, perSourceOffset
|
|
492
496
|
);
|
|
493
|
-
|
|
497
|
+
// Parity with mem-cli.mjs: the LIKE fallback is an OR'd bigram
|
|
498
|
+
// substring scan with no scoring gate. The precision filter must
|
|
499
|
+
// apply here too — without it, queries whose FTS set is empty
|
|
500
|
+
// re-admit the full common-char noise band that FTS would have
|
|
501
|
+
// dropped downstream anyway.
|
|
502
|
+
const keptFallback = args.query ? fallbackRows.filter(r => cjkPrecisionOk(args.query, r.prompt_text)) : fallbackRows;
|
|
503
|
+
for (const r of keptFallback) {
|
|
494
504
|
results.push({ source: 'prompt', id: r.id, text: r.prompt_text, session: r.content_session_id, date: r.created_at, created_at_epoch: r.created_at_epoch, score: 0 });
|
|
495
505
|
}
|
|
496
506
|
}
|
package/tool-schemas.mjs
CHANGED
|
@@ -423,7 +423,7 @@ export const tools = [
|
|
|
423
423
|
' - After a major project phase completes and old per-file observations are noise\n' +
|
|
424
424
|
' - Stats show thousands of low-importance rows dragging search quality\n' +
|
|
425
425
|
'\n' +
|
|
426
|
-
'Equivalent CLI: claude-mem-lite compress [--
|
|
426
|
+
'Equivalent CLI: claude-mem-lite compress [--execute] [--age-days 90] (preview is default)',
|
|
427
427
|
inputSchema: memCompressSchema,
|
|
428
428
|
hidden: true,
|
|
429
429
|
},
|
|
@@ -442,7 +442,7 @@ export const tools = [
|
|
|
442
442
|
' - After bulk imports or a long offline period\n' +
|
|
443
443
|
' - User asks for periodic maintenance / cleanup\n' +
|
|
444
444
|
'\n' +
|
|
445
|
-
'Equivalent CLI: claude-mem-lite maintain
|
|
445
|
+
'Equivalent CLI: claude-mem-lite maintain scan --ops dedup,decay',
|
|
446
446
|
inputSchema: memMaintainSchema,
|
|
447
447
|
hidden: true,
|
|
448
448
|
},
|
|
@@ -461,7 +461,7 @@ export const tools = [
|
|
|
461
461
|
' - stats show many degraded (title-only, no lesson) observations\n' +
|
|
462
462
|
' - Start with action="preview" to see candidates before spending tokens\n' +
|
|
463
463
|
'\n' +
|
|
464
|
-
'Equivalent CLI: claude-mem-lite optimize [--
|
|
464
|
+
'Equivalent CLI: claude-mem-lite optimize [--run|--run-all] [--task re-enrich,normalize,cluster-merge,smart-compress] [--max N] (preview is default)',
|
|
465
465
|
inputSchema: memOptimizeSchema,
|
|
466
466
|
hidden: true,
|
|
467
467
|
},
|