claude-mem-lite 2.49.0 → 2.50.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,7 +10,7 @@
10
10
  "plugins": [
11
11
  {
12
12
  "name": "claude-mem-lite",
13
- "version": "2.49.0",
13
+ "version": "2.50.0",
14
14
  "source": "./",
15
15
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall"
16
16
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.49.0",
3
+ "version": "2.50.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code — FTS5 search, episode batching, error-triggered recall",
5
5
  "author": {
6
6
  "name": "sdsrss"
package/hook-llm.mjs CHANGED
@@ -7,7 +7,7 @@ import {
7
7
  jaccardSimilarity, truncate, clampImportance, computeRuleImportance,
8
8
  inferProject, parseJsonFromLLM,
9
9
  computeMinHash, estimateJaccardFromMinHash, cjkBigrams, EDIT_TOOLS, LOW_SIGNAL_TITLE, debugCatch, debugLog, OBS_BM25,
10
- getCurrentBranch,
10
+ getCurrentBranch, notLowSignalTitleClause,
11
11
  } from './utils.mjs';
12
12
  import { acquireLLMSlot, releaseLLMSlot } from './hook-semaphore.mjs';
13
13
  import { getVocabulary, computeVector } from './tfidf.mjs';
@@ -802,10 +802,14 @@ export async function handleLLMSummary() {
802
802
  const sessionId = process.argv[3] || getSessionId();
803
803
  const project = process.argv[4] || inferProject();
804
804
 
805
+ // Exclude LOW_SIGNAL hook-llm fallback titles ("Error: files +2 more: ...",
806
+ // "Modified X", "Worked on X", etc.) from the Haiku summary input — they
807
+ // pollute the `completed` field and mislead session-resume context.
805
808
  const recentObs = db.prepare(`
806
809
  SELECT id, type, title, narrative
807
810
  FROM observations
808
811
  WHERE memory_session_id = ?
812
+ AND ${notLowSignalTitleClause('')}
809
813
  ORDER BY created_at_epoch DESC
810
814
  LIMIT 30
811
815
  `).all(sessionId);
package/mem-cli.mjs CHANGED
@@ -5,6 +5,7 @@
5
5
  import { homedir } from 'os';
6
6
  import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
7
7
  import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
8
+ import { cjkPrecisionOk } from './nlp.mjs';
8
9
  import { extractCjkLikePatterns } from './nlp.mjs';
9
10
  import { resolveProject } from './project-utils.mjs';
10
11
  import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
@@ -240,9 +241,14 @@ function cmdSearch(db, args) {
240
241
  ORDER BY score
241
242
  LIMIT ? OFFSET ?
242
243
  `).all(...promptParams);
243
- for (const r of promptRows) results.push({ ...r, _source: 'prompt' });
244
+ // CJK precision filter (read-path parity with server.mjs): unicode61
245
+ // degrades bigram queries to single-char AND, letting common-char
246
+ // Chinese prose leak through. Drop rows that contain < 20% of query
247
+ // bigrams/keywords as contiguous substrings.
248
+ const keptPromptRows = promptRows.filter(r => cjkPrecisionOk(query, r.prompt_text));
249
+ for (const r of keptPromptRows) results.push({ ...r, _source: 'prompt' });
244
250
  // CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
245
- if (promptRows.length === 0) {
251
+ if (keptPromptRows.length === 0) {
246
252
  const cjkPatterns = extractCjkLikePatterns(query);
247
253
  if (cjkPatterns.length > 0) {
248
254
  const likeConds = cjkPatterns.map(() => 'p.prompt_text LIKE ?');
@@ -263,7 +269,12 @@ function cmdSearch(db, args) {
263
269
  ORDER BY p.created_at_epoch DESC
264
270
  LIMIT ? OFFSET ?
265
271
  `).all(...likeParams);
266
- for (const r of fallbackRows) results.push({ ...r, _source: 'prompt', score: 0 });
272
+ // CJK precision filter applies here too: the LIKE fallback is just
273
+ // OR'd substring bigrams; without the precision gate it re-admits
274
+ // the same common-char noise the FTS path dropped (this was the
275
+ // actual leak source — FTS returned 0, fallback filled 20).
276
+ const keptFallback = fallbackRows.filter(r => cjkPrecisionOk(query, r.prompt_text));
277
+ for (const r of keptFallback) results.push({ ...r, _source: 'prompt', score: 0 });
267
278
  }
268
279
  }
269
280
  } catch { /* prompt FTS may not exist in older DBs */ }
@@ -450,8 +461,13 @@ function searchFts(db, ftsQuery, { type, project, limit, dateFrom, dateTo, minIm
450
461
 
451
462
  function cmdRecent(db, args) {
452
463
  const { positional, flags } = parseArgs(args);
453
- const rawLimit = parseInt(positional[0], 10);
454
- const limit = (Number.isInteger(rawLimit) && rawLimit > 0) ? rawLimit : 10;
464
+ const rawArg = positional[0];
465
+ const rawLimit = parseInt(rawArg, 10);
466
+ const isValid = Number.isInteger(rawLimit) && rawLimit > 0;
467
+ if (rawArg !== undefined && !isValid) {
468
+ process.stderr.write(`[mem] Invalid count "${rawArg}" (must be a positive integer); using default 10\n`);
469
+ }
470
+ const limit = isValid ? rawLimit : 10;
455
471
  const project = flags.project ? resolveProject(db, flags.project) : inferProject();
456
472
 
457
473
  const params = [];
package/nlp.mjs CHANGED
@@ -124,6 +124,58 @@ export function extractCjkLikePatterns(query) {
124
124
  return [...new Set([...keywords, ...bigrams])];
125
125
  }
126
126
 
127
+ /**
128
+ * Post-FTS precision filter for CJK queries.
129
+ *
130
+ * Background: FTS5 unicode61 tokenizer splits every CJK character into its
131
+ * own token. An application-layer bigram query like "我是" then reduces to
132
+ * (我 AND 是) at match time — matching any document that happens to contain
133
+ * both chars anywhere, which is extremely permissive in Chinese prose.
134
+ *
135
+ * Precision check: given the raw query and a candidate result's full text,
136
+ * require that at least `threshold` fraction of the query's CJK bigrams
137
+ * (or dictionary words, if any matched) appear as contiguous substrings in
138
+ * the result. Non-CJK queries bypass this filter entirely.
139
+ *
140
+ * Applied only to the prompts/user-prompt path — observations have richer
141
+ * rerank + low-signal filtering that already control noise there. Also,
142
+ * obs-side synonym expansion ("查询"→"(查询 OR query OR search)") is a
143
+ * legitimate recall mechanism that this filter would break.
144
+ *
145
+ * Threshold default 0.2 is tunable via `CLAUDE_MEM_CJK_PREC_MIN` env var.
146
+ * Explicit threshold arg still overrides the env value — tests and in-code
147
+ * callers with domain context stay authoritative.
148
+ *
149
+ * Default was tuned from 0.3 → 0.2 after a 20-query production-DB fixture
150
+ * showed 0.3 over-rejected legitimate multi-bigram queries whose dict-
151
+ * keyword coverage was incomplete (e.g. "同义词扩展" — neither compound
152
+ * is in CJK_COMPOUNDS → 4 bigrams required, so a single-keyword match scores
153
+ * only 25% < 30%, which rejected 19/20 real hits). At 0.2, pure-noise reduction stays
154
+ * ≥85% on noise fixture while SIG-6 recall recovered to 100%.
155
+ *
156
+ * @param {string} query Raw query text
157
+ * @param {string} text Candidate result text
158
+ * @param {number} [threshold] Fraction of patterns that must match. If
159
+ * omitted, reads CLAUDE_MEM_CJK_PREC_MIN (default 0.2).
160
+ * @returns {boolean}
161
+ */
162
+ export function cjkPrecisionOk(query, text, threshold) {
163
+ if (threshold === undefined) {
164
+ const envVal = process.env.CLAUDE_MEM_CJK_PREC_MIN;
165
+ const parsed = envVal ? parseFloat(envVal) : NaN;
166
+ threshold = Number.isFinite(parsed) && parsed >= 0 && parsed <= 1 ? parsed : 0.2;
167
+ }
168
+ if (!query || !text) return true;
169
+ if (!/[一-鿿㐀-䶿]{2,}/.test(query)) return true;
170
+ const keywords = extractCjkKeywords(query);
171
+ const required = keywords.length > 0
172
+ ? keywords
173
+ : cjkBigrams(query).split(' ').filter(b => b && !CJK_STOP_WORDS.has(b));
174
+ if (required.length === 0) return true;
175
+ const hit = required.filter(w => text.includes(w)).length;
176
+ return (hit / required.length) >= threshold;
177
+ }
178
+
127
179
  // ─── FTS5 Token Formatting ──────────────────────────────────────────────────
128
180
 
129
181
  // Format a term for FTS5: quote if it contains spaces, hyphens, or special chars
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "claude-mem-lite",
3
- "version": "2.49.0",
3
+ "version": "2.50.0",
4
4
  "description": "Lightweight persistent memory system for Claude Code",
5
5
  "type": "module",
6
6
  "engines": {
@@ -5,6 +5,7 @@
5
5
 
6
6
  import { ensureDb, DB_DIR, REGISTRY_DB_PATH } from '../schema.mjs';
7
7
  import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause, noisePenaltyClause } from '../utils.mjs';
8
+ import { cjkPrecisionOk } from '../nlp.mjs';
8
9
  import { writeFileSync, readFileSync, existsSync, renameSync } from 'fs';
9
10
  import { join } from 'path';
10
11
  import Database from 'better-sqlite3';
@@ -216,6 +217,9 @@ function searchByUserPrompts(db, queryText, project, limit) {
216
217
  if (!ftsQuery) return [];
217
218
 
218
219
  const cutoff = Date.now() - LOOKBACK_MS;
220
+ // Exclude <task-notification> internal protocol messages — parity with
221
+ // server.mjs mem_search + mem-cli.mjs search (see lesson #8139: read-path
222
+ // parity across paths querying the same table).
219
223
  const sql = `
220
224
  SELECT up.id, up.prompt_text, up.created_at_epoch,
221
225
  bm25(user_prompts_fts) as relevance
@@ -225,6 +229,7 @@ function searchByUserPrompts(db, queryText, project, limit) {
225
229
  WHERE user_prompts_fts MATCH ?
226
230
  AND s.project = ?
227
231
  AND up.created_at_epoch > ?
232
+ AND up.prompt_text NOT LIKE '<task-notification>%'
228
233
  ORDER BY relevance
229
234
  LIMIT ?
230
235
  `;
@@ -238,7 +243,11 @@ function searchByUserPrompts(db, queryText, project, limit) {
238
243
  }
239
244
  }
240
245
 
241
- return rows;
246
+ // CJK precision filter (parity with server.mjs + mem-cli.mjs): unicode61
247
+ // FTS degrades CJK bigram queries to single-char AND, letting any prose
248
+ // sharing common chars leak through. Drop rows that contain < 20% of query
249
+ // bigrams/keywords as contiguous substrings. Non-CJK queries bypass.
250
+ return rows.filter(r => cjkPrecisionOk(queryText, r.prompt_text));
242
251
  }
243
252
 
244
253
  function searchRecent(db, project, limit) {
package/server.mjs CHANGED
@@ -6,7 +6,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
6
6
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
7
7
  import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
8
8
  import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
9
- import { extractCjkLikePatterns } from './nlp.mjs';
9
+ import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
10
10
  import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
11
11
  import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
12
12
  import { reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
@@ -463,11 +463,15 @@ function searchPrompts(ctx) {
463
463
  epochTo, epochTo,
464
464
  perSourceLimit, perSourceOffset
465
465
  );
466
- for (const r of rows) {
466
+ // CJK precision filter: unicode61 FTS degrades CJK bigram queries to
467
+ // single-char AND, letting any prose sharing common chars leak through.
468
+ // Require ≥20% (default threshold) of query's CJK bigrams/keywords as contiguous substrings.
469
+ const keptRows = args.query ? rows.filter(r => cjkPrecisionOk(args.query, r.prompt_text)) : rows;
470
+ for (const r of keptRows) {
467
471
  results.push({ source: 'prompt', id: r.id, text: r.prompt_text, session: r.content_session_id, date: r.created_at, created_at_epoch: r.created_at_epoch, score: r.score });
468
472
  }
469
473
  // CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
470
- if (rows.length === 0 && args.query) {
474
+ if (keptRows.length === 0 && args.query) {
471
475
  const cjkPatterns = extractCjkLikePatterns(args.query);
472
476
  if (cjkPatterns.length > 0) {
473
477
  const likeConds = cjkPatterns.map(() => 'p.prompt_text LIKE ?');
@@ -490,7 +494,13 @@ function searchPrompts(ctx) {
490
494
  epochTo, epochTo,
491
495
  perSourceLimit, perSourceOffset
492
496
  );
493
- for (const r of fallbackRows) {
497
+ // Parity with mem-cli.mjs: the LIKE fallback is an OR'd bigram
498
+ // substring scan with no scoring gate. The precision filter must
499
+ // apply here too — without it, queries whose FTS set is empty
500
+ // re-admit the full common-char noise band that the precision filter
501
+ // drops on the FTS path.
502
+ const keptFallback = args.query ? fallbackRows.filter(r => cjkPrecisionOk(args.query, r.prompt_text)) : fallbackRows;
503
+ for (const r of keptFallback) {
494
504
  results.push({ source: 'prompt', id: r.id, text: r.prompt_text, session: r.content_session_id, date: r.created_at, created_at_epoch: r.created_at_epoch, score: 0 });
495
505
  }
496
506
  }
package/tool-schemas.mjs CHANGED
@@ -423,7 +423,7 @@ export const tools = [
423
423
  ' - After a major project phase completes and old per-file observations are noise\n' +
424
424
  ' - Stats show thousands of low-importance rows dragging search quality\n' +
425
425
  '\n' +
426
- 'Equivalent CLI: claude-mem-lite compress [--preview] [--age-days 90]',
426
+ 'Equivalent CLI: claude-mem-lite compress [--execute] [--age-days 90] (preview is default)',
427
427
  inputSchema: memCompressSchema,
428
428
  hidden: true,
429
429
  },
@@ -442,7 +442,7 @@ export const tools = [
442
442
  ' - After bulk imports or a long offline period\n' +
443
443
  ' - User asks for periodic maintenance / cleanup\n' +
444
444
  '\n' +
445
- 'Equivalent CLI: claude-mem-lite maintain --action scan --operations dedup,decay',
445
+ 'Equivalent CLI: claude-mem-lite maintain scan --ops dedup,decay',
446
446
  inputSchema: memMaintainSchema,
447
447
  hidden: true,
448
448
  },
@@ -461,7 +461,7 @@ export const tools = [
461
461
  ' - stats show many degraded (title-only, no lesson) observations\n' +
462
462
  ' - Start with action="preview" to see candidates before spending tokens\n' +
463
463
  '\n' +
464
- 'Equivalent CLI: claude-mem-lite optimize [--action preview|run|run_all] [--max-items N]',
464
+ 'Equivalent CLI: claude-mem-lite optimize [--run|--run-all] [--task re-enrich,normalize,cluster-merge,smart-compress] [--max N] (preview is default)',
465
465
  inputSchema: memOptimizeSchema,
466
466
  hidden: true,
467
467
  },