claude-mem-lite 2.5.4 → 2.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/tool-schemas.mjs CHANGED
@@ -5,6 +5,29 @@ import { z } from 'zod';
5
5
 
6
6
  export const OBS_TYPE_ENUM = z.enum(['decision', 'bugfix', 'feature', 'refactor', 'discovery', 'change']);
7
7
 
8
+ // LLM-friendly coercion: accept string numbers and normalize to proper types
9
+ const coerceInt = z.preprocess(
10
+ (v) => (typeof v === 'string' && /^-?\d+$/.test(v.trim())) ? parseInt(v.trim(), 10) : v,
11
+ z.number().int()
12
+ );
13
+
14
+ // LLM-friendly coercion: accept "true"/"false"/"True"/"TRUE" strings as boolean
15
+ const coerceBool = z.preprocess(
16
+ (v) => typeof v === 'string' ? ({ true: true, false: false })[v.toLowerCase()] ?? v : v,
17
+ z.boolean()
18
+ );
19
+
20
+ // Coerce ids: accept single number, string "123", comma-separated "1,2,3", or array (segments of a comma-separated string that parseInt cannot parse are silently dropped)
21
+ const coerceIntArray = z.preprocess(
22
+ (v) => {
23
+ if (Array.isArray(v)) return v.map(x => typeof x === 'string' ? parseInt(x, 10) : x);
24
+ if (typeof v === 'number') return [v];
25
+ if (typeof v === 'string') return v.split(',').map(s => parseInt(s.trim(), 10)).filter(n => !isNaN(n));
26
+ return v;
27
+ },
28
+ z.array(z.number().int())
29
+ );
30
+
8
31
  export const memSearchSchema = {
9
32
  query: z.string().optional().describe('Search query (FTS5 syntax supported)'),
10
33
  type: z.enum(['observations', 'sessions', 'prompts']).optional().describe('Limit to one table'),
@@ -12,28 +35,28 @@ export const memSearchSchema = {
12
35
  project: z.string().optional().describe('Filter by project name'),
13
36
  date_from: z.string().optional().describe('Start date (ISO 8601 or YYYY-MM-DD)'),
14
37
  date_to: z.string().optional().describe('End date (ISO 8601 or YYYY-MM-DD). Date-only format is inclusive (covers full day)'),
15
- importance: z.number().int().min(1).max(3).optional().describe('Minimum importance (1=routine, 2=notable, 3=critical)'),
16
- limit: z.number().int().min(1).max(100).optional().describe('Max results (default 20)'),
17
- offset: z.number().int().min(0).optional().describe('Offset for pagination'),
38
+ importance: coerceInt.pipe(z.number().int().min(1).max(3)).optional().describe('Minimum importance (1=routine, 2=notable, 3=critical)'),
39
+ limit: coerceInt.pipe(z.number().int().min(1).max(100)).optional().describe('Max results (default 20)'),
40
+ offset: coerceInt.pipe(z.number().int().min(0)).optional().describe('Offset for pagination'),
18
41
  };
19
42
 
20
43
  export const memTimelineSchema = {
21
- anchor: z.number().int().optional().describe('Observation ID as center point'),
44
+ anchor: coerceInt.pipe(z.number().int()).optional().describe('Observation ID as center point'),
22
45
  query: z.string().optional().describe('FTS5 query to auto-find anchor'),
23
- before: z.number().int().min(0).max(50).optional().describe('Items before anchor (default 5)'),
24
- after: z.number().int().min(0).max(50).optional().describe('Items after anchor (default 5)'),
46
+ before: coerceInt.pipe(z.number().int().min(0).max(50)).optional().describe('Items before anchor (default 5)'),
47
+ after: coerceInt.pipe(z.number().int().min(0).max(50)).optional().describe('Items after anchor (default 5)'),
25
48
  project: z.string().optional().describe('Filter by project'),
26
49
  };
27
50
 
28
51
  export const memGetSchema = {
29
- ids: z.array(z.number().int()).min(1).max(20).describe('Observation IDs to retrieve'),
52
+ ids: coerceIntArray.pipe(z.array(z.number().int()).min(1).max(20)).describe('Observation IDs to retrieve'),
30
53
  source: z.enum(['obs', 'session', 'prompt']).optional().describe('Record type: obs (default), session (S# from search), prompt (P# from search)'),
31
54
  fields: z.array(z.string()).optional().describe('Specific fields to return (default: all)'),
32
55
  };
33
56
 
34
57
  export const memDeleteSchema = {
35
- ids: z.array(z.number().int()).min(1).max(50).describe('Observation IDs to delete'),
36
- confirm: z.boolean().describe('false=preview what will be deleted, true=execute deletion'),
58
+ ids: coerceIntArray.pipe(z.array(z.number().int()).min(1).max(50)).describe('Observation IDs to delete'),
59
+ confirm: coerceBool.describe('false=preview what will be deleted, true=execute deletion'),
37
60
  };
38
61
 
39
62
  export const memSaveSchema = {
@@ -41,17 +64,17 @@ export const memSaveSchema = {
41
64
  title: z.string().optional().describe('Short title'),
42
65
  type: OBS_TYPE_ENUM.optional().describe('Observation type (default: discovery)'),
43
66
  project: z.string().optional().describe('Project name (default: inferred from CWD)'),
44
- importance: z.number().int().min(1).max(3).optional().describe('Importance level: 1=routine, 2=notable, 3=critical (default: 1)'),
67
+ importance: coerceInt.pipe(z.number().int().min(1).max(3)).optional().describe('Importance level: 1=routine, 2=notable, 3=critical (default: 1)'),
45
68
  };
46
69
 
47
70
  export const memStatsSchema = {
48
71
  project: z.string().optional().describe('Filter by project'),
49
- days: z.number().int().min(1).max(365).optional().describe('Look back N days (default 30)'),
72
+ days: coerceInt.pipe(z.number().int().min(1).max(365)).optional().describe('Look back N days (default 30)'),
50
73
  };
51
74
 
52
75
  export const memCompressSchema = {
53
- preview: z.boolean().optional().describe('true=count candidates, false=execute compression (default: true)'),
54
- age_days: z.number().int().min(30).max(365).optional().describe('Min age in days (default: 60)'),
76
+ preview: coerceBool.optional().describe('true=count candidates, false=execute compression (default: true)'),
77
+ age_days: coerceInt.pipe(z.number().int().min(30).max(365)).optional().describe('Min age in days (default: 60)'),
55
78
  project: z.string().optional().describe('Filter by project'),
56
79
  };
57
80
 
@@ -59,9 +82,11 @@ export const memMaintainSchema = {
59
82
  action: z.enum(['scan', 'execute']).describe('scan=analyze candidates, execute=apply changes'),
60
83
  operations: z.array(z.enum(['dedup', 'decay', 'cleanup', 'boost', 'purge_stale'])).optional()
61
84
  .describe('Operations to execute (for action=execute). purge_stale deletes idle-marked observations after user confirmation.'),
62
- merge_ids: z.array(z.array(z.number().int()).min(2)).optional()
63
- .describe('For dedup: [[keepId, removeId1, removeId2], ...] first ID in each group is kept'),
64
- retain_days: z.number().int().min(7).max(365).optional()
85
+ merge_ids: z.preprocess(
86
+ (v) => Array.isArray(v) ? v.map(g => Array.isArray(g) ? g.map(x => typeof x === 'string' ? parseInt(x, 10) : x) : g) : v,
87
+ z.array(z.array(z.number().int()).min(2))
88
+ ).optional().describe('For dedup: [[keepId, removeId1, removeId2], ...] — first ID in each group is kept'),
89
+ retain_days: coerceInt.pipe(z.number().int().min(7).max(365)).optional()
65
90
  .describe('For purge_stale: keep observations newer than N days (default 30)'),
66
91
  project: z.string().optional().describe('Filter by project'),
67
92
  };
package/utils.mjs CHANGED
@@ -33,8 +33,11 @@ export const DEFAULT_DECAY_HALF_LIFE_MS = 14 * 86400000;
33
33
  */
34
34
  export function jaccardSimilarity(a, b) {
35
35
  if (!a || !b) return 0;
36
- const setA = new Set(a.toLowerCase().split(/\s+/));
37
- const setB = new Set(b.toLowerCase().split(/\s+/));
36
+ // Strip trailing punctuation from tokens to match MinHash normalization
37
+ // (prevents dedup failures between tokens such as "server.rs," and "server.rs")
38
+ const norm = s => s.toLowerCase().split(/\s+/).map(t => t.replace(/[,;:!?]+$/, ''));
39
+ const setA = new Set(norm(a));
40
+ const setB = new Set(norm(b));
38
41
  let intersection = 0;
39
42
  for (const w of setA) { if (setB.has(w)) intersection++; }
40
43
  const union = setA.size + setB.size - intersection;
@@ -109,12 +112,26 @@ export function scrubSecrets(text) {
109
112
  // ─── Token Estimation ─────────────────────────────────────────────────────
110
113
 
111
114
  /**
112
- * Estimate token count for a string using the ~4 chars/token heuristic.
115
+ * Estimate token count for a string.
116
+ * Uses ~4 chars/token for non-CJK text and ~1.5 chars/token for CJK characters (Han ideographs, Hangul syllables, CJK punctuation, full-width forms).
113
117
  * @param {string} text Input text
114
118
  * @returns {number} Estimated token count (minimum 1)
115
119
  */
116
120
  export function estimateTokens(text) {
117
- return Math.ceil(((text || '').length || 1) / 4);
121
+ const s = text || '';
122
+ if (!s) return 1;
123
+ // Count CJK characters (~1.5 chars/token) separately from all other characters (~4 chars/token)
124
+ let cjkCount = 0;
125
+ for (let i = 0; i < s.length; i++) {
126
+ const c = s.charCodeAt(i);
127
+ if ((c >= 0x4e00 && c <= 0x9fff) || (c >= 0x3400 && c <= 0x4dbf) ||
128
+ (c >= 0x3000 && c <= 0x303f) || (c >= 0xff00 && c <= 0xffef) ||
129
+ (c >= 0xac00 && c <= 0xd7af)) {
130
+ cjkCount++;
131
+ }
132
+ }
133
+ const asciiLen = s.length - cjkCount;
134
+ return Math.max(1, Math.ceil(asciiLen / 4) + Math.ceil(cjkCount / 1.5));
118
135
  }
119
136
 
120
137
  // ─── MinHash Signatures ──────────────────────────────────────────────────
@@ -236,32 +253,22 @@ const SYNONYM_PAIRS = [
236
253
  ['prod', 'production'],
237
254
  ['async', 'asynchronous'],
238
255
  ['sync', 'synchronous'],
239
- // Semantic equivalents — bridges terms users type interchangeably
256
+ // Semantic equivalents — precise synonyms only (overly broad bridges removed)
240
257
  ['login', 'signin'],
241
- ['login', 'auth'],
242
- ['signin', 'auth'],
243
258
  ['bug', 'error'],
244
- ['bug', 'issue'],
245
259
  ['bug', 'defect'],
246
260
  ['crash', 'panic'],
247
261
  ['crash', 'segfault'],
248
262
  ['slow', 'latency'],
249
- ['slow', 'perf'],
250
263
  ['remove', 'delete'],
251
264
  ['setup', 'install'],
252
- ['setup', 'config'],
253
265
  ['deploy', 'release'],
254
266
  ['deploy', 'publish'],
255
267
  ['refactor', 'restructure'],
256
- ['refactor', 'cleanup'],
257
268
  ['test', 'spec'],
258
- ['api', 'endpoint'],
259
- ['api', 'route'],
260
269
  ['cache', 'caching'],
261
270
  ['cache', 'memoize'],
262
271
  ['optimize', 'optimization'],
263
- ['optimize', 'performance'],
264
- ['speed', 'performance'],
265
272
  ['fix', 'bugfix'],
266
273
  ['fix', 'patch'],
267
274
  ['debug', 'debugging'],
@@ -357,11 +364,11 @@ function expandToken(token) {
357
364
  export function sanitizeFtsQuery(query) {
358
365
  if (!query) return null;
359
366
  const cleaned = query
360
- .replace(/[{}()[\]^~*:"]/g, ' ')
367
+ .replace(/[{}()[\]^~*:"\\]/g, ' ')
361
368
  .replace(/(^|\s)-/g, '$1')
362
369
  .trim();
363
370
  if (!cleaned) return null;
364
- const tokens = cleaned.split(/\s+/).filter(t => t && !/^-+$/.test(t) && !FTS5_KEYWORDS.has(t.toUpperCase()));
371
+ const tokens = cleaned.split(/\s+/).filter(t => t && !/^-+$/.test(t) && !FTS5_KEYWORDS.has(t.toUpperCase()) && !/^NEAR\/\d+$/i.test(t));
365
372
  if (tokens.length === 0) return null;
366
373
  // Replace single CJK character tokens with bigrams for better phrase matching.
367
374
  // Individual CJK chars ("系","统") are too noisy; bigrams ("系统") capture compound words.
@@ -370,14 +377,15 @@ export function sanitizeFtsQuery(query) {
370
377
  const hasBigrams = bigramSet.size > 0;
371
378
  const finalTokens = [];
372
379
  const seen = new Set();
380
+ const rawTokensSeen = new Set(); // track raw tokens to prevent bigram duplicates
373
381
  for (const t of tokens) {
374
382
  // Skip single CJK characters when we have bigrams — they're subsumed by bigram tokens
375
383
  if (hasBigrams && /^[\u4e00-\u9fff\u3400-\u4dbf]$/.test(t)) continue;
376
384
  const expanded = expandToken(t);
377
- if (!seen.has(expanded)) { seen.add(expanded); finalTokens.push(expanded); }
385
+ if (!seen.has(expanded)) { seen.add(expanded); rawTokensSeen.add(t); finalTokens.push(expanded); }
378
386
  }
379
387
  for (const bg of bigramSet) {
380
- if (!seen.has(bg)) { seen.add(bg); finalTokens.push(bg); }
388
+ if (!seen.has(bg) && !rawTokensSeen.has(bg)) { seen.add(bg); finalTokens.push(bg); }
381
389
  }
382
390
  if (finalTokens.length === 0) return null;
383
391
  // FTS5 requires explicit AND after parenthesized OR groups
@@ -450,12 +458,10 @@ export function computeRuleImportance(episode) {
450
458
  if (files.some(f => /\.config\.|tsconfig|Dockerfile|docker-compose|package\.json|\.yml$|\.yaml$/i.test(basename(f))) && importance < 2) importance = 2;
451
459
  }
452
460
 
453
- // Tool diversity: Edit + Bash + another tool = complete dev cycle
454
- if (toolTypes.size >= 3 && toolTypes.has('Edit') && importance < 2) importance = 2;
455
461
  // Debug cycle: error followed by edit = active debugging
456
462
  if (hasErrorThenEdit && importance < 2) importance = 2;
457
- // Broad change: many files touched
458
- if ((episode.files || []).length >= 5 && importance < 2) importance = 2;
463
+ // Broad change: many files touched (8+ indicates significant scope)
464
+ if ((episode.files || []).length >= 8 && importance < 2) importance = 2;
459
465
 
460
466
  return importance;
461
467
  }
@@ -466,16 +472,65 @@ export function computeRuleImportance(episode) {
466
472
  * @param {string} text Input text containing CJK characters
467
473
  * @returns {string} Space-separated bigrams
468
474
  */
475
+ // Common CJK compound words (2-4 chars) — dictionary-first tokenization.
476
+ // When a compound word is found, it's emitted as a whole token instead of being
477
+ // split into overlapping bigrams. This dramatically reduces noise:
478
+ // "数据库" → "数据库" (1 token) instead of "数据 据库" (2 noisy tokens)
479
+ const CJK_COMPOUNDS = new Set([
480
+ // tech/programming
481
+ '数据库', '数据', '接口', '函数', '变量', '组件', '模块', '配置', '框架', '部署',
482
+ '测试', '调试', '编译', '打包', '构建', '缓存', '索引', '迁移', '回滚', '权限',
483
+ '认证', '授权', '加密', '解密', '序列', '并发', '异步', '同步', '线程', '进程',
484
+ '容器', '集群', '服务器', '中间件', '网关', '负载', '监控', '日志', '告警',
485
+ '前端', '后端', '全栈', '响应式', '路由', '状态', '渲染', '样式', '布局',
486
+ // actions
487
+ '修复', '重构', '优化', '升级', '安装', '卸载', '导入', '导出', '上传', '下载',
488
+ '提交', '推送', '合并', '发布', '上线', '回退', '审查', '审核', '评审',
489
+ // errors/issues
490
+ '报错', '崩溃', '泄露', '溢出', '死锁', '超时', '中断', '异常', '故障',
491
+ // architecture
492
+ '架构', '设计', '方案', '规划', '文档', '注释', '版本', '分支', '依赖',
493
+ '性能', '安全', '漏洞', '补丁',
494
+ ]);
495
+
496
+ // Sort by length descending for greedy matching
497
+ const CJK_SORTED = [...CJK_COMPOUNDS].sort((a, b) => b.length - a.length);
498
+
499
+ /**
500
+ * Generate search tokens from CJK text using dictionary-first tokenization.
501
+ * Compound words are emitted whole; remaining chars use bigram fallback.
502
+ * "修复了数据库崩溃" → "修复 了数 数据库 崩溃" (4 tokens; the unmatched "了" falls back to the bigram "了数")
503
+ * vs old bigram: "修复 复了 了数 数据 据库 库崩 崩溃" (7 noisy tokens)
504
+ * @param {string} text Input text containing CJK characters
505
+ * @returns {string} Space-separated tokens
506
+ */
469
507
  export function cjkBigrams(text) {
470
508
  if (!text) return '';
471
509
  const runs = text.match(/[\u4e00-\u9fff\u3400-\u4dbf]{2,}/g) || [];
472
- const bigrams = [];
510
+ const tokens = [];
473
511
  for (const run of runs) {
474
- for (let i = 0; i < run.length - 1; i++) {
475
- bigrams.push(run[i] + run[i + 1]);
512
+ let i = 0;
513
+ while (i < run.length) {
514
+ let matched = false;
515
+ // Greedy dictionary match (longest first)
516
+ for (const word of CJK_SORTED) {
517
+ if (i + word.length <= run.length && run.slice(i, i + word.length) === word) {
518
+ tokens.push(word);
519
+ i += word.length;
520
+ matched = true;
521
+ break;
522
+ }
523
+ }
524
+ if (!matched) {
525
+ // Fallback: bigram for unknown compound
526
+ if (i + 1 < run.length) {
527
+ tokens.push(run[i] + run[i + 1]);
528
+ }
529
+ i++;
530
+ }
476
531
  }
477
532
  }
478
- return bigrams.join(' ');
533
+ return [...new Set(tokens)].join(' ');
479
534
  }
480
535
 
481
536
  // ─── Project Inference ───────────────────────────────────────────────────────
@@ -506,8 +561,8 @@ export function inferProject() {
506
561
  */
507
562
  export function detectBashSignificance(input, response) {
508
563
  const cmd = (input.command || '').toLowerCase();
509
- const isError = /\berror\b|fail(ed|ure)?|exception|panic|traceback|errno|enoent|command not found/i.test(response)
510
- && response.length > 30;
564
+ const isError = /\berror\b|\bERR!|fail(ed|ure)?|exception|panic|traceback|errno|enoent|command not found/i.test(response)
565
+ && response.length > 15;
511
566
  const isTest = /\b(test|jest|pytest|vitest|mocha|spec|cypress|playwright)\b/i.test(cmd);
512
567
  const isBuild = /\b(build|compile|tsc|webpack|vite|rollup|esbuild|make|cargo)\b/i.test(cmd);
513
568
  const isGit = /\bgit\s+(commit|merge|rebase|cherry-pick|push)\b/i.test(cmd);
@@ -573,7 +628,9 @@ export function extractFilePaths(input) {
573
628
  if (match) {
574
629
  for (const m of match) {
575
630
  const p = m.trim();
576
- if (!p.startsWith('/dev/') && !p.startsWith('/proc/') && !p.startsWith('/tmp/')) {
631
+ if (!p.startsWith('/dev/') && !p.startsWith('/proc/') && !p.startsWith('/tmp/')
632
+ // Skip single-component paths like /exit, /clear — likely slash commands, not files
633
+ && (p.indexOf('/', 1) !== -1 || /\.\w+$/.test(p))) {
577
634
  paths.push(p);
578
635
  }
579
636
  }