claude-mem-lite 2.49.1 → 2.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/install.mjs +132 -30
- package/mem-cli.mjs +14 -3
- package/nlp.mjs +52 -0
- package/package.json +1 -1
- package/scripts/user-prompt-search.js +6 -1
- package/server.mjs +14 -4
package/install.mjs
CHANGED
|
@@ -25,10 +25,115 @@ const MARKETPLACE_KEY = 'sdsrss';
|
|
|
25
25
|
const PLUGIN_KEY = `claude-mem-lite@${MARKETPLACE_KEY}`;
|
|
26
26
|
const NPM_INSTALL_CMD = 'npm install --omit=dev --no-audit --no-fund';
|
|
27
27
|
|
|
28
|
+
import { createRequire } from 'module';
|
|
29
|
+
|
|
28
30
|
import { RESOURCE_METADATA } from './install-metadata.mjs';
|
|
29
31
|
import { scanPluginCacheHookPollution } from './plugin-cache-guard.mjs';
|
|
30
32
|
import { SOURCE_FILES } from './source-files.mjs';
|
|
31
33
|
|
|
34
|
+
/**
|
|
35
|
+
* Hook scripts that non-dev install must copy into ~/.claude-mem-lite/scripts/
|
|
36
|
+
* to keep settings.json hook commands resolvable. Single source of truth so
|
|
37
|
+
* adding a new PreToolUse/PostToolUse hook script can't drift from the install
|
|
38
|
+
* copy block (which previously hand-listed only 3 of these and silently
|
|
39
|
+
* dropped pre-tool-recall.js + pre-skill-bridge.js — every fresh install left
|
|
40
|
+
* settings.json pointing at non-existent files).
|
|
41
|
+
*/
|
|
42
|
+
export const HOOK_SCRIPT_FILES = [
|
|
43
|
+
'post-tool-use.sh',
|
|
44
|
+
'user-prompt-search.js',
|
|
45
|
+
'prompt-search-utils.mjs',
|
|
46
|
+
'pre-tool-recall.js',
|
|
47
|
+
'pre-skill-bridge.js',
|
|
48
|
+
];
|
|
49
|
+
|
|
50
|
+
export function copyHookScripts(srcDir, destDir) {
|
|
51
|
+
for (const name of HOOK_SCRIPT_FILES) {
|
|
52
|
+
const src = join(srcDir, name);
|
|
53
|
+
if (existsSync(src)) copyFileSync(src, join(destDir, name));
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
/**
|
|
58
|
+
* Move legacy `~/.claude-mem/claude-mem.db` (+ -wal/-shm sidecars) to
|
|
59
|
+
* timestamped `*.legacy-backup-<ms>` files inside `newDir`. The legacy DB
|
|
60
|
+
* carries v16 schema (schema_versions plural table); the new claude-mem-lite
|
|
61
|
+
* code expects v28 (schema_version singular + memory_session_id column) and
|
|
62
|
+
* MIGRATIONS[] has no v16→v28 bridge — so loading the legacy DB FATALs on
|
|
63
|
+
* first launch. Backing up rather than copying-as-current lets the new
|
|
64
|
+
* install create a fresh v28 DB while preserving legacy bytes for recovery.
|
|
65
|
+
*
|
|
66
|
+
* Returns: {action: 'noop'|'skip'|'backed-up', backupPath?}
|
|
67
|
+
* - noop: no legacy DB found
|
|
68
|
+
* - skip: working `claude-mem-lite.db` already exists in newDir
|
|
69
|
+
* - backed-up: legacy files renamed to `<newDir>/claude-mem-lite.db.legacy-backup-<ts>` etc.
|
|
70
|
+
*/
|
|
71
|
+
export function migrateLegacyClaudeMemData(oldDir, newDir, opts = {}) {
|
|
72
|
+
const legacyDb = join(oldDir, 'claude-mem.db');
|
|
73
|
+
const targetDb = join(newDir, 'claude-mem-lite.db');
|
|
74
|
+
if (!existsSync(legacyDb)) return { action: 'noop' };
|
|
75
|
+
if (existsSync(targetDb)) return { action: 'skip' };
|
|
76
|
+
|
|
77
|
+
if (!existsSync(newDir)) mkdirSync(newDir, { recursive: true });
|
|
78
|
+
const ts = opts.now ?? Date.now();
|
|
79
|
+
const backupPath = join(newDir, `claude-mem-lite.db.legacy-backup-${ts}`);
|
|
80
|
+
renameSync(legacyDb, backupPath);
|
|
81
|
+
for (const ext of ['-wal', '-shm']) {
|
|
82
|
+
const src = legacyDb + ext;
|
|
83
|
+
if (existsSync(src)) renameSync(src, join(newDir, `claude-mem-lite.db${ext}.legacy-backup-${ts}`));
|
|
84
|
+
}
|
|
85
|
+
return { action: 'backed-up', backupPath };
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
/**
|
|
89
|
+
* Probe better-sqlite3's native binding by importing it from `installDir`'s
|
|
90
|
+
* node_modules and opening an in-memory DB. Returns {ok, error?}. `npm install`
|
|
91
|
+
* exits 0 even when the prebuilt .node binary mismatches the running Node ABI
|
|
92
|
+
* (e.g. NODE_MODULE_VERSION 137 on Node v24), so install must verify before
|
|
93
|
+
* declaring success — otherwise the next launch FATALs with "Could not locate
|
|
94
|
+
* the bindings file".
|
|
95
|
+
*/
|
|
96
|
+
export async function probeBetterSqlite3Binding(installDir) {
|
|
97
|
+
try {
|
|
98
|
+
const localRequire = createRequire(join(installDir, 'package.json'));
|
|
99
|
+
const Database = localRequire('better-sqlite3');
|
|
100
|
+
const db = new Database(':memory:');
|
|
101
|
+
db.close();
|
|
102
|
+
return { ok: true };
|
|
103
|
+
} catch (e) {
|
|
104
|
+
return { ok: false, error: e.message };
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
/**
|
|
109
|
+
* Verify better-sqlite3 binding works in `installDir`; if not, run
|
|
110
|
+
* `npm rebuild better-sqlite3` and re-probe. Returns
|
|
111
|
+
* { ok: true, action: 'verified' | 'rebuilt' } on success or
|
|
112
|
+
* { ok: false, error } if rebuild can't fix it. The `probe` and `rebuild`
|
|
113
|
+
* deps are injectable so this can be unit-tested without a real npm
|
|
114
|
+
* subprocess.
|
|
115
|
+
*/
|
|
116
|
+
export async function ensureBetterSqlite3Working(installDir, deps = {}) {
|
|
117
|
+
const probe = deps.probe || (() => probeBetterSqlite3Binding(installDir));
|
|
118
|
+
const rebuild = deps.rebuild || (async () => {
|
|
119
|
+
execSync('npm rebuild better-sqlite3', { cwd: installDir, stdio: 'pipe' });
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
const first = await probe();
|
|
123
|
+
if (first.ok) return { ok: true, action: 'verified' };
|
|
124
|
+
|
|
125
|
+
try {
|
|
126
|
+
await rebuild();
|
|
127
|
+
} catch (e) {
|
|
128
|
+
return { ok: false, error: `rebuild failed: ${e.message}` };
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
const second = await probe();
|
|
132
|
+
if (second.ok) return { ok: true, action: 'rebuilt' };
|
|
133
|
+
|
|
134
|
+
return { ok: false, error: second.error || first.error };
|
|
135
|
+
}
|
|
136
|
+
|
|
32
137
|
/**
|
|
33
138
|
* Derive invocation_name from resource name when metadata doesn't provide one.
|
|
34
139
|
* Rules:
|
|
@@ -265,13 +370,9 @@ async function install() {
|
|
|
265
370
|
copyFileSync(src, dst);
|
|
266
371
|
}
|
|
267
372
|
}
|
|
268
|
-
// Copy scripts
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
const promptSearchSrc = join(PROJECT_DIR, 'scripts', 'user-prompt-search.js');
|
|
272
|
-
if (existsSync(promptSearchSrc)) copyFileSync(promptSearchSrc, join(scriptsDir, 'user-prompt-search.js'));
|
|
273
|
-
const promptSearchUtilsSrc = join(PROJECT_DIR, 'scripts', 'prompt-search-utils.mjs');
|
|
274
|
-
if (existsSync(promptSearchUtilsSrc)) copyFileSync(promptSearchUtilsSrc, join(scriptsDir, 'prompt-search-utils.mjs'));
|
|
373
|
+
// Copy hook scripts (settings.json hook commands point at these — must
|
|
374
|
+
// stay in sync with HOOK_SCRIPT_FILES manifest)
|
|
375
|
+
copyHookScripts(join(PROJECT_DIR, 'scripts'), scriptsDir);
|
|
275
376
|
// Ensure bash script is executable
|
|
276
377
|
try { execFileSync('chmod', ['+x', join(scriptsDir, 'post-tool-use.sh')], { stdio: 'pipe' }); } catch {}
|
|
277
378
|
// Copy commands directory
|
|
@@ -314,6 +415,18 @@ async function install() {
|
|
|
314
415
|
fail('npm install failed: ' + e.message);
|
|
315
416
|
process.exit(1);
|
|
316
417
|
}
|
|
418
|
+
// npm install exits 0 even when the better-sqlite3 prebuilt .node binary
|
|
419
|
+
// mismatches the running Node ABI (e.g. NODE_MODULE_VERSION 137 on Node v24).
|
|
420
|
+
// Probe and auto-rebuild before declaring success — otherwise the next
|
|
421
|
+
// launch FATALs with "Could not locate the bindings file".
|
|
422
|
+
const verify = await ensureBetterSqlite3Working(INSTALL_DIR);
|
|
423
|
+
if (verify.ok) {
|
|
424
|
+
ok(`better-sqlite3: ${verify.action}`);
|
|
425
|
+
} else {
|
|
426
|
+
fail(`better-sqlite3 binding unusable after rebuild: ${verify.error}`);
|
|
427
|
+
log('Try manually: cd ' + INSTALL_DIR + ' && npm rebuild better-sqlite3 --build-from-source');
|
|
428
|
+
process.exit(1);
|
|
429
|
+
}
|
|
317
430
|
}
|
|
318
431
|
|
|
319
432
|
// 2b. Create global CLI symlink (claude-mem-lite command)
|
|
@@ -542,30 +655,19 @@ async function install() {
|
|
|
542
655
|
writeSettings(settings);
|
|
543
656
|
ok('Hooks configured (PreToolUse, PostToolUse, SessionStart, Stop, UserPromptSubmit)');
|
|
544
657
|
|
|
545
|
-
// 5.
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
if (existsSync(src)) copyFileSync(src, DB_PATH + ext);
|
|
556
|
-
}
|
|
557
|
-
// Migrate runtime directory
|
|
558
|
-
const oldRuntime = join(OLD_DATA_DIR, 'runtime');
|
|
559
|
-
const newRuntime = join(DATA_DIR, 'runtime');
|
|
560
|
-
if (existsSync(oldRuntime) && !existsSync(newRuntime)) {
|
|
561
|
-
cpSync(oldRuntime, newRuntime, { recursive: true });
|
|
562
|
-
}
|
|
563
|
-
ok('Data migrated from ~/.claude-mem/ → ~/.claude-mem-lite/');
|
|
564
|
-
log('Old ~/.claude-mem/ preserved (remove manually when ready)');
|
|
565
|
-
} catch (e) {
|
|
566
|
-
warn('Migration failed: ' + e.message);
|
|
567
|
-
log('You can copy manually: cp ~/.claude-mem/claude-mem.db ~/.claude-mem-lite/claude-mem-lite.db');
|
|
658
|
+
// 5. Legacy ~/.claude-mem/ → ~/.claude-mem-lite/ — back up, don't reuse.
|
|
659
|
+
// The legacy DB is schema v16 (schema_versions plural) and there's no
|
|
660
|
+
// bridge in MIGRATIONS[] to v28. Reusing it FATALs on first launch with
|
|
661
|
+
// "no such column: memory_session_id". Rename to a timestamped backup
|
|
662
|
+
// so the new install creates a fresh v28 DB.
|
|
663
|
+
try {
|
|
664
|
+
const r = migrateLegacyClaudeMemData(OLD_DATA_DIR, DATA_DIR);
|
|
665
|
+
if (r.action === 'backed-up') {
|
|
666
|
+
ok(`Legacy ~/.claude-mem/ DB backed up to ${r.backupPath}`);
|
|
667
|
+
log('New v28 DB will be created on first launch (legacy schema is incompatible).');
|
|
568
668
|
}
|
|
669
|
+
} catch (e) {
|
|
670
|
+
warn('Legacy DB backup failed: ' + e.message);
|
|
569
671
|
}
|
|
570
672
|
|
|
571
673
|
// 5b. Rename claude-mem.db → claude-mem-lite.db in same directory
|
package/mem-cli.mjs
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
import { homedir } from 'os';
|
|
6
6
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
7
7
|
import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, jaccardSimilarity, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, isoWeekKey, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, DEFAULT_DECAY_HALF_LIFE_MS, getCurrentBranch, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
|
|
8
|
+
import { cjkPrecisionOk } from './nlp.mjs';
|
|
8
9
|
import { extractCjkLikePatterns } from './nlp.mjs';
|
|
9
10
|
import { resolveProject } from './project-utils.mjs';
|
|
10
11
|
import { computeTier, TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
|
|
@@ -240,9 +241,14 @@ function cmdSearch(db, args) {
|
|
|
240
241
|
ORDER BY score
|
|
241
242
|
LIMIT ? OFFSET ?
|
|
242
243
|
`).all(...promptParams);
|
|
243
|
-
|
|
244
|
+
// CJK precision filter (read-path parity with server.mjs): unicode61
|
|
245
|
+
// degrades bigram queries to single-char AND, letting common-char
|
|
246
|
+
// Chinese prose leak through. Drop rows that match < 20% of query
|
|
247
|
+
// bigrams/keywords as contiguous substrings.
|
|
248
|
+
const keptPromptRows = promptRows.filter(r => cjkPrecisionOk(query, r.prompt_text));
|
|
249
|
+
for (const r of keptPromptRows) results.push({ ...r, _source: 'prompt' });
|
|
244
250
|
// CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
|
|
245
|
-
if (
|
|
251
|
+
if (keptPromptRows.length === 0) {
|
|
246
252
|
const cjkPatterns = extractCjkLikePatterns(query);
|
|
247
253
|
if (cjkPatterns.length > 0) {
|
|
248
254
|
const likeConds = cjkPatterns.map(() => 'p.prompt_text LIKE ?');
|
|
@@ -263,7 +269,12 @@ function cmdSearch(db, args) {
|
|
|
263
269
|
ORDER BY p.created_at_epoch DESC
|
|
264
270
|
LIMIT ? OFFSET ?
|
|
265
271
|
`).all(...likeParams);
|
|
266
|
-
|
|
272
|
+
// CJK precision filter applies here too: the LIKE fallback is just
|
|
273
|
+
// OR'd substring bigrams; without the precision gate it re-admits
|
|
274
|
+
// the same common-char noise the FTS path dropped (this was the
|
|
275
|
+
// actual leak source — FTS returned 0, fallback filled 20).
|
|
276
|
+
const keptFallback = fallbackRows.filter(r => cjkPrecisionOk(query, r.prompt_text));
|
|
277
|
+
for (const r of keptFallback) results.push({ ...r, _source: 'prompt', score: 0 });
|
|
267
278
|
}
|
|
268
279
|
}
|
|
269
280
|
} catch { /* prompt FTS may not exist in older DBs */ }
|
package/nlp.mjs
CHANGED
|
@@ -124,6 +124,58 @@ export function extractCjkLikePatterns(query) {
|
|
|
124
124
|
return [...new Set([...keywords, ...bigrams])];
|
|
125
125
|
}
|
|
126
126
|
|
|
127
|
+
/**
|
|
128
|
+
* Post-FTS precision filter for CJK queries.
|
|
129
|
+
*
|
|
130
|
+
* Background: FTS5 unicode61 tokenizer splits every CJK character into its
|
|
131
|
+
* own token. An application-layer bigram query like "我是" then reduces to
|
|
132
|
+
* (我 AND 是) at match time — matching any document that happens to contain
|
|
133
|
+
* both chars anywhere, which is extremely permissive in Chinese prose.
|
|
134
|
+
*
|
|
135
|
+
* Precision check: given the raw query and a candidate result's full text,
|
|
136
|
+
* require that at least `threshold` fraction of the query's CJK bigrams
|
|
137
|
+
* (or dictionary words, if any matched) appear as contiguous substrings in
|
|
138
|
+
* the result. Non-CJK queries bypass this filter entirely.
|
|
139
|
+
*
|
|
140
|
+
* Applied only to the prompts/user-prompt path — observations have richer
|
|
141
|
+
* rerank + low-signal filtering that already control noise there. Also,
|
|
142
|
+
* obs-side synonym expansion ("查询"→"(查询 OR query OR search)") is a
|
|
143
|
+
* legitimate recall mechanism that this filter would break.
|
|
144
|
+
*
|
|
145
|
+
* Threshold default 0.2 is tunable via `CLAUDE_MEM_CJK_PREC_MIN` env var.
|
|
146
|
+
* Explicit threshold arg still overrides the env value — tests and in-code
|
|
147
|
+
* callers with domain context stay authoritative.
|
|
148
|
+
*
|
|
149
|
+
* Default was tuned from 0.3 → 0.2 after a 20-query production-DB fixture
|
|
150
|
+
* showed 0.3 over-rejected legitimate multi-bigram queries whose dict-
|
|
151
|
+
* keyword coverage was incomplete (e.g. "同义词扩展" — neither compound
|
|
152
|
+
* is in CJK_COMPOUNDS → 4 bigrams required, single-keyword match only
|
|
153
|
+
* 25% < 30% rejected 19/20 real hits). At 0.2, pure-noise reduction stays
|
|
154
|
+
* ≥85% on noise fixture while SIG-6 recall recovered to 100%.
|
|
155
|
+
*
|
|
156
|
+
* @param {string} query Raw query text
|
|
157
|
+
* @param {string} text Candidate result text
|
|
158
|
+
* @param {number} [threshold] Fraction of patterns that must match. If
|
|
159
|
+
* omitted, reads CLAUDE_MEM_CJK_PREC_MIN (default 0.2).
|
|
160
|
+
* @returns {boolean}
|
|
161
|
+
*/
|
|
162
|
+
export function cjkPrecisionOk(query, text, threshold) {
|
|
163
|
+
if (threshold === undefined) {
|
|
164
|
+
const envVal = process.env.CLAUDE_MEM_CJK_PREC_MIN;
|
|
165
|
+
const parsed = envVal ? parseFloat(envVal) : NaN;
|
|
166
|
+
threshold = Number.isFinite(parsed) && parsed >= 0 && parsed <= 1 ? parsed : 0.2;
|
|
167
|
+
}
|
|
168
|
+
if (!query || !text) return true;
|
|
169
|
+
if (!/[一-鿿㐀-䶿]{2,}/.test(query)) return true;
|
|
170
|
+
const keywords = extractCjkKeywords(query);
|
|
171
|
+
const required = keywords.length > 0
|
|
172
|
+
? keywords
|
|
173
|
+
: cjkBigrams(query).split(' ').filter(b => b && !CJK_STOP_WORDS.has(b));
|
|
174
|
+
if (required.length === 0) return true;
|
|
175
|
+
const hit = required.filter(w => text.includes(w)).length;
|
|
176
|
+
return (hit / required.length) >= threshold;
|
|
177
|
+
}
|
|
178
|
+
|
|
127
179
|
// ─── FTS5 Token Formatting ──────────────────────────────────────────────────
|
|
128
180
|
|
|
129
181
|
// Format a term for FTS5: quote if it contains spaces, hyphens, or special chars
|
package/package.json
CHANGED
package/scripts/user-prompt-search.js
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
import { ensureDb, DB_DIR, REGISTRY_DB_PATH } from '../schema.mjs';
|
|
7
7
|
import { sanitizeFtsQuery, relaxFtsQueryToOr, truncate, typeIcon, inferProject, OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, notLowSignalTitleClause, noisePenaltyClause } from '../utils.mjs';
|
|
8
|
+
import { cjkPrecisionOk } from '../nlp.mjs';
|
|
8
9
|
import { writeFileSync, readFileSync, existsSync, renameSync } from 'fs';
|
|
9
10
|
import { join } from 'path';
|
|
10
11
|
import Database from 'better-sqlite3';
|
|
@@ -242,7 +243,11 @@ function searchByUserPrompts(db, queryText, project, limit) {
|
|
|
242
243
|
}
|
|
243
244
|
}
|
|
244
245
|
|
|
245
|
-
|
|
246
|
+
// CJK precision filter (parity with server.mjs + mem-cli.mjs): unicode61
|
|
247
|
+
// FTS degrades CJK bigram queries to single-char AND, letting any prose
|
|
248
|
+
// sharing common chars leak through. Drop rows that match < 20% of query
|
|
249
|
+
// bigrams/keywords as contiguous substrings. Non-CJK queries bypass.
|
|
250
|
+
return rows.filter(r => cjkPrecisionOk(queryText, r.prompt_text));
|
|
246
251
|
}
|
|
247
252
|
|
|
248
253
|
function searchRecent(db, project, limit) {
|
package/server.mjs
CHANGED
|
@@ -6,7 +6,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
|
6
6
|
import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
|
|
7
7
|
import { ListToolsRequestSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
8
8
|
import { jaccardSimilarity, truncate, typeIcon, sanitizeFtsQuery, relaxFtsQueryToOr, inferProject, computeMinHash, estimateJaccardFromMinHash, scrubSecrets, cjkBigrams, fmtDate, isoWeekKey, debugLog, debugCatch, COMPRESSED_PENDING_PURGE, OBS_BM25, SESS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE, getCurrentBranch, DEFAULT_DECAY_HALF_LIFE_MS, isPathConfined, notLowSignalTitleClause, LOW_SIGNAL_TITLE } from './utils.mjs';
|
|
9
|
-
import { extractCjkLikePatterns } from './nlp.mjs';
|
|
9
|
+
import { extractCjkLikePatterns, cjkPrecisionOk } from './nlp.mjs';
|
|
10
10
|
import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
|
|
11
11
|
import { ensureDb, DB_PATH, REGISTRY_DB_PATH } from './schema.mjs';
|
|
12
12
|
import { reRankWithContext, markSuperseded, extractPRFTerms, expandQueryByConcepts, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
|
|
@@ -463,11 +463,15 @@ function searchPrompts(ctx) {
|
|
|
463
463
|
epochTo, epochTo,
|
|
464
464
|
perSourceLimit, perSourceOffset
|
|
465
465
|
);
|
|
466
|
-
|
|
466
|
+
// CJK precision filter: unicode61 FTS degrades CJK bigram queries to
|
|
467
|
+
// single-char AND, letting any prose sharing common chars leak through.
|
|
468
|
+
// Require ≥20% (default; tunable via CLAUDE_MEM_CJK_PREC_MIN) of query's CJK bigrams/keywords as contiguous substrings.
|
|
469
|
+
const keptRows = args.query ? rows.filter(r => cjkPrecisionOk(args.query, r.prompt_text)) : rows;
|
|
470
|
+
for (const r of keptRows) {
|
|
467
471
|
results.push({ source: 'prompt', id: r.id, text: r.prompt_text, session: r.content_session_id, date: r.created_at, created_at_epoch: r.created_at_epoch, score: r.score });
|
|
468
472
|
}
|
|
469
473
|
// CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
|
|
470
|
-
if (
|
|
474
|
+
if (keptRows.length === 0 && args.query) {
|
|
471
475
|
const cjkPatterns = extractCjkLikePatterns(args.query);
|
|
472
476
|
if (cjkPatterns.length > 0) {
|
|
473
477
|
const likeConds = cjkPatterns.map(() => 'p.prompt_text LIKE ?');
|
|
@@ -490,7 +494,13 @@ function searchPrompts(ctx) {
|
|
|
490
494
|
epochTo, epochTo,
|
|
491
495
|
perSourceLimit, perSourceOffset
|
|
492
496
|
);
|
|
493
|
-
|
|
497
|
+
// Parity with mem-cli.mjs: the LIKE fallback is an OR'd bigram
|
|
498
|
+
// substring scan with no scoring gate. The precision filter must
|
|
499
|
+
// apply here too — without it, queries whose FTS set is empty
|
|
500
|
+
// re-admit the full common-char noise band that FTS would have
|
|
501
|
+
// dropped downstream anyway.
|
|
502
|
+
const keptFallback = args.query ? fallbackRows.filter(r => cjkPrecisionOk(args.query, r.prompt_text)) : fallbackRows;
|
|
503
|
+
for (const r of keptFallback) {
|
|
494
504
|
results.push({ source: 'prompt', id: r.id, text: r.prompt_text, session: r.content_session_id, date: r.created_at, created_at_epoch: r.created_at_epoch, score: 0 });
|
|
495
505
|
}
|
|
496
506
|
}
|