claude-mem-lite 2.46.0 → 2.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/hook-context.mjs +28 -6
- package/hook-llm.mjs +11 -1
- package/hook.mjs +23 -0
- package/install.mjs +62 -0
- package/lib/low-signal-patterns.mjs +34 -0
- package/package.json +1 -1
- package/schema.mjs +17 -1
- package/scoring-sql.mjs +13 -8
- package/tfidf.mjs +7 -0
package/hook-context.mjs
CHANGED
|
@@ -2,13 +2,13 @@
|
|
|
2
2
|
// Handles adaptive time windows, token-budgeted selection, and legacy CLAUDE.md cleanup.
|
|
3
3
|
|
|
4
4
|
import { basename, join } from 'path';
|
|
5
|
-
import { readFileSync, writeFileSync, renameSync, unlinkSync } from 'fs';
|
|
5
|
+
import { existsSync, readFileSync, writeFileSync, renameSync, unlinkSync } from 'fs';
|
|
6
6
|
import {
|
|
7
|
-
estimateTokens, truncate, typeIcon, fmtTime,
|
|
7
|
+
estimateTokens, truncate, typeIcon, fmtTime, inferProject,
|
|
8
8
|
debugLog, debugCatch,
|
|
9
9
|
DECAY_HALF_LIFE_BY_TYPE, DEFAULT_DECAY_HALF_LIFE_MS, notLowSignalTitleClause,
|
|
10
10
|
} from './utils.mjs';
|
|
11
|
-
import { STALE_SESSION_MS, FALLBACK_OBS_WINDOW_MS, effectiveQuiet } from './hook-shared.mjs';
|
|
11
|
+
import { STALE_SESSION_MS, FALLBACK_OBS_WINDOW_MS, RUNTIME_DIR, effectiveQuiet } from './hook-shared.mjs';
|
|
12
12
|
import { extractUnfinishedSummary } from './hook-handoff.mjs';
|
|
13
13
|
|
|
14
14
|
/**
|
|
@@ -176,9 +176,26 @@ export function selectWithTokenBudget(db, project, budget = 2000) {
|
|
|
176
176
|
* at the seam. Uses atomic tmp+rename write.
|
|
177
177
|
*/
|
|
178
178
|
export function cleanupClaudeMdLegacyBlock() {
|
|
179
|
+
// v2.48 P2-4: idempotent marker. First run (whether it finds a block or not,
|
|
180
|
+
// whether CLAUDE.md exists or not) drops a project-scoped marker in RUNTIME_DIR.
|
|
181
|
+
// Subsequent SessionStarts short-circuit here — no CLAUDE.md stat, no regex scan.
|
|
182
|
+
// Recovery path if a user manually re-adds a legacy block: delete the marker
|
|
183
|
+
// file (`~/.claude-mem-lite/runtime/.legacy-claude-md-cleaned-<project>`) and
|
|
184
|
+
// the next SessionStart will sweep again.
|
|
185
|
+
const markerPath = join(RUNTIME_DIR, `.legacy-claude-md-cleaned-${inferProject()}`);
|
|
186
|
+
if (existsSync(markerPath)) return;
|
|
187
|
+
|
|
179
188
|
const claudeMdPath = join(inferProjectDir(), 'CLAUDE.md');
|
|
180
189
|
let content;
|
|
181
|
-
try { content = readFileSync(claudeMdPath, 'utf8'); } catch {
|
|
190
|
+
try { content = readFileSync(claudeMdPath, 'utf8'); } catch {
|
|
191
|
+
// CLAUDE.md missing — still drop the marker so we don't re-stat every session
|
|
192
|
+
try { writeFileSync(markerPath, String(Date.now())); } catch {}
|
|
193
|
+
return;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
// Helper: drop the marker regardless of exit path (found / not found / write failed).
|
|
197
|
+
// Kept inline so the early-return sites below stay readable.
|
|
198
|
+
const dropMarker = () => { try { writeFileSync(markerPath, String(Date.now())); } catch {} };
|
|
182
199
|
|
|
183
200
|
const startTag = '<claude-mem-context>';
|
|
184
201
|
const endTag = '</claude-mem-context>';
|
|
@@ -187,7 +204,10 @@ export function cleanupClaudeMdLegacyBlock() {
|
|
|
187
204
|
// (e.g. inside a code block in architecture notes) are not accidentally swept.
|
|
188
205
|
const startIdx = content.lastIndexOf(startTag);
|
|
189
206
|
const endIdx = content.lastIndexOf(endTag);
|
|
190
|
-
if (startIdx === -1 || endIdx === -1 || startIdx >= endIdx)
|
|
207
|
+
if (startIdx === -1 || endIdx === -1 || startIdx >= endIdx) {
|
|
208
|
+
dropMarker();
|
|
209
|
+
return;
|
|
210
|
+
}
|
|
191
211
|
|
|
192
212
|
// Extend forward to swallow a trailing newline so we don't leave a stranded blank line.
|
|
193
213
|
let removeEnd = endIdx + endTag.length;
|
|
@@ -213,15 +233,17 @@ export function cleanupClaudeMdLegacyBlock() {
|
|
|
213
233
|
// Collapse any ≥3 consecutive newlines to two, then ensure exactly one trailing newline.
|
|
214
234
|
const normalized = cleaned.replace(/\n{3,}/g, '\n\n').replace(/\s*$/, '\n');
|
|
215
235
|
|
|
216
|
-
if (normalized === content) return;
|
|
236
|
+
if (normalized === content) { dropMarker(); return; }
|
|
217
237
|
|
|
218
238
|
const tmp = claudeMdPath + '.mem-tmp';
|
|
219
239
|
try {
|
|
220
240
|
writeFileSync(tmp, normalized);
|
|
221
241
|
renameSync(tmp, claudeMdPath);
|
|
242
|
+
dropMarker();
|
|
222
243
|
} catch (e) {
|
|
223
244
|
try { unlinkSync(tmp); } catch {}
|
|
224
245
|
debugLog('ERROR', 'cleanupClaudeMdLegacyBlock', `CLAUDE.md write failed: ${e.message}`);
|
|
246
|
+
// Intentionally do NOT drop the marker on write failure — retry next session.
|
|
225
247
|
}
|
|
226
248
|
}
|
|
227
249
|
|
package/hook-llm.mjs
CHANGED
|
@@ -16,7 +16,7 @@ import {
|
|
|
16
16
|
sessionFile, getSessionId, openDb, callLLM, sleep,
|
|
17
17
|
} from './hook-shared.mjs';
|
|
18
18
|
import { EVENT_TYPES, saveEvent } from './lib/activity.mjs';
|
|
19
|
-
import { isNoiseObservation } from './lib/low-signal-patterns.mjs';
|
|
19
|
+
import { isNoiseObservation, capNoiseImportance } from './lib/low-signal-patterns.mjs';
|
|
20
20
|
|
|
21
21
|
// T9: memdir-incompatible types live in the `events` table, not `observations`.
|
|
22
22
|
// Set lookup is O(1) — authoritative source is lib/activity.mjs::EVENT_TYPES.
|
|
@@ -78,6 +78,16 @@ export function saveObservation(obs, projectOverride, sessionIdOverride, externa
|
|
|
78
78
|
return null;
|
|
79
79
|
}
|
|
80
80
|
|
|
81
|
+
// v2.47 P0-3: importance cap for LOW_SIGNAL titles that kept the drop gate
|
|
82
|
+
// open via importance>=2 but carry no lesson/facts signal. 341 rows in live
|
|
83
|
+
// DB had imp=3 under these conditions (99.4% noise). Cap to 1 so they
|
|
84
|
+
// enter the 7-day accelerated auto-compress window in hook.mjs.
|
|
85
|
+
const capped = capNoiseImportance(obs);
|
|
86
|
+
if (capped !== (obs.importance ?? 1)) {
|
|
87
|
+
debugLog('saveObservation', `capped imp ${obs.importance}→${capped}: ${truncate(obs.title || '', 60)}`);
|
|
88
|
+
obs.importance = capped;
|
|
89
|
+
}
|
|
90
|
+
|
|
81
91
|
// Three-tier dedup — returns null (not throw) for dedup hits
|
|
82
92
|
// Tier 1 (fast): 5-min Jaccard on titles
|
|
83
93
|
const fiveMinAgo = now.getTime() - DEDUP_WINDOW_MS;
|
package/hook.mjs
CHANGED
|
@@ -648,6 +648,29 @@ async function handleSessionStart() {
|
|
|
648
648
|
if (compressed.changes > 0) {
|
|
649
649
|
debugLog('DEBUG', 'session-start', `auto-compressed ${compressed.changes} old observations`);
|
|
650
650
|
}
|
|
651
|
+
|
|
652
|
+
// v2.47 P0-3: accelerated compress for LOW_SIGNAL + no-signal noise.
|
|
653
|
+
// 7-day window instead of 30. The write-side capNoiseImportance forces
|
|
654
|
+
// imp=1 on these already; this just shrinks the GC latency so the
|
|
655
|
+
// projected 32.5% corpus reduction materializes within a week on live
|
|
656
|
+
// DBs instead of bleeding into the 30-day tier.
|
|
657
|
+
const noiseCompressAge = Date.now() - 7 * 86400000;
|
|
658
|
+
const noiseCompressed = db.prepare(`
|
|
659
|
+
UPDATE observations SET compressed_into = ${COMPRESSED_AUTO}
|
|
660
|
+
WHERE COALESCE(compressed_into, 0) = 0
|
|
661
|
+
AND importance = 1
|
|
662
|
+
AND (lesson_learned IS NULL OR lesson_learned = '' OR lesson_learned = 'none')
|
|
663
|
+
AND (facts IS NULL OR facts = '' OR facts = '[]')
|
|
664
|
+
AND (
|
|
665
|
+
title LIKE 'Modified %' OR title LIKE 'Worked on %'
|
|
666
|
+
OR title LIKE 'Reviewed %' OR title LIKE 'Error%'
|
|
667
|
+
)
|
|
668
|
+
AND created_at_epoch < ?
|
|
669
|
+
AND project = ?
|
|
670
|
+
`).run(noiseCompressAge, project);
|
|
671
|
+
if (noiseCompressed.changes > 0) {
|
|
672
|
+
debugLog('DEBUG', 'session-start', `auto-compressed ${noiseCompressed.changes} LOW_SIGNAL noise (7d window)`);
|
|
673
|
+
}
|
|
651
674
|
})();
|
|
652
675
|
|
|
653
676
|
// Auto-maintain: cleanup + decay + boost + purge, gated to once per 24h
|
package/install.mjs
CHANGED
|
@@ -289,6 +289,17 @@ async function install() {
|
|
|
289
289
|
const manifestSrc = join(PROJECT_DIR, 'registry', 'preinstalled.json');
|
|
290
290
|
if (existsSync(manifestSrc)) copyFileSync(manifestSrc, join(registryDir, 'preinstalled.json'));
|
|
291
291
|
ok('Source files copied to ~/.claude-mem-lite/');
|
|
292
|
+
|
|
293
|
+
// v2.48 P1-4: prune stale top-level .mjs + 0-byte .db files left behind by
|
|
294
|
+
// prior upgrades (e.g. dispatch.mjs removed in v2.20.0, zero-byte mem.db /
|
|
295
|
+
// memory.db / registry.db from pre-consolidation installs). Subdirs +
|
|
296
|
+
// symlinks + non-empty DBs are always preserved.
|
|
297
|
+
try {
|
|
298
|
+
const pruned = pruneStaleInstallFiles(DATA_DIR, SOURCE_FILES);
|
|
299
|
+
if (pruned.length > 0) {
|
|
300
|
+
ok(`Pruned ${pruned.length} stale file(s): ${pruned.map(p => p.split('/').pop()).join(', ')}`);
|
|
301
|
+
}
|
|
302
|
+
} catch (e) { /* prune is best-effort — never block install */ void e; }
|
|
292
303
|
}
|
|
293
304
|
|
|
294
305
|
// 2. npm install (skip for --dev: node_modules is symlinked)
|
|
@@ -1295,6 +1306,57 @@ function hasMemHooksConfigured(settings) {
|
|
|
1295
1306
|
);
|
|
1296
1307
|
}
|
|
1297
1308
|
|
|
1309
|
+
/**
|
|
1310
|
+
* v2.48 P1-4: prune top-level stale files left behind by removed-module upgrades.
|
|
1311
|
+
*
|
|
1312
|
+
* Strict whitelist: only removes files under `dataDir` (no recursion) that match
|
|
1313
|
+
* - `*.mjs` whose basename is NOT in SOURCE_FILES (comparing against both the
|
|
1314
|
+
* bare entry and any `subdir/basename` entry flattened to its basename — the
|
|
1315
|
+
* prune intentionally skips subdir files; see below)
|
|
1316
|
+
* - 0-byte `.db` files that are NOT in the protected-db allow-list
|
|
1317
|
+
*
|
|
1318
|
+
* Protections (never touched):
|
|
1319
|
+
* - subdirectories (managed/, runtime/, scripts/, lib/, cli/, commands/, server/, node_modules/, .claude-plugin/, registry/, etc.)
|
|
1320
|
+
* - non-empty `.db` files — real data risk, always preserved
|
|
1321
|
+
* - WAL/SHM (`*-wal`, `*-shm`) transients
|
|
1322
|
+
* - files not ending in `.mjs` or `.db`
|
|
1323
|
+
* - the two canonical DBs (`claude-mem-lite.db`, `resource-registry.db`) even when 0-byte (fresh-install transient state)
|
|
1324
|
+
*
|
|
1325
|
+
* @param {string} dataDir Absolute path, typically `~/.claude-mem-lite`
|
|
1326
|
+
* @param {string[]} sourceFiles SOURCE_FILES manifest
|
|
1327
|
+
* @returns {string[]} Absolute paths of files that were deleted (ordered by readdir)
|
|
1328
|
+
*/
|
|
1329
|
+
export function pruneStaleInstallFiles(dataDir, sourceFiles) {
|
|
1330
|
+
if (!existsSync(dataDir)) return [];
|
|
1331
|
+
// Flatten manifest to just top-level basenames. SOURCE_FILES contains entries
|
|
1332
|
+
// like 'lib/activity.mjs' — those belong to a subdir and prune never touches
|
|
1333
|
+
// subdirs anyway. For top-level entries ('server.mjs'), basename === entry.
|
|
1334
|
+
const topLevelAllowed = new Set(
|
|
1335
|
+
sourceFiles
|
|
1336
|
+
.filter(f => !f.includes('/'))
|
|
1337
|
+
.map(f => f)
|
|
1338
|
+
);
|
|
1339
|
+
const PROTECTED_DBS = new Set(['claude-mem-lite.db', 'resource-registry.db']);
|
|
1340
|
+
const removed = [];
|
|
1341
|
+
let entries;
|
|
1342
|
+
try { entries = readdirSync(dataDir); } catch { return removed; }
|
|
1343
|
+
for (const name of entries) {
|
|
1344
|
+
const full = join(dataDir, name);
|
|
1345
|
+
let st;
|
|
1346
|
+
try { st = lstatSync(full); } catch { continue; }
|
|
1347
|
+
// Skip directories and symlinks (dev mode uses symlinks; treat as intentional).
|
|
1348
|
+
if (!st.isFile()) continue;
|
|
1349
|
+
if (name.endsWith('.mjs') && !topLevelAllowed.has(name)) {
|
|
1350
|
+
try { unlinkSync(full); removed.push(full); } catch { /* best-effort */ }
|
|
1351
|
+
continue;
|
|
1352
|
+
}
|
|
1353
|
+
if (name.endsWith('.db') && !PROTECTED_DBS.has(name) && st.size === 0) {
|
|
1354
|
+
try { unlinkSync(full); removed.push(full); } catch { /* best-effort */ }
|
|
1355
|
+
}
|
|
1356
|
+
}
|
|
1357
|
+
return removed;
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1298
1360
|
export function clearPluginDisabledMarkerForDirectInstall(settings) {
|
|
1299
1361
|
if (settings?.enabledPlugins?.[PLUGIN_KEY] !== false) return false;
|
|
1300
1362
|
delete settings.enabledPlugins[PLUGIN_KEY];
|
|
package/lib/low-signal-patterns.mjs
CHANGED
|
@@ -113,6 +113,40 @@ function _isLikelyToolOutputPassthrough(narrative) {
|
|
|
113
113
|
* @param {object} [env=process.env] Environment (injected for testability)
|
|
114
114
|
* @returns {boolean} true = noise, caller should drop
|
|
115
115
|
*/
|
|
116
|
+
/**
|
|
117
|
+
* v2.47 P0-3: Importance cap for LOW_SIGNAL titles that slipped through with
|
|
118
|
+
* inflated importance. Complements isNoiseObservation — that one drops rows
|
|
119
|
+
* entirely when narrative is also thin; this one keeps the row (useful for
|
|
120
|
+
* session history) but demotes the importance so injection ranking and
|
|
121
|
+
* auto-compress treat it as the noise it is.
|
|
122
|
+
*
|
|
123
|
+
* Production baseline (2026-04-24, projects--mem DB, 3789 obs):
|
|
124
|
+
* LOW_SIGNAL title + importance=3 → 341 rows; only 1 had lesson, 1 had facts
|
|
125
|
+
* LOW_SIGNAL title + importance=2 → 80 rows; only 5 had lesson, 6 had facts
|
|
126
|
+
* 99%+ of those were Haiku-inflated noise. Cap forces imp=1 and the 7-day
|
|
127
|
+
* accelerated auto-compress in hook.mjs GCs them.
|
|
128
|
+
*
|
|
129
|
+
* Preserves importance when ANY real signal exists:
|
|
130
|
+
* - lesson_learned (or camelCase lessonLearned) set and not 'none'
|
|
131
|
+
* - facts array has >=1 non-empty string
|
|
132
|
+
* Non-LOW_SIGNAL titles are never capped (substantive prose is trusted).
|
|
133
|
+
*
|
|
134
|
+
* @param {object} obs { title, facts, importance, lesson_learned|lessonLearned }
|
|
135
|
+
* @returns {number} Capped importance (1 if LOW_SIGNAL+no-signal, else original)
|
|
136
|
+
*/
|
|
137
|
+
export function capNoiseImportance(obs) {
|
|
138
|
+
const original = obs?.importance ?? 1;
|
|
139
|
+
const title = (obs && obs.title) || '';
|
|
140
|
+
if (!_LOW_SIG_RE.test(title)) return original;
|
|
141
|
+
const lesson = obs.lessonLearned ?? obs.lesson_learned;
|
|
142
|
+
if (lesson && String(lesson).trim() && String(lesson).trim().toLowerCase() !== 'none') return original;
|
|
143
|
+
if (Array.isArray(obs.facts) &&
|
|
144
|
+
obs.facts.filter(f => typeof f === 'string' && f.trim().length > 0).length >= 1) {
|
|
145
|
+
return original;
|
|
146
|
+
}
|
|
147
|
+
return original > 1 ? 1 : original;
|
|
148
|
+
}
|
|
149
|
+
|
|
116
150
|
export function isNoiseObservation(obs, env = process.env) {
|
|
117
151
|
if (env && env.CLAUDE_MEM_KEEP_LOW_SIGNAL === '1') return false;
|
|
118
152
|
const title = (obs && obs.title) || '';
|
package/package.json
CHANGED
package/schema.mjs
CHANGED
|
@@ -21,7 +21,12 @@ export const REGISTRY_DB_PATH = join(DB_DIR, 'resource-registry.db');
|
|
|
21
21
|
// FTS delete+reinsert cycles and amplified SQLITE_CORRUPT_VTAB blast radius
|
|
22
22
|
// (project_non_obvious.md). Migration drops the old triggers once and lets
|
|
23
23
|
// ensureFTS recreate them with the scoped form.
|
|
24
|
-
|
|
24
|
+
//
|
|
25
|
+
// v28 (v2.47): observation_vectors orphan + stale-vocab cleanup. Live DBs had
|
|
26
|
+
// 2839/6429 (44%) orphaned rows (historic deletes during FK-OFF migrations)
|
|
27
|
+
// and 3282/6429 (51%) stale-vocab rows (rebuildVocabulary never pruned old
|
|
28
|
+
// versions before v2.47). Idempotent one-shot DELETE on ensureDb.
|
|
29
|
+
export const CURRENT_SCHEMA_VERSION = 28;
|
|
25
30
|
|
|
26
31
|
const CORE_SCHEMA = `
|
|
27
32
|
CREATE TABLE IF NOT EXISTS sdk_sessions (
|
|
@@ -402,6 +407,17 @@ export function initSchema(db) {
|
|
|
402
407
|
|
|
403
408
|
db.exec(`CREATE INDEX IF NOT EXISTS idx_obs_vectors_version ON observation_vectors(vocab_version)`);
|
|
404
409
|
|
|
410
|
+
// v28 (v2.47) P0-1: one-shot cleanup of orphaned observation_vectors.
|
|
411
|
+
// Live DBs accumulated 44% orphans even with ON DELETE CASCADE because
|
|
412
|
+
// early migrations ran with `foreign_keys=OFF` and deletes skipped cascade.
|
|
413
|
+
// Idempotent (NOT IN is empty on a clean DB), runs once per ensureDb().
|
|
414
|
+
try {
|
|
415
|
+
db.prepare(`
|
|
416
|
+
DELETE FROM observation_vectors
|
|
417
|
+
WHERE observation_id NOT IN (SELECT id FROM observations)
|
|
418
|
+
`).run();
|
|
419
|
+
} catch { /* non-critical — table-missing path handled by earlier CREATE */ }
|
|
420
|
+
|
|
405
421
|
// Persisted vocabulary for stable TF-IDF vector indexing
|
|
406
422
|
db.exec(`
|
|
407
423
|
CREATE TABLE IF NOT EXISTS vocab_state (
|
package/scoring-sql.mjs
CHANGED
|
@@ -70,12 +70,17 @@ export const TYPE_QUALITY_CASE = `(
|
|
|
70
70
|
* - injection_count: bumped ONLY on UserPromptSubmit / hook-memory auto-inject
|
|
71
71
|
* - access_count: bumped on citation (c039352 P4), explicit recall, get, timeline
|
|
72
72
|
*
|
|
73
|
-
* Empirical thresholds (
|
|
74
|
-
*
|
|
75
|
-
* •
|
|
76
|
-
* •
|
|
77
|
-
*
|
|
78
|
-
*
|
|
73
|
+
* Empirical thresholds (v2.47 recalibration — 2026-04-24 live projects--mem,
|
|
74
|
+
* 3789 obs, baseline 10/20 never fired because max injection_count=9):
|
|
75
|
+
* • Legitimate heavy use (#5588 9/10=0.9, #7549 7/13=0.54): ratio≤3 ⇒ 1.0×
|
|
76
|
+
* • Early noise candidate (#3518 6/1=6.0): inj≥4 AND ratio>3 ⇒ 0.5× (tier-1)
|
|
77
|
+
* • Entrenched noise (inj≥8 AND ratio>5): 0.2× (tier-2)
|
|
78
|
+
*
|
|
79
|
+
* Old thresholds (v26→v2.46, inj≥10/≥20) were chosen as theoretical upper bounds
|
|
80
|
+
* before injection_count accumulated 2 months of data — live distribution shows
|
|
81
|
+
* 100% of rows stayed under 10 inject events. The recalibrated gates bite the
|
|
82
|
+
* moderate-noise tier (first real data band) while still sparing ratio-clean
|
|
83
|
+
* heavy-use rows (ratio gate is the primary precision signal).
|
|
79
84
|
*
|
|
80
85
|
* Applied as: BM25 × time_decay × TYPE_QUALITY × (0.5 + 0.5·importance) × NOISE_PENALTY
|
|
81
86
|
* Note: multiplicative so ORDER BY relevance ASC (negative scores) still works —
|
|
@@ -88,10 +93,10 @@ export function noisePenaltyClause(alias = 'o') {
|
|
|
88
93
|
const a = alias ? `${alias}.` : '';
|
|
89
94
|
return `(
|
|
90
95
|
CASE
|
|
91
|
-
WHEN COALESCE(${a}injection_count, 0) >= 20
|
|
96
|
+
WHEN COALESCE(${a}injection_count, 0) >= 8
|
|
92
97
|
AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 5
|
|
93
98
|
THEN 0.2
|
|
94
|
-
WHEN COALESCE(${a}injection_count, 0) >= 10
|
|
99
|
+
WHEN COALESCE(${a}injection_count, 0) >= 4
|
|
95
100
|
AND COALESCE(${a}injection_count, 0) > COALESCE(${a}access_count, 0) * 3
|
|
96
101
|
THEN 0.5
|
|
97
102
|
ELSE 1.0
|
package/tfidf.mjs
CHANGED
|
@@ -252,6 +252,13 @@ export function rebuildVocabulary(db) {
|
|
|
252
252
|
for (const [term, entry] of vocab.terms) {
|
|
253
253
|
insertStmt.run(term, entry.index, entry.idf, vocab.version, now);
|
|
254
254
|
}
|
|
255
|
+
// v2.47 P0-1: drop observation_vectors from earlier vocab versions.
|
|
256
|
+
// Without this, rebuildVocabulary compounded the stale set on every call
|
|
257
|
+
// (live DB measured 3282/6429 = 51% stale). vectorSearch filters by
|
|
258
|
+
// vocab_version at query time, so stale rows were dead storage.
|
|
259
|
+
try {
|
|
260
|
+
db.prepare('DELETE FROM observation_vectors WHERE vocab_version != ?').run(vocab.version);
|
|
261
|
+
} catch { /* table missing on legacy DBs — non-critical */ }
|
|
255
262
|
})();
|
|
256
263
|
|
|
257
264
|
_vocabCache = vocab;
|