claude-mem-lite 2.98.0 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/lib/file-intel.mjs +160 -0
- package/lib/reread-guard.mjs +55 -0
- package/lib/search-core.mjs +200 -0
- package/lib/timeline-core.mjs +195 -0
- package/mem-cli.mjs +54 -257
- package/package.json +6 -2
- package/scripts/pre-tool-recall.js +69 -3
- package/server.mjs +63 -273
- package/source-files.mjs +13 -0
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "claude-mem-lite",
|
|
13
|
-
"version": "2.98.0",
|
|
13
|
+
"version": "3.0.0",
|
|
14
14
|
"source": "./",
|
|
15
15
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "2.98.0",
|
|
3
|
+
"version": "3.0.0",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "sdsrss"
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
// lib/file-intel.mjs — pure, zero-dependency builder for the PreToolUse:Read
|
|
2
|
+
// "file intelligence" injection (feature ①). Before Claude reads a file, surface
|
|
3
|
+
// its approximate token size + a one-line "what's in it" so the agent can decide
|
|
4
|
+
// to read fully, read a slice, or grep instead.
|
|
5
|
+
//
|
|
6
|
+
// Imported by the hot standalone scripts/pre-tool-recall.js, so it MUST stay
|
|
7
|
+
// dependency-free and cheap: one bounded file read + regex, no heavy imports.
|
|
8
|
+
// (Lesson #8447: fast-path scripts can't pull in utils.mjs, which drags in
|
|
9
|
+
// child_process/nlp/scoring-sql.) estimateContentTokens is therefore a hand-
|
|
10
|
+
// mirror of utils.estimateTokens; tests/file-intel.test.mjs pins the two so a
|
|
11
|
+
// change to the canonical estimator surfaces as a failing mirror test.
|
|
12
|
+
|
|
13
|
+
import { statSync, openSync, readSync, closeSync } from 'fs';
|
|
14
|
+
import { basename as pathBasename } from 'path';
|
|
15
|
+
|
|
16
|
+
const SUMMARY_MAX = 80; // max characters cap() keeps for a one-line summary (ellipsis included)
|
|
17
|
+
const DEFAULT_MIN_TOKENS = 800; // fileIntelFor returns null below this estimate unless opts.minTokens overrides it
|
|
18
|
+
const DEFAULT_MAX_READ_BYTES = 24 * 1024; // most bytes fileIntelFor reads from disk (24 KiB) unless opts.maxReadBytes overrides it
|
|
19
|
+
|
|
20
|
+
// Local mirror of utils.estimateTokens, kept here so the standalone hook stays
// lean. Code units in the CJK / fullwidth / Hangul ranges cost ~1.5 chars per
// token, everything else ~4 chars per token. Empty or nullish input counts as
// one token, and the result is never below 1.
export function estimateContentTokens(text) {
  const source = text || '';
  if (!source) return 1;

  // True for UTF-16 code units billed at the denser CJK rate.
  const isWide = (code) =>
    (code >= 0x3000 && code <= 0x303f) ||
    (code >= 0x3400 && code <= 0x4dbf) ||
    (code >= 0x4e00 && code <= 0x9fff) ||
    (code >= 0xac00 && code <= 0xd7af) ||
    (code >= 0xff00 && code <= 0xffef);

  let wide = 0;
  for (let idx = 0; idx < source.length; idx += 1) {
    if (isWide(source.charCodeAt(idx))) wide += 1;
  }

  const narrow = source.length - wide;
  const estimate = Math.ceil(narrow / 4) + Math.ceil(wide / 1.5);
  return Math.max(1, estimate);
}
|
|
37
|
+
|
|
38
|
+
// Compact token count for display: 850 → "850", 6100 → "6.1k", 12000 → "12k".
// Values under 10k keep one decimal; 10k and above are rounded to whole "k".
export function humanTokens(n) {
  if (n < 1000) return String(n);
  if (n < 10000) {
    const fixed = (n / 1000).toFixed(1);
    // 9960–9999 round up to "10.0"; collapse that to "10k" so the output
    // matches what 10000 itself produces instead of showing "10.0k".
    return fixed === '10.0' ? '10k' : `${fixed}k`;
  }
  return `${Math.round(n / 1000)}k`;
}
|
|
44
|
+
|
|
45
|
+
// Collapse all whitespace runs to single spaces, trim, and clip the result to
// SUMMARY_MAX characters (the trailing "…" counts toward the limit).
function cap(s) {
  const oneLine = s.split(/\s+/).join(' ').trim();
  if (oneLine.length > SUMMARY_MAX) {
    return `${oneLine.slice(0, SUMMARY_MAX - 1)}…`;
  }
  return oneLine;
}
|
|
49
|
+
|
|
50
|
+
// True when a comment's text is boilerplate rather than a description: a pure
// separator rule (---, ===, ***, …) or a tooling / licence / directive prefix.
function isGenericComment(text) {
  if (/^[-=*_#·]{3,}$/.test(text)) return true;
  const lowered = text.toLowerCase();
  const boilerplatePrefixes = [
    'eslint', 'prettier', 'tslint',
    'stylelint', 'istanbul', 'c8 ',
    'copyright', 'license', 'spdx',
    'use strict', '@', 'global ',
    'generated', 'auto-generated', 'nolint',
  ];
  return boilerplatePrefixes.some((prefix) => lowered.startsWith(prefix));
}

// First meaningful header comment in the first 15 lines, skipping blanks,
// shebangs, and boilerplate (eslint/license/etc). Stops at the first real code
// line so we don't scan deep into the body. Returns '' when none is found.
function extractHeaderComment(content) {
  const lines = content.split('\n');
  const limit = Math.min(lines.length, 15);
  for (let i = 0; i < limit; i++) {
    const line = lines[i].trim();
    if (!line || line.startsWith('#!')) continue; // blank line or shebang

    // `(.*)` rather than `(.+)`: a bare marker ("//", "#", or the " *" spacer
    // line inside a JSDoc block) is still a comment line. With `(.+)` it failed
    // to match, was treated as code, and ended the scan before the description.
    const m = line.match(/^(?:\/\/\/?|#|--|\/\*\*?|\*)\s*(.*)/);
    if (!m) break; // real code line — no header comment

    const text = m[1].replace(/\*\/\s*$/, '').trim(); // drop a closing "*/"
    if (text.length > 4 && !isGenericComment(text)) return text;
  }
  return '';
}
|
|
80
|
+
|
|
81
|
+
// One-line list of the identifiers a JS/TS source declares with `export`
// (function, class, const/let/var, interface, type, enum). At most five names
// are shown, followed by "+ N more". Returns '' when nothing matches.
function extractExports(content) {
  const pattern = /export\s+(?:default\s+)?(?:async\s+)?(?:function\*?|class|const|let|var|interface|type|enum)\s+(\w+)/g;

  // A Set keeps first-seen order while dropping repeated names.
  const unique = new Set();
  for (const match of content.matchAll(pattern)) {
    unique.add(match[1]);
  }
  if (unique.size === 0) return '';

  const all = [...unique];
  const listed = all.slice(0, 5).join(', ');
  const hidden = all.length - 5;
  if (hidden > 0) return `Exports ${listed} + ${hidden} more`;
  return `Exports ${listed}`;
}
|
|
92
|
+
|
|
93
|
+
// Best-effort one-line "what's in it" for a file, or '' when nothing useful is
// found. Strategy depends on the (lower-cased) extension of `filename`:
//   .md / .mdx  → first markdown heading
//   .json       → "description", else "name" (no other fallback)
//   anything    → first meaningful header comment
//   JS/TS files → list of exported names when there is no header comment
export function extractFileSummary(content, filename) {
  const text = content || '';
  if (!text.trim()) return '';

  const lowered = (filename || '').toLowerCase();
  const dotAt = lowered.lastIndexOf('.');
  const ext = dotAt < 0 ? '' : lowered.slice(dotAt);

  switch (ext) {
    case '.md':
    case '.mdx': {
      const heading = /^#{1,6}\s+(.+)$/m.exec(text);
      if (heading) return cap(heading[1]);
      break; // no heading: fall through to the generic strategies below
    }
    case '.json': {
      let parsed;
      try {
        parsed = JSON.parse(text);
      } catch {
        return ''; // partial / invalid JSON — no summary
      }
      for (const key of ['description', 'name']) {
        const value = parsed ? parsed[key] : undefined;
        if (typeof value === 'string' && value.trim()) return cap(value);
      }
      return '';
    }
    default:
      break;
  }

  const header = extractHeaderComment(text);
  if (header) return cap(header);

  const scriptExts = ['.js', '.mjs', '.cjs', '.ts', '.tsx', '.jsx'];
  if (scriptExts.includes(ext)) {
    const exported = extractExports(text);
    if (exported) return cap(exported);
  }

  return '';
}
|
|
125
|
+
|
|
126
|
+
// Render the injected line: "[mem] 📄 <basename> ~<tokens> tok", followed by
// " · <summary>" when a non-empty summary is supplied.
export function formatFileIntelLine({ basename, tokens, summary }) {
  const parts = [`[mem] 📄 ${basename} ~${humanTokens(tokens)} tok`];
  if (summary) parts.push(summary);
  return parts.join(' · ');
}
|
|
130
|
+
|
|
131
|
+
// IO wrapper: returns the formatted intel line for filePath, or null when the
// file is unreadable, is not a regular file, or falls below the token
// threshold. Never throws — it runs inside a PreToolUse hook that must always
// exit 0.
//
// opts.minTokens     minimum estimated tokens worth reporting (default DEFAULT_MIN_TOKENS)
// opts.maxReadBytes  most bytes read from disk                 (default DEFAULT_MAX_READ_BYTES)
export function fileIntelFor(filePath, opts = {}) {
  // The `= {}` default only covers undefined. Use optional chaining so an
  // explicit null from a caller cannot throw before the try blocks below.
  const minTokens = opts?.minTokens ?? DEFAULT_MIN_TOKENS;
  const maxReadBytes = opts?.maxReadBytes ?? DEFAULT_MAX_READ_BYTES;

  let size;
  try {
    const st = statSync(filePath);
    if (!st.isFile()) return null;
    size = st.size;
  } catch { return null; }

  try {
    const fd = openSync(filePath, 'r');
    try {
      // One bounded read from offset 0; never more than maxReadBytes.
      const buf = Buffer.allocUnsafe(Math.min(size, maxReadBytes));
      const n = buf.length > 0 ? readSync(fd, buf, 0, buf.length, 0) : 0;
      const sample = buf.subarray(0, n).toString('utf8');
      // Files within the read window are estimated exactly; larger files estimate
      // from byte size (≈4 ASCII bytes/token). The '~' already signals approximation
      // and we never slurp a multi-MB file inside a hook.
      const tokens = size <= maxReadBytes ? estimateContentTokens(sample) : Math.ceil(size / 4);
      if (tokens < minTokens) return null;
      const summary = extractFileSummary(sample, filePath);
      return formatFileIntelLine({ basename: pathBasename(filePath), tokens, summary });
    } finally { closeSync(fd); }
  } catch { return null; }
}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
// lib/reread-guard.mjs — pure logic + one IO helper for feature ② (repeated-read
|
|
2
|
+
// guard). When the agent does a full Read of a file it already read this session
|
|
3
|
+
// and the file is unchanged, nudge it to reuse what it has instead of re-slurping.
|
|
4
|
+
//
|
|
5
|
+
// Imported by the hot standalone scripts/pre-tool-recall.js — stays light, reuses
|
|
6
|
+
// the token estimator from ./file-intel.mjs (also pure). Never throws.
|
|
7
|
+
//
|
|
8
|
+
// False-positive guards (the bit OpenWolf's equivalent omits — its "unless
|
|
9
|
+
// modified" lives only in instructions, not the hook):
|
|
10
|
+
// - full-vs-full only: paging with offset/limit never warns
|
|
11
|
+
// - mtime check: a file changed since the prior read never warns
|
|
12
|
+
// - token floor: re-reading a tiny file is cheap, not worth a nudge
|
|
13
|
+
|
|
14
|
+
import { statSync, openSync, readSync, closeSync } from 'fs';
|
|
15
|
+
import { estimateContentTokens, humanTokens } from './file-intel.mjs';
|
|
16
|
+
|
|
17
|
+
const DEFAULT_MIN_TOKENS = 600; // default minTokens for shouldWarnReread: smaller recorded reads never warn
|
|
18
|
+
const DEFAULT_MAX_READ_BYTES = 24 * 1024; // default readFileMeta cap (24 KiB); larger files are estimated from byte size
|
|
19
|
+
|
|
20
|
+
// IO: stat + (bounded) read of an on-disk file. Resolves to { mtimeMs, tokens }
// or null when the path is missing, is not a regular file, or cannot be read.
// Files larger than maxReadBytes are not read at all — their token count is
// estimated as ceil(size / 4). Never throws.
export function readFileMeta(filePath, maxReadBytes = DEFAULT_MAX_READ_BYTES) {
  let info;
  try {
    info = statSync(filePath);
    if (!info.isFile()) return null;
  } catch {
    return null;
  }

  const { size, mtimeMs } = info;
  if (size > maxReadBytes) {
    return { mtimeMs, tokens: Math.ceil(size / 4) };
  }

  try {
    const handle = openSync(filePath, 'r');
    try {
      const bytes = Buffer.allocUnsafe(size);
      const got = size > 0 ? readSync(handle, bytes, 0, size, 0) : 0;
      const text = bytes.subarray(0, got).toString('utf8');
      return { mtimeMs, tokens: estimateContentTokens(text) };
    } finally {
      closeSync(handle);
    }
  } catch {
    return null;
  }
}
|
|
41
|
+
|
|
42
|
+
// Pure: decide whether a repeat read deserves a nudge.
//   recorded       — { mtimeMs, tokens, full } stored for the earlier read
//   currentMtimeMs — the file's mtime now (null/undefined → never warn)
//   isFullRead     — whether the new read is a full read
//   minTokens      — recorded reads smaller than this never warn
// Warns only for full-vs-full re-reads of a big-enough file whose mtime is not
// newer than the recorded one.
export function shouldWarnReread(recorded, currentMtimeMs, isFullRead, minTokens = DEFAULT_MIN_TOKENS) {
  const hasRecord = Boolean(recorded) && typeof recorded === 'object';
  if (!hasRecord) return false;

  const bothFull = Boolean(recorded.full) && Boolean(isFullRead);
  if (!bothFull) return false; // paging with offset/limit never warns

  const bigEnough = recorded.tokens >= minTokens;
  if (!bigEnough) return false; // also rejects a missing/NaN token count

  if (currentMtimeMs == null) return false; // null or undefined
  return currentMtimeMs <= recorded.mtimeMs; // unchanged since last read
}
|
|
50
|
+
|
|
51
|
+
// Pure: compose the re-read nudge for `basename` (token count shown in the
// compact humanTokens form). No framing line — the hook prepends the shared one.
export function buildRereadWarning(basename, tokens) {
  const size = humanTokens(tokens);
  return [
    `[mem] 🔁 You already read ${basename} this session (~${size} tok, unchanged)`,
    `— reuse what you have instead of re-reading; pass offset/limit if you need a specific part.`,
  ].join(' ');
}
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
// Shared cross-source search core (query build / source queries / scoring
|
|
2
|
+
// normalization / sort / pagination math).
|
|
3
|
+
//
|
|
4
|
+
// Single source of truth for cmdSearch (CLI) and mem_search (MCP). The
|
|
5
|
+
// observation path already converged in search-engine.mjs (#8198/#8212); the
|
|
6
|
+
// sessions/prompts FTS queries, CJK precision + LIKE fallback, cross-source
|
|
7
|
+
// score normalization, user-sort, over-fetch sizing, and date-bound parsing
|
|
8
|
+
// were still copy-pasted and synced by "paired-path" comments — the drift
|
|
9
|
+
// class compress-core (ARCH-1), recall-core, and timeline-core were extracted
|
|
10
|
+
// to close. Call sites keep what legitimately differs: flag/schema parsing,
|
|
11
|
+
// result-row dialect (CLI `_source`+raw columns vs MCP `source`+mapped
|
|
12
|
+
// fields), error-message wording, and output rendering.
|
|
13
|
+
//
|
|
14
|
+
// Behavioral asymmetries that are PRESERVED, not converged (documented so a
|
|
15
|
+
// future "fix" is a deliberate contract change, not an accident):
|
|
16
|
+
// • CLI forces source=observations when --type/--tier/--importance/--branch
|
|
17
|
+
// is set; MCP only forces it for obs_type.
|
|
18
|
+
// • CLI warns on inverted --from/--to ranges; MCP does not.
|
|
19
|
+
// • CLI wraps session/prompt FTS in try/catch for pre-FTS legacy DBs.
|
|
20
|
+
|
|
21
|
+
import { sanitizeFtsQuery, relaxFtsQueryToOr, SESS_BM25, DEFAULT_DECAY_HALF_LIFE_MS } from '../utils.mjs';
|
|
22
|
+
import { cjkPrecisionOk, extractCjkLikePatterns } from '../nlp.mjs';
|
|
23
|
+
import { computeTier } from '../tier.mjs';
|
|
24
|
+
|
|
25
|
+
/**
 * Turn a raw user query into an FTS5 query string via sanitizeFtsQuery.
 * With `or: true` the sanitized query is additionally passed through
 * relaxFtsQueryToOr; if that yields a falsy value the sanitized query is kept.
 * A falsy sanitized query is returned as-is.
 */
export function buildSearchFtsQuery(query, { or = false } = {}) {
  const sanitized = sanitizeFtsQuery(query);
  if (!sanitized || !or) return sanitized;
  return relaxFtsQueryToOr(sanitized) || sanitized;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Convert optional from/to date strings to epoch milliseconds. A falsy bound
 * becomes null. A `to` given as a bare date (YYYY-MM-DD) is pushed to the last
 * millisecond of that day so "to 2026-06-12" includes that day's rows. When
 * both bounds are unparseable, `from` is the one reported.
 * @returns {{ ok: true, epochFrom: number|null, epochTo: number|null }
 *          | { ok: false, bad: 'from'|'to', value: string }}
 */
export function parseDateBounds(fromRaw, toRaw) {
  const DAY_MS = 86400000;
  const toEpoch = (raw) => (raw ? new Date(raw).getTime() : null);

  const epochFrom = toEpoch(fromRaw);
  if (epochFrom !== null && Number.isNaN(epochFrom)) {
    return { ok: false, bad: 'from', value: fromRaw };
  }

  const parsedTo = toEpoch(toRaw);
  if (parsedTo !== null && Number.isNaN(parsedTo)) {
    return { ok: false, bad: 'to', value: toRaw };
  }

  const isDateOnly = parsedTo !== null && /^\d{4}-\d{2}-\d{2}$/.test(toRaw);
  const epochTo = isDateOnly ? parsedTo + DAY_MS - 1 : parsedTo; // 23:59:59.999
  return { ok: true, epochFrom, epochTo };
}
|
|
48
|
+
|
|
49
|
+
/**
 * Size the per-source over-fetch. Every source is queried from offset 0 and
 * the caller slices [offset, offset+limit) exactly ONCE after merging.
 * Pushing OFFSET into the per-source SQL double-applied it and gapped or
 * overlapped pages, because the obs hybrid path (AND→OR fallback / vector /
 * concept stages) re-adds rows the SQL OFFSET already skipped (#8217/#8638).
 *
 * The fetch size is the larger of 3×limit and offset+limit+10.
 */
export function computePerSourceWindow(limit, offset) {
  const tripled = limit * 3;
  const throughPageEnd = offset + limit + 10;
  const perSourceLimit = Math.max(tripled, throughPageEnd);
  return { perSourceLimit, perSourceOffset: 0 };
}
|
|
59
|
+
|
|
60
|
+
/**
 * Query to use for the obs-side total: the original FTS query, or — when the
 * AND→OR fallback fired — its OR relaxation (falling back to the original if
 * relaxFtsQueryToOr returns a falsy value).
 */
export function effectiveObsFtsQuery(ftsQuery, orFallbackFired) {
  if (!orFallbackFired) return ftsQuery;
  return relaxFtsQueryToOr(ftsQuery) || ftsQuery;
}
|
|
64
|
+
|
|
65
|
+
/**
 * FTS search over session summaries. The score is the SESS_BM25 expression
 * scaled by a recency-decay factor and a 2× same-project boost. Returns the
 * raw SQL rows:
 * { id, request, completed, project, created_at, created_at_epoch, score }.
 *
 * `projectBoost` is the inferred current project; pass null when the caller
 * filtered by `project` explicitly so no boost is applied.
 */
export function searchSessionsFts(db, { ftsQuery, project = null, projectBoost = null, epochFrom = null, epochTo = null, perSourceLimit, perSourceOffset = 0 }) {
  // Each active filter contributes one WHERE clause and one bound value, in
  // the same order.
  const filters = [
    { clause: 'session_summaries_fts MATCH ?', value: ftsQuery, active: true },
    { clause: 's.project = ?', value: project, active: Boolean(project) },
    { clause: 's.created_at_epoch >= ?', value: epochFrom, active: epochFrom !== null },
    { clause: 's.created_at_epoch <= ?', value: epochTo, active: epochTo !== null },
  ].filter((f) => f.active);

  // Placeholder order: decay "now", boost (twice), WHERE values, LIMIT, OFFSET.
  const bindings = [
    Date.now(),
    projectBoost,
    projectBoost,
    ...filters.map((f) => f.value),
    perSourceLimit,
    perSourceOffset,
  ];

  const statement = db.prepare(`
    SELECT s.id, s.request, s.completed, s.project, s.created_at, s.created_at_epoch,
      ${SESS_BM25}
        * (1.0 + EXP(-0.693 * (? - s.created_at_epoch) / ${DEFAULT_DECAY_HALF_LIFE_MS}.0))
        * (CASE WHEN ? IS NOT NULL AND s.project = ? THEN 2.0 ELSE 1.0 END) as score
    FROM session_summaries_fts
    JOIN session_summaries s ON session_summaries_fts.rowid = s.id
    WHERE ${filters.map((f) => f.clause).join(' AND ')}
    ORDER BY score
    LIMIT ? OFFSET ?
  `);
  return statement.all(...bindings);
}
|
|
90
|
+
|
|
91
|
+
/**
 * FTS search over user prompts, with a CJK precision gate and a CJK LIKE
 * fallback. Returns raw SQL rows: { id, prompt_text, content_session_id,
 * created_at, created_at_epoch, score } (fallback rows carry score = 0).
 *
 * The precision gate applies to BOTH paths: unicode61 degrades CJK bigram
 * queries to single-char AND, and the LIKE fallback is an OR'd substring scan
 * — without the gate each re-admits the common-char noise band the other
 * dropped (that asymmetry was the actual leak source: FTS returned 0,
 * fallback filled 20).
 *
 * When `query` is falsy the FTS rows are returned ungated and no fallback runs.
 */
export function searchPromptsFts(db, { query, ftsQuery, project = null, epochFrom = null, epochTo = null, perSourceLimit, perSourceOffset = 0 }) {
  // Optional scope filters; both statements bind these values in this order.
  const scope = [];
  if (project) scope.push(['s.project = ?', project]);
  if (epochFrom !== null) scope.push(['p.created_at_epoch >= ?', epochFrom]);
  if (epochTo !== null) scope.push(['p.created_at_epoch <= ?', epochTo]);
  const scopeValues = scope.map(([, value]) => value);
  const paging = [perSourceLimit, perSourceOffset];

  const ftsWhere = [
    'user_prompts_fts MATCH ?',
    "p.prompt_text NOT LIKE '<task-notification>%'",
    ...scope.map(([clause]) => clause),
  ].join(' AND ');
  const ftsRows = db.prepare(`
    SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch,
      bm25(user_prompts_fts, 1) as score
    FROM user_prompts_fts
    JOIN user_prompts p ON user_prompts_fts.rowid = p.id
    JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
    WHERE ${ftsWhere}
    ORDER BY score
    LIMIT ? OFFSET ?
  `).all(ftsQuery, ...scopeValues, ...paging);

  if (!query) return ftsRows;
  const passesGate = (row) => cjkPrecisionOk(query, row.prompt_text);
  const precise = ftsRows.filter(passesGate);
  if (precise.length > 0) return precise;

  // CJK LIKE fallback: FTS5 unicode61 can't tokenize CJK substrings in prompts
  const patterns = extractCjkLikePatterns(query);
  if (patterns.length === 0) return precise;

  const likeWhere = patterns.map(() => 'p.prompt_text LIKE ?').join(' OR ');
  const likeValues = patterns.map((pattern) => `%${pattern}%`);
  const fallbackRows = db.prepare(`
    SELECT p.id, p.prompt_text, p.content_session_id, p.created_at, p.created_at_epoch
    FROM user_prompts p
    JOIN sdk_sessions s ON p.content_session_id = s.content_session_id
    WHERE (${likeWhere})
      AND p.prompt_text NOT LIKE '<task-notification>%'
      ${project ? 'AND s.project = ?' : ''}
      ${epochFrom !== null ? 'AND p.created_at_epoch >= ?' : ''}
      ${epochTo !== null ? 'AND p.created_at_epoch <= ?' : ''}
    ORDER BY p.created_at_epoch DESC
    LIMIT ? OFFSET ?
  `).all(...likeValues, ...scopeValues, ...paging);

  const gated = [];
  for (const row of fallbackRows) {
    if (passesGate(row)) gated.push({ ...row, score: 0 });
  }
  return gated;
}
|
|
147
|
+
|
|
148
|
+
/**
 * Rescale BM25 scores per source ('obs', 'session', 'prompt') so each source's
 * strongest row lands at magnitude 1 before the cross-source merge. This keeps
 * observations (BM25 can reach -40) from systematically outranking sessions
 * (-6) and prompts (-1) regardless of relevance.
 *
 * Rows with a null/undefined score are ignored. A source with fewer than two
 * scored rows is left alone — normalizing would inflate a weak match to -1.0 —
 * as is a source whose scores are all zero. Mutates `results` in place;
 * callers re-sort afterwards.
 */
export function normalizeCrossSourceScores(results, sourceKey) {
  const bySource = new Map([['obs', []], ['session', []], ['prompt', []]]);
  for (const row of results) {
    if (row.score === null || row.score === undefined) continue;
    const bucket = bySource.get(row[sourceKey]);
    if (bucket) bucket.push(row);
  }

  for (const rows of bySource.values()) {
    if (rows.length < 2) continue;
    const peak = rows.reduce((best, row) => Math.max(best, Math.abs(row.score)), -Infinity);
    if (!(peak > 0)) continue;
    for (const row of rows) row.score = row.score / peak;
  }
}
|
|
165
|
+
|
|
166
|
+
/**
 * Re-order `results` in place according to the user-requested sort, applied
 * AFTER relevance scoring:
 *   'time'       — newest first (missing created_at_epoch counts as 0)
 *   'importance' — highest importance first (missing counts as 1), ties
 *                  broken newest first
 * Any other value, including 'relevance', is a no-op — BM25 score order is
 * already in place from the merge sort.
 */
export function applyUserSort(results, sort) {
  const newestFirst = (a, b) => (b.created_at_epoch ?? 0) - (a.created_at_epoch ?? 0);
  const mostImportantFirst = (a, b) => (b.importance ?? 1) - (a.importance ?? 1);

  switch (sort) {
    case 'time':
      results.sort(newestFirst);
      break;
    case 'importance':
      results.sort((a, b) => mostImportantFirst(a, b) || newestFirst(a, b));
      break;
    default:
      break;
  }
}
|
|
177
|
+
|
|
178
|
+
/**
 * Tier post-filter. Loads the full observation rows for every 'obs' result in
 * one IN (...) query and keeps only those whose computeTier result equals
 * `tier`; obs ids missing from the table are dropped. Non-obs rows pass
 * through untouched. Classification uses the explicitly-requested project when
 * given — a CWD-inferred fallback breaks computeTier's
 * "obs.project === currentProject" rules on cross-project searches and
 * silently drops valid rows.
 * @returns filtered array (input is not mutated); when there are no obs rows
 *          the input array itself is returned
 */
export function applyTierFilter(db, results, { tier, sourceKey, currentProject }) {
  const isObs = (row) => row[sourceKey] === 'obs';

  const ids = results.filter(isObs).map((row) => row.id);
  if (ids.length === 0) return results;

  const marks = ids.map(() => '?').join(',');
  const lookup = db.prepare(
    `SELECT id, compressed_into, superseded_at, memory_session_id, project, importance, last_accessed_at, created_at_epoch, type FROM observations WHERE id IN (${marks})`
  );
  const byId = new Map();
  for (const row of lookup.all(...ids)) byId.set(row.id, row);

  const ctx = { now: Date.now(), currentProject, currentSessionId: '' };
  return results.filter((row) => {
    if (!isObs(row)) return true;
    const full = byId.get(row.id);
    return full && computeTier(full, ctx) === tier;
  });
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
// Shared "timeline around an anchor" core.
|
|
2
|
+
//
|
|
3
|
+
// Single source of truth for cmdTimeline (CLI) and mem_timeline (MCP). Pre-
|
|
4
|
+
// extraction the anchor-resolution ladder (P#/S# token → nearest obs,
|
|
5
|
+
// bare int → obs with compressed_into re-anchor → prompt/session fallback),
|
|
6
|
+
// the query-anchor wrapper around findFtsAnchor, and the before/after window
|
|
7
|
+
// queries were copy-pasted across both and kept in sync by hand-written
|
|
8
|
+
// "aligned with" comments — the same drift vector compress-core (ARCH-1) and
|
|
9
|
+
// recall-core were extracted to close. Call sites keep what legitimately
|
|
10
|
+
// differs: argument parsing, output rendering (CLI relativeTime text / JSON vs
|
|
11
|
+
// MCP fmtDate lines), and error-message dialect (formatAnchorError owns both
|
|
12
|
+
// dialects so the wording cannot drift independently).
|
|
13
|
+
|
|
14
|
+
import { parseIdToken } from './id-routing.mjs';
|
|
15
|
+
import { findFtsAnchor } from '../search-engine.mjs';
|
|
16
|
+
import { sanitizeFtsQuery } from '../utils.mjs';
|
|
17
|
+
|
|
18
|
+
const TIMELINE_COLS = 'id, type, title, subtitle, project, created_at, created_at_epoch'; // observation columns selected for timeline rows
|
|
19
|
+
|
|
20
|
+
/**
 * Look up the id of the non-compressed observation whose created_at_epoch is
 * closest to `epoch`, restricted to `project` when one is given. Returns the
 * row produced by `.get()` ({ id }), or whatever `.get()` yields on no match.
 */
function nearestObservation(db, epoch, project) {
  const projectClause = project ? 'AND project = ?' : '';
  const bindings = project ? [project, epoch] : [epoch];
  const statement = db.prepare(`
    SELECT id FROM observations
    WHERE COALESCE(compressed_into, 0) = 0 ${projectClause}
    ORDER BY ABS(created_at_epoch - ?) ASC LIMIT 1
  `);
  return statement.get(...bindings);
}
|
|
28
|
+
|
|
29
|
+
/**
 * Resolve a raw anchor token (number, "N", "#N", "P#N", "S#N") to an
 * observation id.
 *
 *  - Prompt/session anchors resolve to the nearest-in-time observation so
 *    before/after semantics still apply.
 *  - A compressed observation re-anchors to its live parent; negative
 *    compressed_into sentinels are an error (no canonical parent).
 *  - A bare int that misses observations falls back to prompt, then session.
 *
 * @returns {{ ok: true, anchorId: number, anchorNote: string|null }
 *          | { ok: false, error: object }} — render error via formatAnchorError
 */
export function resolveAnchorToken(db, rawAnchor, { project = null } = {}) {
  const fail = (error) => ({ ok: false, error });
  const anchoredNear = (obsId, label) => ({
    ok: true,
    anchorId: obsId,
    anchorNote: `(anchored to #${obsId}, closest obs to ${label})`,
  });
  const epochRow = (table, rowId) =>
    db.prepare(`SELECT created_at_epoch FROM ${table} WHERE id = ?`).get(rowId);

  const parsed = parseIdToken(rawAnchor);
  if (!parsed) return fail({ code: 'invalid-token', raw: rawAnchor });
  const id = parsed.id;

  // Explicit P#N / S#N token.
  if (parsed.source === 'prompt' || parsed.source === 'session') {
    const isPrompt = parsed.source === 'prompt';
    const table = isPrompt ? 'user_prompts' : 'session_summaries';
    const prefix = isPrompt ? 'P#' : 'S#';
    const name = isPrompt ? 'Prompt' : 'Session';

    const row = epochRow(table, id);
    if (!row) return fail({ code: 'source-not-found', name, prefix, id });
    const nearest = nearestObservation(db, row.created_at_epoch, project);
    if (!nearest) return fail({ code: 'no-obs-near', prefix, id });
    return anchoredNear(nearest.id, `${prefix}${id}`);
  }

  // Bare "#N" or "N" — observation first. Route compressed obs to its live
  // parent so the window (which filters compressed) isn't shown around a dead
  // record; negative sentinels (-1 dropped, -2 pending purge) have no parent.
  const obsRow = db.prepare('SELECT compressed_into FROM observations WHERE id = ?').get(id);
  if (obsRow) {
    const parent = obsRow.compressed_into;
    if (parent && parent > 0) {
      return { ok: true, anchorId: parent, anchorNote: `(anchored to #${parent}, #${id} was compressed into it)` };
    }
    if (parent && parent < 0) {
      return fail({ code: 'compressed-pruned', id });
    }
    return { ok: true, anchorId: id, anchorNote: null };
  }

  // Fall back to user_prompts then session_summaries so pasted P#/S# ids still
  // work when the prefix is omitted — matches prefix-aware routing in search/probe.
  let hit = null;
  const promptRow = epochRow('user_prompts', id);
  if (promptRow) {
    hit = { row: promptRow, prefix: 'P#', name: 'prompt' };
  } else {
    const sessionRow = epochRow('session_summaries', id);
    if (sessionRow) hit = { row: sessionRow, prefix: 'S#', name: 'session' };
  }
  if (!hit) return fail({ code: 'id-not-found', id });

  const nearest = nearestObservation(db, hit.row.created_at_epoch, project);
  if (!nearest) return fail({ code: 'no-obs-near', prefix: hit.prefix, id, srcName: hit.name });
  return anchoredNear(nearest.id, `${hit.prefix}${id}`);
}
|
|
83
|
+
|
|
84
|
+
/**
 * Render a resolveAnchorToken error in either caller dialect. Owning BOTH
 * renderings here is deliberate: the strings are regression-anchored on each
 * side (tests/cli.test.mjs, tests/server.test.mjs) and previously drifted only
 * in prefix/period; one place keeps the divergence explicit and frozen.
 *
 *   cli: "[mem] "-prefixed, no trailing period, flag spelled "--anchor".
 *   mcp: bare sentence with trailing period (any dialect other than 'cli').
 *
 * The invalid-token message is the one exception: it ends with a period in
 * both dialects. Unknown codes render a generic "Anchor resolution failed".
 */
export function formatAnchorError(error, dialect) {
  const isCli = dialect === 'cli';

  if (error.code === 'invalid-token') {
    const lead = isCli ? '[mem] Invalid --anchor' : 'Invalid anchor';
    return `${lead} "${error.raw}". Expected N, #N, P#N, or S#N.`;
  }

  let sentence;
  if (error.code === 'source-not-found') {
    sentence = `${error.name} ${error.prefix}${error.id} not found`;
  } else if (error.code === 'no-obs-near') {
    const suffix = error.srcName ? ` (${error.srcName})` : '';
    sentence = `No observations near ${error.prefix}${error.id}${suffix}`;
  } else if (error.code === 'compressed-pruned') {
    sentence = `Observation #${error.id} was compressed and pruned; no canonical anchor available`;
  } else if (error.code === 'id-not-found') {
    sentence = `Observation, prompt, or session with id ${error.id} not found`;
  } else {
    sentence = 'Anchor resolution failed';
  }

  return isCli ? `[mem] ${sentence}` : `${sentence}.`;
}
|
|
122
|
+
|
|
123
|
+
/**
 * Query-based anchor. Sanitizes `queryStr` and delegates to the shared
 * findFtsAnchor so CLI `timeline --query` and MCP mem_timeline keep identical
 * AND→OR fallback semantics (#8217).
 *
 * Returns null whenever findFtsAnchor reports no hit; otherwise
 * { anchorId, anchorNote }, where anchorNote is set only when the hit is
 * flagged `relaxed` (the OR relaxation fired).
 */
export function resolveQueryAnchor(db, queryStr, { project = null } = {}) {
  const hit = findFtsAnchor(db, { ftsQuery: sanitizeFtsQuery(queryStr), project });
  if (!hit) return null;

  let anchorNote = null;
  if (hit.relaxed) {
    anchorNote = `(query "${queryStr}" relaxed AND→OR — no row matched all terms)`;
  }
  return { anchorId: hit.id, anchorNote };
}
|
|
140
|
+
|
|
141
|
+
/**
 * No-anchor fallback: list the most recent observations that have not been
 * compressed into another row, newest first.
 *
 * @param {object} db - Database handle exposing `prepare(sql).all(...params)`.
 * @param {{ project?: string | null, limit: number }} options
 *   `project` restricts rows to one project when truthy; `limit` is bound to
 *   the SQL LIMIT placeholder.
 * @returns {Array<object>} Rows with the TIMELINE_COLS projection, ordered by
 *   created_at_epoch descending.
 */
export function fetchRecentTimeline(db, { project = null, limit }) {
  const notCompressed = 'COALESCE(compressed_into, 0) = 0';

  // The WHERE text and the bound values must stay in lock-step: the project
  // placeholder (when present) precedes the LIMIT placeholder.
  let whereClause;
  let bindings;
  if (project) {
    whereClause = `WHERE ${notCompressed} AND project = ?`;
    bindings = [project, limit];
  } else {
    whereClause = `WHERE ${notCompressed}`;
    bindings = [limit];
  }

  const statement = db.prepare(`
    SELECT ${TIMELINE_COLS}
    FROM observations ${whereClause}
    ORDER BY created_at_epoch DESC
    LIMIT ?
  `);
  return statement.all(...bindings);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Load the observations surrounding an already-resolved anchor id.
 *
 * Side effect: increments the anchor's access_count and stamps
 * last_accessed_at with Date.now() (read-path popularity signal). A failure
 * of that write is ignored so the read still succeeds.
 *
 * Scope: when the caller passes no project, the window is limited to the
 * anchor's own project — "timeline around #N" means same-project context,
 * not cross-project time-bleed.
 *
 * Neighbour rows exclude compressed rows (compressed_into set to a non-zero
 * value) and superseded rows, and use strict time comparisons, so rows that
 * share the anchor's exact created_at_epoch appear on neither side.
 *
 * @param {object} db - Database handle exposing `prepare(sql)` with
 *   `get` / `all` / `run`.
 * @param {*} anchorId - Observation id to centre the window on.
 * @param {{ before: number, after: number, project?: string | null }} options
 *   `before` / `after` are bound to the LIMIT of the respective side.
 * @returns {null | { anchor, beforeRows, afterRows, effectiveProject }}
 *   null when no observation row has that id (e.g. deleted between resolve
 *   and fetch). beforeRows are CHRONOLOGICAL (oldest→newest) — callers must
 *   not reverse them again. afterRows are oldest→newest as well.
 */
export function fetchTimelineWindow(db, anchorId, { before, after, project = null }) {
  const anchorMeta = db
    .prepare('SELECT created_at_epoch, project FROM observations WHERE id = ?')
    .get(anchorId);
  if (!anchorMeta) {
    return null;
  }

  try {
    const bumpAccess = db.prepare('UPDATE observations SET access_count = COALESCE(access_count, 0) + 1, last_accessed_at = ? WHERE id = ?');
    bumpAccess.run(Date.now(), anchorId);
  } catch { /* non-critical: FTS5 trigger may fail on corrupted index */ }

  // An explicit project wins; otherwise inherit the anchor's project.
  const effectiveProject = project || anchorMeta.project;
  let projectFilter = '';
  const scopeParams = [];
  if (effectiveProject) {
    projectFilter = 'AND project = ?';
    scopeParams.push(effectiveProject);
  }

  const pivotEpoch = anchorMeta.created_at_epoch;

  // Fetched newest-first so LIMIT keeps the rows closest to the anchor, then
  // flipped into chronological order for the caller.
  const olderNewestFirst = db.prepare(`
    SELECT ${TIMELINE_COLS}
    FROM observations
    WHERE created_at_epoch < ? AND COALESCE(compressed_into, 0) = 0 AND superseded_at IS NULL ${projectFilter}
    ORDER BY created_at_epoch DESC
    LIMIT ?
  `).all(pivotEpoch, ...scopeParams, before);
  const beforeRows = olderNewestFirst.reverse();

  const afterRows = db.prepare(`
    SELECT ${TIMELINE_COLS}
    FROM observations
    WHERE created_at_epoch > ? AND COALESCE(compressed_into, 0) = 0 AND superseded_at IS NULL ${projectFilter}
    ORDER BY created_at_epoch ASC
    LIMIT ?
  `).all(pivotEpoch, ...scopeParams, after);

  // Re-read the anchor with the full timeline projection; this runs after
  // the access bump above.
  const anchor = db
    .prepare(`SELECT ${TIMELINE_COLS} FROM observations WHERE id = ?`)
    .get(anchorId);

  return { anchor, beforeRows, afterRows, effectiveProject };
}
|