claude-mem-lite 3.3.1 → 3.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +1 -1
- package/.claude-plugin/plugin.json +1 -1
- package/README.md +53 -0
- package/deep-search.mjs +79 -5
- package/mem-cli.mjs +32 -7
- package/package.json +2 -1
- package/rerank.mjs +78 -0
- package/search-engine.mjs +40 -1
- package/server.mjs +33 -13
- package/source-files.mjs +1 -1
- package/tool-schemas.mjs +1 -0
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
"plugins": [
|
|
11
11
|
{
|
|
12
12
|
"name": "claude-mem-lite",
|
|
13
|
-
"version": "3.3.1",
|
|
13
|
+
"version": "3.5.0",
|
|
14
14
|
"source": "./",
|
|
15
15
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark)."
|
|
16
16
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "3.3.1",
|
|
3
|
+
"version": "3.5.0",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"author": {
|
|
6
6
|
"name": "sdsrss"
|
package/README.md
CHANGED
|
@@ -644,6 +644,59 @@ Benchmarked on 200 observations across 30 queries (standard + hard-negative cate
|
|
|
644
644
|
|
|
645
645
|
The benchmark suite runs as a CI gate (`npm run benchmark:gate`) to prevent search quality regressions.
|
|
646
646
|
|
|
647
|
+
### Recall on LongMemEval (standard benchmark)
|
|
648
|
+
|
|
649
|
+
Beyond the in-repo micro-benchmark above, claude-mem-lite is measured on
|
|
650
|
+
[LongMemEval](https://github.com/xiaowu0162/LongMemEval) (Wu et al.) — a
|
|
651
|
+
500-question long-term-memory benchmark — so its recall is comparable to the
|
|
652
|
+
field, not just to itself. Metric is **recall_any@k**: does *any* gold evidence session appear in the
|
|
653
|
+
top *k* retrieved? This is the same session-level definition the systems we
|
|
654
|
+
compare against report on this split — [agentmemory](https://github.com/rohitg00/agentmemory)
|
|
655
|
+
(BM25 + vector + graph) and dense-embedding systems like MemPalace — so the rows
|
|
656
|
+
below sit on one axis, not metric-shopped. (Note: 65% of the 500 questions have
|
|
657
|
+
multiple gold sessions, so `recall_any@k` is looser than fractional recall there;
|
|
658
|
+
all systems in this comparison report the any-hit form.) Corpus is user-turns-only
|
|
659
|
+
(the standard raw-baseline rule). Runners: `benchmark/longmemeval.mjs` (lexical)
|
|
660
|
+
and `benchmark/longmemeval-rerank.mjs` (rerank).
|
|
661
|
+
|
|
662
|
+
| Retriever (zero embeddings) | @1 | @5 | @10 |
|
|
663
|
+
|---|---|---|---|
|
|
664
|
+
| Lexical hybrid — FTS5 + TF-IDF + RRF | 76.8% | 90.6% | 95.2% |
|
|
665
|
+
| + one top-20 LLM rerank pass | **92.8%** | **96.8%** | **97.4%** |
|
|
666
|
+
|
|
667
|
+
*n = 500 questions; 99.8% JSON parse-rate at concurrency 3.* The rerank pass
|
|
668
|
+
hands the top 20 lexical candidates to a single Haiku call (~1.4 s/query) that
|
|
669
|
+
reorders them. It is **never worse than the lexical baseline by construction** —
|
|
670
|
+
any LLM or parse failure falls back to the original candidate order.
|
|
671
|
+
|
|
672
|
+
**Stricter metric, for the record.** The rows above are `recall_any@k` — does *any*
|
|
673
|
+
gold session reach the top *k* — the metric agentmemory and MemPalace publish, so the
|
|
674
|
+
comparison is like-for-like. Under the stricter **standard recall@k** (`|gold ∩ top-k| /
|
|
675
|
+
|gold|`, the *fraction* of all gold sessions retrieved), the lexical stack scores
|
|
676
|
+
@1 = 46.9% / @5 = 84.4% / @10 = 91.9%. The whole gap is the 65% of questions with
|
|
677
|
+
multiple gold sessions — any-hit needs one, fractional needs them all, and @1 is capped
|
|
678
|
+
at 1/|gold| there; single-gold question types score identically under both.
|
|
679
|
+
`benchmark/longmemeval.mjs` reports both columns (the rerank row's fractional is not yet
|
|
680
|
+
measured).
|
|
681
|
+
|
|
682
|
+
**On embeddings, honestly.** With no LLM in the loop, both a dense-embedding
|
|
683
|
+
baseline (MemPalace, ~96.6% @5) and a BM25 + vector + graph hybrid (agentmemory,
|
|
684
|
+
95.2% @5) out-recall our zero-embedding lexical stack (90.6% @5) at the same
|
|
685
|
+
retrieval stage — dense and graph signal genuinely help raw recall, and most of
|
|
686
|
+
our remaining gap is paraphrase (single-session-preference is our lowest category
|
|
687
|
+
at 63%). The rerank row's point is that a *single cheap LLM call closes it*:
|
|
688
|
+
reordering the top-20 lexical candidates reaches 96.8% @5 — matching the dense raw
|
|
689
|
+
number and edging the hybrid's retrieval score — because the lexical candidate set
|
|
690
|
+
is already rich enough (recall@20 = 97.8%) that ranking, not recall, is the
|
|
691
|
+
bottleneck. An embedding-plus-rerank stack still leads when both sides spend an LLM
|
|
692
|
+
call; the takeaway is that claude-mem-lite reaches embedding-competitive precision
|
|
693
|
+
with **no vector model, no knowledge graph, no Python, and no external service**.
|
|
694
|
+
|
|
695
|
+
Per-category @5 (lexical → +rerank): knowledge-update 98.7 → 100.0 ·
|
|
696
|
+
single-session-user 91.4 → 98.6 · temporal-reasoning 89.5 → 97.7 · multi-session
|
|
697
|
+
95.5 → 97.7 · single-session-assistant 83.9 → 94.6 · single-session-preference
|
|
698
|
+
63.3 → 80.0. Every category improves; none regress.
|
|
699
|
+
|
|
647
700
|
## Development
|
|
648
701
|
|
|
649
702
|
```bash
|
package/deep-search.mjs
CHANGED
|
@@ -34,10 +34,19 @@
|
|
|
34
34
|
import { searchObservationsHybrid } from './search-engine.mjs';
|
|
35
35
|
import { sanitizeFtsQuery } from './utils.mjs';
|
|
36
36
|
import { RRF_K } from './tfidf.mjs';
|
|
37
|
+
import { llmRerankOrder, defaultRerankLLM } from './rerank.mjs';
|
|
37
38
|
|
|
38
39
|
// original + up to 3 rewrites (keyword / concept-expansion / HyDE).
|
|
39
40
|
export const MAX_VARIANTS = 4;
|
|
40
41
|
|
|
42
|
+
// How many RRF-fused candidates the opt-in rerank stage hands to the LLM. The
|
|
43
|
+
// LongMemEval rerank benchmark (benchmark/longmemeval-rerank.mjs) measured the
|
|
44
|
+
// lexical candidate set as rich enough at 20 (recall@20 = 97.8%) that reranking
|
|
45
|
+
// the top-20 captures nearly all of that ceiling (96.8%@5); matching it here keeps
|
|
46
|
+
// the shipped behaviour aligned with the measured number. Module-internal — callers
|
|
47
|
+
// override per-call via deps.rerankTopK; export it if a config surface ever needs it.
|
|
48
|
+
const RERANK_TOPK = 20;
|
|
49
|
+
|
|
41
50
|
// ─── Auto-escalation (opt-in adaptive deep search) ──────────────────────────
|
|
42
51
|
// Result-count floor below which a normal search is "weak" enough to auto-escalate
|
|
43
52
|
// to deepSearch. Calibrated against the deep-search benchmark fixtures; 3 is the
|
|
@@ -371,7 +380,33 @@ function defaultSearchFn(db, query, params) {
|
|
|
371
380
|
}
|
|
372
381
|
|
|
373
382
|
/**
|
|
374
|
-
*
|
|
383
|
+
* Build the candidate text the opt-in rerank stage shows the LLM. Prefers each
|
|
384
|
+
* observation's full `narrative` (the field the LongMemEval rerank benchmark
|
|
385
|
+
* scored); falls back to title / subtitle / snippet / lesson when narrative is
|
|
386
|
+
* unavailable or the db can't be read (injected rows / null db in unit tests).
|
|
387
|
+
* @param {Database|null} db
|
|
388
|
+
* @param {Array<object>} rows fused candidate rows (already sliced to top-K)
|
|
389
|
+
* @returns {Map<any,string>} id → candidate text
|
|
390
|
+
*/
|
|
391
|
+
function defaultRerankText(db, rows) {
|
|
392
|
+
const fallback = (r) => [r.title, r.subtitle, r.snippet, r.lesson_learned].filter(Boolean).join(' — ');
|
|
393
|
+
if (!db) return new Map(rows.map((r) => [r.id, fallback(r)]));
|
|
394
|
+
try {
|
|
395
|
+
const ids = rows.map((r) => r.id);
|
|
396
|
+
const ph = ids.map(() => '?').join(',');
|
|
397
|
+
const found = new Map(
|
|
398
|
+
db.prepare(`SELECT id, narrative, title, subtitle FROM observations WHERE id IN (${ph})`)
|
|
399
|
+
.all(...ids)
|
|
400
|
+
.map((o) => [o.id, o.narrative || [o.title, o.subtitle].filter(Boolean).join(' — ')]),
|
|
401
|
+
);
|
|
402
|
+
return new Map(rows.map((r) => [r.id, found.get(r.id) || fallback(r)]));
|
|
403
|
+
} catch {
|
|
404
|
+
return new Map(rows.map((r) => [r.id, fallback(r)]));
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
/**
|
|
409
|
+
* Opt-in deep search: rewrite → per-variant hybrid search → RRF fusion → opt-in rerank.
|
|
375
410
|
* @param {Database} db open better-sqlite3 handle
|
|
376
411
|
* @param {object} params
|
|
377
412
|
* @param {string} params.query The user query.
|
|
@@ -386,11 +421,15 @@ function defaultSearchFn(db, query, params) {
|
|
|
386
421
|
* @param {(db:Database, query:string, params:object)=>Array} [deps.searchFn]
|
|
387
422
|
* @param {number} [deps.rrfK=RRF_K]
|
|
388
423
|
* @param {boolean} [deps.auto=false] use the fail-fast/throttled/cached auto provider
|
|
389
|
-
* @returns {Promise<{results: Array, variants: string[]}>}
|
|
424
|
+
* @param {boolean} [deps.rerank=false] opt-in: LLM-rerank the fused top-K (never on the auto path)
|
|
425
|
+
* @param {(prompt:object)=>Promise<any>} [deps.rerankLlm] rerank provider (default: lazy haiku)
|
|
426
|
+
* @param {number} [deps.rerankTopK=RERANK_TOPK] how many fused candidates to rerank
|
|
427
|
+
* @param {(db:Database, rows:Array)=>Map} [deps.rerankTextFn] id→text builder for the rerank prompt
|
|
428
|
+
* @returns {Promise<{results: Array, variants: string[], reranked: boolean}>}
|
|
390
429
|
*/
|
|
391
|
-
export async function deepSearch(db, params, { llm, searchFn = defaultSearchFn, rrfK = RRF_K, auto = false } = {}) {
|
|
430
|
+
export async function deepSearch(db, params, { llm, searchFn = defaultSearchFn, rrfK = RRF_K, auto = false, rerank = false, rerankLlm, rerankTopK = RERANK_TOPK, rerankTextFn = defaultRerankText } = {}) {
|
|
392
431
|
const query = String(params?.query ?? '').trim();
|
|
393
|
-
if (!query) return { results: [], variants: [] };
|
|
432
|
+
if (!query) return { results: [], variants: [], reranked: false };
|
|
394
433
|
|
|
395
434
|
// No injected llm: EXPLICIT deep=true uses the patient defaultLLM; the AUTO
|
|
396
435
|
// path uses a fail-fast + throttled provider with no retry and a process-
|
|
@@ -418,5 +457,40 @@ export async function deepSearch(db, params, { llm, searchFn = defaultSearchFn,
|
|
|
418
457
|
|
|
419
458
|
const fused = rrfFuseN(lists, rrfK);
|
|
420
459
|
const limit = params.limit ?? 10;
|
|
421
|
-
return { results: fused.slice(0, limit), variants };
|
460
|
+
|
|
461
|
+
// Opt-in rerank stage (option C): reorder the fused top-K by an LLM relevance
|
|
462
|
+
// read, using the same core the LongMemEval benchmark measures (rerank.mjs) so
|
|
463
|
+
// the shipped algorithm == the measured one. Strictly opt-in — the AUTO
|
|
464
|
+
// escalation path never reranks, so no default search behaviour changes and the
|
|
465
|
+
// hot path stays a single LLM call. "Never worse than the fused order" by
|
|
466
|
+
// construction: a failed/unparseable rerank leaves the fused order untouched.
|
|
467
|
+
// The candidate set fed here is RICHER than the benchmark's single-query top-20
|
|
468
|
+
// (it is multi-query RRF), so the measured 96.8%@5 is a conservative floor.
|
|
469
|
+
let ordered = fused;
|
|
470
|
+
let reranked = false;
|
|
471
|
+
if (rerank && fused.length > 1) {
|
|
472
|
+
const k = Math.min(rerankTopK, fused.length);
|
|
473
|
+
const top = fused.slice(0, k);
|
|
474
|
+
const text = rerankTextFn(db, top);
|
|
475
|
+
const cand = top.map((r) => ({ sid: r.id, text: text.get(r.id) || '' }));
|
|
476
|
+
const { order, parsed } = await llmRerankOrder(query, cand, rerankLlm || defaultRerankLLM);
|
|
477
|
+
if (parsed) {
|
|
478
|
+
const byId = new Map(top.map((r) => [r.id, r]));
|
|
479
|
+
const head = order.map((id) => byId.get(id)).filter(Boolean);
|
|
480
|
+
// Re-stamp scores so `score` stays monotonic with the rerank order, reusing
|
|
481
|
+
// the top-K's OWN values ascending (best = most-negative first): the reranked
|
|
482
|
+
// block keeps the K best scores so it stays ahead of the fused tail, and orders
|
|
483
|
+
// within itself by rerank rank. This keeps the shared CLI↔MCP `score` ordering
|
|
484
|
+
// contract (#8217) consistent with the array order, so a consumer that re-sorts
|
|
485
|
+
// by score reproduces the rerank order instead of restoring the RRF order.
|
|
486
|
+
// (server.mjs also skips its context re-rank/re-sort when reranked, so the LLM
|
|
487
|
+
// judgement is the final order — the re-stamp keeps score honest regardless.)
|
|
488
|
+
const scores = top.map((r) => r.score).sort((a, b) => a - b);
|
|
489
|
+
head.forEach((r, i) => { r.score = scores[i]; r.rrfScore = -scores[i]; });
|
|
490
|
+
ordered = [...head, ...fused.slice(k)];
|
|
491
|
+
reranked = true;
|
|
492
|
+
}
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
return { results: ordered.slice(0, limit), variants, reranked };
|
|
422
496
|
}
|
package/mem-cli.mjs
CHANGED
|
@@ -9,7 +9,7 @@ import { resolveProject } from './project-utils.mjs';
|
|
|
9
9
|
import { TIER_CASE_SQL, tierSqlParams } from './tier.mjs';
|
|
10
10
|
import { _resetVocabCache } from './tfidf.mjs';
|
|
11
11
|
import { autoBoostIfNeeded, reRankWithContext, markSuperseded } from './server-internals.mjs';
|
|
12
|
-
import { searchObservationsHybrid, countSearchTotal } from './search-engine.mjs';
|
|
12
|
+
import { searchObservationsHybrid, countSearchTotal, attachBodyTokens } from './search-engine.mjs';
|
|
13
13
|
import { deepSearch, resolveDeepMode, shouldEscalateToDeep, autoDeepLlmReady, hasEscalatableCorpus } from './deep-search.mjs';
|
|
14
14
|
import { ensureRegistryDb, upsertResource } from './registry.mjs';
|
|
15
15
|
import { searchResources } from './registry-retriever.mjs';
|
|
@@ -52,7 +52,7 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
52
52
|
const { positional, flags } = parseArgs(args);
|
|
53
53
|
const query = positional.join(' ');
|
|
54
54
|
if (!query) {
|
|
55
|
-
fail('[mem] Usage: claude-mem-lite search <query> [--type TYPE] [--source SOURCE] [--limit N] [--project P] [--from DATE] [--to DATE] [--importance N] [--branch B] [--offset N] [--sort relevance|time|importance] [--include-noise] [--deep] [--no-deep]');
|
|
55
|
+
fail('[mem] Usage: claude-mem-lite search <query> [--type TYPE] [--source SOURCE] [--limit N] [--project P] [--from DATE] [--to DATE] [--importance N] [--branch B] [--offset N] [--sort relevance|time|importance] [--include-noise] [--deep] [--no-deep] [--rerank]');
|
|
56
56
|
return;
|
|
57
57
|
}
|
|
58
58
|
|
|
@@ -109,6 +109,15 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
109
109
|
: ((flags['no-deep'] === true || flags['no-deep'] === 'true') ? false : undefined);
|
|
110
110
|
const deepMode = resolveDeepMode(explicitDeep, { surface: 'cli' });
|
|
111
111
|
|
|
112
|
+
// --rerank: opt-in LLM rerank of the fused top-20 (option C, deep-search.mjs).
|
|
113
|
+
// One extra Haiku call (~1.4s); only meaningful on the explicit --deep path,
|
|
114
|
+
// never on auto-escalation. Same rerank core the LongMemEval benchmark measures.
|
|
115
|
+
const rerankFlag = flags.rerank === true || flags.rerank === 'true';
|
|
116
|
+
const rerank = rerankFlag && deepMode === 'deep';
|
|
117
|
+
if (rerankFlag && deepMode !== 'deep') {
|
|
118
|
+
process.stderr.write('[mem] Note: --rerank requires --deep (it reranks deep-search candidates); ignored\n');
|
|
119
|
+
}
|
|
120
|
+
|
|
112
121
|
if (source && !['observations', 'sessions', 'prompts'].includes(source)) {
|
|
113
122
|
fail(`[mem] Invalid --source "${source}". Use: observations, sessions, prompts`);
|
|
114
123
|
return;
|
|
@@ -160,6 +169,7 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
160
169
|
let orFallbackFired = false;
|
|
161
170
|
|
|
162
171
|
let deepVariants = null;
|
|
172
|
+
let isReranked = false;
|
|
163
173
|
let isDeep = deepMode === 'deep';
|
|
164
174
|
|
|
165
175
|
// Search observations — shared engine with server.mjs (#8198/#8212 paired-path fix)
|
|
@@ -194,13 +204,19 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
194
204
|
epochTo: dateTo,
|
|
195
205
|
limit: perSourceLimit,
|
|
196
206
|
currentProject: project ? null : inferProject(),
|
|
197
|
-
}, llm ? { llm } : { auto });
|
|
207
|
+
}, llm ? { llm, rerank: rerank && !auto } : { auto, rerank: rerank && !auto });
|
|
198
208
|
deepVariants = ds.variants;
|
|
209
|
+
isReranked = ds.reranked;
|
|
199
210
|
if (deepVariants.length > 1) {
|
|
200
211
|
process.stderr.write(`[mem] Deep search: rewrote into ${deepVariants.length} query variants, RRF-fused\n`);
|
|
201
212
|
} else {
|
|
202
213
|
process.stderr.write('[mem] Deep search: rewrite returned no usable variants; used original query only\n');
|
|
203
214
|
}
|
|
215
|
+
if (rerank && !auto) {
|
|
216
|
+
process.stderr.write(ds.reranked
|
|
217
|
+
? '[mem] Deep search: LLM-reranked the fused top-20\n'
|
|
218
|
+
: '[mem] Deep search: rerank produced no usable order; kept fused order\n');
|
|
219
|
+
}
|
|
204
220
|
return ds.results;
|
|
205
221
|
};
|
|
206
222
|
|
|
@@ -270,7 +286,9 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
270
286
|
if (obsResults.length > 0) {
|
|
271
287
|
// reRankWithContext/markSuperseded expect source='obs' — alias _source for compatibility
|
|
272
288
|
for (const r of obsResults) r.source = 'obs';
|
|
273
|
-
reRankWithContext(db, obsResults, project || inferProject());
|
289
|
+
// Explicit LLM rerank order is final — skip file-context re-rank when reranked
|
|
290
|
+
// (paired-path with mem_search; markSuperseded still runs for stale-tagging).
|
|
291
|
+
if (!isReranked) reRankWithContext(db, obsResults, project || inferProject());
|
|
274
292
|
markSuperseded(obsResults);
|
|
275
293
|
if (isCrossSource) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
|
|
276
294
|
}
|
|
@@ -305,6 +323,9 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
305
323
|
includeNoise,
|
|
306
324
|
}), results.length);
|
|
307
325
|
const paged = results.slice(offset, offset + limit);
|
|
326
|
+
// Enrich the final page with the ~Nt fetch-cost hint (paired with MCP mem_search; #8654 both
|
|
327
|
+
// source keys handled). Batch-fetches heavy obs fields by id — no-op on an empty page.
|
|
328
|
+
attachBodyTokens(db, paged);
|
|
308
329
|
|
|
309
330
|
if (paged.length === 0) {
|
|
310
331
|
if (jsonOutput) {
|
|
@@ -343,6 +364,7 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
343
364
|
importance: r.importance ?? null,
|
|
344
365
|
superseded: Boolean(r.superseded),
|
|
345
366
|
files_modified: r.files_modified || null,
|
|
367
|
+
body_tokens: r.bodyTokens ?? null,
|
|
346
368
|
};
|
|
347
369
|
});
|
|
348
370
|
out(JSON.stringify({
|
|
@@ -364,19 +386,22 @@ async function cmdSearch(db, args, { llm } = {}) {
|
|
|
364
386
|
// Pluralize on total — "Found 1 of 44 result" reads wrong; the population (44) drives
|
|
365
387
|
// grammatical number, not the page slice (1).
|
|
366
388
|
out(`[mem] Found ${countLabel} result${total !== 1 ? 's' : ''} for "${query}"${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}`);
|
|
389
|
+
// `~Nt` = est. tokens to fetch this row's full body via mem_get (attachBodyTokens, paired with
|
|
390
|
+
// MCP). Conditional so a row that skipped enrichment renders cleanly, not "~undefinedt".
|
|
391
|
+
const tok = r => (r.bodyTokens ? ` ~${r.bodyTokens}t` : '');
|
|
367
392
|
for (const r of paged) {
|
|
368
393
|
const timeStr = showTime && r.created_at_epoch ? ` (${relativeTime(r.created_at_epoch)})` : '';
|
|
369
394
|
if (r._source === 'session') {
|
|
370
395
|
const date = fmtDateShort(r.created_at);
|
|
371
|
-
out(`S#${r.id} 📋 ${date}${timeStr} ${truncate(r.request || r.completed || '(no summary)', 80)}`);
|
|
396
|
+
out(`S#${r.id} 📋 ${date}${timeStr} ${truncate(r.request || r.completed || '(no summary)', 80)}${tok(r)}`);
|
|
372
397
|
} else if (r._source === 'prompt') {
|
|
373
398
|
const date = fmtDateShort(r.created_at);
|
|
374
|
-
out(`P#${r.id} 💬 ${date}${timeStr} ${truncate(r.prompt_text || '(empty)', 80)}`);
|
|
399
|
+
out(`P#${r.id} 💬 ${date}${timeStr} ${truncate(r.prompt_text || '(empty)', 80)}${tok(r)}`);
|
|
375
400
|
} else {
|
|
376
401
|
const date = fmtDateShort(r.created_at);
|
|
377
402
|
const title = truncate(r.title || r.subtitle || '(untitled)', 80);
|
|
378
403
|
const supersededTag = r.superseded ? ' [SUPERSEDED]' : '';
|
|
379
|
-
out(`#${r.id} ${typeIcon(r.type)} ${date}${timeStr} ${title}${supersededTag}`);
|
|
404
|
+
out(`#${r.id} ${typeIcon(r.type)} ${date}${timeStr} ${title}${supersededTag}${tok(r)}`);
|
|
380
405
|
if (r.lesson_learned) {
|
|
381
406
|
out(` -> ${truncate(r.lesson_learned, 80)}`);
|
|
382
407
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "claude-mem-lite",
|
|
3
|
-
"version": "3.3.1",
|
|
3
|
+
"version": "3.5.0",
|
|
4
4
|
"description": "Persistent long-term memory for Claude Code via MCP — captures coding decisions, bugfixes, and context across sessions. Hybrid FTS5 + TF-IDF search with episode batching. Single SQLite DB, no external services. A lighter, lower-cost alternative to claude-mem (episode batching + a smaller model; cost savings are an internal estimate, not a measured benchmark).",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"packageManager": "npm@10.9.2",
|
|
@@ -31,6 +31,7 @@
|
|
|
31
31
|
"server-internals.mjs",
|
|
32
32
|
"search-engine.mjs",
|
|
33
33
|
"deep-search.mjs",
|
|
34
|
+
"rerank.mjs",
|
|
34
35
|
"hook.mjs",
|
|
35
36
|
"hook-shared.mjs",
|
|
36
37
|
"hook-llm.mjs",
|
package/rerank.mjs
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
// Shared LLM-rerank core: reorder a top-K candidate list by an LLM relevance read.
|
|
2
|
+
//
|
|
3
|
+
// Used by BOTH the production deep-search rerank stage (deep-search.mjs) and the
|
|
4
|
+
// LongMemEval rerank benchmark (benchmark/longmemeval-rerank.mjs), so the measured
|
|
5
|
+
// lift number reflects the EXACT algorithm that ships. "Never worse than the input
|
|
6
|
+
// candidate order" by construction: any LLM/parse failure returns the original order.
|
|
7
|
+
//
|
|
8
|
+
// The LLM is dependency-injected by every caller, so this module is unit-tested with
|
|
9
|
+
// deterministic stubs and never statically imports the native-heavy LLM client (the
|
|
10
|
+
// default provider is pulled in lazily on first real call).
|
|
11
|
+
import { parseJsonFromLLM } from './utils.mjs';
|
|
12
|
+
|
|
13
|
+
// Module-internal: only buildRerankPrompt (below) consumes these. Kept un-exported
|
|
14
|
+
// so the module's public surface is just the three functions callers actually import.
|
|
15
|
+
const RERANK_SYSTEM =
|
|
16
|
+
'You rerank search results. Given a QUERY and numbered candidate session snippets, ' +
|
|
17
|
+
'decide which sessions most likely contain the answer to the query. ' +
|
|
18
|
+
'Return ONLY JSON {"ranked":[<candidate numbers, most relevant first, each number once>]}. No prose, no markdown.';
|
|
19
|
+
|
|
20
|
+
function buildRerankPrompt(query, snippets) {
|
|
21
|
+
const lines = snippets.map((s, i) => `${i + 1}. ${String(s).replace(/\s+/g, ' ').slice(0, 400)}`);
|
|
22
|
+
return {
|
|
23
|
+
system: RERANK_SYSTEM,
|
|
24
|
+
user: `QUERY: ${query}\n\nCANDIDATES:\n${lines.join('\n')}\n\nReturn {"ranked":[...]} over 1..${snippets.length}, best first.`,
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Extract a 1-based ranking array from whatever the LLM returned: a {ranked:[...]}
|
|
29
|
+
// object (stub / clean JSON), a bare array (clean OR prose-wrapped [..]), or a
|
|
30
|
+
// {text} envelope from callLLMWithModel. The bare-array path is what lifts the
|
|
31
|
+
// real parse-rate: claude-haiku often answers "[3,1,5]" instead of {"ranked":..},
|
|
32
|
+
// and parseJsonFromLLM's leading JSON.parse returns that as an array (no .ranked),
|
|
33
|
+
// which the old object-only check silently dropped. null → nothing recoverable.
|
|
34
|
+
export function extractRanked(raw) {
|
|
35
|
+
if (raw === null || raw === undefined) return null;
|
|
36
|
+
if (Array.isArray(raw)) return raw;
|
|
37
|
+
if (typeof raw === 'object' && Array.isArray(raw.ranked)) return raw.ranked;
|
|
38
|
+
const text = typeof raw === 'string' ? raw : typeof raw.text === 'string' ? raw.text : '';
|
|
39
|
+
if (!text) return null;
|
|
40
|
+
const obj = parseJsonFromLLM(text);
|
|
41
|
+
if (Array.isArray(obj)) return obj; // bare array [3,1,5]
|
|
42
|
+
if (obj && Array.isArray(obj.ranked)) return obj.ranked; // {"ranked":[...]}
|
|
43
|
+
const m = text.match(/\[\s*\d+(?:\s*,\s*\d+)*\s*\]/); // prose-wrapped [..]
|
|
44
|
+
if (m) {
|
|
45
|
+
try { const a = JSON.parse(m[0]); if (Array.isArray(a)) return a; } catch { /* fall through */ }
|
|
46
|
+
}
|
|
47
|
+
return null;
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
// Reorder candidate session ids per the LLM's chosen 1-based order; any failure →
|
|
51
|
+
// original order ("never worse than baseline"). { order: sid[], parsed: bool }.
|
|
52
|
+
export async function llmRerankOrder(query, cand /* [{sid,text}] */, llm) {
|
|
53
|
+
const prompt = buildRerankPrompt(query, cand.map((c) => c.text));
|
|
54
|
+
let raw;
|
|
55
|
+
try { raw = await llm(prompt); } catch { raw = null; }
|
|
56
|
+
const order = extractRanked(raw);
|
|
57
|
+
if (!order) return { order: cand.map((c) => c.sid), parsed: false };
|
|
58
|
+
const seen = new Set();
|
|
59
|
+
const out = [];
|
|
60
|
+
for (const n of order) {
|
|
61
|
+
const idx = Number(n) - 1;
|
|
62
|
+
if (Number.isInteger(idx) && idx >= 0 && idx < cand.length && !seen.has(idx)) {
|
|
63
|
+
seen.add(idx);
|
|
64
|
+
out.push(cand[idx].sid);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
cand.forEach((c, i) => { if (!seen.has(i)) out.push(c.sid); }); // append omitted, original order
|
|
68
|
+
return { order: out, parsed: true };
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
// Default provider — lazy import so stub-injected callers never load the client.
|
|
72
|
+
// Uses callLLMWithModel (returns {text}) rather than callModelJSONAsync (which
|
|
73
|
+
// JSON-parses internally and nulls on any non-{...} output) so extractRanked can
|
|
74
|
+
// recover bare-array answers the strict JSON parse drops.
|
|
75
|
+
export async function defaultRerankLLM(prompt) {
|
|
76
|
+
const { callLLMWithModel } = await import('./haiku-client.mjs');
|
|
77
|
+
return callLLMWithModel(prompt, 'haiku', { timeout: 20000, maxTokens: 300 });
|
|
78
|
+
}
|
package/search-engine.mjs
CHANGED
|
@@ -9,7 +9,7 @@ import {
|
|
|
9
9
|
OBS_BM25, TYPE_DECAY_CASE, TYPE_QUALITY_CASE,
|
|
10
10
|
DEFAULT_DECAY_HALF_LIFE_MS,
|
|
11
11
|
notLowSignalTitleClause, LOW_SIGNAL_TITLE,
|
|
12
|
-
relaxFtsQueryToOr, debugLog, debugCatch,
|
|
12
|
+
relaxFtsQueryToOr, debugLog, debugCatch, estimateTokens,
|
|
13
13
|
} from './utils.mjs';
|
|
14
14
|
import { getVocabulary, computeVector, vectorSearch, rrfMerge } from './tfidf.mjs';
|
|
15
15
|
import { extractPRFTerms, expandQueryByConcepts } from './server-internals.mjs';
|
|
@@ -190,6 +190,45 @@ export function ftsRowToResult(r, { scoreMultiplier, snippet } = {}) {
|
|
|
190
190
|
};
|
|
191
191
|
}
|
|
192
192
|
|
|
193
|
+
// Per-result estimate of the token cost to fetch the FULL body via mem_get, surfaced as the
|
|
194
|
+
// `~Nt` hint in search output so the agent can budget the 3-layer protocol (search → timeline →
|
|
195
|
+
// get) before paying to expand any ID. Adopted from thedotmack/claude-mem's token-cost column
|
|
196
|
+
// (reference_claude_mem_comparison) — the one genuinely portable idea from that analysis.
|
|
197
|
+
//
|
|
198
|
+
// Layer-1 search deliberately omits narrative/facts (that's what keeps the index light), so the
|
|
199
|
+
// heavy obs fields are batch-fetched by id HERE rather than carried on every result. The source
|
|
200
|
+
// key is read as `source || _source` because the two render paths disagree (#8654): MCP sets
|
|
201
|
+
// `source`+`text`, CLI sets `_source`+`prompt_text`. estimateTokens floors at 1, so a missing row
|
|
202
|
+
// or empty body yields 1 — never 0/NaN.
|
|
203
|
+
export function attachBodyTokens(db, results) {
|
|
204
|
+
if (!Array.isArray(results) || results.length === 0) return results;
|
|
205
|
+
const obsIds = results
|
|
206
|
+
.filter(r => (r.source || r._source) === 'obs' && Number.isInteger(r.id))
|
|
207
|
+
.map(r => r.id);
|
|
208
|
+
const bodyById = new Map();
|
|
209
|
+
if (obsIds.length > 0) {
|
|
210
|
+
try {
|
|
211
|
+
const ph = obsIds.map(() => '?').join(',');
|
|
212
|
+
const rows = db.prepare(`SELECT id, narrative, facts, text FROM observations WHERE id IN (${ph})`).all(...obsIds);
|
|
213
|
+
for (const row of rows) bodyById.set(row.id, row);
|
|
214
|
+
} catch (e) { debugCatch(e, 'attachBodyTokens'); }
|
|
215
|
+
}
|
|
216
|
+
for (const r of results) {
|
|
217
|
+
const src = r.source || r._source;
|
|
218
|
+
let parts;
|
|
219
|
+
if (src === 'obs') {
|
|
220
|
+
const row = bodyById.get(r.id) || {};
|
|
221
|
+
parts = [r.title, r.subtitle, r.lesson_learned, row.narrative, row.facts, row.text];
|
|
222
|
+
} else if (src === 'session') {
|
|
223
|
+
parts = [r.request, r.completed, r.working_on];
|
|
224
|
+
} else {
|
|
225
|
+
parts = [r.text, r.prompt_text];
|
|
226
|
+
}
|
|
227
|
+
r.bodyTokens = estimateTokens(parts.filter(Boolean).join(' '));
|
|
228
|
+
}
|
|
229
|
+
return results;
|
|
230
|
+
}
|
|
231
|
+
|
|
193
232
|
function expandObsByConceptCo(db, ctx, now, existingIds, results, includeNoise = false) {
|
|
194
233
|
const { ftsQuery, args, epochFrom, epochTo, limit } = ctx;
|
|
195
234
|
if (results.length >= Math.ceil(limit / 2)) return;
|
package/server.mjs
CHANGED
|
@@ -9,7 +9,7 @@ import { truncate, typeIcon, inferProject, scrubSecrets, fmtDate, debugLog, debu
|
|
|
9
9
|
import { resolveProject as _resolveProjectShared } from './project-utils.mjs';
|
|
10
10
|
import { ensureDb, DB_PATH, DB_DIR, REGISTRY_DB_PATH } from './schema.mjs';
|
|
11
11
|
import { reRankWithContext, markSuperseded, autoBoostIfNeeded, runIdleCleanup, buildServerInstructions } from './server-internals.mjs';
|
|
12
|
-
import { searchObservationsHybrid, countSearchTotal } from './search-engine.mjs';
|
|
12
|
+
import { searchObservationsHybrid, countSearchTotal, attachBodyTokens } from './search-engine.mjs';
|
|
13
13
|
import { deepSearch, resolveDeepMode, shouldEscalateToDeep, autoDeepLlmReady, hasEscalatableCorpus } from './deep-search.mjs';
|
|
14
14
|
import { selectCompressionCandidates, groupByProjectWeek, compressGroup } from './lib/compress-core.mjs';
|
|
15
15
|
import { resolveAnchorToken, formatAnchorError, resolveQueryAnchor, fetchRecentTimeline, fetchTimelineWindow } from './lib/timeline-core.mjs';
|
|
@@ -294,21 +294,24 @@ function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFall
|
|
|
294
294
|
const fallbackHint = orFallbackFired && !args.or ? ' (relaxed AND→OR)' : '';
|
|
295
295
|
lines.push(`Found ${countLabel} result(s)${qLabel}${fallbackHint}:${hasMixed ? ' (# observation, S# session, P# prompt)' : ''}\n`);
|
|
296
296
|
|
|
297
|
+
// `~Nt` = estimated tokens to fetch this row's full body via mem_get (attachBodyTokens).
|
|
298
|
+
// Conditional so a result that skipped enrichment renders cleanly, not "~undefinedt".
|
|
299
|
+
const tok = r => (r.bodyTokens ? ` ~${r.bodyTokens}t` : '');
|
|
297
300
|
for (const r of paginatedResults) {
|
|
298
301
|
if (r.source === 'obs') {
|
|
299
302
|
const supersededTag = r.superseded ? ' [SUPERSEDED]' : '';
|
|
300
|
-
lines.push(`#${r.id} ${typeIcon(r.type)} [${r.type}] ${truncate(r.title || r.subtitle || '(untitled)')} | ${r.project} | ${fmtDate(r.date)}${supersededTag}`);
|
|
303
|
+
lines.push(`#${r.id} ${typeIcon(r.type)} [${r.type}] ${truncate(r.title || r.subtitle || '(untitled)')} | ${r.project} | ${fmtDate(r.date)}${supersededTag}${tok(r)}`);
|
|
301
304
|
if (r.snippet && r.snippet.length > 10 && r.snippet !== r.title) {
|
|
302
305
|
lines.push(` ${truncate(r.snippet, 100)}`);
|
|
303
306
|
}
|
|
304
307
|
} else if (r.source === 'session') {
|
|
305
|
-
lines.push(`S#${r.id} 📋 ${truncate(r.request || r.completed || '(no summary)')} | ${r.project} | ${fmtDate(r.date)}`);
|
|
308
|
+
lines.push(`S#${r.id} 📋 ${truncate(r.request || r.completed || '(no summary)')} | ${r.project} | ${fmtDate(r.date)}${tok(r)}`);
|
|
306
309
|
} else if (r.source === 'prompt') {
|
|
307
|
-
lines.push(`P#${r.id} 💬 ${truncate(r.text)} | ${fmtDate(r.date)}`);
|
|
310
|
+
lines.push(`P#${r.id} 💬 ${truncate(r.text)} | ${fmtDate(r.date)}${tok(r)}`);
|
|
308
311
|
}
|
|
309
312
|
}
|
|
310
313
|
|
|
311
|
-
lines.push(`\nWorkflow: mem_timeline(anchor=ID) for context | mem_get(ids=[...]) for full details`);
|
|
314
|
+
lines.push(`\nWorkflow: mem_timeline(anchor=ID) for context | mem_get(ids=[...]) for full details · ~Nt = est. tokens to fetch full detail`);
|
|
312
315
|
return { content: [{ type: 'text', text: lines.join('\n') }] };
|
|
313
316
|
}
|
|
314
317
|
|
|
@@ -320,11 +323,11 @@ function formatSearchOutput(paginatedResults, args, ftsQuery, totalCount, orFall
|
|
|
320
323
|
// NOTE: resolveProject() inside runSearchPipeline closes over the module-level `db`,
|
|
321
324
|
// not the injected one. Tests that pass a project: arg via this seam will trigger
|
|
322
325
|
// resolveProject() against the real (module) DB, not the test DB.
|
|
323
|
-
export async function handleSearchForTest(db, args, { llm } = {}) {
|
|
324
|
-
return runSearchPipeline(db, args, { llm });
|
|
326
|
+
export async function handleSearchForTest(db, args, { llm, rerankLlm } = {}) {
|
|
327
|
+
return runSearchPipeline(db, args, { llm, rerankLlm });
|
|
325
328
|
}
|
|
326
329
|
|
|
327
|
-
async function runSearchPipeline(db, args, { llm } = {}) {
|
|
330
|
+
async function runSearchPipeline(db, args, { llm, rerankLlm } = {}) {
|
|
328
331
|
if (args.project) args = { ...args, project: resolveProject(args.project) };
|
|
329
332
|
const limit = args.limit ?? 20;
|
|
330
333
|
const offset = args.offset ?? 0;
|
|
@@ -349,6 +352,9 @@ async function runSearchPipeline(db, args, { llm } = {}) {
|
|
|
349
352
|
// Resolve tri-state deep mode. MCP defaults to 'auto' (escalate on weak results)
|
|
350
353
|
// unless explicitly overridden via args.deep or CLAUDE_MEM_AUTO_DEEP env flag.
|
|
351
354
|
const deepMode = resolveDeepMode(args.deep, { surface: 'mcp' });
|
|
355
|
+
// Opt-in LLM rerank (D#43): explicit-deep only — never on AUTO escalation — so
|
|
356
|
+
// no default search behaviour changes. Parity with CLI `search --deep --rerank`.
|
|
357
|
+
const rerank = args.rerank === true && deepMode === 'deep';
|
|
352
358
|
|
|
353
359
|
// Early return when query was provided but sanitized to nothing (all FTS5
|
|
354
360
|
// keywords/special chars). Skipped for deep/auto — deep's LLM rewrite may
|
|
@@ -365,13 +371,14 @@ async function runSearchPipeline(db, args, { llm } = {}) {
|
|
|
365
371
|
const ctx = { db, ftsQuery, searchType: effectiveType, args, epochFrom, epochTo, perSourceLimit, perSourceOffset, currentProject, limit };
|
|
366
372
|
const results = [];
|
|
367
373
|
let deepVariants = null;
|
|
374
|
+
let deepReranked = false;
|
|
368
375
|
let isDeep = deepMode === 'deep';
|
|
369
376
|
let escalated = false;
|
|
370
377
|
let escalatedObsCount = 0;
|
|
371
378
|
|
|
372
379
|
// Helper: run deepSearch and load results into the shared `results` array.
|
|
373
380
|
const runDeepInto = async ({ auto = false } = {}) => {
|
|
374
|
-
const { results: deepRows, variants } = await deepSearch(db, {
|
|
381
|
+
const { results: deepRows, variants, reranked } = await deepSearch(db, {
|
|
375
382
|
query: args.query,
|
|
376
383
|
project: args.project || null,
|
|
377
384
|
type: args.obs_type || null,
|
|
@@ -381,11 +388,12 @@ async function runSearchPipeline(db, args, { llm } = {}) {
|
|
|
381
388
|
epochFrom, epochTo,
|
|
382
389
|
limit: perSourceLimit,
|
|
383
390
|
currentProject,
|
|
384
|
-
}, llm ? { llm } : { auto });
|
|
391
|
+
}, llm ? { llm, rerank: rerank && !auto, rerankLlm } : { auto, rerank: rerank && !auto, rerankLlm });
|
|
385
392
|
// Safe to reset: sessions/prompts are pushed AFTER the obs block, so nothing is lost here.
|
|
386
393
|
results.length = 0;
|
|
387
394
|
results.push(...deepRows);
|
|
388
395
|
deepVariants = variants;
|
|
396
|
+
deepReranked = reranked;
|
|
389
397
|
};
|
|
390
398
|
|
|
391
399
|
if (!effectiveType || effectiveType === 'observations') {
|
|
@@ -460,9 +468,13 @@ async function runSearchPipeline(db, args, { llm } = {}) {
|
|
|
460
468
|
// empty-ftsQuery deep path we tag-but-don't-reorder (keep RRF order).
|
|
461
469
|
if ((ftsQuery || isDeep) && results.some(r => r.source === 'obs')) {
|
|
462
470
|
const obsResults = results.filter(r => r.source === 'obs');
|
|
463
|
-
|
|
471
|
+
// When the deep candidates were explicitly LLM-reranked, that order is final:
|
|
472
|
+
// skip the file-context re-rank + re-sort (they would perturb the rerank order
|
|
473
|
+
// via score multiplication / score-sort). markSuperseded is pure stale-tagging
|
|
474
|
+
// and still runs. (D#43 — parity with the CLI deep path, which keeps array order.)
|
|
475
|
+
if (ftsQuery && !deepReranked) reRankWithContext(db, obsResults, currentProject);
|
|
464
476
|
markSuperseded(obsResults);
|
|
465
|
-
if (ftsQuery) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
|
|
477
|
+
if (ftsQuery && !deepReranked) results.sort((a, b) => (a.score ?? 0) - (b.score ?? 0));
|
|
466
478
|
}
|
|
467
479
|
|
|
468
480
|
// Tier post-filter: batch-lookup full rows and classify (shared with CLI).
|
|
@@ -499,6 +511,9 @@ async function runSearchPipeline(db, args, { llm } = {}) {
|
|
|
499
511
|
}), results.length);
|
|
500
512
|
// Always apply pagination — single-source results can exceed SQL LIMIT due to expansion (concept co-occurrence, PRF, vector search)
|
|
501
513
|
const paginatedResults = (offset > 0 || results.length > limit) ? results.slice(offset, offset + limit) : results;
|
|
514
|
+
// Enrich the FINAL page with a fetch-cost estimate (~Nt) so the agent budgets before mem_get.
|
|
515
|
+
// Uses the same db threaded through the pipeline (#8743) — batch-fetches heavy obs fields by id.
|
|
516
|
+
attachBodyTokens(db, paginatedResults);
|
|
502
517
|
|
|
503
518
|
// Observability: announce auto-escalation on stderr (parity with CLI deep note).
|
|
504
519
|
if (escalated) process.stderr.write(`[mem] auto-escalated to deep search (weak results: ${escalatedObsCount} hits)\n`);
|
|
@@ -512,9 +527,14 @@ async function runSearchPipeline(db, args, { llm } = {}) {
|
|
|
512
527
|
? `\n\n[deep search: rewrote into ${deepVariants.length} variants — ${deepVariants.slice(1).map(v => JSON.stringify(v)).join(', ')}]`
|
|
513
528
|
: '\n\n[deep search: rewrite produced no usable variants; searched the original query only (== baseline)]';
|
|
514
529
|
}
|
|
530
|
+
// Discoverability signal for the opt-in rerank (D#43): tell the calling agent the
|
|
531
|
+
// candidates were LLM-reranked — parity with the CLI stderr note.
|
|
532
|
+
if (deepReranked && output.content?.[0]?.type === 'text') {
|
|
533
|
+
output.content[0].text += '\n\n[deep search: LLM-reranked the top candidates by relevance]';
|
|
534
|
+
}
|
|
515
535
|
|
|
516
536
|
// Return an object that exposes structured fields for tests + the MCP content blob.
|
|
517
|
-
return { ...output, results: paginatedResults, total: totalBeforePagination, escalated, variants: deepVariants };
|
|
537
|
+
return { ...output, results: paginatedResults, total: totalBeforePagination, escalated, variants: deepVariants, reranked: deepReranked };
|
|
518
538
|
}
|
|
519
539
|
|
|
520
540
|
server.registerTool(
|
package/source-files.mjs
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
|
|
7
7
|
export const SOURCE_FILES = [
|
|
8
8
|
// Entry points and top-level modules
|
|
9
|
-
'cli.mjs', 'cli-path.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'deep-search.mjs', 'tool-schemas.mjs',
|
|
9
|
+
'cli.mjs', 'cli-path.mjs', 'server.mjs', 'server-internals.mjs', 'search-engine.mjs', 'deep-search.mjs', 'rerank.mjs', 'tool-schemas.mjs',
|
|
10
10
|
'hook.mjs', 'hook-shared.mjs', 'hook-llm.mjs', 'hook-memory.mjs', 'skip-tools.mjs',
|
|
11
11
|
'hook-semaphore.mjs', 'hook-episode.mjs', 'hook-context.mjs', 'hook-handoff.mjs',
|
|
12
12
|
'hook-update.mjs', 'hook-optimize.mjs', 'hook-precompact.mjs',
|
package/tool-schemas.mjs
CHANGED
|
@@ -94,6 +94,7 @@ export const memSearchSchema = {
|
|
|
94
94
|
include_noise: z.boolean().optional().describe('Include hook-llm fallback titles ("Modified X", "Worked on X", raw error logs) — hidden by default as they have ~3% access rate'),
|
|
95
95
|
or: coerceBool.optional().describe('Force OR semantics between query terms from the start (default: AND with automatic OR-fallback when AND returns 0). Aligns with CLI --or.'),
|
|
96
96
|
deep: coerceBool.optional().describe('Tri-state LLM multi-query/HyDE deep search (observations-only). true=force; false=never; omit=AUTO (default ON for mem_search): a normal search that returns weak/few results auto-escalates with ONE Haiku call (query rewritten to keyword/concept/HyDE variants, RRF-fused). Set CLAUDE_MEM_AUTO_DEEP=0 to disable AUTO. Passive recall stays single-query.'),
|
|
97
|
+
rerank: coerceBool.optional().describe('Opt-in: LLM-rerank the deep-search candidates for ranking precision (one extra Haiku call, ~1.4s). Requires deep=true (no effect on AUTO/normal). Reserve for hard, ranking-sensitive queries where the right memory is likely retrieved but mis-ranked — skip for routine search. Default off.'),
|
|
97
98
|
};
|
|
98
99
|
|
|
99
100
|
export const memRecentSchema = {
|