@xdarkicex/openclaw-memory-libravdb 1.3.17 → 1.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +53 -0
- package/package.json +4 -1
- package/packaging/homebrew/libravdbd.rb.tmpl +21 -11
- package/src/context-engine.ts +104 -19
- package/src/scoring.ts +95 -0
- package/src/types.ts +14 -0
package/README.md
CHANGED
|
@@ -185,6 +185,59 @@ The formal math lives in:
|
|
|
185
185
|
- [docs/ast-v2.md](./docs/ast-v2.md)
|
|
186
186
|
- [docs/elevated-guidance.md](./docs/elevated-guidance.md)
|
|
187
187
|
|
|
188
|
+
## LongMemEval Harness
|
|
189
|
+
|
|
190
|
+
For internal tuning, the repo includes a local LongMemEval harness that runs the
|
|
191
|
+
dataset through the plugin layer and measures whether the assembled prompt still
|
|
192
|
+
contains the evidence turns.
|
|
193
|
+
|
|
194
|
+
The benchmark runner is committed, but the dataset and generated reports are not.
|
|
195
|
+
Keep downloaded data and local outputs under `benchmarks/longmemeval/`, which is
|
|
196
|
+
ignored by default.
|
|
197
|
+
|
|
198
|
+
The harness writes JSONL incrementally, so partial results survive if a transient
|
|
199
|
+
daemon failure interrupts a long run.
|
|
200
|
+
|
|
201
|
+
The run summary now prints a compact table with total questions, processed rows,
|
|
202
|
+
skipped abstentions, errors, session hit rate, turn hit rate, and average prompt
|
|
203
|
+
size.
|
|
204
|
+
|
|
205
|
+
Run it with:
|
|
206
|
+
|
|
207
|
+
```bash
|
|
208
|
+
LONGMEMEVAL_DATA_FILE=/path/to/longmemeval_oracle.json pnpm run benchmark:longmemeval
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
If you already have a daemon running and do not want the benchmark to spawn
|
|
212
|
+
another one, set:
|
|
213
|
+
|
|
214
|
+
```bash
|
|
215
|
+
LONGMEMEVAL_USE_EXISTING_DAEMON=1 LONGMEMEVAL_SIDECAR_PATH=unix:/path/to/libravdb.sock
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
If the local test daemon drops mid-run, the benchmark will restart it and retry
|
|
219
|
+
the current instance once before recording an error result.
|
|
220
|
+
|
|
221
|
+
Optional outputs:
|
|
222
|
+
|
|
223
|
+
- `LONGMEMEVAL_LIMIT` to cap the number of questions
|
|
224
|
+
- `LONGMEMEVAL_TOPK` to change the search budget
|
|
225
|
+
- `LONGMEMEVAL_OUT_FILE` to write JSONL records for analysis
|
|
226
|
+
|
|
227
|
+
To score a hypothesis JSONL file with the official LongMemEval evaluator, point
|
|
228
|
+
the repo at a local checkout of the benchmark and run:
|
|
229
|
+
|
|
230
|
+
```bash
|
|
231
|
+
LONGMEMEVAL_EVAL_REPO=/path/to/LongMemEval \
|
|
232
|
+
LONGMEMEVAL_HYPOTHESIS_FILE=/path/to/hypotheses.jsonl \
|
|
233
|
+
LONGMEMEVAL_DATA_FILE=/path/to/longmemeval_oracle.json \
|
|
234
|
+
OPENAI_API_KEY=... \
|
|
235
|
+
pnpm run benchmark:longmemeval:score
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
That scorer wrapper shells out to the official Python evaluation script and then
|
|
239
|
+
prints the aggregate metrics from the generated log when available.
|
|
240
|
+
|
|
188
241
|
## Compaction Model
|
|
189
242
|
|
|
190
243
|
This system does not treat long chats as append-only forever.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xdarkicex/openclaw-memory-libravdb",
|
|
3
|
-
"version": "1.3.17",
|
|
3
|
+
"version": "1.3.19",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"publishConfig": {
|
|
6
6
|
"access": "public"
|
|
@@ -31,6 +31,9 @@
|
|
|
31
31
|
"gate:assemble_optimization": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && OPENCLAW_PROFILE_ASSEMBLE=1 OPENCLAW_ENFORCE_ASSEMBLE_EVIDENCE_GATE=1 node --test --test-name-pattern=\"real sidecar mid-sized session search benchmark\" .ts-build/test/integration/host-flow.test.js",
|
|
32
32
|
"probe:session_recall": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && OPENCLAW_PROFILE_ASSEMBLE=1 node --test --test-name-pattern=\"real sidecar mid-sized session search benchmark\" .ts-build/test/integration/host-flow.test.js",
|
|
33
33
|
"probe:session_recall_threshold": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && OPENCLAW_PROFILE_ASSEMBLE=1 node --test --test-name-pattern=\"real sidecar session_recall index threshold probe\" .ts-build/test/integration/host-flow.test.js",
|
|
34
|
+
"benchmark:longmemeval": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && node --test .ts-build/test/integration/longmemeval-benchmark.test.js",
|
|
35
|
+
"benchmark:longmemeval:score": "node scripts/longmemeval-score.mjs",
|
|
36
|
+
"benchmark:longmemeval:diagnose": "node scripts/longmemeval-diagnose.mjs",
|
|
34
37
|
"build:daemon": "bash scripts/build-daemon.sh"
|
|
35
38
|
},
|
|
36
39
|
"dependencies": {
|
package/packaging/homebrew/libravdbd.rb.tmpl
CHANGED
|
@@ -26,18 +26,18 @@ class Libravdbd < Formula
|
|
|
26
26
|
if OS.mac?
|
|
27
27
|
resource "onnxruntime" do
|
|
28
28
|
url "https://github.com/microsoft/onnxruntime/releases/download/v1.23.0/onnxruntime-osx-universal2-1.23.0.tgz"
|
|
29
|
-
sha256
|
|
29
|
+
sha256 "5e4365fb4a05aef353f6232b9a1848f37e608c421c9227e9224572205c0cfc08"
|
|
30
30
|
end
|
|
31
31
|
elsif OS.linux?
|
|
32
32
|
if Hardware::CPU.arm?
|
|
33
33
|
resource "onnxruntime" do
|
|
34
34
|
url "https://github.com/microsoft/onnxruntime/releases/download/v1.23.0/onnxruntime-linux-aarch64-1.23.0.tgz"
|
|
35
|
-
sha256
|
|
35
|
+
sha256 "0b9f47d140411d938e47915824d8daaa424df95a88b5f1fc843172a75168f7a0"
|
|
36
36
|
end
|
|
37
37
|
else
|
|
38
38
|
resource "onnxruntime" do
|
|
39
39
|
url "https://github.com/microsoft/onnxruntime/releases/download/v1.23.0/onnxruntime-linux-x64-1.23.0.tgz"
|
|
40
|
-
sha256
|
|
40
|
+
sha256 "b6deea7f2e22c10c043019f294a0ea4d2a6c0ae52a009c34847640db75ec5580"
|
|
41
41
|
end
|
|
42
42
|
end
|
|
43
43
|
end
|
|
@@ -54,12 +54,12 @@ class Libravdbd < Formula
|
|
|
54
54
|
|
|
55
55
|
resource "all-minilm-l6-v2-model" do
|
|
56
56
|
url "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx"
|
|
57
|
-
sha256 "
|
|
57
|
+
sha256 "6fd5d72fe4589f189f8ebc006442dbb529bb7ce38f8082112682524616046452"
|
|
58
58
|
end
|
|
59
59
|
|
|
60
60
|
resource "all-minilm-l6-v2-tokenizer" do
|
|
61
61
|
url "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json"
|
|
62
|
-
sha256 "
|
|
62
|
+
sha256 "be50c3628f2bf5bb5e3a7f17b1f74611b2561a3a27eeab05e5aa30f411572037"
|
|
63
63
|
end
|
|
64
64
|
|
|
65
65
|
resource "t5-small-encoder" do
|
|
@@ -88,7 +88,7 @@ class Libravdbd < Formula
|
|
|
88
88
|
end
|
|
89
89
|
|
|
90
90
|
resource "provision" do
|
|
91
|
-
url "https://github.com/xDarkicex/openclaw-memory-libravdb/releases/download/
|
|
91
|
+
url "https://github.com/xDarkicex/openclaw-memory-libravdb/releases/download/v__VERSION__/provision.sh"
|
|
92
92
|
sha256 "__SHA256_PROVISION__"
|
|
93
93
|
end
|
|
94
94
|
|
|
@@ -107,7 +107,13 @@ class Libravdbd < Formula
|
|
|
107
107
|
t5_dir.mkpath
|
|
108
108
|
|
|
109
109
|
resource("onnxruntime").stage do
|
|
110
|
-
|
|
110
|
+
# Homebrew may auto-strip the top-level dir from the tgz
|
|
111
|
+
subdir = Dir["onnxruntime-*"].first
|
|
112
|
+
if subdir
|
|
113
|
+
cp_r "#{subdir}/.", runtime_dir
|
|
114
|
+
else
|
|
115
|
+
cp_r ".", runtime_dir
|
|
116
|
+
end
|
|
111
117
|
end
|
|
112
118
|
|
|
113
119
|
resource("nomic-embed-text-v1.5-model").stage do
|
|
@@ -143,12 +149,14 @@ class Libravdbd < Formula
|
|
|
143
149
|
end
|
|
144
150
|
write_summarizer_manifest(t5_dir, "t5-small")
|
|
145
151
|
|
|
146
|
-
|
|
152
|
+
resource("provision").stage do
|
|
153
|
+
libexec.install "provision.sh"
|
|
154
|
+
end
|
|
147
155
|
chmod 0755, libexec/"provision.sh"
|
|
148
156
|
end
|
|
149
157
|
|
|
150
158
|
def post_install
|
|
151
|
-
(var/"clawdb
|
|
159
|
+
(var/"clawdb").mkpath
|
|
152
160
|
(var/"clawdb/run").mkpath
|
|
153
161
|
end
|
|
154
162
|
|
|
@@ -160,7 +168,8 @@ class Libravdbd < Formula
|
|
|
160
168
|
|
|
161
169
|
#{libexec}/provision.sh --target #{prefix}/models
|
|
162
170
|
|
|
163
|
-
Data directory: #{var}/clawdb
|
|
171
|
+
Data directory: #{var}/clawdb
|
|
172
|
+
Database file: #{var}/clawdb/data.libravdb
|
|
164
173
|
Socket directory: #{var}/clawdb/run
|
|
165
174
|
EOS
|
|
166
175
|
end
|
|
@@ -202,7 +211,8 @@ class Libravdbd < Formula
|
|
|
202
211
|
service do
|
|
203
212
|
run [opt_bin/"libravdbd", "serve"]
|
|
204
213
|
environment_variables LIBRAVDB_RPC_ENDPOINT: "unix:#{var}/clawdb/run/libravdb.sock",
|
|
205
|
-
LIBRAVDB_DB_PATH: "#{var}/clawdb/data",
|
|
214
|
+
LIBRAVDB_DB_PATH: "#{var}/clawdb/data.libravdb",
|
|
215
|
+
LIBRAVDB_ONNX_RUNTIME: "#{opt_prefix}/models/onnxruntime/lib/libonnxruntime.dylib",
|
|
206
216
|
LIBRAVDB_SUMMARIZER_BACKEND: "bundled"
|
|
207
217
|
keep_alive true
|
|
208
218
|
working_dir var/"clawdb"
|
package/src/context-engine.ts
CHANGED
|
@@ -7,6 +7,7 @@ import {
|
|
|
7
7
|
import {
|
|
8
8
|
detectRetrievalFailure,
|
|
9
9
|
expandSection7HopCandidates,
|
|
10
|
+
rankRawUserRecoveryCandidates,
|
|
10
11
|
mergeSection7VariantCandidates,
|
|
11
12
|
rankSection7VariantCandidates,
|
|
12
13
|
} from "./scoring.js";
|
|
@@ -179,6 +180,7 @@ export function buildContextEngineFactory(
|
|
|
179
180
|
},
|
|
180
181
|
async assemble({ sessionId, userId, messages, tokenBudget }: ContextAssembleArgs) {
|
|
181
182
|
const PROFILE = process.env.OPENCLAW_PROFILE_ASSEMBLE === "1";
|
|
183
|
+
const DEBUG_RECOVERY = process.env.LONGMEMEVAL_DEBUG_RANKING === "1";
|
|
182
184
|
|
|
183
185
|
const queryText = messages.at(-1)?.content ?? "";
|
|
184
186
|
if (!queryText) {
|
|
@@ -256,6 +258,7 @@ export function buildContextEngineFactory(
|
|
|
256
258
|
messages,
|
|
257
259
|
tokenBudget,
|
|
258
260
|
profiler,
|
|
261
|
+
debugRecovery: DEBUG_RECOVERY,
|
|
259
262
|
});
|
|
260
263
|
|
|
261
264
|
const profileLines = profiler?.lines() ?? [];
|
|
@@ -289,6 +292,7 @@ export function buildContextEngineFactory(
|
|
|
289
292
|
messages,
|
|
290
293
|
tokenBudget,
|
|
291
294
|
profiler,
|
|
295
|
+
debugRecovery,
|
|
292
296
|
}: {
|
|
293
297
|
rpc: Awaited<ReturnType<RpcGetter>>;
|
|
294
298
|
cfg: PluginConfig;
|
|
@@ -304,6 +308,7 @@ export function buildContextEngineFactory(
|
|
|
304
308
|
messages: Array<{ role: string; content: string }>;
|
|
305
309
|
tokenBudget: number;
|
|
306
310
|
profiler: { mark(label: string): void; emit(): void } | null;
|
|
311
|
+
debugRecovery: boolean;
|
|
307
312
|
}): Promise<ContextAssembleResult> {
|
|
308
313
|
const memoryBudget = tokenBudget * (cfg.tokenBudgetFraction ?? 0.25);
|
|
309
314
|
const hardItems = authoredHard;
|
|
@@ -517,7 +522,10 @@ export function buildContextEngineFactory(
|
|
|
517
522
|
minTopK: cfg.recoveryMinTopK ?? 4,
|
|
518
523
|
meanConfidenceThresh: cfg.recoveryMinConfidenceMean ?? 0.5,
|
|
519
524
|
});
|
|
520
|
-
const recoveryReserveTokens = recoveryTrigger.fire
|
|
525
|
+
const crossSessionRawRecovery =
|
|
526
|
+
rawSessionTurns.length === 0 &&
|
|
527
|
+
sessionHits.results.length === 0;
|
|
528
|
+
const recoveryReserveTokens = (recoveryTrigger.fire || crossSessionRawRecovery)
|
|
521
529
|
? Math.min(memoryBudget, Math.max(Math.floor(memoryBudget * 0.10), 16), 128)
|
|
522
530
|
: 0;
|
|
523
531
|
const elevatedGuidanceBudget = Math.max(
|
|
@@ -553,26 +561,83 @@ export function buildContextEngineFactory(
|
|
|
553
561
|
// Recovery is a policy overlay — it appends raw content only when triggered,
|
|
554
562
|
// it never modifies the C_total(q) output and does not spend from tau_V.
|
|
555
563
|
let recoveryItems: SearchResult[] = [];
|
|
556
|
-
|
|
564
|
+
let rawUserRecoveryDebug: NonNullable<NonNullable<ContextAssembleResult["_debug"]>["rawUserRecoveryCandidates"]> = [];
|
|
565
|
+
if (recoveryTrigger.fire || crossSessionRawRecovery) {
|
|
557
566
|
profiler?.mark("recovery_expand");
|
|
558
|
-
// Recovery searches immutable raw history directly — never the active view, elevated shards,
|
|
559
|
-
// or authored collections. Raw turns are immutable (storage axiom, unchanged).
|
|
560
567
|
const recoveryExcludeIDs = [...excluded, ...recentTailIDs, ...theoremSelectedIDs];
|
|
561
|
-
const
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
568
|
+
const recoveryCandidates: SearchResult[] = [];
|
|
569
|
+
|
|
570
|
+
if (recoveryTrigger.fire) {
|
|
571
|
+
// Recovery searches immutable raw session history directly — never the active view,
|
|
572
|
+
// elevated shards, or authored collections.
|
|
573
|
+
const rawResults = await rpc.call<{ results: SearchResult[] }>("query_raw_session", {
|
|
574
|
+
sessionId,
|
|
575
|
+
text: queryText,
|
|
576
|
+
k: Math.max(cfg.topK ?? 8, 4),
|
|
577
|
+
excludeIds: recoveryExcludeIDs,
|
|
578
|
+
});
|
|
579
|
+
recoveryCandidates.push(
|
|
580
|
+
...(rawResults.results ?? []).map((item) => ({
|
|
581
|
+
...item,
|
|
582
|
+
finalScore: typeof item.finalScore === "number" ? item.finalScore : item.score,
|
|
583
|
+
metadata: {
|
|
584
|
+
...item.metadata,
|
|
585
|
+
recovery_fallback: true,
|
|
586
|
+
recovery_scope: "session_raw",
|
|
587
|
+
},
|
|
588
|
+
})),
|
|
589
|
+
);
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
if (crossSessionRawRecovery) {
|
|
593
|
+
// When a fresh query session has no searchable history yet, durable memory can be too
|
|
594
|
+
// coarse for exact-turn recall. Search the immutable per-user raw turn index instead of
|
|
595
|
+
// widening topK so precise historical turns still have a bounded path back into context.
|
|
596
|
+
const rawUserResults = await rpc.call<{ results: SearchResult[] }>("search_text", {
|
|
597
|
+
collection: `turns:${userId}`,
|
|
598
|
+
text: queryText,
|
|
599
|
+
k: Math.max((cfg.topK ?? 8) * 4, 8),
|
|
600
|
+
excludeIds: recoveryExcludeIDs,
|
|
601
|
+
});
|
|
602
|
+
const reranked = rankRawUserRecoveryCandidates(
|
|
603
|
+
annotateCollection(rawUserResults.results ?? [], `turns:${userId}`),
|
|
604
|
+
{ queryText },
|
|
605
|
+
);
|
|
606
|
+
if (debugRecovery) {
|
|
607
|
+
rawUserRecoveryDebug = reranked.debug.slice(0, 8).map((item) => ({
|
|
608
|
+
...item,
|
|
609
|
+
selected: false,
|
|
610
|
+
}));
|
|
611
|
+
}
|
|
612
|
+
recoveryCandidates.push(
|
|
613
|
+
...reranked.ranked.map((item) => ({
|
|
614
|
+
...item,
|
|
615
|
+
finalScore: typeof item.finalScore === "number" ? item.finalScore : item.score,
|
|
616
|
+
metadata: {
|
|
617
|
+
...item.metadata,
|
|
618
|
+
recovery_fallback: true,
|
|
619
|
+
recovery_scope: "user_turns",
|
|
620
|
+
},
|
|
621
|
+
})),
|
|
622
|
+
);
|
|
623
|
+
}
|
|
624
|
+
|
|
625
|
+
const fittedRecovery = fitPromptBudget(
|
|
626
|
+
dedupeRecoveryCandidates(recoveryCandidates),
|
|
627
|
+
recoveryReserveTokens,
|
|
628
|
+
);
|
|
629
|
+
recoveryItems = fittedRecovery;
|
|
630
|
+
if (debugRecovery && rawUserRecoveryDebug.length > 0) {
|
|
631
|
+
const selectedIDs = new Set(
|
|
632
|
+
fittedRecovery
|
|
633
|
+
.filter((item) => item.metadata.recovery_scope === "user_turns")
|
|
634
|
+
.map((item: SearchResult) => item.id),
|
|
635
|
+
);
|
|
636
|
+
rawUserRecoveryDebug = rawUserRecoveryDebug.map((item) => ({
|
|
637
|
+
...item,
|
|
638
|
+
selected: selectedIDs.has(item.id),
|
|
639
|
+
}));
|
|
640
|
+
}
|
|
576
641
|
}
|
|
577
642
|
|
|
578
643
|
const selected = [
|
|
@@ -598,6 +663,13 @@ export function buildContextEngineFactory(
|
|
|
598
663
|
messages: [...selectedMessages, ...messages],
|
|
599
664
|
estimatedTokens: countTokens(selectedMessages) + countTokens(messages),
|
|
600
665
|
systemPromptAddition: buildMemoryHeader(selected),
|
|
666
|
+
_debug: debugRecovery
|
|
667
|
+
? {
|
|
668
|
+
recoveryTriggerFired: recoveryTrigger.fire,
|
|
669
|
+
crossSessionRawRecovery,
|
|
670
|
+
rawUserRecoveryCandidates: rawUserRecoveryDebug,
|
|
671
|
+
}
|
|
672
|
+
: undefined,
|
|
601
673
|
};
|
|
602
674
|
},
|
|
603
675
|
async compact({ sessionId, force, targetSize }: ContextCompactArgs) {
|
|
@@ -836,6 +908,19 @@ function groupAccessCountUpdates(items: SearchResult[]): Array<{ collection: str
|
|
|
836
908
|
return [...grouped.entries()].map(([collection, ids]) => ({ collection, ids }));
|
|
837
909
|
}
|
|
838
910
|
|
|
911
|
+
function dedupeRecoveryCandidates(items: SearchResult[]): SearchResult[] {
|
|
912
|
+
const byKey = new Map<string, SearchResult>();
|
|
913
|
+
for (const item of items) {
|
|
914
|
+
const collection = typeof item.metadata.collection === "string" ? item.metadata.collection : "";
|
|
915
|
+
const key = `${collection}::${item.id}`;
|
|
916
|
+
const existing = byKey.get(key);
|
|
917
|
+
if (!existing || (item.finalScore ?? item.score) > (existing.finalScore ?? existing.score)) {
|
|
918
|
+
byKey.set(key, item);
|
|
919
|
+
}
|
|
920
|
+
}
|
|
921
|
+
return [...byKey.values()].sort((left, right) => (right.finalScore ?? right.score) - (left.finalScore ?? left.score));
|
|
922
|
+
}
|
|
923
|
+
|
|
839
924
|
function clampFraction(value: number | undefined): number {
|
|
840
925
|
if (typeof value !== "number" || !Number.isFinite(value)) {
|
|
841
926
|
return 0;
|
package/src/scoring.ts
CHANGED
|
@@ -32,6 +32,22 @@ interface HopOptions {
|
|
|
32
32
|
thetaHop?: number;
|
|
33
33
|
}
|
|
34
34
|
|
|
35
|
+
interface RawUserRecoveryOptions {
|
|
36
|
+
queryText: string;
|
|
37
|
+
nowMs?: number;
|
|
38
|
+
recencyLambda?: number;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
export interface RawUserRecoveryDebugCandidate {
|
|
42
|
+
id: string;
|
|
43
|
+
text: string;
|
|
44
|
+
semanticScore: number;
|
|
45
|
+
lexicalCoverage: number;
|
|
46
|
+
recencyScore: number;
|
|
47
|
+
finalScore: number;
|
|
48
|
+
rationale: string;
|
|
49
|
+
}
|
|
50
|
+
|
|
35
51
|
interface ExpansionOptions {
|
|
36
52
|
confidenceThreshold?: number;
|
|
37
53
|
maxDepth?: number;
|
|
@@ -296,6 +312,61 @@ export function expandSection7HopCandidates(
|
|
|
296
312
|
.sort((left, right) => (right.finalScore ?? 0) - (left.finalScore ?? 0));
|
|
297
313
|
}
|
|
298
314
|
|
|
315
|
+
export function rankRawUserRecoveryCandidates(
|
|
316
|
+
items: SearchResult[],
|
|
317
|
+
opts: RawUserRecoveryOptions,
|
|
318
|
+
): { ranked: SearchResult[]; debug: RawUserRecoveryDebugCandidate[] } {
|
|
319
|
+
const now = opts.nowMs ?? Date.now();
|
|
320
|
+
const recencyLambda = Math.max(0, opts.recencyLambda ?? 0.00001);
|
|
321
|
+
const keywords = extractKeywords(opts.queryText);
|
|
322
|
+
|
|
323
|
+
const ranked = items
|
|
324
|
+
.map((item) => {
|
|
325
|
+
const semanticScore = clamp01(typeof item.score === "number" ? item.score : 0);
|
|
326
|
+
const lexicalCoverage = normalizedKeywordCoverage(keywords, item.text);
|
|
327
|
+
const recencyScore = computeRecencyScore(item, now, recencyLambda);
|
|
328
|
+
const finalScore = clamp01((0.30 * semanticScore) + (0.60 * lexicalCoverage) + (0.10 * recencyScore));
|
|
329
|
+
const rationale = buildRawUserRecoveryRationale({
|
|
330
|
+
semanticScore,
|
|
331
|
+
lexicalCoverage,
|
|
332
|
+
recencyScore,
|
|
333
|
+
});
|
|
334
|
+
|
|
335
|
+
return {
|
|
336
|
+
ranked: {
|
|
337
|
+
...item,
|
|
338
|
+
finalScore,
|
|
339
|
+
},
|
|
340
|
+
debug: {
|
|
341
|
+
id: item.id,
|
|
342
|
+
text: item.text,
|
|
343
|
+
semanticScore,
|
|
344
|
+
lexicalCoverage,
|
|
345
|
+
recencyScore,
|
|
346
|
+
finalScore,
|
|
347
|
+
rationale,
|
|
348
|
+
},
|
|
349
|
+
};
|
|
350
|
+
})
|
|
351
|
+
.sort((left, right) => {
|
|
352
|
+
if (right.ranked.finalScore !== left.ranked.finalScore) {
|
|
353
|
+
return (right.ranked.finalScore ?? 0) - (left.ranked.finalScore ?? 0);
|
|
354
|
+
}
|
|
355
|
+
if (right.debug.lexicalCoverage !== left.debug.lexicalCoverage) {
|
|
356
|
+
return right.debug.lexicalCoverage - left.debug.lexicalCoverage;
|
|
357
|
+
}
|
|
358
|
+
if (right.debug.semanticScore !== left.debug.semanticScore) {
|
|
359
|
+
return right.debug.semanticScore - left.debug.semanticScore;
|
|
360
|
+
}
|
|
361
|
+
return left.ranked.id.localeCompare(right.ranked.id);
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
return {
|
|
365
|
+
ranked: ranked.map((entry) => entry.ranked),
|
|
366
|
+
debug: ranked.map((entry) => entry.debug),
|
|
367
|
+
};
|
|
368
|
+
}
|
|
369
|
+
|
|
299
370
|
function clamp01(value: number): number {
|
|
300
371
|
return Math.min(1, Math.max(0, value));
|
|
301
372
|
}
|
|
@@ -392,6 +463,30 @@ function normalizedFrequency(accessCount: number, maxAccessCount: number): numbe
|
|
|
392
463
|
return Math.log(1 + accessCount) / Math.log(1 + maxAccessCount + 1);
|
|
393
464
|
}
|
|
394
465
|
|
|
466
|
+
function computeRecencyScore(item: SearchResult, now: number, recencyLambda: number): number {
|
|
467
|
+
const ts = typeof item.metadata.ts === "number" ? item.metadata.ts : now;
|
|
468
|
+
const ageSeconds = Math.max(0, now - ts) / 1000;
|
|
469
|
+
return Math.exp(-recencyLambda * ageSeconds);
|
|
470
|
+
}
|
|
471
|
+
|
|
472
|
+
function buildRawUserRecoveryRationale(scores: {
|
|
473
|
+
semanticScore: number;
|
|
474
|
+
lexicalCoverage: number;
|
|
475
|
+
recencyScore: number;
|
|
476
|
+
}): string {
|
|
477
|
+
const lexicalDelta = scores.lexicalCoverage - scores.semanticScore;
|
|
478
|
+
if (lexicalDelta > 0.15) {
|
|
479
|
+
return "lexical coverage lifted this candidate above its semantic score";
|
|
480
|
+
}
|
|
481
|
+
if (lexicalDelta < -0.15) {
|
|
482
|
+
return "semantic similarity carried this candidate despite weaker lexical coverage";
|
|
483
|
+
}
|
|
484
|
+
if (scores.recencyScore > 0.9) {
|
|
485
|
+
return "semantic and lexical scores were close; recency broke the tie";
|
|
486
|
+
}
|
|
487
|
+
return "semantic and lexical scores were balanced";
|
|
488
|
+
}
|
|
489
|
+
|
|
395
490
|
function extractKeywords(text: string): string[] {
|
|
396
491
|
const tokens = normalizeTerms(text);
|
|
397
492
|
const seen = new Set<string>();
|
package/src/types.ts
CHANGED
|
@@ -196,6 +196,20 @@ export interface ContextAssembleResult {
|
|
|
196
196
|
estimatedTokens: number;
|
|
197
197
|
systemPromptAddition: string;
|
|
198
198
|
_profile?: string[];
|
|
199
|
+
_debug?: {
|
|
200
|
+
recoveryTriggerFired?: boolean;
|
|
201
|
+
crossSessionRawRecovery?: boolean;
|
|
202
|
+
rawUserRecoveryCandidates?: Array<{
|
|
203
|
+
id: string;
|
|
204
|
+
text: string;
|
|
205
|
+
selected: boolean;
|
|
206
|
+
semanticScore: number;
|
|
207
|
+
lexicalCoverage: number;
|
|
208
|
+
recencyScore: number;
|
|
209
|
+
finalScore: number;
|
|
210
|
+
rationale: string;
|
|
211
|
+
}>;
|
|
212
|
+
};
|
|
199
213
|
}
|
|
200
214
|
|
|
201
215
|
export interface ContextCompactArgs {
|