npm - @xdarkicex/openclaw-memory-libravdb - Versions diffs - 1.3.17 → 1.3.19 - Mend

@xdarkicex/openclaw-memory-libravdb 1.3.17 → 1.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/README.md +53 -0
package/package.json +4 -1
package/packaging/homebrew/libravdbd.rb.tmpl +21 -11
package/src/context-engine.ts +104 -19
package/src/scoring.ts +95 -0
package/src/types.ts +14 -0

package/README.md CHANGED Viewed

@@ -185,6 +185,59 @@ The formal math lives in:
 - [docs/ast-v2.md](./docs/ast-v2.md)
 - [docs/elevated-guidance.md](./docs/elevated-guidance.md)
+## LongMemEval Harness
+For internal tuning, the repo includes a local LongMemEval harness that runs the
+dataset through the plugin layer and measures whether the assembled prompt still
+contains the evidence turns.
+The benchmark runner is committed, but the dataset and generated reports are not.
+Keep downloaded data and local outputs under `benchmarks/longmemeval/`, which is
+ignored by default.
+The harness writes JSONL incrementally, so partial results survive if a transient
+daemon failure interrupts a long run.
+The run summary now prints a compact table with total questions, processed rows,
+skipped abstentions, errors, session hit rate, turn hit rate, and average prompt
+size.
+Run it with:
+```bash
+LONGMEMEVAL_DATA_FILE=/path/to/longmemeval_oracle.json pnpm run benchmark:longmemeval
+```
+If you already have a daemon running and do not want the benchmark to spawn
+another one, set:
+```bash
+LONGMEMEVAL_USE_EXISTING_DAEMON=1 LONGMEMEVAL_SIDECAR_PATH=unix:/path/to/libravdb.sock
+```
+If the local test daemon drops mid-run, the benchmark will restart it and retry
+the current instance once before recording an error result.
+Optional settings:
+- `LONGMEMEVAL_LIMIT` to cap the number of questions
+- `LONGMEMEVAL_TOPK` to change the search budget
+- `LONGMEMEVAL_OUT_FILE` to write JSONL records for analysis
+To score a hypothesis JSONL file with the official LongMemEval evaluator, set
+`LONGMEMEVAL_EVAL_REPO` to a local checkout of the benchmark and run:
+```bash
+LONGMEMEVAL_EVAL_REPO=/path/to/LongMemEval \
+LONGMEMEVAL_HYPOTHESIS_FILE=/path/to/hypotheses.jsonl \
+LONGMEMEVAL_DATA_FILE=/path/to/longmemeval_oracle.json \
+OPENAI_API_KEY=... \
+pnpm run benchmark:longmemeval:score
+```
+That scorer wrapper shells out to the official Python evaluation script and then
+prints the aggregate metrics from the generated log when available.
 ## Compaction Model
 This system does not treat long chats as append-only forever.

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@xdarkicex/openclaw-memory-libravdb",
-  "version": "1.3.17",
+  "version": "1.3.19",
   "type": "module",
   "publishConfig": {
     "access": "public"
@@ -31,6 +31,9 @@
     "gate:assemble_optimization": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && OPENCLAW_PROFILE_ASSEMBLE=1 OPENCLAW_ENFORCE_ASSEMBLE_EVIDENCE_GATE=1 node --test --test-name-pattern=\"real sidecar mid-sized session search benchmark\" .ts-build/test/integration/host-flow.test.js",
     "probe:session_recall": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && OPENCLAW_PROFILE_ASSEMBLE=1 node --test --test-name-pattern=\"real sidecar mid-sized session search benchmark\" .ts-build/test/integration/host-flow.test.js",
     "probe:session_recall_threshold": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && OPENCLAW_PROFILE_ASSEMBLE=1 node --test --test-name-pattern=\"real sidecar session_recall index threshold probe\" .ts-build/test/integration/host-flow.test.js",
+    "benchmark:longmemeval": "./.ts-toolchain/node_modules/.bin/tsc -p tsconfig.tests.json && node --test .ts-build/test/integration/longmemeval-benchmark.test.js",
+    "benchmark:longmemeval:score": "node scripts/longmemeval-score.mjs",
+    "benchmark:longmemeval:diagnose": "node scripts/longmemeval-diagnose.mjs",
     "build:daemon": "bash scripts/build-daemon.sh"
   },
   "dependencies": {

package/packaging/homebrew/libravdbd.rb.tmpl CHANGED Viewed

@@ -26,18 +26,18 @@ class Libravdbd < Formula
   if OS.mac?
     resource "onnxruntime" do
       url "https://github.com/microsoft/onnxruntime/releases/download/v1.23.0/onnxruntime-osx-universal2-1.23.0.tgz"
-      sha256 :no_check
+      sha256 "5e4365fb4a05aef353f6232b9a1848f37e608c421c9227e9224572205c0cfc08"
     end
   elsif OS.linux?
     if Hardware::CPU.arm?
       resource "onnxruntime" do
         url "https://github.com/microsoft/onnxruntime/releases/download/v1.23.0/onnxruntime-linux-aarch64-1.23.0.tgz"
-        sha256 :no_check
+        sha256 "0b9f47d140411d938e47915824d8daaa424df95a88b5f1fc843172a75168f7a0"
       end
     else
       resource "onnxruntime" do
         url "https://github.com/microsoft/onnxruntime/releases/download/v1.23.0/onnxruntime-linux-x64-1.23.0.tgz"
-        sha256 :no_check
+        sha256 "b6deea7f2e22c10c043019f294a0ea4d2a6c0ae52a009c34847640db75ec5580"
       end
     end
   end
@@ -54,12 +54,12 @@ class Libravdbd < Formula
   resource "all-minilm-l6-v2-model" do
     url "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/onnx/model.onnx"
-    sha256 "759c3cd2b7fe7e93933ad23c4c9181b7396442a2ed746ec7c1d46192c469c46e"
+    sha256 "6fd5d72fe4589f189f8ebc006442dbb529bb7ce38f8082112682524616046452"
   end
   resource "all-minilm-l6-v2-tokenizer" do
     url "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/tokenizer.json"
-    sha256 "da0e79933b9ed51798a3ae27893d3c5fa4a201126cef75586296df9b4d2c62a0"
+    sha256 "be50c3628f2bf5bb5e3a7f17b1f74611b2561a3a27eeab05e5aa30f411572037"
   end
   resource "t5-small-encoder" do
@@ -88,7 +88,7 @@ class Libravdbd < Formula
   end
   resource "provision" do
-    url "https://github.com/xDarkicex/openclaw-memory-libravdb/releases/download/v#{version}/provision.sh"
+    url "https://github.com/xDarkicex/openclaw-memory-libravdb/releases/download/v__VERSION__/provision.sh"
     sha256 "__SHA256_PROVISION__"
   end
@@ -107,7 +107,13 @@ class Libravdbd < Formula
     t5_dir.mkpath
     resource("onnxruntime").stage do
-      cp_r Dir["onnxruntime-*"].first, runtime_dir
+      # Homebrew may auto-strip the top-level dir from the tgz
+      subdir = Dir["onnxruntime-*"].first
+      if subdir
+        cp_r "#{subdir}/.", runtime_dir
+      else
+        cp_r ".", runtime_dir
+      end
     end
     resource("nomic-embed-text-v1.5-model").stage do
@@ -143,12 +149,14 @@ class Libravdbd < Formula
     end
     write_summarizer_manifest(t5_dir, "t5-small")
-    libexec.install resource("provision")
+    resource("provision").stage do
+      libexec.install "provision.sh"
+    end
     chmod 0755, libexec/"provision.sh"
   end
   def post_install
-    (var/"clawdb/data").mkpath
+    (var/"clawdb").mkpath
     (var/"clawdb/run").mkpath
   end
@@ -160,7 +168,8 @@ class Libravdbd < Formula
         #{libexec}/provision.sh --target #{prefix}/models
-      Data directory:   #{var}/clawdb/data
+      Data directory:   #{var}/clawdb
+      Database file:    #{var}/clawdb/data.libravdb
       Socket directory: #{var}/clawdb/run
     EOS
   end
@@ -202,7 +211,8 @@ class Libravdbd < Formula
   service do
     run [opt_bin/"libravdbd", "serve"]
     environment_variables LIBRAVDB_RPC_ENDPOINT: "unix:#{var}/clawdb/run/libravdb.sock",
-                          LIBRAVDB_DB_PATH: "#{var}/clawdb/data",
+                          LIBRAVDB_DB_PATH: "#{var}/clawdb/data.libravdb",
+                          LIBRAVDB_ONNX_RUNTIME: "#{opt_prefix}/models/onnxruntime/lib/libonnxruntime.dylib",
                           LIBRAVDB_SUMMARIZER_BACKEND: "bundled"
     keep_alive true
     working_dir var/"clawdb"

package/src/context-engine.ts CHANGED Viewed

@@ -7,6 +7,7 @@ import {
 import {
   detectRetrievalFailure,
   expandSection7HopCandidates,
+  rankRawUserRecoveryCandidates,
   mergeSection7VariantCandidates,
   rankSection7VariantCandidates,
 } from "./scoring.js";
@@ -179,6 +180,7 @@ export function buildContextEngineFactory(
     },
     async assemble({ sessionId, userId, messages, tokenBudget }: ContextAssembleArgs) {
       const PROFILE = process.env.OPENCLAW_PROFILE_ASSEMBLE === "1";
+      const DEBUG_RECOVERY = process.env.LONGMEMEVAL_DEBUG_RANKING === "1";
       const queryText = messages.at(-1)?.content ?? "";
       if (!queryText) {
@@ -256,6 +258,7 @@ export function buildContextEngineFactory(
           messages,
           tokenBudget,
           profiler,
+          debugRecovery: DEBUG_RECOVERY,
         });
         const profileLines = profiler?.lines() ?? [];
@@ -289,6 +292,7 @@ export function buildContextEngineFactory(
       messages,
       tokenBudget,
       profiler,
+      debugRecovery,
     }: {
       rpc: Awaited<ReturnType<RpcGetter>>;
       cfg: PluginConfig;
@@ -304,6 +308,7 @@ export function buildContextEngineFactory(
       messages: Array<{ role: string; content: string }>;
       tokenBudget: number;
       profiler: { mark(label: string): void; emit(): void } | null;
+      debugRecovery: boolean;
     }): Promise<ContextAssembleResult> {
       const memoryBudget = tokenBudget * (cfg.tokenBudgetFraction ?? 0.25);
       const hardItems = authoredHard;
@@ -517,7 +522,10 @@ export function buildContextEngineFactory(
         minTopK: cfg.recoveryMinTopK ?? 4,
         meanConfidenceThresh: cfg.recoveryMinConfidenceMean ?? 0.5,
       });
-      const recoveryReserveTokens = recoveryTrigger.fire
+      const crossSessionRawRecovery =
+        rawSessionTurns.length === 0 &&
+        sessionHits.results.length === 0;
+      const recoveryReserveTokens = (recoveryTrigger.fire || crossSessionRawRecovery)
         ? Math.min(memoryBudget, Math.max(Math.floor(memoryBudget * 0.10), 16), 128)
         : 0;
       const elevatedGuidanceBudget = Math.max(
@@ -553,26 +561,83 @@ export function buildContextEngineFactory(
       // Recovery is a policy overlay — it appends raw content only when triggered,
       // it never modifies the C_total(q) output and does not spend from tau_V.
       let recoveryItems: SearchResult[] = [];
-      if (recoveryTrigger.fire) {
+      let rawUserRecoveryDebug: NonNullable<NonNullable<ContextAssembleResult["_debug"]>["rawUserRecoveryCandidates"]> = [];
+      if (recoveryTrigger.fire || crossSessionRawRecovery) {
         profiler?.mark("recovery_expand");
-        // Recovery searches immutable raw history directly — never the active view, elevated shards,
-        // or authored collections. Raw turns are immutable (storage axiom, unchanged).
         const recoveryExcludeIDs = [...excluded, ...recentTailIDs, ...theoremSelectedIDs];
-        const rawResults = await rpc.call<{ results: SearchResult[] }>("query_raw_session", {
-          sessionId,
-          text: queryText,
-          k: Math.max(cfg.topK ?? 8, 4),
-          excludeIds: recoveryExcludeIDs,
-        });
-        // Fit recovered raw items to the reserved recovery budget — never exceed it.
-        const fittedRecovery = fitPromptBudget(rawResults.results ?? [], recoveryReserveTokens);
-        recoveryItems = fittedRecovery.map((item: SearchResult) => ({
-          ...item,
-          metadata: {
-            ...item.metadata,
-            recovery_fallback: true,
-          },
-        }));
+        const recoveryCandidates: SearchResult[] = [];
+        if (recoveryTrigger.fire) {
+          // Recovery searches immutable raw session history directly — never the active view,
+          // elevated shards, or authored collections.
+          const rawResults = await rpc.call<{ results: SearchResult[] }>("query_raw_session", {
+            sessionId,
+            text: queryText,
+            k: Math.max(cfg.topK ?? 8, 4),
+            excludeIds: recoveryExcludeIDs,
+          });
+          recoveryCandidates.push(
+            ...(rawResults.results ?? []).map((item) => ({
+              ...item,
+              finalScore: typeof item.finalScore === "number" ? item.finalScore : item.score,
+              metadata: {
+                ...item.metadata,
+                recovery_fallback: true,
+                recovery_scope: "session_raw",
+              },
+            })),
+          );
+        }
+        if (crossSessionRawRecovery) {
+          // When a fresh query session has no searchable history yet, durable memory can be too
+          // coarse for exact-turn recall. Search the immutable per-user raw turn index instead of
+          // widening topK so precise historical turns still have a bounded path back into context.
+          const rawUserResults = await rpc.call<{ results: SearchResult[] }>("search_text", {
+            collection: `turns:${userId}`,
+            text: queryText,
+            k: Math.max((cfg.topK ?? 8) * 4, 8),
+            excludeIds: recoveryExcludeIDs,
+          });
+          const reranked = rankRawUserRecoveryCandidates(
+            annotateCollection(rawUserResults.results ?? [], `turns:${userId}`),
+            { queryText },
+          );
+          if (debugRecovery) {
+            rawUserRecoveryDebug = reranked.debug.slice(0, 8).map((item) => ({
+              ...item,
+              selected: false,
+            }));
+          }
+          recoveryCandidates.push(
+            ...reranked.ranked.map((item) => ({
+              ...item,
+              finalScore: typeof item.finalScore === "number" ? item.finalScore : item.score,
+              metadata: {
+                ...item.metadata,
+                recovery_fallback: true,
+                recovery_scope: "user_turns",
+              },
+            })),
+          );
+        }
+        const fittedRecovery = fitPromptBudget(
+          dedupeRecoveryCandidates(recoveryCandidates),
+          recoveryReserveTokens,
+        );
+        recoveryItems = fittedRecovery;
+        if (debugRecovery && rawUserRecoveryDebug.length > 0) {
+          const selectedIDs = new Set(
+            fittedRecovery
+              .filter((item) => item.metadata.recovery_scope === "user_turns")
+              .map((item: SearchResult) => item.id),
+          );
+          rawUserRecoveryDebug = rawUserRecoveryDebug.map((item) => ({
+            ...item,
+            selected: selectedIDs.has(item.id),
+          }));
+        }
       }
       const selected = [
@@ -598,6 +663,13 @@ export function buildContextEngineFactory(
         messages: [...selectedMessages, ...messages],
         estimatedTokens: countTokens(selectedMessages) + countTokens(messages),
         systemPromptAddition: buildMemoryHeader(selected),
+        _debug: debugRecovery
+          ? {
+              recoveryTriggerFired: recoveryTrigger.fire,
+              crossSessionRawRecovery,
+              rawUserRecoveryCandidates: rawUserRecoveryDebug,
+            }
+          : undefined,
       };
     },
     async compact({ sessionId, force, targetSize }: ContextCompactArgs) {
@@ -836,6 +908,19 @@ function groupAccessCountUpdates(items: SearchResult[]): Array<{ collection: str
   return [...grouped.entries()].map(([collection, ids]) => ({ collection, ids }));
 }
+function dedupeRecoveryCandidates(items: SearchResult[]): SearchResult[] { // Collapses duplicates, keeping the best-scored copy of each (collection, id) pair, and returns them ordered best-first.
+  const byKey = new Map<string, SearchResult>();
+  for (const item of items) {
+    const collection = typeof item.metadata.collection === "string" ? item.metadata.collection : ""; // Items whose collection is missing or not a string all share the empty-string namespace.
+    const key = `${collection}::${item.id}`;
+    const existing = byKey.get(key);
+    if (!existing || (item.finalScore ?? item.score) > (existing.finalScore ?? existing.score)) { // Strict greater-than: on equal scores the first-seen item is kept.
+      byKey.set(key, item);
+    }
+  }
+  return [...byKey.values()].sort((left, right) => (right.finalScore ?? right.score) - (left.finalScore ?? left.score)); // Descending by finalScore, falling back to score when finalScore is absent.
+}
 function clampFraction(value: number | undefined): number {
   if (typeof value !== "number" || !Number.isFinite(value)) {
     return 0;

package/src/scoring.ts CHANGED Viewed

@@ -32,6 +32,22 @@ interface HopOptions {
   thetaHop?: number;
 }
+interface RawUserRecoveryOptions { // Inputs for rankRawUserRecoveryCandidates.
+  queryText: string; // Query text from which the ranker extracts keywords for the lexical-coverage term.
+  nowMs?: number; // Reference time for the recency term; the ranker falls back to Date.now() when omitted.
+  recencyLambda?: number; // Exponential decay rate per second of age; the ranker defaults to 0.00001 and clamps negative values to 0.
+}
+export interface RawUserRecoveryDebugCandidate { // Per-candidate score breakdown produced by rankRawUserRecoveryCandidates alongside the ranked results.
+  id: string; // Copied from the scored search result.
+  text: string; // Copied from the scored search result.
+  semanticScore: number; // Search score clamped to [0, 1]; 0 when the result has no numeric score.
+  lexicalCoverage: number; // Keyword-coverage term computed from the query keywords and the candidate text.
+  recencyScore: number; // Exponential age decay; 1 corresponds to age zero.
+  finalScore: number; // Weighted blend of the three components, used for ordering.
+  rationale: string; // Human-readable note on which component dominated.
+}
 interface ExpansionOptions {
   confidenceThreshold?: number;
   maxDepth?: number;
@@ -296,6 +312,61 @@ export function expandSection7HopCandidates(
     .sort((left, right) => (right.finalScore ?? 0) - (left.finalScore ?? 0));
 }
+export function rankRawUserRecoveryCandidates( // Re-scores raw-turn search hits with a lexical-heavy blend and returns them best-first together with a parallel debug breakdown.
+  items: SearchResult[], // Candidates to score; the array and its elements are not mutated (map + spread build new values).
+  opts: RawUserRecoveryOptions,
+): { ranked: SearchResult[]; debug: RawUserRecoveryDebugCandidate[] } { // ranked[i] and debug[i] describe the same candidate.
+  const now = opts.nowMs ?? Date.now();
+  const recencyLambda = Math.max(0, opts.recencyLambda ?? 0.00001); // Negative decay rates are clamped to 0, which disables decay.
+  const keywords = extractKeywords(opts.queryText); // Computed once and reused for every candidate.
+  const ranked = items
+    .map((item) => {
+      const semanticScore = clamp01(typeof item.score === "number" ? item.score : 0);
+      const lexicalCoverage = normalizedKeywordCoverage(keywords, item.text);
+      const recencyScore = computeRecencyScore(item, now, recencyLambda);
+      const finalScore = clamp01((0.30 * semanticScore) + (0.60 * lexicalCoverage) + (0.10 * recencyScore)); // Weights: 30% semantic, 60% lexical, 10% recency.
+      const rationale = buildRawUserRecoveryRationale({
+        semanticScore,
+        lexicalCoverage,
+        recencyScore,
+      });
+      return {
+        ranked: {
+          ...item,
+          finalScore, // Overrides any finalScore already present on the incoming item.
+        },
+        debug: {
+          id: item.id,
+          text: item.text,
+          semanticScore,
+          lexicalCoverage,
+          recencyScore,
+          finalScore,
+          rationale,
+        },
+      };
+    })
+    .sort((left, right) => { // Order: finalScore desc, then lexicalCoverage desc, then semanticScore desc, then id ascending.
+      if (right.ranked.finalScore !== left.ranked.finalScore) {
+        return (right.ranked.finalScore ?? 0) - (left.ranked.finalScore ?? 0);
+      }
+      if (right.debug.lexicalCoverage !== left.debug.lexicalCoverage) {
+        return right.debug.lexicalCoverage - left.debug.lexicalCoverage;
+      }
+      if (right.debug.semanticScore !== left.debug.semanticScore) {
+        return right.debug.semanticScore - left.debug.semanticScore;
+      }
+      return left.ranked.id.localeCompare(right.ranked.id); // Final tie-break on id keeps the ordering deterministic.
+    });
+  return {
+    ranked: ranked.map((entry) => entry.ranked),
+    debug: ranked.map((entry) => entry.debug),
+  };
+}
 function clamp01(value: number): number {
   return Math.min(1, Math.max(0, value));
 }
@@ -392,6 +463,30 @@ function normalizedFrequency(accessCount: number, maxAccessCount: number): numbe
   return Math.log(1 + accessCount) / Math.log(1 + maxAccessCount + 1);
 }
+function computeRecencyScore(item: SearchResult, now: number, recencyLambda: number): number { // Exponential decay of the item's age: exp(-recencyLambda * ageSeconds).
+  const ts = typeof item.metadata.ts === "number" ? item.metadata.ts : now; // NOTE(review): a missing or non-numeric ts falls back to now, so such items get age 0 and the maximum score of 1 — confirm this is intended.
+  const ageSeconds = Math.max(0, now - ts) / 1000; // Timestamps in the future clamp to age 0; the division presumes ts and now are epoch milliseconds — TODO confirm.
+  return Math.exp(-recencyLambda * ageSeconds);
+}
+function buildRawUserRecoveryRationale(scores: { // Picks a fixed explanatory sentence from the component scores; checks run in order and the first match wins.
+  semanticScore: number;
+  lexicalCoverage: number;
+  recencyScore: number;
+}): string {
+  const lexicalDelta = scores.lexicalCoverage - scores.semanticScore; // Positive when lexical coverage exceeds the semantic score.
+  if (lexicalDelta > 0.15) {
+    return "lexical coverage lifted this candidate above its semantic score";
+  }
+  if (lexicalDelta < -0.15) {
+    return "semantic similarity carried this candidate despite weaker lexical coverage";
+  }
+  if (scores.recencyScore > 0.9) { // NOTE(review): chosen from the 0.9 threshold alone; nothing here checks that recency actually changed the ordering.
+    return "semantic and lexical scores were close; recency broke the tie";
+  }
+  return "semantic and lexical scores were balanced";
+}
 function extractKeywords(text: string): string[] {
   const tokens = normalizeTerms(text);
   const seen = new Set<string>();

package/src/types.ts CHANGED Viewed

@@ -196,6 +196,20 @@ export interface ContextAssembleResult {
   estimatedTokens: number;
   systemPromptAddition: string;
   _profile?: string[];
+  _debug?: {
+    recoveryTriggerFired?: boolean;
+    crossSessionRawRecovery?: boolean;
+    rawUserRecoveryCandidates?: Array<{
+      id: string;
+      text: string;
+      selected: boolean;
+      semanticScore: number;
+      lexicalCoverage: number;
+      recencyScore: number;
+      finalScore: number;
+      rationale: string;
+    }>;
+  };
 }
 export interface ContextCompactArgs {