npm - @totalreclaw/totalreclaw - Versions diffs - 1.5.0 → 3.0.6 - Mend

@totalreclaw/totalreclaw 1.5.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40) hide show

package/CLAWHUB.md +134 -0
package/README.md +407 -64
package/SKILL.md +1032 -0
package/api-client.ts +5 -5
package/claims-helper.ts +686 -0
package/config.ts +211 -0
package/consolidation.ts +141 -33
package/contradiction-sync.ts +1389 -0
package/crypto.ts +63 -261
package/digest-sync.ts +516 -0
package/embedding.ts +69 -46
package/extractor.ts +1307 -84
package/hot-cache-wrapper.ts +1 -1
package/import-adapters/base-adapter.ts +4 -5
package/import-adapters/chatgpt-adapter.ts +323 -0
package/import-adapters/claude-adapter.ts +146 -0
package/import-adapters/gemini-adapter.ts +243 -0
package/import-adapters/index.ts +9 -0
package/import-adapters/mcp-memory-adapter.ts +4 -2
package/import-adapters/mem0-adapter.ts +2 -2
package/import-adapters/types.ts +25 -2
package/index.ts +2002 -319
package/llm-client.ts +106 -53
package/lsh.ts +21 -210
package/package.json +20 -7
package/pin.ts +502 -0
package/reranker.ts +96 -124
package/skill.json +213 -0
package/subgraph-search.ts +112 -5
package/subgraph-store.ts +559 -275
package/consolidation.test.ts +0 -356
package/extractor-dedup.test.ts +0 -168
package/import-adapters/import-adapters.test.ts +0 -595
package/lsh.test.ts +0 -463
package/pocv2-e2e-test.ts +0 -917
package/porter-stemmer.d.ts +0 -4
package/reranker.test.ts +0 -594
package/semantic-dedup.test.ts +0 -392
package/setup.sh +0 -19
package/store-dedup-wiring.test.ts +0 -186

package/reranker.ts CHANGED Viewed

@@ -4,18 +4,46 @@
  * Replaces the naive `textScore` word-overlap scorer with a proper ranking
  * pipeline:
  *   1. Okapi BM25 — term frequency / inverse document frequency
- *   2. Cosine similarity — between query and fact embeddings
+ *   2. Cosine similarity — between query and fact embeddings (computed in TypeScript)
  *   3. Importance — normalized importance score (0-1)
  *   4. Recency — time-decay with 1-week half-life
  *   5. Weighted RRF (Reciprocal Rank Fusion) — combines all ranking lists
  *   6. MMR (Maximal Marginal Relevance) — promotes diversity in results
  *
- * All functions are pure TypeScript with zero external dependencies (except
- * porter-stemmer for morphological normalization). This module runs
- * CLIENT-SIDE after decrypting candidates from the server.
+ * Cosine similarity is computed by the TypeScript `cosineSimilarity` defined
+ * below rather than by the Rust WASM core. All other functions are pure
+ * TypeScript. This module runs CLIENT-SIDE after decrypting candidates from
+ * the server.
  */
-import { stemmer } from 'porter-stemmer';
+// ---------------------------------------------------------------------------
+// Cosine Similarity
+// ---------------------------------------------------------------------------
+/**
+ * Compute cosine similarity between two vectors.
+ *
+ * Returns dot(a, b) / (||a|| * ||b||).
+ * Returns 0 if either vector has zero magnitude (avoids division by zero).
+ * Also returns 0 if either vector is empty. If the lengths differ, only the
+ * first min(a.length, b.length) components of each vector are used, both for
+ * the dot product and for the two norms.
+ *
+ * @param a - First vector.
+ * @param b - Second vector.
+ * @returns The cosine similarity, or 0 in the degenerate cases described above.
+ */
+export function cosineSimilarity(a: number[], b: number[]): number {
+  if (a.length === 0 || b.length === 0) return 0;
+  const len = Math.min(a.length, b.length);
+  let dot = 0;
+  let normA = 0;
+  let normB = 0;
+  for (let i = 0; i < len; i++) {
+    dot += a[i] * b[i];
+    normA += a[i] * a[i];
+    normB += b[i] * b[i];
+  }
+  const denom = Math.sqrt(normA) * Math.sqrt(normB);
+  if (denom === 0) return 0;
+  return dot / denom;
+}
 // ---------------------------------------------------------------------------
 // Tokenization
@@ -30,8 +58,8 @@ import { stemmer } from 'porter-stemmer';
  *   3. Split on whitespace
  *   4. Filter tokens shorter than 2 characters
  *
- * Optionally removes common English stop words (enabled by default) to
- * improve BM25 signal — stop words have low IDF and add noise.
+ * Removes common English stop words to improve BM25 signal — stop words
+ * have low IDF and add noise.
  */
 const STOP_WORDS = new Set([
   'a', 'an', 'and', 'are', 'as', 'at', 'be', 'but', 'by', 'do', 'for',
@@ -54,9 +82,7 @@ export function tokenize(text: string, removeStopWords: boolean = true): string[
     tokens = tokens.filter((t) => !STOP_WORDS.has(t));
   }
-  // Stem each token for morphological normalization.
-  // This ensures BM25 matches "gaming" with "games" (both stem to "game").
-  return tokens.map((t) => stemmer(t));
+  return tokens;
 }
 // ---------------------------------------------------------------------------
@@ -66,17 +92,6 @@ export function tokenize(text: string, removeStopWords: boolean = true): string[
 /**
  * Compute the Okapi BM25 score for a single document against a query.
  *
- * Formula:
- *   score = SUM_i IDF(qi) * (f(qi, D) * (k1 + 1)) / (f(qi, D) + k1 * (1 - b + b * |D| / avgdl))
- *
- * where:
- *   IDF(qi) = ln((N - n(qi) + 0.5) / (n(qi) + 0.5) + 1)
- *   f(qi, D) = frequency of term qi in document D
- *   |D| = length of document D (in tokens)
- *   avgdl = average document length across the corpus
- *   N = total number of documents
- *   n(qi) = number of documents containing term qi
- *
  * @param queryTerms  - Tokenized query terms
  * @param docTerms    - Tokenized document terms
  * @param avgDocLen   - Average document length (in tokens) across the candidate corpus
@@ -112,7 +127,6 @@ export function bm25Score(
     const nqi = termDocFreqs.get(qi) ?? 0;
     // IDF with Robertson-Walker floor: ln((N - n + 0.5) / (n + 0.5) + 1)
-    // The +1 inside ln ensures IDF is always >= 0 even when n > N/2.
     const idf = Math.log((docCount - nqi + 0.5) / (nqi + 0.5) + 1);
     // TF saturation with length normalization.
@@ -124,36 +138,6 @@ export function bm25Score(
   return score;
 }
-// ---------------------------------------------------------------------------
-// Cosine Similarity
-// ---------------------------------------------------------------------------
-/**
- * Compute cosine similarity between two vectors.
- *
- * Returns dot(a, b) / (||a|| * ||b||).
- * Returns 0 if either vector has zero magnitude (avoids division by zero).
- */
-export function cosineSimilarity(a: number[], b: number[]): number {
-  if (a.length === 0 || b.length === 0) return 0;
-  const len = Math.min(a.length, b.length);
-  let dot = 0;
-  let normA = 0;
-  let normB = 0;
-  for (let i = 0; i < len; i++) {
-    dot += a[i] * b[i];
-    normA += a[i] * a[i];
-    normB += b[i] * b[i];
-  }
-  const denom = Math.sqrt(normA) * Math.sqrt(normB);
-  if (denom === 0) return 0;
-  return dot / denom;
-}
 // ---------------------------------------------------------------------------
 // Reciprocal Rank Fusion (RRF)
 // ---------------------------------------------------------------------------
@@ -165,18 +149,6 @@ export interface RankedItem {
 /**
  * Fuse multiple ranking lists using Reciprocal Rank Fusion.
- *
- * For each document d appearing in any ranking list:
- *   rrfScore(d) = SUM_i 1 / (k + rank_i(d))
- *
- * where rank_i(d) is the 1-based rank of document d in the i-th list.
- * Documents not present in a list are not penalized (they simply receive
- * no contribution from that list).
- *
- * @param rankings - Array of ranking lists, each sorted by score descending.
- *                   Each item has an `id` and a `score`.
- * @param k        - RRF smoothing constant (default 60, per the original paper).
- * @returns        - Fused ranking sorted by RRF score descending.
  */
 export function rrfFuse(
   rankings: RankedItem[][],
@@ -187,7 +159,7 @@ export function rrfFuse(
   for (const ranking of rankings) {
     for (let rank = 0; rank < ranking.length; rank++) {
       const item = ranking[rank];
-      const contribution = 1 / (k + rank + 1); // rank is 0-based, formula uses 1-based
+      const contribution = 1 / (k + rank + 1);
       fusedScores.set(item.id, (fusedScores.get(item.id) ?? 0) + contribution);
     }
   }
@@ -207,14 +179,6 @@ export function rrfFuse(
 /**
  * Fuse multiple ranking lists using Weighted Reciprocal Rank Fusion.
- *
- * Like standard RRF, but each ranking list's contribution is multiplied by
- * its weight, allowing callers to emphasize or de-emphasize specific signals.
- *
- * @param rankings - Array of ranking lists, each sorted by score descending.
- * @param weights  - Weight for each ranking list (same length as rankings).
- * @param k        - RRF smoothing constant (default 60).
- * @returns        - Fused ranking sorted by weighted RRF score descending.
  */
 export function weightedRrfFuse(
   rankings: RankedItem[][],
@@ -279,7 +243,7 @@ export const INTENT_WEIGHTS: Record<QueryIntent, RankingWeights> = {
 /**
  * Classify a query into one of three intent types using lightweight heuristics.
- * Temporal is checked first so "What did we discuss yesterday?" → temporal.
+ * Temporal is checked first so "What did we discuss yesterday?" -> temporal.
  */
 export function detectQueryIntent(query: string): QueryIntent {
   if (TEMPORAL_KEYWORDS.test(query)) return 'temporal';
@@ -293,11 +257,49 @@ export interface RerankerCandidate {
   embedding?: number[];
   importance?: number;   // 0-1 normalized importance score
   createdAt?: number;    // Unix timestamp (seconds) when fact was created
+  /**
+   * Memory Taxonomy v1 provenance tag. Plugin v3.0.0+ surfaces this when a
+   * candidate was decrypted from a v1 blob. When present and
+   * `applySourceWeights: true` is passed to rerank(), the final RRF score
+   * is multiplied by the Retrieval v2 Tier 1 source weight from core.
+   */
+  source?: string;
 }
 export interface RerankerResult extends RerankerCandidate {
   rrfScore: number;
   cosineSimilarity?: number;
+  /** Source weight multiplier applied (1.0 = no weighting). */
+  sourceWeight?: number;
+}
+// ---------------------------------------------------------------------------
+// Source-weight lookup (Retrieval v2 Tier 1)
+//
+// Mirrors the table in `rust/totalreclaw-core/src/reranker.rs` exactly so
+// the TypeScript reranker produces the same ordering as core rerankWithConfig
+// when `applySourceWeights: true` is passed.
+//
+// NOTE: this is duplicated here (vs calling core via WASM) because the
+// plugin's local reranker handles RRF + MMR on the client side with rich
+// candidate metadata. The core `rerankWithConfig` is the canonical source
+// of truth and will be used directly by MCP/Python adapters.
+// ---------------------------------------------------------------------------
+const SOURCE_WEIGHTS: Record<string, number> = {
+  'user': 1.0,
+  'user-inferred': 0.9,
+  'derived': 0.7,
+  'external': 0.7,
+  'assistant': 0.55,
+};
+const LEGACY_FALLBACK_WEIGHT = 0.85;
+/**
+ * Look up the score multiplier for a candidate's `source` tag.
+ *
+ * The tag is lowercased before the lookup. A missing or empty tag, and any
+ * tag that is not one of SOURCE_WEIGHTS' own keys, both yield
+ * LEGACY_FALLBACK_WEIGHT.
+ *
+ * @param source - Provenance tag of the candidate, if any.
+ * @returns The weight for a known tag, otherwise LEGACY_FALLBACK_WEIGHT.
+ */
+export function getSourceWeight(source: string | undefined): number {
+  if (!source) return LEGACY_FALLBACK_WEIGHT;
+  const key = source.toLowerCase();
+  // Own-property check: a bare lookup on a plain object also resolves
+  // inherited keys such as 'constructor' or '__proto__', which are not
+  // numbers and would be returned as-is instead of falling back.
+  const known = Object.prototype.hasOwnProperty.call(SOURCE_WEIGHTS, key);
+  const w = known ? SOURCE_WEIGHTS[key] : undefined;
+  // Unknown source → same moderate penalty as a missing source.
+  return w ?? LEGACY_FALLBACK_WEIGHT;
 }
 // ---------------------------------------------------------------------------
@@ -306,14 +308,6 @@ export interface RerankerResult extends RerankerCandidate {
 /**
  * Compute a recency score with a 1-week half-life.
- *
- * Score = 1 / (1 + hours_since_creation / 168)
- *
- * A fact created just now scores ~1.0, one week ago scores 0.5,
- * two weeks ago scores ~0.33, etc.
- *
- * @param createdAt - Unix timestamp in seconds
- * @returns         - Recency score in (0, 1]
  */
 function recencyScore(createdAt: number): number {
   const nowSeconds = Date.now() / 1000;
@@ -327,21 +321,6 @@ function recencyScore(createdAt: number): number {
 /**
  * Apply Maximal Marginal Relevance to promote diversity in results.
- *
- * MMR re-orders a ranked list of candidates so that highly similar candidates
- * are spread out. The algorithm greedily selects the candidate that maximizes:
- *
- *   MMR(d) = lambda * relevance(d) - (1 - lambda) * max_sim(d, selected)
- *
- * where:
- *   - relevance(d) = position-based score (1.0 for first, linearly decreasing)
- *   - max_sim(d, selected) = max cosine similarity between d and any already
- *     selected candidate (0 if no embeddings available)
- *
- * @param candidates - Candidates in relevance order (best first)
- * @param lambda     - Trade-off between relevance and diversity (default 0.7)
- * @param topK       - Number of results to return (default 8)
- * @returns          - Re-ordered candidates with diversity
  */
 export function applyMMR(
   candidates: RerankerCandidate[],
@@ -402,31 +381,12 @@ export function applyMMR(
  * Re-rank decrypted candidates using BM25 + Cosine + Importance + Recency
  * with Weighted RRF fusion and MMR diversity.
  *
- * Pipeline:
- *   1. Tokenize query and all candidate texts
- *   2. Build corpus statistics (term document frequencies, average doc length)
- *   3. Score each candidate with BM25
- *   4. Score each candidate with cosine similarity (if embedding available)
- *   5. Score each candidate by importance
- *   6. Score each candidate by recency
- *   7. Fuse all 4 rankings with weighted RRF
- *   8. Apply MMR for diversity
- *   9. Return top-k candidates sorted by fused score
- *
- * Backward compatibility:
- *   - Candidates without embeddings get cosine score = 0 and are excluded
- *     from the cosine ranking list. They can still rank well via other signals.
- *   - If NO candidates have embeddings, cosine ranking is omitted.
- *   - Candidates without importance get neutral score (0.5).
- *   - Candidates without createdAt get neutral recency score (0.5).
- *   - topK defaults to 8, weights default to equal (0.25 each).
- *
- * @param query          - The user's search query (plaintext)
- * @param queryEmbedding - Embedding vector for the query
- * @param candidates     - Decrypted candidates with text and optional embeddings
- * @param topK           - Number of results to return (default 8)
- * @param weights        - Optional partial ranking weights (merged with defaults)
- * @returns              - Top-k candidates sorted by fused score, with scores attached
+ * When `applySourceWeights` is true, the final RRF score for each candidate
+ * is multiplied by a Retrieval v2 Tier 1 source weight based on the
+ * candidate's `source` field (user=1.0, user-inferred=0.9, derived/external=0.7,
+ * assistant=0.55). Candidates without a `source` field use the legacy
+ * fallback weight (0.85). This is the flag equivalent of core
+ * `rerankWithConfig(.., apply_source_weights=true)`.
  */
 export function rerank(
   query: string,
@@ -434,6 +394,7 @@ export function rerank(
   candidates: RerankerCandidate[],
   topK: number = 8,
   weights?: Partial<RankingWeights>,
+  applySourceWeights: boolean = false,
 ): RerankerResult[] {
   if (candidates.length === 0) return [];
@@ -448,7 +409,6 @@ export function rerank(
   const docCount = candidates.length;
   let totalDocLen = 0;
-  // Count how many documents contain each term.
   const termDocFreqs = new Map<string, number>();
   for (const terms of candidateTerms) {
     totalDocLen += terms.length;
@@ -521,14 +481,26 @@ export function rerank(
   for (const item of fused) {
     const candidate = candidateMap.get(item.id);
     if (candidate) {
+      const sourceWeight = applySourceWeights
+        ? getSourceWeight(candidate.source)
+        : 1.0;
       rrfResults.push({
         ...candidate,
-        rrfScore: item.score,
+        rrfScore: item.score * sourceWeight,
         cosineSimilarity: cosineScores.get(item.id),
+        sourceWeight: applySourceWeights ? sourceWeight : undefined,
       });
     }
   }
+  // When source weights are applied the RRF-scaled scores may no longer be in
+  // descending order (weighted=0.55 assistant could slip below a weighted=1.0
+  // user fact that was originally ranked lower). Re-sort so the top-K picked
+  // by MMR is meaningful.
+  if (applySourceWeights) {
+    rrfResults.sort((a, b) => b.rrfScore - a.rrfScore);
+  }
   // --- Step 9: Apply MMR for diversity, then return top-k ---
   const mmrResults = applyMMR(rrfResults, 0.7, topK);

package/skill.json ADDED Viewed

@@ -0,0 +1,213 @@
+{
+  "name": "totalreclaw",
+  "version": "1.6.1",
+  "description": "End-to-end encrypted memory for AI agents — portable, yours forever. XChaCha20-Poly1305 E2EE: server never sees plaintext.",
+  "author": "TotalReclaw Team",
+  "license": "MIT",
+  "homepage": "https://github.com/p-diogo/totalreclaw",
+  "repository": "https://github.com/p-diogo/totalreclaw",
+  "keywords": [
+    "memory",
+    "e2ee",
+    "e2e-encryption",
+    "encryption",
+    "privacy",
+    "agent-memory",
+    "persistent-context"
+  ],
+  "openclaw": {
+    "minVersion": "0.1.0",
+    "maxVersion": "1.0.0",
+    "requires": {
+      "env": [],
+      "bins": []
+    },
+    "emoji": "🧠",
+    "os": ["macos", "linux", "windows"],
+    "hooks": {
+      "before_agent_start": {
+        "priority": 10,
+        "description": "Retrieve relevant memories before agent processes message"
+      },
+      "agent_end": {
+        "priority": 90,
+        "description": "Extract and store facts after agent completes turn"
+      },
+      "pre_compaction": {
+        "priority": 5,
+        "description": "Full memory flush before context compaction"
+      },
+      "before_reset": {
+        "priority": 5,
+        "description": "Full memory flush before conversation reset"
+      }
+    },
+    "tools": [
+      {
+        "name": "totalreclaw_remember",
+        "description": "Store a new fact or preference in long-term memory",
+        "parameters": {
+          "text": {
+            "type": "string",
+            "required": true,
+            "description": "The fact or information to remember"
+          },
+          "type": {
+            "type": "string",
+            "required": false,
+            "enum": ["fact", "preference", "decision", "episodic", "goal"],
+            "default": "fact",
+            "description": "Type of memory"
+          },
+          "importance": {
+            "type": "integer",
+            "required": false,
+            "min": 1,
+            "max": 10,
+            "description": "Importance score 1-10. Default: auto-detected by LLM"
+          }
+        }
+      },
+      {
+        "name": "totalreclaw_recall",
+        "description": "Search and retrieve relevant memories from long-term storage",
+        "parameters": {
+          "query": {
+            "type": "string",
+            "required": true,
+            "description": "Natural language query to search memories"
+          },
+          "k": {
+            "type": "integer",
+            "required": false,
+            "default": 8,
+            "max": 20,
+            "description": "Number of results to return"
+          }
+        }
+      },
+      {
+        "name": "totalreclaw_forget",
+        "description": "Delete a specific fact from memory",
+        "parameters": {
+          "factId": {
+            "type": "string",
+            "required": true,
+            "description": "UUID of the fact to delete"
+          }
+        }
+      },
+      {
+        "name": "totalreclaw_export",
+        "description": "Export all stored memories in plaintext format",
+        "parameters": {
+          "format": {
+            "type": "string",
+            "required": false,
+            "enum": ["json", "markdown"],
+            "default": "json",
+            "description": "Export format"
+          }
+        }
+      },
+      {
+        "name": "totalreclaw_status",
+        "description": "Check billing and subscription status, including quota usage and upgrade options",
+        "parameters": {}
+      },
+      {
+        "name": "totalreclaw_upgrade",
+        "description": "Get a checkout URL to upgrade to TotalReclaw Pro (unlimited memories on Gnosis mainnet)",
+        "parameters": {}
+      },
+      {
+        "name": "totalreclaw_import_from",
+        "description": "Import memories from other AI memory tools (Mem0, MCP Memory Server) into TotalReclaw",
+        "parameters": {
+          "source": {
+            "type": "string",
+            "required": true,
+            "enum": ["mem0", "mcp-memory"],
+            "description": "Source system to import from"
+          },
+          "api_key": {
+            "type": "string",
+            "required": false,
+            "description": "API key for the source (Mem0). Used once, never stored."
+          },
+          "source_user_id": {
+            "type": "string",
+            "required": false,
+            "description": "User or agent ID in the source system"
+          },
+          "content": {
+            "type": "string",
+            "required": false,
+            "description": "File content (JSON, JSONL, or CSV) for file-based sources"
+          },
+          "file_path": {
+            "type": "string",
+            "required": false,
+            "description": "Path to a file on disk for file-based sources"
+          },
+          "namespace": {
+            "type": "string",
+            "required": false,
+            "default": "imported",
+            "description": "Target namespace in TotalReclaw"
+          },
+          "dry_run": {
+            "type": "boolean",
+            "required": false,
+            "default": false,
+            "description": "Preview without importing"
+          }
+        }
+      },
+      {
+        "name": "totalreclaw_consolidate",
+        "description": "Scan all stored memories and merge near-duplicates, keeping the most important/recent version",
+        "parameters": {
+          "dry_run": {
+            "type": "boolean",
+            "required": false,
+            "default": false,
+            "description": "Preview consolidation without deleting"
+          }
+        }
+      }
+    ],
+    "config": {
+      "serverUrl": {
+        "type": "string",
+        "default": "https://api.totalreclaw.xyz",
+        "description": "TotalReclaw server URL (only change for self-hosted mode)"
+      },
+      "autoExtractEveryTurns": {
+        "type": "number",
+        "default": 3,
+        "description": "Number of turns between automatic extractions"
+      },
+      "minImportanceForAutoStore": {
+        "type": "number",
+        "default": 6,
+        "description": "Minimum importance (1-10) to auto-store memories"
+      },
+      "maxMemoriesInContext": {
+        "type": "number",
+        "default": 8,
+        "description": "Maximum memories to inject into context"
+      },
+      "forgetThreshold": {
+        "type": "number",
+        "default": 0.3,
+        "description": "Decay score threshold for eviction"
+      },
+      "rerankerModel": {
+        "type": "string",
+        "default": "BAAI/bge-reranker-base",
+        "description": "ONNX reranker model for result reranking"
+      }
+    }
+  }
+}