npm - @totalreclaw/totalreclaw - Versions diffs - 1.6.0 → 3.0.6 - Mend

@totalreclaw/totalreclaw 1.6.0 → 3.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/CLAWHUB.md +134 -0
package/README.md +407 -64
package/SKILL.md +1032 -0
package/api-client.ts +5 -5
package/claims-helper.ts +686 -0
package/config.ts +211 -0
package/consolidation.ts +141 -33
package/contradiction-sync.ts +1389 -0
package/crypto.ts +63 -261
package/digest-sync.ts +516 -0
package/embedding.ts +69 -46
package/extractor.ts +1307 -84
package/hot-cache-wrapper.ts +1 -1
package/import-adapters/gemini-adapter.ts +243 -0
package/import-adapters/index.ts +3 -0
package/import-adapters/types.ts +1 -1
package/index.ts +1887 -323
package/llm-client.ts +106 -53
package/lsh.ts +21 -210
package/package.json +20 -7
package/pin.ts +502 -0
package/reranker.ts +96 -124
package/skill.json +213 -0
package/subgraph-search.ts +112 -5
package/subgraph-store.ts +559 -275
package/consolidation.test.ts +0 -356
package/extractor-dedup.test.ts +0 -168
package/import-adapters/import-adapters.test.ts +0 -1123
package/lsh.test.ts +0 -463
package/pocv2-e2e-test.ts +0 -917
package/porter-stemmer.d.ts +0 -4
package/reranker.test.ts +0 -594
package/semantic-dedup.test.ts +0 -392
package/setup.sh +0 -19
package/store-dedup-wiring.test.ts +0 -186

package/config.ts ADDED Viewed

@@ -0,0 +1,211 @@
+/**
+ * Plugin configuration — centralized env var reads.
+ * This file ONLY reads process.env. No network calls, no I/O.
+ * Other modules import config values from here.
+ *
+ * OpenClaw's security scanner flags files that contain BOTH process.env reads
+ * AND network calls. By centralizing all env reads here, no other file needs
+ * to touch process.env directly.
+ *
+ * v1 env var cleanup — see `docs/guides/env-vars-reference.md`.
+ * Removed user-facing vars: TOTALRECLAW_CHAIN_ID, TOTALRECLAW_EMBEDDING_MODEL,
+ * TOTALRECLAW_STORE_DEDUP, TOTALRECLAW_LLM_MODEL, TOTALRECLAW_SESSION_ID,
+ * TOTALRECLAW_TAXONOMY_VERSION.
+ * Removed legacy gates: TOTALRECLAW_CLAIM_FORMAT, TOTALRECLAW_DIGEST_MODE,
+ * TOTALRECLAW_AUTO_RESOLVE_MODE (the last one moved to an internal debug
+ * module; see `contradiction-sync.ts`).
+ *
+ * Tuning knobs (cosine threshold, min importance, cache TTL, etc.) are now
+ * delivered via the relay billing response. Env-var fallbacks are kept only
+ * for self-hosted deployments where the server may not surface those values.
+ */
+import path from 'node:path';
+const home = process.env.HOME ?? '/home/node';
+/**
+ * Removed env vars — warn once per process if still set so operators know
+ * their config is a no-op. The removal list matches `docs/guides/env-vars-reference.md`.
+ */
+const REMOVED_ENV_VARS = [
+  'TOTALRECLAW_CHAIN_ID',
+  'TOTALRECLAW_EMBEDDING_MODEL',
+  'TOTALRECLAW_STORE_DEDUP',
+  'TOTALRECLAW_LLM_MODEL',
+  'TOTALRECLAW_SESSION_ID',
+  'TOTALRECLAW_TAXONOMY_VERSION',
+  'TOTALRECLAW_CLAIM_FORMAT',
+  'TOTALRECLAW_DIGEST_MODE',
+] as const;
+function warnRemovedEnvVars(warn: (msg: string) => void = console.warn): void {
+  const set = REMOVED_ENV_VARS.filter((name) => process.env[name] !== undefined);
+  if (set.length === 0) return;
+  warn(
+    `TotalReclaw: ignoring removed env var(s): ${set.join(', ')}. ` +
+      `See docs/guides/env-vars-reference.md for the v1 env var surface.`,
+  );
+}
+// Emit the warning once at import time. Safe because this module is loaded
+// exactly once per process.
+warnRemovedEnvVars();
+/** Runtime override for recovery phrase (set by hot-reload after setup). */
+let _recoveryPhraseOverride: string | null = null;
+export function setRecoveryPhraseOverride(phrase: string): void {
+  _recoveryPhraseOverride = phrase;
+}
+export function getRecoveryPhrase(): string {
+  return _recoveryPhraseOverride ?? process.env.TOTALRECLAW_RECOVERY_PHRASE ?? '';
+}
+/**
+ * Runtime override for chain ID, set after the relay billing response is
+ * read. Free tier stays on 84532 (Base Sepolia); Pro tier flips to 100
+ * (Gnosis mainnet). The relay routes Pro writes to Gnosis, so Pro-tier
+ * UserOps MUST be signed against chain 100 — otherwise the bundler rejects
+ * the signature with AA23.
+ *
+ * See index.ts: after the billing lookup completes, call
+ * `setChainIdOverride(100)` for Pro users. Free users can leave the
+ * override unset.
+ */
+let _chainIdOverride: number | null = null;
+export function setChainIdOverride(chainId: number): void {
+  _chainIdOverride = chainId;
+}
+/** Reset the chain override — used by tests. */
+export function __resetChainIdOverrideForTests(): void {
+  _chainIdOverride = null;
+}
+export const CONFIG = {
+  // Core — recoveryPhrase reads from override first, then env var.
+  // Use getRecoveryPhrase() for dynamic access; this property is for
+  // backward-compat with code that reads CONFIG.recoveryPhrase at init time.
+  get recoveryPhrase(): string {
+    return getRecoveryPhrase();
+  },
+  serverUrl: (process.env.TOTALRECLAW_SERVER_URL || 'https://api.totalreclaw.xyz').replace(/\/+$/, ''),
+  selfHosted: process.env.TOTALRECLAW_SELF_HOSTED === 'true',
+  credentialsPath: process.env.TOTALRECLAW_CREDENTIALS_PATH || path.join(home, '.totalreclaw', 'credentials.json'),
+  // Chain — chainId is no longer user-configurable. It is auto-detected from
+  // the relay billing response (free = Base Sepolia / 84532, Pro = Gnosis /
+  // 100). The default here is used only before the first billing lookup
+  // completes. Self-hosted users can still point at a custom DataEdge via
+  // TOTALRECLAW_DATA_EDGE_ADDRESS / TOTALRECLAW_ENTRYPOINT_ADDRESS /
+  // TOTALRECLAW_RPC_URL (undocumented; internal knobs).
+  //
+  // Reads the runtime override set by the billing auto-detect in index.ts.
+  // Falls back to 84532 (free tier / pre-billing-lookup). Must be a getter,
+  // not a literal — a literal would freeze all Pro-tier UserOps to the
+  // wrong chainId and AA23 at the bundler.
+  get chainId(): number {
+    return _chainIdOverride ?? 84532;
+  },
+  dataEdgeAddress: process.env.TOTALRECLAW_DATA_EDGE_ADDRESS || '',
+  entryPointAddress: process.env.TOTALRECLAW_ENTRYPOINT_ADDRESS || '',
+  rpcUrl: process.env.TOTALRECLAW_RPC_URL || '',
+  // Tuning knobs — default values used only as local fallback for
+  // self-hosted mode. Managed-service clients override these from the relay
+  // billing response via `resolveTuning(...)`.
+  // See: docs/specs/totalreclaw/client-consistency.md
+  cosineThreshold: parseFloat(process.env.TOTALRECLAW_COSINE_THRESHOLD ?? '0.15'),
+  extractInterval: parseInt(process.env.TOTALRECLAW_EXTRACT_INTERVAL ?? process.env.TOTALRECLAW_EXTRACT_EVERY_TURNS ?? '3', 10),
+  relevanceThreshold: parseFloat(process.env.TOTALRECLAW_RELEVANCE_THRESHOLD ?? '0.3'),
+  semanticSkipThreshold: parseFloat(process.env.TOTALRECLAW_SEMANTIC_SKIP_THRESHOLD ?? '0.85'),
+  cacheTtlMs: parseInt(process.env.TOTALRECLAW_CACHE_TTL_MS ?? String(5 * 60 * 1000), 10),
+  minImportance: Math.max(1, Math.min(10, Number(process.env.TOTALRECLAW_MIN_IMPORTANCE) || 6)),
+  trapdoorBatchSize: parseInt(process.env.TOTALRECLAW_TRAPDOOR_BATCH_SIZE ?? '5', 10),
+  pageSize: parseInt(process.env.TOTALRECLAW_SUBGRAPH_PAGE_SIZE ?? '1000', 10),
+  // Store-time dedup is always ON. TOTALRECLAW_STORE_DEDUP was removed in v1.
+  storeDedupEnabled: true,
+  // LLM provider API keys (read once, passed to llm-client). Model selection
+  // is entirely automatic via `deriveCheapModel(provider)` — the
+  // TOTALRECLAW_LLM_MODEL override was removed in v1.
+  llmApiKeys: {
+    zai: process.env.ZAI_API_KEY || '',
+    anthropic: process.env.ANTHROPIC_API_KEY || '',
+    openai: process.env.OPENAI_API_KEY || '',
+    gemini: process.env.GEMINI_API_KEY || '',
+    google: process.env.GOOGLE_API_KEY || '',
+    mistral: process.env.MISTRAL_API_KEY || '',
+    groq: process.env.GROQ_API_KEY || '',
+    deepseek: process.env.DEEPSEEK_API_KEY || '',
+    openrouter: process.env.OPENROUTER_API_KEY || '',
+    xai: process.env.XAI_API_KEY || '',
+    together: process.env.TOGETHER_API_KEY || '',
+    cerebras: process.env.CEREBRAS_API_KEY || '',
+  } as Record<string, string>,
+  // Paths
+  home,
+  billingCachePath: path.join(home, '.totalreclaw', 'billing-cache.json'),
+  cachePath: process.env.TOTALRECLAW_CACHE_PATH || path.join(home, '.totalreclaw', 'cache.enc'),
+  openclawWorkspace: path.join(home, '.openclaw', 'workspace'),
+} as const;
+// ---------------------------------------------------------------------------
+// Server-side tuning resolution
+// ---------------------------------------------------------------------------
+/**
+ * Optional tuning fields delivered via the relay billing response.
+ *
+ * Relay may populate these in `features` (same cache consumed by
+ * `isLlmDedupEnabled`, `getExtractInterval`, etc.). When present, they
+ * override the env/defaults resolved above. When absent (self-hosted or
+ * pre-rollout relay), clients fall back to `CONFIG` values.
+ */
+export interface BillingTuning {
+  cosine_threshold?: number;
+  relevance_threshold?: number;
+  semantic_skip_threshold?: number;
+  min_importance?: number;
+  cache_ttl_ms?: number;
+  trapdoor_batch_size?: number;
+  subgraph_page_size?: number;
+}
+/**
+ * Merge a billing-response tuning block with the local fallback values.
+ *
+ * Use this at the call-site that needs a threshold, passing the features
+ * blob from the billing cache. No I/O here — callers read the cache once
+ * and hand the features in.
+ */
+export function resolveTuning(features?: BillingTuning | null): {
+  cosineThreshold: number;
+  relevanceThreshold: number;
+  semanticSkipThreshold: number;
+  minImportance: number;
+  cacheTtlMs: number;
+  trapdoorBatchSize: number;
+  pageSize: number;
+} {
+  return {
+    cosineThreshold: features?.cosine_threshold ?? CONFIG.cosineThreshold,
+    relevanceThreshold: features?.relevance_threshold ?? CONFIG.relevanceThreshold,
+    semanticSkipThreshold: features?.semantic_skip_threshold ?? CONFIG.semanticSkipThreshold,
+    minImportance: features?.min_importance ?? CONFIG.minImportance,
+    cacheTtlMs: features?.cache_ttl_ms ?? CONFIG.cacheTtlMs,
+    trapdoorBatchSize: features?.trapdoor_batch_size ?? CONFIG.trapdoorBatchSize,
+    pageSize: features?.subgraph_page_size ?? CONFIG.pageSize,
+  };
+}
+// Exposed for tests that want to assert the removed-var warning behaviour.
+export const __internal = {
+  REMOVED_ENV_VARS,
+  warnRemovedEnvVars,
+};

package/consolidation.ts CHANGED Viewed

@@ -12,13 +12,30 @@
  *   3. Bulk consolidation — cluster all facts in the vault and identify
  *      groups of near-duplicates for cleanup (clusterFacts).
  *
- * This module intentionally has minimal dependencies (only reranker for
- * cosineSimilarity) so it can be tested without pulling in the full
- * plugin dependency graph.
+ * Delegates core computation to `@totalreclaw/core` Rust WASM module where
+ * bindings are available. `shouldSupersede` uses the core directly.
+ * `findNearDuplicate` and `clusterFacts` use the core's `findBestNearDuplicate`
+ * and `clusterFacts` WASM functions when available, falling back to local
+ * implementations that use WASM-backed `cosineSimilarity`.
+ *
+ * Threshold helpers remain local (they read process.env).
  */
+import { createRequire } from 'node:module';
 import { cosineSimilarity } from './reranker.js';
+// ---------------------------------------------------------------------------
+// Lazy-load WASM core (mirrors claims-helper.ts / contradiction-sync.ts
+// pattern — plays nicely under both the OpenClaw runtime (CJS-ish tsx) and
+// bare Node ESM used by tests).
+// ---------------------------------------------------------------------------
+const requireWasm = createRequire(import.meta.url);
+let _wasm: typeof import('@totalreclaw/core') | null = null;
+function getWasm(): typeof import('@totalreclaw/core') {
+  if (!_wasm) _wasm = requireWasm('@totalreclaw/core');
+  return _wasm!;
+}
 // ---------------------------------------------------------------------------
 // Configuration
 // ---------------------------------------------------------------------------
@@ -106,6 +123,36 @@ export function findNearDuplicate(
   candidates: DecryptedCandidate[],
   threshold: number,
 ): NearDuplicateMatch | null {
+  const wasm = getWasm();
+  // Use core's findBestNearDuplicate if available (added in core >=1.5.0;
+  // guaranteed present in core >=2.0.0 which this plugin depends on).
+  if (typeof (wasm as any).findBestNearDuplicate === 'function') {
+    const existing = candidates
+      .filter((c) => c.embedding && c.embedding.length > 0)
+      .map((c) => ({ id: c.id, embedding: c.embedding! }));
+    if (existing.length === 0) return null;
+    const resultJs = (wasm as any).findBestNearDuplicate(
+      JSON.stringify(newFactEmbedding),
+      JSON.stringify(existing),
+      threshold,
+    );
+    if (resultJs == null) return null;
+    const result: { fact_id: string; similarity: number } =
+      typeof resultJs === 'string' ? JSON.parse(resultJs) : resultJs;
+    const matched = candidates.find((c) => c.id === result.fact_id);
+    if (!matched) return null;
+    return { existingFact: matched, similarity: result.similarity };
+  }
+  // Fallback: local loop using WASM-backed cosineSimilarity. Defensive only
+  // — core >=2.0.0 always exposes findBestNearDuplicate.
   let bestMatch: NearDuplicateMatch | null = null;
   for (const candidate of candidates) {
@@ -132,6 +179,8 @@ export function findNearDuplicate(
  * - Higher importance wins.
  * - Equal importance: new fact supersedes (newer is preferred).
  *
+ * Delegates to `@totalreclaw/core` WASM `shouldSupersede`.
+ *
  * @param newImportance - Importance score of the new fact
  * @param existingFact  - The existing near-duplicate candidate
  * @returns             - 'supersede' if new fact should replace, 'skip' otherwise
@@ -140,43 +189,21 @@ export function shouldSupersede(
   newImportance: number,
   existingFact: DecryptedCandidate,
 ): 'supersede' | 'skip' {
-  if (newImportance >= existingFact.importance) return 'supersede';
-  return 'skip';
+  const wasm = getWasm();
+  return wasm.shouldSupersede(newImportance, existingFact.importance) ? 'supersede' : 'skip';
 }
 // ---------------------------------------------------------------------------
 // Bulk consolidation
 // ---------------------------------------------------------------------------
-/**
- * Pick the best representative from a group of near-duplicate facts.
- *
- * Tiebreak order:
- *   1. Highest decayScore
- *   2. Most recent (highest createdAt)
- *   3. Longest text
- */
-function pickRepresentative(facts: DecryptedCandidate[]): DecryptedCandidate {
-  let best = facts[0];
-  for (let i = 1; i < facts.length; i++) {
-    const f = facts[i];
-    if (
-      f.decayScore > best.decayScore ||
-      (f.decayScore === best.decayScore && f.createdAt > best.createdAt) ||
-      (f.decayScore === best.decayScore && f.createdAt === best.createdAt && f.text.length > best.text.length)
-    ) {
-      best = f;
-    }
-  }
-  return best;
-}
 /**
  * Cluster facts by semantic similarity using greedy single-pass clustering.
  *
- * For each fact (in order), assigns it to the first existing cluster whose
- * representative has cosine similarity >= threshold. If no cluster matches,
- * a new cluster is started.
+ * Delegates to `@totalreclaw/core` WASM `clusterFacts` which performs the
+ * same greedy single-pass algorithm and representative selection. The WASM
+ * function returns ID-only clusters; this wrapper maps IDs back to full
+ * `DecryptedCandidate` objects for callers.
  *
  * Only returns clusters that have duplicates (i.e. more than one member).
  * Facts without embeddings are not clustered.
@@ -189,6 +216,62 @@ export function clusterFacts(
   facts: DecryptedCandidate[],
   threshold: number,
 ): ConsolidationCluster[] {
+  const wasm = getWasm();
+  // Use core's clusterFacts if available (added in core >=1.5.0;
+  // guaranteed present in core >=2.0.0 which this plugin depends on).
+  if (typeof (wasm as any).clusterFacts === 'function') {
+    // Build ConsolidationCandidate JSON for WASM (snake_case fields).
+    const wasmCandidates = facts
+      .filter((f) => f.embedding && f.embedding.length > 0)
+      .map((f) => ({
+        id: f.id,
+        text: f.text,
+        embedding: f.embedding!,
+        importance: f.importance,
+        decay_score: f.decayScore,
+        created_at: f.createdAt,
+        version: f.version,
+      }));
+    if (wasmCandidates.length === 0) return [];
+    const resultJs = (wasm as any).clusterFacts(
+      JSON.stringify(wasmCandidates),
+      threshold,
+    );
+    // WASM returns a JSON string: [{ representative: string, duplicates: string[] }]
+    const wasmClusters: { representative: string; duplicates: string[] }[] =
+      typeof resultJs === 'string' ? JSON.parse(resultJs) : resultJs;
+    // Build a lookup map for fast ID -> DecryptedCandidate resolution.
+    const byId = new Map<string, DecryptedCandidate>();
+    for (const f of facts) byId.set(f.id, f);
+    // Map ID-only clusters back to full DecryptedCandidate objects.
+    // Filter out singleton clusters (no duplicates) to match the pre-WASM
+    // plugin contract — callers rely on `clusters.length === 0` when nothing
+    // duplicates anything.
+    const result: ConsolidationCluster[] = [];
+    for (const wc of wasmClusters) {
+      const rep = byId.get(wc.representative);
+      if (!rep) continue;
+      const dups = wc.duplicates
+        .map((id) => byId.get(id))
+        .filter((d): d is DecryptedCandidate => d !== undefined);
+      if (dups.length > 0) {
+        result.push({ representative: rep, duplicates: dups });
+      }
+    }
+    return result;
+  }
+  // Fallback: local greedy single-pass clustering using WASM-backed
+  // cosineSimilarity. Defensive only — core >=2.0.0 always exposes clusterFacts.
   const clusters: { members: DecryptedCandidate[] }[] = [];
   for (const fact of facts) {
@@ -196,7 +279,6 @@ export function clusterFacts(
     let assigned = false;
     for (const cluster of clusters) {
-      // Compare against the first member's embedding (cluster seed)
       const seed = cluster.members[0];
       if (!seed.embedding) continue;
@@ -213,7 +295,6 @@ export function clusterFacts(
     }
   }
-  // Only return clusters with duplicates, pick representative for each
   const result: ConsolidationCluster[] = [];
   for (const cluster of clusters) {
     if (cluster.members.length < 2) continue;
@@ -225,3 +306,30 @@ export function clusterFacts(
   return result;
 }
+// ---------------------------------------------------------------------------
+// Local helpers (used only in fallback paths)
+// ---------------------------------------------------------------------------
+/**
+ * Pick the best representative from a group of near-duplicate facts.
+ *
+ * Tiebreak order:
+ *   1. Highest decayScore
+ *   2. Most recent (highest createdAt)
+ *   3. Longest text
+ */
+function pickRepresentative(facts: DecryptedCandidate[]): DecryptedCandidate {
+  let best = facts[0];
+  for (let i = 1; i < facts.length; i++) {
+    const f = facts[i];
+    if (
+      f.decayScore > best.decayScore ||
+      (f.decayScore === best.decayScore && f.createdAt > best.createdAt) ||
+      (f.decayScore === best.decayScore && f.createdAt === best.createdAt && f.text.length > best.text.length)
+    ) {
+      best = f;
+    }
+  }
+  return best;
+}