npm - @totalreclaw/totalreclaw - Versions diffs - 1.6.0 → 3.0.6 - Mend

@totalreclaw/totalreclaw 1.6.0 → 3.0.6

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (35)

package/CLAWHUB.md +134 -0
package/README.md +407 -64
package/SKILL.md +1032 -0
package/api-client.ts +5 -5
package/claims-helper.ts +686 -0
package/config.ts +211 -0
package/consolidation.ts +141 -33
package/contradiction-sync.ts +1389 -0
package/crypto.ts +63 -261
package/digest-sync.ts +516 -0
package/embedding.ts +69 -46
package/extractor.ts +1307 -84
package/hot-cache-wrapper.ts +1 -1
package/import-adapters/gemini-adapter.ts +243 -0
package/import-adapters/index.ts +3 -0
package/import-adapters/types.ts +1 -1
package/index.ts +1887 -323
package/llm-client.ts +106 -53
package/lsh.ts +21 -210
package/package.json +20 -7
package/pin.ts +502 -0
package/reranker.ts +96 -124
package/skill.json +213 -0
package/subgraph-search.ts +112 -5
package/subgraph-store.ts +559 -275
package/consolidation.test.ts +0 -356
package/extractor-dedup.test.ts +0 -168
package/import-adapters/import-adapters.test.ts +0 -1123
package/lsh.test.ts +0 -463
package/pocv2-e2e-test.ts +0 -917
package/porter-stemmer.d.ts +0 -4
package/reranker.test.ts +0 -594
package/semantic-dedup.test.ts +0 -392
package/setup.sh +0 -19
package/store-dedup-wiring.test.ts +0 -186

package/llm-client.ts CHANGED Viewed

@@ -9,6 +9,8 @@
  * @huggingface/transformers). No API key needed for embeddings.
  */
+import { CONFIG } from './config.js';
 // ---------------------------------------------------------------------------
 // Types
 // ---------------------------------------------------------------------------
@@ -41,27 +43,37 @@ export interface LLMClientConfig {
   apiFormat: 'openai' | 'anthropic';
 }
+/** Shape of an OpenClaw model provider config entry. */
+interface OpenClawProviderConfig {
+  baseUrl: string;
+  apiKey?: string;
+  api?: string;
+  models?: Array<{ id: string; [k: string]: unknown }>;
+  [k: string]: unknown;
+}
 // ---------------------------------------------------------------------------
 // Provider mappings
 // ---------------------------------------------------------------------------
-const PROVIDER_ENV_VARS: Record<string, string[]> = {
-  zai:        ['ZAI_API_KEY'],
-  anthropic:  ['ANTHROPIC_API_KEY'],
-  openai:     ['OPENAI_API_KEY'],
-  gemini:     ['GEMINI_API_KEY'],
-  google:     ['GEMINI_API_KEY', 'GOOGLE_API_KEY'],
-  mistral:    ['MISTRAL_API_KEY'],
-  groq:       ['GROQ_API_KEY'],
-  deepseek:   ['DEEPSEEK_API_KEY'],
-  openrouter: ['OPENROUTER_API_KEY'],
-  xai:        ['XAI_API_KEY'],
-  together:   ['TOGETHER_API_KEY'],
-  cerebras:   ['CEREBRAS_API_KEY'],
+/** Maps provider name to CONFIG.llmApiKeys property names to check (in order). */
+const PROVIDER_KEY_NAMES: Record<string, string[]> = {
+  zai:        ['zai'],
+  anthropic:  ['anthropic'],
+  openai:     ['openai'],
+  gemini:     ['gemini'],
+  google:     ['gemini', 'google'],
+  mistral:    ['mistral'],
+  groq:       ['groq'],
+  deepseek:   ['deepseek'],
+  openrouter: ['openrouter'],
+  xai:        ['xai'],
+  together:   ['together'],
+  cerebras:   ['cerebras'],
 };
 const PROVIDER_BASE_URLS: Record<string, string> = {
-  zai:        'https://api.z.ai/api/paas/v4',
+  zai:        'https://api.z.ai/api/coding/paas/v4',
   anthropic:  'https://api.anthropic.com/v1',
   openai:     'https://api.openai.com/v1',
   gemini:     'https://generativelanguage.googleapis.com/v1beta/openai',
@@ -94,8 +106,8 @@ function deriveCheapModel(provider: string, primaryModel: string): string {
   // Derive based on provider naming conventions
   switch (provider) {
     case 'zai': {
-      // glm-5 -> glm-4.5-flash, glm-4.6 -> glm-4.5-flash
-      return 'glm-4.5-flash';
+      // glm-5.1 -> glm-5-turbo (fast, available on coding endpoint)
+      return 'glm-5-turbo';
     }
     case 'anthropic': {
       // claude-sonnet-4-5 -> claude-haiku-4-5-20251001
@@ -149,21 +161,26 @@ let _logger: { warn: (msg: string) => void } | null = null;
  * Called once from the plugin's `register()` function.
  *
  * Resolution order (highest priority first):
- *   1. TOTALRECLAW_LLM_MODEL env var (power user override for model)
- *   2. Plugin config `extraction.model` (if provided)
- *   3. Auto-derived from provider heuristic
+ *   1. Plugin config `extraction.model` (if provided)
+ *   2. Auto-derived from provider heuristic using env var API keys
+ *   3. OpenClaw's model provider config (api.config.models.providers)
  *   4. Fallback: try common env vars (ZAI_API_KEY, OPENAI_API_KEY) for dev/test
+ *
+ * The `TOTALRECLAW_LLM_MODEL` user-facing override was removed in v1 —
+ * `deriveCheapModel(provider)` covers the 99% case and a model-level knob
+ * was adding config surface for no tangible win.
  */
 export function initLLMClient(options: {
   primaryModel?: string;
   pluginConfig?: Record<string, unknown>;
+  openclawProviders?: Record<string, OpenClawProviderConfig>;
   logger?: { warn: (msg: string) => void };
 }): void {
   _logger = options.logger ?? null;
   _initialized = true;
   _cachedConfig = null;
-  const { primaryModel, pluginConfig } = options;
+  const { primaryModel, pluginConfig, openclawProviders } = options;
   // Check if extraction is explicitly disabled
   const extraction = pluginConfig?.extraction as Record<string, unknown> | undefined;
@@ -179,44 +196,81 @@ export function initLLMClient(options: {
     const modelName = parts.length >= 2 ? parts.slice(1).join('/') : primaryModel;
     if (provider) {
-      // Find the API key for this provider
-      const envVarNames = PROVIDER_ENV_VARS[provider];
-      const apiKey = envVarNames
-        ? envVarNames.map((name) => process.env[name]).find(Boolean)
+      // Find the API key for this provider — first from env vars, then from
+      // OpenClaw's provider config (api.config.models.providers)
+      const keyNames = PROVIDER_KEY_NAMES[provider];
+      let apiKey = keyNames
+        ? keyNames.map((name) => CONFIG.llmApiKeys[name]).find(Boolean)
         : undefined;
-      if (apiKey) {
-        const baseUrl = PROVIDER_BASE_URLS[provider];
-        if (baseUrl) {
-          // Determine model: env override > plugin config > auto-derived
-          const model =
-            process.env.TOTALRECLAW_LLM_MODEL ??
-            (typeof extraction?.model === 'string' ? extraction.model : null) ??
-            deriveCheapModel(provider, modelName);
+      let baseUrl = PROVIDER_BASE_URLS[provider];
-          const apiFormat: 'openai' | 'anthropic' =
-            provider === 'anthropic' ? 'anthropic' : 'openai';
-          _cachedConfig = { apiKey, baseUrl, model, apiFormat };
-          return;
+      // If no env var key found, check OpenClaw's provider config
+      if (!apiKey && openclawProviders) {
+        const ocProvider = openclawProviders[provider];
+        if (ocProvider?.apiKey) {
+          apiKey = ocProvider.apiKey;
+          if (ocProvider.baseUrl) {
+            baseUrl = ocProvider.baseUrl.replace(/\/+$/, '');
+          }
         }
       }
+      if (apiKey && baseUrl) {
+        // Determine model: plugin config > auto-derived
+        const model =
+          (typeof extraction?.model === 'string' ? extraction.model : null) ||
+          deriveCheapModel(provider, modelName);
+        const apiFormat: 'openai' | 'anthropic' =
+          provider === 'anthropic' ? 'anthropic' : 'openai';
+        _cachedConfig = { apiKey, baseUrl, model, apiFormat };
+        return;
+      }
+    }
+  }
+  // --- Fallback: try OpenClaw provider configs (any provider with an apiKey) ---
+  if (openclawProviders) {
+    for (const [providerName, providerConfig] of Object.entries(openclawProviders)) {
+      if (!providerConfig?.apiKey) continue;
+      const provider = providerName.toLowerCase();
+      let baseUrl = providerConfig.baseUrl?.replace(/\/+$/, '') || PROVIDER_BASE_URLS[provider];
+      if (!baseUrl) continue;
+      // Pick a model from the provider's configured models, or use our default
+      const firstModelId = providerConfig.models?.[0]?.id;
+      const model =
+        (typeof extraction?.model === 'string' ? extraction.model : null) ||
+        (firstModelId ? deriveCheapModel(provider, firstModelId) : null);
+      if (!model) continue;
+      const apiFormat: 'openai' | 'anthropic' =
+        providerConfig.api === 'anthropic-messages' || provider === 'anthropic'
+          ? 'anthropic'
+          : 'openai';
+      _cachedConfig = { apiKey: providerConfig.apiKey, baseUrl, model, apiFormat };
+      return;
     }
   }
-  // --- Fallback: try common env vars (for dev/test without OpenClaw config) ---
+  // --- Fallback: try common env var API keys (for dev/test without OpenClaw config) ---
   const fallbackProviders: Array<[string, string, string]> = [
-    ['zai', 'ZAI_API_KEY', 'glm-4.5-flash'],
-    ['openai', 'OPENAI_API_KEY', 'gpt-4.1-mini'],
-    ['anthropic', 'ANTHROPIC_API_KEY', 'claude-haiku-4-5-20251001'],
-    ['gemini', 'GEMINI_API_KEY', 'gemini-2.0-flash'],
+    ['zai', 'zai', 'glm-4.5-flash'],
+    ['openai', 'openai', 'gpt-4.1-mini'],
+    ['anthropic', 'anthropic', 'claude-haiku-4-5-20251001'],
+    ['gemini', 'gemini', 'gemini-2.0-flash'],
   ];
-  for (const [provider, envVar, defaultModel] of fallbackProviders) {
-    const apiKey = process.env[envVar];
+  for (const [provider, keyName, defaultModel] of fallbackProviders) {
+    const apiKey = CONFIG.llmApiKeys[keyName];
     if (apiKey) {
-      const model = process.env.TOTALRECLAW_LLM_MODEL ??
-        (typeof extraction?.model === 'string' ? extraction.model : null) ??
+      const model =
+        (typeof extraction?.model === 'string' ? extraction.model : null) ||
         defaultModel;
       const apiFormat: 'openai' | 'anthropic' =
@@ -253,17 +307,16 @@ export function resolveLLMConfig(): LLMClientConfig | null {
   }
   // Legacy fallback: if initLLMClient() was never called (e.g. running outside
-  // the plugin context), try the old env-var approach for backwards compat.
-  const zaiKey = process.env.ZAI_API_KEY;
-  const openaiKey = process.env.OPENAI_API_KEY;
+  // the plugin context), try the config-based approach for backwards compat.
+  const zaiKey = CONFIG.llmApiKeys.zai;
+  const openaiKey = CONFIG.llmApiKeys.openai;
-  const model = process.env.TOTALRECLAW_LLM_MODEL
-    ?? (zaiKey ? 'glm-4.5-flash' : 'gpt-4.1-mini');
+  const model = zaiKey ? 'glm-4.5-flash' : 'gpt-4.1-mini';
   if (zaiKey) {
     return {
       apiKey: zaiKey,
-      baseUrl: 'https://api.z.ai/api/paas/v4',
+      baseUrl: 'https://api.z.ai/api/coding/paas/v4',
       model,
       apiFormat: 'openai',
     };
@@ -413,6 +466,6 @@ async function chatCompletionAnthropic(
 // ---------------------------------------------------------------------------
 // Embeddings are now generated locally via @huggingface/transformers
-// (bge-small-en-v1.5 ONNX model). No API key needed.
+// (Harrier-OSS-v1-270M ONNX model). No API key needed.
 // See embedding.ts for implementation details.
 export { generateEmbedding, getEmbeddingDims } from './embedding.js';

package/lsh.ts CHANGED Viewed

@@ -1,48 +1,18 @@
 /**
- * TotalReclaw Plugin - LSH Hasher (Locality-Sensitive Hashing)
+ * TotalReclaw Plugin - LSH Hasher
  *
- * Pure TypeScript implementation of Random Hyperplane LSH for server-blind
- * semantic search. Generates deterministic hyperplane matrices from a seed
- * derived from the user's master key, so the same embedding always hashes to
- * the same buckets across sessions.
+ * Re-exports `WasmLshHasher` from `@totalreclaw/core` as `LSHHasher`
+ * for backward compatibility with existing plugin code.
  *
- * Architecture overview:
- *   1. Seed (32 bytes from HKDF) -> HKDF per table -> random bytes
- *   2. Random bytes -> Box-Muller transform -> Gaussian-distributed hyperplanes
- *   3. Embedding dot hyperplane -> sign bit -> N-bit signature per table
- *   4. Signature -> `lsh_t{table}_{signature}` -> SHA-256 -> blind hash
- *
- * The blind hashes are merged with the existing blind word indices in the
- * `blind_indices` array. The server never knows which hashes are word-based
- * and which are LSH-based.
- *
- * Default parameters:
- *   - 32 bits per table (balanced discrimination vs. recall)
- *   - 20 tables (moderate table count for good coverage)
- *   - Middle ground between 64-bit x 12 (too strict) and 12-bit x 28 (too loose)
- *
- * Dependencies: @noble/hashes only (already in project).
+ * Default parameters: 32 bits per table, 20 tables.
  */
-import { hkdf } from '@noble/hashes/hkdf.js';
-import { sha256 } from '@noble/hashes/sha2.js';
-// ---------------------------------------------------------------------------
-// Constants
-// ---------------------------------------------------------------------------
-/** Default number of independent hash tables. */
-const DEFAULT_N_TABLES = 20;
-/** Default number of bits (hyperplanes) per table. */
-const DEFAULT_N_BITS = 32;
-/** Number of bytes needed per Gaussian float via Box-Muller (2 x uint32 = 8 bytes). */
-const BYTES_PER_FLOAT = 8;
-// ---------------------------------------------------------------------------
-// LSHHasher
-// ---------------------------------------------------------------------------
+// Lazy-load WASM to avoid crash when npm install hasn't finished yet.
+let _WasmLshHasher: typeof import('@totalreclaw/core')['WasmLshHasher'] | null = null;
+function getWasmLshHasher() {
+  if (!_WasmLshHasher) _WasmLshHasher = require('@totalreclaw/core').WasmLshHasher;
+  return _WasmLshHasher!;
+}
 /**
  * Random Hyperplane LSH hasher.
@@ -51,207 +21,48 @@ const BYTES_PER_FLOAT = 8;
  * Construct once per session; call `hash()` for every store/search operation.
  */
 export class LSHHasher {
-  /**
-   * Flat hyperplane storage.
-   *
-   * `hyperplanes[t]` is a Float64Array of length `dims * nBits` containing the
-   * hyperplane matrix for table `t`. The hyperplane for bit `b` starts at
-   * offset `b * dims`.
-   */
-  private hyperplanes: Float64Array[];
-  /** Embedding dimensionality. */
-  private readonly dims: number;
-  /** Number of independent hash tables. */
-  private readonly nTables: number;
-  /** Number of bits (hyperplanes) per table. */
-  private readonly nBits: number;
+  private inner: InstanceType<typeof import('@totalreclaw/core')['WasmLshHasher']>;
   /**
    * Create a new LSH hasher.
    *
-   * @param seed   - 32-byte seed from `deriveLshSeed()` in crypto.ts.
-   * @param dims   - Embedding dimensionality (e.g. 1536 for text-embedding-3-small).
+   * @param seed    - 32-byte seed from `deriveLshSeed()` in crypto.ts.
+   * @param dims    - Embedding dimensionality (e.g. 640 for Harrier).
    * @param nTables - Number of independent hash tables (default 20).
    * @param nBits   - Number of bits per table (default 32).
    */
   constructor(
     seed: Uint8Array,
     dims: number,
-    nTables: number = DEFAULT_N_TABLES,
-    nBits: number = DEFAULT_N_BITS,
+    nTables: number = 20,
+    nBits: number = 32,
   ) {
-    if (seed.length < 16) {
-      throw new Error(`LSH seed too short: expected >= 16 bytes, got ${seed.length}`);
-    }
-    if (dims < 1) {
-      throw new Error(`dims must be positive, got ${dims}`);
-    }
-    if (nTables < 1) {
-      throw new Error(`nTables must be positive, got ${nTables}`);
-    }
-    if (nBits < 1) {
-      throw new Error(`nBits must be positive, got ${nBits}`);
-    }
-    this.dims = dims;
-    this.nTables = nTables;
-    this.nBits = nBits;
-    this.hyperplanes = new Array(nTables);
-    // Generate hyperplane matrices deterministically from the seed.
-    for (let t = 0; t < nTables; t++) {
-      this.hyperplanes[t] = this.generateTableHyperplanes(seed, t);
-    }
-  }
-  // -------------------------------------------------------------------------
-  // Hyperplane generation (deterministic from seed)
-  // -------------------------------------------------------------------------
-  /**
-   * Generate the hyperplane matrix for a single table.
-   *
-   * Each table gets a unique HKDF-derived byte stream. We consume 8 bytes
-   * per Gaussian sample (Box-Muller uses two uniform uint32 values).
-   *
-   * The hyperplanes are NOT normalised to unit length. Normalisation is
-   * unnecessary because we only care about the sign of the dot product,
-   * which is scale-invariant.
-   */
-  private generateTableHyperplanes(seed: Uint8Array, tableIndex: number): Float64Array {
-    const totalFloats = this.dims * this.nBits;
-    const totalBytes = totalFloats * BYTES_PER_FLOAT;
-    // Derive enough random bytes for this table.
-    // HKDF can produce up to 255 * HashLen bytes (255 * 32 = 8,160 for SHA-256).
-    // For large dims (e.g. 1536 * 64 * 8 = 786,432 bytes) we need multiple
-    // HKDF calls with sub-block indexing.
-    const randomBytes = this.deriveRandomBytes(
-      seed,
-      `lsh_table_${tableIndex}`,
-      totalBytes,
-    );
-    // Convert the random bytes to Gaussian-distributed floats via Box-Muller.
-    const hyperplaneMatrix = new Float64Array(totalFloats);
-    const view = new DataView(randomBytes.buffer, randomBytes.byteOffset, randomBytes.byteLength);
-    for (let i = 0; i < totalFloats; i++) {
-      const offset = i * BYTES_PER_FLOAT;
-      // Two uint32 values -> two uniform [0,1) samples -> one Gaussian via Box-Muller.
-      const u1Raw = view.getUint32(offset, true);
-      const u2Raw = view.getUint32(offset + 4, true);
-      // Map to (0, 1] -- avoid exactly 0 for the log in Box-Muller.
-      const u1 = (u1Raw + 1) / (0xFFFFFFFF + 2);
-      const u2 = (u2Raw + 1) / (0xFFFFFFFF + 2);
-      // Box-Muller transform (we only need one of the two outputs).
-      hyperplaneMatrix[i] = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
-    }
-    return hyperplaneMatrix;
-  }
-  /**
-   * Derive `length` pseudo-random bytes from the seed using HKDF with
-   * chunked sub-blocks.
-   *
-   * A single HKDF-SHA256 call can output at most 255 * 32 = 8,160 bytes.
-   * For large embedding dimensions we need more, so we iterate over
-   * sub-block indices as part of the info string.
-   */
-  private deriveRandomBytes(
-    seed: Uint8Array,
-    baseInfo: string,
-    length: number,
-  ): Uint8Array {
-    const MAX_HKDF_OUTPUT = 255 * 32; // SHA-256 hash length = 32
-    const result = new Uint8Array(length);
-    let offset = 0;
-    let blockIndex = 0;
-    while (offset < length) {
-      const remaining = length - offset;
-      const chunkLen = Math.min(remaining, MAX_HKDF_OUTPUT);
-      const info = Buffer.from(`${baseInfo}_block_${blockIndex}`, 'utf8');
-      const chunk = hkdf(sha256, seed, new Uint8Array(0), info, chunkLen);
-      result.set(new Uint8Array(chunk), offset);
-      offset += chunkLen;
-      blockIndex++;
-    }
-    return result;
+    const seedHex = Buffer.from(seed).toString('hex');
+    this.inner = getWasmLshHasher().withParams(seedHex, dims, nTables, nBits);
   }
-  // -------------------------------------------------------------------------
-  // Hash function
-  // -------------------------------------------------------------------------
   /**
    * Hash an embedding vector to an array of blind-hashed bucket IDs.
    *
-   * For each table:
-   *   1. Compute the 64-bit signature (sign of dot product with each hyperplane).
-   *   2. Build the bucket string: `lsh_t{tableIndex}_{binarySignature}`.
-   *   3. SHA-256 the bucket string to produce a blind hash (hex).
-   *
    * @param embedding - The embedding vector (must have `dims` elements).
    * @returns Array of `nTables` hex strings (one blind hash per table).
    */
   hash(embedding: number[]): string[] {
-    if (embedding.length !== this.dims) {
-      throw new Error(
-        `Embedding dimension mismatch: expected ${this.dims}, got ${embedding.length}`,
-      );
-    }
-    const results: string[] = new Array(this.nTables);
-    for (let t = 0; t < this.nTables; t++) {
-      const matrix = this.hyperplanes[t];
-      // Build the binary signature.
-      const bits = new Array<string>(this.nBits);
-      for (let b = 0; b < this.nBits; b++) {
-        const baseOffset = b * this.dims;
-        let dot = 0;
-        for (let d = 0; d < this.dims; d++) {
-          dot += matrix[baseOffset + d] * embedding[d];
-        }
-        bits[b] = dot >= 0 ? '1' : '0';
-      }
-      const signature = bits.join('');
-      const bucketId = `lsh_t${t}_${signature}`;
-      // Blind-hash the bucket ID with SHA-256.
-      const hashBytes = sha256(Buffer.from(bucketId, 'utf8'));
-      results[t] = Buffer.from(hashBytes).toString('hex');
-    }
-    return results;
+    return this.inner.hash(new Float64Array(embedding));
   }
-  // -------------------------------------------------------------------------
-  // Accessors
-  // -------------------------------------------------------------------------
   /** Number of hash tables. */
   get tables(): number {
-    return this.nTables;
+    return this.inner.tables;
   }
   /** Number of bits per table. */
   get bits(): number {
-    return this.nBits;
+    return this.inner.bits;
   }
   /** Embedding dimensionality. */
   get dimensions(): number {
-    return this.dims;
+    return this.inner.dimensions;
   }
 }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@totalreclaw/totalreclaw",
-  "version": "1.6.0",
+  "version": "3.0.6",
   "description": "End-to-end encrypted memory for AI agents — portable, yours forever. Automatic extraction, semantic search, and on-chain storage",
   "type": "module",
   "keywords": [
@@ -24,12 +24,25 @@
   "author": "TotalReclaw Team",
   "license": "MIT",
   "dependencies": {
-    "@huggingface/transformers": "^3.8.1",
-    "@noble/hashes": "^2.0.1",
-    "@scure/bip39": "^2.0.1",
-    "permissionless": "^0.3.4",
-    "porter-stemmer": "^0.9.1",
-    "viem": "^2.46.3"
+    "@totalreclaw/client": "^1.2.0",
+    "@totalreclaw/core": "^2.0.0",
+    "@huggingface/transformers": "^4.0.1",
+    "onnxruntime-node": "^1.24.0"
+  },
+  "files": [
+    "*.ts",
+    "import-adapters/",
+    "!**/*.test.ts",
+    "!pocv2-e2e-test.ts",
+    "openclaw.plugin.json",
+    "SKILL.md",
+    "README.md",
+    "CLAWHUB.md",
+    "skill.json"
+  ],
+  "scripts": {
+    "check-scanner": "node ../scripts/check-scanner.mjs",
+    "prepublishOnly": "node ../scripts/check-scanner.mjs"
   },
   "openclaw": {
     "extensions": [