npm - @totalreclaw/totalreclaw - Versions diffs - 1.0.0 - Mend

@totalreclaw/totalreclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22) hide show

package/.github/workflows/ci.yml +27 -0
package/.github/workflows/publish.yml +39 -0
package/README.md +104 -0
package/SKILL.md +687 -0
package/api-client.ts +300 -0
package/crypto.ts +351 -0
package/embedding.ts +84 -0
package/extractor.ts +210 -0
package/generate-mnemonic.ts +14 -0
package/hot-cache-wrapper.ts +126 -0
package/index.ts +1885 -0
package/llm-client.ts +418 -0
package/lsh.test.ts +463 -0
package/lsh.ts +257 -0
package/package.json +40 -0
package/porter-stemmer.d.ts +4 -0
package/reranker.test.ts +594 -0
package/reranker.ts +537 -0
package/semantic-dedup.test.ts +392 -0
package/semantic-dedup.ts +100 -0
package/subgraph-search.ts +278 -0
package/subgraph-store.ts +342 -0

package/api-client.ts ADDED Viewed

@@ -0,0 +1,300 @@
+/**
+ * TotalReclaw Plugin - HTTP API Client
+ *
+ * Communicates with the TotalReclaw server over JSON/HTTP. Uses Node.js
+ * built-in `fetch` (available since Node 18).
+ *
+ * All authenticated endpoints expect:
+ *   Authorization: Bearer <hex-encoded-auth-key>
+ *
+ * The server hashes the auth key with SHA-256 to look up the user.
+ */
+// ---------------------------------------------------------------------------
+// Request / Response Types
+// ---------------------------------------------------------------------------
+/**
+ * A single fact payload for the `/v1/store` endpoint.
+ *
+ * Field naming matches the server's `FactJSON` Pydantic model in
+ * `server/src/handlers/store.py`.
+ *
+ * `store()` serializes these objects with `JSON.stringify`, so optional
+ * fields left `undefined` are omitted from the request body.
+ */
+export interface StoreFactPayload {
+  /** UUIDv7 fact identifier */
+  id: string;
+  /** ISO 8601 timestamp */
+  timestamp: string;
+  /**
+   * Hex-encoded AES-256-GCM ciphertext (iv || tag || ciphertext).
+   * NOTE(review): `encrypt()` in crypto.ts returns base64, not hex --
+   * presumably the caller re-encodes before filling this field; confirm.
+   */
+  encrypted_blob: string;
+  /** SHA-256 hashes of tokens for blind search (hex strings) */
+  blind_indices: string[];
+  /** Importance / decay score (0-10); the range is not checked by this client */
+  decay_score: number;
+  /** Origin label */
+  source: string;
+  /** HMAC-SHA256 content fingerprint for dedup (hex) */
+  content_fp?: string;
+  /** Identifier of the creating agent */
+  agent_id?: string;
+  /** Hex-encoded AES-256-GCM encrypted embedding vector (PoC v2) */
+  encrypted_embedding?: string;
+}
+/**
+ * A search result candidate returned by `/v1/search`.
+ *
+ * Field naming matches the server's `SearchResultJSON` model.
+ *
+ * `search()` returns the server's `results` array cast to this type without
+ * runtime validation, so the shape is trusted rather than checked.
+ */
+export interface SearchCandidate {
+  fact_id: string; // named `id` in StoreFactPayload / ExportedFact
+  /** Hex-encoded AES-256-GCM ciphertext */
+  encrypted_blob: string;
+  decay_score: number;
+  /** Unix milliseconds (a number here, unlike the ISO 8601 string in StoreFactPayload) */
+  timestamp: number;
+  version: number;
+  /** Hex-encoded AES-256-GCM encrypted embedding vector (PoC v2, optional) */
+  encrypted_embedding?: string;
+}
+/**
+ * A fact object returned by `/v1/export`.
+ *
+ * `exportFacts()` returns the server's `facts` array cast to this type
+ * without runtime validation.
+ *
+ * `id`, `encrypted_blob`, `blind_indices`, `decay_score` and `source` share
+ * their names with the fields of `StoreFactPayload`.
+ * NOTE(review): `created_at` / `updated_at` are typed as strings --
+ * presumably ISO 8601 timestamps; confirm against the server model.
+ */
+export interface ExportedFact {
+  id: string;
+  encrypted_blob: string;
+  blind_indices: string[];
+  decay_score: number;
+  version: number;
+  source: string;
+  created_at: string;
+  updated_at: string;
+}
+// ---------------------------------------------------------------------------
+// API Client Factory
+// ---------------------------------------------------------------------------
+/**
+ * Create an API client bound to a specific TotalReclaw server URL.
+ *
+ * All methods are async. Every method except `health()` throws a descriptive
+ * `Error` on a non-2xx response; the JSON-returning methods additionally
+ * throw when the response body reports a falsy `success` (`register`
+ * tolerates `error_code === 'USER_EXISTS'`). `health()` never throws and
+ * reports failure as `false` instead.
+ *
+ * Response bodies are cast to the expected shapes without runtime validation.
+ *
+ * @param serverUrl  Base URL of the server; trailing slashes are stripped.
+ * @returns An object exposing `register`, `store`, `search`, `deleteFact`,
+ *          `exportFacts` and `health`.
+ */
+export function createApiClient(serverUrl: string) {
+  // Normalise URL -- strip any trailing slashes so paths can be appended with "/".
+  const baseUrl = serverUrl.replace(/\/+$/, '');
+  // ------------------------------------------------------------------
+  // Shared helpers
+  // ------------------------------------------------------------------
+  /**
+   * Throw a descriptive error when the server returns a non-2xx status.
+   *
+   * The message has the form `<context>: HTTP <status> - <body>`. If the
+   * body cannot be read, a placeholder string is used instead. On HTTP 401
+   * a recovery-phrase hint is appended.
+   * NOTE(review): the hint mentions 12 words, but crypto.ts accepts both
+   * 12- and 24-word mnemonics -- consider rewording.
+   *
+   * @param res      Response to check; returns immediately when `res.ok`.
+   * @param context  Name of the calling operation, used as the message prefix.
+   */
+  async function assertOk(res: Response, context: string): Promise<void> {
+    if (res.ok) return;
+    let body: string;
+    try {
+      body = await res.text();
+    } catch {
+      body = '(could not read response body)';
+    }
+    const hint = res.status === 401
+      ? ' Authentication failed. If using a recovery phrase, check that all 12 words are in the correct order and spelled correctly.'
+      : '';
+    throw new Error(`${context}: HTTP ${res.status} - ${body}${hint}`);
+  }
+  // ------------------------------------------------------------------
+  // Public methods
+  // ------------------------------------------------------------------
+  return {
+    // ---- Registration (unauthenticated) ----
+    /**
+     * Register a new user.
+     *
+     * No `Authorization` header is sent. A response with a falsy `success`
+     * is accepted when `error_code` is `'USER_EXISTS'`, provided the body
+     * still carries a `user_id`.
+     *
+     * @param authKeyHash  Hex-encoded SHA-256 of the auth key (64 chars).
+     * @param saltHex      Hex-encoded 32-byte salt (64 chars).
+     * @returns `{ user_id }` on success.
+     * @throws Error on a non-2xx status, on `success=false` with any other
+     *         error code, or when the body contains no `user_id`.
+     */
+    async register(
+      authKeyHash: string,
+      saltHex: string,
+    ): Promise<{ user_id: string }> {
+      const res = await fetch(`${baseUrl}/v1/register`, {
+        method: 'POST',
+        headers: { 'Content-Type': 'application/json' },
+        body: JSON.stringify({ auth_key_hash: authKeyHash, salt: saltHex }),
+      });
+      await assertOk(res, 'register');
+      const json = (await res.json()) as Record<string, unknown>;
+      if (!json.success && json.error_code !== 'USER_EXISTS') { // USER_EXISTS is not treated as a failure
+        throw new Error(
+          `register: server returned success=false - ${json.error_code}: ${json.error_message}`,
+        );
+      }
+      if (!json.user_id) {
+        throw new Error(
+          `register: server did not return user_id (error_code=${json.error_code})`,
+        );
+      }
+      return { user_id: json.user_id as string };
+    },
+    // ---- Store (authenticated) ----
+    /**
+     * Store one or more encrypted facts.
+     *
+     * @param userId       The authenticated user's ID.
+     * @param facts        Array of `StoreFactPayload` objects.
+     * @param authKeyHex   Hex-encoded raw auth key (64 chars) for Bearer header.
+     * @returns `ids` from the response (empty array when absent) and
+     *          `duplicate_ids` passed through as received (may be undefined).
+     * @throws Error on a non-2xx status or when the body reports `success=false`.
+     */
+    async store(
+      userId: string,
+      facts: StoreFactPayload[],
+      authKeyHex: string,
+    ): Promise<{ ids: string[]; duplicate_ids?: string[] }> {
+      const res = await fetch(`${baseUrl}/v1/store`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Bearer ${authKeyHex}`,
+        },
+        body: JSON.stringify({ user_id: userId, facts }),
+      });
+      await assertOk(res, 'store');
+      const json = (await res.json()) as Record<string, unknown>;
+      if (!json.success) {
+        throw new Error(
+          `store: server returned success=false - ${json.error_code}: ${json.error_message}`,
+        );
+      }
+      return {
+        ids: (json.ids as string[]) ?? [],
+        duplicate_ids: json.duplicate_ids as string[] | undefined,
+      };
+    },
+    // ---- Search (authenticated) ----
+    /**
+     * Search for facts using blind trapdoors.
+     *
+     * @param userId         The authenticated user's ID.
+     * @param trapdoors      SHA-256 hex hashes of query tokens.
+     * @param maxCandidates  Maximum candidates to retrieve (sent as `max_candidates`).
+     * @param authKeyHex     Hex-encoded raw auth key for Bearer header.
+     * @returns Array of encrypted search candidates (empty when the response
+     *          has no `results`).
+     * @throws Error on a non-2xx status or when the body reports `success=false`.
+     */
+    async search(
+      userId: string,
+      trapdoors: string[],
+      maxCandidates: number,
+      authKeyHex: string,
+    ): Promise<SearchCandidate[]> {
+      const res = await fetch(`${baseUrl}/v1/search`, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Bearer ${authKeyHex}`,
+        },
+        body: JSON.stringify({
+          user_id: userId,
+          trapdoors,
+          max_candidates: maxCandidates,
+        }),
+      });
+      await assertOk(res, 'search');
+      const json = (await res.json()) as Record<string, unknown>;
+      if (!json.success) {
+        throw new Error(
+          `search: server returned success=false - ${json.error_code}: ${json.error_message}`,
+        );
+      }
+      return (json.results as SearchCandidate[]) ?? [];
+    },
+    // ---- Delete (authenticated) ----
+    /**
+     * Soft-delete a fact by ID.
+     *
+     * The ID is URL-encoded before being placed in the request path.
+     * NOTE(review): a JSON body is always parsed after a 2xx status, so an
+     * empty 2xx response (e.g. 204) would make `res.json()` reject --
+     * confirm the server always returns a JSON envelope here.
+     *
+     * @param factId      The fact UUID to delete.
+     * @param authKeyHex  Hex-encoded raw auth key for Bearer header.
+     * @throws Error on a non-2xx status or when the body reports `success=false`.
+     */
+    async deleteFact(factId: string, authKeyHex: string): Promise<void> {
+      const res = await fetch(`${baseUrl}/v1/facts/${encodeURIComponent(factId)}`, {
+        method: 'DELETE',
+        headers: {
+          Authorization: `Bearer ${authKeyHex}`,
+        },
+      });
+      await assertOk(res, 'deleteFact');
+      const json = (await res.json()) as Record<string, unknown>;
+      if (!json.success) {
+        throw new Error(
+          `deleteFact: server returned success=false - ${json.error_code}: ${json.error_message}`,
+        );
+      }
+    },
+    // ---- Export (authenticated) ----
+    /**
+     * Export all active facts (paginated).
+     *
+     * Fetches a single page; callers continue by passing the returned
+     * `cursor` back in while `has_more` is true. The stated maximum for
+     * `limit` is not enforced by this client.
+     *
+     * @param authKeyHex  Hex-encoded raw auth key for Bearer header.
+     * @param limit       Page size (default 1000, max 5000).
+     * @param cursor      Cursor from previous page (omit for first page); an
+     *                    empty string is treated the same as omitting it.
+     * @returns Page of facts with pagination metadata. `facts` defaults to
+     *          `[]` and `has_more` to `false` when absent from the response.
+     * @throws Error on a non-2xx status or when the body reports `success=false`.
+     */
+    async exportFacts(
+      authKeyHex: string,
+      limit: number = 1000,
+      cursor?: string,
+    ): Promise<{ facts: ExportedFact[]; cursor?: string; has_more: boolean; total_count?: number }> {
+      const params = new URLSearchParams({ limit: String(limit) });
+      if (cursor) params.set('cursor', cursor);
+      const res = await fetch(`${baseUrl}/v1/export?${params.toString()}`, {
+        method: 'GET',
+        headers: {
+          Authorization: `Bearer ${authKeyHex}`,
+        },
+      });
+      await assertOk(res, 'exportFacts');
+      const json = (await res.json()) as Record<string, unknown>;
+      if (!json.success) {
+        throw new Error(
+          `exportFacts: server returned success=false - ${json.error_code}: ${json.error_message}`,
+        );
+      }
+      return {
+        facts: (json.facts as ExportedFact[]) ?? [],
+        cursor: json.cursor as string | undefined,
+        has_more: (json.has_more as boolean) ?? false,
+        total_count: json.total_count as number | undefined,
+      };
+    },
+    // ---- Health (unauthenticated) ----
+    /**
+     * Check server health.
+     *
+     * Never throws: any error raised by `fetch` is reported as `false`.
+     * The response body is not read.
+     *
+     * @returns `true` if the server responds with HTTP 200 (other 2xx
+     *          statuses count as unhealthy), `false` otherwise.
+     */
+    async health(): Promise<boolean> {
+      try {
+        const res = await fetch(`${baseUrl}/health`, { method: 'GET' });
+        return res.status === 200;
+      } catch {
+        return false;
+      }
+    },
+  };
+}

package/crypto.ts ADDED Viewed

@@ -0,0 +1,351 @@
+/**
+ * TotalReclaw Plugin - Crypto Operations
+ *
+ * All cryptographic primitives used by the OpenClaw plugin. These must
+ * produce byte-for-byte identical output to the TotalReclaw client library
+ * (`client/src/crypto/`) so that memories written by one can be read by
+ * the other.
+ *
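+ * Key derivation chain (arbitrary-password path):
+ *   master_password + salt
+ *     -> Argon2id(t=3, m=65536, p=4, dkLen=32) -> masterKey
+ *     -> HKDF-SHA256(masterKey, salt, "totalreclaw-auth-key-v1",       32) -> authKey
+ *     -> HKDF-SHA256(masterKey, salt, "totalreclaw-encryption-key-v1", 32) -> encryptionKey
+ *     -> HKDF-SHA256(masterKey, salt, "openmemory-dedup-v1",          32) -> dedupKey
+ *
+ * BIP-39 path: when the password is a valid 12- or 24-word mnemonic, the
+ * 512-bit BIP-39 seed is used as the HKDF input in place of masterKey, and
+ * the first 32 bytes of that seed are used as the salt (no Argon2id step).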
+ *
+ * Encryption: AES-256-GCM (12-byte IV, 16-byte tag)
+ * Blind indices: SHA-256 of lowercase tokens, plus SHA-256 of "stem:<stem>" for tokens whose Porter stem differs
+ * Content fingerprint: HMAC-SHA256(dedupKey, normalizeText(plaintext))
+ */
+import { argon2id } from '@noble/hashes/argon2.js';
+import { hkdf } from '@noble/hashes/hkdf.js';
+import { sha256 } from '@noble/hashes/sha2.js';
+import { hmac } from '@noble/hashes/hmac.js';
+import { mnemonicToSeedSync, validateMnemonic } from '@scure/bip39';
+import { wordlist } from '@scure/bip39/wordlists/english.js';
+import { stemmer } from 'porter-stemmer';
+import crypto from 'node:crypto';
+// ---------------------------------------------------------------------------
+// Key Derivation
+// ---------------------------------------------------------------------------
+/**
+ * HKDF context strings -- must match client/src/crypto/kdf.ts exactly.
+ *
+ * Each string is passed as the HKDF `info` argument, so changing any of them
+ * changes the corresponding derived key.
+ */
+const AUTH_KEY_INFO = 'totalreclaw-auth-key-v1';
+const ENCRYPTION_KEY_INFO = 'totalreclaw-encryption-key-v1';
+const DEDUP_KEY_INFO = 'openmemory-dedup-v1'; // note: "openmemory-" prefix, unlike the two above
+/** Argon2id parameters -- OWASP recommendations, matching client defaults. */
+const ARGON2_TIME_COST = 3; // passed as `t`
+const ARGON2_MEMORY_COST = 65536; // passed as `m`; 65536 KiB = 64 MiB
+const ARGON2_PARALLELISM = 4; // passed as `p`
+const ARGON2_DK_LEN = 32; // passed as `dkLen`; derived master key length in bytes
+/** AES-256-GCM constants (all lengths in bytes). */
+const IV_LENGTH = 12;
+const TAG_LENGTH = 16;
+const KEY_LENGTH = 32;
+/**
+ * Check if the input looks like a BIP-39 mnemonic (12 or 24 words from the BIP-39 English wordlist).
+ *
+ * The word count is taken after trimming and splitting on runs of
+ * whitespace; anything other than 12 or 24 words is rejected immediately.
+ * Otherwise the decision is delegated to `validateMnemonic`, which receives
+ * the trimmed input (internal whitespace is not collapsed and case is not
+ * changed here).
+ *
+ * @param input  Candidate password / recovery phrase.
+ * @returns `true` only when the word count is 12 or 24 and `validateMnemonic`
+ *          accepts the trimmed input against the English wordlist.
+ */
+function isBip39Mnemonic(input: string): boolean {
+  const words = input.trim().split(/\s+/);
+  if (words.length !== 12 && words.length !== 24) return false;
+  return validateMnemonic(input.trim(), wordlist);
+}
+/**
+ * Derive encryption keys from a BIP-39 mnemonic.
+ * Uses the 512-bit BIP-39 seed as HKDF input (NOT the derived private key)
+ * for proper key separation from the Ethereum signing key.
+ *
+ * No randomness is involved: the salt is taken from the seed, and the three
+ * keys differ only in the HKDF `info` string used.
+ *
+ * NOTE(review): the returned `salt` is the first 32 bytes of the BIP-39 seed
+ * itself. `deriveKeys` describes the salt as returned for server
+ * registration; if it is transmitted, half of the seed is disclosed --
+ * confirm this is intended.
+ *
+ * @param mnemonic  Mnemonic phrase; surrounding whitespace is trimmed. No
+ *                  passphrase argument is passed to `mnemonicToSeedSync`.
+ * @returns `authKey`, `encryptionKey` and `dedupKey` (32 bytes each) plus the
+ *          32-byte `salt`, all as Buffers.
+ */
+function deriveKeysFromMnemonic(
+  mnemonic: string,
+): { authKey: Buffer; encryptionKey: Buffer; dedupKey: Buffer; salt: Buffer } {
+  // BIP-39: mnemonic -> 512-bit seed via PBKDF2(mnemonic, "mnemonic", 2048 rounds)
+  const seed = mnemonicToSeedSync(mnemonic.trim());
+  // Use first 32 bytes of seed as deterministic salt for HKDF
+  // (BIP-39 mnemonics are self-salting via PBKDF2, no random salt needed)
+  const salt = Buffer.from(seed.slice(0, 32));
+  // HKDF-SHA256 from the full 512-bit seed, using distinct info strings
+  const enc = (s: string) => Buffer.from(s, 'utf8'); // info strings are passed to hkdf as UTF-8 bytes
+  const seedBuf = Buffer.from(seed);
+  const authKey = Buffer.from(
+    hkdf(sha256, seedBuf, salt, enc(AUTH_KEY_INFO), 32),
+  );
+  const encryptionKey = Buffer.from(
+    hkdf(sha256, seedBuf, salt, enc(ENCRYPTION_KEY_INFO), 32),
+  );
+  const dedupKey = Buffer.from(
+    hkdf(sha256, seedBuf, salt, enc(DEDUP_KEY_INFO), 32),
+  );
+  return { authKey, encryptionKey, dedupKey, salt };
+}
+/**
+ * Derive auth, encryption, and dedup keys from a master password.
+ *
+ * If the password is a valid BIP-39 mnemonic (12 or 24 words), keys are
+ * derived from the 512-bit BIP-39 seed via HKDF. Otherwise, the legacy
+ * Argon2id path is used.
+ *
+ * For the Argon2id path: if no salt is provided a fresh 32-byte random salt
+ * is generated. Pass an existing salt when restoring a previously-registered
+ * account so that the derived keys match the original registration.
+ *
+ * The Argon2id computation is invoked synchronously (its result is used
+ * directly, without awaiting).
+ *
+ * @param password      Master password or BIP-39 recovery phrase.
+ * @param existingSalt  Salt from a previous registration. Ignored on the
+ *                      BIP-39 path, where the salt is derived from the seed.
+ * @returns Object containing authKey, encryptionKey, dedupKey, and salt (all Buffers).
+ */
+export function deriveKeys(
+  password: string,
+  existingSalt?: Buffer,
+): { authKey: Buffer; encryptionKey: Buffer; dedupKey: Buffer; salt: Buffer } {
+  // Auto-detect BIP-39 mnemonic vs arbitrary password
+  if (isBip39Mnemonic(password)) {
+    // BIP-39 path: mnemonic is self-salting, existingSalt is ignored for derivation
+    // but we still return the deterministic salt for server registration
+    return deriveKeysFromMnemonic(password);
+  }
+  // Legacy path: arbitrary password via Argon2id
+  const salt = existingSalt ?? crypto.randomBytes(32);
+  // Step 1 -- Argon2id to derive a 32-byte master key.
+  // @noble/hashes argon2id accepts Uint8Array for both password and salt.
+  const masterKey = argon2id(
+    Buffer.from(password, 'utf8'),
+    salt,
+    { t: ARGON2_TIME_COST, m: ARGON2_MEMORY_COST, p: ARGON2_PARALLELISM, dkLen: ARGON2_DK_LEN },
+  );
+  // Step 2 -- HKDF-SHA256 for each sub-key using distinct info strings.
+  // @noble/hashes v2 requires Uint8Array for info param.
+  const enc = (s: string) => Buffer.from(s, 'utf8');
+  const authKey = Buffer.from(
+    hkdf(sha256, masterKey, salt, enc(AUTH_KEY_INFO), 32),
+  );
+  const encryptionKey = Buffer.from(
+    hkdf(sha256, masterKey, salt, enc(ENCRYPTION_KEY_INFO), 32),
+  );
+  const dedupKey = Buffer.from(
+    hkdf(sha256, masterKey, salt, enc(DEDUP_KEY_INFO), 32),
+  );
+  return { authKey, encryptionKey, dedupKey, salt: Buffer.from(salt) }; // salt is returned as a copy
+}
+// ---------------------------------------------------------------------------
+// LSH Seed Derivation
+// ---------------------------------------------------------------------------
+/**
+ * HKDF context string for LSH seed derivation.
+ *
+ * The LSH hasher needs a deterministic seed so that the same master key
+ * always generates the same random hyperplane matrices. We derive this seed
+ * from the master key using HKDF with a unique info string.
+ *
+ * For the BIP-39 path the HKDF input is the 512-bit BIP-39 seed; for the
+ * Argon2id path it is the 32-byte master key.
+ *
+ * The string differs from the auth, encryption and dedup info strings, so
+ * the LSH seed is a separate HKDF output from those keys. Changing it
+ * changes the derived seed.
+ */
+const LSH_SEED_INFO = 'openmemory-lsh-seed-v1';
+/**
+ * Derive a 32-byte seed for the LSH hasher from the master key derivation
+ * chain.
+ *
+ * Call this once during initialization and pass the result to `new LSHHasher(seed, dims)`.
+ *
+ * For the BIP-39 path we use the full 512-bit BIP-39 seed as IKM; for the
+ * Argon2id path we use the 32-byte Argon2id-derived master key. In both
+ * cases the salt from `deriveKeys()` is reused for domain separation.
+ *
+ * Cost note: on the Argon2id path this repeats the same Argon2id computation
+ * (same parameters) that `deriveKeys()` performs, rather than reusing its
+ * master key.
+ *
+ * @param password  Master password or BIP-39 recovery phrase; the same
+ *                  mnemonic auto-detection as `deriveKeys()` is applied.
+ * @param salt      Salt used as both the Argon2id salt (password path) and
+ *                  the HKDF salt. Presumably the `salt` returned by
+ *                  `deriveKeys()` for the same password -- a different salt
+ *                  yields a different seed.
+ * @returns 32-byte seed as a `Uint8Array`.
+ */
+export function deriveLshSeed(
+  password: string,
+  salt: Buffer,
+): Uint8Array {
+  if (isBip39Mnemonic(password)) {
+    const seed = mnemonicToSeedSync(password.trim());
+    return new Uint8Array(
+      hkdf(sha256, Buffer.from(seed), salt, Buffer.from(LSH_SEED_INFO, 'utf8'), 32),
+    );
+  }
+  // Argon2id path: re-derive the master key, then HKDF with LSH info string.
+  const masterKey = argon2id(
+    Buffer.from(password, 'utf8'),
+    salt,
+    { t: ARGON2_TIME_COST, m: ARGON2_MEMORY_COST, p: ARGON2_PARALLELISM, dkLen: ARGON2_DK_LEN },
+  );
+  return new Uint8Array(
+    hkdf(sha256, masterKey, salt, Buffer.from(LSH_SEED_INFO, 'utf8'), 32),
+  );
+}
+// ---------------------------------------------------------------------------
+// Auth Key Hash
+// ---------------------------------------------------------------------------
+/**
+ * Compute the SHA-256 hash of the auth key.
+ *
+ * The server stores SHA256(authKey) during registration and uses it to look
+ * up users on every request. The hex string returned here is what the plugin
+ * sends to `/v1/register` as `auth_key_hash`.
+ *
+ * The hash is taken over the raw key bytes, not over their hex encoding.
+ *
+ * @param authKey  Raw auth key bytes (as returned by `deriveKeys()`).
+ * @returns Lowercase hex encoding of the 32-byte digest (64 characters).
+ */
+export function computeAuthKeyHash(authKey: Buffer): string {
+  return Buffer.from(sha256(authKey)).toString('hex');
+}
+// ---------------------------------------------------------------------------
+// AES-256-GCM Encrypt / Decrypt
+// ---------------------------------------------------------------------------
+/**
+ * Encrypt a UTF-8 plaintext string with AES-256-GCM.
+ *
+ * Wire format (base64-encoded):
+ *   [iv: 12 bytes][tag: 16 bytes][ciphertext: variable]
+ *
+ * This matches `serializeEncryptedData` in `client/src/crypto/aes.ts`.
+ *
+ * A fresh random IV is generated on every call, so encrypting the same
+ * plaintext twice yields different output. No additional authenticated data
+ * is supplied to the cipher.
+ *
+ * NOTE(review): the result is base64, whereas `StoreFactPayload.encrypted_blob`
+ * is documented as hex-encoded -- presumably callers re-encode; confirm.
+ *
+ * @param plaintext      Text to encrypt (encoded as UTF-8).
+ * @param encryptionKey  32-byte AES key.
+ * @returns Base64 string of iv || tag || ciphertext.
+ * @throws Error if `encryptionKey` is not exactly 32 bytes long.
+ */
+export function encrypt(plaintext: string, encryptionKey: Buffer): string {
+  if (encryptionKey.length !== KEY_LENGTH) {
+    throw new Error(`Invalid key length: expected ${KEY_LENGTH}, got ${encryptionKey.length}`);
+  }
+  const iv = crypto.randomBytes(IV_LENGTH);
+  const cipher = crypto.createCipheriv('aes-256-gcm', encryptionKey, iv, {
+    authTagLength: TAG_LENGTH,
+  });
+  const ciphertext = Buffer.concat([cipher.update(plaintext, 'utf8'), cipher.final()]);
+  const tag = cipher.getAuthTag(); // read after final(), once encryption is complete
+  // Combine: iv || tag || ciphertext  (same order as client library)
+  const combined = Buffer.concat([iv, tag, ciphertext]);
+  return combined.toString('base64');
+}
+/**
+ * Decrypt a base64-encoded AES-256-GCM blob back to a UTF-8 string.
+ *
+ * Expects the wire format produced by `encrypt()` above.
+ *
+ * The blob is split positionally: the first 12 bytes are the IV, the next
+ * 16 bytes the authentication tag, and the remainder the ciphertext (which
+ * may be empty).
+ *
+ * @param encryptedBase64  Base64 string of iv || tag || ciphertext.
+ * @param encryptionKey    32-byte AES key.
+ * @returns The decrypted plaintext decoded as UTF-8.
+ * @throws Error if `encryptionKey` is not exactly 32 bytes long, or if the
+ *         decoded blob is shorter than IV + tag (28 bytes). Errors raised by
+ *         the Node.js decipher (e.g. when authentication fails in `final()`)
+ *         propagate unchanged.
+ */
+export function decrypt(encryptedBase64: string, encryptionKey: Buffer): string {
+  if (encryptionKey.length !== KEY_LENGTH) {
+    throw new Error(`Invalid key length: expected ${KEY_LENGTH}, got ${encryptionKey.length}`);
+  }
+  const combined = Buffer.from(encryptedBase64, 'base64');
+  if (combined.length < IV_LENGTH + TAG_LENGTH) {
+    throw new Error('Encrypted data too short');
+  }
+  const iv = combined.subarray(0, IV_LENGTH);
+  const tag = combined.subarray(IV_LENGTH, IV_LENGTH + TAG_LENGTH);
+  const ciphertext = combined.subarray(IV_LENGTH + TAG_LENGTH);
+  const decipher = crypto.createDecipheriv('aes-256-gcm', encryptionKey, iv, {
+    authTagLength: TAG_LENGTH,
+  });
+  decipher.setAuthTag(tag); // must be set before final() so the tag can be verified
+  const plaintext = Buffer.concat([decipher.update(ciphertext), decipher.final()]);
+  return plaintext.toString('utf8');
+}
+// ---------------------------------------------------------------------------
+// Blind Indices
+// ---------------------------------------------------------------------------
+/**
+ * Generate blind indices (SHA-256 hashes of tokens and of their stems) for a
+ * text string.
+ *
+ * Tokenization rules (must match `client/src/crypto/blind.ts#tokenize`):
+ *   1. Lowercase
+ *   2. Replace every character that is not a Unicode letter, number or
+ *      whitespace with a space (so punctuation also splits words)
+ *   3. Split on whitespace
+ *   4. Filter tokens shorter than 2 characters (this also drops empty strings)
+ *
+ * Each surviving token is SHA-256 hashed and returned as a hex string.
+ * In addition, when the token's Porter stem is at least 2 characters long
+ * and differs from the token, the SHA-256 of `stem:<stem>` is emitted too.
+ *
+ * The returned array is deduplicated and keeps first-occurrence order, with
+ * each token's exact hash preceding its stem hash.
+ *
+ * @param text  Plaintext to index.
+ * @returns Array of unique 64-character hex digests (empty when no token survives).
+ */
+export function generateBlindIndices(text: string): string[] {
+  const tokens = text
+    .toLowerCase()
+    .replace(/[^\p{L}\p{N}\s]/gu, ' ') // Replace punctuation with spaces, keep letters/numbers
+    .split(/\s+/)
+    .filter((t) => t.length >= 2);
+  const seen = new Set<string>();
+  const indices: string[] = [];
+  for (const token of tokens) {
+    // Exact word hash (unchanged behavior).
+    const hash = Buffer.from(sha256(Buffer.from(token, 'utf8'))).toString('hex');
+    if (!seen.has(hash)) {
+      seen.add(hash);
+      indices.push(hash);
+    }
+    // Stemmed word hash. The stem is prefixed with "stem:" before hashing
+    // to avoid collisions between a word that happens to equal another
+    // word's stem (e.g., the word "commun" vs the stem of "community").
+    const stem = stemmer(token);
+    if (stem.length >= 2 && stem !== token) {
+      const stemHash = Buffer.from(
+        sha256(Buffer.from(`stem:${stem}`, 'utf8'))
+      ).toString('hex');
+      if (!seen.has(stemHash)) {
+        seen.add(stemHash);
+        indices.push(stemHash);
+      }
+    }
+  }
+  return indices;
+}
+// ---------------------------------------------------------------------------
+// Content Fingerprint (Dedup)
+// ---------------------------------------------------------------------------
+/**
+ * Normalize text for deterministic fingerprinting.
+ *
+ * Steps (matching `client/src/crypto/fingerprint.ts#normalizeText`):
+ *   1. Unicode NFC normalization
+ *   2. Lowercase
+ *   3. Collapse whitespace (spaces/tabs/newlines to single space)
+ *   4. Trim leading/trailing whitespace
+ *
+ * The steps are applied in exactly this order. Punctuation is left
+ * untouched, so texts differing only in punctuation normalize differently.
+ *
+ * @param text  Raw plaintext.
+ * @returns The normalized string.
+ */
+function normalizeText(text: string): string {
+  return text
+    .normalize('NFC')
+    .toLowerCase()
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+/**
+ * Compute an HMAC-SHA256 content fingerprint for exact-duplicate detection.
+ *
+ * The server stores this fingerprint and uses it to reject duplicate writes
+ * without ever seeing the plaintext.
+ *
+ * The plaintext is passed through `normalizeText` first, so inputs that
+ * differ only in case, whitespace runs or NFC-equivalent Unicode forms
+ * produce the same fingerprint.
+ *
+ * @param plaintext  Fact text to fingerprint.
+ * @param dedupKey   HMAC key (the `dedupKey` returned by `deriveKeys()`).
+ * @returns 64-character hex string.
+ */
+export function generateContentFingerprint(plaintext: string, dedupKey: Buffer): string {
+  const normalized = normalizeText(plaintext);
+  return Buffer.from(
+    hmac(sha256, dedupKey, Buffer.from(normalized, 'utf8')),
+  ).toString('hex');
+}