@totalreclaw/totalreclaw 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lsh.ts ADDED
@@ -0,0 +1,257 @@
1
+ /**
2
+ * TotalReclaw Plugin - LSH Hasher (Locality-Sensitive Hashing)
3
+ *
4
+ * Pure TypeScript implementation of Random Hyperplane LSH for zero-knowledge
5
+ * semantic search. Generates deterministic hyperplane matrices from a seed
6
+ * derived from the user's master key, so the same embedding always hashes to
7
+ * the same buckets across sessions.
8
+ *
9
+ * Architecture overview:
10
+ * 1. Seed (32 bytes from HKDF) -> HKDF per table -> random bytes
11
+ * 2. Random bytes -> Box-Muller transform -> Gaussian-distributed hyperplanes
12
+ * 3. Embedding dot hyperplane -> sign bit -> N-bit signature per table
13
+ * 4. Signature -> `lsh_t{table}_{signature}` -> SHA-256 -> blind hash
14
+ *
15
+ * The blind hashes are merged with the existing blind word indices in the
16
+ * `blind_indices` array. The server never knows which hashes are word-based
17
+ * and which are LSH-based.
18
+ *
19
+ * Default parameters:
20
+ * - 32 bits per table (balanced discrimination vs. recall)
21
+ * - 20 tables (moderate table count for good coverage)
22
+ * - Middle ground between 64-bit x 12 (too strict) and 12-bit x 28 (too loose)
23
+ *
24
+ * Dependencies: @noble/hashes only (already in project).
25
+ */
26
+
27
+ import { hkdf } from '@noble/hashes/hkdf.js';
28
+ import { sha256 } from '@noble/hashes/sha2.js';
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Constants
32
+ // ---------------------------------------------------------------------------
33
+
34
/** How many independent hash tables a hasher builds when the caller does not specify. */
const DEFAULT_N_TABLES = 20;

/** How many hyperplanes (i.e. signature bits) each table gets when the caller does not specify. */
const DEFAULT_N_BITS = 32;

/**
 * Random bytes consumed per Gaussian sample: the Box-Muller step reads two
 * uint32 values, i.e. 2 x 4 = 8 bytes.
 */
const BYTES_PER_FLOAT = 2 * Uint32Array.BYTES_PER_ELEMENT;
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // LSHHasher
45
+ // ---------------------------------------------------------------------------
46
+
47
/**
 * Random Hyperplane LSH hasher.
 *
 * Every hyperplane is derived deterministically from the seed at construction
 * time, so `hash()` involves no randomness: the same seed, parameters and
 * embedding always yield the same blind hashes.
 *
 * Construction derives `nTables * nBits * dims` Gaussian samples, so build one
 * instance per session and reuse it for every store/search operation.
 */
export class LSHHasher {
  /**
   * Flat hyperplane storage.
   *
   * `hyperplanes[t]` is a Float64Array of length `dims * nBits` holding the
   * hyperplane matrix for table `t`. The hyperplane for bit `b` starts at
   * offset `b * dims`.
   */
  private readonly hyperplanes: Float64Array[];

  /** Embedding dimensionality. */
  private readonly dims: number;

  /** Number of independent hash tables. */
  private readonly nTables: number;

  /** Number of bits (hyperplanes) per table. */
  private readonly nBits: number;

  /**
   * Create a new LSH hasher.
   *
   * @param seed - Key material for hyperplane derivation; at least 16 bytes.
   *   Expected to be the 32-byte output of `deriveLshSeed()` (crypto.ts, not
   *   visible from this file).
   * @param dims - Embedding dimensionality (e.g. 1536 for text-embedding-3-small).
   * @param nTables - Number of independent hash tables (default 20).
   * @param nBits - Number of bits per table (default 32).
   * @throws Error if the seed is shorter than 16 bytes, or if `dims`,
   *   `nTables` or `nBits` is not a positive integer.
   */
  constructor(
    seed: Uint8Array,
    dims: number,
    nTables: number = DEFAULT_N_TABLES,
    nBits: number = DEFAULT_N_BITS,
  ) {
    if (seed.length < 16) {
      throw new Error(`LSH seed too short: expected >= 16 bytes, got ${seed.length}`);
    }
    LSHHasher.requirePositiveInteger('dims', dims);
    LSHHasher.requirePositiveInteger('nTables', nTables);
    LSHHasher.requirePositiveInteger('nBits', nBits);

    this.dims = dims;
    this.nTables = nTables;
    this.nBits = nBits;
    this.hyperplanes = new Array<Float64Array>(nTables);

    // Generate hyperplane matrices deterministically from the seed.
    for (let t = 0; t < nTables; t++) {
      this.hyperplanes[t] = this.generateTableHyperplanes(seed, t);
    }
  }

  /**
   * Reject anything that is not a positive integer.
   *
   * A plain `value < 1` test lets NaN and fractional values through: NaN
   * builds a hasher whose `hash()` can never succeed, and a fractional bit
   * count makes the signature loop read past the end of the matrix.
   */
  private static requirePositiveInteger(name: string, value: number): void {
    // `!(value >= 1)` instead of `value < 1` so that NaN is rejected as well.
    if (!(value >= 1)) {
      throw new Error(`${name} must be positive, got ${value}`);
    }
    if (!Number.isInteger(value)) {
      throw new Error(`${name} must be an integer, got ${value}`);
    }
  }

  // -------------------------------------------------------------------------
  // Hyperplane generation (deterministic from seed)
  // -------------------------------------------------------------------------

  /**
   * Generate the hyperplane matrix for a single table.
   *
   * Each table gets its own HKDF-derived byte stream (info prefix
   * `lsh_table_{tableIndex}`). Eight bytes are consumed per Gaussian sample,
   * because Box-Muller needs two uniform uint32 values.
   *
   * The hyperplanes are NOT normalised to unit length: only the sign of the
   * dot product is used, and that is scale-invariant.
   *
   * @param seed - Key material passed through to HKDF.
   * @param tableIndex - Zero-based table number; part of the HKDF info string.
   * @returns Float64Array of length `dims * nBits`, bit-major.
   */
  private generateTableHyperplanes(seed: Uint8Array, tableIndex: number): Float64Array {
    const totalFloats = this.dims * this.nBits;
    const totalBytes = totalFloats * BYTES_PER_FLOAT;

    // A single HKDF-SHA256 call yields at most 255 * 32 = 8,160 bytes, far less
    // than a realistic table needs (e.g. 1536 * 64 * 8 = 786,432 bytes), so
    // deriveRandomBytes() stitches together several sub-blocks.
    const randomBytes = this.deriveRandomBytes(
      seed,
      `lsh_table_${tableIndex}`,
      totalBytes,
    );

    const hyperplaneMatrix = new Float64Array(totalFloats);
    const view = new DataView(randomBytes.buffer, randomBytes.byteOffset, randomBytes.byteLength);

    // 2^32 + 1. Together with the `+ 1` on the numerator this maps a uint32 to
    // the open interval (0, 1), so Math.log() never sees 0.
    const denominator = 0xFFFFFFFF + 2;

    for (let i = 0; i < totalFloats; i++) {
      const offset = i * BYTES_PER_FLOAT;
      // Little-endian reads: the byte order is part of the derivation and must
      // not change, or previously stored bucket hashes stop matching.
      const u1 = (view.getUint32(offset, true) + 1) / denominator;
      const u2 = (view.getUint32(offset + 4, true) + 1) / denominator;

      // Box-Muller transform (only one of its two outputs is needed).
      hyperplaneMatrix[i] = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
    }

    return hyperplaneMatrix;
  }

  /**
   * Derive `length` pseudo-random bytes from the seed using HKDF-SHA256 with
   * chunked sub-blocks.
   *
   * A single HKDF-SHA256 call can output at most 255 * 32 = 8,160 bytes, so
   * the output is assembled from consecutive calls whose info string is
   * `{baseInfo}_block_{n}` for n = 0, 1, 2, ...
   *
   * @param seed - HKDF input key material.
   * @param baseInfo - Info-string prefix identifying the stream.
   * @param length - Number of bytes to produce.
   * @returns A new Uint8Array of exactly `length` bytes.
   */
  private deriveRandomBytes(
    seed: Uint8Array,
    baseInfo: string,
    length: number,
  ): Uint8Array {
    const MAX_HKDF_OUTPUT = 255 * 32; // SHA-256 hash length = 32
    const result = new Uint8Array(length);
    let offset = 0;
    let blockIndex = 0;

    while (offset < length) {
      const chunkLen = Math.min(length - offset, MAX_HKDF_OUTPUT);
      const info = Buffer.from(`${baseInfo}_block_${blockIndex}`, 'utf8');
      // Empty salt, per-block info string.
      const chunk = hkdf(sha256, seed, new Uint8Array(0), info, chunkLen);
      result.set(chunk, offset);
      offset += chunkLen;
      blockIndex++;
    }

    return result;
  }

  // -------------------------------------------------------------------------
  // Hash function
  // -------------------------------------------------------------------------

  /**
   * Hash an embedding vector to an array of blind-hashed bucket IDs.
   *
   * For each table:
   *   1. Compute the `nBits`-bit signature: bit `b` is `1` when the dot
   *      product with hyperplane `b` is >= 0, otherwise `0`.
   *   2. Build the bucket string: `lsh_t{tableIndex}_{binarySignature}`.
   *   3. SHA-256 the bucket string to produce a blind hash (lowercase hex).
   *
   * @param embedding - The embedding vector; a plain array or a typed array
   *   with exactly `dims` elements.
   * @returns Array of `nTables` hex strings (one blind hash per table).
   * @throws Error if `embedding.length` differs from `dims`.
   */
  hash(embedding: ArrayLike<number>): string[] {
    if (embedding.length !== this.dims) {
      throw new Error(
        `Embedding dimension mismatch: expected ${this.dims}, got ${embedding.length}`,
      );
    }

    const results: string[] = new Array(this.nTables);

    for (let t = 0; t < this.nTables; t++) {
      const matrix = this.hyperplanes[t];

      // Build the binary signature, one character per hyperplane.
      const bits = new Array<string>(this.nBits);
      for (let b = 0; b < this.nBits; b++) {
        const baseOffset = b * this.dims;
        // Accumulate in ascending index order; floating-point summation order
        // affects the sign in borderline cases, so it must stay fixed.
        let dot = 0;
        for (let d = 0; d < this.dims; d++) {
          dot += matrix[baseOffset + d] * embedding[d];
        }
        bits[b] = dot >= 0 ? '1' : '0';
      }

      const bucketId = `lsh_t${t}_${bits.join('')}`;

      // Blind-hash the bucket ID with SHA-256.
      const hashBytes = sha256(Buffer.from(bucketId, 'utf8'));
      results[t] = Buffer.from(hashBytes).toString('hex');
    }

    return results;
  }

  // -------------------------------------------------------------------------
  // Accessors
  // -------------------------------------------------------------------------

  /** Number of hash tables. */
  get tables(): number {
    return this.nTables;
  }

  /** Number of bits per table. */
  get bits(): number {
    return this.nBits;
  }

  /** Embedding dimensionality. */
  get dimensions(): number {
    return this.dims;
  }
}
package/package.json ADDED
@@ -0,0 +1,40 @@
1
+ {
2
+ "name": "@totalreclaw/totalreclaw",
3
+ "version": "1.0.0",
4
+ "description": "Zero-knowledge encrypted memory vault for AI agents — the password manager for AI memory.",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "author": "TotalReclaw Team",
8
+ "homepage": "https://totalreclaw.xyz",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "https://github.com/p-diogo/totalreclaw-plugin"
12
+ },
13
+ "keywords": [
14
+ "totalreclaw",
15
+ "openclaw",
16
+ "memory",
17
+ "e2ee",
18
+ "zero-knowledge",
19
+ "encryption",
20
+ "privacy",
21
+ "ai",
22
+ "agent"
23
+ ],
24
+ "dependencies": {
25
+ "@huggingface/transformers": "^3.8.1",
26
+ "@noble/hashes": "^2.0.1",
27
+ "@scure/bip39": "^2.0.1",
28
+ "permissionless": "^0.3.4",
29
+ "porter-stemmer": "^0.9.1",
30
+ "viem": "^2.46.3"
31
+ },
32
+ "openclaw": {
33
+ "extensions": [
34
+ "./index.ts"
35
+ ]
36
+ },
37
+ "engines": {
38
+ "node": ">=18.0.0"
39
+ }
40
+ }
@@ -0,0 +1,4 @@
1
/**
 * Ambient type declarations for the `porter-stemmer` dependency.
 *
 * Both exports are declared as functions taking one string and returning a
 * string. NOTE(review): presumably the package ships no typings of its own and
 * `memoizingStemmer` is a caching variant of `stemmer` -- confirm against the
 * package; neither is visible from this file.
 */
+ declare module 'porter-stemmer' {
2
+ export function stemmer(word: string): string;
3
+ export function memoizingStemmer(word: string): string;
4
+ }