@totalreclaw/totalreclaw 1.6.0 → 3.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CLAWHUB.md +134 -0
- package/README.md +407 -64
- package/SKILL.md +1032 -0
- package/api-client.ts +5 -5
- package/claims-helper.ts +686 -0
- package/config.ts +211 -0
- package/consolidation.ts +141 -33
- package/contradiction-sync.ts +1389 -0
- package/crypto.ts +63 -261
- package/digest-sync.ts +516 -0
- package/embedding.ts +69 -46
- package/extractor.ts +1307 -84
- package/hot-cache-wrapper.ts +1 -1
- package/import-adapters/gemini-adapter.ts +243 -0
- package/import-adapters/index.ts +3 -0
- package/import-adapters/types.ts +1 -1
- package/index.ts +1887 -323
- package/llm-client.ts +106 -53
- package/lsh.ts +21 -210
- package/package.json +20 -7
- package/pin.ts +502 -0
- package/reranker.ts +96 -124
- package/skill.json +213 -0
- package/subgraph-search.ts +112 -5
- package/subgraph-store.ts +559 -275
- package/consolidation.test.ts +0 -356
- package/extractor-dedup.test.ts +0 -168
- package/import-adapters/import-adapters.test.ts +0 -1123
- package/lsh.test.ts +0 -463
- package/pocv2-e2e-test.ts +0 -917
- package/porter-stemmer.d.ts +0 -4
- package/reranker.test.ts +0 -594
- package/semantic-dedup.test.ts +0 -392
- package/setup.sh +0 -19
- package/store-dedup-wiring.test.ts +0 -186
package/llm-client.ts
CHANGED
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
* @huggingface/transformers). No API key needed for embeddings.
|
|
10
10
|
*/
|
|
11
11
|
|
|
12
|
+
import { CONFIG } from './config.js';
|
|
13
|
+
|
|
12
14
|
// ---------------------------------------------------------------------------
|
|
13
15
|
// Types
|
|
14
16
|
// ---------------------------------------------------------------------------
|
|
@@ -41,27 +43,37 @@ export interface LLMClientConfig {
|
|
|
41
43
|
apiFormat: 'openai' | 'anthropic';
|
|
42
44
|
}
|
|
43
45
|
|
|
46
|
+
/** Shape of an OpenClaw model provider config entry. */
|
|
47
|
+
interface OpenClawProviderConfig {
|
|
48
|
+
baseUrl: string;
|
|
49
|
+
apiKey?: string;
|
|
50
|
+
api?: string;
|
|
51
|
+
models?: Array<{ id: string; [k: string]: unknown }>;
|
|
52
|
+
[k: string]: unknown;
|
|
53
|
+
}
|
|
54
|
+
|
|
44
55
|
// ---------------------------------------------------------------------------
|
|
45
56
|
// Provider mappings
|
|
46
57
|
// ---------------------------------------------------------------------------
|
|
47
58
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
59
|
+
/** Maps provider name to CONFIG.llmApiKeys property names to check (in order). */
|
|
60
|
+
const PROVIDER_KEY_NAMES: Record<string, string[]> = {
|
|
61
|
+
zai: ['zai'],
|
|
62
|
+
anthropic: ['anthropic'],
|
|
63
|
+
openai: ['openai'],
|
|
64
|
+
gemini: ['gemini'],
|
|
65
|
+
google: ['gemini', 'google'],
|
|
66
|
+
mistral: ['mistral'],
|
|
67
|
+
groq: ['groq'],
|
|
68
|
+
deepseek: ['deepseek'],
|
|
69
|
+
openrouter: ['openrouter'],
|
|
70
|
+
xai: ['xai'],
|
|
71
|
+
together: ['together'],
|
|
72
|
+
cerebras: ['cerebras'],
|
|
61
73
|
};
|
|
62
74
|
|
|
63
75
|
const PROVIDER_BASE_URLS: Record<string, string> = {
|
|
64
|
-
zai: 'https://api.z.ai/api/paas/v4',
|
|
76
|
+
zai: 'https://api.z.ai/api/coding/paas/v4',
|
|
65
77
|
anthropic: 'https://api.anthropic.com/v1',
|
|
66
78
|
openai: 'https://api.openai.com/v1',
|
|
67
79
|
gemini: 'https://generativelanguage.googleapis.com/v1beta/openai',
|
|
@@ -94,8 +106,8 @@ function deriveCheapModel(provider: string, primaryModel: string): string {
|
|
|
94
106
|
// Derive based on provider naming conventions
|
|
95
107
|
switch (provider) {
|
|
96
108
|
case 'zai': {
|
|
97
|
-
// glm-5 -> glm-
|
|
98
|
-
return 'glm-
|
|
109
|
+
// glm-5.1 -> glm-5-turbo (fast, available on coding endpoint)
|
|
110
|
+
return 'glm-5-turbo';
|
|
99
111
|
}
|
|
100
112
|
case 'anthropic': {
|
|
101
113
|
// claude-sonnet-4-5 -> claude-haiku-4-5-20251001
|
|
@@ -149,21 +161,26 @@ let _logger: { warn: (msg: string) => void } | null = null;
|
|
|
149
161
|
* Called once from the plugin's `register()` function.
|
|
150
162
|
*
|
|
151
163
|
* Resolution order (highest priority first):
|
|
152
|
-
* 1.
|
|
153
|
-
* 2.
|
|
154
|
-
* 3.
|
|
164
|
+
* 1. Plugin config `extraction.model` (if provided)
|
|
165
|
+
* 2. Auto-derived from provider heuristic using env var API keys
|
|
166
|
+
* 3. OpenClaw's model provider config (api.config.models.providers)
|
|
155
167
|
* 4. Fallback: try common env vars (ZAI_API_KEY, OPENAI_API_KEY) for dev/test
|
|
168
|
+
*
|
|
169
|
+
* The `TOTALRECLAW_LLM_MODEL` user-facing override was removed in v1 —
|
|
170
|
+
* `deriveCheapModel(provider)` covers the 99% case and a model-level knob
|
|
171
|
+
* was adding config surface for no tangible win.
|
|
156
172
|
*/
|
|
157
173
|
export function initLLMClient(options: {
|
|
158
174
|
primaryModel?: string;
|
|
159
175
|
pluginConfig?: Record<string, unknown>;
|
|
176
|
+
openclawProviders?: Record<string, OpenClawProviderConfig>;
|
|
160
177
|
logger?: { warn: (msg: string) => void };
|
|
161
178
|
}): void {
|
|
162
179
|
_logger = options.logger ?? null;
|
|
163
180
|
_initialized = true;
|
|
164
181
|
_cachedConfig = null;
|
|
165
182
|
|
|
166
|
-
const { primaryModel, pluginConfig } = options;
|
|
183
|
+
const { primaryModel, pluginConfig, openclawProviders } = options;
|
|
167
184
|
|
|
168
185
|
// Check if extraction is explicitly disabled
|
|
169
186
|
const extraction = pluginConfig?.extraction as Record<string, unknown> | undefined;
|
|
@@ -179,44 +196,81 @@ export function initLLMClient(options: {
|
|
|
179
196
|
const modelName = parts.length >= 2 ? parts.slice(1).join('/') : primaryModel;
|
|
180
197
|
|
|
181
198
|
if (provider) {
|
|
182
|
-
// Find the API key for this provider
|
|
183
|
-
|
|
184
|
-
const
|
|
185
|
-
|
|
199
|
+
// Find the API key for this provider — first from env vars, then from
|
|
200
|
+
// OpenClaw's provider config (api.config.models.providers)
|
|
201
|
+
const keyNames = PROVIDER_KEY_NAMES[provider];
|
|
202
|
+
let apiKey = keyNames
|
|
203
|
+
? keyNames.map((name) => CONFIG.llmApiKeys[name]).find(Boolean)
|
|
186
204
|
: undefined;
|
|
187
205
|
|
|
188
|
-
|
|
189
|
-
const baseUrl = PROVIDER_BASE_URLS[provider];
|
|
190
|
-
if (baseUrl) {
|
|
191
|
-
// Determine model: env override > plugin config > auto-derived
|
|
192
|
-
const model =
|
|
193
|
-
process.env.TOTALRECLAW_LLM_MODEL ??
|
|
194
|
-
(typeof extraction?.model === 'string' ? extraction.model : null) ??
|
|
195
|
-
deriveCheapModel(provider, modelName);
|
|
206
|
+
let baseUrl = PROVIDER_BASE_URLS[provider];
|
|
196
207
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
208
|
+
// If no env var key found, check OpenClaw's provider config
|
|
209
|
+
if (!apiKey && openclawProviders) {
|
|
210
|
+
const ocProvider = openclawProviders[provider];
|
|
211
|
+
if (ocProvider?.apiKey) {
|
|
212
|
+
apiKey = ocProvider.apiKey;
|
|
213
|
+
if (ocProvider.baseUrl) {
|
|
214
|
+
baseUrl = ocProvider.baseUrl.replace(/\/+$/, '');
|
|
215
|
+
}
|
|
202
216
|
}
|
|
203
217
|
}
|
|
218
|
+
|
|
219
|
+
if (apiKey && baseUrl) {
|
|
220
|
+
// Determine model: plugin config > auto-derived
|
|
221
|
+
const model =
|
|
222
|
+
(typeof extraction?.model === 'string' ? extraction.model : null) ||
|
|
223
|
+
deriveCheapModel(provider, modelName);
|
|
224
|
+
|
|
225
|
+
const apiFormat: 'openai' | 'anthropic' =
|
|
226
|
+
provider === 'anthropic' ? 'anthropic' : 'openai';
|
|
227
|
+
|
|
228
|
+
_cachedConfig = { apiKey, baseUrl, model, apiFormat };
|
|
229
|
+
return;
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
// --- Fallback: try OpenClaw provider configs (any provider with an apiKey) ---
|
|
235
|
+
if (openclawProviders) {
|
|
236
|
+
for (const [providerName, providerConfig] of Object.entries(openclawProviders)) {
|
|
237
|
+
if (!providerConfig?.apiKey) continue;
|
|
238
|
+
|
|
239
|
+
const provider = providerName.toLowerCase();
|
|
240
|
+
let baseUrl = providerConfig.baseUrl?.replace(/\/+$/, '') || PROVIDER_BASE_URLS[provider];
|
|
241
|
+
if (!baseUrl) continue;
|
|
242
|
+
|
|
243
|
+
// Pick a model from the provider's configured models, or use our default
|
|
244
|
+
const firstModelId = providerConfig.models?.[0]?.id;
|
|
245
|
+
const model =
|
|
246
|
+
(typeof extraction?.model === 'string' ? extraction.model : null) ||
|
|
247
|
+
(firstModelId ? deriveCheapModel(provider, firstModelId) : null);
|
|
248
|
+
|
|
249
|
+
if (!model) continue;
|
|
250
|
+
|
|
251
|
+
const apiFormat: 'openai' | 'anthropic' =
|
|
252
|
+
providerConfig.api === 'anthropic-messages' || provider === 'anthropic'
|
|
253
|
+
? 'anthropic'
|
|
254
|
+
: 'openai';
|
|
255
|
+
|
|
256
|
+
_cachedConfig = { apiKey: providerConfig.apiKey, baseUrl, model, apiFormat };
|
|
257
|
+
return;
|
|
204
258
|
}
|
|
205
259
|
}
|
|
206
260
|
|
|
207
|
-
// --- Fallback: try common env
|
|
261
|
+
// --- Fallback: try common env var API keys (for dev/test without OpenClaw config) ---
|
|
208
262
|
const fallbackProviders: Array<[string, string, string]> = [
|
|
209
|
-
['zai', '
|
|
210
|
-
['openai', '
|
|
211
|
-
['anthropic', '
|
|
212
|
-
['gemini', '
|
|
263
|
+
['zai', 'zai', 'glm-4.5-flash'],
|
|
264
|
+
['openai', 'openai', 'gpt-4.1-mini'],
|
|
265
|
+
['anthropic', 'anthropic', 'claude-haiku-4-5-20251001'],
|
|
266
|
+
['gemini', 'gemini', 'gemini-2.0-flash'],
|
|
213
267
|
];
|
|
214
268
|
|
|
215
|
-
for (const [provider,
|
|
216
|
-
const apiKey =
|
|
269
|
+
for (const [provider, keyName, defaultModel] of fallbackProviders) {
|
|
270
|
+
const apiKey = CONFIG.llmApiKeys[keyName];
|
|
217
271
|
if (apiKey) {
|
|
218
|
-
const model =
|
|
219
|
-
(typeof extraction?.model === 'string' ? extraction.model : null)
|
|
272
|
+
const model =
|
|
273
|
+
(typeof extraction?.model === 'string' ? extraction.model : null) ||
|
|
220
274
|
defaultModel;
|
|
221
275
|
|
|
222
276
|
const apiFormat: 'openai' | 'anthropic' =
|
|
@@ -253,17 +307,16 @@ export function resolveLLMConfig(): LLMClientConfig | null {
|
|
|
253
307
|
}
|
|
254
308
|
|
|
255
309
|
// Legacy fallback: if initLLMClient() was never called (e.g. running outside
|
|
256
|
-
// the plugin context), try the
|
|
257
|
-
const zaiKey =
|
|
258
|
-
const openaiKey =
|
|
310
|
+
// the plugin context), try the config-based approach for backwards compat.
|
|
311
|
+
const zaiKey = CONFIG.llmApiKeys.zai;
|
|
312
|
+
const openaiKey = CONFIG.llmApiKeys.openai;
|
|
259
313
|
|
|
260
|
-
const model =
|
|
261
|
-
?? (zaiKey ? 'glm-4.5-flash' : 'gpt-4.1-mini');
|
|
314
|
+
const model = zaiKey ? 'glm-4.5-flash' : 'gpt-4.1-mini';
|
|
262
315
|
|
|
263
316
|
if (zaiKey) {
|
|
264
317
|
return {
|
|
265
318
|
apiKey: zaiKey,
|
|
266
|
-
baseUrl: 'https://api.z.ai/api/paas/v4',
|
|
319
|
+
baseUrl: 'https://api.z.ai/api/coding/paas/v4',
|
|
267
320
|
model,
|
|
268
321
|
apiFormat: 'openai',
|
|
269
322
|
};
|
|
@@ -413,6 +466,6 @@ async function chatCompletionAnthropic(
|
|
|
413
466
|
// ---------------------------------------------------------------------------
|
|
414
467
|
|
|
415
468
|
// Embeddings are now generated locally via @huggingface/transformers
|
|
416
|
-
// (
|
|
469
|
+
// (Harrier-OSS-v1-270M ONNX model). No API key needed.
|
|
417
470
|
// See embedding.ts for implementation details.
|
|
418
471
|
export { generateEmbedding, getEmbeddingDims } from './embedding.js';
|
package/lsh.ts
CHANGED
|
@@ -1,48 +1,18 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* TotalReclaw Plugin - LSH Hasher
|
|
2
|
+
* TotalReclaw Plugin - LSH Hasher
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
6
|
-
* derived from the user's master key, so the same embedding always hashes to
|
|
7
|
-
* the same buckets across sessions.
|
|
4
|
+
* Re-exports `WasmLshHasher` from `@totalreclaw/core` as `LSHHasher`
|
|
5
|
+
* for backward compatibility with existing plugin code.
|
|
8
6
|
*
|
|
9
|
-
*
|
|
10
|
-
* 1. Seed (32 bytes from HKDF) -> HKDF per table -> random bytes
|
|
11
|
-
* 2. Random bytes -> Box-Muller transform -> Gaussian-distributed hyperplanes
|
|
12
|
-
* 3. Embedding dot hyperplane -> sign bit -> N-bit signature per table
|
|
13
|
-
* 4. Signature -> `lsh_t{table}_{signature}` -> SHA-256 -> blind hash
|
|
14
|
-
*
|
|
15
|
-
* The blind hashes are merged with the existing blind word indices in the
|
|
16
|
-
* `blind_indices` array. The server never knows which hashes are word-based
|
|
17
|
-
* and which are LSH-based.
|
|
18
|
-
*
|
|
19
|
-
* Default parameters:
|
|
20
|
-
* - 32 bits per table (balanced discrimination vs. recall)
|
|
21
|
-
* - 20 tables (moderate table count for good coverage)
|
|
22
|
-
* - Middle ground between 64-bit x 12 (too strict) and 12-bit x 28 (too loose)
|
|
23
|
-
*
|
|
24
|
-
* Dependencies: @noble/hashes only (already in project).
|
|
7
|
+
* Default parameters: 32 bits per table, 20 tables.
|
|
25
8
|
*/
|
|
26
9
|
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
/** Default number of independent hash tables. */
|
|
35
|
-
const DEFAULT_N_TABLES = 20;
|
|
36
|
-
|
|
37
|
-
/** Default number of bits (hyperplanes) per table. */
|
|
38
|
-
const DEFAULT_N_BITS = 32;
|
|
39
|
-
|
|
40
|
-
/** Number of bytes needed per Gaussian float via Box-Muller (2 x uint32 = 8 bytes). */
|
|
41
|
-
const BYTES_PER_FLOAT = 8;
|
|
42
|
-
|
|
43
|
-
// ---------------------------------------------------------------------------
|
|
44
|
-
// LSHHasher
|
|
45
|
-
// ---------------------------------------------------------------------------
|
|
10
|
+
// Lazy-load WASM to avoid crash when npm install hasn't finished yet.
|
|
11
|
+
let _WasmLshHasher: typeof import('@totalreclaw/core')['WasmLshHasher'] | null = null;
|
|
12
|
+
function getWasmLshHasher() {
|
|
13
|
+
if (!_WasmLshHasher) _WasmLshHasher = require('@totalreclaw/core').WasmLshHasher;
|
|
14
|
+
return _WasmLshHasher!;
|
|
15
|
+
}
|
|
46
16
|
|
|
47
17
|
/**
|
|
48
18
|
* Random Hyperplane LSH hasher.
|
|
@@ -51,207 +21,48 @@ const BYTES_PER_FLOAT = 8;
|
|
|
51
21
|
* Construct once per session; call `hash()` for every store/search operation.
|
|
52
22
|
*/
|
|
53
23
|
export class LSHHasher {
|
|
54
|
-
|
|
55
|
-
* Flat hyperplane storage.
|
|
56
|
-
*
|
|
57
|
-
* `hyperplanes[t]` is a Float64Array of length `dims * nBits` containing the
|
|
58
|
-
* hyperplane matrix for table `t`. The hyperplane for bit `b` starts at
|
|
59
|
-
* offset `b * dims`.
|
|
60
|
-
*/
|
|
61
|
-
private hyperplanes: Float64Array[];
|
|
62
|
-
|
|
63
|
-
/** Embedding dimensionality. */
|
|
64
|
-
private readonly dims: number;
|
|
65
|
-
|
|
66
|
-
/** Number of independent hash tables. */
|
|
67
|
-
private readonly nTables: number;
|
|
68
|
-
|
|
69
|
-
/** Number of bits (hyperplanes) per table. */
|
|
70
|
-
private readonly nBits: number;
|
|
24
|
+
private inner: InstanceType<typeof import('@totalreclaw/core')['WasmLshHasher']>;
|
|
71
25
|
|
|
72
26
|
/**
|
|
73
27
|
* Create a new LSH hasher.
|
|
74
28
|
*
|
|
75
|
-
* @param seed
|
|
76
|
-
* @param dims
|
|
29
|
+
* @param seed - 32-byte seed from `deriveLshSeed()` in crypto.ts.
|
|
30
|
+
* @param dims - Embedding dimensionality (e.g. 640 for Harrier).
|
|
77
31
|
* @param nTables - Number of independent hash tables (default 20).
|
|
78
32
|
* @param nBits - Number of bits per table (default 32).
|
|
79
33
|
*/
|
|
80
34
|
constructor(
|
|
81
35
|
seed: Uint8Array,
|
|
82
36
|
dims: number,
|
|
83
|
-
nTables: number =
|
|
84
|
-
nBits: number =
|
|
37
|
+
nTables: number = 20,
|
|
38
|
+
nBits: number = 32,
|
|
85
39
|
) {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
}
|
|
89
|
-
if (dims < 1) {
|
|
90
|
-
throw new Error(`dims must be positive, got ${dims}`);
|
|
91
|
-
}
|
|
92
|
-
if (nTables < 1) {
|
|
93
|
-
throw new Error(`nTables must be positive, got ${nTables}`);
|
|
94
|
-
}
|
|
95
|
-
if (nBits < 1) {
|
|
96
|
-
throw new Error(`nBits must be positive, got ${nBits}`);
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
this.dims = dims;
|
|
100
|
-
this.nTables = nTables;
|
|
101
|
-
this.nBits = nBits;
|
|
102
|
-
this.hyperplanes = new Array(nTables);
|
|
103
|
-
|
|
104
|
-
// Generate hyperplane matrices deterministically from the seed.
|
|
105
|
-
for (let t = 0; t < nTables; t++) {
|
|
106
|
-
this.hyperplanes[t] = this.generateTableHyperplanes(seed, t);
|
|
107
|
-
}
|
|
108
|
-
}
|
|
109
|
-
|
|
110
|
-
// -------------------------------------------------------------------------
|
|
111
|
-
// Hyperplane generation (deterministic from seed)
|
|
112
|
-
// -------------------------------------------------------------------------
|
|
113
|
-
|
|
114
|
-
/**
|
|
115
|
-
* Generate the hyperplane matrix for a single table.
|
|
116
|
-
*
|
|
117
|
-
* Each table gets a unique HKDF-derived byte stream. We consume 8 bytes
|
|
118
|
-
* per Gaussian sample (Box-Muller uses two uniform uint32 values).
|
|
119
|
-
*
|
|
120
|
-
* The hyperplanes are NOT normalised to unit length. Normalisation is
|
|
121
|
-
* unnecessary because we only care about the sign of the dot product,
|
|
122
|
-
* which is scale-invariant.
|
|
123
|
-
*/
|
|
124
|
-
private generateTableHyperplanes(seed: Uint8Array, tableIndex: number): Float64Array {
|
|
125
|
-
const totalFloats = this.dims * this.nBits;
|
|
126
|
-
const totalBytes = totalFloats * BYTES_PER_FLOAT;
|
|
127
|
-
|
|
128
|
-
// Derive enough random bytes for this table.
|
|
129
|
-
// HKDF can produce up to 255 * HashLen bytes (255 * 32 = 8,160 for SHA-256).
|
|
130
|
-
// For large dims (e.g. 1536 * 64 * 8 = 786,432 bytes) we need multiple
|
|
131
|
-
// HKDF calls with sub-block indexing.
|
|
132
|
-
const randomBytes = this.deriveRandomBytes(
|
|
133
|
-
seed,
|
|
134
|
-
`lsh_table_${tableIndex}`,
|
|
135
|
-
totalBytes,
|
|
136
|
-
);
|
|
137
|
-
|
|
138
|
-
// Convert the random bytes to Gaussian-distributed floats via Box-Muller.
|
|
139
|
-
const hyperplaneMatrix = new Float64Array(totalFloats);
|
|
140
|
-
const view = new DataView(randomBytes.buffer, randomBytes.byteOffset, randomBytes.byteLength);
|
|
141
|
-
|
|
142
|
-
for (let i = 0; i < totalFloats; i++) {
|
|
143
|
-
const offset = i * BYTES_PER_FLOAT;
|
|
144
|
-
// Two uint32 values -> two uniform [0,1) samples -> one Gaussian via Box-Muller.
|
|
145
|
-
const u1Raw = view.getUint32(offset, true);
|
|
146
|
-
const u2Raw = view.getUint32(offset + 4, true);
|
|
147
|
-
|
|
148
|
-
// Map to (0, 1] -- avoid exactly 0 for the log in Box-Muller.
|
|
149
|
-
const u1 = (u1Raw + 1) / (0xFFFFFFFF + 2);
|
|
150
|
-
const u2 = (u2Raw + 1) / (0xFFFFFFFF + 2);
|
|
151
|
-
|
|
152
|
-
// Box-Muller transform (we only need one of the two outputs).
|
|
153
|
-
hyperplaneMatrix[i] = Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
|
|
154
|
-
}
|
|
155
|
-
|
|
156
|
-
return hyperplaneMatrix;
|
|
157
|
-
}
|
|
158
|
-
|
|
159
|
-
/**
|
|
160
|
-
* Derive `length` pseudo-random bytes from the seed using HKDF with
|
|
161
|
-
* chunked sub-blocks.
|
|
162
|
-
*
|
|
163
|
-
* A single HKDF-SHA256 call can output at most 255 * 32 = 8,160 bytes.
|
|
164
|
-
* For large embedding dimensions we need more, so we iterate over
|
|
165
|
-
* sub-block indices as part of the info string.
|
|
166
|
-
*/
|
|
167
|
-
private deriveRandomBytes(
|
|
168
|
-
seed: Uint8Array,
|
|
169
|
-
baseInfo: string,
|
|
170
|
-
length: number,
|
|
171
|
-
): Uint8Array {
|
|
172
|
-
const MAX_HKDF_OUTPUT = 255 * 32; // SHA-256 hash length = 32
|
|
173
|
-
const result = new Uint8Array(length);
|
|
174
|
-
let offset = 0;
|
|
175
|
-
let blockIndex = 0;
|
|
176
|
-
|
|
177
|
-
while (offset < length) {
|
|
178
|
-
const remaining = length - offset;
|
|
179
|
-
const chunkLen = Math.min(remaining, MAX_HKDF_OUTPUT);
|
|
180
|
-
const info = Buffer.from(`${baseInfo}_block_${blockIndex}`, 'utf8');
|
|
181
|
-
const chunk = hkdf(sha256, seed, new Uint8Array(0), info, chunkLen);
|
|
182
|
-
result.set(new Uint8Array(chunk), offset);
|
|
183
|
-
offset += chunkLen;
|
|
184
|
-
blockIndex++;
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
return result;
|
|
40
|
+
const seedHex = Buffer.from(seed).toString('hex');
|
|
41
|
+
this.inner = getWasmLshHasher().withParams(seedHex, dims, nTables, nBits);
|
|
188
42
|
}
|
|
189
43
|
|
|
190
|
-
// -------------------------------------------------------------------------
|
|
191
|
-
// Hash function
|
|
192
|
-
// -------------------------------------------------------------------------
|
|
193
|
-
|
|
194
44
|
/**
|
|
195
45
|
* Hash an embedding vector to an array of blind-hashed bucket IDs.
|
|
196
46
|
*
|
|
197
|
-
* For each table:
|
|
198
|
-
* 1. Compute the 64-bit signature (sign of dot product with each hyperplane).
|
|
199
|
-
* 2. Build the bucket string: `lsh_t{tableIndex}_{binarySignature}`.
|
|
200
|
-
* 3. SHA-256 the bucket string to produce a blind hash (hex).
|
|
201
|
-
*
|
|
202
47
|
* @param embedding - The embedding vector (must have `dims` elements).
|
|
203
48
|
* @returns Array of `nTables` hex strings (one blind hash per table).
|
|
204
49
|
*/
|
|
205
50
|
hash(embedding: number[]): string[] {
|
|
206
|
-
|
|
207
|
-
throw new Error(
|
|
208
|
-
`Embedding dimension mismatch: expected ${this.dims}, got ${embedding.length}`,
|
|
209
|
-
);
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
const results: string[] = new Array(this.nTables);
|
|
213
|
-
|
|
214
|
-
for (let t = 0; t < this.nTables; t++) {
|
|
215
|
-
const matrix = this.hyperplanes[t];
|
|
216
|
-
|
|
217
|
-
// Build the binary signature.
|
|
218
|
-
const bits = new Array<string>(this.nBits);
|
|
219
|
-
for (let b = 0; b < this.nBits; b++) {
|
|
220
|
-
const baseOffset = b * this.dims;
|
|
221
|
-
let dot = 0;
|
|
222
|
-
for (let d = 0; d < this.dims; d++) {
|
|
223
|
-
dot += matrix[baseOffset + d] * embedding[d];
|
|
224
|
-
}
|
|
225
|
-
bits[b] = dot >= 0 ? '1' : '0';
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
const signature = bits.join('');
|
|
229
|
-
const bucketId = `lsh_t${t}_${signature}`;
|
|
230
|
-
|
|
231
|
-
// Blind-hash the bucket ID with SHA-256.
|
|
232
|
-
const hashBytes = sha256(Buffer.from(bucketId, 'utf8'));
|
|
233
|
-
results[t] = Buffer.from(hashBytes).toString('hex');
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
return results;
|
|
51
|
+
return this.inner.hash(new Float64Array(embedding));
|
|
237
52
|
}
|
|
238
53
|
|
|
239
|
-
// -------------------------------------------------------------------------
|
|
240
|
-
// Accessors
|
|
241
|
-
// -------------------------------------------------------------------------
|
|
242
|
-
|
|
243
54
|
/** Number of hash tables. */
|
|
244
55
|
get tables(): number {
|
|
245
|
-
return this.
|
|
56
|
+
return this.inner.tables;
|
|
246
57
|
}
|
|
247
58
|
|
|
248
59
|
/** Number of bits per table. */
|
|
249
60
|
get bits(): number {
|
|
250
|
-
return this.
|
|
61
|
+
return this.inner.bits;
|
|
251
62
|
}
|
|
252
63
|
|
|
253
64
|
/** Embedding dimensionality. */
|
|
254
65
|
get dimensions(): number {
|
|
255
|
-
return this.
|
|
66
|
+
return this.inner.dimensions;
|
|
256
67
|
}
|
|
257
68
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@totalreclaw/totalreclaw",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "3.0.6",
|
|
4
4
|
"description": "End-to-end encrypted memory for AI agents — portable, yours forever. Automatic extraction, semantic search, and on-chain storage",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"keywords": [
|
|
@@ -24,12 +24,25 @@
|
|
|
24
24
|
"author": "TotalReclaw Team",
|
|
25
25
|
"license": "MIT",
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@
|
|
28
|
-
"@
|
|
29
|
-
"@
|
|
30
|
-
"
|
|
31
|
-
|
|
32
|
-
|
|
27
|
+
"@totalreclaw/client": "^1.2.0",
|
|
28
|
+
"@totalreclaw/core": "^2.0.0",
|
|
29
|
+
"@huggingface/transformers": "^4.0.1",
|
|
30
|
+
"onnxruntime-node": "^1.24.0"
|
|
31
|
+
},
|
|
32
|
+
"files": [
|
|
33
|
+
"*.ts",
|
|
34
|
+
"import-adapters/",
|
|
35
|
+
"!**/*.test.ts",
|
|
36
|
+
"!pocv2-e2e-test.ts",
|
|
37
|
+
"openclaw.plugin.json",
|
|
38
|
+
"SKILL.md",
|
|
39
|
+
"README.md",
|
|
40
|
+
"CLAWHUB.md",
|
|
41
|
+
"skill.json"
|
|
42
|
+
],
|
|
43
|
+
"scripts": {
|
|
44
|
+
"check-scanner": "node ../scripts/check-scanner.mjs",
|
|
45
|
+
"prepublishOnly": "node ../scripts/check-scanner.mjs"
|
|
33
46
|
},
|
|
34
47
|
"openclaw": {
|
|
35
48
|
"extensions": [
|