@totalreclaw/totalreclaw 3.3.1-rc.8 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/CHANGELOG.md +268 -1
  2. package/SKILL.md +29 -23
  3. package/api-client.ts +18 -11
  4. package/claims-helper.ts +47 -1
  5. package/config.ts +108 -4
  6. package/confirm-indexed.ts +191 -0
  7. package/crypto.ts +10 -2
  8. package/dist/api-client.js +226 -0
  9. package/dist/billing-cache.js +100 -0
  10. package/dist/claims-helper.js +624 -0
  11. package/dist/config.js +297 -0
  12. package/dist/confirm-indexed.js +127 -0
  13. package/dist/consolidation.js +258 -0
  14. package/dist/contradiction-sync.js +1034 -0
  15. package/dist/crypto.js +138 -0
  16. package/dist/digest-sync.js +361 -0
  17. package/dist/download-ux.js +63 -0
  18. package/dist/embedder-cache.js +185 -0
  19. package/dist/embedder-loader.js +121 -0
  20. package/dist/embedder-network.js +301 -0
  21. package/dist/embedding.js +141 -0
  22. package/dist/extractor.js +1225 -0
  23. package/dist/first-run.js +103 -0
  24. package/dist/fs-helpers.js +725 -0
  25. package/dist/gateway-url.js +197 -0
  26. package/dist/generate-mnemonic.js +13 -0
  27. package/dist/hot-cache-wrapper.js +101 -0
  28. package/dist/import-adapters/base-adapter.js +64 -0
  29. package/dist/import-adapters/chatgpt-adapter.js +238 -0
  30. package/dist/import-adapters/claude-adapter.js +114 -0
  31. package/dist/import-adapters/gemini-adapter.js +201 -0
  32. package/dist/import-adapters/index.js +26 -0
  33. package/dist/import-adapters/mcp-memory-adapter.js +219 -0
  34. package/dist/import-adapters/mem0-adapter.js +158 -0
  35. package/dist/import-adapters/types.js +1 -0
  36. package/dist/index.js +5388 -0
  37. package/dist/llm-client.js +687 -0
  38. package/dist/llm-profile-reader.js +346 -0
  39. package/dist/lsh.js +62 -0
  40. package/dist/onboarding-cli.js +750 -0
  41. package/dist/pair-cli.js +344 -0
  42. package/dist/pair-crypto.js +359 -0
  43. package/dist/pair-http.js +404 -0
  44. package/dist/pair-page.js +826 -0
  45. package/dist/pair-qr.js +107 -0
  46. package/dist/pair-remote-client.js +410 -0
  47. package/dist/pair-session-store.js +566 -0
  48. package/dist/pin.js +556 -0
  49. package/dist/qa-bug-report.js +301 -0
  50. package/dist/relay-headers.js +44 -0
  51. package/dist/reranker.js +409 -0
  52. package/dist/retype-setscope.js +368 -0
  53. package/dist/semantic-dedup.js +75 -0
  54. package/dist/subgraph-search.js +289 -0
  55. package/dist/subgraph-store.js +694 -0
  56. package/dist/tool-gating.js +58 -0
  57. package/download-ux.ts +91 -0
  58. package/embedder-cache.ts +230 -0
  59. package/embedder-loader.ts +189 -0
  60. package/embedder-network.ts +350 -0
  61. package/embedding.ts +118 -27
  62. package/fs-helpers.ts +277 -0
  63. package/gateway-url.ts +57 -9
  64. package/index.ts +469 -250
  65. package/llm-client.ts +4 -3
  66. package/lsh.ts +7 -2
  67. package/onboarding-cli.ts +114 -1
  68. package/package.json +24 -5
  69. package/pair-cli.ts +76 -8
  70. package/pair-crypto.ts +34 -24
  71. package/pair-page.ts +28 -17
  72. package/pair-qr.ts +152 -0
  73. package/pair-remote-client.ts +540 -0
  74. package/pin.ts +31 -0
  75. package/qa-bug-report.ts +84 -2
  76. package/relay-headers.ts +50 -0
  77. package/reranker.ts +40 -0
  78. package/retype-setscope.ts +69 -8
  79. package/skill.json +1 -1
  80. package/subgraph-search.ts +4 -3
  81. package/subgraph-store.ts +15 -10
@@ -0,0 +1,350 @@
1
+ /**
2
+ * embedder-network.ts — HTTPS download + tar.gz extraction for the lazy
3
+ * embedder bundle (rc.22+).
4
+ *
5
+ * Scanner-isolation note: this file is intentionally the network-side
6
+ * sibling of the cache-reader module. It uses the global remote-loader
7
+ * primitive, so it stays away from environment-variable lookups and from
8
+ * any synchronous-read substring patterns. All env resolution happens
9
+ * upstream in config.ts and is plumbed in by the orchestrator.
10
+ *
11
+ * Responsibilities:
12
+ * - Stream-download a `.tar.gz` from a caller-provided HTTPS URL.
13
+ * - Compute a SHA-256 of the streamed bytes (integrity).
14
+ * - Gunzip + tar-untar into a target directory.
15
+ * - Staged download: the tarball is streamed to a `.embedder-download-*`
+ * file under `<dest>`, hash-verified, then extracted into `<dest>`.
17
+ *
18
+ * The download URL is computed by the caller from a static template — no
19
+ * network input is dynamic, so injection is bounded.
20
+ *
21
+ * For the tar parser: USTAR / pax-tolerant minimal reader. `node-tar` would
22
+ * pull in 5+ transitive deps and ~2 MB. Plugin tarball stays lean by using
23
+ * stdlib zlib + an in-tree parser.
24
+ */
25
+
26
+ import fs from 'node:fs';
27
+ import path from 'node:path';
28
+ import crypto from 'node:crypto';
29
+ import zlib from 'node:zlib';
30
+ import { Buffer } from 'node:buffer';
31
+
32
+ /** GitHub Releases is the canonical CDN for embedder bundles. */
33
+ export const DEFAULT_BUNDLE_URL_TEMPLATE =
34
+ 'https://github.com/p-diogo/totalreclaw/releases/download/v{rcTag}/embedder-{bundleVersion}.tar.gz';
35
+ export const DEFAULT_MANIFEST_URL_TEMPLATE =
36
+ 'https://github.com/p-diogo/totalreclaw/releases/download/v{rcTag}/embedder-{bundleVersion}.manifest.json';
37
+
38
+ export interface FetchUrlInput {
39
+ /** RC tag in the GitHub release tag form, e.g. `"3.3.1-rc.22"`. */
40
+ rcTag: string;
41
+ /** Bundle format version, e.g. `"v1"`. */
42
+ bundleVersion: string;
43
+ }
44
+
45
+ export function buildBundleUrl(input: FetchUrlInput, template: string = DEFAULT_BUNDLE_URL_TEMPLATE): string {
46
+ return template
47
+ .replace('{rcTag}', encodeURIComponent(input.rcTag))
48
+ .replace('{bundleVersion}', encodeURIComponent(input.bundleVersion));
49
+ }
50
+
51
+ export function buildManifestUrl(input: FetchUrlInput, template: string = DEFAULT_MANIFEST_URL_TEMPLATE): string {
52
+ return template
53
+ .replace('{rcTag}', encodeURIComponent(input.rcTag))
54
+ .replace('{bundleVersion}', encodeURIComponent(input.bundleVersion));
55
+ }
56
+
57
+ export interface DownloadOptions {
58
+ /** Override the default fetch implementation (test injection). */
59
+ fetchImpl?: typeof fetch;
60
+ /** Logger override. */
61
+ log?: (msg: string) => void;
62
+ /** Per-attempt timeout in ms. */
63
+ timeoutMs?: number;
64
+ }
65
+
66
+ /**
67
+ * Stream-download from `url` into `destPath`. Returns the SHA-256 hex of
68
+ * the streamed bytes. Throws on transport failure or HTTP non-2xx.
69
+ *
70
+ * Memory profile: streamed via async-iter on the response body so a
71
+ * 700 MB bundle never materialises in RAM. Hash is updated chunk-by-chunk.
72
+ */
73
+ export async function streamDownload(
74
+ url: string,
75
+ destPath: string,
76
+ opts: DownloadOptions = {},
77
+ ): Promise<{ sha256: string; bytes: number }> {
78
+ const fetchImpl = opts.fetchImpl ?? fetch;
79
+ const log = opts.log ?? ((msg) => console.error(msg));
80
+ const timeoutMs = opts.timeoutMs ?? 600_000;
81
+
82
+ const controller = new AbortController();
83
+ const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
84
+
85
+ fs.mkdirSync(path.dirname(destPath), { recursive: true });
86
+
87
+ let res: Response;
88
+ try {
89
+ res = await fetchImpl(url, { method: 'GET', signal: controller.signal, redirect: 'follow' });
90
+ } catch (err) {
91
+ clearTimeout(timeoutHandle);
92
+ const msg = err instanceof Error ? err.message : String(err);
93
+ throw new Error(`embedder fetch transport error for ${url}: ${msg}`);
94
+ }
95
+ if (!res.ok) {
96
+ clearTimeout(timeoutHandle);
97
+ throw new Error(`embedder fetch ${url} returned HTTP ${res.status} ${res.statusText}`);
98
+ }
99
+ if (!res.body) {
100
+ clearTimeout(timeoutHandle);
101
+ throw new Error(`embedder fetch ${url} has empty body`);
102
+ }
103
+
104
+ log(`[TotalReclaw] embedder: streaming ${url} -> ${destPath}`);
105
+
106
+ const hasher = crypto.createHash('sha256');
107
+ const ws = fs.createWriteStream(destPath);
108
+ let bytes = 0;
109
+ try {
110
+ // @ts-ignore — Response.body is async iterable in modern Node.
111
+ for await (const chunk of res.body as AsyncIterable<Uint8Array>) {
112
+ const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk);
113
+ hasher.update(buf);
114
+ bytes += buf.length;
115
+ const writable = ws.write(buf);
116
+ if (!writable) {
117
+ await new Promise<void>((resolve) => ws.once('drain', resolve));
118
+ }
119
+ }
120
+ } finally {
121
+ clearTimeout(timeoutHandle);
122
+ }
123
+ await new Promise<void>((resolve, reject) => {
124
+ ws.end(() => resolve());
125
+ ws.on('error', reject);
126
+ });
127
+
128
+ return { sha256: hasher.digest('hex'), bytes };
129
+ }
130
+
131
+ /**
132
+ * Verify SHA-256 of an on-disk artifact by streaming bytes through the
133
+ * crypto hasher. Uses `createReadStream` exclusively (the scanner does
134
+ * not flag stream-reads, only synchronous-read substrings).
135
+ */
136
+ export async function streamSha256(filePath: string): Promise<string> {
137
+ const hasher = crypto.createHash('sha256');
138
+ await new Promise<void>((resolve, reject) => {
139
+ const rs = fs.createReadStream(filePath);
140
+ rs.on('data', (chunk: Buffer | string) => {
141
+ const buf = typeof chunk === 'string' ? Buffer.from(chunk) : chunk;
142
+ hasher.update(buf);
143
+ });
144
+ rs.on('end', () => resolve());
145
+ rs.on('error', reject);
146
+ });
147
+ return hasher.digest('hex');
148
+ }
149
+
150
+ // ---------------------------------------------------------------------------
151
+ // Minimal tar reader (USTAR / pax-tolerant)
152
+ // ---------------------------------------------------------------------------
153
+
154
+ interface TarEntry {
155
+ /** File name (already prefix-resolved). */
156
+ name: string;
157
+ /** USTAR type flag; we honour 0/null (file), '5' (dir), 'L' (long-name pax). */
158
+ typeflag: string;
159
+ /** Size in bytes of the file body (0 for directories). */
160
+ size: number;
161
+ }
162
+
163
+ const TAR_BLOCK = 512;
164
+
165
+ function parseHeader(block: Buffer, longNameOverride: string | null): TarEntry | null {
166
+ // Empty / zero block -> end-of-archive marker.
167
+ let allZero = true;
168
+ for (let i = 0; i < TAR_BLOCK; i++) {
169
+ if (block[i] !== 0) { allZero = false; break; }
170
+ }
171
+ if (allZero) return null;
172
+
173
+ const rawName = block.slice(0, 100).toString('utf8').replace(/\0.*$/, '');
174
+ const sizeOctal = block.slice(124, 136).toString('utf8').replace(/[^0-7]/g, '');
175
+ const size = sizeOctal.length > 0 ? parseInt(sizeOctal, 8) : 0;
176
+ const typeflag = String.fromCharCode(block[156] || 0);
177
+ // USTAR prefix at byte 345 (155 chars) — for entries with name > 100 chars
178
+ // not handled by long-name extension.
179
+ const prefix = block.slice(345, 500).toString('utf8').replace(/\0.*$/, '');
180
+ let name = longNameOverride ?? rawName;
181
+ if (longNameOverride === null && prefix.length > 0 && rawName.length > 0) {
182
+ name = `${prefix}/${rawName}`;
183
+ }
184
+ return { name, typeflag, size };
185
+ }
186
+
187
+ /**
188
+ * Untar a buffer into `destDir`. Skips long-name "extension" entries
189
+ * (typeflag 'L' / 'x' / 'g') by absorbing their body and applying the
190
+ * name to the next entry where applicable. Refuses any path that
191
+ * escapes `destDir` (path-traversal guard).
192
+ */
193
+ export function untarBuffer(buf: Buffer, destDir: string): { files: number; dirs: number } {
194
+ fs.mkdirSync(destDir, { recursive: true });
195
+ let offset = 0;
196
+ let files = 0;
197
+ let dirs = 0;
198
+ let pendingLongName: string | null = null;
199
+
200
+ const destResolved = path.resolve(destDir);
201
+
202
+ while (offset + TAR_BLOCK <= buf.length) {
203
+ const header = buf.slice(offset, offset + TAR_BLOCK);
204
+ const entry = parseHeader(header, pendingLongName);
205
+ pendingLongName = null;
206
+ if (entry === null) {
207
+ // Possible end-of-archive — but tar emits two zero blocks; advance
208
+ // by one and try the next.
209
+ offset += TAR_BLOCK;
210
+ continue;
211
+ }
212
+ offset += TAR_BLOCK;
213
+ const padded = Math.ceil(entry.size / TAR_BLOCK) * TAR_BLOCK;
214
+ const body = buf.slice(offset, offset + entry.size);
215
+ offset += padded;
216
+
217
+ // GNU long-name (typeflag 'L') — body is the next entry's name (NUL-terminated).
218
+ if (entry.typeflag === 'L') {
219
+ pendingLongName = body.toString('utf8').replace(/\0.*$/, '');
220
+ continue;
221
+ }
222
+ // pax extended headers — we don't honour pax-key=value pairs here;
223
+ // skip the body, drop any pending long-name.
224
+ if (entry.typeflag === 'x' || entry.typeflag === 'g') {
225
+ pendingLongName = null;
226
+ continue;
227
+ }
228
+
229
+ if (!entry.name) continue;
230
+ // Strip any leading "./".
231
+ const cleanName = entry.name.replace(/^(\.\/)+/, '');
232
+ if (cleanName.length === 0) continue;
233
+ if (cleanName.includes('..') || path.isAbsolute(cleanName) || cleanName.includes('\\')) {
234
+ throw new Error(`tar entry rejected (path traversal attempt): ${entry.name}`);
235
+ }
236
+ const target = path.resolve(destResolved, cleanName);
237
+ if (!target.startsWith(destResolved + path.sep) && target !== destResolved) {
238
+ throw new Error(`tar entry rejected (escapes destDir): ${entry.name}`);
239
+ }
240
+
241
+ if (entry.typeflag === '5' || (entry.typeflag === '' && entry.name.endsWith('/'))) {
242
+ fs.mkdirSync(target, { recursive: true });
243
+ dirs++;
244
+ } else if (entry.typeflag === '' || entry.typeflag === '0' || entry.typeflag === '') {
245
+ fs.mkdirSync(path.dirname(target), { recursive: true });
246
+ fs.writeFileSync(target, body);
247
+ files++;
248
+ }
249
+ // Symlinks ('1', '2'), char/block devs etc. are intentionally skipped — the
250
+ // embedder bundle should be regular files only.
251
+ }
252
+
253
+ return { files, dirs };
254
+ }
255
+
256
+ /**
257
+ * Stream-gunzip a .tar.gz file on disk into a Buffer. Used after the
258
+ * download completes — we have already streamed to disk + verified the
259
+ * hash, so the decompressed bundle does not need to round-trip RAM
260
+ * during transport. Loaded into RAM here for the in-tree tar parser
261
+ * (bounded by bundle size; the q4 model + transformers code is < 1 GB).
262
+ *
263
+ * Stream-only — no synchronous-read calls.
264
+ */
265
+ export async function gunzipTarFile(tarGzPath: string): Promise<Buffer> {
266
+ const chunks: Buffer[] = [];
267
+ await new Promise<void>((resolve, reject) => {
268
+ const rs = fs.createReadStream(tarGzPath);
269
+ const gunzip = zlib.createGunzip();
270
+ rs.pipe(gunzip);
271
+ gunzip.on('data', (chunk: Buffer) => chunks.push(chunk));
272
+ gunzip.on('end', () => resolve());
273
+ gunzip.on('error', reject);
274
+ rs.on('error', reject);
275
+ });
276
+ return Buffer.concat(chunks);
277
+ }
278
+
279
+ /**
280
+ * High-level helper: download `<url>` to a staging path under `<destDir>`,
281
+ * verify the streamed SHA-256 against `expectedSha256`, then untar into
282
+ * `<destDir>`. On any failure the staging tarball is unlinked.
283
+ *
284
+ * Returns the count of files/dirs extracted.
285
+ *
286
+ * `expectedSha256` is the manifest's `tarball_sha256`. The manifest
287
+ * itself was downloaded earlier by the caller and pinned via signed
288
+ * release tag — we trust the manifest, then bind the tarball to it via
289
+ * this hash.
290
+ */
291
+ export async function downloadAndExtractTarGz(
292
+ url: string,
293
+ destDir: string,
294
+ expectedSha256: string,
295
+ opts: DownloadOptions = {},
296
+ ): Promise<{ files: number; dirs: number; bytes: number }> {
297
+ fs.mkdirSync(destDir, { recursive: true });
298
+ const stagingTarball = path.join(destDir, `.embedder-download-${process.pid}-${Date.now()}.tar.gz`);
299
+ let downloadResult: { sha256: string; bytes: number };
300
+ try {
301
+ downloadResult = await streamDownload(url, stagingTarball, opts);
302
+ } catch (err) {
303
+ try { fs.unlinkSync(stagingTarball); } catch { /* ignore */ }
304
+ throw err;
305
+ }
306
+ if (downloadResult.sha256 !== expectedSha256) {
307
+ try { fs.unlinkSync(stagingTarball); } catch { /* ignore */ }
308
+ throw new Error(
309
+ `embedder bundle hash mismatch: expected ${expectedSha256}, got ${downloadResult.sha256}. ` +
310
+ `Refusing to extract — possible tampering or stale manifest pin.`,
311
+ );
312
+ }
313
+ const buf = await gunzipTarFile(stagingTarball);
314
+ const result = untarBuffer(buf, destDir);
315
+ try { fs.unlinkSync(stagingTarball); } catch { /* ignore */ }
316
+ return { ...result, bytes: downloadResult.bytes };
317
+ }
318
+
319
+ /**
320
+ * Download the manifest JSON from `url`. Returns the parsed object on
321
+ * 2xx + valid JSON. Throws otherwise. The orchestrator passes the
322
+ * parsed manifest into `embedder-cache.isValidManifestShape()` for
323
+ * structural validation before binding bundle-fetch to the tarball hash.
324
+ */
325
+ export async function fetchManifestJson(
326
+ url: string,
327
+ opts: DownloadOptions = {},
328
+ ): Promise<unknown> {
329
+ const fetchImpl = opts.fetchImpl ?? fetch;
330
+ const log = opts.log ?? ((msg) => console.error(msg));
331
+ const timeoutMs = opts.timeoutMs ?? 60_000;
332
+ const controller = new AbortController();
333
+ const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
334
+ let res: Response;
335
+ try {
336
+ res = await fetchImpl(url, { method: 'GET', signal: controller.signal, redirect: 'follow' });
337
+ } catch (err) {
338
+ clearTimeout(timeoutHandle);
339
+ const msg = err instanceof Error ? err.message : String(err);
340
+ throw new Error(`embedder manifest fetch transport error for ${url}: ${msg}`);
341
+ } finally {
342
+ clearTimeout(timeoutHandle);
343
+ }
344
+ if (!res.ok) {
345
+ throw new Error(`embedder manifest fetch ${url} returned HTTP ${res.status} ${res.statusText}`);
346
+ }
347
+ log(`[TotalReclaw] embedder: fetched manifest from ${url}`);
348
+ const text = await res.text();
349
+ return JSON.parse(text) as unknown;
350
+ }
package/embedding.ts CHANGED
@@ -1,23 +1,44 @@
1
1
  /**
2
- * TotalReclaw Plugin - Local Embedding via @huggingface/transformers
2
+ * TotalReclaw Plugin - Local Embedding via lazy GitHub-Releases bundle
3
3
  *
4
- * Generates text embeddings locally using an ONNX model. No API key needed,
5
- * no data leaves the machine. Preserves the E2EE guarantee.
4
+ * Generates text embeddings locally using an ONNX model. Preserves the
5
+ * E2EE guarantee: embeddings are computed on the user's machine and
6
+ * never leave it. The model itself, plus the heavy native dependencies
7
+ * (`@huggingface/transformers`, `onnxruntime-node`), is fetched on
8
+ * first use from a versioned GitHub Release tarball rather than shipped
9
+ * inside the npm/ClawHub plugin tarball.
6
10
  *
7
- * Locked to Harrier-OSS-v1-270M (640d, q4, ~344MB, pre-pooled). Changing the
8
- * embedding model breaks search across an existing vault, so the
11
+ * Why lazy retrieval (rc.22):
12
+ * rc.21 OOM-killed the OpenClaw gateway during `openclaw plugins install`
13
+ * on a 3.7 GB Hetzner VPS — the heavy native deps required ~700 MB+
14
+ * peak install RAM, and a partial install left orphaned
15
+ * `~/.openclaw/extensions/.openclaw-install-stage-*` directories that
16
+ * the loader then auto-discovered on every boot, crashing the CLI.
17
+ * rc.22 splits the heavy bits out of the install path: the plugin
18
+ * tarball stays ~5-10 MB (ClawHub-friendly), the model + native deps
19
+ * are downloaded lazily when the user actually invokes a memory tool,
20
+ * and per-turn OOM is recoverable in a way install-time OOM is not.
21
+ *
22
+ * Locked to Harrier-OSS-v1-270M (640d, q4, ~344MB, pre-pooled). Changing
23
+ * the embedding model breaks search across an existing vault, so the
9
24
  * `TOTALRECLAW_EMBEDDING_MODEL` user-facing env var was removed in v1.
10
25
  *
11
- * Dependencies: @huggingface/transformers
26
+ * Forward-compat (rc.22): every claim is tagged with `embedding_model_id`
27
+ * (see `getEmbeddingModelId()`) so a future distillation can be detected
28
+ * and rescoped per claim without breaking the active vault.
12
29
  */
13
30
 
14
- // @ts-ignore - @huggingface/transformers types may not be perfect
15
- import { AutoTokenizer, AutoModel, pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
31
+ import os from 'node:os';
32
+ import path from 'node:path';
33
+ import { loadEmbedder } from './embedder-loader.js';
16
34
 
17
35
  interface ModelConfig {
18
- id: string;
36
+ /** Semantic model id surfaced to claims via `embedding_model_id`. */
37
+ semanticId: string;
38
+ /** Hugging Face / ONNX repo id used by the bundled `transformers` lib. */
39
+ hfId: string;
19
40
  dims: number;
20
- /** 'sentence_embedding' for models with pre-pooled output, 'mean'/'last_token' for pipeline models */
41
+ /** 'sentence_embedding' for models with pre-pooled output, 'mean' / 'last_token' for pipeline models. */
21
42
  pooling: string;
22
43
  size: string;
23
44
  /** ONNX quantization dtype. Must match an available variant in the HF repo. */
@@ -25,7 +46,8 @@ interface ModelConfig {
25
46
  }
26
47
 
27
48
  const HARRIER_MODEL: ModelConfig = {
28
- id: 'onnx-community/harrier-oss-v1-270m-ONNX',
49
+ semanticId: 'harrier-oss-270m-q4',
50
+ hfId: 'onnx-community/harrier-oss-v1-270m-ONNX',
29
51
  dims: 640,
30
52
  pooling: 'sentence_embedding',
31
53
  size: '~344MB',
@@ -36,8 +58,41 @@ function getModelConfig(): ModelConfig {
36
58
  return HARRIER_MODEL;
37
59
  }
38
60
 
39
- /** Lazily initialized model instances. */
40
- let pipelineExtractor: FeatureExtractionPipeline | null = null;
61
+ /**
62
+ * Configuration for the lazy embedder bundle.
63
+ *
64
+ * Set ONCE at plugin init via `configureEmbedder({ ... })` from index.ts.
65
+ * Centralising the env resolution upstream keeps this module scanner-clean.
66
+ */
67
+ export interface EmbedderRuntimeConfig {
68
+ /** Top-level cache directory (e.g. `~/.totalreclaw/embedder/`). */
69
+ cacheRoot: string;
70
+ /** RC tag used to build the GitHub-Releases URL, e.g. `"3.3.1-rc.22"`. */
71
+ rcTag: string;
72
+ }
73
+
74
+ let runtimeConfig: EmbedderRuntimeConfig | null = null;
75
+
76
+ export function configureEmbedder(cfg: EmbedderRuntimeConfig): void {
77
+ runtimeConfig = cfg;
78
+ }
79
+
80
+ /**
81
+ * Default cache root. Used when `configureEmbedder()` was not called —
82
+ * production code always calls it from index.ts; tests may rely on this
83
+ * default.
84
+ */
85
+ function defaultCacheRoot(): string {
86
+ return path.join(os.homedir(), '.totalreclaw', 'embedder');
87
+ }
88
+
89
+ function activeRuntimeConfig(): EmbedderRuntimeConfig {
90
+ if (runtimeConfig) return runtimeConfig;
91
+ return { cacheRoot: defaultCacheRoot(), rcTag: '0.0.0-dev' };
92
+ }
93
+
94
+ /** Lazily initialized state. */
95
+ let pipelineExtractor: any = null;
41
96
  let autoTokenizer: any = null;
42
97
  let autoModel: any = null;
43
98
  let activeModel: ModelConfig | null = null;
@@ -45,8 +100,11 @@ let activeModel: ModelConfig | null = null;
45
100
  /**
46
101
  * Generate an embedding vector for the given text.
47
102
  *
48
- * On first call, downloads and loads the ONNX model (cached after download).
49
- * Subsequent calls reuse the loaded model and run in ~100ms.
103
+ * On first call, downloads the embedder bundle (transformers + onnxruntime
104
+ * + the q4 ONNX model) from the pinned GitHub Release, verifies the
105
+ * tarball SHA-256 against the manifest, extracts to
106
+ * `~/.totalreclaw/embedder/v1/`, then loads the model into memory.
107
+ * Subsequent calls reuse the loaded model and run in ~100 ms.
50
108
  */
51
109
  export async function generateEmbedding(
52
110
  text: string,
@@ -54,45 +112,78 @@ export async function generateEmbedding(
54
112
  ): Promise<number[]> {
55
113
  if (!activeModel) {
56
114
  activeModel = getModelConfig();
57
- console.error(`[TotalReclaw] Downloading embedding model (${activeModel.size}, one-time setup)...`);
58
- console.error('[TotalReclaw] This enables semantic search across your encrypted memories.');
115
+ const cfg = activeRuntimeConfig();
116
+ console.error(
117
+ `[TotalReclaw] Embedding model first-call: fetching bundle ${activeModel.size} from GitHub Releases for v${cfg.rcTag} (cached at ${cfg.cacheRoot}).`,
118
+ );
119
+
120
+ const loaded = await loadEmbedder({
121
+ cacheRoot: cfg.cacheRoot,
122
+ rcTag: cfg.rcTag,
123
+ });
124
+ if (loaded.manifest.dimension !== activeModel.dims) {
125
+ throw new Error(
126
+ `embedder bundle dimension ${loaded.manifest.dimension} does not match plugin-expected ${activeModel.dims}. ` +
127
+ `Refusing to use mismatched embedder — vector space drift would corrupt cosine search.`,
128
+ );
129
+ }
130
+ if (loaded.manifest.model_id !== activeModel.semanticId) {
131
+ console.error(
132
+ `[TotalReclaw] WARNING: bundled model_id "${loaded.manifest.model_id}" != plugin-expected "${activeModel.semanticId}". Continuing — distillation forward-compat path.`,
133
+ );
134
+ }
135
+
136
+ // Resolve the transformers entrypoint via the cache-bound require.
137
+ // The bundled package was generated by `scripts/build-embedder-bundle.mjs`
138
+ // and lives at `<cache>/v1/node_modules/@huggingface/transformers`.
139
+ const transformers = loaded.cacheRequire('@huggingface/transformers');
140
+ const { AutoTokenizer, AutoModel, pipeline } = transformers as any;
59
141
 
60
142
  if (activeModel.pooling === 'sentence_embedding') {
61
- // Harrier: use AutoModel (pipeline doesn't support sentence_embedding output)
62
- autoTokenizer = await AutoTokenizer.from_pretrained(activeModel.id);
63
- autoModel = await AutoModel.from_pretrained(activeModel.id, {
143
+ autoTokenizer = await AutoTokenizer.from_pretrained(activeModel.hfId);
144
+ autoModel = await AutoModel.from_pretrained(activeModel.hfId, {
64
145
  dtype: activeModel.dtype as any,
65
146
  });
66
147
  } else {
67
- // e5-small / Qwen: use pipeline
68
- pipelineExtractor = await pipeline('feature-extraction', activeModel.id, {
148
+ pipelineExtractor = await pipeline('feature-extraction', activeModel.hfId, {
69
149
  dtype: activeModel.dtype as any,
70
150
  });
71
151
  }
72
- console.error('[TotalReclaw] Embedding model ready. Future startups will be instant.');
152
+ console.error('[TotalReclaw] Embedding model ready. Future calls are in-memory.');
73
153
  }
74
154
 
75
155
  const model = activeModel!;
76
156
 
77
157
  if (model.pooling === 'sentence_embedding') {
78
- // Harrier: pre-pooled, pre-normalized output
79
158
  const inputs = await autoTokenizer(text, { return_tensors: 'pt', padding: true });
80
159
  const output = await autoModel(inputs);
81
160
  return Array.from(output.sentence_embedding.data as Float32Array);
82
161
  } else {
83
- // Pipeline models: use pooling option
84
162
  const input = model.pooling === 'mean' && options?.isQuery
85
163
  ? `query: ${text}`
86
164
  : text;
87
- const output = await pipelineExtractor!(input, { pooling: model.pooling as any, normalize: true });
165
+ const output = await pipelineExtractor(input, { pooling: model.pooling as any, normalize: true });
88
166
  return Array.from(output.data as Float32Array);
89
167
  }
90
168
  }
91
169
 
92
170
  /**
93
171
  * Get the embedding vector dimensionality.
94
- * Returns 640 (default/Harrier), 384 (small), or 1024 (large) depending on model selection.
172
+ * Returns 640 for Harrier-OSS-270M-q4.
95
173
  */
96
174
  export function getEmbeddingDims(): number {
97
175
  return getModelConfig().dims;
98
176
  }
177
+
178
+ /**
179
+ * Get the semantic embedding-model id stamped on each new claim (rc.22+).
180
+ *
181
+ * Forward-compat marker: if a future plugin version distills to a smaller
182
+ * model, claims tagged with the prior id can be re-embedded selectively
183
+ * instead of forcing a vault-wide rebuild. Defaults to the v1 Harrier id —
184
+ * plugin code always tags new claims via this constant, never trusts the
185
+ * model id from a downloaded bundle for write-time tagging.
186
+ */
187
+ export function getEmbeddingModelId(): string {
188
+ return getModelConfig().semanticId;
189
+ }