@totalreclaw/totalreclaw 3.3.1-rc.15 → 3.3.1-rc.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -4,46 +4,37 @@ All notable changes to `@totalreclaw/totalreclaw` (the OpenClaw plugin) are docu
4
4
 
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
- ## [3.3.1-rc.15] — 2026-04-24
8
-
9
- Install-time unblock for bandwidth-constrained hosts. Lazy-loads the ONNX
10
- runtime instead of forcing a ~216MB download during `openclaw plugins
11
- install`.
12
-
13
- ### Lazy-loaded ONNX / `@huggingface/transformers`
14
-
15
- `openclaw plugins install @totalreclaw/totalreclaw` on slow hosts (VPNs,
16
- CI containers with limited bandwidth, metered connections) was exceeding
17
- the plugin-install timeout mid-download and getting SIGTERM'd, leaving
18
- the plugin partially installed. Root cause: `@huggingface/transformers`
19
- was a direct `dependency`, which transitively pulled
20
- `onnxruntime-node`'s postinstall binary fetch (~216MB from GitHub
21
- Releases).
22
-
23
- - `@huggingface/transformers` and `onnxruntime-node` moved from
24
- `dependencies` to optional `peerDependencies`
25
- (`peerDependenciesMeta.<name>.optional: true`). npm v7+ and the
26
- OpenClaw install path (`--legacy-peer-deps`) both skip these by
27
- default — plugin install is now lean.
28
- - `embedding.ts` converts the static
29
- `import { AutoTokenizer, AutoModel, pipeline } from
30
- '@huggingface/transformers'` into a dynamic `await import(...)` on
31
- the first `generateEmbedding` call. If the optional peer is missing,
32
- the error surfaces a clear install hint:
33
- `npm install @huggingface/transformers`.
34
- - New regression test `lazy-load-embedding.test.ts` asserts the
35
- invariants (no top-level static runtime import; heavy packages not in
36
- `dependencies`; peer-dep `optional` flag set) so a future refactor
37
- can't silently reintroduce the install-time block.
38
-
39
- **User impact:** users on constrained hosts can now install the plugin
40
- without the 216MB download. Users who want semantic memory (recall /
41
- search over encrypted facts) install `@huggingface/transformers`
42
- separately — one-time, resumable if it times out.
43
-
44
- Fixes [issue #92][i92] (QA bug 6 of 10, split from #84).
45
-
46
- [i92]: https://github.com/p-diogo/totalreclaw-internal/issues/92
7
+ ## [3.3.1-rc.16] — 2026-04-24
8
+
9
+ Fixes #92: slow-host install times out during ONNX-runtime / embedding-model
10
+ download. ONNX stays mandatory (no opt-in flag); first-call download is now
11
+ wrapped with timeout, progress, and retry UX so slow connections succeed
12
+ instead of silently hanging until OpenClaw SIGTERMs.
13
+
14
+ ### Embedding-model download UX
15
+
16
+ - New `download-ux.ts` module — pure stdlib, no third-party imports — exposes
17
+ `downloadWithUX(label, fn, opts)`. Wraps a download promise with:
18
+ - **Per-attempt timeout**, default 600s (covers ~575 KB/s for the 344 MB
19
+ Harrier model). Configurable via env `TOTALRECLAW_ONNX_INSTALL_TIMEOUT`
20
+ (in seconds). Per-attempt timeout grows 1x/2x/4x across retries.
21
+ - **60s keep-alive log** during long downloads so users on slow networks
22
+ see "still downloading… (Ns elapsed)" rather than a frozen prompt.
23
+ - **3-attempt exponential-backoff retry** (5s/10s backoff between attempts)
24
+ to absorb transient network blips.
25
+ - **Loud actionable error** on exhaustion: names the env var to extend the
26
+ timeout and the exact `openclaw plugins install totalreclaw` command to
27
+ rerun.
28
+ - `embedding.ts` now wraps `AutoTokenizer.from_pretrained`,
29
+ `AutoModel.from_pretrained`, and the `pipeline()` call with
30
+ `downloadWithUX`. Prints a user-visible "Downloading embedding model
31
+ (~344MB) — this may take a few minutes on slower connections. Please wait."
32
+ message before the first download starts.
33
+ - ONNX remains a mandatory hard `dependency` (no `[embedding]`-style opt-in
34
+ extra). Recall accuracy is unchanged.
35
+ - Regression: `test_issue_92_onnx_download_ux.test.ts` exercises happy path,
36
+ transient failure retry, full exhaustion, per-attempt timeout, and
37
+ keep-alive cadence. Wired into the plugin `npm test` chain.
47
38
 
48
39
  ## [3.3.1-rc.14] — 2026-04-24
49
40
 
package/download-ux.ts ADDED
@@ -0,0 +1,91 @@
1
+ /**
2
+ * download-ux.ts — Wrapper for heavy first-call downloads (rc.16, fixes #92).
3
+ *
4
+ * Wraps a download promise with:
5
+ * - per-attempt timeout (default 600s, override via TOTALRECLAW_ONNX_INSTALL_TIMEOUT in seconds)
6
+ * - 60s keep-alive log so slow-bandwidth users don't think it's frozen
7
+ * - 3-attempt exponential-backoff retry (per-attempt timeout grows 1x/2x/4x)
8
+ * - loud actionable error after exhaustion
9
+ *
10
+ * No third-party imports here — pure stdlib so the unit test can exercise it
11
+ * without pulling the heavy `@huggingface/transformers` chain.
12
+ */
13
+
14
/** Per-attempt base timeout (10 minutes) used when the env override is absent or invalid. */
const DEFAULT_DOWNLOAD_TIMEOUT_MS = 600_000;
/** Default interval between "still downloading…" keep-alive log lines. */
const KEEPALIVE_INTERVAL_MS = 60_000;
/** Default number of download attempts before giving up. */
const MAX_DOWNLOAD_ATTEMPTS = 3;
/**
 * Largest delay Node.js timers accept (signed 32-bit). A larger delay is
 * silently replaced by 1 ms, which would turn "very long timeout" into
 * "instant timeout".
 */
const MAX_TIMER_DELAY_MS = 2_147_483_647;

/**
 * Resolve the per-attempt base download timeout in milliseconds.
 *
 * Reads `TOTALRECLAW_ONNX_INSTALL_TIMEOUT` (expressed in seconds). Unset,
 * empty, non-numeric, non-finite, zero or negative values fall back to the
 * 600 s default.
 *
 * @returns The timeout in whole milliseconds, clamped to the range
 *   `[1, 2_147_483_647]` so the value is always a delay `setTimeout` honours.
 */
export function getDownloadTimeoutMs(): number {
  const raw = process.env.TOTALRECLAW_ONNX_INSTALL_TIMEOUT;
  if (!raw) return DEFAULT_DOWNLOAD_TIMEOUT_MS;

  const seconds = Number(raw);
  if (!Number.isFinite(seconds) || seconds <= 0) return DEFAULT_DOWNLOAD_TIMEOUT_MS;

  // Spec accepts seconds; convert to ms, then keep the result inside the
  // range timers can represent (sub-millisecond values round up to 1 ms).
  const ms = Math.floor(seconds * 1000);
  return Math.min(Math.max(ms, 1), MAX_TIMER_DELAY_MS);
}
26
+
27
+ export interface DownloadWithUXOpts {
28
+ /** Per-attempt base timeout in ms. When provided it takes precedence over the TOTALRECLAW_ONNX_INSTALL_TIMEOUT env var; the value doubles on each subsequent attempt. */
29
+ timeoutMs?: number;
30
+ /** Interval in ms between "still downloading…" keep-alive log lines (defaults to 60s). */
31
+ keepaliveMs?: number;
32
+ /** Maximum number of download attempts (defaults to 3). */
33
+ maxAttempts?: number;
34
+ /** Sink for keep-alive and retry messages (defaults to console.error). */
35
+ log?: (msg: string) => void;
36
+ /** Waits out the backoff between attempts; injectable so tests need not sleep (defaults to a setTimeout-based delay). */
37
+ sleep?: (ms: number) => Promise<void>;
38
+ }
39
+
40
/**
 * Run a heavy download with a per-attempt timeout, periodic keep-alive
 * logging, and retry with exponential backoff.
 *
 * Each attempt races `download()` against a timer. The timer starts at the
 * base timeout and doubles on every retry (1x/2x/4x…). Between failed
 * attempts the function waits 5s, 10s, 20s… (capped at 30s) via `opts.sleep`.
 *
 * Note: a timed-out attempt is abandoned, not cancelled — the promise
 * returned by `download()` keeps running in the background.
 *
 * @param label Short name of what is being downloaded; used in log lines.
 * @param download Factory invoked once per attempt to start the download.
 * @param opts Optional overrides for timeout, keep-alive cadence, attempt
 *   count, logger and sleep (see `DownloadWithUXOpts`).
 * @returns The value resolved by the first successful attempt.
 * @throws Error after all attempts fail; the message includes the last
 *   underlying error and names the env var that extends the timeout.
 */
export async function downloadWithUX<T>(
  label: string,
  download: () => Promise<T>,
  opts?: DownloadWithUXOpts,
): Promise<T> {
  // Node.js timers store the delay as a signed 32-bit integer; anything
  // larger is silently replaced by 1 ms, so clamp before scheduling.
  const TIMER_DELAY_LIMIT_MS = 2_147_483_647;

  const baseTimeoutMs = opts?.timeoutMs ?? getDownloadTimeoutMs();
  const keepaliveMs = Math.min(opts?.keepaliveMs ?? KEEPALIVE_INTERVAL_MS, TIMER_DELAY_LIMIT_MS);
  const requestedAttempts = opts?.maxAttempts ?? MAX_DOWNLOAD_ATTEMPTS;
  // Always try at least once: zero/negative/NaN would otherwise skip the
  // download entirely and report "failed after 0 attempts (last error: null)".
  const maxAttempts = Number.isFinite(requestedAttempts)
    ? Math.max(1, Math.floor(requestedAttempts))
    : MAX_DOWNLOAD_ATTEMPTS;
  const log = opts?.log ?? ((msg: string) => console.error(msg));
  const sleep =
    opts?.sleep ?? ((ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms)));

  let lastErr: unknown = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const attemptTimeoutMs = Math.min(baseTimeoutMs * 2 ** (attempt - 1), TIMER_DELAY_LIMIT_MS);
    const startedAt = Date.now();
    let timeoutTimer: ReturnType<typeof setTimeout> | undefined;
    const keepaliveTimer = setInterval(() => {
      const elapsedSec = Math.floor((Date.now() - startedAt) / 1000);
      log(`[TotalReclaw] ${label}: still downloading… (${elapsedSec}s elapsed, attempt ${attempt}/${maxAttempts})`);
    }, keepaliveMs);

    try {
      return await Promise.race([
        download(),
        new Promise<never>((_, reject) => {
          timeoutTimer = setTimeout(
            () => reject(new Error(`Download timeout after ${Math.floor(attemptTimeoutMs / 1000)}s (attempt ${attempt}/${maxAttempts})`)),
            attemptTimeoutMs,
          );
        }),
      ]);
    } catch (err) {
      lastErr = err;
    } finally {
      // Release both timers on every exit path. Leaving the timeout timer
      // armed after a successful download keeps the event loop (and thus the
      // process) alive for up to the full per-attempt timeout.
      clearInterval(keepaliveTimer);
      clearTimeout(timeoutTimer);
    }

    // Only reached when the attempt failed (success returns from the try).
    if (attempt < maxAttempts) {
      const msg = lastErr instanceof Error ? lastErr.message : String(lastErr);
      const backoffMs = Math.min(5_000 * 2 ** (attempt - 1), 30_000);
      log(`[TotalReclaw] ${label}: attempt ${attempt} failed (${msg}). Retrying in ${Math.floor(backoffMs / 1000)}s…`);
      await sleep(backoffMs);
    }
  }

  const finalMsg = lastErr instanceof Error ? lastErr.message : String(lastErr);
  throw new Error(
    `[TotalReclaw] Embedding model download failed after ${maxAttempts} attempts (last error: ${finalMsg}). ` +
      `Check your network connection and retry: \`openclaw plugins install totalreclaw\`. ` +
      `On slow connections, set TOTALRECLAW_ONNX_INSTALL_TIMEOUT=1200 (in seconds) to extend the per-attempt timeout.`,
  );
}
package/embedding.ts CHANGED
@@ -8,44 +8,18 @@
8
8
  * embedding model breaks search across an existing vault, so the
9
9
  * `TOTALRECLAW_EMBEDDING_MODEL` user-facing env var was removed in v1.
10
10
  *
11
- * Dependencies: @huggingface/transformers is declared as an optional peer
12
- * dependency. It is lazy-loaded on the first `generateEmbedding` call so
13
- * `openclaw plugins install @totalreclaw/totalreclaw` does not block on the
14
- * ~216MB onnxruntime-node native-binary download. Install it separately to
15
- * enable semantic search: `npm install @huggingface/transformers`.
11
+ * Dependencies: @huggingface/transformers
12
+ *
13
+ * Download UX (rc.16, fixes #92):
14
+ * First-call download is wrapped via `downloadWithUX` from `download-ux.ts`:
15
+ * configurable timeout (`TOTALRECLAW_ONNX_INSTALL_TIMEOUT`, default 600s),
16
+ * 60s keep-alive, 3-attempt exponential-backoff retry, loud actionable
17
+ * failure. Slow-bandwidth hosts no longer see a silent freeze.
16
18
  */
17
19
 
18
- // Type-only import — erased at compile time, no runtime dep on the package.
19
20
  // @ts-ignore - @huggingface/transformers types may not be perfect
20
- import type { FeatureExtractionPipeline } from '@huggingface/transformers';
21
-
22
- type HFTransformers = typeof import('@huggingface/transformers');
23
-
24
- /** Cached module handle after first successful dynamic import. */
25
- let transformersModule: HFTransformers | null = null;
26
-
27
- /**
28
- * Lazily import @huggingface/transformers. The package is declared as an
29
- * optional peer dependency so the plugin installs on bandwidth-constrained
30
- * hosts without pulling the onnxruntime-node native binary (~216MB). On first
31
- * use, try to load it; if the user never installed it, surface a clear
32
- * actionable error with the install command.
33
- */
34
- async function loadTransformers(): Promise<HFTransformers> {
35
- if (transformersModule) return transformersModule;
36
- try {
37
- // @ts-ignore - dynamic import target is the optional peer dep
38
- transformersModule = (await import('@huggingface/transformers')) as HFTransformers;
39
- return transformersModule;
40
- } catch (err) {
41
- const hint =
42
- '[TotalReclaw] @huggingface/transformers is not installed. ' +
43
- 'Semantic memory requires it (one-time ~216MB download of ONNX runtime + model). ' +
44
- 'Install with: npm install @huggingface/transformers';
45
- const detail = err instanceof Error ? err.message : String(err);
46
- throw new Error(`${hint}\nUnderlying load error: ${detail}`);
47
- }
48
- }
21
+ import { AutoTokenizer, AutoModel, pipeline, type FeatureExtractionPipeline } from '@huggingface/transformers';
22
+ import { downloadWithUX, getDownloadTimeoutMs } from './download-ux.js';
49
23
 
50
24
  interface ModelConfig {
51
25
  id: string;
@@ -78,32 +52,45 @@ let activeModel: ModelConfig | null = null;
78
52
  /**
79
53
  * Generate an embedding vector for the given text.
80
54
  *
81
- * On first call, dynamically imports @huggingface/transformers (requires it
82
- * to be installed see module docstring) and downloads the ONNX model
83
- * (cached after download). Subsequent calls reuse the loaded module + model
84
- * and run in ~100ms.
55
+ * On first call, downloads and loads the ONNX model (cached after download).
56
+ * Subsequent calls reuse the loaded model and run in ~100ms.
85
57
  */
86
58
  export async function generateEmbedding(
87
59
  text: string,
88
60
  options?: { isQuery?: boolean },
89
61
  ): Promise<number[]> {
90
62
  if (!activeModel) {
91
- const { AutoTokenizer, AutoModel, pipeline } = await loadTransformers();
92
63
  activeModel = getModelConfig();
93
- console.error(`[TotalReclaw] Downloading embedding model (${activeModel.size}, one-time setup)...`);
94
- console.error('[TotalReclaw] This enables semantic search across your encrypted memories.');
64
+ const timeoutSec = Math.floor(getDownloadTimeoutMs() / 1000);
65
+ console.error(
66
+ `[TotalReclaw] Downloading embedding model (${activeModel.size}) — this may take a few minutes on slower connections. Please wait.`,
67
+ );
68
+ console.error(
69
+ `[TotalReclaw] One-time setup. Per-attempt timeout: ${timeoutSec}s (configurable via TOTALRECLAW_ONNX_INSTALL_TIMEOUT). Cached after first download.`,
70
+ );
95
71
 
96
72
  if (activeModel.pooling === 'sentence_embedding') {
97
73
  // Harrier: use AutoModel (pipeline doesn't support sentence_embedding output)
98
- autoTokenizer = await AutoTokenizer.from_pretrained(activeModel.id);
99
- autoModel = await AutoModel.from_pretrained(activeModel.id, {
100
- dtype: activeModel.dtype as any,
101
- });
74
+ autoTokenizer = await downloadWithUX(
75
+ 'tokenizer',
76
+ () => AutoTokenizer.from_pretrained(activeModel!.id),
77
+ );
78
+ autoModel = await downloadWithUX(
79
+ 'embedding model',
80
+ () =>
81
+ AutoModel.from_pretrained(activeModel!.id, {
82
+ dtype: activeModel!.dtype as any,
83
+ }),
84
+ );
102
85
  } else {
103
86
  // e5-small / Qwen: use pipeline
104
- pipelineExtractor = await pipeline('feature-extraction', activeModel.id, {
105
- dtype: activeModel.dtype as any,
106
- });
87
+ pipelineExtractor = await downloadWithUX(
88
+ 'embedding pipeline',
89
+ () =>
90
+ pipeline('feature-extraction', activeModel!.id, {
91
+ dtype: activeModel!.dtype as any,
92
+ }),
93
+ );
107
94
  }
108
95
  console.error('[TotalReclaw] Embedding model ready. Future startups will be instant.');
109
96
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@totalreclaw/totalreclaw",
3
- "version": "3.3.1-rc.15",
3
+ "version": "3.3.1-rc.16",
4
4
  "description": "End-to-end encrypted, agent-portable memory for OpenClaw and any LLM-agent runtime. XChaCha20-Poly1305 with protobuf v4 + on-chain Memory Taxonomy v1 (claim / preference / directive / commitment / episode / summary).",
5
5
  "type": "module",
6
6
  "keywords": [
@@ -31,26 +31,16 @@
31
31
  "author": "TotalReclaw Team",
32
32
  "license": "MIT",
33
33
  "dependencies": {
34
+ "@huggingface/transformers": "^4.0.1",
34
35
  "@totalreclaw/client": "^1.2.0",
35
36
  "@totalreclaw/core": "^2.1.1",
36
37
  "@types/qrcode": "^1.5.6",
37
38
  "@types/ws": "^8.5.12",
39
+ "onnxruntime-node": "^1.24.0",
38
40
  "qrcode": "^1.5.4",
39
41
  "qrcode-terminal": "^0.12.0",
40
42
  "ws": "^8.18.3"
41
43
  },
42
- "peerDependencies": {
43
- "@huggingface/transformers": "^4.0.1",
44
- "onnxruntime-node": "^1.24.0"
45
- },
46
- "peerDependenciesMeta": {
47
- "@huggingface/transformers": {
48
- "optional": true
49
- },
50
- "onnxruntime-node": {
51
- "optional": true
52
- }
53
- },
54
44
  "files": [
55
45
  "*.ts",
56
46
  "import-adapters/",
@@ -64,7 +54,7 @@
64
54
  "skill.json"
65
55
  ],
66
56
  "scripts": {
67
- "test": "npx tsx manifest-shape.test.ts && npx tsx config-schema.test.ts && npx tsx llm-profile-reader.test.ts && npx tsx llm-client.test.ts && npx tsx llm-client-retry.test.ts && npx tsx gateway-url.test.ts && npx tsx retype-setscope.test.ts && npx tsx tool-gating.test.ts && npx tsx onboarding-noninteractive.test.ts && npx tsx pair-cli-json.test.ts && npx tsx pair-qr.test.ts && npx tsx pair-remote-client.test.ts && npx tsx qa-bug-report.test.ts && npx tsx nonce-serialization.test.ts && npx tsx phrase-safety-registry.test.ts && npx tsx lazy-load-embedding.test.ts",
57
+ "test": "npx tsx manifest-shape.test.ts && npx tsx config-schema.test.ts && npx tsx llm-profile-reader.test.ts && npx tsx llm-client.test.ts && npx tsx llm-client-retry.test.ts && npx tsx gateway-url.test.ts && npx tsx retype-setscope.test.ts && npx tsx tool-gating.test.ts && npx tsx onboarding-noninteractive.test.ts && npx tsx pair-cli-json.test.ts && npx tsx pair-qr.test.ts && npx tsx pair-remote-client.test.ts && npx tsx qa-bug-report.test.ts && npx tsx nonce-serialization.test.ts && npx tsx phrase-safety-registry.test.ts && npx tsx test_issue_92_onnx_download_ux.test.ts",
68
58
  "check-scanner": "node ../scripts/check-scanner.mjs",
69
59
  "prepublishOnly": "node ../scripts/check-scanner.mjs"
70
60
  },