@totalreclaw/totalreclaw 3.3.1-rc.8 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. package/CHANGELOG.md +268 -1
  2. package/SKILL.md +29 -23
  3. package/api-client.ts +18 -11
  4. package/claims-helper.ts +47 -1
  5. package/config.ts +108 -4
  6. package/confirm-indexed.ts +191 -0
  7. package/crypto.ts +10 -2
  8. package/dist/api-client.js +226 -0
  9. package/dist/billing-cache.js +100 -0
  10. package/dist/claims-helper.js +624 -0
  11. package/dist/config.js +297 -0
  12. package/dist/confirm-indexed.js +127 -0
  13. package/dist/consolidation.js +258 -0
  14. package/dist/contradiction-sync.js +1034 -0
  15. package/dist/crypto.js +138 -0
  16. package/dist/digest-sync.js +361 -0
  17. package/dist/download-ux.js +63 -0
  18. package/dist/embedder-cache.js +185 -0
  19. package/dist/embedder-loader.js +121 -0
  20. package/dist/embedder-network.js +301 -0
  21. package/dist/embedding.js +141 -0
  22. package/dist/extractor.js +1225 -0
  23. package/dist/first-run.js +103 -0
  24. package/dist/fs-helpers.js +725 -0
  25. package/dist/gateway-url.js +197 -0
  26. package/dist/generate-mnemonic.js +13 -0
  27. package/dist/hot-cache-wrapper.js +101 -0
  28. package/dist/import-adapters/base-adapter.js +64 -0
  29. package/dist/import-adapters/chatgpt-adapter.js +238 -0
  30. package/dist/import-adapters/claude-adapter.js +114 -0
  31. package/dist/import-adapters/gemini-adapter.js +201 -0
  32. package/dist/import-adapters/index.js +26 -0
  33. package/dist/import-adapters/mcp-memory-adapter.js +219 -0
  34. package/dist/import-adapters/mem0-adapter.js +158 -0
  35. package/dist/import-adapters/types.js +1 -0
  36. package/dist/index.js +5388 -0
  37. package/dist/llm-client.js +687 -0
  38. package/dist/llm-profile-reader.js +346 -0
  39. package/dist/lsh.js +62 -0
  40. package/dist/onboarding-cli.js +750 -0
  41. package/dist/pair-cli.js +344 -0
  42. package/dist/pair-crypto.js +359 -0
  43. package/dist/pair-http.js +404 -0
  44. package/dist/pair-page.js +826 -0
  45. package/dist/pair-qr.js +107 -0
  46. package/dist/pair-remote-client.js +410 -0
  47. package/dist/pair-session-store.js +566 -0
  48. package/dist/pin.js +556 -0
  49. package/dist/qa-bug-report.js +301 -0
  50. package/dist/relay-headers.js +44 -0
  51. package/dist/reranker.js +409 -0
  52. package/dist/retype-setscope.js +368 -0
  53. package/dist/semantic-dedup.js +75 -0
  54. package/dist/subgraph-search.js +289 -0
  55. package/dist/subgraph-store.js +694 -0
  56. package/dist/tool-gating.js +58 -0
  57. package/download-ux.ts +91 -0
  58. package/embedder-cache.ts +230 -0
  59. package/embedder-loader.ts +189 -0
  60. package/embedder-network.ts +350 -0
  61. package/embedding.ts +118 -27
  62. package/fs-helpers.ts +277 -0
  63. package/gateway-url.ts +57 -9
  64. package/index.ts +469 -250
  65. package/llm-client.ts +4 -3
  66. package/lsh.ts +7 -2
  67. package/onboarding-cli.ts +114 -1
  68. package/package.json +24 -5
  69. package/pair-cli.ts +76 -8
  70. package/pair-crypto.ts +34 -24
  71. package/pair-page.ts +28 -17
  72. package/pair-qr.ts +152 -0
  73. package/pair-remote-client.ts +540 -0
  74. package/pin.ts +31 -0
  75. package/qa-bug-report.ts +84 -2
  76. package/relay-headers.ts +50 -0
  77. package/reranker.ts +40 -0
  78. package/retype-setscope.ts +69 -8
  79. package/skill.json +1 -1
  80. package/subgraph-search.ts +4 -3
  81. package/subgraph-store.ts +15 -10
@@ -0,0 +1,185 @@
1
+ /**
2
+ * embedder-cache.ts — pure-FS reader for the lazy embedder bundle (rc.22+).
3
+ *
4
+ * Scanner-isolation note: this module reads from disk AND verifies SHA-256
5
+ * hashes. It MUST NOT contain any of the network-trigger substrings the
6
+ * OpenClaw skill scanner gates on — see `skill/scripts/check-scanner.mjs`
7
+ * for the rule list. The network side of the lazy-retrieval flow lives in a
8
+ * sibling module (the downloader), and the orchestrator imports both.
9
+ *
10
+ * Responsibilities:
11
+ * - Resolve the on-disk cache layout (`<root>/v1/`, with `manifest.json`
12
+ * + `node_modules/` + `model/`).
13
+ * - Synchronously load + parse the manifest JSON.
14
+ * - Verify the cache is intact: every file listed in `manifest.files`
15
+ * exists at the expected path with the SHA-256 hash declared in the
16
+ * manifest. Any mismatch invalidates the cache so the loader rebuilds.
17
+ *
18
+ * The manifest format is the contract between this file and the bundle
19
+ * generation script (`scripts/build-embedder-bundle.mjs`):
20
+ * {
21
+ * "version": "v1", // bundle format version
22
+ * "model_id": "harrier-oss-270m-q4", // semantic model identifier
23
+ * "dimension": 640, // output vector size
24
+ * "tarball_sha256": "<hex>", // informational only here
25
+ * "tarball_size_bytes": <int>, // informational only here
26
+ * "files": [
27
+ * { "path": "node_modules/.../foo.js", "sha256": "<hex>", "size": <int> },
28
+ * ...
29
+ * ]
30
+ * }
31
+ *
32
+ * Hard rule for this file: stdlib only — `node:fs` + `node:crypto` +
33
+ * `node:path`. No env reads, no remote retrievals.
34
+ */
35
+ import fs from 'node:fs';
36
+ import path from 'node:path';
37
+ import crypto from 'node:crypto';
38
/** On-disk bundle format version; change it only when the cache layout itself changes. */
export const BUNDLE_FORMAT_VERSION = 'v1';
/**
 * Derive every path of the on-disk embedder cache from its root directory.
 *
 * @param {string} cacheRoot - Directory that holds the versioned cache.
 * @returns {{root: string, versionRoot: string, manifestPath: string, nodeModulesPath: string, modelPath: string}}
 *   `versionRoot` is `<cacheRoot>/<BUNDLE_FORMAT_VERSION>`; the remaining
 *   paths are fixed children of `versionRoot`.
 */
export function resolveCacheLayout(cacheRoot) {
    const versionRoot = path.join(cacheRoot, BUNDLE_FORMAT_VERSION);
    const inVersionRoot = (child) => path.join(versionRoot, child);
    return {
        root: cacheRoot,
        versionRoot,
        manifestPath: inVersionRoot('manifest.json'),
        nodeModulesPath: inVersionRoot('node_modules'),
        modelPath: inVersionRoot('model'),
    };
}
50
/**
 * Load the cache manifest from `layout.manifestPath`.
 *
 * Every failure mode — file missing or unreadable, invalid JSON, or a
 * document rejected by `isValidManifestShape` — collapses to `null`, which
 * callers treat as a cache miss.
 *
 * @param {{manifestPath: string}} layout - Cache layout holding the manifest path.
 * @returns {object | null} The parsed manifest, or `null` on any failure.
 */
export function readManifest(layout) {
    try {
        const text = fs.readFileSync(layout.manifestPath, 'utf8');
        const candidate = JSON.parse(text);
        return isValidManifestShape(candidate) ? candidate : null;
    }
    catch {
        // Read error, parse error, or guard failure: all are a cache miss.
        return null;
    }
}
73
/**
 * Shape guard for a parsed manifest. Strict on every required field; lax
 * on extra fields so bundle-generation tools may add diagnostics without
 * tripping verification.
 *
 * Required fields:
 *  - `version`, `model_id`: non-empty strings.
 *  - `dimension`: finite number greater than zero.
 *  - `tarball_sha256`: 64 lowercase hex characters.
 *  - `tarball_size_bytes`: non-negative integer.
 *  - `files`: array of `{ path, sha256, size }` where `path` is a non-empty
 *    relative path without `..`, a leading `/`, or a backslash, `sha256` is
 *    64 lowercase hex characters and `size` is a non-negative integer.
 *
 * @param {unknown} obj - Value to inspect (typically the result of `JSON.parse`).
 * @returns {boolean} `true` only when every required field is well-formed.
 */
export function isValidManifestShape(obj) {
    if (!obj || typeof obj !== 'object')
        return false;
    const sha256Hex = /^[0-9a-f]{64}$/;
    const isNonEmptyString = (value) => typeof value === 'string' && value.length > 0;
    const isSha256 = (value) => typeof value === 'string' && sha256Hex.test(value);
    // Byte counts must be exact: a plain `< 0` comparison lets NaN, Infinity
    // (reachable through JSON such as `1e999`) and fractions slip through.
    const isByteCount = (value) => Number.isInteger(value) && value >= 0;
    const m = obj;
    if (!isNonEmptyString(m.version) || !isNonEmptyString(m.model_id))
        return false;
    if (typeof m.dimension !== 'number' || !Number.isFinite(m.dimension) || m.dimension <= 0)
        return false;
    if (!isSha256(m.tarball_sha256) || !isByteCount(m.tarball_size_bytes))
        return false;
    if (!Array.isArray(m.files))
        return false;
    for (const entry of m.files) {
        if (!entry || typeof entry !== 'object')
            return false;
        if (!isNonEmptyString(entry.path) || !isSha256(entry.sha256) || !isByteCount(entry.size))
            return false;
        // Block path-traversal up front — any `..`, absolute path, or
        // backslash makes the entry untrusted.
        if (entry.path.includes('..') || entry.path.startsWith('/') || entry.path.includes('\\'))
            return false;
    }
    return true;
}
111
/**
 * Hash a file's full contents with SHA-256.
 *
 * The file is read synchronously into memory in one go before hashing.
 *
 * @param {string} filePath - File to hash.
 * @returns {string | null} Lowercase hex digest, or `null` if anything throws.
 */
export function sha256OfFile(filePath) {
    try {
        const hasher = crypto.createHash('sha256');
        hasher.update(fs.readFileSync(filePath));
        return hasher.digest('hex');
    }
    catch {
        return null;
    }
}
124
/**
 * Check the on-disk cache against its manifest.
 *
 * The manifest version must equal `BUNDLE_FORMAT_VERSION`. Then, for each
 * entry of `manifest.files` (resolved under `layout.versionRoot`), the path
 * must exist, be a regular file, have the declared size, and hash to the
 * declared SHA-256.
 *
 * Stops at the first problem found rather than collecting all of them.
 *
 * @param {{versionRoot: string}} layout - Cache layout.
 * @param {{version: string, files: Array<{path: string, sha256: string, size: number}>}} manifest
 * @returns {{ok: boolean, reason: string, offendingPath: string}}
 *   `ok: true` with empty strings on success; otherwise a description of
 *   the first failure and the manifest-relative path that caused it.
 */
export function verifyCache(layout, manifest) {
    const failure = (reason, offendingPath) => ({ ok: false, reason, offendingPath });
    if (manifest.version !== BUNDLE_FORMAT_VERSION) {
        return failure(`cache manifest version "${manifest.version}" does not match expected "${BUNDLE_FORMAT_VERSION}"`, 'manifest.json');
    }
    for (const entry of manifest.files) {
        const relPath = entry.path;
        const onDisk = path.join(layout.versionRoot, relPath);
        let info;
        try {
            info = fs.statSync(onDisk);
        }
        catch {
            return failure(`cache missing file: ${relPath}`, relPath);
        }
        if (!info.isFile()) {
            return failure(`cache entry not a regular file: ${relPath}`, relPath);
        }
        if (info.size !== entry.size) {
            return failure(`cache size mismatch for ${relPath}: expected ${entry.size}, got ${info.size}`, relPath);
        }
        // Size is checked first so the full read + hash only runs when it matches.
        if (sha256OfFile(onDisk) !== entry.sha256) {
            return failure(`cache hash mismatch for ${relPath}`, relPath);
        }
    }
    return { ok: true, reason: '', offendingPath: '' };
}
174
/**
 * Lightweight check run before a full `verifyCache` pass.
 *
 * @param {{manifestPath: string}} layout - Cache layout.
 * @returns {{hasManifest: boolean, manifest: object | null}}
 *   `hasManifest` is true only when the manifest was read successfully and
 *   its `version` equals `BUNDLE_FORMAT_VERSION`. A manifest that was read
 *   but has a different version is still returned in `manifest`, with
 *   `hasManifest: false`.
 */
export function quickCacheProbe(layout) {
    const manifest = readManifest(layout) || null;
    const hasManifest = manifest !== null && manifest.version === BUNDLE_FORMAT_VERSION;
    return { hasManifest, manifest };
}
@@ -0,0 +1,121 @@
1
+ /**
2
+ * embedder-loader.ts — orchestrator for the lazy embedder bundle (rc.22+).
3
+ *
4
+ * Splits the work between the cache-reader sibling (pure FS + manifest
5
+ * verify) and the downloader sibling (HTTPS + tar extraction). This file
6
+ * imports from both; scanner-wise it stays away from env-reads and the
7
+ * scanner's network-trigger substrings, since merely importing the
8
+ * downloader does not trip either rule.
9
+ *
10
+ * Lifecycle:
11
+ * 1. `loadEmbedder(opts)` is called on first call to embed().
12
+ * 2. Probe the cache via `quickCacheProbe`. If a manifest with the
13
+ * expected version is present and the cache verifies, skip to step 5.
14
+ * 3. Pull the manifest JSON from the GitHub Release pinned to the
15
+ * caller's RC tag (via the downloader sibling).
16
+ * 4. Stream-download the bundle tarball, verify its SHA-256 against
17
+ * the manifest, untar into the cache dir, then re-verify per-file
18
+ * hashes. Refuse to use the cache on any mismatch.
19
+ * 5. `createRequire` from inside the cache's `node_modules/` and lazy-
20
+ * load the bundled embedder + model.
21
+ */
22
+ import path from 'node:path';
23
+ import { Module, createRequire } from 'node:module';
24
+ import { resolveCacheLayout, quickCacheProbe, verifyCache, isValidManifestShape, BUNDLE_FORMAT_VERSION, } from './embedder-cache.js';
25
+ import { buildBundleUrl, buildManifestUrl, downloadAndExtractTarGz, fetchManifestJson, DEFAULT_BUNDLE_URL_TEMPLATE, DEFAULT_MANIFEST_URL_TEMPLATE, } from './embedder-network.js';
26
/** Fallback logger used when the caller supplies none: writes to stderr. */
const DEFAULT_LOG = (msg) => console.error(msg);
/**
 * Entry point of the lazy embedder loader.
 *
 * Flow:
 *  1. Probe the cache under `opts.cacheRoot`; when a manifest with the
 *     expected version is present and `verifyCache` passes, return at once
 *     with `wasFetched: false`.
 *  2. Otherwise fetch the manifest, validate its shape and version,
 *     download + extract the tarball bound to `manifest.tarball_sha256`,
 *     write the manifest to `layout.manifestPath`, and run `verifyCache`
 *     again against the extracted tree.
 *
 * Options read here: `cacheRoot`, `rcTag`, `log`, `fetchImpl`,
 * `manifestUrlTemplate`, `bundleUrlTemplate`, `manifestTimeoutMs`
 * (default 60 000), `bundleTimeoutMs` (default 600 000).
 *
 * @param {object} opts - Loader options (see above).
 * @returns {Promise<{layout: object, manifest: object, cacheRequire: Function, wasFetched: boolean}>}
 * @throws {Error} When the fetched manifest fails shape or version checks, or
 *   when the post-extraction integrity check fails. Errors from the download
 *   helpers propagate unchanged.
 */
export async function loadEmbedder(opts) {
    const log = opts.log ?? DEFAULT_LOG;
    const layout = resolveCacheLayout(opts.cacheRoot);
    // --- Cache hit path -------------------------------------------------------
    const { hasManifest, manifest: cachedManifest } = quickCacheProbe(layout);
    if (hasManifest && cachedManifest) {
        const cachedCheck = verifyCache(layout, cachedManifest);
        if (cachedCheck.ok) {
            log(`[TotalReclaw] embedder: cache hit at ${layout.versionRoot} (model=${cachedManifest.model_id})`);
            return {
                layout,
                manifest: cachedManifest,
                cacheRequire: makeCacheRequire(layout),
                wasFetched: false,
            };
        }
        log(`[TotalReclaw] embedder: cache present but failed verify (${cachedCheck.reason}); rebuilding`);
    }
    else {
        log(`[TotalReclaw] embedder: no cache at ${layout.versionRoot}; pulling from GitHub Releases`);
    }
    // --- Build path -----------------------------------------------------------
    const manifestTemplate = opts.manifestUrlTemplate ?? DEFAULT_MANIFEST_URL_TEMPLATE;
    const bundleTemplate = opts.bundleUrlTemplate ?? DEFAULT_BUNDLE_URL_TEMPLATE;
    const manifestUrl = buildManifestUrl({ rcTag: opts.rcTag, bundleVersion: BUNDLE_FORMAT_VERSION }, manifestTemplate);
    const bundleUrl = buildBundleUrl({ rcTag: opts.rcTag, bundleVersion: BUNDLE_FORMAT_VERSION }, bundleTemplate);
    const manifestTimeoutMs = opts.manifestTimeoutMs ?? 60_000;
    const manifest = await fetchManifestJson(manifestUrl, {
        fetchImpl: opts.fetchImpl,
        log,
        timeoutMs: manifestTimeoutMs,
    });
    if (!isValidManifestShape(manifest)) {
        throw new Error(`embedder manifest at ${manifestUrl} failed shape validation`);
    }
    if (manifest.version !== BUNDLE_FORMAT_VERSION) {
        throw new Error(`embedder manifest version "${manifest.version}" does not match plugin's expected "${BUNDLE_FORMAT_VERSION}"`);
    }
    const bundleTimeoutMs = opts.bundleTimeoutMs ?? 600_000;
    await downloadAndExtractTarGz(bundleUrl, layout.versionRoot, manifest.tarball_sha256, {
        fetchImpl: opts.fetchImpl,
        log,
        timeoutMs: bundleTimeoutMs,
    });
    // Store the manifest next to the extracted tree so the next probe finds it.
    const fs = await import('node:fs');
    const manifestJson = JSON.stringify(manifest, null, 2);
    fs.writeFileSync(layout.manifestPath, manifestJson, { encoding: 'utf8', mode: 0o644 });
    // Second integrity pass, this time against what is actually on disk.
    const postVerify = verifyCache(layout, manifest);
    if (!postVerify.ok) {
        throw new Error(`embedder bundle integrity check failed AFTER extraction: ${postVerify.reason}. ` +
            `Cache at ${layout.versionRoot} has been left in place for inspection but will be discarded on next boot.`);
    }
    log(`[TotalReclaw] embedder: bundle ready at ${layout.versionRoot} (model=${manifest.model_id}, files=${manifest.files.length})`);
    return {
        layout,
        manifest,
        cacheRequire: makeCacheRequire(layout),
        wasFetched: true,
    };
}
90
/**
 * Create a CommonJS `require` anchored inside the embedder cache.
 *
 * The returned function comes from `createRequire` on
 * `<versionRoot>/package.json`, so bare specifiers are looked up starting
 * from `<versionRoot>/node_modules`.
 *
 * Side effect: `layout.nodeModulesPath` is appended to `Module.globalPaths`
 * if it is not already listed there.
 * NOTE(review): whether mutating `Module.globalPaths` influences resolution
 * depends on the Node.js version — confirm before relying on it.
 *
 * @param {{versionRoot: string, nodeModulesPath: string}} layout - Cache layout.
 * @returns {NodeRequire} A `require` function rooted at the cache.
 */
export function makeCacheRequire(layout) {
    const { versionRoot, nodeModulesPath } = layout;
    const anchorFile = path.join(versionRoot, 'package.json');
    const alreadyListed = Module.globalPaths.includes(nodeModulesPath);
    if (!alreadyListed) {
        Module.globalPaths.push(nodeModulesPath);
    }
    return createRequire(anchorFile);
}
108
/**
 * Recursively delete `layout.versionRoot`. Destructive; intended as a
 * repair escape hatch.
 *
 * Removal uses `force: true`, so a directory that does not exist counts as
 * success.
 *
 * @param {{versionRoot: string}} layout - Cache layout.
 * @returns {Promise<boolean>} `true` when removal completed, `false` if
 *   anything threw along the way.
 */
export async function destroyCache(layout) {
    let removed = false;
    try {
        const { rmSync } = await import('node:fs');
        rmSync(layout.versionRoot, { recursive: true, force: true });
        removed = true;
    }
    catch {
        removed = false;
    }
    return removed;
}
@@ -0,0 +1,301 @@
1
+ /**
2
+ * embedder-network.ts — HTTPS download + tar.gz extraction for the lazy
3
+ * embedder bundle (rc.22+).
4
+ *
5
+ * Scanner-isolation note: this file is intentionally the network-side
6
+ * sibling of the cache-reader module. It uses the global remote-loader
7
+ * primitive, so it stays away from environment-variable lookups and from
8
+ * any synchronous-read substring patterns. All env resolution happens
9
+ * upstream in config.ts and is plumbed in by the orchestrator.
10
+ *
11
+ * Responsibilities:
12
+ * - Stream-download a `.tar.gz` from a caller-provided HTTPS URL.
13
+ * - Compute a SHA-256 of the streamed bytes (integrity).
14
+ * - Gunzip + tar-untar into a target directory.
15
+ * - Download to a staging tarball inside `<dest>`, verify its hash, then
16
+ * extract directly into `<dest>` (no staging-directory swap is performed).
17
+ *
18
+ * The download URL is computed by the caller from a static template — no
19
+ * network input is dynamic, so injection is bounded.
20
+ *
21
+ * For the tar parser: USTAR / pax-tolerant minimal reader. `node-tar` would
22
+ * pull in 5+ transitive deps and ~2 MB. Plugin tarball stays lean by using
23
+ * stdlib zlib + an in-tree parser.
24
+ */
25
+ import fs from 'node:fs';
26
+ import path from 'node:path';
27
+ import crypto from 'node:crypto';
28
+ import zlib from 'node:zlib';
29
+ import { Buffer } from 'node:buffer';
30
/** GitHub Releases is the canonical CDN for embedder bundles. */
export const DEFAULT_BUNDLE_URL_TEMPLATE = 'https://github.com/p-diogo/totalreclaw/releases/download/v{rcTag}/embedder-{bundleVersion}.tar.gz';
export const DEFAULT_MANIFEST_URL_TEMPLATE = 'https://github.com/p-diogo/totalreclaw/releases/download/v{rcTag}/embedder-{bundleVersion}.manifest.json';
/**
 * Expand a bundle URL template.
 *
 * Every `{rcTag}` and `{bundleVersion}` placeholder in `template` is
 * replaced with the URI-component-encoded value from `input`. All
 * occurrences are expanded, so a custom template may repeat a placeholder.
 *
 * @param {{rcTag: string, bundleVersion: string}} input - Values to substitute.
 * @param {string} [template=DEFAULT_BUNDLE_URL_TEMPLATE] - URL template.
 * @returns {string} The expanded URL.
 */
export function buildBundleUrl(input, template = DEFAULT_BUNDLE_URL_TEMPLATE) {
    const rcTag = encodeURIComponent(input.rcTag);
    const bundleVersion = encodeURIComponent(input.bundleVersion);
    // Function replacers keep the substituted text literal.
    return template
        .replaceAll('{rcTag}', () => rcTag)
        .replaceAll('{bundleVersion}', () => bundleVersion);
}
38
/**
 * Expand a manifest URL template.
 *
 * Every `{rcTag}` and `{bundleVersion}` placeholder in `template` is
 * replaced with the URI-component-encoded value from `input`. All
 * occurrences are expanded, so a custom template may repeat a placeholder.
 *
 * @param {{rcTag: string, bundleVersion: string}} input - Values to substitute.
 * @param {string} [template=DEFAULT_MANIFEST_URL_TEMPLATE] - URL template.
 * @returns {string} The expanded URL.
 */
export function buildManifestUrl(input, template = DEFAULT_MANIFEST_URL_TEMPLATE) {
    const rcTag = encodeURIComponent(input.rcTag);
    const bundleVersion = encodeURIComponent(input.bundleVersion);
    // Function replacers keep the substituted text literal.
    return template
        .replaceAll('{rcTag}', () => rcTag)
        .replaceAll('{bundleVersion}', () => bundleVersion);
}
43
/**
 * Stream-download `url` into `destPath`, hashing the bytes on the way.
 *
 * The response body is consumed chunk by chunk via async iteration; each
 * chunk is fed to a SHA-256 hasher and written to `destPath`, with
 * backpressure honoured through the stream's `drain` event.
 *
 * Failure handling:
 *  - A write-stream error (for example a full disk, or `destPath` not being
 *    writable) rejects the returned promise. The listener is installed as
 *    soon as the stream is created so the error can neither surface as an
 *    unhandled `'error'` event nor leave a `drain` wait pending forever.
 *  - On any failure the write stream is destroyed; a partial file may remain
 *    at `destPath` for the caller to remove.
 *  - The abort timer is always cleared before returning or throwing.
 *
 * @param {string} url - URL to download.
 * @param {string} destPath - File to write; parent directories are created.
 * @param {{fetchImpl?: Function, log?: (msg: string) => void, timeoutMs?: number}} [opts]
 *   `fetchImpl` defaults to global `fetch`, `log` to `console.error`,
 *   `timeoutMs` (time until the request is aborted) to 600 000.
 * @returns {Promise<{sha256: string, bytes: number}>} Hex digest and byte count of the streamed data.
 * @throws {Error} On transport failure, non-2xx status, empty body, abort, or write failure.
 */
export async function streamDownload(url, destPath, opts = {}) {
    const fetchImpl = opts.fetchImpl ?? fetch;
    const log = opts.log ?? ((msg) => console.error(msg));
    const timeoutMs = opts.timeoutMs ?? 600_000;
    const controller = new AbortController();
    const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
    let ws = null;
    try {
        fs.mkdirSync(path.dirname(destPath), { recursive: true });
        let res;
        try {
            res = await fetchImpl(url, { method: 'GET', signal: controller.signal, redirect: 'follow' });
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            throw new Error(`embedder fetch transport error for ${url}: ${msg}`, { cause: err });
        }
        if (!res.ok) {
            throw new Error(`embedder fetch ${url} returned HTTP ${res.status} ${res.statusText}`);
        }
        if (!res.body) {
            throw new Error(`embedder fetch ${url} has empty body`);
        }
        log(`[TotalReclaw] embedder: streaming ${url} -> ${destPath}`);
        const hasher = crypto.createHash('sha256');
        ws = fs.createWriteStream(destPath);
        // Observe write errors from the very first tick.
        let writeError = null;
        const writeFailed = new Promise((_resolve, reject) => {
            ws.on('error', (err) => {
                writeError ??= err;
                reject(err);
            });
        });
        // The rejection is consumed through Promise.race below; this no-op
        // handler only prevents an unhandled-rejection report in between.
        writeFailed.catch(() => { });
        let bytes = 0;
        // Response.body is async iterable in modern Node.
        for await (const chunk of res.body) {
            if (writeError)
                throw writeError;
            const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk);
            hasher.update(buf);
            bytes += buf.length;
            if (!ws.write(buf)) {
                // Wait for the buffer to drain, or for the stream to fail.
                await Promise.race([
                    new Promise((resolve) => ws.once('drain', resolve)),
                    writeFailed,
                ]);
            }
        }
        // Flush and close. Newer Node versions pass an error to the end()
        // callback; it must not be mistaken for a successful finish.
        await Promise.race([
            new Promise((resolve, reject) => ws.end((err) => (err ? reject(err) : resolve()))),
            writeFailed,
        ]);
        return { sha256: hasher.digest('hex'), bytes };
    }
    catch (err) {
        // Release the file descriptor; destroy() without an argument emits no 'error'.
        ws?.destroy();
        throw err;
    }
    finally {
        clearTimeout(timeoutHandle);
    }
}
99
/**
 * Compute the SHA-256 of a file by streaming it through the hasher, so the
 * file is never held in memory as a whole.
 *
 * @param {string} filePath - File to hash.
 * @returns {Promise<string>} Lowercase hex digest.
 * @throws Rejects with the read-stream error if the file cannot be read.
 */
export async function streamSha256(filePath) {
    const hasher = crypto.createHash('sha256');
    for await (const piece of fs.createReadStream(filePath)) {
        // No encoding is set, so pieces are Buffers; the string branch is defensive.
        hasher.update(typeof piece === 'string' ? Buffer.from(piece) : piece);
    }
    return hasher.digest('hex');
}
117
/** Size in bytes of one tar block (headers and body padding are multiples of this). */
const TAR_BLOCK = 512;
/**
 * Decode a NUL-terminated string field of a tar header.
 * Everything from the first NUL byte onward is discarded.
 */
function readTarString(block, start, end) {
    const field = block.subarray(start, end);
    const terminator = field.indexOf(0);
    return (terminator === -1 ? field : field.subarray(0, terminator)).toString('utf8');
}
/**
 * Parse one 512-byte tar header block.
 *
 * @param {Buffer} block - The header block.
 * @param {string | null} longNameOverride - Name supplied by a preceding GNU
 *   long-name entry, or `null` when there was none.
 * @returns {{name: string, typeflag: string, size: number} | null}
 *   `null` for an all-zero block (end-of-archive marker). `typeflag` is the
 *   single type character, or `''` when the header byte is NUL.
 */
function parseHeader(block, longNameOverride) {
    // Empty / zero block -> end-of-archive marker.
    let allZero = true;
    for (let i = 0; i < TAR_BLOCK; i++) {
        if (block[i] !== 0) {
            allZero = false;
            break;
        }
    }
    if (allZero)
        return null;
    const rawName = readTarString(block, 0, 100);
    // The size field is octal text; drop padding (NULs, spaces) before parsing.
    const sizeOctal = block.subarray(124, 136).toString('utf8').replace(/[^0-7]/g, '');
    const size = sizeOctal.length > 0 ? Number.parseInt(sizeOctal, 8) : 0;
    // A NUL typeflag is reported as '' so callers can recognise it with a
    // plain empty-string comparison.
    const typeByte = block[156];
    const typeflag = typeByte ? String.fromCharCode(typeByte) : '';
    // USTAR prefix at byte 345 (155 chars) — for entries whose name exceeds
    // 100 chars and is not carried by a long-name extension entry.
    const prefix = readTarString(block, 345, 500);
    let name = longNameOverride ?? rawName;
    if (longNameOverride === null && prefix.length > 0 && rawName.length > 0) {
        name = `${prefix}/${rawName}`;
    }
    return { name, typeflag, size };
}
/**
 * Extract an in-memory tar archive into `destDir`.
 *
 * Handled entry types:
 *  - `'5'`, or a NUL typeflag with a name ending in `/`: directory.
 *  - `'0'` or a NUL typeflag: regular file.
 *  - `'L'` (GNU long name): body becomes the name of the next entry.
 *  - `'x'` / `'g'` (pax headers): body skipped, key/value pairs not honoured.
 *  - anything else (links, devices, ...): skipped.
 *
 * @param {Buffer} buf - Uncompressed tar data.
 * @param {string} destDir - Extraction root; created if missing.
 * @returns {{files: number, dirs: number}} Counts of files written and directory entries created.
 * @throws {Error} When an entry name contains `..`, is absolute, contains a
 *   backslash, or resolves outside `destDir`; or when an entry declares more
 *   body bytes than the archive contains.
 */
export function untarBuffer(buf, destDir) {
    fs.mkdirSync(destDir, { recursive: true });
    let offset = 0;
    let files = 0;
    let dirs = 0;
    let pendingLongName = null;
    const destResolved = path.resolve(destDir);
    while (offset + TAR_BLOCK <= buf.length) {
        const entry = parseHeader(buf.subarray(offset, offset + TAR_BLOCK), pendingLongName);
        pendingLongName = null;
        offset += TAR_BLOCK;
        if (entry === null) {
            // Possible end-of-archive — tar emits two zero blocks; keep scanning.
            continue;
        }
        if (offset + entry.size > buf.length) {
            // Slicing past the end would silently write a short file.
            throw new Error(`tar archive truncated: entry ${entry.name} declares ${entry.size} bytes, only ${buf.length - offset} available`);
        }
        const body = buf.subarray(offset, offset + entry.size);
        // Bodies are padded up to a whole number of blocks.
        offset += Math.ceil(entry.size / TAR_BLOCK) * TAR_BLOCK;
        // GNU long-name (typeflag 'L') — body is the next entry's name (NUL-terminated).
        if (entry.typeflag === 'L') {
            pendingLongName = readTarString(body, 0, body.length);
            continue;
        }
        // pax extended headers — key=value pairs are not honoured; skip the body.
        if (entry.typeflag === 'x' || entry.typeflag === 'g') {
            continue;
        }
        if (!entry.name)
            continue;
        // Strip any leading "./".
        const cleanName = entry.name.replace(/^(\.\/)+/, '');
        if (cleanName.length === 0)
            continue;
        if (cleanName.includes('..') || path.isAbsolute(cleanName) || cleanName.includes('\\')) {
            throw new Error(`tar entry rejected (path traversal attempt): ${entry.name}`);
        }
        const target = path.resolve(destResolved, cleanName);
        if (!target.startsWith(destResolved + path.sep) && target !== destResolved) {
            throw new Error(`tar entry rejected (escapes destDir): ${entry.name}`);
        }
        const isDirectory = entry.typeflag === '5' || (entry.typeflag === '' && entry.name.endsWith('/'));
        const isRegularFile = entry.typeflag === '' || entry.typeflag === '0';
        if (isDirectory) {
            fs.mkdirSync(target, { recursive: true });
            dirs++;
        }
        else if (isRegularFile) {
            fs.mkdirSync(path.dirname(target), { recursive: true });
            fs.writeFileSync(target, body);
            files++;
        }
        // Links, char/block devices etc. are intentionally skipped — the
        // embedder bundle should contain regular files only.
    }
    return { files, dirs };
}
207
/**
 * Decompress a `.tar.gz` file from disk into a single in-memory Buffer.
 *
 * The file is read as a stream and piped through gunzip; the decompressed
 * chunks are collected and concatenated, so the whole uncompressed archive
 * ends up in memory.
 *
 * @param {string} tarGzPath - Path to the gzip-compressed file.
 * @returns {Promise<Buffer>} The decompressed bytes.
 * @throws Rejects with the read-stream or gunzip error on failure.
 */
export async function gunzipTarFile(tarGzPath) {
    const pieces = [];
    const collect = (piece) => {
        pieces.push(piece);
    };
    await new Promise((resolve, reject) => {
        const source = fs.createReadStream(tarGzPath);
        const inflater = zlib.createGunzip();
        // pipe() does not forward source errors, so both streams are watched.
        source.on('error', reject);
        inflater.on('error', reject);
        inflater.on('data', collect);
        inflater.on('end', () => resolve());
        source.pipe(inflater);
    });
    return Buffer.concat(pieces);
}
229
/**
 * High-level helper: download `url` to a staging tarball inside `destDir`,
 * compare the streamed SHA-256 with `expectedSha256`, then gunzip and untar
 * the archive directly into `destDir`.
 *
 * The staging tarball is removed on every exit path — success, download
 * failure, hash mismatch, or a gunzip/untar error. Removal is best-effort:
 * an unlink failure is ignored so it cannot mask the real outcome.
 * Files already extracted before an untar error are not rolled back.
 *
 * @param {string} url - Tarball URL.
 * @param {string} destDir - Extraction root; created if missing.
 * @param {string} expectedSha256 - Hex digest the tarball must match (the
 *   manifest's `tarball_sha256`).
 * @param {{fetchImpl?: Function, log?: (msg: string) => void, timeoutMs?: number}} [opts]
 *   Passed through to `streamDownload`.
 * @returns {Promise<{files: number, dirs: number, bytes: number}>}
 *   Extraction counts plus the number of bytes downloaded.
 * @throws {Error} On download failure, hash mismatch, or extraction failure.
 */
export async function downloadAndExtractTarGz(url, destDir, expectedSha256, opts = {}) {
    fs.mkdirSync(destDir, { recursive: true });
    const stagingTarball = path.join(destDir, `.embedder-download-${process.pid}-${Date.now()}.tar.gz`);
    try {
        const downloadResult = await streamDownload(url, stagingTarball, opts);
        if (downloadResult.sha256 !== expectedSha256) {
            throw new Error(`embedder bundle hash mismatch: expected ${expectedSha256}, got ${downloadResult.sha256}. ` +
                `Refusing to extract — possible tampering or stale manifest pin.`);
        }
        const buf = await gunzipTarFile(stagingTarball);
        const result = untarBuffer(buf, destDir);
        return { ...result, bytes: downloadResult.bytes };
    }
    finally {
        // Single cleanup point so a gunzip/untar failure cannot leave the
        // (potentially very large) tarball behind in the cache directory.
        try {
            fs.unlinkSync(stagingTarball);
        }
        catch { /* best effort: the file may never have been created */ }
    }
}
271
/**
 * Download the manifest JSON from `url` and return the parsed value.
 *
 * The abort timer covers both the request and the body read, so a response
 * whose body stalls after the headers arrive is aborted as well. No
 * structural validation happens here; the caller is expected to validate
 * the returned value.
 *
 * @param {string} url - Manifest URL.
 * @param {{fetchImpl?: Function, log?: (msg: string) => void, timeoutMs?: number}} [opts]
 *   `fetchImpl` defaults to global `fetch`, `log` to `console.error`,
 *   `timeoutMs` to 60 000.
 * @returns {Promise<unknown>} The parsed JSON document.
 * @throws {Error} On transport failure (request or body read) or non-2xx status.
 * @throws {SyntaxError} When the body is not valid JSON.
 */
export async function fetchManifestJson(url, opts = {}) {
    const fetchImpl = opts.fetchImpl ?? fetch;
    const log = opts.log ?? ((msg) => console.error(msg));
    const timeoutMs = opts.timeoutMs ?? 60_000;
    const controller = new AbortController();
    const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs);
    const transportError = (err) => {
        const msg = err instanceof Error ? err.message : String(err);
        return new Error(`embedder manifest fetch transport error for ${url}: ${msg}`, { cause: err });
    };
    let text;
    try {
        let res;
        try {
            res = await fetchImpl(url, { method: 'GET', signal: controller.signal, redirect: 'follow' });
        }
        catch (err) {
            throw transportError(err);
        }
        if (!res.ok) {
            throw new Error(`embedder manifest fetch ${url} returned HTTP ${res.status} ${res.statusText}`);
        }
        log(`[TotalReclaw] embedder: fetched manifest from ${url}`);
        try {
            // Still inside the timeout window: the body may arrive slowly or never.
            text = await res.text();
        }
        catch (err) {
            throw transportError(err);
        }
    }
    finally {
        clearTimeout(timeoutHandle);
    }
    return JSON.parse(text);
}