@totalreclaw/totalreclaw 3.3.1-rc.8 → 3.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +268 -1
- package/SKILL.md +29 -23
- package/api-client.ts +18 -11
- package/claims-helper.ts +47 -1
- package/config.ts +108 -4
- package/confirm-indexed.ts +191 -0
- package/crypto.ts +10 -2
- package/dist/api-client.js +226 -0
- package/dist/billing-cache.js +100 -0
- package/dist/claims-helper.js +624 -0
- package/dist/config.js +297 -0
- package/dist/confirm-indexed.js +127 -0
- package/dist/consolidation.js +258 -0
- package/dist/contradiction-sync.js +1034 -0
- package/dist/crypto.js +138 -0
- package/dist/digest-sync.js +361 -0
- package/dist/download-ux.js +63 -0
- package/dist/embedder-cache.js +185 -0
- package/dist/embedder-loader.js +121 -0
- package/dist/embedder-network.js +301 -0
- package/dist/embedding.js +141 -0
- package/dist/extractor.js +1225 -0
- package/dist/first-run.js +103 -0
- package/dist/fs-helpers.js +725 -0
- package/dist/gateway-url.js +197 -0
- package/dist/generate-mnemonic.js +13 -0
- package/dist/hot-cache-wrapper.js +101 -0
- package/dist/import-adapters/base-adapter.js +64 -0
- package/dist/import-adapters/chatgpt-adapter.js +238 -0
- package/dist/import-adapters/claude-adapter.js +114 -0
- package/dist/import-adapters/gemini-adapter.js +201 -0
- package/dist/import-adapters/index.js +26 -0
- package/dist/import-adapters/mcp-memory-adapter.js +219 -0
- package/dist/import-adapters/mem0-adapter.js +158 -0
- package/dist/import-adapters/types.js +1 -0
- package/dist/index.js +5388 -0
- package/dist/llm-client.js +687 -0
- package/dist/llm-profile-reader.js +346 -0
- package/dist/lsh.js +62 -0
- package/dist/onboarding-cli.js +750 -0
- package/dist/pair-cli.js +344 -0
- package/dist/pair-crypto.js +359 -0
- package/dist/pair-http.js +404 -0
- package/dist/pair-page.js +826 -0
- package/dist/pair-qr.js +107 -0
- package/dist/pair-remote-client.js +410 -0
- package/dist/pair-session-store.js +566 -0
- package/dist/pin.js +556 -0
- package/dist/qa-bug-report.js +301 -0
- package/dist/relay-headers.js +44 -0
- package/dist/reranker.js +409 -0
- package/dist/retype-setscope.js +368 -0
- package/dist/semantic-dedup.js +75 -0
- package/dist/subgraph-search.js +289 -0
- package/dist/subgraph-store.js +694 -0
- package/dist/tool-gating.js +58 -0
- package/download-ux.ts +91 -0
- package/embedder-cache.ts +230 -0
- package/embedder-loader.ts +189 -0
- package/embedder-network.ts +350 -0
- package/embedding.ts +118 -27
- package/fs-helpers.ts +277 -0
- package/gateway-url.ts +57 -9
- package/index.ts +469 -250
- package/llm-client.ts +4 -3
- package/lsh.ts +7 -2
- package/onboarding-cli.ts +114 -1
- package/package.json +24 -5
- package/pair-cli.ts +76 -8
- package/pair-crypto.ts +34 -24
- package/pair-page.ts +28 -17
- package/pair-qr.ts +152 -0
- package/pair-remote-client.ts +540 -0
- package/pin.ts +31 -0
- package/qa-bug-report.ts +84 -2
- package/relay-headers.ts +50 -0
- package/reranker.ts +40 -0
- package/retype-setscope.ts +69 -8
- package/skill.json +1 -1
- package/subgraph-search.ts +4 -3
- package/subgraph-store.ts +15 -10
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Tool gating predicate for 3.2.0 — the `before_tool_call` hook in index.ts
|
|
3
|
+
* delegates to this module so the logic is testable without standing up a
|
|
4
|
+
* full OpenClaw plugin host.
|
|
5
|
+
*
|
|
6
|
+
* Scope: the 3.2.0 state machine has two states (`fresh`, `active`). Memory
|
|
7
|
+
* tools are blocked when state is anything other than `active`. Billing +
|
|
8
|
+
* setup-adjacent tools remain usable — users need to be able to upgrade,
|
|
9
|
+
* migrate, and start onboarding before their vault is active.
|
|
10
|
+
*
|
|
11
|
+
* This module imports ONLY types + the state resolver. No I/O beyond what
|
|
12
|
+
* `resolveOnboardingState` already does; no network; no env reads.
|
|
13
|
+
*/
|
|
14
|
+
/**
 * Frozen list of tool names that `decideToolGate` blocks unless the
 * onboarding state is `active`.
 *
 * Must mirror the `registerTool` calls in `index.ts`. Any tool name that is
 * absent from this list is never blocked by the gate (for example
 * totalreclaw_upgrade, totalreclaw_migrate, totalreclaw_onboarding_start,
 * totalreclaw_setup).
 */
export const GATED_TOOL_NAMES = Object.freeze([
    'totalreclaw_remember',
    'totalreclaw_recall',
    'totalreclaw_forget',
    'totalreclaw_export',
    'totalreclaw_status',
    'totalreclaw_consolidate',
    'totalreclaw_pin',
    'totalreclaw_unpin',
    'totalreclaw_retype',
    'totalreclaw_set_scope',
    'totalreclaw_import_from',
    'totalreclaw_import_batch',
]);
|
|
34
|
+
/**
 * Pure gate decision for a single tool call.
 *
 * The caller resolves the onboarding state beforehand and passes it in; this
 * function performs no file access itself, which keeps it trivially
 * stubbable in tests.
 *
 * @param toolName Name of the tool being invoked. A falsy name is never blocked.
 * @param state Resolved state object; only `state.onboardingState` is read.
 *   `null`/`undefined` is treated as "not active".
 * @returns `{ block: false }` when the call may proceed, otherwise
 *   `{ block: true, blockReason }` with a user-facing explanation.
 */
export function decideToolGate(toolName, state) {
    const allowed = !toolName ||
        !GATED_TOOL_NAMES.includes(toolName) ||
        state?.onboardingState === 'active';
    if (allowed) {
        return { block: false };
    }
    const blockReason = [
        'TotalReclaw onboarding required. Run `openclaw totalreclaw onboard` ',
        'in a terminal (or call the `totalreclaw_onboarding_start` tool for ',
        'details). Memory tools are gated until the user completes setup.',
    ].join('');
    return { block: true, blockReason };
}
|
|
53
|
+
/**
 * Membership check against `GATED_TOOL_NAMES`; handy in tests and docs.
 *
 * @param toolName Candidate tool name.
 * @returns `true` when the name is one of the gated tools.
 */
export function isGatedToolName(toolName) {
    return GATED_TOOL_NAMES.some((gated) => gated === toolName);
}
|
package/download-ux.ts
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* download-ux.ts — Wrapper for heavy first-call downloads (rc.16, fixes #92).
|
|
3
|
+
*
|
|
4
|
+
* Wraps a download promise with:
|
|
5
|
+
* - per-attempt timeout (default 600s, override via TOTALRECLAW_ONNX_INSTALL_TIMEOUT in seconds)
|
|
6
|
+
* - 60s keep-alive log so slow-bandwidth users don't think it's frozen
|
|
7
|
+
* - 3-attempt exponential-backoff retry (per-attempt timeout grows 1x/2x/4x)
|
|
8
|
+
* - loud actionable error after exhaustion
|
|
9
|
+
*
|
|
10
|
+
* No third-party imports here — pure stdlib so the unit test can exercise it
|
|
11
|
+
* without pulling the heavy `@huggingface/transformers` chain.
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
/** Base per-attempt timeout when neither the caller nor the env overrides it (600 s). */
const DEFAULT_DOWNLOAD_TIMEOUT_MS = 10 * 60 * 1_000;

/** Default spacing between "still downloading" keep-alive log lines (60 s). */
const KEEPALIVE_INTERVAL_MS = 60 * 1_000;

/** Default number of download attempts before giving up. */
const MAX_DOWNLOAD_ATTEMPTS = 3;
|
|
17
|
+
|
|
18
|
+
/**
 * Resolve the base per-attempt download timeout in milliseconds.
 *
 * Reads `TOTALRECLAW_ONNX_INSTALL_TIMEOUT`, which is expressed in SECONDS.
 * An unset, empty, non-numeric, non-finite, zero or negative value falls
 * back to `DEFAULT_DOWNLOAD_TIMEOUT_MS`.
 *
 * The result is clamped to `[1, 2147483647]` ms. Node's `setTimeout` treats
 * any delay outside that range as 1 ms, so without the clamp a very large
 * value (meant as "practically never time out") would time out immediately.
 *
 * @returns Timeout in milliseconds, always a positive integer.
 */
export function getDownloadTimeoutMs(): number {
  // Largest delay setTimeout honours (signed 32-bit milliseconds).
  const MAX_TIMER_DELAY_MS = 2_147_483_647;

  const raw = process.env.TOTALRECLAW_ONNX_INSTALL_TIMEOUT;
  if (!raw) return DEFAULT_DOWNLOAD_TIMEOUT_MS;

  const seconds = Number(raw);
  if (!Number.isFinite(seconds) || seconds <= 0) return DEFAULT_DOWNLOAD_TIMEOUT_MS;

  // Spec accepts seconds; convert to ms, then keep the value inside the
  // range the timer API can actually represent.
  const ms = Math.floor(seconds * 1000);
  return Math.min(Math.max(ms, 1), MAX_TIMER_DELAY_MS);
}
|
|
26
|
+
|
|
27
|
+
/** Optional knobs for `downloadWithUX`; every field has a default. */
export interface DownloadWithUXOpts {
  /**
   * Base per-attempt timeout in ms. When provided it is used as-is;
   * when omitted, `getDownloadTimeoutMs()` (env var or built-in default)
   * supplies the value.
   */
  timeoutMs?: number;

  /** Interval in ms between keep-alive log lines. */
  keepaliveMs?: number;

  /** Maximum number of attempts. */
  maxAttempts?: number;

  /** Log sink; falls back to `console.error` when omitted. */
  log?: (msg: string) => void;

  /** Back-off sleep implementation, replaceable in tests; falls back to a `setTimeout`-based sleep. */
  sleep?: (ms: number) => Promise<void>;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Run `download` with a per-attempt timeout, periodic keep-alive logging and
 * bounded retries.
 *
 * Each attempt races `download()` against a timer. The timeout doubles on
 * every attempt (1x, 2x, 4x, ... of the base). Between failed attempts the
 * function waits 5 s, 10 s, 20 s, ... capped at 30 s.
 *
 * Both the keep-alive interval and the timeout timer are released in a
 * `finally` block, so a finished call leaves no timer behind to keep the
 * process alive. A timed-out `download()` is NOT cancelled: this function
 * has no handle to abort it, so its promise is simply abandoned.
 *
 * @param label Short name used as a prefix in log lines.
 * @param download Factory invoked once per attempt.
 * @param opts Optional overrides; see `DownloadWithUXOpts`.
 * @returns The value resolved by the first successful attempt.
 * @throws Error after `maxAttempts` failed attempts; the message embeds the
 *   last underlying error and remediation hints.
 */
export async function downloadWithUX<T>(
  label: string,
  download: () => Promise<T>,
  opts?: DownloadWithUXOpts,
): Promise<T> {
  // setTimeout treats delays above this as 1 ms, which would turn a very
  // generous timeout into an instant failure.
  const MAX_TIMER_DELAY_MS = 2_147_483_647;

  const baseTimeoutMs = opts?.timeoutMs ?? getDownloadTimeoutMs();
  const keepaliveMs = opts?.keepaliveMs ?? KEEPALIVE_INTERVAL_MS;
  const maxAttempts = opts?.maxAttempts ?? MAX_DOWNLOAD_ATTEMPTS;
  const log = opts?.log ?? ((msg: string) => console.error(msg));
  const sleep = opts?.sleep ?? ((ms: number) => new Promise<void>(r => setTimeout(r, ms)));

  let lastErr: unknown = null;

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    const attemptTimeoutMs = Math.min(baseTimeoutMs * Math.pow(2, attempt - 1), MAX_TIMER_DELAY_MS);
    const startedAt = Date.now();
    const keepaliveTimer = setInterval(() => {
      const elapsedSec = Math.floor((Date.now() - startedAt) / 1000);
      log(`[TotalReclaw] ${label}: still downloading… (${elapsedSec}s elapsed, attempt ${attempt}/${maxAttempts})`);
    }, keepaliveMs);
    let timeoutTimer: ReturnType<typeof setTimeout> | undefined;

    try {
      return await Promise.race([
        download(),
        new Promise<never>((_, reject) => {
          timeoutTimer = setTimeout(
            () => reject(new Error(`Download timeout after ${Math.floor(attemptTimeoutMs / 1000)}s (attempt ${attempt}/${maxAttempts})`)),
            attemptTimeoutMs,
          );
        }),
      ]);
    } catch (err) {
      lastErr = err;
    } finally {
      // Always release both timers, on success as well as on failure.
      clearInterval(keepaliveTimer);
      if (timeoutTimer !== undefined) clearTimeout(timeoutTimer);
    }

    // Only reached when the attempt failed.
    if (attempt < maxAttempts) {
      const msg = lastErr instanceof Error ? lastErr.message : String(lastErr);
      const backoffMs = Math.min(5_000 * Math.pow(2, attempt - 1), 30_000);
      log(`[TotalReclaw] ${label}: attempt ${attempt} failed (${msg}). Retrying in ${Math.floor(backoffMs / 1000)}s…`);
      await sleep(backoffMs);
    }
  }

  const finalMsg = lastErr instanceof Error ? lastErr.message : String(lastErr);
  throw new Error(
    `[TotalReclaw] Embedding model download failed after ${maxAttempts} attempts (last error: ${finalMsg}). ` +
      `Check your network connection and retry: \`openclaw plugins install totalreclaw\`. ` +
      `On slow connections, set TOTALRECLAW_ONNX_INSTALL_TIMEOUT=1200 (in seconds) to extend the per-attempt timeout.`,
  );
}
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embedder-cache.ts — pure-FS reader for the lazy embedder bundle (rc.22+).
|
|
3
|
+
*
|
|
4
|
+
* Scanner-isolation note: this module reads from disk AND verifies SHA-256
|
|
5
|
+
* hashes. It MUST NOT contain any of the network-trigger substrings the
|
|
6
|
+
* OpenClaw skill scanner gates on — see `skill/scripts/check-scanner.mjs`
|
|
7
|
+
* for the rule list. The network side of the lazy-retrieval flow lives in a
|
|
8
|
+
* sibling module (the downloader), and the orchestrator imports both.
|
|
9
|
+
*
|
|
10
|
+
* Responsibilities:
|
|
11
|
+
* - Resolve the on-disk cache layout (`<root>/v1/`, with `manifest.json`
|
|
12
|
+
* + `node_modules/` + `model/`).
|
|
13
|
+
* - Synchronously load + parse the manifest JSON.
|
|
14
|
+
* - Verify the cache is intact: every file listed in `manifest.files`
|
|
15
|
+
* exists at the expected path with the SHA-256 hash declared in the
|
|
16
|
+
* manifest. Any mismatch invalidates the cache so the loader rebuilds.
|
|
17
|
+
*
|
|
18
|
+
* The manifest format is the contract between this file and the bundle
|
|
19
|
+
* generation script (`scripts/build-embedder-bundle.mjs`):
|
|
20
|
+
* {
|
|
21
|
+
* "version": "v1", // bundle format version
|
|
22
|
+
* "model_id": "harrier-oss-270m-q4", // semantic model identifier
|
|
23
|
+
* "dimension": 640, // output vector size
|
|
24
|
+
* "tarball_sha256": "<hex>", // informational only here
|
|
25
|
+
* "tarball_size_bytes": <int>, // informational only here
|
|
26
|
+
* "files": [
|
|
27
|
+
* { "path": "node_modules/.../foo.js", "sha256": "<hex>", "size": <int> },
|
|
28
|
+
* ...
|
|
29
|
+
* ]
|
|
30
|
+
* }
|
|
31
|
+
*
|
|
32
|
+
* Hard rule for this file: stdlib only — `node:fs` + `node:crypto` +
|
|
33
|
+
* `node:path`. No env reads, no remote retrievals.
|
|
34
|
+
*/
|
|
35
|
+
|
|
36
|
+
import fs from 'node:fs';
|
|
37
|
+
import path from 'node:path';
|
|
38
|
+
import crypto from 'node:crypto';
|
|
39
|
+
|
|
40
|
+
/**
 * Version tag of the on-disk bundle layout. It doubles as the name of the
 * versioned cache subdirectory and as the value a manifest's `version`
 * field must carry. Bump only when the on-disk layout changes.
 */
export const BUNDLE_FORMAT_VERSION = 'v1' as const;
|
|
42
|
+
|
|
43
|
+
/**
 * Resolved paths of the embedder cache: `<cacheRoot>/<BUNDLE_FORMAT_VERSION>/`.
 *
 * Keeping each bundle format in its own version subdirectory allows a future
 * `v2/` (e.g. a distilled model) to sit next to `v1/` without invalidating
 * active vaults.
 */
export interface CacheLayout {
  /** Top-level embedder cache directory, e.g. `~/.totalreclaw/embedder/`. */
  root: string;

  /** Versioned bundle directory, e.g. `~/.totalreclaw/embedder/v1/`. */
  versionRoot: string;

  /** Location of the manifest JSON file. */
  manifestPath: string;

  /** Extracted `node_modules` tree (transformers + onnxruntime). */
  nodeModulesPath: string;

  /** Extracted ONNX model directory. */
  modelPath: string;
}
|
|
60
|
+
|
|
61
|
+
/**
 * Derive every cache path from the top-level cache directory.
 *
 * Pure path arithmetic: nothing is created or checked on disk.
 *
 * @param cacheRoot Top-level embedder cache directory.
 * @returns Layout whose entries all live under `<cacheRoot>/<BUNDLE_FORMAT_VERSION>/`.
 */
export function resolveCacheLayout(cacheRoot: string): CacheLayout {
  const versionRoot = path.join(cacheRoot, BUNDLE_FORMAT_VERSION);
  const inVersionRoot = (leaf: string): string => path.join(versionRoot, leaf);

  return {
    root: cacheRoot,
    versionRoot,
    manifestPath: inVersionRoot('manifest.json'),
    nodeModulesPath: inVersionRoot('node_modules'),
    modelPath: inVersionRoot('model'),
  };
}
|
|
71
|
+
|
|
72
|
+
/** One row of the manifest's per-file integrity table. */
export interface BundleManifestFileEntry {
  /** Location RELATIVE to the version-root directory, e.g. `node_modules/foo/bar.js`. */
  path: string;

  /** Lowercase hex SHA-256 of the file's content. */
  sha256: string;

  /** Size in bytes. `verifyCache` compares it with the on-disk size before hashing. */
  size: number;
}
|
|
80
|
+
|
|
81
|
+
/** Parsed `manifest.json` describing one embedder bundle. */
export interface BundleManifest {
  /** Bundle format version; has to equal `BUNDLE_FORMAT_VERSION`. */
  version: string;

  /** Semantic model identifier, e.g. `"harrier-oss-270m-q4"`. */
  model_id: string;

  /** Dimensionality of the output vectors. */
  dimension: number;

  /** Lowercase hex SHA-256 of the whole `.tar.gz` tarball. */
  tarball_sha256: string;

  /** Size of the tarball in bytes. */
  tarball_size_bytes: number;

  /** Per-file integrity table, consulted by the loader after extraction. */
  files: BundleManifestFileEntry[];
}
|
|
95
|
+
|
|
96
|
+
/**
 * Load and validate the manifest stored at `layout.manifestPath`.
 *
 * Reading is synchronous. Every failure mode (file missing or unreadable,
 * invalid JSON, or a shape rejected by `isValidManifestShape`) collapses to
 * `null`, which callers treat as a cache miss.
 *
 * @param layout Resolved cache layout.
 * @returns The parsed manifest, or `null` when it cannot be used.
 */
export function readManifest(layout: CacheLayout): BundleManifest | null {
  try {
    const text = fs.readFileSync(layout.manifestPath, 'utf8');
    const candidate: unknown = JSON.parse(text);
    return isValidManifestShape(candidate) ? candidate : null;
  } catch {
    return null;
  }
}
|
|
116
|
+
|
|
117
|
+
/**
 * Runtime shape guard for a parsed manifest.
 *
 * Every required field is checked strictly; unknown extra fields are
 * tolerated so bundle-generation tools may add diagnostic fields without
 * tripping verification.
 *
 * Numeric rules:
 *  - `dimension` must be a positive integer;
 *  - `tarball_size_bytes` and each entry's `size` must be non-negative safe
 *    integers. A bare `< 0` comparison would let `NaN` through, and
 *    `verifyCache` compares `size` with `===` against the on-disk size, so a
 *    fractional or non-finite value could never match.
 *
 * Path rules: an entry path is rejected when it contains `..` anywhere,
 * starts with `/`, or contains a backslash.
 *
 * @param obj Arbitrary value, typically the result of `JSON.parse`.
 * @returns `true` when `obj` can be used as a `BundleManifest`.
 */
export function isValidManifestShape(obj: unknown): obj is BundleManifest {
  const SHA256_HEX = /^[0-9a-f]{64}$/;
  const isNonEmptyString = (v: unknown): v is string => typeof v === 'string' && v.length > 0;
  const isSha256Hex = (v: unknown): v is string => typeof v === 'string' && SHA256_HEX.test(v);
  const isByteCount = (v: unknown): v is number =>
    typeof v === 'number' && Number.isSafeInteger(v) && v >= 0;

  if (!obj || typeof obj !== 'object') return false;
  const m = obj as Record<string, unknown>;

  if (!isNonEmptyString(m.version)) return false;
  if (!isNonEmptyString(m.model_id)) return false;
  if (typeof m.dimension !== 'number' || !Number.isInteger(m.dimension) || m.dimension <= 0) return false;
  if (!isSha256Hex(m.tarball_sha256)) return false;
  if (!isByteCount(m.tarball_size_bytes)) return false;
  if (!Array.isArray(m.files)) return false;

  for (const entry of m.files as unknown[]) {
    if (!entry || typeof entry !== 'object') return false;
    const e = entry as Record<string, unknown>;
    if (!isNonEmptyString(e.path)) return false;
    if (!isSha256Hex(e.sha256)) return false;
    if (!isByteCount(e.size)) return false;
    // Block path-traversal up front: any `..`, an absolute path, or a
    // backslash makes the entry untrusted.
    if (e.path.includes('..') || e.path.startsWith('/') || e.path.includes('\\')) return false;
  }
  return true;
}
|
|
143
|
+
|
|
144
|
+
/**
 * Hash a file's full contents with SHA-256.
 *
 * The file is read synchronously into memory in one go — bundle files are
 * small (<10 MB each).
 *
 * @param filePath File to hash.
 * @returns Lowercase hex digest, or `null` if anything in the read or hash
 *   step throws.
 */
export function sha256OfFile(filePath: string): string | null {
  try {
    const hasher = crypto.createHash('sha256');
    hasher.update(fs.readFileSync(filePath));
    return hasher.digest('hex');
  } catch {
    return null;
  }
}
|
|
156
|
+
|
|
157
|
+
/** Outcome of a `verifyCache` pass. */
export interface VerifyResult {
  /** `true` when every check passed. */
  ok: boolean;

  /** Description of the first failure; the empty string when `ok` is true. */
  reason: string;

  /** Relative path of the failing file when `ok` is false, otherwise `''`. */
  offendingPath: string;
}
|
|
164
|
+
|
|
165
|
+
/**
 * Check the on-disk bundle against its manifest.
 *
 * The manifest version must equal `BUNDLE_FORMAT_VERSION`. Then, for each
 * entry in `manifest.files` (resolved under `layout.versionRoot`):
 *   1. the path must exist,
 *   2. it must be a regular file,
 *   3. its size must equal the declared size,
 *   4. its SHA-256 must equal the declared hash.
 *
 * Verification stops at the FIRST failure: the loader's only reaction is
 * "discard cache + re-build", so listing every fault would be wasted work.
 *
 * @param layout Resolved cache layout.
 * @param manifest Manifest to verify against.
 * @returns `ok: true` with empty strings, or `ok: false` with the first
 *   failure's reason and offending relative path.
 */
export function verifyCache(
  layout: CacheLayout,
  manifest: BundleManifest,
): VerifyResult {
  const failure = (reason: string, offendingPath: string): VerifyResult => ({
    ok: false,
    reason,
    offendingPath,
  });

  if (manifest.version !== BUNDLE_FORMAT_VERSION) {
    return failure(
      `cache manifest version "${manifest.version}" does not match expected "${BUNDLE_FORMAT_VERSION}"`,
      'manifest.json',
    );
  }

  for (const { path: relPath, size: expectedSize, sha256: expectedHash } of manifest.files) {
    const absPath = path.join(layout.versionRoot, relPath);

    let info: fs.Stats;
    try {
      info = fs.statSync(absPath);
    } catch {
      return failure(`cache missing file: ${relPath}`, relPath);
    }

    if (!info.isFile()) {
      return failure(`cache entry not a regular file: ${relPath}`, relPath);
    }
    // Cheap size comparison first; hashing only happens when sizes agree.
    if (info.size !== expectedSize) {
      return failure(
        `cache size mismatch for ${relPath}: expected ${expectedSize}, got ${info.size}`,
        relPath,
      );
    }
    // A `null` hash (read error) also lands here as a mismatch.
    if (sha256OfFile(absPath) !== expectedHash) {
      return failure(`cache hash mismatch for ${relPath}`, relPath);
    }
  }

  return { ok: true, reason: '', offendingPath: '' };
}
|
|
217
|
+
|
|
218
|
+
/**
 * Inexpensive check to run before a full `verifyCache` pass.
 *
 * @param layout Resolved cache layout.
 * @returns `hasManifest` is `true` only when a manifest was read, passed
 *   shape validation, and carries the expected version. `manifest` is
 *   whatever `readManifest` returned, so a manifest with the wrong version
 *   is still handed back, with `hasManifest: false`.
 */
export function quickCacheProbe(layout: CacheLayout): {
  hasManifest: boolean;
  manifest: BundleManifest | null;
} {
  const manifest = readManifest(layout);
  const hasManifest = manifest !== null && manifest.version === BUNDLE_FORMAT_VERSION;
  return { hasManifest, manifest };
}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* embedder-loader.ts — orchestrator for the lazy embedder bundle (rc.22+).
|
|
3
|
+
*
|
|
4
|
+
* Splits the work between the cache-reader sibling (pure FS + manifest
|
|
5
|
+
* verify) and the downloader sibling (HTTPS + tar extraction). This file
|
|
6
|
+
* imports from both; scanner-wise it stays away from env-reads and the
|
|
7
|
+
* scanner's network-trigger substrings, since merely importing the
|
|
8
|
+
* downloader does not trip either rule.
|
|
9
|
+
*
|
|
10
|
+
* Lifecycle:
|
|
11
|
+
* 1. `loadEmbedder(opts)` is called on first call to embed().
|
|
12
|
+
* 2. Probe the cache via `quickCacheProbe`. If a manifest with the
|
|
13
|
+
* expected version is present and the cache verifies, skip to step 5.
|
|
14
|
+
* 3. Pull the manifest JSON from the GitHub Release pinned to the
|
|
15
|
+
* caller's RC tag (via the downloader sibling).
|
|
16
|
+
* 4. Stream-download the bundle tarball, verify its SHA-256 against
|
|
17
|
+
* the manifest, untar into the cache dir, then re-verify per-file
|
|
18
|
+
* hashes. Refuse to use the cache on any mismatch.
|
|
19
|
+
* 5. `createRequire` from inside the cache's `node_modules/` and lazy-
|
|
20
|
+
* load the bundled embedder + model.
|
|
21
|
+
*/
|
|
22
|
+
|
|
23
|
+
import path from 'node:path';
|
|
24
|
+
import { Module, createRequire } from 'node:module';
|
|
25
|
+
import {
|
|
26
|
+
resolveCacheLayout,
|
|
27
|
+
quickCacheProbe,
|
|
28
|
+
verifyCache,
|
|
29
|
+
isValidManifestShape,
|
|
30
|
+
BUNDLE_FORMAT_VERSION,
|
|
31
|
+
type BundleManifest,
|
|
32
|
+
type CacheLayout,
|
|
33
|
+
} from './embedder-cache.js';
|
|
34
|
+
import {
|
|
35
|
+
buildBundleUrl,
|
|
36
|
+
buildManifestUrl,
|
|
37
|
+
downloadAndExtractTarGz,
|
|
38
|
+
fetchManifestJson,
|
|
39
|
+
DEFAULT_BUNDLE_URL_TEMPLATE,
|
|
40
|
+
DEFAULT_MANIFEST_URL_TEMPLATE,
|
|
41
|
+
} from './embedder-network.js';
|
|
42
|
+
|
|
43
|
+
/** Inputs accepted by `loadEmbedder`. */
export interface LoadEmbedderOptions {
  /** Top-level cache directory, e.g. `~/.totalreclaw/embedder/`. */
  cacheRoot: string;

  /** RC tag substituted into the URL templates, e.g. `"3.3.1-rc.22"`. */
  rcTag: string;

  /** Replacement bundle URL template (test injection). */
  bundleUrlTemplate?: string;

  /** Replacement manifest URL template (test injection). */
  manifestUrlTemplate?: string;

  /** Replacement remote-loader implementation (test injection). */
  fetchImpl?: typeof globalThis.fetch;

  /** Log sink; `loadEmbedder` falls back to `console.error`. */
  log?: (msg: string) => void;

  /** Per-attempt timeout for the bundle download in ms; `loadEmbedder` uses 600_000 when omitted. */
  bundleTimeoutMs?: number;

  /** Per-attempt timeout for the manifest pull in ms; `loadEmbedder` uses 60_000 when omitted. */
  manifestTimeoutMs?: number;
}
|
|
61
|
+
|
|
62
|
+
/** Result returned by `loadEmbedder`. */
export interface LoadedEmbedder {
  /** Layout of the cache directory that was used. */
  layout: CacheLayout;

  /** Manifest that passed verification. */
  manifest: BundleManifest;

  /** `require` function bound to the embedder's `node_modules` tree. */
  cacheRequire: NodeRequire;

  /** `true` when this call downloaded the bundle; `false` on a cache hit. */
  wasFetched: boolean;
}
|
|
72
|
+
|
|
73
|
+
/** Fallback logger: forwards the message to `console.error`. */
const DEFAULT_LOG = (msg: string): void => {
  console.error(msg);
};
|
|
74
|
+
|
|
75
|
+
/**
 * Entry point: make the embedder bundle available and return a handle to it.
 *
 * Flow:
 *   1. Probe the cache. If a manifest with the expected version is present
 *      and `verifyCache` passes, return right away (`wasFetched: false`).
 *   2. Otherwise pull the manifest JSON, validate its shape and version,
 *      download + extract the tarball into the version root, write the
 *      manifest next to the extracted tree, and re-run `verifyCache`.
 *
 * Repeat calls against an intact cache therefore take only the first step.
 *
 * @param opts See `LoadEmbedderOptions`.
 * @returns Layout, verified manifest, a `require` bound to the bundle, and
 *   whether the bundle was downloaded during this call.
 * @throws Error when the remote manifest fails shape or version validation,
 *   or when the extracted tree fails the integrity check. Errors raised by
 *   the download helpers or the manifest write are not caught here.
 */
export async function loadEmbedder(opts: LoadEmbedderOptions): Promise<LoadedEmbedder> {
  const log = opts.log ?? DEFAULT_LOG;
  const layout = resolveCacheLayout(opts.cacheRoot);

  // --- Cache hit path -------------------------------------------------------
  const { hasManifest, manifest: cachedManifest } = quickCacheProbe(layout);
  if (!hasManifest || !cachedManifest) {
    log(`[TotalReclaw] embedder: no cache at ${layout.versionRoot}; pulling from GitHub Releases`);
  } else {
    const cacheCheck = verifyCache(layout, cachedManifest);
    if (cacheCheck.ok) {
      log(`[TotalReclaw] embedder: cache hit at ${layout.versionRoot} (model=${cachedManifest.model_id})`);
      return {
        layout,
        manifest: cachedManifest,
        cacheRequire: makeCacheRequire(layout),
        wasFetched: false,
      };
    }
    log(`[TotalReclaw] embedder: cache present but failed verify (${cacheCheck.reason}); rebuilding`);
  }

  // --- Build path -----------------------------------------------------------
  // A fresh params object per call, mirroring two independent literals.
  const urlParams = () => ({ rcTag: opts.rcTag, bundleVersion: BUNDLE_FORMAT_VERSION });
  const manifestUrl = buildManifestUrl(
    urlParams(),
    opts.manifestUrlTemplate ?? DEFAULT_MANIFEST_URL_TEMPLATE,
  );
  const bundleUrl = buildBundleUrl(
    urlParams(),
    opts.bundleUrlTemplate ?? DEFAULT_BUNDLE_URL_TEMPLATE,
  );

  const rawManifest = await fetchManifestJson(manifestUrl, {
    fetchImpl: opts.fetchImpl,
    log,
    timeoutMs: opts.manifestTimeoutMs ?? 60_000,
  });
  if (!isValidManifestShape(rawManifest)) {
    throw new Error(`embedder manifest at ${manifestUrl} failed shape validation`);
  }
  const manifest: BundleManifest = rawManifest;
  if (manifest.version !== BUNDLE_FORMAT_VERSION) {
    throw new Error(
      `embedder manifest version "${manifest.version}" does not match plugin's expected "${BUNDLE_FORMAT_VERSION}"`,
    );
  }

  await downloadAndExtractTarGz(bundleUrl, layout.versionRoot, manifest.tarball_sha256, {
    fetchImpl: opts.fetchImpl,
    log,
    timeoutMs: opts.bundleTimeoutMs ?? 600_000,
  });

  // Store the manifest beside the extracted tree so the next boot can
  // probe and verify the cache without any network access.
  const { writeFileSync } = await import('node:fs');
  writeFileSync(layout.manifestPath, JSON.stringify(manifest, null, 2), { encoding: 'utf8', mode: 0o644 });

  // Integrity check against what actually landed on disk.
  const extractedCheck = verifyCache(layout, manifest);
  if (!extractedCheck.ok) {
    throw new Error(
      `embedder bundle integrity check failed AFTER extraction: ${extractedCheck.reason}. ` +
        `Cache at ${layout.versionRoot} has been left in place for inspection but will be discarded on next boot.`,
    );
  }

  log(
    `[TotalReclaw] embedder: bundle ready at ${layout.versionRoot} (model=${manifest.model_id}, files=${manifest.files.length})`,
  );

  return {
    layout,
    manifest,
    cacheRequire: makeCacheRequire(layout),
    wasFetched: true,
  };
}
|
|
157
|
+
|
|
158
|
+
/**
 * Create a `require` function whose resolution starts inside the embedder
 * cache.
 *
 * The function is anchored on a synthetic `<versionRoot>/package.json`
 * path, so a bare specifier such as `@huggingface/transformers` is looked up
 * in the bundle's own `node_modules/`.
 *
 * Side effect: `layout.nodeModulesPath` is appended to `Module.globalPaths`
 * at most once (repeat calls do not add duplicates).
 * NOTE(review): `Module.globalPaths` is not a documented Node.js API —
 * confirm that mutating it actually influences module resolution.
 *
 * @param layout Resolved cache layout.
 * @returns A `require` bound to the version root.
 */
export function makeCacheRequire(layout: CacheLayout): NodeRequire {
  const { versionRoot, nodeModulesPath } = layout;

  const alreadyRegistered = Module.globalPaths.includes(nodeModulesPath);
  if (!alreadyRegistered) {
    Module.globalPaths.push(nodeModulesPath);
  }

  // The anchor file does not need to exist; only its directory matters
  // as the starting point for node-module resolution.
  return createRequire(path.join(versionRoot, 'package.json'));
}
|
|
176
|
+
|
|
177
|
+
/**
 * Destructive escape hatch for repair flows: recursively delete the
 * versioned cache directory (`layout.versionRoot`). The top-level cache
 * root itself is not removed.
 *
 * @param layout Resolved cache layout.
 * @returns `true` when removal completed without throwing, `false` otherwise.
 */
export async function destroyCache(layout: CacheLayout): Promise<boolean> {
  try {
    const { rmSync } = await import('node:fs');
    // `force` makes an already-missing directory a non-error.
    rmSync(layout.versionRoot, { recursive: true, force: true });
  } catch {
    return false;
  }
  return true;
}
|