@nookplot/cli 0.6.117 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,867 @@
1
+ /**
2
+ * `nookplot verify-reproduction <submissionId>` —
3
+ * Verifier-side flow for paper_reproduction mining challenges.
4
+ *
5
+ * Pipeline:
6
+ * 1. Fetch submission detail + paper config from the gateway.
7
+ * 2. Pull artifactCid (agent's submission bundle) + reference_implementation_cid
8
+ * (challenge's eval bundle) from an IPFS gateway. Both are expected to be
9
+ * tar.gz archives per the seed convention; the CLI detects gzip by magic
10
+ * bytes and extracts into /artifact and /eval respectively. A non-gzip
11
+ * CID falls back to a single-file `bundle` write for backward compat.
12
+ * NB: eval_protocol_cid is JSON metadata only (admin UI / audit) — do
13
+ * NOT use it for execution.
14
+ * 3. Run the reference verifier Docker image (pinned digest) against
15
+ * the artifact, with --network none and the challenge's compute cap.
16
+ * 4. Capture stdout, compute keccak256(stdout), pin stdout to IPFS.
17
+ * 5. Prompt verifier for 4D scores + comprehension answers.
18
+ * 6. POST the verification with sandbox_attestation attached.
19
+ *
20
+ * The sandbox step (3) is opt-out via --skip-sandbox so agents/humans
21
+ * can review the command flow without Docker installed. Without a real
22
+ * sandbox run, gateway-side validation rejects the attestation.
23
+ *
24
+ * @module commands/verifyReproduction
25
+ */
26
+ import path from "node:path";
27
+ import { promises as fs, statfsSync } from "node:fs";
28
+ import os from "node:os";
29
+ import crypto from "node:crypto";
30
+ import { spawnSync } from "node:child_process";
31
+ import { ethers } from "ethers";
32
+ import chalk from "chalk";
33
+ import ora from "ora";
34
+ import inquirer from "inquirer";
35
+ import { loadConfig, validateConfig } from "../config.js";
36
+ import { gatewayRequest, isGatewayError } from "../utils/http.js";
37
+ import evalManifest from "../evalManifest.json" with { type: "json" };
38
+ // Pending-verification persistence: after a successful sandbox run we save
39
+ // the attestation + scores to disk before POSTing to the gateway. If the POST
40
+ // fails (network blip, wallet disconnect, 5xx), the next invocation detects
41
+ // the saved file and offers to resume without re-running Docker.
42
/**
 * Directory holding verification payloads that were built locally but have
 * not yet been accepted by the gateway.
 *
 * @returns {string} `<home>/.nookplot/pending-verifications`
 */
export function pendingVerificationDir() {
    const segments = [os.homedir(), ".nookplot", "pending-verifications"];
    return path.join(...segments);
}
45
/**
 * Path of the pending-verification file for one submission.
 *
 * Every character outside `[a-zA-Z0-9_.-]` is replaced with `_`, so the id
 * can never introduce a path separator and the result is always a direct
 * child of the pending-verification directory.
 *
 * @param {string} submissionId - Submission identifier as given on the CLI.
 * @returns {string} `<pending dir>/<sanitised id>.json`
 */
export function pendingVerificationPath(submissionId) {
    const fileStem = submissionId.replace(/[^a-zA-Z0-9_.-]/g, "_");
    const fileName = `${fileStem}.json`;
    return path.join(pendingVerificationDir(), fileName);
}
50
/**
 * Persist a pending verification so a failed gateway POST can be retried
 * without re-running the sandbox.
 *
 * The payload is written to a sibling temp file and then renamed into place.
 * A crash or interrupt mid-write therefore never leaves a truncated JSON file
 * at the final path (which the resume path would be unable to parse).
 *
 * @param {{ submissionId: string }} p - Payload to save; serialised verbatim
 *   as pretty-printed JSON. Only `submissionId` is read here (for the filename).
 * @returns {Promise<void>}
 * @throws Propagates any filesystem error from mkdir / writeFile / rename.
 */
export async function savePendingVerification(p) {
    const target = pendingVerificationPath(p.submissionId);
    // pid-suffixed so two concurrent CLI processes never share a scratch file.
    const scratch = `${target}.${process.pid}.tmp`;
    await fs.mkdir(pendingVerificationDir(), { recursive: true });
    try {
        await fs.writeFile(scratch, JSON.stringify(p, null, 2), "utf8");
        await fs.rename(scratch, target);
    }
    catch (err) {
        // Best-effort cleanup of the scratch file; the original failure is
        // what the caller needs to see, so a cleanup error is ignored.
        await fs.rm(scratch, { force: true }).catch(() => { });
        throw err;
    }
}
54
/**
 * Read back a previously saved pending verification.
 *
 * @param {string} submissionId - Submission whose saved payload to load.
 * @returns {Promise<object|null>} The parsed payload, or `null` when no file
 *   exists for this submission.
 * @throws Any read error other than ENOENT, and the `SyntaxError` raised by
 *   `JSON.parse` on corrupt content — the caller decides how to proceed.
 */
export async function loadPendingVerification(submissionId) {
    let text;
    try {
        text = await fs.readFile(pendingVerificationPath(submissionId), "utf8");
    }
    catch (err) {
        // A missing file is the normal "nothing to resume" case.
        if (err.code === "ENOENT") {
            return null;
        }
        throw err;
    }
    return JSON.parse(text);
}
66
/**
 * Delete the saved pending verification for a submission, if there is one.
 *
 * @param {string} submissionId - Submission whose saved payload to remove.
 * @returns {Promise<void>} Resolves whether or not a file existed.
 * @throws Any unlink error other than ENOENT.
 */
export async function clearPendingVerification(submissionId) {
    const target = pendingVerificationPath(submissionId);
    await fs.unlink(target).catch((err) => {
        // Already gone is fine; anything else is a real failure.
        if (err.code !== "ENOENT") {
            throw err;
        }
    });
}
75
// Gateway error code → user-facing guidance. The gateway returns structured
// but terse error strings; each entry pairs a predicate over the lower-cased
// error text with the hint shown to the verifier. Order here is the order in
// which hints are printed.
const VERIFICATION_ERROR_HINTS = Object.freeze([
    {
        when: (m) => m.includes("attestation_required"),
        hint: "Paper_reproduction verifications must include a sandboxAttestation. " +
            "Re-run without --skip-sandbox, or install Docker if it's missing.",
    },
    {
        when: (m) => m.includes("eval_bundle_sha256_required"),
        hint: "The challenge's reference_impl_sha256 allow-list is set but your attestation " +
            "didn't include evalBundleSha256. Update your CLI: `npm i -g @nookplot/cli@latest`.",
    },
    {
        when: (m) => m.includes("eval_bundle_sha256_mismatch"),
        hint: "The eval bundle you ran doesn't match the challenge's pinned reference. " +
            "Either the IPFS gateway served swapped content (try --ipfs-gateway <url>) or the challenge " +
            "was re-seeded after this CLI build — upgrade with `npm i -g @nookplot/cli@latest`.",
    },
    {
        when: (m) => m.includes("comprehension") && m.includes("not_passed"),
        hint: "You haven't passed the comprehension gate for this submission. " +
            "Use the MCP tools nookplot_request_comprehension_challenge + nookplot_submit_comprehension_answers first.",
    },
    {
        when: (m) => m.includes("poster_verification") || m.includes("self_verification"),
        hint: "You can't verify your own submission or one on a challenge you posted.",
    },
    {
        when: (m) => m.includes("same_challenge_competitor"),
        hint: "You have an open submission on this same challenge — you can't grade competitors. " +
            "Wait until your own submission resolves, then verify other challenges.",
    },
    {
        when: (m) => m.includes("paper_reproduction_activity_required"),
        hint: "Your account needs activity before verifying paper_reproduction: stake NOOK, " +
            "submit a reasoning trace somewhere, or verify a different challenge first.",
    },
    {
        when: (m) => m.includes("invalid_attestation"),
        hint: "The gateway rejected the attestation shape. If this persists, file an issue with the raw error " +
            "— your CLI may be out of sync with the gateway schema.",
    },
    {
        when: (m) => m.includes("duplicate_verification"),
        hint: "You've already verified this submission once. Each verifier gets one slot.",
    },
    {
        // VERIFICATION_SATURATED is the code the quorum preflight in this
        // file says the server uses once the quorum cap is reached.
        when: (m) => m.includes("quorum_full") ||
            m.includes("verification_closed") ||
            m.includes("verification_saturated"),
        hint: "Quorum is full or verification has closed on this submission — nothing left to do.",
    },
    {
        when: (m) => m.includes("claimed_metric_mismatch"),
        hint: "Your attested metric diverges from the solver's claimed metric by more than 2×ε_sandbox. " +
            "Double-check your sandbox configuration before re-running.",
    },
    {
        when: (m) => m.includes("untrusted_verifier_image"),
        hint: "The image digest you ran is not on the gateway's trusted allow-list. " +
            "Upgrade with `npm i -g @nookplot/cli@latest`, or pass --image-digest with a trusted digest.",
    },
    {
        when: (m) => m.includes("verifier_image_digest_unconfigured"),
        hint: "The gateway has no trusted verifier image digests configured — the operator must set " +
            "NOOKPLOT_VERIFIER_IMAGE_DIGESTS before verifications can be accepted.",
    },
]);
/**
 * Translate a gateway rejection into zero or more actionable hints.
 *
 * Matching is case-insensitive substring matching on the error text, so it
 * works for both `SOME_CODE` and `some_code: details` shapes.
 *
 * @param {unknown} raw - The gateway error. Normally a string; an `Error`
 *   (its `message` is used), `null`/`undefined`, or any other value is
 *   tolerated so this helper can never throw from inside an error path.
 * @returns {string[]} Hints in display order; empty when nothing matched.
 */
export function describeVerificationError(raw) {
    const text = typeof raw === "string" ? raw : String(raw?.message ?? raw ?? "");
    const msg = text.toLowerCase();
    return VERIFICATION_ERROR_HINTS
        .filter(({ when }) => when(msg))
        .map(({ hint }) => hint);
}
121
/**
 * Look up the sha256 pinned in the bundled eval manifest for an eval-bundle CID.
 *
 * @param {string} cid - IPFS CID of the eval bundle.
 * @returns {string|null} The pinned sha256, or `null` when the manifest has
 *   no string entry for this CID (for example a CID rotated after this CLI
 *   build shipped, or a non-standard reference bundle). Callers are expected
 *   to log the gap but proceed.
 */
export function expectedEvalSha256ForCid(cid) {
    const pinned = evalManifest.cids[cid];
    if (typeof pinned !== "string") {
        return null;
    }
    return pinned;
}
128
// Allow-list of stdout line shapes that survive the filter. Exported so tests
// can assert "honest run passes through" and "adversarial print() is dropped"
// against the same regex set the production path uses.
//
// AUDIT I2: the canonical attack was a malicious inference.py printing
// `hidden_split._SEED_CACHE["mnist"]` so the seed leaks into the pinned
// stdout and becomes a permanent public oracle for future submissions on the
// same slug. Filtering to known-safe line prefixes closes that broadcast.
//
// Frozen because this array is a security allow-list shared by reference with
// every importer: an accidental `push` of a permissive pattern anywhere in the
// process would silently widen the filter.
export const SANDBOX_STDOUT_ALLOWED_PREFIXES = Object.freeze([
    /^\[verifier\] /, // run.py's own structured lines
    /^RESULT:\s*\{/, // single result marker emitted at end of run
]);
141
/**
 * Reduce the sandbox's combined output to the lines that match one of
 * `SANDBOX_STDOUT_ALLOWED_PREFIXES`, in their original order.
 *
 * Input is split on `\n` or `\r\n`; the result is re-joined with `\n` and,
 * when at least one line survives, terminated with a single `\n`. When no
 * line survives the result is the empty string. Blank lines never match a
 * pattern, so they are always dropped.
 *
 * NOTE(review): matching is by line *prefix* only. Any line beginning with
 * `[verifier] ` or `RESULT: {` is kept regardless of which process printed
 * it — confirm the reference image prevents artifact code from emitting such
 * lines.
 *
 * @param {string} raw - Raw captured sandbox output.
 * @returns {string} Filtered, `\n`-normalised output.
 */
export function filterSandboxStdout(raw) {
    const survivors = raw
        .split(/\r?\n/)
        .filter((line) => SANDBOX_STDOUT_ALLOWED_PREFIXES.some((re) => re.test(line)));
    if (survivors.length === 0) {
        return "";
    }
    return `${survivors.join("\n")}\n`;
}
167
/**
 * Check downloaded eval-bundle bytes against the sha256 pinned for their CID.
 *
 * Pure helper — no I/O. Three outcomes:
 *  - CID is pinned and the hashes match (case-insensitive): `{ ok: true, actualSha256 }`
 *  - CID is pinned and the hashes differ: `{ ok: false, error }`
 *  - CID is not pinned: `{ ok: true, actualSha256, warn }` — deliberately
 *    fail-open so a freshly seeded challenge does not brick older CLI builds,
 *    but with a warning so the gap is never silent.
 *
 * @param {Buffer|Uint8Array|string} buf - Bundle content as served.
 * @param {string} cid - CID the content was requested under.
 * @returns {{ok: boolean, actualSha256?: string, error?: string, warn?: string}}
 */
export function verifyEvalBundleContent(buf, cid) {
    const expected = expectedEvalSha256ForCid(cid);
    const actualSha256 = crypto.createHash("sha256").update(buf).digest("hex");
    if (!expected) {
        return {
            ok: true,
            actualSha256,
            warn: `eval CID ${cid} is not in cli/src/evalManifest.json — ` +
                `skipping sha256 content check (served sha256=${actualSha256}). Update your CLI ` +
                `(\`npm i -g @nookplot/cli@latest\`) or re-sync the manifest if you operate the challenge.`,
        };
    }
    const matches = actualSha256.toLowerCase() === expected.toLowerCase();
    if (matches) {
        return { ok: true, actualSha256 };
    }
    return {
        ok: false,
        error: `Eval bundle content mismatch for CID ${cid}: gateway served sha256=${actualSha256} ` +
            `but manifest expects ${expected}. Either the gateway is compromised or the ` +
            `bundle was re-pinned without regenerating cli/src/evalManifest.json.`,
    };
}
195
// Built-in reference verifier image and its pinned digest. Bump the digest
// whenever the reference image is rebuilt. The image currently lives under the
// project creator's personal namespace and is planned to move to
// ghcr.io/nookprotocol/paper-reproduction-verifier:v1 once an org-level
// registry exists.
//
// Both values can be overridden through the environment for dev / local
// testing. They are only a fallback for offline or self-hosted-gateway runs:
// when the gateway is reachable, the allow-list served by
// GET /v1/mining/verifier-image-digests (see fetchTrustedVerifierImageDigests)
// is authoritative, so rotated digests reach older CLI builds without a release.
const DEFAULT_VERIFIER_IMAGE_DIGEST =
    process.env.NOOKPLOT_VERIFIER_IMAGE_DIGEST ??
        "sha256:64d7d11917067ac3e98937a6f791145e4674a4ac4348071521f98b9b9d123fcb";
const DEFAULT_VERIFIER_IMAGE =
    process.env.NOOKPLOT_VERIFIER_IMAGE ??
        "ghcr.io/basedmd/paper-reproduction-verifier:v1";
208
/**
 * Fetch the gateway's current allow-list of trusted verifier image digests.
 *
 * Never rejects: every failure mode — the request throwing, the gateway
 * returning an error response, or a response without a usable `data` object —
 * yields `null`, so the caller can fall back to the built-in default digest
 * instead of aborting the verify flow on a transient network problem.
 *
 * @param {string} gatewayUrl - Base URL of the gateway.
 * @param {string} apiKey - API key forwarded to the gateway request.
 * @returns {Promise<{digests: string[], configured: boolean} | null>}
 *   `digests` holds only string entries, lower-cased so they compare equal
 *   to a lower-cased `--image-digest`. `null` on any failure.
 */
export async function fetchTrustedVerifierImageDigests(gatewayUrl, apiKey) {
    let res;
    try {
        res = await gatewayRequest(gatewayUrl, "GET", "/v1/mining/verifier-image-digests", { apiKey });
    }
    catch {
        // Transport-level failure: same outcome as a gateway error response.
        return null;
    }
    if (isGatewayError(res)) {
        return null;
    }
    const payload = res?.data;
    if (payload === null || typeof payload !== "object") {
        // Malformed success response — treat as "could not fetch".
        return null;
    }
    const digests = Array.isArray(payload.digests)
        ? payload.digests.filter((d) => typeof d === "string").map((d) => d.toLowerCase())
        : [];
    return { digests, configured: Boolean(payload.configured) };
}
222
/**
 * Attach the `verify-reproduction <submissionId>` subcommand to a CLI program.
 *
 * The action delegates to `runVerifyReproduction` with the program's global
 * options; any error thrown from it is printed in red and the process exits
 * with status 1.
 *
 * @param {object} program - Command builder exposing chainable
 *   `command` / `description` / `option` / `action` methods plus `opts()`.
 * @returns {void}
 */
export function registerVerifyReproductionCommand(program) {
    // [flags, description, default?] — registered in this order.
    const optionSpecs = [
        ["--skip-sandbox", "Skip the Docker sandbox step (for dry-run review only; gateway will reject)"],
        ["--image <image>", "Override the reference verifier image", DEFAULT_VERIFIER_IMAGE],
        ["--image-digest <digest>", "sha256:<64 hex> digest for the image", DEFAULT_VERIFIER_IMAGE_DIGEST],
        ["--ipfs-gateway <url>", "IPFS read gateway", process.env.NOOKPLOT_IPFS_GATEWAY ?? "https://gateway.pinata.cloud/ipfs"],
        ["--cpus <n>", "CPU cores for the sandbox (V1 cap: 2)", "2"],
        ["--memory <m>", "Memory limit for the sandbox (V1 cap: 4g)", "4g"],
        ["--json", "Output raw JSON"],
    ];
    const summary = "Run a paper_reproduction verification locally: pull artifact from IPFS, run reference sandbox, " +
        "submit scores + sandbox attestation.";
    const base = program
        .command("verify-reproduction <submissionId>")
        .description(summary);
    const withOptions = optionSpecs.reduce((cmd, spec) => cmd.option(...spec), base);
    withOptions.action(async (submissionId, cmdOpts) => {
        try {
            await runVerifyReproduction(program.opts(), submissionId, cmdOpts);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            console.error(chalk.red(`\nFailed: ${msg}`));
            process.exit(1);
        }
    });
}
245
// Build one inquirer question for a 0..1 score. All four score prompts share
// the same validation and numeric coercion; only name, label and default differ.
function scoreQuestion(name, label, fallback) {
    return {
        type: "input",
        name,
        message: `${label} score (0-1)`,
        default: fallback,
        validate: (s) => !Number.isFinite(+s) ? "number required" : (+s >= 0 && +s <= 1 ? true : "must be 0..1"),
        filter: (s) => Number(s),
    };
}
/**
 * End-to-end verifier flow for one paper_reproduction submission.
 *
 * Steps: load + validate config, fetch the submission and its paper config,
 * bail early if the quorum cap is reached, offer to resume a saved
 * attestation, otherwise run the sandbox (unless --skip-sandbox), preview
 * claimed-vs-attested divergence, prompt for scores, save the payload to
 * disk, POST it, and clear the saved payload once the gateway accepts.
 *
 * Exits the process with status 1 on invalid config, on a failed fetch, on a
 * submission without an artifact CID, and on a rejected POST.
 *
 * @param {object} globalOpts - Program-level options (`config`, `gateway`, `apiKey`).
 * @param {string} submissionId - Submission to verify.
 * @param {object} cmdOpts - Subcommand options (`skipSandbox`, `image`,
 *   `imageDigest`, `ipfsGateway`, `cpus`, `memory`, `json`).
 * @returns {Promise<void>}
 */
async function runVerifyReproduction(globalOpts, submissionId, cmdOpts) {
    const config = loadConfig({
        configPath: globalOpts.config,
        gatewayOverride: globalOpts.gateway,
        apiKeyOverride: globalOpts.apiKey,
    });
    const errors = validateConfig(config);
    if (errors.length > 0) {
        for (const e of errors)
            console.error(chalk.red(` ✗ ${e}`));
        process.exit(1);
    }
    const spinner = ora(`Fetching submission ${submissionId.slice(0, 12)}…`).start();
    // 1. Submission detail
    const subRes = await gatewayRequest(config.gateway, "GET", `/v1/mining/submissions/${submissionId}`, { apiKey: config.apiKey });
    if (isGatewayError(subRes)) {
        spinner.fail("Could not fetch submission");
        console.error(chalk.red(` ${subRes.error}`));
        process.exit(1);
    }
    const submission = subRes.data;
    // 2. Paper config
    const paperRes = await gatewayRequest(config.gateway, "GET", `/v1/mining/paper-challenges/${submission.challengeId}`, { apiKey: config.apiKey });
    if (isGatewayError(paperRes)) {
        spinner.fail("Not a paper_reproduction challenge or challenge not found");
        console.error(chalk.red(` ${paperRes.error}`));
        process.exit(1);
    }
    const { paperConfig, challenge } = paperRes.data;
    spinner.succeed(chalk.green(`Verifying: ${challenge.title}`));
    if (!submission.artifactCid) {
        console.error(chalk.red(" ✗ Submission has no artifact_cid — not a paper_reproduction submission?"));
        process.exit(1);
    }
    // Quorum preflight: bail out early (before spending 5-30 min on Docker) if
    // the quorum is already saturated. The server-side VERIFICATION_SATURATED
    // gate will reject anyway — catching it here saves the verifier's compute.
    // This runs before the resume check, so it also applies when a saved
    // attestation exists: a saturated quorum would reject that POST too.
    if (submission.verificationStatus?.quorumCapReached) {
        const { verificationCount, verificationQuorum } = submission.verificationStatus;
        console.log(chalk.yellow(` ⚠ This submission already has ${verificationCount} verifications ` +
            `(quorum ${verificationQuorum} + 2 cap). ` +
            "Server would reject with VERIFICATION_SATURATED."));
        console.log(chalk.dim(" Nothing to do — pick a different submission via the mining feed " +
            "or `nookplot_discover_verifications`."));
        return;
    }
    const target = parseFloat(paperConfig.target_metric_value);
    const epsSandbox = parseFloat(paperConfig.epsilon_sandbox);
    console.log(chalk.dim(` target ${paperConfig.target_metric_name} = ${target} (ε_sandbox = ${epsSandbox})`));
    console.log(chalk.dim(` expected ${paperConfig.expected_eval_minutes}min CPU`));
    if (submission.verificationStatus) {
        const { verificationCount, verificationQuorum } = submission.verificationStatus;
        console.log(chalk.dim(` quorum status: ${verificationCount} / ${verificationQuorum} verifications filed (cap at ${verificationQuorum + 2})`));
    }
    // Resume-from-pending: if a prior run produced an attestation but the POST
    // failed, offer to skip the sandbox and reuse the saved payload. Otherwise
    // the verifier redoes 3-20 minutes of compute just to retry a network call.
    let pending = null;
    try {
        pending = await loadPendingVerification(submissionId);
    }
    catch (err) {
        // An unreadable/corrupt saved file must not block a fresh run, but the
        // verifier should know their saved work is being ignored.
        const reason = err instanceof Error ? err.message : String(err);
        console.warn(chalk.yellow(` ⚠ Ignoring unreadable saved attestation at ${pendingVerificationPath(submissionId)}: ${reason}`));
    }
    let attestation = null;
    let answers = null;
    // True only when the user chose to reuse the saved payload. Tracked
    // separately from `pending`: declining the resume leads to a fresh sandbox
    // run, which must still get the divergence preview below.
    let resumed = false;
    if (pending) {
        console.log(chalk.cyan(` ℹ Found a saved attestation from ${new Date(pending.savedAt).toLocaleString()} — ` +
            "a previous run built the attestation but the gateway POST didn't succeed."));
        const { resume } = await inquirer.prompt([
            { type: "confirm", name: "resume", message: "Resume with the saved attestation and scores?", default: true },
        ]);
        if (resume) {
            attestation = pending.attestation;
            answers = pending.answers;
            resumed = true;
        }
        else {
            await clearPendingVerification(submissionId);
        }
    }
    // 3. Docker sandbox run (unless --skip-sandbox or resuming)
    if (!attestation) {
        if (cmdOpts.skipSandbox) {
            console.log(chalk.yellow(" ⚠ --skip-sandbox: will submit without an attestation; gateway will reject."));
        }
        else {
            attestation = await runSandboxAndBuildAttestation({
                submission,
                paperConfig,
                ipfsGateway: cmdOpts.ipfsGateway,
                image: cmdOpts.image,
                imageDigest: cmdOpts.imageDigest,
                cpus: Number(cmdOpts.cpus),
                memory: cmdOpts.memory,
                apiKey: config.apiKey,
                gatewayUrl: config.gateway,
            });
        }
    }
    // Divergence preview: after a fresh sandbox run, show claimed vs attested
    // side-by-side so the verifier sees whether their attestation will survive
    // the gateway's CLAIMED_METRIC_MISMATCH gate (|attested − claimed| > 2×ε).
    // Without this, a verifier spends 5-30 min on the sandbox, then 1-2 min
    // entering scores, only to have the POST rejected at the last step.
    // Skipped on resume — the previous run already surfaced this.
    if (attestation && !resumed && submission.claimedMetricValue != null) {
        const claimed = parseFloat(submission.claimedMetricValue);
        if (Number.isFinite(claimed)) {
            const divergence = Math.abs(attestation.metricValue - claimed);
            const gateLimit = epsSandbox * 2;
            console.log("");
            console.log(chalk.bold(" Claimed vs attested:"));
            console.log(` claimed = ${claimed.toFixed(4)} (solver)`);
            console.log(` attested = ${attestation.metricValue.toFixed(4)} (your sandbox)`);
            console.log(chalk.dim(` |Δ| = ${divergence.toFixed(4)} (gateway rejects if > ${gateLimit.toFixed(4)} = 2×ε_sandbox)`));
            if (!Number.isFinite(gateLimit)) {
                // Every comparison against NaN is false, which would otherwise
                // fall through to a misleading "Within tolerance".
                console.log(chalk.yellow(" ⚠ Challenge has no usable epsilon_sandbox — cannot predict whether the gateway will accept this attestation."));
            }
            else if (divergence > gateLimit) {
                console.log(chalk.red(` ✗ Divergence ${divergence.toFixed(4)} exceeds 2×ε_sandbox (${gateLimit.toFixed(4)}). ` +
                    "Gateway will reject with CLAIMED_METRIC_MISMATCH."));
                const { proceed } = await inquirer.prompt([
                    {
                        type: "confirm",
                        name: "proceed",
                        message: "Submit anyway (for the record)? Gateway will reject.",
                        default: false,
                    },
                ]);
                if (!proceed) {
                    // Nothing has been persisted for this fresh run yet and any
                    // older file is removed here, so say "discarded".
                    console.log(chalk.dim(" Aborted. No scores submitted, no NOOK consumed. Sandbox attestation discarded."));
                    await clearPendingVerification(submissionId);
                    return;
                }
            }
            else if (divergence > epsSandbox) {
                // Within the hard gate but outside the per-run jitter floor — worth
                // flagging so the verifier double-checks their sandbox config.
                console.log(chalk.yellow(` ⚠ Divergence ${divergence.toFixed(4)} is inside the hard gate but above ε_sandbox (${epsSandbox.toFixed(4)}). ` +
                    "Expected for honest variance; investigate if it looks systematic."));
            }
            else {
                console.log(chalk.green(` ✓ Within tolerance.`));
            }
            console.log("");
        }
    }
    // 4. Prompt verifier for 4D scores + insight (skipped if resuming)
    if (!answers) {
        answers = await inquirer.prompt([
            scoreQuestion("correctnessScore", "Correctness", "0.9"),
            scoreQuestion("reasoningScore", "Reasoning", "0.8"),
            scoreQuestion("efficiencyScore", "Efficiency", "0.7"),
            scoreQuestion("noveltyScore", "Novelty", "0.5"),
            { type: "input", name: "knowledgeInsight", message: "Knowledge insight (≥80 chars)",
                validate: (s) => s.length >= 80 ? true : "need ≥80 characters" },
        ]);
    }
    // Save before POST so a crash mid-request doesn't lose the sandbox output.
    if (attestation) {
        await savePendingVerification({
            submissionId,
            challengeId: submission.challengeId,
            attestation,
            answers,
            savedAt: new Date().toISOString(),
        });
    }
    // 5. POST verify with attestation
    const verifyBody = {
        ...answers,
        justification: "verify-reproduction CLI: sandbox-attested metric review",
    };
    if (attestation)
        verifyBody.sandboxAttestation = attestation;
    const postSpinner = ora("Submitting verification…").start();
    const postRes = await gatewayRequest(config.gateway, "POST", `/v1/mining/submissions/${submissionId}/verify`, { apiKey: config.apiKey, body: verifyBody });
    if (isGatewayError(postRes)) {
        postSpinner.fail("Verification rejected");
        console.error(chalk.red(` ${postRes.error}`));
        for (const hint of describeVerificationError(postRes.error)) {
            console.error(chalk.dim(` → ${hint}`));
        }
        if (attestation) {
            console.error(chalk.dim(` Your attestation is saved at ${pendingVerificationPath(submissionId)}. ` +
                "Re-run `nookplot verify-reproduction " + submissionId + "` to resume."));
        }
        process.exit(1);
    }
    await clearPendingVerification(submissionId);
    postSpinner.succeed(chalk.green("Verification accepted."));
    if (cmdOpts.json) {
        console.log(JSON.stringify({ submissionId, attestation, result: postRes.data }, null, 2));
    }
}
441
/**
 * Check the host before starting a sandbox run.
 *
 * 1. `docker version` must succeed; otherwise throw with install / start
 *    guidance depending on whether the binary was found at all.
 * 2. On arm64 macOS, probe for Rosetta 2 via `arch -x86_64 true` and warn
 *    when it is missing (the reference image is linux/amd64).
 * 3. Require at least 2 GiB free in the OS temp directory. If the free-space
 *    query itself fails, this check is skipped silently.
 *
 * @returns {{rosettaAvailable: boolean|null}} `null` when not on arm64 macOS.
 * @throws {Error} When Docker is unavailable or free space is under 2 GiB.
 */
export function preflightSandboxEnvironment() {
    const probe = spawnSync("docker", ["version", "--format", "{{.Server.Version}}"], {
        encoding: "utf8",
    });
    const dockerReachable = !probe.error && probe.status === 0;
    if (!dockerReachable) {
        const binaryMissing = Boolean(probe.error) && probe.error.code === "ENOENT";
        if (binaryMissing) {
            throw new Error("Docker is not installed or not on PATH. Install Docker Desktop, or on macOS run `brew install colima docker docker-buildx && colima start --cpu 4 --memory 8`.");
        }
        const detail = probe.stderr || probe.error?.message || "unknown";
        throw new Error("Docker is installed but the daemon is not reachable. Start Docker Desktop, or run `colima start` on macOS. Raw error: " +
            detail);
    }
    const onAppleSilicon = process.platform === "darwin" && os.arch() === "arm64";
    let rosettaAvailable = null;
    if (onAppleSilicon) {
        const rosettaProbe = spawnSync("arch", ["-x86_64", "true"], { encoding: "utf8" });
        rosettaAvailable = rosettaProbe.status === 0;
        if (!rosettaAvailable) {
            console.warn(chalk.yellow(" ⚠ arm64 Mac without Rosetta 2 — the reference image is linux/amd64 and will " +
                "run under qemu emulation (3-10× slower). Install Rosetta with " +
                "`softwareupdate --install-rosetta` for a faster verify. If the sandbox times out, " +
                "this is the likely cause."));
        }
    }
    // Only the measurement is guarded; the low-space error is raised outside
    // the try so it can never be mistaken for a measurement failure.
    let freeBytes = null;
    try {
        const stat = statfsSync(os.tmpdir());
        freeBytes = Number(stat.bavail) * Number(stat.bsize);
    }
    catch {
        // Free-space query failed — skip this preflight silently.
    }
    const MIN_FREE_BYTES = 2 * 1024 * 1024 * 1024;
    if (freeBytes !== null && freeBytes < MIN_FREE_BYTES) {
        throw new Error(`Less than 2 GiB free in ${os.tmpdir()} (available ${(freeBytes / 1024 / 1024).toFixed(0)} MiB). ` +
            "The largest V1 eval bundle is ~190 MiB and extraction plus the artifact needs headroom. " +
            "Free space or set TMPDIR to a larger volume and retry.");
    }
    return { rosettaAvailable };
}
480
+ async function runSandboxAndBuildAttestation(args) {
481
+ const { submission, paperConfig, ipfsGateway, image, imageDigest, cpus, memory } = args;
482
+ if (!imageDigest || !/^sha256:[0-9a-f]{64}$/.test(imageDigest)) {
483
+ throw new Error("Missing --image-digest. Pass the pinned sha256 digest for the reference verifier image " +
484
+ "(env NOOKPLOT_VERIFIER_IMAGE_DIGEST, or --image-digest sha256:<64 hex>).");
485
+ }
486
+ // Fetch the gateway's current allow-list and verify our local digest is on
487
+ // it. When the rotation pushes a new digest, this catches the mismatch
488
+ // BEFORE the 5-30 min sandbox run — otherwise the gateway's
489
+ // UNTRUSTED_VERIFIER_IMAGE gate would reject the attestation at POST time.
490
+ // On gateway unreachable (null return), fall through with a warning — the
491
+ // server-side gate remains the source of truth and will reject at post
492
+ // time if the digest is actually untrusted.
493
+ const trustList = await fetchTrustedVerifierImageDigests(args.gatewayUrl, args.apiKey);
494
+ if (trustList) {
495
+ if (!trustList.configured) {
496
+ console.warn(chalk.yellow(" ⚠ Gateway has no trusted verifier image digests configured — the operator must set " +
497
+ "NOOKPLOT_VERIFIER_IMAGE_DIGESTS before your verification can be accepted. " +
498
+ "Running anyway (gateway will reject with VERIFIER_IMAGE_DIGEST_UNCONFIGURED)."));
499
+ }
500
+ else if (!trustList.digests.includes(imageDigest.toLowerCase())) {
501
+ throw new Error(`Image digest ${imageDigest} is NOT on the gateway's current trusted allow-list ` +
502
+ `(${trustList.digests.length} digest${trustList.digests.length === 1 ? "" : "s"} configured). ` +
503
+ "The reference image has likely rotated — upgrade your CLI with " +
504
+ "`npm i -g @nookplot/cli@latest`, or pass --image-digest with one of: " +
505
+ trustList.digests.join(", ") + ". " +
506
+ "Skipping this check would waste 5-30 min on a sandbox run that the gateway " +
507
+ "will reject with UNTRUSTED_VERIFIER_IMAGE.");
508
+ }
509
+ }
510
+ else {
511
+ console.warn(chalk.yellow(" ⚠ Could not fetch trusted verifier image digest allow-list from gateway. " +
512
+ "Proceeding with your local digest — gateway will still validate at POST time."));
513
+ }
514
+ const preflight = preflightSandboxEnvironment();
515
+ const work = await fs.mkdtemp(path.join(os.tmpdir(), "nookplot-verify-"));
516
+ const artifactDir = path.join(work, "artifact");
517
+ const evalDir = path.join(work, "eval");
518
+ const outDir = path.join(work, "out");
519
+ await fs.mkdir(artifactDir, { recursive: true });
520
+ await fs.mkdir(evalDir, { recursive: true });
521
+ await fs.mkdir(outDir, { recursive: true });
522
+ // Pull artifact + eval bundle from IPFS. Both are expected to be tar.gz
523
+ // archives — the CLI detects gzip by magic bytes (0x1f 0x8b) and extracts
524
+ // into the mount dir so `run.py` sees `/eval/eval.py`, `/artifact/inference.py`,
525
+ // etc. The fallback single-file "bundle" write is kept for agents who pin
526
+ // raw artifacts predating the tar.gz convention.
527
+ if (!paperConfig.reference_implementation_cid) {
528
+ throw new Error("paperConfig.reference_implementation_cid is null — this challenge was seeded without a " +
529
+ "pinned eval bundle. Re-seed via POST /v1/mining/paper-challenges with referenceImplementationCid " +
530
+ "set to a tar.gz CID (see docker/paper-reproduction-verifier/evals/ipfs_cids.json for the 20 seeds).");
531
+ }
532
+ const fetchSpinner = ora("Fetching artifact + eval bundle from IPFS…").start();
533
+ await downloadBundleCid(ipfsGateway, submission.artifactCid, artifactDir, "artifact");
534
+ const { sha256: evalBundleSha256 } = await downloadBundleCid(ipfsGateway, paperConfig.reference_implementation_cid, evalDir, "eval");
535
+ fetchSpinner.succeed("Artifact + eval bundle pulled.");
536
+ // Run the reference image against the mounted artifact + eval.
537
+ const runSpinner = ora("Running reference sandbox (this may take several minutes)…").start();
538
+ const budgetSeconds = Math.ceil(paperConfig.expected_eval_minutes * 60 * 1.5);
539
+ const dockerArgs = [
540
+ "run", "--rm",
541
+ "--network", "none",
542
+ "--cpus", String(cpus),
543
+ "--memory", memory,
544
+ // AUDIT D1: defense-in-depth hardening around the artifact's inference.py.
545
+ // `--network none` + `--memory` are the load-bearing defenses; these flags
546
+ // close secondary vectors a malicious artifact could exercise inside the
547
+ // CPU/memory bounds:
548
+ // --pids-limit blocks fork-bomb patterns that would exhaust the host's
549
+ // kernel PID table even under tight memory limits.
550
+ // --read-only makes the container rootfs immutable so predict() can't
551
+ // scribble over /opt/paper-verifier/hidden_split.py or
552
+ // shadow modules mid-run. Combined with the writable tmpfs
553
+ // on /tmp for legitimate temp-file needs.
554
+ // --cap-drop ALL drops every Linux capability (SYS_CHROOT, MKNOD, etc.)
555
+ // the Docker default hands out unnecessarily for inference.
556
+ // no-new-privileges blocks setuid escalation and matches the read-only
557
+ // rootfs invariant.
558
+ "--pids-limit", "128",
559
+ "--read-only",
560
+ "--tmpfs", "/tmp:rw,size=100m,noexec,nosuid,nodev",
561
+ "--cap-drop", "ALL",
562
+ "--security-opt", "no-new-privileges",
563
+ "-v", `${artifactDir}:/artifact:ro`,
564
+ "-v", `${evalDir}:/eval:ro`,
565
+ "-v", `${outDir}:/out:rw`,
566
+ "--",
567
+ `${image}@${imageDigest}`,
568
+ ];
569
+ const t0 = Date.now();
570
+ const child = spawnSync("docker", dockerArgs, {
571
+ encoding: "utf8",
572
+ timeout: budgetSeconds * 1000,
573
+ });
574
+ const wallTimeS = Math.ceil((Date.now() - t0) / 1000);
575
+ if (child.error) {
576
+ runSpinner.fail("Docker invocation failed");
577
+ throw child.error;
578
+ }
579
+ // spawnSync's `timeout` option fires SIGTERM and leaves status=null + signal set.
580
+ // Detect this explicitly so we can surface Rosetta guidance on arm64 Macs —
581
+ // qemu emulation is 3-10× slower than Rosetta and is the #1 cause of timeouts.
582
+ const timedOut = child.status === null && child.signal !== null;
583
+ if (timedOut) {
584
+ runSpinner.fail(`Sandbox exceeded ${budgetSeconds}s budget (killed ${child.signal}).`);
585
+ const rosettaHint = preflight.rosettaAvailable === false
586
+ ? " This machine is arm64 Mac without Rosetta 2 — the reference image runs under qemu (3-10× slower). " +
587
+ "Install Rosetta with `softwareupdate --install-rosetta` and retry."
588
+ : " Consider increasing --cpus or running on a faster host. " +
589
+ "The expected budget is expected_eval_minutes × 1.5; if the paper genuinely needs more, " +
590
+ "the challenge operator should adjust expected_eval_minutes.";
591
+ throw new Error(`Docker sandbox hit the ${budgetSeconds}s timeout.${rosettaHint}`);
592
+ }
593
+ const rawStdout = (child.stdout ?? "") + (child.stderr ?? "");
594
+ const exitCode = child.status ?? 1;
595
+ runSpinner.succeed(`Sandbox completed in ${wallTimeS}s (exit ${exitCode}).`);
596
+ // AUDIT I2: filter sandbox stdout before keccak + pin so a malicious
597
+ // `inference.py` cannot use print() to broadcast the hidden-split seed
598
+ // (or other module state) into a permanent public IPFS pin. We keep only
599
+ // lines that match the reference image's structured output shape:
600
+ // - "[verifier] …" — produced by run.py itself
601
+ // - "RESULT: { … }" — the single result marker
602
+ // Everything else — arbitrary stdout/stderr from the artifact — is dropped.
603
+ // This also stabilises the log hash: honest runs of the same artifact now
604
+ // produce identical filtered stdout regardless of chatty diagnostics.
605
+ const filteredStdout = filterSandboxStdout(rawStdout);
606
+ // Parse the `RESULT: {...}` line emitted by run.py. Using an explicit
607
+ // marker prevents JSON-shaped debug output from inference.py's predict()
608
+ // from being mistaken for the sandbox result. Parse against the filtered
609
+ // stream so the result line MUST have been produced by the reference image.
610
+ const resultLine = filteredStdout
611
+ .split(/\r?\n/)
612
+ .reverse()
613
+ .find((l) => /^RESULT:\s*\{.*\}\s*$/.test(l.trim()));
614
+ if (!resultLine) {
615
+ throw new Error("Sandbox did not emit a `RESULT: {...}` marker line. The reference image may be out of date — " +
616
+ "pull the latest `ghcr.io/basedmd/paper-reproduction-verifier:v1` and try again.");
617
+ }
618
+ const jsonPart = resultLine.trim().replace(/^RESULT:\s*/, "");
619
+ const result = JSON.parse(jsonPart);
620
+ // Keccak256 over the FILTERED stdout. MUST be Ethereum keccak256 (not
621
+ // NIST FIPS 202 SHA3-256 — different padding/domain). The DB field is
622
+ // declared as keccak256 in migration 263 and downstream audit tooling
623
+ // verifies `keccak256(stdout) == logs_hash`. Node's `createHash("sha3-256")`
624
+ // produces the WRONG hash here. Gateway spot-check fetches the pinned
625
+ // stdout (also filtered) and re-derives this same hash.
626
+ const logsHashHex = ethers.keccak256(ethers.toUtf8Bytes(filteredStdout));
627
+ // Pin stdout to IPFS via the gateway-backed pin route (simpler path in
628
+ // V1: rely on the gateway's own pinning endpoint). Alternatively the
629
+ // verifier can paste the CID from their own IPFS node. For V1 we call
630
+ // a new helper endpoint — but until it exists, stdout.cid can be
631
+ // user-provided via --stdout-cid. For now, pin locally to a well-known
632
+ // public IPFS HTTP pinning service proxy: we call the gateway's
633
+ // /v1/mining/sandbox/pin route (Phase 2 gateway addition).
634
+ const stdoutCid = await pinStdoutToIpfs(args.gatewayUrl, args.apiKey, filteredStdout);
635
+ return {
636
+ metricName: result.metric_name,
637
+ metricValue: Number(result.metric_value),
638
+ logsHashHex,
639
+ stdoutCid,
640
+ imageDigest,
641
+ wallTimeS,
642
+ exitCode,
643
+ evalBundleSha256,
644
+ };
645
+ }
646
// AUDIT D3: hard ceiling on a downloaded bundle — 1 GiB. The largest bundle
// on the V1 shortlist (STL10) is ~190 MiB, so this leaves roughly 5× headroom
// while still rejecting malicious artifacts big enough to OOM a verifier's
// laptop before the sandbox ever sees them. The cap is applied at two points:
//   1. Against the Content-Length header, before any bytes are buffered.
//   2. Against a running byte count while streaming, which covers gateways
//      that misreport or omit Content-Length (Pinata sometimes chunk-encodes
//      small responses).
const MAX_BUNDLE_BYTES = 1024 ** 3;
653
/**
 * Download one bundle CID from an IPFS HTTP gateway and materialise it in
 * `destDir`.
 *
 * The response is streamed under the MAX_BUNDLE_BYTES cap, hashed, and then
 * either extracted (gzip magic bytes present → treated as a tar.gz archive)
 * or written verbatim as a single file named `bundle`.
 *
 * @param {string} ipfsGateway - Gateway base URL; trailing slashes are
 *   stripped before `/<cid>` is appended.
 * @param {string} cid - Content identifier to fetch.
 * @param {string} destDir - Directory that receives the extracted tree or the
 *   raw `bundle` file. It is written into but never created here.
 * @param {string} kind - `"eval"` routes the bytes through
 *   verifyEvalBundleContent and expects `eval.py`; any other value is handled
 *   as an agent artifact and expects `inference.py`.
 * @returns {Promise<{ sha256: string }>} For artifacts, the hex sha256 of the
 *   downloaded bytes; for eval bundles, the `actualSha256` reported by
 *   verifyEvalBundleContent.
 * @throws {Error} On a non-2xx response, a missing body, a bundle over the
 *   size cap (declared or observed), a failed eval content check, a failed
 *   `tar` extraction, an escaping symlink, an empty archive, or a missing
 *   entry point.
 */
async function downloadBundleCid(ipfsGateway, cid, destDir, kind) {
    const base = ipfsGateway.replace(/\/+$/, "");
    const url = `${base}/${cid}`;
    // 300s covers stl10-sized bundles (~190 MiB) on slow residential uplinks;
    // larger than that and a verifier should self-host an IPFS gateway anyway.
    const res = await fetch(url, { signal: AbortSignal.timeout(300_000) });
    if (!res.ok) {
        throw new Error(`IPFS fetch failed (${res.status}) for ${kind} CID ${cid}`);
    }
    // First line of defence: trust-but-verify the declared size. A missing
    // header parses as 0 and an unparsable one as NaN; both fall through to
    // the streaming cap below.
    const declaredSize = Number.parseInt(res.headers.get("content-length") ?? "0", 10);
    if (Number.isFinite(declaredSize) && declaredSize > MAX_BUNDLE_BYTES) {
        // Release the connection instead of leaving the unread body dangling.
        try {
            await res.body?.cancel();
        }
        catch { /* best-effort cleanup only */ }
        throw new Error(`${kind} CID ${cid} declares ${declaredSize} bytes — refusing (cap ${MAX_BUNDLE_BYTES}). ` +
            `If this is a legitimate large bundle, self-host an IPFS gateway and run the verifier there.`);
    }
    // Streaming read with a hard byte cap so a gateway with missing / lying
    // Content-Length still can't push us past the limit.
    if (!res.body) {
        throw new Error(`IPFS fetch for ${kind} CID ${cid} returned no body`);
    }
    const reader = res.body.getReader();
    const chunks = [];
    let received = 0;
    while (true) {
        const { done, value } = await reader.read();
        if (done) {
            break;
        }
        if (!value) {
            continue;
        }
        received += value.byteLength;
        if (received > MAX_BUNDLE_BYTES) {
            // Abort further reads to free the socket.
            try {
                await reader.cancel();
            }
            catch { /* ignore */ }
            throw new Error(`${kind} CID ${cid} exceeded ${MAX_BUNDLE_BYTES}-byte cap mid-stream ` +
                `(received ${received}). Aborting — malicious or over-budget bundle.`);
        }
        chunks.push(value);
    }
    // Buffer.concat accepts Uint8Array chunks directly; wrapping each chunk in
    // Buffer.from() first would copy the whole payload a second time.
    const buf = Buffer.concat(chunks, received);
    // Content verification for eval bundles. Agent-submitted artifacts don't
    // have a known-good hash (they're user input), so only evals go through
    // verifyEvalBundleContent, which may reject (ok === false) or attach a
    // warning. Regardless of kind a sha256 is always produced: the eval
    // sha256 is surfaced through the sandbox attestation so the gateway can
    // cross-check it against the challenge's pinned allow-list (AUDIT §6b).
    let bundleSha256;
    if (kind === "eval") {
        const verdict = verifyEvalBundleContent(buf, cid);
        if (!verdict.ok) {
            throw new Error(verdict.error);
        }
        if (verdict.warn) {
            console.warn(chalk.yellow(`  ⚠ ${verdict.warn}`));
        }
        bundleSha256 = verdict.actualSha256;
    }
    else {
        bundleSha256 = crypto.createHash("sha256").update(buf).digest("hex");
    }
    const isGzip = buf.length >= 2 && buf[0] === 0x1f && buf[1] === 0x8b;
    if (!isGzip) {
        // Non-gzip CID — write as a single `bundle` file at the mount root and
        // let the image handle it (legacy path for agents pinning raw artifacts).
        await fs.writeFile(path.join(destDir, "bundle"), buf);
        return { sha256: bundleSha256 };
    }
    const tarballPath = path.join(destDir, "__bundle.tar.gz");
    try {
        await fs.writeFile(tarballPath, buf);
        const extract = spawnSync("tar", ["-xzf", tarballPath, "-C", destDir], { encoding: "utf8" });
        if (extract.status !== 0) {
            // When tar could not be spawned at all (e.g. not installed) status
            // is null and stderr is empty; the spawn error is the only useful
            // diagnostic.
            const detail = extract.error ? extract.error.message : (extract.stderr ?? "");
            throw new Error(`tar -xzf failed for ${kind} CID ${cid} (exit ${extract.status}): ${detail}`);
        }
    }
    finally {
        // Never leave the temporary archive behind in the mount directory,
        // whether extraction succeeded or failed.
        await fs.rm(tarballPath, { force: true });
    }
    // AUDIT L2 (2026-04-20): walk the extracted tree and reject any symlink
    // whose realpath resolves outside destDir. Modern GNU tar blocks `../`
    // and absolute paths by default, but symlink-based escapes (e.g.
    // `evil -> /etc/passwd`) extract cleanly and any later read through that
    // path follows the link. The walk is cheap insurance that does not depend
    // on GNU-vs-BSD tar flags.
    await assertNoSymlinkEscapes(destDir, kind, cid);
    // Catch truncated/corrupt tarballs that extract cleanly to zero files —
    // otherwise the sandbox fails later with opaque "mount dir is empty".
    const entries = await fs.readdir(destDir);
    if (entries.length === 0) {
        throw new Error(`${kind} bundle at CID ${cid} extracted to zero files. The archive is empty or truncated. ` +
            "Re-pin the bundle (if you operate the challenge) or try a different --ipfs-gateway.");
    }
    // Post-extraction entry-point check: the reference image's run.py expects
    // /eval/eval.py and /artifact/inference.py. Failing fast here avoids
    // wasting a multi-minute sandbox run on a bundle that cannot start.
    const entrypoint = kind === "eval" ? "eval.py" : "inference.py";
    await assertEntrypointPresent(destDir, entrypoint, kind, cid);
    return { sha256: bundleSha256 };
}
758
/**
 * AUDIT L2: scan the extracted tree under `destDir` and refuse any symlink
 * whose resolved target lies outside it. Works the same on every platform
 * because it makes no assumptions about tar flags. Exported so unit tests can
 * exercise it on hand-built directory trees without running tar.
 *
 * Rules:
 *   - Symlink resolving to destDir itself or to something beneath it → fine.
 *   - Symlink resolving anywhere else → throws, naming the entry and its
 *     resolved target. Callers treat this as a malformed bundle.
 *   - Symlink that cannot be resolved (e.g. target missing) → tolerated;
 *     there is no file behind it to escape to.
 *   - Real directories are descended into; symlinks are never followed for
 *     traversal; regular files are ignored.
 *
 * @param {string} destDir - Root of the extracted bundle.
 * @param {string} kind - Bundle kind, used only in the error message.
 * @param {string} cid - Bundle CID, used only in the error message.
 * @returns {Promise<void>}
 * @throws {Error} When a symlink resolves outside `destDir`.
 */
export async function assertNoSymlinkEscapes(destDir, kind, cid) {
    // Resolve a path, reporting failure as null instead of throwing.
    const tryRealpath = async (candidate) => {
        try {
            return await fs.realpath(candidate);
        }
        catch {
            return null;
        }
    };
    // destDir is caller-owned; if it cannot be resolved something is very
    // wrong (race with rm-rf, permission flip). Fall back to the literal path
    // as the containment root — safer than silently skipping the check.
    const root = (await tryRealpath(destDir)) ?? destDir;
    const rootPrefix = root.endsWith(path.sep) ? root : root + path.sep;
    const staysInside = (resolved) => resolved === root || resolved.startsWith(rootPrefix);
    const scan = async (currentDir) => {
        const dirents = await fs.readdir(currentDir, { withFileTypes: true });
        for (const dirent of dirents) {
            const entryPath = path.join(currentDir, dirent.name);
            if (!dirent.isSymbolicLink()) {
                if (dirent.isDirectory()) {
                    await scan(entryPath);
                }
                continue;
            }
            const resolved = await tryRealpath(entryPath);
            // null → broken symlink, nothing to escape to.
            if (resolved === null || staysInside(resolved)) {
                continue;
            }
            throw new Error(`${kind} bundle at CID ${cid} contains symlink ${path.relative(destDir, entryPath)} ` +
                `→ ${resolved}, which escapes the extraction directory (${root}). ` +
                `Refusing to proceed — malformed or malicious archive.`);
        }
    };
    await scan(destDir);
}
811
/**
 * Post-extraction check: require that the expected entry-point file exists
 * directly under `destDir` and is a non-empty regular file.
 *
 * Two tarball layouts are accepted:
 *   1. Flat: the entry point sits at the tarball root.
 *   2. Nested: the tarball root holds exactly one subdirectory (e.g. a
 *      commit hash or paper slug) with the entry point one level down. In
 *      that case the subdirectory's contents are moved up into `destDir` and
 *      the subdirectory is removed, so the hardcoded /eval/eval.py and
 *      /artifact/inference.py paths resolve.
 *
 * Exported for unit tests so behavior on empty files, missing files and
 * nested layouts can be pinned without scaffolding a full extraction.
 *
 * @param {string} destDir - Root of the extracted bundle.
 * @param {string} entrypoint - File name to look for (e.g. "eval.py").
 * @param {string} kind - Bundle kind; "eval" selects the operator-facing
 *   hint in the error message, anything else the solver-facing one.
 * @param {string} cid - Bundle CID, used only in the error message.
 * @returns {Promise<void>}
 * @throws {Error} When the entry point is missing or empty in both layouts.
 */
export async function assertEntrypointPresent(destDir, entrypoint, kind, cid) {
    // True only for an existing, non-empty regular file; any stat failure
    // counts as "not present".
    async function nonEmpty(p) {
        try {
            const st = await fs.stat(p);
            return st.isFile() && st.size > 0;
        }
        catch {
            return false;
        }
    }
    // Case 1: flat layout.
    if (await nonEmpty(path.join(destDir, entrypoint))) {
        return;
    }
    // Case 2: nested single-dir layout — look one level in.
    const entries = await fs.readdir(destDir, { withFileTypes: true });
    const dirs = entries.filter((e) => e.isDirectory());
    if (dirs.length === 1) {
        const nestedDir = path.join(destDir, dirs[0].name);
        if (await nonEmpty(path.join(nestedDir, entrypoint))) {
            // Park the wrapper directory under a fresh staging directory
            // before moving its children up. Moving children while the
            // wrapper still occupies its name fails when the wrapper contains
            // an entry named like itself (e.g. `pkg/pkg`).
            const staging = await fs.mkdtemp(path.join(destDir, ".flatten-"));
            const parked = path.join(staging, "wrapper");
            await fs.rename(nestedDir, parked);
            for (const child of await fs.readdir(parked)) {
                await fs.rename(path.join(parked, child), path.join(destDir, child));
            }
            await fs.rmdir(parked);
            await fs.rmdir(staging);
            return;
        }
    }
    throw new Error(`${kind} bundle at CID ${cid} is missing ${entrypoint} (or it is empty). ` +
        `The reference sandbox mounts this bundle at /${kind} and runs ${entrypoint}; ` +
        (kind === "eval"
            ? "re-pin the eval bundle if you operate the challenge, or try a different --ipfs-gateway."
            : "the solver's artifact is malformed — this submission cannot be verified as-is."));
}
854
/**
 * Pin the (filtered) sandbox stdout to IPFS through the gateway's helper
 * route, so verifiers do not need their own pinning credentials.
 *
 * There is no local fallback: if the route is unavailable or answers with an
 * error, this throws rather than inventing a placeholder CID.
 *
 * @param {string} gatewayUrl - Base URL of the gateway.
 * @param {string} apiKey - API key forwarded to gatewayRequest.
 * @param {string} stdout - Text to pin.
 * @returns {Promise<string>} CID reported by the gateway.
 * @throws {Error} When the gateway returns an error, or a success response
 *   that carries no usable `cid`.
 */
async function pinStdoutToIpfs(gatewayUrl, apiKey, stdout) {
    const res = await gatewayRequest(gatewayUrl, "POST", "/v1/mining/sandbox/pin", { apiKey, body: { stdout } });
    if (isGatewayError(res)) {
        throw new Error(`Unable to pin stdout to IPFS via /v1/mining/sandbox/pin (${res.error}). ` +
            "Pin manually and re-run with --stdout-cid once supported, or self-host a pinning endpoint.");
    }
    // Guard against a success-shaped response without a CID; returning
    // undefined here would silently produce an attestation with no stdoutCid.
    const cid = res.data?.cid;
    if (typeof cid !== "string" || cid.length === 0) {
        throw new Error("Gateway /v1/mining/sandbox/pin did not return a CID for the pinned stdout.");
    }
    return cid;
}
867
+ //# sourceMappingURL=verifyReproduction.js.map