wb-browser-runtime 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -2
- package/bin/wb-browser-runtime.js +29 -106
- package/lib/recording-manager.js +39 -4
- package/lib/signed-url-capture.js +275 -0
- package/lib/substitution.js +128 -0
- package/package.json +1 -1
- package/verbs/download.js +596 -0
- package/verbs/index.js +2 -0
|
@@ -0,0 +1,596 @@
|
|
|
1
|
+
// download — explicit "click and capture" verb.
|
|
2
|
+
//
|
|
3
|
+
// The passive listener in lib/download-capture.js already saves any file the
|
|
4
|
+
// browser downloads, but it has no say over the filename and announces a
|
|
5
|
+
// `slice.artifact_saved` frame asynchronously after `saveAs` resolves. Some
|
|
6
|
+
// runbooks want stronger guarantees:
|
|
7
|
+
// - "the file lands at exactly $WB_ARTIFACTS_DIR/<path>"
|
|
8
|
+
// - "if it doesn't appear within ~10s, fail the slice with diagnostics"
|
|
9
|
+
// - works for SPAs that build the file in-page via fetch/XHR + Blob and
|
|
10
|
+
// don't always trip Playwright's `download` event reliably
|
|
11
|
+
//
|
|
12
|
+
// This verb installs capture hooks BEFORE clicking, races
|
|
13
|
+
// `page.waitForEvent("download")` against an in-page blob/anchor capture
|
|
14
|
+
// hook, and either saves the bytes itself (blob path) or hands the
|
|
15
|
+
// Playwright Download to `saveAs` (download path). Whichever path wins,
|
|
16
|
+
// the verb sets HANDLED_MARK on the Download (when applicable) so the
|
|
17
|
+
// passive listener doesn't double-save.
|
|
18
|
+
|
|
19
|
+
import path from "node:path";
|
|
20
|
+
import { Buffer } from "node:buffer";
|
|
21
|
+
import { promises as fsPromises } from "node:fs";
|
|
22
|
+
import { send } from "../lib/io.js";
|
|
23
|
+
import {
|
|
24
|
+
uniquePathInside,
|
|
25
|
+
parseExtensionAllowlist,
|
|
26
|
+
extensionAllowed,
|
|
27
|
+
} from "../lib/util.js";
|
|
28
|
+
import { HANDLED_MARK } from "../lib/download-capture.js";
|
|
29
|
+
import { retryableFetch } from "../lib/http.js";
|
|
30
|
+
import {
|
|
31
|
+
SIGNED_PAGE_HOOK,
|
|
32
|
+
SIGNED_POLL_SCRIPT,
|
|
33
|
+
parseSignedConfig,
|
|
34
|
+
pickSignedCandidate,
|
|
35
|
+
redactSignedUrl,
|
|
36
|
+
} from "../lib/signed-url-capture.js";
|
|
37
|
+
|
|
38
|
+
// Default wait budget, in milliseconds, used when the caller gives no `timeout`.
const DEFAULT_TIMEOUT_MS = 10 * 1000;

// Pause between two consecutive page polls, in milliseconds.
const POLL_INTERVAL_MS = 50;

// Filename used when no capture source supplies a usable one.
const FALLBACK_NAME = "download.bin";
|
|
41
|
+
|
|
42
|
+
// Page-side hook that traps programmatic anchor downloads — the
// `URL.createObjectURL(blob)` + `<a download>` + `.click()` pattern, and the
// same pattern with a `data:` URL. It wraps `HTMLAnchorElement.prototype.click`;
// when the clicked anchor has a `download` attribute and a blob:/data: href,
// the hook re-fetches the URL in-page, base64-encodes the bytes, and stashes
// `{ filename, bytes, mimeType }` (or `{ error }` when the re-fetch fails) on
// `window.__wbDownload` for the Node side to poll.
//
// NOTE(review): downloads started via `window.open(blobUrl)` or
// `window.location = blobUrl` never pass through `anchor.click()`, so this
// hook does not observe them; only the wrapped `click()` path is covered.
//
// Idempotent: re-installing on each verb invocation is a no-op after the
// first. We never uninstall — the page is left with a wrapped `click`, which
// always delegates to the original implementation.
//
// The bytes are turned into a binary string in 32 KiB chunks
// (`String.fromCharCode.apply` on a subarray) rather than one character per
// loop iteration; the chunk size keeps the argument count well below engine
// limits while avoiding millions of single-character concatenations on
// large files.
const PAGE_HOOK = `(() => {
  if (window.__wbDownloadInstalled) return;
  window.__wbDownloadInstalled = true;
  window.__wbDownload = null;

  const captureBlob = async (target, filename, mime) => {
    try {
      let blob;
      if (typeof target === "string") {
        const resp = await fetch(target);
        blob = await resp.blob();
      } else {
        blob = target;
      }
      const buf = await blob.arrayBuffer();
      const bin = new Uint8Array(buf);
      const CHUNK = 0x8000;
      let s = "";
      for (let i = 0; i < bin.length; i += CHUNK) {
        s += String.fromCharCode.apply(null, bin.subarray(i, i + CHUNK));
      }
      window.__wbDownload = {
        filename: filename || "download.bin",
        bytes: btoa(s),
        mimeType: mime || blob.type || "application/octet-stream",
      };
    } catch (e) {
      window.__wbDownload = { error: String((e && e.message) || e) };
    }
  };

  const origClick = HTMLAnchorElement.prototype.click;
  HTMLAnchorElement.prototype.click = function () {
    try {
      const href = this.getAttribute("href") || this.href || "";
      const hasDownload = this.hasAttribute("download");
      if (hasDownload && (href.startsWith("blob:") || href.startsWith("data:"))) {
        const fname = this.getAttribute("download") || this.download || "";
        captureBlob(href, fname);
      }
    } catch {}
    return origClick.apply(this, arguments);
  };
})()`;

// Read-and-clear of `window.__wbDownload`. Returning the value AND nulling
// it means each captured result is handed to the Node side at most once.
const POLL_SCRIPT = `(() => {
  const v = window.__wbDownload;
  window.__wbDownload = null;
  return v;
})()`;
|
|
103
|
+
|
|
104
|
+
export default {
  name: "download",
  primaryKey: "selector",

  /**
   * Click `args.selector` and capture the file that the click produces.
   *
   * Three capture sources are raced: the page's `download` event, the
   * in-page blob/anchor hook (polled via POLL_SCRIPT) and, unless disabled
   * through `args.signed_url`, the signed-URL candidate poller. The first
   * source to succeed decides which save helper writes the file.
   *
   * @param {object} page - Page object; this method calls `evaluate`,
   *   `waitForEvent`, `click`, `getByText`, `context` and `url` on it.
   * @param {object} args - Verb arguments.
   * @param {string} args.selector - Selector to click (required).
   * @param {number} [args.timeout] - Wait budget in ms for the click and for
   *   every capture source; defaults to DEFAULT_TIMEOUT_MS.
   * @param {string} [args.path] - Explicit target name; blank values are
   *   ignored.
   * @param {string} [args.text_fallback] - Text to click instead when the
   *   selector click ends in a `TimeoutError`.
   * @param {*} [args.signed_url] - Passed to `parseSignedConfig`.
   * @param {object} [ctx] - Only `ctx.index` is read, for diagnostics.
   * @returns {Promise<string>} The summary string returned by the save helper.
   * @throws {Error} When `$WB_ARTIFACTS_DIR` or `selector` is missing, when
   *   the click failed, when a save helper throws, or when no source captured
   *   a file (after a `slice.download_failed` frame has been sent).
   */
  async execute(page, args, ctx) {
    const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
    if (!artifactsDir) {
      throw new Error(
        "download: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
      );
    }
    if (!args.selector) {
      throw new Error("download: `selector` is required");
    }
    const timeout = args.timeout ?? DEFAULT_TIMEOUT_MS;
    const explicitPath =
      typeof args.path === "string" && args.path.trim()
        ? args.path.trim()
        : null;
    const allowlist = parseExtensionAllowlist(
      process.env.WB_BROWSER_DOWNLOAD_EXTENSIONS,
    );
    const signedCfg = parseSignedConfig(args.signed_url);
    const signedEnabled = signedCfg.enabled !== false;

    // 1) Inject the page-side blob/anchor capture hook BEFORE the click so a
    //    synchronously-dispatched anchor.click() inside the SPA's handler is
    //    observed. Best-effort: a frame mid-navigation can reject evaluate;
    //    the Playwright `download` event still works and is the primary
    //    signal anyway.
    try {
      await page.evaluate(PAGE_HOOK);
    } catch {}
    // Drain whatever the hook captured before this call. The hook keeps
    // running between verb calls, and a capture that finished after an
    // earlier call's race was already decided would otherwise still sit in
    // `window.__wbDownload`; the first poll below would then return that
    // stale file as this call's winner. Best-effort for the same reason as
    // the install above.
    try {
      await page.evaluate(POLL_SCRIPT);
    } catch {}
    // When signed-URL capture is enabled, install its hook in the same
    // pre-click window so the API call the click triggers is observed from
    // the start.
    if (signedEnabled) {
      try {
        await page.evaluate(SIGNED_PAGE_HOOK);
      } catch {}
    }

    // 2) Claim ownership of the next download synchronously — prepended to
    //    BrowserContext listeners so it runs before lib/download-capture.js's
    //    passive listener has a chance to start its async capture chain. The
    //    HANDLED_MARK tells the passive listener to bail.
    const claim = (download) => {
      try {
        download[HANDLED_MARK] = true;
      } catch {}
    };
    const browserContext = safeContext(page);
    let attached = false;
    if (browserContext) {
      if (typeof browserContext.prependListener === "function") {
        browserContext.prependListener("download", claim);
        attached = true;
      } else if (typeof browserContext.on === "function") {
        // Fallback: append. Race window is tiny (passive listener checks
        // HANDLED_MARK before its first await), but ordering isn't
        // guaranteed without prependListener.
        browserContext.on("download", claim);
        attached = true;
      }
    }

    // Shared cancellation token: once the race has a winner, the losing
    // pollers stop on their next tick instead of spinning page.evaluate against
    // a (possibly navigating/closing) page for the rest of the timeout window.
    const stop = { done: false };

    try {
      // 3) Race the capture sources against the click. The download event AND
      //    the click run concurrently — Playwright's standard pattern, since
      //    the click can resolve before or after the download fires.
      const downloadPromise = page
        .waitForEvent("download", { timeout })
        .then((d) => ({ kind: "playwright", download: d }))
        .catch((e) => ({ kind: "playwright_failed", error: e }));

      const blobPromise = pollForBlob(page, timeout, stop);

      const signedPromise = signedEnabled
        ? pollForSignedUrl(page, timeout, signedCfg, stop)
        : null;

      let clickError = null;
      const clickPromise = (async () => {
        try {
          await page.click(args.selector, { timeout });
        } catch (err) {
          const isTimeout = err && err.name === "TimeoutError";
          if (isTimeout && args.text_fallback) {
            try {
              await page
                .getByText(args.text_fallback, { exact: false })
                .first()
                .click({ timeout });
              return;
            } catch {
              // Report the original selector failure, not the fallback's.
              clickError = err;
              return;
            }
          }
          clickError = err;
        }
      })();

      const winner = await raceCaptures(
        [downloadPromise, blobPromise, signedPromise].filter(Boolean),
      );
      // Winner decided (success or all-failed) — release the losing pollers.
      stop.done = true;
      // Wait for the click to settle so we surface its error (if any) over
      // a generic "no file captured" — a click that never landed is the
      // more actionable failure.
      await clickPromise;
      if (clickError) throw clickError;

      if (winner.success && winner.kind === "playwright") {
        return await savePlaywrightDownload({
          download: winner.download,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
        });
      }
      if (winner.success && winner.kind === "blob") {
        return await saveBlobDownload({
          blob: winner.blob,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
        });
      }
      if (winner.success && winner.kind === "signed_url") {
        return await saveSignedUrlDownload({
          signed: winner.signed,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
          timeout,
        });
      }

      // No capture won — emit structured failure diagnostics.
      const reasons = winner.failures
        .map((f) => {
          if (f.kind === "playwright_failed") {
            return `playwright download: ${f.error?.message || f.error}`;
          }
          if (f.kind === "blob_failed") return `blob hook: ${f.error}`;
          if (f.kind === "blob_timeout") return `blob hook: no capture within ${timeout}ms`;
          if (f.kind === "signed_failed") return `signed url: ${f.error}`;
          if (f.kind === "signed_timeout")
            return `signed url: no signed file URL seen within ${timeout}ms`;
          return f.kind;
        })
        .join("; ");
      send({
        type: "slice.download_failed",
        verb: "download",
        verb_index: ctx?.index ?? null,
        selector: args.selector,
        timeout_ms: timeout,
        page_url: safePageUrl(page),
        reason: reasons,
      });
      throw new Error(
        `download: no file captured within ${timeout}ms after clicking ${args.selector} (page=${safePageUrl(page) || "?"}). ${reasons}`,
      );
    } finally {
      // Backstop: ensure pollers are released on any exit path (thrown
      // click/save error, extension rejection, etc.).
      stop.done = true;
      if (attached && browserContext && typeof browserContext.off === "function") {
        try {
          browserContext.off("download", claim);
        } catch {}
      }
    }
  },
};
|
|
291
|
+
|
|
292
|
+
/**
 * Persist a download object under the artifacts directory and announce it.
 *
 * The target name is `explicitPath` when given, otherwise the download's
 * suggested filename. A name rejected by the extension allowlist cancels the
 * download (best-effort) and throws; a name that `uniquePathInside` refuses
 * also throws. On success a `slice.artifact_saved` frame is sent.
 *
 * @param {object} opts
 * @param {object} opts.download - Object exposing `saveAs(path)` and `cancel()`.
 * @param {string} opts.artifactsDir - Directory the file is saved under.
 * @param {*} opts.allowlist - Passed through to `extensionAllowed`.
 * @param {string|null} opts.explicitPath - Caller-chosen name, or null.
 * @param {object} opts.page - Used only to report the page URL.
 * @param {object} [opts.ctx] - Only `ctx.index` is read.
 * @returns {Promise<string>} `→ <basename of the saved file>`.
 */
async function savePlaywrightDownload({
  download,
  artifactsDir,
  allowlist,
  explicitPath,
  page,
  ctx,
}) {
  const chosenName = explicitPath || safeSuggestedFilename(download);
  const originUrl = safeUrl(download);

  const permitted = extensionAllowed(chosenName, allowlist);
  if (!permitted) {
    // Cancelling is a courtesy; a failure here must not mask the rejection.
    try {
      await download.cancel();
    } catch {}
    throw new Error(
      `download: file "${chosenName}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }

  const destination = uniquePathInside(artifactsDir, chosenName);
  if (!destination) {
    throw new Error(
      `download: refusing to save "${chosenName}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }

  await fsPromises.mkdir(artifactsDir, { recursive: true });
  await download.saveAs(destination);

  // Size is informational only: report null when the file cannot be stat'ed.
  let size = null;
  try {
    const info = await fsPromises.stat(destination);
    size = info.size;
  } catch {}

  const savedName = path.basename(destination);
  send({
    type: "slice.artifact_saved",
    filename: savedName,
    path: destination,
    bytes: size,
    source: "download",
    provenance: {
      url: originUrl,
      suggested_filename: chosenName,
      page_url: safePageUrl(page),
      verb_index: ctx?.index ?? null,
      verb_name: "download",
      ts: Date.now(),
    },
  });
  return `→ ${path.basename(destination)}`;
}
|
|
339
|
+
|
|
340
|
+
/**
 * Write a blob captured by the in-page hook to the artifacts directory and
 * announce it with a `slice.artifact_saved` frame.
 *
 * The target name is `explicitPath`, else the captured `blob.filename`, else
 * FALLBACK_NAME.
 *
 * @param {object} opts
 * @param {{filename?: string, bytes: string, mimeType?: string}} opts.blob -
 *   Capture record; `bytes` is the base64-encoded file content.
 * @param {string} opts.artifactsDir - Directory the file is saved under.
 * @param {*} opts.allowlist - Passed through to `extensionAllowed`.
 * @param {string|null} opts.explicitPath - Caller-chosen name, or null.
 * @param {object} opts.page - Used only to report the page URL.
 * @param {object} [opts.ctx] - Only `ctx.index` is read.
 * @returns {Promise<string>} `→ <basename of the saved file>`.
 * @throws {Error} When the name is rejected by the extension allowlist or by
 *   `uniquePathInside`, or when writing fails.
 */
async function saveBlobDownload({
  blob,
  artifactsDir,
  allowlist,
  explicitPath,
  page,
  ctx,
}) {
  const suggested = explicitPath || blob.filename || FALLBACK_NAME;
  if (!extensionAllowed(suggested, allowlist)) {
    throw new Error(
      `download: file "${suggested}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }
  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }
  const buf = Buffer.from(blob.bytes, "base64");
  // Create the target's own parent directory, not just artifactsDir: a name
  // with sub-directories (e.g. "reports/q1.csv") would otherwise make
  // writeFile fail with ENOENT. The recursive mkdir also creates every
  // missing ancestor of that directory.
  await fsPromises.mkdir(path.dirname(target), { recursive: true });
  await fsPromises.writeFile(target, buf);
  send({
    type: "slice.artifact_saved",
    filename: path.basename(target),
    path: target,
    bytes: buf.length,
    source: "download",
    provenance: {
      url: null,
      suggested_filename: suggested,
      page_url: safePageUrl(page),
      verb_index: ctx?.index ?? null,
      verb_name: "download",
      mime_type: blob.mimeType || null,
      capture: "blob",
      ts: Date.now(),
    },
  });
  return `→ ${path.basename(target)}`;
}
|
|
382
|
+
|
|
383
|
+
// Race N capture promises. First to report success wins. Every source must
// report before we declare failure, so the diagnostics frame can list every
// reason the verb didn't see a file. (Promise.race would shortcut on a fast
// failure and discard a slower success.) Each promise resolves to an object
// whose `kind` names the source: a success kind ("playwright" | "blob" |
// "signed_url") or a failure kind ("*_failed" | "*_timeout").
const SUCCESS_KINDS = new Set(["playwright", "blob", "signed_url"]);

/**
 * Resolve with the first successful capture, or with every failure once all
 * sources have reported.
 *
 * The returned promise never rejects and always settles:
 *  - an empty `promises` list resolves immediately as a failure with no
 *    reasons (nothing could ever report);
 *  - a source that rejects is recorded as a `capture_rejected` failure
 *    carrying the rejection as `error`, instead of being left uncounted;
 *  - a source that settles with a nullish value is recorded as a failure.
 *
 * @param {Array<Promise<{kind: string}>>} promises - Capture sources.
 * @returns {Promise<object>} `{ success: true, ...result }` for the winner, or
 *   `{ success: false, failures }` where each entry is
 *   `{ success: false, ...result }`.
 */
function raceCaptures(promises) {
  return new Promise((resolve) => {
    const failures = [];
    let outstanding = promises.length;
    if (outstanding === 0) {
      resolve({ success: false, failures });
      return;
    }
    const recordFailure = (failure) => {
      failures.push(failure);
      // Resolving after a winner was already reported is a harmless no-op.
      if (--outstanding === 0) resolve({ success: false, failures });
    };
    for (const pr of promises) {
      Promise.resolve(pr).then(
        (r) => {
          if (r && SUCCESS_KINDS.has(r.kind)) {
            resolve({ success: true, ...r });
          } else {
            recordFailure({ success: false, ...r });
          }
        },
        (error) => {
          recordFailure({ success: false, kind: "capture_rejected", error });
        },
      );
    }
  });
}
|
|
411
|
+
|
|
412
|
+
/**
 * Fetch a captured signed URL from the Node side, write the body under the
 * artifacts directory, and announce it with a `slice.artifact_saved` frame.
 *
 * Every log label, frame and error message uses the redacted form of the URL
 * (`redactSignedUrl`); only the fetch itself receives the raw `signed.url`.
 *
 * @param {object} opts
 * @param {{url: string, api_url?: string, field?: string}} opts.signed -
 *   Picked candidate; `api_url` and `field` are copied into the frames as-is.
 * @param {string} opts.artifactsDir - Directory the file is saved under.
 * @param {*} opts.allowlist - Passed through to `extensionAllowed`.
 * @param {string|null} opts.explicitPath - Caller-chosen name, or null.
 * @param {object} opts.page - Used only to report the page URL.
 * @param {object} [opts.ctx] - Only `ctx.index` is read.
 * @param {number} opts.timeout - Passed to `retryableFetch` as `timeoutMs`.
 * @returns {Promise<string>} `→ <basename of the saved file>`.
 * @throws {Error} When the name is rejected, when the fetch throws, or when
 *   the response is not ok; the two fetch failures first send a
 *   `slice.download_failed` frame.
 */
async function saveSignedUrlDownload({
  signed,
  artifactsDir,
  allowlist,
  explicitPath,
  page,
  ctx,
  timeout,
}) {
  const redacted = redactSignedUrl(signed.url);
  // Filename: the explicit path wins, else the signed URL's basename, else a
  // generic fallback. (S3 keys usually end in the real filename.)
  let nameFromUrl = "";
  try {
    nameFromUrl = path.basename(new URL(signed.url).pathname) || "";
  } catch {}
  const suggested = explicitPath || (nameFromUrl.trim() ? nameFromUrl : FALLBACK_NAME);
  if (!extensionAllowed(suggested, allowlist)) {
    throw new Error(
      `download: file "${suggested}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }
  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }
  // Create the target's own parent directory, not just artifactsDir: a name
  // with sub-directories (e.g. "reports/q1.csv") would otherwise make
  // writeFile fail with ENOENT. The recursive mkdir also creates every
  // missing ancestor of that directory.
  await fsPromises.mkdir(path.dirname(target), { recursive: true });

  // Fetch the signed URL from the sidecar (not the page) so the object store's
  // CORS policy doesn't block the read. The label is redacted — retry logs must
  // never echo signed credentials.
  let res;
  try {
    res = await retryableFetch(
      signed.url,
      { method: "GET" },
      `signed-url download (${redacted})`,
      { timeoutMs: timeout },
    );
  } catch (e) {
    send({
      type: "slice.download_failed",
      verb: "download",
      verb_index: ctx?.index ?? null,
      capture: "signed_url",
      api_url: signed.api_url,
      signed_url: redacted,
      page_url: safePageUrl(page),
      reason: `signed url fetch error: ${e?.message || e}`,
    });
    throw new Error(
      `download: signed URL fetch failed for ${redacted}: ${e?.message || e}`,
    );
  }
  if (!res.ok) {
    // A 403 on a pre-signed URL almost always means the token expired before
    // we fetched it — call that out so the operator knows to shorten the gap.
    const expired = res.status === 403;
    send({
      type: "slice.download_failed",
      verb: "download",
      verb_index: ctx?.index ?? null,
      capture: "signed_url",
      api_url: signed.api_url,
      signed_url: redacted,
      page_url: safePageUrl(page),
      http_status: res.status,
      expired,
      reason: `signed url fetch: HTTP ${res.status}${expired ? " (likely expired)" : ""}`,
    });
    throw new Error(
      `download: signed URL fetch returned HTTP ${res.status} for ${redacted}${expired ? " (likely expired)" : ""}`,
    );
  }
  const buf = Buffer.from(await res.arrayBuffer());
  await fsPromises.writeFile(target, buf);
  const contentType = safeHeader(res, "content-type");
  const contentDisposition = safeHeader(res, "content-disposition");
  send({
    type: "slice.artifact_saved",
    filename: path.basename(target),
    path: target,
    bytes: buf.length,
    source: "download",
    provenance: {
      url: null,
      signed_url: redacted,
      api_url: signed.api_url,
      field: signed.field,
      suggested_filename: suggested,
      page_url: safePageUrl(page),
      verb_index: ctx?.index ?? null,
      verb_name: "download",
      capture: "signed_url",
      content_type: contentType,
      content_disposition: contentDisposition,
      ts: Date.now(),
    },
  });
  return `→ ${path.basename(target)}`;
}
|
|
515
|
+
|
|
516
|
+
/**
 * Poll the page for a capture stored by the in-page blob hook.
 *
 * Each tick evaluates POLL_SCRIPT and inspects the returned record. A record
 * counts as a capture when `bytes` is a string — including the empty string,
 * which is the base64 form of a zero-byte file. Testing `bytes` for
 * truthiness would drop such a capture for good, because the poll script has
 * already cleared the page-side slot by the time the record is inspected.
 *
 * Never throws: an `evaluate` error is treated as "nothing captured yet".
 *
 * @param {object} page - Object exposing `evaluate(script)`.
 * @param {number} timeoutMs - Polling budget in milliseconds.
 * @param {{done: boolean}} [stop] - Shared token; polling ends on the next
 *   tick once `done` is true.
 * @returns {Promise<object>} `{ kind: "blob", blob }`,
 *   `{ kind: "blob_failed", error }` or `{ kind: "blob_timeout" }` (also
 *   returned when the stop token ends the poll).
 */
async function pollForBlob(page, timeoutMs, stop) {
  const deadline = Date.now() + timeoutMs;
  while (true) {
    if (stop?.done) return { kind: "blob_timeout" };
    let result;
    try {
      result = await page.evaluate(POLL_SCRIPT);
    } catch {
      result = null;
    }
    if (result && typeof result.bytes === "string") {
      return { kind: "blob", blob: result };
    }
    if (result && result.error) return { kind: "blob_failed", error: result.error };
    if (Date.now() >= deadline) return { kind: "blob_timeout" };
    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
  }
}
|
|
532
|
+
|
|
533
|
+
/**
 * Poll the page for signed-URL candidates until one is picked.
 *
 * Each tick evaluates SIGNED_POLL_SCRIPT; a non-empty array result is handed
 * to `pickSignedCandidate` together with `signedCfg`, and the first truthy
 * pick ends the poll. No bytes are fetched here — this only selects the
 * candidate. Never throws: an `evaluate` error counts as "no candidates on
 * this tick".
 *
 * @param {object} page - Object exposing `evaluate(script)`.
 * @param {number} timeoutMs - Polling budget in milliseconds.
 * @param {*} signedCfg - Passed through to `pickSignedCandidate`.
 * @param {{done: boolean}} [stop] - Shared token; polling ends on the next
 *   tick once `done` is true.
 * @returns {Promise<object>} `{ kind: "signed_url", signed }` or
 *   `{ kind: "signed_timeout" }` (also returned when the stop token ends the
 *   poll).
 */
async function pollForSignedUrl(page, timeoutMs, signedCfg, stop) {
  const expiresAt = Date.now() + timeoutMs;
  const pause = () => new Promise((wake) => setTimeout(wake, POLL_INTERVAL_MS));

  for (;;) {
    if (stop?.done) {
      return { kind: "signed_timeout" };
    }

    let candidates;
    try {
      candidates = await page.evaluate(SIGNED_POLL_SCRIPT);
    } catch {
      candidates = null;
    }

    const hasCandidates = Array.isArray(candidates) && candidates.length > 0;
    const choice = hasCandidates ? pickSignedCandidate(candidates, signedCfg) : null;
    if (choice) {
      return { kind: "signed_url", signed: choice };
    }

    if (Date.now() >= expiresAt) {
      return { kind: "signed_timeout" };
    }
    await pause();
  }
}
|
|
556
|
+
|
|
557
|
+
/**
 * Read a response header without ever throwing.
 *
 * @param {object} res - Response-like object with an optional `headers.get`.
 * @param {string} name - Header name to look up.
 * @returns {*} The header value, or null when it is missing, falsy, or the
 *   lookup threw.
 */
function safeHeader(res, name) {
  let value;
  try {
    value = res.headers?.get?.(name);
  } catch {
    return null;
  }
  return value || null;
}
|
|
564
|
+
|
|
565
|
+
/**
 * Return `page.url()`, or null when the call throws.
 *
 * @param {object} page - Object exposing `url()`.
 * @returns {*} Whatever `page.url()` returns, or null on error.
 */
function safePageUrl(page) {
  let current = null;
  try {
    current = page.url();
  } catch {
    // Diagnostics only: an unreadable URL is reported as null.
  }
  return current;
}
|
|
572
|
+
|
|
573
|
+
/**
 * Return `page.context()`, or null when the call throws.
 *
 * @param {object} page - Object exposing `context()`.
 * @returns {*} Whatever `page.context()` returns, or null on error.
 */
function safeContext(page) {
  let owner = null;
  try {
    owner = page.context();
  } catch {
    // The caller treats null as "no context to attach listeners to".
  }
  return owner;
}
|
|
580
|
+
|
|
581
|
+
/**
 * Return the download's suggested filename, or FALLBACK_NAME when it is
 * empty, blank, or cannot be read.
 *
 * @param {object} download - Object exposing `suggestedFilename()`.
 * @returns {string} The untrimmed suggestion, or FALLBACK_NAME.
 */
function safeSuggestedFilename(download) {
  try {
    const proposed = download.suggestedFilename();
    // The blank check stays inside the try so a non-string suggestion
    // (no `.trim`) also falls back instead of throwing.
    if (proposed && proposed.trim()) {
      return proposed;
    }
  } catch {}
  return FALLBACK_NAME;
}
|
|
589
|
+
|
|
590
|
+
/**
 * Return `download.url()`, or null when the call throws.
 *
 * @param {object} download - Object exposing `url()`.
 * @returns {*} Whatever `download.url()` returns, or null on error.
 */
function safeUrl(download) {
  let source = null;
  try {
    source = download.url();
  } catch {
    // Provenance only: an unreadable source URL is recorded as null.
  }
  return source;
}
|
package/verbs/index.js
CHANGED
|
@@ -22,6 +22,7 @@ import saveVerb from "./save.js";
|
|
|
22
22
|
import pauseForHumanVerb from "./pause_for_human.js";
|
|
23
23
|
import waitForDropVerb from "./wait_for_drop.js";
|
|
24
24
|
import announceArtifactVerb from "./announce_artifact.js";
|
|
25
|
+
import downloadVerb from "./download.js";
|
|
25
26
|
|
|
26
27
|
const VERBS = [
|
|
27
28
|
gotoVerb,
|
|
@@ -38,6 +39,7 @@ const VERBS = [
|
|
|
38
39
|
pauseForHumanVerb,
|
|
39
40
|
waitForDropVerb,
|
|
40
41
|
announceArtifactVerb,
|
|
42
|
+
downloadVerb,
|
|
41
43
|
];
|
|
42
44
|
|
|
43
45
|
export const VERB_REGISTRY = Object.fromEntries(VERBS.map((v) => [v.name, v]));
|