wb-browser-runtime 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -208,6 +208,7 @@ example, see the `browserbase-hn-upvoted-probe` runbook in the xatabase repo.
208
208
  | `assert` | `assert: <selector>` | `selector`, `text_contains`, `url_contains` |
209
209
  | `eval` | `eval: <js>` | `script` |
210
210
  | `save` | `save: <name>` | `name`, `value` (captures prior `extract`/`eval` when omitted) |
211
+ | `download` | `download: <selector>` | `selector`, `path`, `timeout`, `text_fallback` (clicks + races Playwright `download` event with in-page blob/anchor capture; saves into `$WB_ARTIFACTS_DIR/<path>`) |
211
212
 
212
213
  `extract`'s `fields` entries are either a CSS selector string (returns
213
214
  `textContent`), or `{ selector, attr }` to read an attribute.
@@ -280,6 +281,36 @@ emitted as `slice.download_skipped` (with `reason:
280
281
  "extension_not_in_allowlist"`) so the operator sees what was discarded.
281
282
  Unset = capture everything.
282
283
 
284
+ ### Explicit `download:` verb
285
+
286
+ The passive listener handles "any file the browser saves" but gives the
287
+ runbook no control over the filename or timing. Use the `download:` verb
288
+ when the runbook needs to click a specific button, save the result at a
289
+ specific path, and fail loudly within ~10s if no file appears:
290
+
291
+ ```yaml
292
+ - download:
293
+ selector: 'button:has-text("Download as xlsx")'
294
+ path: pilot-profit-loss.xlsx # written to $WB_ARTIFACTS_DIR/<path>
295
+ timeout: 10s # default
296
+ text_fallback: "Download as xlsx" # like click — fallback when selector is brittle
297
+ ```
298
+
299
+ Behaviour:
300
+
301
+ - Installs a page-side blob/anchor capture hook **before** the click so a
302
+ synchronously-dispatched `URL.createObjectURL(blob) + <a download>.click()`
303
+ is observed even when Playwright's own `download` event misses it.
304
+ (Navigations such as `window.location = blobUrl` are not hooked.)
305
+ - Races `page.waitForEvent("download")` against the in-page hook; whichever
306
+ fires first wins.
307
+ - Sets `HANDLED_MARK` on the `Download` so the always-on passive listener
308
+ doesn't double-save.
309
+ - Emits `slice.artifact_saved` with `source: "download"` and
310
+ `provenance.verb_name: "download"`.
311
+ - On timeout: throws with diagnostics (page URL, selector, both
312
+ failure reasons) AND emits a `slice.download_failed` frame.
313
+
283
314
  ## Protocol
284
315
 
285
316
  Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wb-browser-runtime",
3
- "version": "0.12.0",
3
+ "version": "0.13.0",
4
4
  "description": "Browser sidecar runtime for wb — Playwright over CDP (Browserbase, browser-use) via the wb-sidecar/1 line-framed JSON protocol.",
5
5
  "bin": {
6
6
  "wb-browser-runtime": "bin/wb-browser-runtime.js"
@@ -0,0 +1,410 @@
1
+ // download — explicit "click and capture" verb.
2
+ //
3
+ // The passive listener in lib/download-capture.js already saves any file the
4
+ // browser downloads, but it has no say over the filename and announces a
5
+ // `slice.artifact_saved` frame asynchronously after `saveAs` resolves. Some
6
+ // runbooks want stronger guarantees:
7
+ // - "the file lands at exactly $WB_ARTIFACTS_DIR/<path>"
8
+ // - "if it doesn't appear within ~10s, fail the slice with diagnostics"
9
+ // - works for SPAs that build the file in-page via fetch/XHR + Blob and
10
+ // don't always trip Playwright's `download` event reliably
11
+ //
12
+ // This verb installs capture hooks BEFORE clicking, races
13
+ // `page.waitForEvent("download")` against an in-page blob/anchor capture
14
+ // hook, and either saves the bytes itself (blob path) or hands the
15
+ // Playwright Download to `saveAs` (download path). Whichever path wins,
16
+ // the verb sets HANDLED_MARK on the Download (when applicable) so the
17
+ // passive listener doesn't double-save.
18
+
19
+ import path from "node:path";
20
+ import { Buffer } from "node:buffer";
21
+ import { promises as fsPromises } from "node:fs";
22
+ import { send } from "../lib/io.js";
23
+ import {
24
+ uniquePathInside,
25
+ parseExtensionAllowlist,
26
+ extensionAllowed,
27
+ } from "../lib/util.js";
28
+ import { HANDLED_MARK } from "../lib/download-capture.js";
29
+
30
+ const DEFAULT_TIMEOUT_MS = 10_000; // capture/click budget (ms) used when `args.timeout` is null or undefined
31
+ const POLL_INTERVAL_MS = 50; // pause (ms) between successive reads of `window.__wbDownload`
32
+ const FALLBACK_NAME = "download.bin"; // filename used when neither the runbook nor the capture supplies one
33
+
34
// Page-side hook that traps blob downloads the SPA performs programmatically:
//   - `URL.createObjectURL(blob)` + `<a download>` + `.click()` (blob: or data: href)
//   - `window.open(blobUrl)`
// Playwright's own `download` event normally catches anchor downloads, but a
// handful of SPAs hand the blob URL to `window.open`, which can slip past. The
// hook re-fetches the URL in-page, base64-encodes the bytes, and stashes them
// on `window.__wbDownload` for the Node side to poll.
//
// Not covered: `window.location = blobUrl` — this script does not wrap
// navigation — and anchor activations that never call `anchor.click()`.
//
// Idempotent: re-installing on each verb invocation is a no-op after the
// first. We never uninstall — leaves the page in a slightly altered state
// but the wrapped calls always delegate to the originals.
const PAGE_HOOK = `(() => {
  if (window.__wbDownloadInstalled) return;
  window.__wbDownloadInstalled = true;
  window.__wbDownload = null;

  const captureBlob = async (target, filename, mime) => {
    try {
      let blob;
      if (typeof target === "string") {
        const resp = await fetch(target);
        blob = await resp.blob();
      } else {
        blob = target;
      }
      const buf = await blob.arrayBuffer();
      const bin = new Uint8Array(buf);
      let s = "";
      for (let i = 0; i < bin.length; i++) s += String.fromCharCode(bin[i]);
      window.__wbDownload = {
        filename: filename || "download.bin",
        bytes: btoa(s),
        mimeType: mime || blob.type || "application/octet-stream",
      };
    } catch (e) {
      window.__wbDownload = { error: String((e && e.message) || e) };
    }
  };

  const origClick = HTMLAnchorElement.prototype.click;
  HTMLAnchorElement.prototype.click = function (...args) {
    try {
      const href = this.getAttribute("href") || this.href || "";
      const hasDownload = this.hasAttribute("download");
      if (hasDownload && (href.startsWith("blob:") || href.startsWith("data:"))) {
        const fname = this.getAttribute("download") || this.download || "";
        captureBlob(href, fname);
      }
    } catch {}
    return origClick.apply(this, args);
  };

  // window.open(blobUrl): no filename is available, so the capture falls back
  // to "download.bin" unless the runbook supplies an explicit path. Restricted
  // to blob: URLs so ordinary popups are never mistaken for downloads.
  const origOpen = window.open;
  if (typeof origOpen === "function") {
    window.open = function (...args) {
      try {
        const url = args[0] == null ? "" : String(args[0]);
        if (url.startsWith("blob:")) captureBlob(url, "");
      } catch {}
      return origOpen.apply(this, args);
    };
  }
})()`;
86
+
87
+ // Read-and-clear of `window.__wbDownload`: the expression evaluates to whatever
88
+ // the page hook last stored there (or null) and resets the slot to null, so a
89
+ // capture is returned by one poll only and cannot leak into a later verb call.
90
+ const POLL_SCRIPT = `(() => {
91
+ const v = window.__wbDownload;
92
+ window.__wbDownload = null;
93
+ return v;
94
+ })()`;
95
+
96
export default {
  name: "download",
  primaryKey: "selector",

  /**
   * Click an element and capture the file that click produces.
   *
   * @param {object} page  Playwright-style page (`evaluate`, `click`,
   *   `getByText`, `waitForEvent`, `context`, `url` are used).
   * @param {object} args  Verb arguments: `selector` (required), `path`,
   *   `timeout` (milliseconds, or a duration string such as "10s"),
   *   `text_fallback`.
   * @param {object} [ctx] Verb context; only `ctx.index` is read.
   * @returns {Promise<string>} `→ <basename of the saved file>`.
   * @throws {Error} when $WB_ARTIFACTS_DIR or `selector` is missing, when
   *   `timeout` is malformed, when the click fails, or when no file is
   *   captured (a `slice.download_failed` frame is sent first in that case).
   */
  async execute(page, args, ctx) {
    const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
    if (!artifactsDir) {
      throw new Error(
        "download: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
      );
    }
    if (!args.selector) {
      throw new Error("download: `selector` is required");
    }
    const timeout = resolveTimeoutMs(args.timeout);
    const explicitPath =
      typeof args.path === "string" && args.path.trim()
        ? args.path.trim()
        : null;
    const allowlist = parseExtensionAllowlist(
      process.env.WB_BROWSER_DOWNLOAD_EXTENSIONS,
    );

    // 1) Inject the page-side capture hook BEFORE the click so a
    //    synchronously-dispatched anchor.click() inside the SPA's handler is
    //    observed. Best-effort: a frame mid-navigation can reject evaluate;
    //    the Playwright `download` event still works and is the primary
    //    signal anyway.
    try {
      await page.evaluate(PAGE_HOOK);
    } catch {}
    // Discard any capture left over from before this invocation (the hook
    // stays installed between calls) so it cannot be mistaken for this
    // click's file.
    try {
      await page.evaluate(POLL_SCRIPT);
    } catch {}

    // 2) Claim ownership of the next download synchronously — prepended to
    //    BrowserContext listeners so it runs before the passive listener has
    //    a chance to start its async capture chain. HANDLED_MARK tells the
    //    passive listener to bail.
    const claim = (download) => {
      try {
        download[HANDLED_MARK] = true;
      } catch {}
    };
    const browserContext = safeContext(page);
    let attached = false;
    if (browserContext) {
      if (typeof browserContext.prependListener === "function") {
        browserContext.prependListener("download", claim);
        attached = true;
      } else if (typeof browserContext.on === "function") {
        // Fallback: append. Ordering relative to the passive listener isn't
        // guaranteed without prependListener.
        browserContext.on("download", claim);
        attached = true;
      }
    }

    try {
      // 3) Primary attempt: capture sources are armed, then the selector is
      //    clicked, all concurrently.
      const primary = await clickAndCapture(page, timeout, () =>
        page.click(args.selector, { timeout }),
      );
      let winner = primary.winner;
      let clickError = primary.clickError;

      // 4) Text fallback. The primary capture window has expired by the time
      //    a selector click times out, so the fallback click gets a FRESH
      //    capture window — otherwise its download could never be observed.
      const primaryTimedOut = clickError && clickError.name === "TimeoutError";
      if (primaryTimedOut && args.text_fallback) {
        const clickByText = () =>
          page
            .getByText(args.text_fallback, { exact: false })
            .first()
            .click({ timeout });
        if (winner.success) {
          // A file was already captured; still perform the fallback click
          // and keep the primary error if it fails, as before.
          try {
            await clickByText();
            clickError = null;
          } catch {}
        } else {
          const retry = await clickAndCapture(page, timeout, clickByText);
          winner = retry.winner;
          if (!retry.clickError) clickError = null;
        }
      }

      // A click that never landed is the more actionable failure, so it is
      // surfaced over a generic "no file captured".
      if (clickError) throw clickError;

      if (winner.success && winner.kind === "playwright") {
        return await savePlaywrightDownload({
          download: winner.download,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
        });
      }
      if (winner.success && winner.kind === "blob") {
        return await saveBlobDownload({
          blob: winner.blob,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
        });
      }

      // No capture won — emit structured failure diagnostics.
      const reasons = winner.failures
        .map((f) => {
          if (f.kind === "playwright_failed") {
            return `playwright download: ${f.error?.message || f.error}`;
          }
          if (f.kind === "blob_failed") return `blob hook: ${f.error}`;
          if (f.kind === "blob_timeout") return `blob hook: no capture within ${timeout}ms`;
          return f.kind;
        })
        .join("; ");
      send({
        type: "slice.download_failed",
        verb: "download",
        verb_index: ctx?.index ?? null,
        selector: args.selector,
        timeout_ms: timeout,
        page_url: safePageUrl(page),
        reason: reasons,
      });
      throw new Error(
        `download: no file captured within ${timeout}ms after clicking ${args.selector} (page=${safePageUrl(page) || "?"}). ${reasons}`,
      );
    } finally {
      if (attached && browserContext && typeof browserContext.off === "function") {
        try {
          browserContext.off("download", claim);
        } catch {}
      }
    }
  },
};

// Normalise the `timeout` argument to milliseconds. Numbers pass through
// unchanged; strings accept an optional ms/s/m suffix (bare digits = ms).
// NOTE(review): the dispatcher may already convert durations before the verb
// runs — in that case this is a pass-through; confirm against the caller.
function resolveTimeoutMs(raw) {
  if (raw == null) return DEFAULT_TIMEOUT_MS;
  if (typeof raw === "number" && Number.isFinite(raw)) return raw;
  if (typeof raw === "string") {
    const match = /^(\d+(?:\.\d+)?)\s*(ms|s|m)?$/i.exec(raw.trim());
    if (match) {
      const scale = { ms: 1, s: 1_000, m: 60_000 }[(match[2] || "ms").toLowerCase()];
      return Number(match[1]) * scale;
    }
  }
  throw new Error(
    `download: invalid \`timeout\` ${JSON.stringify(raw)} — expected milliseconds or a duration like "10s"`,
  );
}

// Arm both capture sources, run `performClick`, and wait for the capture race
// and the click to settle. The blob poller is aborted afterwards so it cannot
// keep consuming `window.__wbDownload` once this attempt is over.
async function clickAndCapture(page, timeout, performClick) {
  const poller = new AbortController();
  const downloadPromise = page
    .waitForEvent("download", { timeout })
    .then((d) => ({ kind: "playwright", download: d }))
    .catch((e) => ({ kind: "playwright_failed", error: e }));
  const blobPromise = pollForBlob(page, timeout, poller.signal);

  let clickError = null;
  const clickPromise = (async () => {
    try {
      await performClick();
    } catch (err) {
      clickError = err;
    }
  })();

  try {
    const winner = await raceCaptures(downloadPromise, blobPromise);
    await clickPromise;
    return { winner, clickError };
  } finally {
    poller.abort();
  }
}
244
+
245
/**
 * Persist a Playwright `Download` under the artifacts directory and announce it.
 *
 * The target name is `explicitPath` when given, otherwise the download's
 * suggested filename. A name rejected by the extension allowlist cancels the
 * download (best-effort) and throws; a name that `uniquePathInside` refuses
 * throws as well. On success a `slice.artifact_saved` frame is sent.
 *
 * @returns {Promise<string>} `→ <basename of the saved file>`.
 */
async function savePlaywrightDownload(opts) {
  const { download, artifactsDir, allowlist, explicitPath, page, ctx } = opts;

  const suggested = explicitPath || safeSuggestedFilename(download);
  const sourceUrl = safeUrl(download);

  const permitted = extensionAllowed(suggested, allowlist);
  if (!permitted) {
    try {
      await download.cancel();
    } catch {
      // Cancelling is best-effort; the rejection below is what matters.
    }
    throw new Error(
      `download: file "${suggested}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }

  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }

  await fsPromises.mkdir(artifactsDir, { recursive: true });
  await download.saveAs(target);

  // Size is informational only: report null when the file cannot be stat'ed.
  const bytes = await fsPromises
    .stat(target)
    .then((info) => info.size)
    .catch(() => null);

  const savedName = path.basename(target);
  const provenance = {
    url: sourceUrl,
    suggested_filename: suggested,
    page_url: safePageUrl(page),
    verb_index: ctx?.index ?? null,
    verb_name: "download",
    ts: Date.now(),
  };
  send({
    type: "slice.artifact_saved",
    filename: savedName,
    path: target,
    bytes,
    source: "download",
    provenance,
  });
  return `→ ${savedName}`;
}
292
+
293
/**
 * Write bytes captured by the in-page hook into the artifacts directory and
 * announce them with a `slice.artifact_saved` frame.
 *
 * @param {object} opts
 * @param {{bytes: string, filename?: string, mimeType?: string}} opts.blob
 *   Capture read from `window.__wbDownload`; `bytes` is base64.
 * @param {string} opts.artifactsDir  Root directory for saved artifacts.
 * @param {*} opts.allowlist          Passed through to `extensionAllowed`.
 * @param {string|null} opts.explicitPath  Runbook-supplied name; wins over `blob.filename`.
 * @param {object} opts.page          Used only for `page_url` provenance.
 * @param {object} [opts.ctx]         Only `ctx.index` is read.
 * @returns {Promise<string>} `→ <basename of the saved file>`.
 * @throws {Error} when the name is rejected by the allowlist or by `uniquePathInside`.
 */
async function saveBlobDownload({
  blob,
  artifactsDir,
  allowlist,
  explicitPath,
  page,
  ctx,
}) {
  const suggested = explicitPath || blob.filename || FALLBACK_NAME;
  if (!extensionAllowed(suggested, allowlist)) {
    throw new Error(
      `download: file "${suggested}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }
  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }
  const buf = Buffer.from(blob.bytes, "base64");
  // Create the target's own parent, not just the artifacts root: unlike
  // Playwright's saveAs, writeFile does not create missing directories, so a
  // `path` with a sub-directory would otherwise fail with ENOENT.
  // (Assumes `uniquePathInside` can return nested paths — harmless if not.)
  await fsPromises.mkdir(path.dirname(target), { recursive: true });
  await fsPromises.writeFile(target, buf);
  send({
    type: "slice.artifact_saved",
    filename: path.basename(target),
    path: target,
    bytes: buf.length,
    source: "download",
    provenance: {
      url: null,
      suggested_filename: suggested,
      page_url: safePageUrl(page),
      verb_index: ctx?.index ?? null,
      verb_name: "download",
      mime_type: blob.mimeType || null,
      capture: "blob",
      ts: Date.now(),
    },
  });
  return `→ ${path.basename(target)}`;
}
335
+
336
// Settle on the first capture source that reports success. A failing source
// is only recorded; the overall failure is reported once BOTH sources have
// failed, carrying every failure (in completion order) for diagnostics.
// Promise.race is unsuitable here: a fast failure would mask a slower success.
function raceCaptures(downloadPromise, blobPromise) {
  const sources = [
    { promise: downloadPromise, winningKind: "playwright" },
    { promise: blobPromise, winningKind: "blob" },
  ];
  return new Promise((resolve) => {
    const failures = [];
    let pending = sources.length;
    for (const { promise, winningKind } of sources) {
      promise.then((outcome) => {
        const settled = { success: outcome.kind === winningKind, ...outcome };
        if (settled.success) {
          resolve(settled);
          return;
        }
        failures.push(settled);
        pending -= 1;
        if (pending === 0) {
          resolve({ success: false, failures });
        }
      });
    }
  });
}
362
+
363
/**
 * Poll the page for a capture stored by the in-page hook.
 *
 * @param {object} page  Object with an async `evaluate(script)` method.
 * @param {number} timeoutMs  Polling budget in milliseconds. Non-numeric
 *   values fall back to DEFAULT_TIMEOUT_MS rather than producing a deadline
 *   that can never be reached.
 * @param {AbortSignal} [signal]  Optional; once aborted, polling stops before
 *   the next read so an abandoned poller cannot consume a capture that
 *   belongs to a later call.
 * @returns {Promise<object>} One of `{ kind: "blob", blob }`,
 *   `{ kind: "blob_failed", error }`, `{ kind: "blob_timeout" }` or
 *   `{ kind: "blob_aborted" }`. Never rejects on `evaluate` failures.
 */
async function pollForBlob(page, timeoutMs, signal) {
  const budgetMs = Number(timeoutMs);
  const deadline =
    Date.now() + (Number.isFinite(budgetMs) ? budgetMs : DEFAULT_TIMEOUT_MS);
  while (true) {
    if (signal?.aborted) return { kind: "blob_aborted" };
    let result;
    try {
      result = await page.evaluate(POLL_SCRIPT);
    } catch {
      // evaluate can reject mid-navigation; treat as "nothing yet".
      result = null;
    }
    if (result && result.bytes) return { kind: "blob", blob: result };
    if (result && result.error) return { kind: "blob_failed", error: result.error };
    if (Date.now() >= deadline) return { kind: "blob_timeout" };
    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
  }
}
378
+
379
// Best-effort `page.url()`: yields null instead of throwing when the call fails.
function safePageUrl(page) {
  let current = null;
  try {
    current = page.url();
  } catch {
    // Keep the null default.
  }
  return current;
}
386
+
387
// Best-effort `page.context()`: yields null instead of throwing when the call fails.
function safeContext(page) {
  let owner = null;
  try {
    owner = page.context();
  } catch {
    // Keep the null default.
  }
  return owner;
}
394
+
395
// Filename proposed by the download, or FALLBACK_NAME when the call throws or
// the proposal is empty / whitespace-only. A usable name is returned untrimmed.
function safeSuggestedFilename(download) {
  try {
    const proposed = download.suggestedFilename();
    if (!proposed) return FALLBACK_NAME;
    if (!proposed.trim()) return FALLBACK_NAME;
    return proposed;
  } catch {
    return FALLBACK_NAME;
  }
}
403
+
404
// Best-effort `download.url()`: yields null instead of throwing when the call fails.
function safeUrl(download) {
  let source = null;
  try {
    source = download.url();
  } catch {
    // Keep the null default.
  }
  return source;
}
package/verbs/index.js CHANGED
@@ -22,6 +22,7 @@ import saveVerb from "./save.js";
22
22
  import pauseForHumanVerb from "./pause_for_human.js";
23
23
  import waitForDropVerb from "./wait_for_drop.js";
24
24
  import announceArtifactVerb from "./announce_artifact.js";
25
+ import downloadVerb from "./download.js";
25
26
 
26
27
  const VERBS = [
27
28
  gotoVerb,
@@ -38,6 +39,7 @@ const VERBS = [
38
39
  pauseForHumanVerb,
39
40
  waitForDropVerb,
40
41
  announceArtifactVerb,
42
+ downloadVerb,
41
43
  ];
42
44
 
43
45
  export const VERB_REGISTRY = Object.fromEntries(VERBS.map((v) => [v.name, v]));