wb-browser-runtime 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -2
- package/bin/wb-browser-runtime.js +55 -2
- package/lib/download-capture.js +180 -0
- package/lib/failure.js +99 -0
- package/lib/providers/browser-use.js +9 -2
- package/lib/providers/browserbase.js +12 -2
- package/lib/providers/index.js +6 -2
- package/lib/providers/local.js +120 -0
- package/lib/stub-page.js +16 -0
- package/lib/util.js +58 -0
- package/package.json +1 -1
- package/verbs/click.js +24 -2
- package/verbs/download.js +410 -0
- package/verbs/index.js +4 -0
- package/verbs/wait_for_network_idle.js +51 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// Local provider — drives a host-installed Playwright Chromium directly
|
|
2
|
+
// instead of going to a cloud vendor. Use for dev iteration without
|
|
3
|
+
// Browserbase / browser-use cost or latency. Selected via
|
|
4
|
+
// WB_BROWSER_VENDOR=local.
|
|
5
|
+
//
|
|
6
|
+
// Differences from cloud providers:
|
|
7
|
+
// 1. allocate() launches a real Chromium via `playwright-core`'s
|
|
8
|
+
// chromium.launch() and returns a pre-built Browser handle in
|
|
9
|
+
// `_browser`. The entry point checks for it and skips the
|
|
10
|
+
// connectOverCDP step that cloud providers require.
|
|
11
|
+
// 2. getLiveUrl() returns null — there's no public live-inspector URL
|
|
12
|
+
// for a locally-launched browser. The Rust side just renders the
|
|
13
|
+
// "session started" line without a clickable URL.
|
|
14
|
+
// 3. release() is a no-op. The shutdown path already does
|
|
15
|
+
// `info.browser.close()` on every cached session, which terminates
|
|
16
|
+
// the local Chromium process.
|
|
17
|
+
// 4. Profile binding is not supported (logged + ignored). For persistent
|
|
18
|
+
// auth across runs use a vendor with profile support, or pin
|
|
19
|
+
// WB_BROWSER_LOCAL_EXECUTABLE_PATH at a Chrome instance with a
|
|
20
|
+
// pre-warmed user-data-dir (advanced; not the supported path).
|
|
21
|
+
//
|
|
22
|
+
// Resume-after-pause: not supported. The Browser is process-local memory
|
|
23
|
+
// and dies with the sidecar; on resume the sidecar re-allocates a fresh
|
|
24
|
+
// session. This matches the dev-iteration use case (you're running the
|
|
25
|
+
// runbook end-to-end, not pausing on a real wait fence).
|
|
26
|
+
|
|
27
|
+
import { chromium } from "playwright-core";
|
|
28
|
+
import { log } from "../io.js";
|
|
29
|
+
|
|
30
|
+
/**
 * Decide whether an env-style value explicitly switches OFF a knob that
 * defaults to ON.
 *
 * Unset values (`undefined` / `null`) are not "off". Otherwise the value is
 * stringified, trimmed and lower-cased; only "0", "false", "no" and "off"
 * disable. Anything else (including the empty string) leaves the knob on.
 *
 * @param {unknown} v - Raw value, typically read from `process.env`.
 * @returns {boolean} `true` only for an explicit disable token.
 */
function isOff(v) {
  if (v == null) return false;
  const normalized = String(v).trim().toLowerCase();
  return ["0", "false", "no", "off"].includes(normalized);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build the "local" browser provider, which launches Chromium on the host
 * through `chromium.launch()` instead of allocating a remote session.
 *
 * The returned object exposes:
 *   - `name`: the string "local".
 *   - `allocate({ profile, sessionName })`: launches the browser and resolves
 *     to `{ sid, cdpUrl: null, _liveUrl: null, _browser }`. Rejects with an
 *     Error whose `code` is "SESSION_ALLOCATE_FAILED" (and whose `cause` is
 *     the underlying launch error) when the launch fails.
 *   - `getLiveUrl()`: always resolves to `null`.
 *   - `release()`: does nothing.
 *
 * Environment knobs read by `allocate`:
 *   - WB_BROWSER_LOCAL_HEADLESS: "0" / "false" / "no" / "off" runs headed.
 *   - WB_BROWSER_LOCAL_EXECUTABLE_PATH: explicit browser binary.
 *   - WB_BROWSER_LOCAL_CHANNEL: Playwright channel name.
 *
 * @returns {{name: string, allocate: Function, getLiveUrl: Function, release: Function}}
 */
export function createLocalProvider() {
  return {
    name: "local",

    async allocate({ profile, sessionName: _sessionName } = {}) {
      if (profile) {
        log(
          `[local] profile="${profile}" ignored — local vendor has no profile binding. ` +
            `Use a cloud vendor or persist auth via WB_BROWSER_LOCAL_EXECUTABLE_PATH on a pre-warmed Chrome.`,
        );
      }

      // Headless ON by default; flip with WB_BROWSER_LOCAL_HEADLESS=0 for
      // visible-window dev. Operators debugging a brittle workbook can flip
      // to headed without touching the runbook.
      const headless = !isOff(process.env.WB_BROWSER_LOCAL_HEADLESS);

      // executablePath: explicit override for system Chrome / Chromium.
      // channel: "chrome" / "msedge" / "chrome-beta" — Playwright's named
      // channels for OS-installed browsers (no separate download). At most
      // one of executablePath / channel should be set; if both arrive,
      // executablePath wins (Playwright honors it).
      // Empty strings are treated as unset.
      const executablePath =
        process.env.WB_BROWSER_LOCAL_EXECUTABLE_PATH || undefined;
      const channel = process.env.WB_BROWSER_LOCAL_CHANNEL || undefined;

      log(
        `[local] launching chromium headless=${headless}` +
          ` executablePath=${executablePath ?? "<bundled>"}` +
          ` channel=${channel ?? "<none>"}`,
      );

      let browser;
      try {
        browser = await chromium.launch({
          headless,
          executablePath,
          channel,
        });
      } catch (e) {
        // Most common cause: Playwright's chromium binary not installed.
        // playwright-core ships the API but no browser; the user runs
        // `npx playwright install chromium` once to fetch it. Surface the
        // hint inline so this isn't a guessing game on first run.
        //
        // A thrown non-Error value has no `.message`; fall back to its
        // string form so the text never reads "undefined".
        const detail = e?.message ?? String(e);
        // Keep the original error as `cause` so its stack and any vendor
        // fields stay reachable for debugging.
        const err = new Error(
          `local browser launch failed: ${detail}\n` +
            `Hint: install Chromium with \`npx playwright install chromium\`, ` +
            `or set WB_BROWSER_LOCAL_EXECUTABLE_PATH / WB_BROWSER_LOCAL_CHANNEL to use a system browser.`,
          { cause: e },
        );
        err.code = "SESSION_ALLOCATE_FAILED";
        throw err;
      }

      // sid is for telemetry only — there's no remote session to release.
      // Format: `local-<ms>-<rand>` so it's distinguishable from vendor sids
      // in callback streams and logs.
      const sid = `local-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

      return {
        sid,
        // No CDP URL — the entry point sees `_browser` and skips the
        // connectOverCDP path that cloud providers go through.
        cdpUrl: null,
        // Stashed so getLiveUrl() is a sync property read like browser-use.
        _liveUrl: null,
        _browser: browser,
      };
    },

    async getLiveUrl(_allocated) {
      // No public inspector URL for local Chromium. Returning null tells
      // the Rust side to render the "session started" line without a link.
      return null;
    },

    async release(_sid) {
      // Browser teardown happens in the entry-point shutdown loop via
      // `info.browser.close()`, which kills the local Chromium process.
      // Cloud providers need a separate vendor REST call here; local
      // doesn't.
    },
  };
}
|
package/lib/stub-page.js
CHANGED
|
@@ -73,6 +73,22 @@ export function createStubPage(opts = {}) {
|
|
|
73
73
|
record({ verb: "evaluate", script });
|
|
74
74
|
return evalResult;
|
|
75
75
|
},
|
|
76
|
+
async waitForLoadState(state, options) {
|
|
77
|
+
record({ verb: "waitForLoadState", state, options });
|
|
78
|
+
},
|
|
79
|
+
getByText(text, options) {
|
|
80
|
+
record({ verb: "getByText", text, options });
|
|
81
|
+
const locator = {
|
|
82
|
+
first() {
|
|
83
|
+
return {
|
|
84
|
+
async click(opts) {
|
|
85
|
+
record({ verb: "getByText.first.click", text, options: opts });
|
|
86
|
+
},
|
|
87
|
+
};
|
|
88
|
+
},
|
|
89
|
+
};
|
|
90
|
+
return locator;
|
|
91
|
+
},
|
|
76
92
|
};
|
|
77
93
|
}
|
|
78
94
|
|
package/lib/util.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { randomUUID } from "node:crypto";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
3
4
|
|
|
4
5
|
// Resolve `candidate` inside `dir`, rejecting traversal and absolute paths.
|
|
5
6
|
// Returns null when the resolved path escapes `dir` (or is `dir` itself).
|
|
@@ -14,6 +15,63 @@ export function resolveInside(dir, candidate) {
|
|
|
14
15
|
return resolved;
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
/**
 * Pick a collision-free path for `name` inside `dir`.
 *
 * The name is sanitized, then the first of `<base><ext>`, `<base>-2<ext>`,
 * `<base>-3<ext>`, ... (up to `-999`) that does not exist on disk is
 * returned. Playwright's `download.saveAs(path)` blindly overwrites, so this
 * is what keeps two same-named downloads (e.g. two `report.pdf` saves in one
 * session) from silently clobbering each other.
 *
 * The existence check is racy: two concurrent downloads with the same name
 * can both observe the same free slot before either writes. That is
 * acceptable here because downloads in a single session serialize through
 * the same page in practice, and a stray collision would just produce one
 * overwritten file rather than corrupting state.
 *
 * @param {string} dir - Directory the result must stay inside.
 * @param {string} name - Requested file name (sanitized before use).
 * @returns {string|null} A path inside `dir`, or `null` when the name would
 *   resolve outside `dir`.
 */
export function uniquePathInside(dir, name) {
  const safe = sanitizeArtifactName(name);
  const primary = resolveInside(dir, safe);
  if (!primary) return null;
  if (!existsSync(primary)) return primary;

  const ext = path.extname(safe);
  const stem = ext ? safe.slice(0, safe.length - ext.length) : safe;
  const withSuffix = (suffix) => resolveInside(dir, `${stem}-${suffix}${ext}`);

  let n = 2;
  while (n < 1000) {
    const candidate = withSuffix(n);
    if (!candidate) return null;
    if (!existsSync(candidate)) return candidate;
    n += 1;
  }

  // Fallback: append a random suffix. 1000 collisions on the same name in
  // one session is unrealistic, but we'd rather degrade than throw.
  return withSuffix(randomUUID().slice(0, 8));
}
|
|
47
|
+
|
|
48
|
+
/**
 * Parse a comma-separated extension allowlist from a raw env value
 * (e.g. "pdf, xlsx,CSV").
 *
 * Each entry is trimmed, lower-cased and stripped of one leading dot; empty
 * entries are dropped.
 *
 * @param {unknown} raw - Raw value; `null` / `undefined` mean "unset".
 * @returns {Set<string>|null} Lowercase extensions without leading dots, or
 *   `null` when nothing usable was supplied (callers treat `null` as "no
 *   filter — capture everything").
 */
export function parseExtensionAllowlist(raw) {
  if (raw === null || raw === undefined) return null;

  const extensions = new Set();
  for (const piece of String(raw).split(",")) {
    const ext = piece.trim().toLowerCase().replace(/^\./, "");
    if (ext) extensions.add(ext);
  }
  return extensions.size > 0 ? extensions : null;
}
|
|
63
|
+
|
|
64
|
+
/**
 * Check a filename against an extension allowlist.
 *
 * A falsy allowlist means "no filter", so everything passes. With a real
 * allowlist, the file's final extension (lower-cased, without the dot) must
 * be a member. Files with no extension never pass — the caller wanted a
 * specific set, and an unknown blob isn't it.
 *
 * @param {unknown} filename - Name to test; falsy values count as "".
 * @param {Set<string>|null|undefined} allowlist - Allowed extensions.
 * @returns {boolean} Whether the file may be kept.
 */
export function extensionAllowed(filename, allowlist) {
  if (!allowlist) return true;

  const dotted = path.extname(String(filename || "")).toLowerCase();
  const ext = dotted.startsWith(".") ? dotted.slice(1) : dotted;
  return ext !== "" && allowlist.has(ext);
}
|
|
74
|
+
|
|
17
75
|
export function sanitizeArtifactName(s) {
|
|
18
76
|
// Keep author-chosen names readable but safe as filenames. Drop anything
|
|
19
77
|
// that could escape the artifacts dir (slashes, NULs, etc.).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wb-browser-runtime",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.13.0",
|
|
4
4
|
"description": "Browser sidecar runtime for wb — Playwright over CDP (Browserbase, browser-use) via the wb-sidecar/1 line-framed JSON protocol.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"wb-browser-runtime": "bin/wb-browser-runtime.js"
|
package/verbs/click.js
CHANGED
|
@@ -2,7 +2,29 @@ export default {
|
|
|
2
2
|
name: "click",
|
|
3
3
|
primaryKey: "selector",
|
|
4
4
|
async execute(page, args) {
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
const timeout = args.timeout ?? 10_000;
|
|
6
|
+
try {
|
|
7
|
+
await page.click(args.selector, { timeout });
|
|
8
|
+
return `${args.selector}`;
|
|
9
|
+
} catch (err) {
|
|
10
|
+
// Text-fallback: when the selector times out (typically a brittle
|
|
11
|
+
// class/id rename), retry against visible text. We DELIBERATELY
|
|
12
|
+
// re-throw the ORIGINAL error if the fallback also fails — the
|
|
13
|
+
// selector failure is the actionable signal for error classification
|
|
14
|
+
// upstream; the fallback's failure would obscure it.
|
|
15
|
+
const isTimeout = err && err.name === "TimeoutError";
|
|
16
|
+
if (isTimeout && args.text_fallback) {
|
|
17
|
+
try {
|
|
18
|
+
await page
|
|
19
|
+
.getByText(args.text_fallback, { exact: false })
|
|
20
|
+
.first()
|
|
21
|
+
.click({ timeout });
|
|
22
|
+
return `${args.selector} (via text="${args.text_fallback}")`;
|
|
23
|
+
} catch {
|
|
24
|
+
throw err;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
throw err;
|
|
28
|
+
}
|
|
7
29
|
},
|
|
8
30
|
};
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
// download — explicit "click and capture" verb.
|
|
2
|
+
//
|
|
3
|
+
// The passive listener in lib/download-capture.js already saves any file the
|
|
4
|
+
// browser downloads, but it has no say over the filename and announces a
|
|
5
|
+
// `slice.artifact_saved` frame asynchronously after `saveAs` resolves. Some
|
|
6
|
+
// runbooks want stronger guarantees:
|
|
7
|
+
// - "the file lands at exactly $WB_ARTIFACTS_DIR/<path>"
|
|
8
|
+
// - "if it doesn't appear within ~10s, fail the slice with diagnostics"
|
|
9
|
+
// - works for SPAs that build the file in-page via fetch/XHR + Blob and
|
|
10
|
+
// don't always trip Playwright's `download` event reliably
|
|
11
|
+
//
|
|
12
|
+
// This verb installs capture hooks BEFORE clicking, races
|
|
13
|
+
// `page.waitForEvent("download")` against an in-page blob/anchor capture
|
|
14
|
+
// hook, and either saves the bytes itself (blob path) or hands the
|
|
15
|
+
// Playwright Download to `saveAs` (download path). Whichever path wins,
|
|
16
|
+
// the verb sets HANDLED_MARK on the Download (when applicable) so the
|
|
17
|
+
// passive listener doesn't double-save.
|
|
18
|
+
|
|
19
|
+
import path from "node:path";
|
|
20
|
+
import { Buffer } from "node:buffer";
|
|
21
|
+
import { promises as fsPromises } from "node:fs";
|
|
22
|
+
import { send } from "../lib/io.js";
|
|
23
|
+
import {
|
|
24
|
+
uniquePathInside,
|
|
25
|
+
parseExtensionAllowlist,
|
|
26
|
+
extensionAllowed,
|
|
27
|
+
} from "../lib/util.js";
|
|
28
|
+
import { HANDLED_MARK } from "../lib/download-capture.js";
|
|
29
|
+
|
|
30
|
+
const DEFAULT_TIMEOUT_MS = 10_000;
|
|
31
|
+
const POLL_INTERVAL_MS = 50;
|
|
32
|
+
const FALLBACK_NAME = "download.bin";
|
|
33
|
+
|
|
34
|
+
// Page-side hook that traps blob/data-URL anchor clicks the SPA performs
|
|
35
|
+
// programmatically — `URL.createObjectURL(blob)` + `<a download>` + `.click()`.
|
|
36
|
+
// Playwright's own `download` event normally catches these, but a handful
|
|
37
|
+
// of SPAs trigger downloads via `window.open(blobUrl)` or
|
|
38
|
+
// `window.location = blobUrl` which slip past. The hook re-fetches the blob
|
|
39
|
+
// in-page, base64-encodes the bytes, and stashes them on
|
|
40
|
+
// `window.__wbDownload` for the Node side to poll.
|
|
41
|
+
//
|
|
42
|
+
// Idempotent: re-installing on each verb invocation is a no-op after the
|
|
43
|
+
// first. We never uninstall — leaves the page in a slightly altered state
|
|
44
|
+
// but the wrapped click is functionally equivalent to the original.
|
|
45
|
+
const PAGE_HOOK = `(() => {
|
|
46
|
+
if (window.__wbDownloadInstalled) return;
|
|
47
|
+
window.__wbDownloadInstalled = true;
|
|
48
|
+
window.__wbDownload = null;
|
|
49
|
+
|
|
50
|
+
const captureBlob = async (target, filename, mime) => {
|
|
51
|
+
try {
|
|
52
|
+
let blob;
|
|
53
|
+
if (typeof target === "string") {
|
|
54
|
+
const resp = await fetch(target);
|
|
55
|
+
blob = await resp.blob();
|
|
56
|
+
} else {
|
|
57
|
+
blob = target;
|
|
58
|
+
}
|
|
59
|
+
const buf = await blob.arrayBuffer();
|
|
60
|
+
const bin = new Uint8Array(buf);
|
|
61
|
+
let s = "";
|
|
62
|
+
for (let i = 0; i < bin.length; i++) s += String.fromCharCode(bin[i]);
|
|
63
|
+
window.__wbDownload = {
|
|
64
|
+
filename: filename || "download.bin",
|
|
65
|
+
bytes: btoa(s),
|
|
66
|
+
mimeType: mime || blob.type || "application/octet-stream",
|
|
67
|
+
};
|
|
68
|
+
} catch (e) {
|
|
69
|
+
window.__wbDownload = { error: String((e && e.message) || e) };
|
|
70
|
+
}
|
|
71
|
+
};
|
|
72
|
+
|
|
73
|
+
const origClick = HTMLAnchorElement.prototype.click;
|
|
74
|
+
HTMLAnchorElement.prototype.click = function () {
|
|
75
|
+
try {
|
|
76
|
+
const href = this.getAttribute("href") || this.href || "";
|
|
77
|
+
const hasDownload = this.hasAttribute("download");
|
|
78
|
+
if (hasDownload && (href.startsWith("blob:") || href.startsWith("data:"))) {
|
|
79
|
+
const fname = this.getAttribute("download") || this.download || "";
|
|
80
|
+
captureBlob(href, fname);
|
|
81
|
+
}
|
|
82
|
+
} catch {}
|
|
83
|
+
return origClick.apply(this, arguments);
|
|
84
|
+
};
|
|
85
|
+
})()`;
|
|
86
|
+
|
|
87
|
+
// Read-and-clear of `window.__wbDownload`. Returning the value AND nulling
|
|
88
|
+
// it lets the page hook capture multiple downloads across separate verb
|
|
89
|
+
// calls without leaking state from a prior call into the next poll.
|
|
90
|
+
const POLL_SCRIPT = `(() => {
|
|
91
|
+
const v = window.__wbDownload;
|
|
92
|
+
window.__wbDownload = null;
|
|
93
|
+
return v;
|
|
94
|
+
})()`;
|
|
95
|
+
|
|
96
|
+
export default {
  name: "download",
  primaryKey: "selector",
  /**
   * Click `args.selector` and capture the file that the click produces.
   *
   * Reads:
   *   - `process.env.WB_ARTIFACTS_DIR` (required; destination directory).
   *   - `process.env.WB_BROWSER_DOWNLOAD_EXTENSIONS` (optional allowlist).
   *   - `args.selector` (required), `args.timeout` (ms, defaults to
   *     DEFAULT_TIMEOUT_MS), `args.path` (optional name to save under),
   *     `args.text_fallback` (optional visible text to click when the
   *     selector click times out).
   *   - `ctx?.index`, reported as `verb_index` in emitted frames.
   *
   * Two capture sources run concurrently with the click: Playwright's
   * `download` event on the page, and polling of the in-page blob hook.
   * The first source to succeed decides which save helper runs. Neither
   * capture promise is cancelled when the other wins; each runs until it
   * settles on its own.
   *
   * @returns {Promise<string>} The save helper's summary string.
   * @throws {Error} When the artifacts dir or selector is missing, when the
   *   click fails (the click error is thrown as-is, taking precedence over
   *   capture results), or when neither source captured a file — in that
   *   last case a `slice.download_failed` frame is sent first.
   */
  async execute(page, args, ctx) {
    const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
    if (!artifactsDir) {
      throw new Error(
        "download: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
      );
    }
    if (!args.selector) {
      throw new Error("download: `selector` is required");
    }
    const timeout = args.timeout ?? DEFAULT_TIMEOUT_MS;
    // Blank or non-string `path` counts as "no explicit path".
    const explicitPath =
      typeof args.path === "string" && args.path.trim()
        ? args.path.trim()
        : null;
    const allowlist = parseExtensionAllowlist(
      process.env.WB_BROWSER_DOWNLOAD_EXTENSIONS,
    );

    // 1) Inject the page-side blob/anchor capture hook BEFORE the click so a
    //    synchronously-dispatched anchor.click() inside the SPA's handler is
    //    observed. Best-effort: a frame mid-navigation can reject evaluate;
    //    the Playwright `download` event still works and is the primary
    //    signal anyway.
    try {
      await page.evaluate(PAGE_HOOK);
    } catch {}

    // 2) Claim ownership of the next download synchronously — prepended to
    //    BrowserContext listeners so it runs before lib/download-capture.js's
    //    passive listener has a chance to start its async capture chain. The
    //    HANDLED_MARK tells the passive listener to bail.
    const claim = (download) => {
      try {
        download[HANDLED_MARK] = true;
      } catch {}
    };
    const browserContext = safeContext(page);
    // Tracks whether `claim` was registered, so `finally` only detaches a
    // listener that was actually attached.
    let attached = false;
    if (browserContext) {
      if (typeof browserContext.prependListener === "function") {
        browserContext.prependListener("download", claim);
        attached = true;
      } else if (typeof browserContext.on === "function") {
        // Fallback: append. Race window is tiny (passive listener checks
        // HANDLED_MARK before its first await), but ordering isn't
        // guaranteed without prependListener.
        browserContext.on("download", claim);
        attached = true;
      }
    }

    try {
      // 3) Race the two capture sources against the click. The download event
      //    AND the click run concurrently — Playwright's standard pattern,
      //    since the click can resolve before or after the download fires.
      // Both outcomes are mapped to tagged objects so this promise never
      // rejects.
      const downloadPromise = page
        .waitForEvent("download", { timeout })
        .then((d) => ({ kind: "playwright", download: d }))
        .catch((e) => ({ kind: "playwright_failed", error: e }));

      const blobPromise = pollForBlob(page, timeout);

      // The click wrapper never rejects either; a failure is parked in
      // `clickError` and inspected after the capture race settles.
      let clickError = null;
      const clickPromise = (async () => {
        try {
          await page.click(args.selector, { timeout });
        } catch (err) {
          const isTimeout = err && err.name === "TimeoutError";
          if (isTimeout && args.text_fallback) {
            try {
              await page
                .getByText(args.text_fallback, { exact: false })
                .first()
                .click({ timeout });
              return;
            } catch {
              // Fallback failed too: report the ORIGINAL selector error.
              clickError = err;
              return;
            }
          }
          clickError = err;
        }
      })();

      const winner = await raceCaptures(downloadPromise, blobPromise);
      // Wait for the click to settle so we surface its error (if any) over
      // a generic "no file captured" — a click that never landed is the
      // more actionable failure.
      await clickPromise;
      if (clickError) throw clickError;

      if (winner.success && winner.kind === "playwright") {
        return await savePlaywrightDownload({
          download: winner.download,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
        });
      }
      if (winner.success && winner.kind === "blob") {
        return await saveBlobDownload({
          blob: winner.blob,
          artifactsDir,
          allowlist,
          explicitPath,
          page,
          ctx,
        });
      }

      // No capture won — emit structured failure diagnostics.
      // One human-readable clause per failed source, joined with "; ".
      const reasons = winner.failures
        .map((f) => {
          if (f.kind === "playwright_failed") {
            return `playwright download: ${f.error?.message || f.error}`;
          }
          if (f.kind === "blob_failed") return `blob hook: ${f.error}`;
          if (f.kind === "blob_timeout") return `blob hook: no capture within ${timeout}ms`;
          return f.kind;
        })
        .join("; ");
      send({
        type: "slice.download_failed",
        verb: "download",
        verb_index: ctx?.index ?? null,
        selector: args.selector,
        timeout_ms: timeout,
        page_url: safePageUrl(page),
        reason: reasons,
      });
      throw new Error(
        `download: no file captured within ${timeout}ms after clicking ${args.selector} (page=${safePageUrl(page) || "?"}). ${reasons}`,
      );
    } finally {
      // Always detach the claim listener, on success and on every throw.
      if (attached && browserContext && typeof browserContext.off === "function") {
        try {
          browserContext.off("download", claim);
        } catch {}
      }
    }
  },
};
|
|
244
|
+
|
|
245
|
+
/**
 * Persist a Playwright `Download` into the artifacts directory and announce
 * it with a `slice.artifact_saved` frame.
 *
 * The saved name comes from `explicitPath` when given, otherwise from the
 * download's suggested filename. A name rejected by the extension allowlist
 * cancels the download (best-effort) and throws; a name that cannot be
 * resolved inside `artifactsDir` throws as well.
 *
 * @param {object} params
 * @param {object} params.download - Playwright Download handle.
 * @param {string} params.artifactsDir - Destination directory (created if missing).
 * @param {Set<string>|null} params.allowlist - Allowed extensions, or null for no filter.
 * @param {string|null} params.explicitPath - Caller-chosen name, if any.
 * @param {object} params.page - Page used only to report `page_url`.
 * @param {object} [params.ctx] - Verb context; `ctx.index` becomes `verb_index`.
 * @returns {Promise<string>} `→ <saved file name>`.
 */
async function savePlaywrightDownload({
  download,
  artifactsDir,
  allowlist,
  explicitPath,
  page,
  ctx,
}) {
  const suggested = explicitPath || safeSuggestedFilename(download);
  const sourceUrl = safeUrl(download);

  if (!extensionAllowed(suggested, allowlist)) {
    try {
      await download.cancel();
    } catch {
      // Cancelling is best-effort; the rejection below is what matters.
    }
    throw new Error(
      `download: file "${suggested}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }

  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }

  await fsPromises.mkdir(artifactsDir, { recursive: true });
  await download.saveAs(target);

  // Size is informational; a failed stat reports `bytes: null`.
  const bytes = await fsPromises.stat(target).then(
    (stats) => stats.size,
    () => null,
  );

  const savedName = path.basename(target);
  const provenance = {
    url: sourceUrl,
    suggested_filename: suggested,
    page_url: safePageUrl(page),
    verb_index: ctx?.index ?? null,
    verb_name: "download",
    ts: Date.now(),
  };
  send({
    type: "slice.artifact_saved",
    filename: savedName,
    path: target,
    bytes,
    source: "download",
    provenance,
  });
  return `→ ${savedName}`;
}
|
|
292
|
+
|
|
293
|
+
/**
 * Write a blob captured by the in-page hook into the artifacts directory and
 * announce it with a `slice.artifact_saved` frame (`capture: "blob"`).
 *
 * The saved name is `explicitPath`, else the blob's own filename, else
 * FALLBACK_NAME. A name rejected by the extension allowlist, or one that
 * cannot be resolved inside `artifactsDir`, throws before anything is
 * written.
 *
 * @param {object} params
 * @param {{bytes: string, filename?: string, mimeType?: string}} params.blob -
 *   Captured payload; `bytes` is base64 text.
 * @param {string} params.artifactsDir - Destination directory (created if missing).
 * @param {Set<string>|null} params.allowlist - Allowed extensions, or null for no filter.
 * @param {string|null} params.explicitPath - Caller-chosen name, if any.
 * @param {object} params.page - Page used only to report `page_url`.
 * @param {object} [params.ctx] - Verb context; `ctx.index` becomes `verb_index`.
 * @returns {Promise<string>} `→ <saved file name>`.
 */
async function saveBlobDownload({
  blob,
  artifactsDir,
  allowlist,
  explicitPath,
  page,
  ctx,
}) {
  const suggested = explicitPath || blob.filename || FALLBACK_NAME;

  if (!extensionAllowed(suggested, allowlist)) {
    throw new Error(
      `download: file "${suggested}" rejected by WB_BROWSER_DOWNLOAD_EXTENSIONS`,
    );
  }

  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }

  const payload = Buffer.from(blob.bytes, "base64");
  await fsPromises.mkdir(artifactsDir, { recursive: true });
  await fsPromises.writeFile(target, payload);

  const savedName = path.basename(target);
  const provenance = {
    url: null,
    suggested_filename: suggested,
    page_url: safePageUrl(page),
    verb_index: ctx?.index ?? null,
    verb_name: "download",
    mime_type: blob.mimeType || null,
    capture: "blob",
    ts: Date.now(),
  };
  send({
    type: "slice.artifact_saved",
    filename: savedName,
    path: target,
    bytes: payload.length,
    source: "download",
    provenance,
  });
  return `→ ${savedName}`;
}
|
|
335
|
+
|
|
336
|
+
/**
 * Race the two capture promises: the first one to report SUCCESS wins.
 *
 * Failure is only declared once BOTH have reported, so the diagnostics can
 * list every reason no file was seen. (`Promise.race` would shortcut on a
 * fast failure and discard the slower success.)
 *
 * @param {Promise<{kind: string}>} downloadPromise - Succeeds with kind "playwright".
 * @param {Promise<{kind: string}>} blobPromise - Succeeds with kind "blob".
 * @returns {Promise<object>} Either `{ success: true, ...winner }` or
 *   `{ success: false, failures: [...] }` in settle order.
 */
function raceCaptures(downloadPromise, blobPromise) {
  return new Promise((resolve) => {
    const failures = [];
    let pending = 2;

    // Builds the settle handler for one source; `successKind` is the `kind`
    // that counts as a win for that source.
    const settleAs = (successKind) => (outcome) => {
      const settled = { success: outcome.kind === successKind, ...outcome };
      if (settled.success) {
        resolve(settled);
        return;
      }
      failures.push(settled);
      pending -= 1;
      if (pending === 0) resolve({ success: false, failures });
    };

    downloadPromise.then(settleAs("playwright"));
    blobPromise.then(settleAs("blob"));
  });
}
|
|
362
|
+
|
|
363
|
+
// Newest poll token per page. A poll is never cancelled by its caller, so
// without this a poll left over from an earlier `download` call would keep
// read-and-clearing `window.__wbDownload` until its own deadline and could
// steal the capture that belongs to a later call on the same page.
const ACTIVE_BLOB_POLLS = new WeakMap();

/**
 * Poll the page for a blob captured by the in-page hook.
 *
 * Evaluates POLL_SCRIPT every POLL_INTERVAL_MS until one of:
 *   - a payload with `bytes` appears  → `{ kind: "blob", blob }`
 *   - the hook reported an error      → `{ kind: "blob_failed", error }`
 *   - `timeoutMs` elapsed             → `{ kind: "blob_timeout" }`
 *   - a newer poll started on `page`  → `{ kind: "blob_superseded" }`
 *
 * A rejected `page.evaluate` (e.g. mid-navigation) counts as "nothing yet"
 * and polling continues. This function never rejects.
 *
 * @param {object} page - Page-like object exposing `evaluate(script)`.
 * @param {number} timeoutMs - Polling budget in milliseconds.
 * @returns {Promise<{kind: string, blob?: object, error?: string}>}
 */
async function pollForBlob(page, timeoutMs) {
  // WeakMap keys must be objects; anything else is polled without tracking.
  const trackable =
    page !== null && (typeof page === "object" || typeof page === "function");
  const token = {};
  if (trackable) ACTIVE_BLOB_POLLS.set(page, token);
  const superseded = () => trackable && ACTIVE_BLOB_POLLS.get(page) !== token;

  const deadline = Date.now() + timeoutMs;
  while (true) {
    // Checked BEFORE evaluating so a stale poll never clears the slot once a
    // newer poll owns the page.
    if (superseded()) return { kind: "blob_superseded" };

    let result;
    try {
      result = await page.evaluate(POLL_SCRIPT);
    } catch {
      result = null;
    }
    if (result && result.bytes) return { kind: "blob", blob: result };
    if (result && result.error) return { kind: "blob_failed", error: result.error };
    if (Date.now() >= deadline) return { kind: "blob_timeout" };
    await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS));
  }
}
|
|
378
|
+
|
|
379
|
+
/**
 * Read `page.url()` without ever throwing.
 *
 * @param {object} page - Page-like object.
 * @returns {*} Whatever `page.url()` returns, or `null` if the call throws.
 */
function safePageUrl(page) {
  let url = null;
  try {
    url = page.url();
  } catch {
    // Diagnostics only: keep the null default.
  }
  return url;
}
|
|
386
|
+
|
|
387
|
+
/**
 * Read `page.context()` without ever throwing.
 *
 * @param {object} page - Page-like object.
 * @returns {*} Whatever `page.context()` returns, or `null` if the call throws.
 */
function safeContext(page) {
  let context = null;
  try {
    context = page.context();
  } catch {
    // No usable context: keep the null default.
  }
  return context;
}
|
|
394
|
+
|
|
395
|
+
/**
 * Get a usable filename from a download handle.
 *
 * @param {object} download - Object exposing `suggestedFilename()`.
 * @returns {string} The suggested name (untrimmed) when it is non-blank;
 *   FALLBACK_NAME when it is empty, whitespace-only, or the lookup throws.
 */
function safeSuggestedFilename(download) {
  try {
    const name = download.suggestedFilename();
    if (name && name.trim()) return name;
  } catch {
    // Fall through to the default name.
  }
  return FALLBACK_NAME;
}
|
|
403
|
+
|
|
404
|
+
/**
 * Read `download.url()` without ever throwing.
 *
 * @param {object} download - Object exposing `url()`.
 * @returns {*} Whatever `download.url()` returns, or `null` if the call throws.
 */
function safeUrl(download) {
  let url = null;
  try {
    url = download.url();
  } catch {
    // Provenance only: keep the null default.
  }
  return url;
}
|
package/verbs/index.js
CHANGED
|
@@ -13,6 +13,7 @@ import fillVerb from "./fill.js";
|
|
|
13
13
|
import clickVerb from "./click.js";
|
|
14
14
|
import pressVerb from "./press.js";
|
|
15
15
|
import waitForVerb from "./wait_for.js";
|
|
16
|
+
import waitForNetworkIdleVerb from "./wait_for_network_idle.js";
|
|
16
17
|
import screenshotVerb from "./screenshot.js";
|
|
17
18
|
import extractVerb from "./extract.js";
|
|
18
19
|
import assertVerb from "./assert.js";
|
|
@@ -21,6 +22,7 @@ import saveVerb from "./save.js";
|
|
|
21
22
|
import pauseForHumanVerb from "./pause_for_human.js";
|
|
22
23
|
import waitForDropVerb from "./wait_for_drop.js";
|
|
23
24
|
import announceArtifactVerb from "./announce_artifact.js";
|
|
25
|
+
import downloadVerb from "./download.js";
|
|
24
26
|
|
|
25
27
|
const VERBS = [
|
|
26
28
|
gotoVerb,
|
|
@@ -28,6 +30,7 @@ const VERBS = [
|
|
|
28
30
|
clickVerb,
|
|
29
31
|
pressVerb,
|
|
30
32
|
waitForVerb,
|
|
33
|
+
waitForNetworkIdleVerb,
|
|
31
34
|
screenshotVerb,
|
|
32
35
|
extractVerb,
|
|
33
36
|
assertVerb,
|
|
@@ -36,6 +39,7 @@ const VERBS = [
|
|
|
36
39
|
pauseForHumanVerb,
|
|
37
40
|
waitForDropVerb,
|
|
38
41
|
announceArtifactVerb,
|
|
42
|
+
downloadVerb,
|
|
39
43
|
];
|
|
40
44
|
|
|
41
45
|
export const VERB_REGISTRY = Object.fromEntries(VERBS.map((v) => [v.name, v]));
|