wb-browser-runtime 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -2
- package/bin/wb-browser-runtime.js +55 -2
- package/lib/download-capture.js +180 -0
- package/lib/failure.js +99 -0
- package/lib/providers/browser-use.js +9 -2
- package/lib/providers/browserbase.js +12 -2
- package/lib/providers/index.js +6 -2
- package/lib/providers/local.js +120 -0
- package/lib/stub-page.js +16 -0
- package/lib/util.js +58 -0
- package/package.json +1 -1
- package/verbs/click.js +24 -2
- package/verbs/download.js +410 -0
- package/verbs/index.js +4 -0
- package/verbs/wait_for_network_idle.js +51 -0
package/README.md
CHANGED
|
@@ -33,8 +33,8 @@ specific run.
|
|
|
33
33
|
|
|
34
34
|
## Vendor selection
|
|
35
35
|
|
|
36
|
-
`WB_BROWSER_VENDOR` — `browserbase` (default)
|
|
37
|
-
at sidecar boot; there is no per-slice override.
|
|
36
|
+
`WB_BROWSER_VENDOR` — `browserbase` (default), `browser-use`, or `local`.
|
|
37
|
+
Resolved once at sidecar boot; there is no per-slice override.
|
|
38
38
|
|
|
39
39
|
### Browserbase (default)
|
|
40
40
|
|
|
@@ -55,6 +55,40 @@ Profile (auth state) is selected per-runbook via the `profile_id:` field on a
|
|
|
55
55
|
default when the browser block omits `profile_id:`; a per-runbook `profile_id:`
|
|
56
56
|
always wins over the env var.
|
|
57
57
|
|
|
58
|
+
### local
|
|
59
|
+
|
|
60
|
+
`WB_BROWSER_VENDOR=local` drives a host-installed Playwright Chromium directly
|
|
61
|
+
— no API keys, no network calls, no per-session cost. Use for dev iteration
|
|
62
|
+
when you'd otherwise burn vendor minutes on broken selectors.
|
|
63
|
+
|
|
64
|
+
| Env var | Default | Purpose |
|
|
65
|
+
|--------------------------------------|--------------|------------------------------------------------------|
|
|
66
|
+
| `WB_BROWSER_LOCAL_HEADLESS` | `1` | Set `0`/`false` for a visible browser window. |
|
|
67
|
+
| `WB_BROWSER_LOCAL_EXECUTABLE_PATH` | *(unset)* | Absolute path to a Chrome/Chromium binary. Overrides Playwright's bundled download. |
|
|
68
|
+
| `WB_BROWSER_LOCAL_CHANNEL` | *(unset)* | Playwright channel name (`chrome`, `msedge`, `chrome-beta`, ...) for an OS-installed browser. Mutually exclusive with `EXECUTABLE_PATH`. |
|
|
69
|
+
|
|
70
|
+
Trade-offs vs cloud vendors:
|
|
71
|
+
|
|
72
|
+
- **No live URL.** `slice.session_started.live_url` is `null` — no remote
|
|
73
|
+
inspector, no Loom-style live preview. Use a non-headless run with
|
|
74
|
+
`WB_BROWSER_LOCAL_HEADLESS=0` if you want to watch.
|
|
75
|
+
- **No persistent profile.** Each run starts with a clean Chromium state.
|
|
76
|
+
Cloud-side "profile" features (auth-state binding) aren't available.
|
|
77
|
+
- **No resume after pause.** If a workbook hits a `wait` fence that suspends
|
|
78
|
+
the sidecar, the in-process Chromium dies with it. On `wb resume` the
|
|
79
|
+
local provider re-allocates a fresh browser. Cloud vendors can keep the
|
|
80
|
+
session alive for resume.
|
|
81
|
+
|
|
82
|
+
First-time install:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
npx playwright install chromium
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
If you skip this, `allocate()` fails with a hint pointing back at the
|
|
89
|
+
install command. Or set `WB_BROWSER_LOCAL_EXECUTABLE_PATH` /
|
|
90
|
+
`WB_BROWSER_LOCAL_CHANNEL` to use a system browser without the download.
|
|
91
|
+
|
|
58
92
|
## Profiles
|
|
59
93
|
|
|
60
94
|
Some vendors expose persistent browser profiles — cookies, localStorage, saved
|
|
@@ -174,6 +208,7 @@ example, see the `browserbase-hn-upvoted-probe` runbook in the xatabase repo.
|
|
|
174
208
|
| `assert` | `assert: <selector>` | `selector`, `text_contains`, `url_contains` |
|
|
175
209
|
| `eval` | `eval: <js>` | `script` |
|
|
176
210
|
| `save` | `save: <name>` | `name`, `value` (captures prior `extract`/`eval` when omitted) |
|
|
211
|
+
| `download` | `download: <selector>` | `selector`, `path`, `timeout`, `text_fallback` (clicks + races Playwright `download` event with in-page blob/anchor capture; saves into `$WB_ARTIFACTS_DIR/<path>`) |
|
|
177
212
|
|
|
178
213
|
`extract`'s `fields` entries are either a CSS selector string (returns
|
|
179
214
|
`textContent`), or `{ selector, attr }` to read an attribute.
|
|
@@ -212,6 +247,70 @@ When `WB_ARTIFACTS_UPLOAD_URL` is set (template supports `{run_id}` and
|
|
|
212
247
|
produced it completes. Auth reuses `WB_RECORDING_UPLOAD_SECRET`
|
|
213
248
|
(`Authorization: Bearer <…>`); failures are logged and non-fatal.
|
|
214
249
|
|
|
250
|
+
### Auto-captured downloads
|
|
251
|
+
|
|
252
|
+
The sidecar attaches a context-level `download` listener at session
|
|
253
|
+
start, so any file the browser downloads — clicked attachments, redirect
|
|
254
|
+
chains that end in a binary, popup-driven Save As — is saved to
|
|
255
|
+
`$WB_ARTIFACTS_DIR` automatically and emitted as a `slice.artifact_saved`
|
|
256
|
+
frame (with `source: "download"` and a `provenance` block: source URL,
|
|
257
|
+
page URL, the verb that was running, suggested filename). No verb call
|
|
258
|
+
required. For cloud-provider browsers, Playwright streams the bytes back
|
|
259
|
+
over CDP, so the file always lands on the sidecar machine where the
|
|
260
|
+
artifacts dir + uploader live.
|
|
261
|
+
|
|
262
|
+
Filename collisions in a single session get `-2`, `-3`, … suffixed
|
|
263
|
+
(Playwright's `download.saveAs()` blindly overwrites, so we apply the
|
|
264
|
+
suffixing ourselves).
|
|
265
|
+
|
|
266
|
+
There is no size cap — `download.saveAs()` only resolves once the bytes
|
|
267
|
+
are fully streamed, so a hung download trips the cell's own timeout
|
|
268
|
+
(default 120s slice deadline; bump via `WB_SLICE_DEADLINE_MS`) and
|
|
269
|
+
surfaces as a normal cell failure.
|
|
270
|
+
|
|
271
|
+
To filter, set `WB_BROWSER_DOWNLOAD_EXTENSIONS` to a comma-separated
|
|
272
|
+
list (case-insensitive, leading dots ignored):
|
|
273
|
+
|
|
274
|
+
```yaml
|
|
275
|
+
env:
|
|
276
|
+
WB_BROWSER_DOWNLOAD_EXTENSIONS: pdf,xlsx,csv,docx
|
|
277
|
+
```
|
|
278
|
+
|
|
279
|
+
When an allowlist is set, non-matching downloads are cancelled and
|
|
280
|
+
emitted as `slice.download_skipped` (with `reason:
|
|
281
|
+
"extension_not_in_allowlist"`) so the operator sees what was discarded.
|
|
282
|
+
Unset = capture everything.
|
|
283
|
+
|
|
284
|
+
### Explicit `download:` verb
|
|
285
|
+
|
|
286
|
+
The passive listener handles "any file the browser saves" but gives the
|
|
287
|
+
runbook no control over the filename or timing. Use the `download:` verb
|
|
288
|
+
when the runbook needs to click a specific button, save the result at a
|
|
289
|
+
specific path, and fail loudly within ~10s if no file appears:
|
|
290
|
+
|
|
291
|
+
```yaml
|
|
292
|
+
- download:
|
|
293
|
+
selector: 'button:has-text("Download as xlsx")'
|
|
294
|
+
path: pilot-profit-loss.xlsx # written to $WB_ARTIFACTS_DIR/<path>
|
|
295
|
+
timeout: 10s # default
|
|
296
|
+
text_fallback: "Download as xlsx" # like click — fallback when selector is brittle
|
|
297
|
+
```
|
|
298
|
+
|
|
299
|
+
Behaviour:
|
|
300
|
+
|
|
301
|
+
- Installs a page-side blob/anchor capture hook **before** the click so a
|
|
302
|
+
synchronously-dispatched `URL.createObjectURL(blob) + <a download>.click()`
|
|
303
|
+
is observed even when Playwright's own `download` event misses it
|
|
304
|
+
(e.g. `window.location = blobUrl`).
|
|
305
|
+
- Races `page.waitForEvent("download")` against the in-page hook; whichever
|
|
306
|
+
fires first wins.
|
|
307
|
+
- Sets `HANDLED_MARK` on the `Download` so the always-on passive listener
|
|
308
|
+
doesn't double-save.
|
|
309
|
+
- Emits `slice.artifact_saved` with `source: "download"` and
|
|
310
|
+
`provenance.verb_name: "download"`.
|
|
311
|
+
- On timeout: throws with diagnostics (page URL, selector, both
|
|
312
|
+
failure reasons) AND emits a `slice.download_failed` frame.
|
|
313
|
+
|
|
215
314
|
## Protocol
|
|
216
315
|
|
|
217
316
|
Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
|
|
@@ -34,6 +34,12 @@ import {
|
|
|
34
34
|
loadRecordingConfig,
|
|
35
35
|
} from "../lib/recording-manager.js";
|
|
36
36
|
import { getProvider } from "../lib/providers/index.js";
|
|
37
|
+
import {
|
|
38
|
+
attachConsoleBuffer,
|
|
39
|
+
captureFailureDiagnostics,
|
|
40
|
+
classifyError,
|
|
41
|
+
} from "../lib/failure.js";
|
|
42
|
+
import { installDownloadCapture } from "../lib/download-capture.js";
|
|
37
43
|
import { SUPPORTS, runVerb, verbName } from "../verbs/index.js";
|
|
38
44
|
|
|
39
45
|
const VERSION = "0.8.0";
|
|
@@ -85,10 +91,22 @@ async function ensureSession(name, { profile, restoreSession } = {}) {
|
|
|
85
91
|
let browser = null;
|
|
86
92
|
try {
|
|
87
93
|
const liveUrl = allocated._liveUrl ?? (await provider.getLiveUrl(allocated));
|
|
88
|
-
|
|
94
|
+
// Local provider returns a pre-built Browser via `_browser` (no CDP
|
|
95
|
+
// round-trip — chromium is already launched in-process). Cloud
|
|
96
|
+
// providers return a `cdpUrl` we connect to. Restored sessions
|
|
97
|
+
// always reconnect via CDP.
|
|
98
|
+
browser =
|
|
99
|
+
allocated._browser ??
|
|
100
|
+
(await chromium.connectOverCDP(allocated.cdpUrl));
|
|
89
101
|
const tConnected = Date.now();
|
|
90
|
-
|
|
102
|
+
// acceptDownloads is true by default for Playwright-launched contexts,
|
|
103
|
+
// but we set it explicitly so the listener installed below isn't a
|
|
104
|
+
// no-op against a vendor-provided context that opted out.
|
|
105
|
+
const context =
|
|
106
|
+
browser.contexts()[0] ??
|
|
107
|
+
(await browser.newContext({ acceptDownloads: true }));
|
|
91
108
|
const page = context.pages()[0] ?? (await context.newPage());
|
|
109
|
+
const consoleBuffer = attachConsoleBuffer(page);
|
|
92
110
|
const tPageReady = Date.now();
|
|
93
111
|
|
|
94
112
|
const info = {
|
|
@@ -100,8 +118,17 @@ async function ensureSession(name, { profile, restoreSession } = {}) {
|
|
|
100
118
|
page,
|
|
101
119
|
liveUrl,
|
|
102
120
|
recording: null,
|
|
121
|
+
consoleBuffer,
|
|
122
|
+
// Updated by handleSlice's verb loop so the download listener
|
|
123
|
+
// can attach `verb_index`/`verb_name` provenance to artifacts
|
|
124
|
+
// captured while a verb is running. Null between slices.
|
|
125
|
+
currentVerb: null,
|
|
103
126
|
};
|
|
104
127
|
|
|
128
|
+
// Install the always-on download listener now, before any slice
|
|
129
|
+
// runs, so a download fired by the very first verb is captured.
|
|
130
|
+
installDownloadCapture(context, () => info.currentVerb);
|
|
131
|
+
|
|
105
132
|
send({
|
|
106
133
|
type: "slice.session_started",
|
|
107
134
|
session: name,
|
|
@@ -277,6 +304,7 @@ async function handleSlice(msg) {
|
|
|
277
304
|
} catch (e) {
|
|
278
305
|
send({
|
|
279
306
|
type: "slice.failed",
|
|
307
|
+
code: classifyError(e, "session"),
|
|
280
308
|
error: `session start failed: ${scrubSecrets(e.message, sliceCtx.secrets)}`,
|
|
281
309
|
});
|
|
282
310
|
return;
|
|
@@ -298,6 +326,7 @@ async function handleSlice(msg) {
|
|
|
298
326
|
if (Date.now() >= sliceDeadline) {
|
|
299
327
|
send({
|
|
300
328
|
type: "slice.failed",
|
|
329
|
+
code: "SLICE_TIMEOUT",
|
|
301
330
|
error: `slice exceeded deadline (${sliceDeadlineMs}ms); aborted before verb index ${i} of ${verbs.length}`,
|
|
302
331
|
});
|
|
303
332
|
return;
|
|
@@ -305,6 +334,12 @@ async function handleSlice(msg) {
|
|
|
305
334
|
const v = verbs[i];
|
|
306
335
|
const name = verbName(v);
|
|
307
336
|
const verbStart = Date.now();
|
|
337
|
+
// Tell the passive download listener which verb to blame for any
|
|
338
|
+
// download that fires during this iteration. Cleared in `finally`
|
|
339
|
+
// so a download arriving between verbs (rare, but possible during
|
|
340
|
+
// a settle/redirect) records as "no current verb" instead of
|
|
341
|
+
// sticking the previous one's name on it.
|
|
342
|
+
session.currentVerb = { index: i, name };
|
|
308
343
|
try {
|
|
309
344
|
const summary = await runVerb(session.page, v, i, sliceCtx, expand);
|
|
310
345
|
// Pause-sentinel escape hatch: a verb signals a mid-slice halt by
|
|
@@ -353,26 +388,44 @@ async function handleSlice(msg) {
|
|
|
353
388
|
} catch (e) {
|
|
354
389
|
const duration_ms = Date.now() - verbStart;
|
|
355
390
|
const clean = scrubSecrets(e.message, sliceCtx.secrets);
|
|
391
|
+
const code = classifyError(e, name);
|
|
392
|
+
const diagnostics = await captureFailureDiagnostics({
|
|
393
|
+
page: session.page,
|
|
394
|
+
artifactsDir: (process.env.WB_ARTIFACTS_DIR || "").trim() || null,
|
|
395
|
+
verbIndex: i,
|
|
396
|
+
consoleBuffer: session.consoleBuffer,
|
|
397
|
+
scrubSecrets,
|
|
398
|
+
secrets: sliceCtx.secrets,
|
|
399
|
+
});
|
|
356
400
|
send({
|
|
357
401
|
type: "verb.failed",
|
|
358
402
|
verb: name,
|
|
359
403
|
verb_index: i,
|
|
404
|
+
code,
|
|
360
405
|
error: clean,
|
|
361
406
|
duration_ms,
|
|
407
|
+
screenshot_path: diagnostics.screenshot_path,
|
|
408
|
+
console_tail: diagnostics.console_tail,
|
|
362
409
|
});
|
|
363
410
|
send({
|
|
364
411
|
type: "slice.failed",
|
|
412
|
+
code,
|
|
365
413
|
error: `verb ${name} (index ${i}): ${clean}`,
|
|
366
414
|
});
|
|
367
415
|
return;
|
|
368
416
|
}
|
|
369
417
|
}
|
|
418
|
+
// Slice ended cleanly — clear the listener's "currently running verb"
|
|
419
|
+
// pointer so a stray late-arriving download doesn't get stamped with
|
|
420
|
+
// the last verb's name.
|
|
421
|
+
session.currentVerb = null;
|
|
370
422
|
send({ type: "slice.complete" });
|
|
371
423
|
} catch (e) {
|
|
372
424
|
log(`[slice] unhandled: ${e.stack || e.message}`);
|
|
373
425
|
try {
|
|
374
426
|
send({
|
|
375
427
|
type: "slice.failed",
|
|
428
|
+
code: classifyError(e, "sidecar"),
|
|
376
429
|
error: `sidecar error: ${scrubSecrets(e.message, sliceCtx.secrets)}`,
|
|
377
430
|
});
|
|
378
431
|
} catch {}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
// download-capture — passive capture of any file the browser downloads
|
|
2
|
+
// during a session, regardless of which verb (or page redirect, or popup)
|
|
3
|
+
// triggered it.
|
|
4
|
+
//
|
|
5
|
+
// The runbook author doesn't have to predict downloads. We attach a
|
|
6
|
+
// `download` listener to the BrowserContext at session start; every file
|
|
7
|
+
// the browser saves lands in `$WB_ARTIFACTS_DIR` and gets announced via a
|
|
8
|
+
// `slice.artifact_saved` frame so wb's existing R2 uploader picks it up
|
|
9
|
+
// for free. Provenance (page URL, source URL, which verb was running, ts)
|
|
10
|
+
// rides along on the frame so the run-page event feed can show *why* a
|
|
11
|
+
// given file appeared.
|
|
12
|
+
//
|
|
13
|
+
// Filtering: if WB_BROWSER_DOWNLOAD_EXTENSIONS is set, only files whose
|
|
14
|
+
// extension matches the allowlist are kept. Skipped downloads still get a
|
|
15
|
+
// `slice.download_skipped` frame so the operator sees what was discarded
|
|
16
|
+
// (rare in practice — `download` events fire on real attachments, not
|
|
17
|
+
// inline analytics pings — but useful when a SPA emits noisy JSON blobs).
|
|
18
|
+
//
|
|
19
|
+
// Big files: there is no size cap. R2 is bottomless and the runbook's own
|
|
20
|
+
// timeout governs hung downloads — `download.saveAs()` only resolves once
|
|
21
|
+
// bytes are fully streamed, so a stuck download will trip the cell deadline
|
|
22
|
+
// and surface as a normal cell failure.
|
|
23
|
+
//
|
|
24
|
+
// Cloud vs local: `download.saveAs(absPath)` works for both. Playwright
|
|
25
|
+
// streams the bytes back over CDP for cloud-attached browsers, so the file
|
|
26
|
+
// always lands on the sidecar machine where $WB_ARTIFACTS_DIR lives.
|
|
27
|
+
|
|
28
|
+
import path from "node:path";
|
|
29
|
+
import { promises as fsPromises } from "node:fs";
|
|
30
|
+
import { send, log, logWarn } from "./io.js";
|
|
31
|
+
import {
|
|
32
|
+
uniquePathInside,
|
|
33
|
+
parseExtensionAllowlist,
|
|
34
|
+
extensionAllowed,
|
|
35
|
+
} from "./util.js";
|
|
36
|
+
|
|
37
|
+
// Marker that the explicit `download:` verb sets on a
|
|
38
|
+
// Download object once it's claimed it. The passive listener checks for
|
|
39
|
+
// this and skips, so the same file isn't saved twice.
|
|
40
|
+
export const HANDLED_MARK = Symbol.for("wb.download.handled");
|
|
41
|
+
|
|
42
|
+
// Sentinel filename used when Playwright reports an empty suggestedFilename
|
|
43
|
+
// (rare, but theoretically possible for downloads with no Content-
|
|
44
|
+
// Disposition header and an empty URL path).
|
|
45
|
+
const FALLBACK_NAME = "download.bin";
|
|
46
|
+
|
|
47
|
+
// Install the always-on download capture for `context`.
//
// Does nothing (beyond a log line) when WB_ARTIFACTS_DIR isn't set: without
// an artifacts dir there is nowhere to put the file, and bailing here is
// preferable to inventing a temp dir that wb's uploader doesn't watch.
//
// Playwright documents `download` as a *Page* event, so subscribing on the
// BrowserContext alone may never fire. We therefore subscribe on every page
// the context already has and on every page it opens later (popups
// included). The context-level subscription is kept as well, and a WeakSet
// de-duplicates so a Download announced through more than one emitter is
// captured exactly once.
//
// `getCurrentVerb()` is a callback supplied by the entry point; it is
// forwarded to `captureOne` so the announcement can carry `verb_index` /
// `verb_name` without the slice loop reaching back into this module.
//
// @param {object} context          Playwright BrowserContext (needs `.on`; `.pages` is optional).
// @param {Function} getCurrentVerb Returns `{ index, name }` for the running verb, or null.
// @returns {void}
export function installDownloadCapture(context, getCurrentVerb) {
  const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
  if (!artifactsDir) {
    log("[download-capture] WB_ARTIFACTS_DIR not set; auto-capture disabled");
    return;
  }
  const allowlist = parseExtensionAllowlist(
    process.env.WB_BROWSER_DOWNLOAD_EXTENSIONS,
  );
  if (allowlist) {
    log(
      `[download-capture] enabled; extension allowlist: ${[...allowlist].join(",")}`,
    );
  } else {
    log("[download-capture] enabled; capturing all downloads");
  }

  // Downloads already handed to captureOne. Weak so finished Download
  // objects can be garbage-collected.
  const seen = new WeakSet();

  const onDownload = (download) => {
    if (seen.has(download)) return;
    seen.add(download);
    captureOne({ download, artifactsDir, allowlist, getCurrentVerb }).catch(
      (e) => {
        // Never let a failed capture take down the slice — emit a frame
        // so the operator sees the failure, then drop it.
        logWarn(`[download-capture] ${e.stack || e.message}`);
        try {
          send({
            type: "slice.download_failed",
            error: String(e.message || e),
            url: safeUrl(download),
            suggested_filename: safeSuggested(download),
          });
        } catch {}
      },
    );
  };

  const watchPage = (page) => {
    page.on("download", onDownload);
  };

  context.on("download", onDownload);
  context.on("page", watchPage);
  // Pages that existed before we were installed never produce a `page`
  // event, so hook them explicitly.
  if (typeof context.pages === "function") {
    for (const page of context.pages()) watchPage(page);
  }
}
|
|
91
|
+
|
|
92
|
+
// Process one Download: skip it if another component already claimed it,
// cancel + announce it if its extension is filtered out, otherwise save it
// under `artifactsDir` (collision-safe name) and announce the artifact with
// provenance. Throws when the target path would escape `artifactsDir` or
// when `saveAs` fails; the caller turns that into a `slice.download_failed`.
async function captureOne({ download, artifactsDir, allowlist, getCurrentVerb }) {
  if (download[HANDLED_MARK]) {
    return;
  }

  const suggested = safeSuggested(download);
  const sourceUrl = safeUrl(download);

  // The owning page may already be gone; a missing page URL is not fatal.
  let pageUrl;
  try {
    pageUrl = download.page().url();
  } catch {
    pageUrl = null;
  }

  let verb = {};
  if (typeof getCurrentVerb === "function") {
    verb = getCurrentVerb() || {};
  }
  const blame = {
    verb_index: verb.index ?? null,
    verb_name: verb.name ?? null,
  };

  const permitted = extensionAllowed(suggested, allowlist);
  if (!permitted) {
    send({
      type: "slice.download_skipped",
      reason: "extension_not_in_allowlist",
      suggested_filename: suggested,
      url: sourceUrl,
      page_url: pageUrl,
      ...blame,
      ts: Date.now(),
    });
    // Cancel the download so Playwright doesn't keep the temp file alive.
    try {
      await download.cancel();
    } catch {}
    return;
  }

  await fsPromises.mkdir(artifactsDir, { recursive: true });
  const target = uniquePathInside(artifactsDir, suggested);
  if (!target) {
    throw new Error(
      `download-capture: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
    );
  }

  await download.saveAs(target);

  // saveAs resolved, so the file should exist; if stat still fails we
  // announce without a size rather than emitting no event at all.
  const bytes = await fsPromises.stat(target).then(
    (info) => info.size,
    () => null,
  );

  send({
    type: "slice.artifact_saved",
    filename: path.basename(target),
    path: target,
    bytes,
    source: "download",
    provenance: {
      url: sourceUrl,
      suggested_filename: suggested,
      page_url: pageUrl,
      ...blame,
      ts: Date.now(),
    },
  });
}
|
|
164
|
+
|
|
165
|
+
// Read the download's suggested filename without ever throwing. Falls back
// to FALLBACK_NAME when the accessor throws or the name is empty or
// whitespace-only. A usable name is returned exactly as reported (untrimmed).
function safeSuggested(download) {
  try {
    const candidate = download.suggestedFilename();
    if (!candidate) return FALLBACK_NAME;
    if (!candidate.trim()) return FALLBACK_NAME;
    return candidate;
  } catch {
    return FALLBACK_NAME;
  }
}
|
|
173
|
+
|
|
174
|
+
// Read the download's source URL without ever throwing; yields null when
// the accessor throws.
function safeUrl(download) {
  let url;
  try {
    url = download.url();
  } catch {
    url = null;
  }
  return url;
}
|
package/lib/failure.js
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// Failure-event helpers — classifier + screenshot/console capture.
|
|
2
|
+
//
|
|
3
|
+
// `verb.failed` and `slice.failed` carry a stable `code` field so agents can
|
|
4
|
+
// switch on category instead of regex-matching English. Verb failures also
|
|
5
|
+
// snapshot a screenshot (best-effort) and the recent console buffer so
|
|
6
|
+
// post-hoc debugging doesn't depend on a single line of stderr.
|
|
7
|
+
//
|
|
8
|
+
// All capture is best-effort: a failed screenshot or a missing artifacts dir
|
|
9
|
+
// must NOT prevent the failure event from emitting.
|
|
10
|
+
|
|
11
|
+
import { promises as fs } from "node:fs";
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import { randomUUID } from "node:crypto";
|
|
14
|
+
|
|
15
|
+
const MAX_CONSOLE_ENTRIES = 50;
|
|
16
|
+
const MAX_LINE_CHARS = 512;
|
|
17
|
+
|
|
18
|
+
// Translate a thrown value into a stable failure code.
//
// Precedence:
//   1. a non-empty string `err.code` is passed through untouched;
//   2. a missing error is INTERNAL_ERROR;
//   3. a `TimeoutError` is NAV_TIMEOUT for `goto` or when its message
//      mentions load state / networkidle / navigation / "wait for url",
//      otherwise SELECTOR_NOT_FOUND;
//   4. any other error from `eval` / `extract` is SCRIPT_ERROR;
//   5. everything else is INTERNAL_ERROR.
//
// @param {*} err           The thrown value (usually an Error).
// @param {string} verbName Name of the verb (or phase) that was running.
// @returns {string}        The failure code.
export function classifyError(err, verbName) {
  if (!err) return "INTERNAL_ERROR";

  const explicit = err.code;
  if (typeof explicit === "string" && explicit) return explicit;

  const kind = err.name || "";
  const text = String(err.message || "");

  if (kind !== "TimeoutError") {
    const scripted = verbName === "eval" || verbName === "extract";
    return scripted ? "SCRIPT_ERROR" : "INTERNAL_ERROR";
  }

  if (verbName === "goto") return "NAV_TIMEOUT";

  const looksLikeNavigation =
    /load\s*state|networkidle|navigation|wait\s+for\s+url/i.test(text);
  return looksLikeNavigation ? "NAV_TIMEOUT" : "SELECTOR_NOT_FOUND";
}
|
|
35
|
+
|
|
36
|
+
// Subscribe to a Page's `console` and `pageerror` events and collect them in
// a bounded FIFO array, which is returned so the caller can keep it next to
// the Page. Each entry is `{ type, text, at }`; `text` is clipped to
// MAX_LINE_CHARS and only the newest MAX_CONSOLE_ENTRIES entries are kept.
// Every call adds a fresh pair of listeners, so call it once per page.
export function attachConsoleBuffer(page) {
  const entries = [];

  const record = ({ type, text, at }) => {
    const raw = String(text ?? "");
    entries.push({
      type,
      text: raw.slice(0, MAX_LINE_CHARS),
      at: at ?? Date.now(),
    });
    const overflow = entries.length - MAX_CONSOLE_ENTRIES;
    if (overflow > 0) entries.splice(0, overflow);
  };

  page.on("console", (msg) => record({ type: msg.type(), text: msg.text() }));
  page.on("pageerror", (err) =>
    record({ type: "pageerror", text: err?.message ?? String(err) }),
  );

  return entries;
}
|
|
59
|
+
|
|
60
|
+
// Gather best-effort diagnostics for a failed verb.
//
// Returns `{ screenshot_path, console_tail }`:
//   - `console_tail` is a copy of `consoleBuffer` with each entry's text run
//     through `scrubSecrets(text, secrets)` when a scrubber is supplied;
//     it is `[]` when `consoleBuffer` is not an array.
//   - `screenshot_path` is the PNG's file name inside `artifactsDir`, or
//     null when there is no page / artifacts dir or the capture failed.
//
// The screenshot never throws out of this function. It is also explicitly
// time-bounded so a wedged page cannot hold up the failure event this data
// is attached to, and a partially written temp file is removed on failure
// so it doesn't linger in the artifacts dir.
//
// @param {object}   opts
// @param {object}   [opts.page]          Page-like object with `screenshot()`.
// @param {string}   [opts.artifactsDir]  Directory to write the PNG into.
// @param {number}   opts.verbIndex       Index of the failing verb (used in the file name).
// @param {Array}    [opts.consoleBuffer] Entries of `{ type, text, at }`.
// @param {Function} [opts.scrubSecrets]  `(text, secrets) => string`.
// @param {*}        [opts.secrets]       Passed through to `scrubSecrets`.
// @returns {Promise<{screenshot_path: (string|null), console_tail: Array}>}
export async function captureFailureDiagnostics({
  page,
  artifactsDir,
  verbIndex,
  consoleBuffer,
  scrubSecrets,
  secrets,
}) {
  // Upper bound for the screenshot so diagnostics can't stall the failure
  // frame; chosen to stay small next to typical slice deadlines.
  const SCREENSHOT_TIMEOUT_MS = 5000;

  const out = { screenshot_path: null, console_tail: [] };

  if (Array.isArray(consoleBuffer)) {
    const scrub = typeof scrubSecrets === "function" ? scrubSecrets : null;
    out.console_tail = consoleBuffer.map((entry) => ({
      type: entry.type,
      text: scrub ? scrub(entry.text, secrets) : String(entry.text),
      at: entry.at,
    }));
  }

  if (page && artifactsDir) {
    let tmp = null;
    try {
      const filename = `wb-failure-${verbIndex}-${Date.now()}.png`;
      const fullPath = path.join(artifactsDir, filename);
      // Write to a unique temp name, then rename, so a watcher of the
      // artifacts dir never observes a half-written PNG.
      tmp = `${fullPath}.${process.pid}.${randomUUID().slice(0, 8)}.tmp`;
      const buf = await page.screenshot({
        type: "png",
        timeout: SCREENSHOT_TIMEOUT_MS,
      });
      await fs.mkdir(path.dirname(fullPath), { recursive: true });
      await fs.writeFile(tmp, buf);
      await fs.rename(tmp, fullPath);
      out.screenshot_path = filename;
    } catch {
      // Screenshot capture is best-effort; don't let a Page crash or a
      // permission error mask the underlying verb failure. Remove any
      // leftover temp file, ignoring cleanup errors for the same reason.
      if (tmp) {
        await fs.rm(tmp, { force: true }).catch(() => {});
      }
    }
  }

  return out;
}
|
|
@@ -77,15 +77,22 @@ export function createBrowserUseProvider() {
|
|
|
77
77
|
"bu.create",
|
|
78
78
|
);
|
|
79
79
|
if (!res.ok) {
|
|
80
|
-
|
|
80
|
+
const err = new Error(
|
|
81
81
|
`browser-use create failed (${res.status}): ${await safeText(res)}`,
|
|
82
82
|
);
|
|
83
|
+
err.code =
|
|
84
|
+
res.status === 401 || res.status === 403
|
|
85
|
+
? "AUTH_FAILED"
|
|
86
|
+
: "SESSION_ALLOCATE_FAILED";
|
|
87
|
+
throw err;
|
|
83
88
|
}
|
|
84
89
|
const created = await res.json();
|
|
85
90
|
if (!created.cdpUrl) {
|
|
86
|
-
|
|
91
|
+
const err = new Error(
|
|
87
92
|
`browser-use create returned no cdpUrl (status=${created.status ?? "?"}); session unusable`,
|
|
88
93
|
);
|
|
94
|
+
err.code = "SESSION_ALLOCATE_FAILED";
|
|
95
|
+
throw err;
|
|
89
96
|
}
|
|
90
97
|
return {
|
|
91
98
|
sid: created.id,
|
|
@@ -65,9 +65,14 @@ export function createBrowserbaseProvider() {
|
|
|
65
65
|
"bb.create",
|
|
66
66
|
);
|
|
67
67
|
if (!res.ok) {
|
|
68
|
-
|
|
68
|
+
const err = new Error(
|
|
69
69
|
`Browserbase create failed (${res.status}): ${await safeText(res)}`,
|
|
70
70
|
);
|
|
71
|
+
err.code =
|
|
72
|
+
res.status === 401 || res.status === 403
|
|
73
|
+
? "AUTH_FAILED"
|
|
74
|
+
: "SESSION_ALLOCATE_FAILED";
|
|
75
|
+
throw err;
|
|
71
76
|
}
|
|
72
77
|
const created = await res.json();
|
|
73
78
|
return { sid: created.id, cdpUrl: created.connectUrl };
|
|
@@ -81,9 +86,14 @@ export function createBrowserbaseProvider() {
|
|
|
81
86
|
"bb.debug",
|
|
82
87
|
);
|
|
83
88
|
if (!res.ok) {
|
|
84
|
-
|
|
89
|
+
const err = new Error(
|
|
85
90
|
`Browserbase debug fetch failed (${res.status}): ${await safeText(res)}`,
|
|
86
91
|
);
|
|
92
|
+
err.code =
|
|
93
|
+
res.status === 401 || res.status === 403
|
|
94
|
+
? "AUTH_FAILED"
|
|
95
|
+
: "SESSION_ALLOCATE_FAILED";
|
|
96
|
+
throw err;
|
|
87
97
|
}
|
|
88
98
|
const body = await res.json();
|
|
89
99
|
return body.debuggerFullscreenUrl;
|
package/lib/providers/index.js
CHANGED
|
@@ -21,10 +21,12 @@
|
|
|
21
21
|
//
|
|
22
22
|
// Vendor selection is a single env var, resolved once at sidecar boot:
|
|
23
23
|
// WB_BROWSER_VENDOR=browserbase (default)
|
|
24
|
-
// WB_BROWSER_VENDOR=browser-use
|
|
24
|
+
// WB_BROWSER_VENDOR=browser-use
|
|
25
|
+
// WB_BROWSER_VENDOR=local — host-installed Chromium (dev iteration)
|
|
25
26
|
|
|
26
27
|
import { createBrowserbaseProvider } from "./browserbase.js";
|
|
27
28
|
import { createBrowserUseProvider } from "./browser-use.js";
|
|
29
|
+
import { createLocalProvider } from "./local.js";
|
|
28
30
|
|
|
29
31
|
export function getProvider() {
|
|
30
32
|
const raw = (process.env.WB_BROWSER_VENDOR || "browserbase")
|
|
@@ -35,9 +37,11 @@ export function getProvider() {
|
|
|
35
37
|
return createBrowserbaseProvider();
|
|
36
38
|
case "browser-use":
|
|
37
39
|
return createBrowserUseProvider();
|
|
40
|
+
case "local":
|
|
41
|
+
return createLocalProvider();
|
|
38
42
|
default:
|
|
39
43
|
throw new Error(
|
|
40
|
-
`WB_BROWSER_VENDOR="${raw}" is not a known vendor (expected: browserbase | browser-use)`,
|
|
44
|
+
`WB_BROWSER_VENDOR="${raw}" is not a known vendor (expected: browserbase | browser-use | local)`,
|
|
41
45
|
);
|
|
42
46
|
}
|
|
43
47
|
}
|