wb-browser-runtime 0.10.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +70 -2
- package/bin/wb-browser-runtime.js +128 -9
- package/lib/download-capture.js +180 -0
- package/lib/failure.js +99 -0
- package/lib/providers/browser-use.js +9 -2
- package/lib/providers/browserbase.js +12 -2
- package/lib/providers/index.js +6 -2
- package/lib/providers/local.js +120 -0
- package/lib/stub-page.js +16 -0
- package/lib/util.js +58 -0
- package/package.json +1 -1
- package/verbs/click.js +24 -2
- package/verbs/index.js +2 -0
- package/verbs/wait_for_network_idle.js +51 -0
package/README.md
CHANGED
|
@@ -33,8 +33,8 @@ specific run.
|
|
|
33
33
|
|
|
34
34
|
## Vendor selection
|
|
35
35
|
|
|
36
|
-
`WB_BROWSER_VENDOR` — `browserbase` (default)
|
|
37
|
-
at sidecar boot; there is no per-slice override.
|
|
36
|
+
`WB_BROWSER_VENDOR` — `browserbase` (default), `browser-use`, or `local`.
|
|
37
|
+
Resolved once at sidecar boot; there is no per-slice override.
|
|
38
38
|
|
|
39
39
|
### Browserbase (default)
|
|
40
40
|
|
|
@@ -55,6 +55,40 @@ Profile (auth state) is selected per-runbook via the `profile_id:` field on a
|
|
|
55
55
|
default when the browser block omits `profile_id:`; a per-runbook `profile_id:`
|
|
56
56
|
always wins over the env var.
|
|
57
57
|
|
|
58
|
+
### local
|
|
59
|
+
|
|
60
|
+
`WB_BROWSER_VENDOR=local` drives a host-installed Playwright Chromium directly
|
|
61
|
+
— no API keys, no network calls, no per-session cost. Use for dev iteration
|
|
62
|
+
when you'd otherwise burn vendor minutes on broken selectors.
|
|
63
|
+
|
|
64
|
+
| Env var | Default | Purpose |
|
|
65
|
+
|--------------------------------------|--------------|------------------------------------------------------|
|
|
66
|
+
| `WB_BROWSER_LOCAL_HEADLESS` | `1` | Set `0`/`false` for a visible browser window. |
|
|
67
|
+
| `WB_BROWSER_LOCAL_EXECUTABLE_PATH` | *(unset)* | Absolute path to a Chrome/Chromium binary. Overrides Playwright's bundled download. |
|
|
68
|
+
| `WB_BROWSER_LOCAL_CHANNEL` | *(unset)* | Playwright channel name (`chrome`, `msedge`, `chrome-beta`, ...) for an OS-installed browser. Mutually exclusive with `EXECUTABLE_PATH`. |
|
|
69
|
+
|
|
70
|
+
Trade-offs vs cloud vendors:
|
|
71
|
+
|
|
72
|
+
- **No live URL.** `slice.session_started.live_url` is `null` — no remote
|
|
73
|
+
inspector, no Loom-style live preview. Use a non-headless run with
|
|
74
|
+
`WB_BROWSER_LOCAL_HEADLESS=0` if you want to watch.
|
|
75
|
+
- **No persistent profile.** Each run starts with a clean Chromium state.
|
|
76
|
+
Cloud-side "profile" features (auth-state binding) aren't available.
|
|
77
|
+
- **No resume after pause.** If a workbook hits a `wait` fence that suspends
|
|
78
|
+
the sidecar, the in-process Chromium dies with it. On `wb resume` the
|
|
79
|
+
local provider re-allocates a fresh browser. Cloud vendors can keep the
|
|
80
|
+
session alive for resume.
|
|
81
|
+
|
|
82
|
+
First-time install:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
npx playwright install chromium
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
If you skip this, `allocate()` fails with a hint pointing back at the
|
|
89
|
+
install command. Or set `WB_BROWSER_LOCAL_EXECUTABLE_PATH` /
|
|
90
|
+
`WB_BROWSER_LOCAL_CHANNEL` to use a system browser without the download.
|
|
91
|
+
|
|
58
92
|
## Profiles
|
|
59
93
|
|
|
60
94
|
Some vendors expose persistent browser profiles — cookies, localStorage, saved
|
|
@@ -212,6 +246,40 @@ When `WB_ARTIFACTS_UPLOAD_URL` is set (template supports `{run_id}` and
|
|
|
212
246
|
produced it completes. Auth reuses `WB_RECORDING_UPLOAD_SECRET`
|
|
213
247
|
(`Authorization: Bearer <…>`); failures are logged and non-fatal.
|
|
214
248
|
|
|
249
|
+
### Auto-captured downloads
|
|
250
|
+
|
|
251
|
+
The sidecar attaches a context-level `download` listener at session
|
|
252
|
+
start, so any file the browser downloads — clicked attachments, redirect
|
|
253
|
+
chains that end in a binary, popup-driven Save As — is saved to
|
|
254
|
+
`$WB_ARTIFACTS_DIR` automatically and emitted as a `slice.artifact_saved`
|
|
255
|
+
frame (with `source: "download"` and a `provenance` block: source URL,
|
|
256
|
+
page URL, the verb that was running, suggested filename). No verb call
|
|
257
|
+
required. For cloud-provider browsers, Playwright streams the bytes back
|
|
258
|
+
over CDP, so the file always lands on the sidecar machine where the
|
|
259
|
+
artifacts dir + uploader live.
|
|
260
|
+
|
|
261
|
+
Filename collisions in a single session get `-2`, `-3`, … suffixed
|
|
262
|
+
(Playwright's `download.saveAs()` blindly overwrites, so we apply the
|
|
263
|
+
suffixing ourselves).
|
|
264
|
+
|
|
265
|
+
There is no size cap — `download.saveAs()` only resolves once the bytes
|
|
266
|
+
are fully streamed, so a hung download trips the cell's own timeout
|
|
267
|
+
(default 120s slice deadline; bump via `WB_SLICE_DEADLINE_MS`) and
|
|
268
|
+
surfaces as a normal cell failure.
|
|
269
|
+
|
|
270
|
+
To filter, set `WB_BROWSER_DOWNLOAD_EXTENSIONS` to a comma-separated
|
|
271
|
+
list (case-insensitive, leading dots ignored):
|
|
272
|
+
|
|
273
|
+
```yaml
|
|
274
|
+
env:
|
|
275
|
+
WB_BROWSER_DOWNLOAD_EXTENSIONS: pdf,xlsx,csv,docx
|
|
276
|
+
```
|
|
277
|
+
|
|
278
|
+
When an allowlist is set, non-matching downloads are cancelled and
|
|
279
|
+
emitted as `slice.download_skipped` (with `reason:
|
|
280
|
+
"extension_not_in_allowlist"`) so the operator sees what was discarded.
|
|
281
|
+
Unset = capture everything.
|
|
282
|
+
|
|
215
283
|
## Protocol
|
|
216
284
|
|
|
217
285
|
Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
|
package/bin/wb-browser-runtime.js
CHANGED
|
@@ -34,6 +34,12 @@ import {
|
|
|
34
34
|
loadRecordingConfig,
|
|
35
35
|
} from "../lib/recording-manager.js";
|
|
36
36
|
import { getProvider } from "../lib/providers/index.js";
|
|
37
|
+
import {
|
|
38
|
+
attachConsoleBuffer,
|
|
39
|
+
captureFailureDiagnostics,
|
|
40
|
+
classifyError,
|
|
41
|
+
} from "../lib/failure.js";
|
|
42
|
+
import { installDownloadCapture } from "../lib/download-capture.js";
|
|
37
43
|
import { SUPPORTS, runVerb, verbName } from "../verbs/index.js";
|
|
38
44
|
|
|
39
45
|
const VERSION = "0.8.0";
|
|
@@ -56,7 +62,7 @@ if (recording.enabled) {
|
|
|
56
62
|
|
|
57
63
|
const sessions = new SessionManager();
|
|
58
64
|
|
|
59
|
-
async function ensureSession(name, { profile } = {}) {
|
|
65
|
+
async function ensureSession(name, { profile, restoreSession } = {}) {
|
|
60
66
|
return sessions.ensure(name, async () => {
|
|
61
67
|
// Vendors charge for the session the moment allocate() returns; if
|
|
62
68
|
// anything after this point throws (getLiveUrl, CDP connect, newContext,
|
|
@@ -69,32 +75,67 @@ async function ensureSession(name, { profile } = {}) {
|
|
|
69
75
|
// against a cold vendor region, but the live-URL fetch and
|
|
70
76
|
// newContext/newPage can each stall independently.
|
|
71
77
|
const t0 = Date.now();
|
|
72
|
-
const
|
|
78
|
+
const restored =
|
|
79
|
+
restoreSession &&
|
|
80
|
+
restoreSession.vendor === provider.name &&
|
|
81
|
+
restoreSession.cdpUrl;
|
|
82
|
+
const allocated = restored
|
|
83
|
+
? {
|
|
84
|
+
sid: restoreSession.sid,
|
|
85
|
+
cdpUrl: restoreSession.cdpUrl,
|
|
86
|
+
_liveUrl: restoreSession.liveUrl ?? null,
|
|
87
|
+
_restored: true,
|
|
88
|
+
}
|
|
89
|
+
: await provider.allocate({ profile, sessionName: name });
|
|
73
90
|
const tAllocated = Date.now();
|
|
74
91
|
let browser = null;
|
|
75
92
|
try {
|
|
76
|
-
const liveUrl = await provider.getLiveUrl(allocated);
|
|
77
|
-
|
|
93
|
+
const liveUrl = allocated._liveUrl ?? (await provider.getLiveUrl(allocated));
|
|
94
|
+
// Local provider returns a pre-built Browser via `_browser` (no CDP
|
|
95
|
+
// round-trip — chromium is already launched in-process). Cloud
|
|
96
|
+
// providers return a `cdpUrl` we connect to. Restored sessions
|
|
97
|
+
// always reconnect via CDP.
|
|
98
|
+
browser =
|
|
99
|
+
allocated._browser ??
|
|
100
|
+
(await chromium.connectOverCDP(allocated.cdpUrl));
|
|
78
101
|
const tConnected = Date.now();
|
|
79
|
-
|
|
102
|
+
// acceptDownloads is true by default for Playwright-launched contexts,
|
|
103
|
+
// but we set it explicitly so the listener installed below isn't a
|
|
104
|
+
// no-op against a vendor-provided context that opted out.
|
|
105
|
+
const context =
|
|
106
|
+
browser.contexts()[0] ??
|
|
107
|
+
(await browser.newContext({ acceptDownloads: true }));
|
|
80
108
|
const page = context.pages()[0] ?? (await context.newPage());
|
|
109
|
+
const consoleBuffer = attachConsoleBuffer(page);
|
|
81
110
|
const tPageReady = Date.now();
|
|
82
111
|
|
|
83
112
|
const info = {
|
|
84
113
|
sid: allocated.sid,
|
|
114
|
+
cdpUrl: allocated.cdpUrl,
|
|
115
|
+
vendor: provider.name,
|
|
85
116
|
browser,
|
|
86
117
|
context,
|
|
87
118
|
page,
|
|
88
119
|
liveUrl,
|
|
89
120
|
recording: null,
|
|
121
|
+
consoleBuffer,
|
|
122
|
+
// Updated by handleSlice's verb loop so the download listener
|
|
123
|
+
// can attach `verb_index`/`verb_name` provenance to artifacts
|
|
124
|
+
// captured while a verb is running. Null between slices.
|
|
125
|
+
currentVerb: null,
|
|
90
126
|
};
|
|
91
127
|
|
|
128
|
+
// Install the always-on download listener now, before any slice
|
|
129
|
+
// runs, so a download fired by the very first verb is captured.
|
|
130
|
+
installDownloadCapture(context, () => info.currentVerb);
|
|
131
|
+
|
|
92
132
|
send({
|
|
93
133
|
type: "slice.session_started",
|
|
94
134
|
session: name,
|
|
95
135
|
session_id: allocated.sid,
|
|
96
136
|
live_url: liveUrl,
|
|
97
137
|
vendor: provider.name,
|
|
138
|
+
restored: Boolean(restored),
|
|
98
139
|
started_at: new Date().toISOString(),
|
|
99
140
|
timings: {
|
|
100
141
|
allocate_ms: tAllocated - t0,
|
|
@@ -107,12 +148,12 @@ async function ensureSession(name, { profile } = {}) {
|
|
|
107
148
|
await recording.start(info, name);
|
|
108
149
|
return info;
|
|
109
150
|
} catch (e) {
|
|
110
|
-
if (browser) {
|
|
151
|
+
if (browser && !allocated._restored) {
|
|
111
152
|
try {
|
|
112
153
|
await browser.close();
|
|
113
154
|
} catch {}
|
|
114
155
|
}
|
|
115
|
-
await provider.release(allocated.sid);
|
|
156
|
+
if (!allocated._restored) await provider.release(allocated.sid);
|
|
116
157
|
throw e;
|
|
117
158
|
}
|
|
118
159
|
});
|
|
@@ -252,13 +293,18 @@ async function handleSlice(msg) {
|
|
|
252
293
|
const verbs = Array.isArray(msg.verbs) ? msg.verbs : [];
|
|
253
294
|
const sessionName = msg.session || "default";
|
|
254
295
|
const restore = msg.restore || null;
|
|
296
|
+
const restoreSession = restore?.state?.session || null;
|
|
255
297
|
|
|
256
298
|
let session;
|
|
257
299
|
try {
|
|
258
|
-
session = await ensureSession(sessionName, {
|
|
300
|
+
session = await ensureSession(sessionName, {
|
|
301
|
+
profile: msg.profile,
|
|
302
|
+
restoreSession,
|
|
303
|
+
});
|
|
259
304
|
} catch (e) {
|
|
260
305
|
send({
|
|
261
306
|
type: "slice.failed",
|
|
307
|
+
code: classifyError(e, "session"),
|
|
262
308
|
error: `session start failed: ${scrubSecrets(e.message, sliceCtx.secrets)}`,
|
|
263
309
|
});
|
|
264
310
|
return;
|
|
@@ -280,6 +326,7 @@ async function handleSlice(msg) {
|
|
|
280
326
|
if (Date.now() >= sliceDeadline) {
|
|
281
327
|
send({
|
|
282
328
|
type: "slice.failed",
|
|
329
|
+
code: "SLICE_TIMEOUT",
|
|
283
330
|
error: `slice exceeded deadline (${sliceDeadlineMs}ms); aborted before verb index ${i} of ${verbs.length}`,
|
|
284
331
|
});
|
|
285
332
|
return;
|
|
@@ -287,6 +334,12 @@ async function handleSlice(msg) {
|
|
|
287
334
|
const v = verbs[i];
|
|
288
335
|
const name = verbName(v);
|
|
289
336
|
const verbStart = Date.now();
|
|
337
|
+
// Tell the passive download listener which verb to blame for any
|
|
338
|
+
// download that fires during this iteration. Cleared in `finally`
|
|
339
|
+
// so a download arriving between verbs (rare, but possible during
|
|
340
|
+
// a settle/redirect) records as "no current verb" instead of
|
|
341
|
+
// sticking the previous one's name on it.
|
|
342
|
+
session.currentVerb = { index: i, name };
|
|
290
343
|
try {
|
|
291
344
|
const summary = await runVerb(session.page, v, i, sliceCtx, expand);
|
|
292
345
|
// Pause-sentinel escape hatch: a verb signals a mid-slice halt by
|
|
@@ -311,7 +364,17 @@ async function handleSlice(msg) {
|
|
|
311
364
|
// descriptor and handed back on resume. The verb can stash
|
|
312
365
|
// whatever it needs here; we always ensure verb_index is set
|
|
313
366
|
// so the dispatcher can compute startAt on re-entry.
|
|
314
|
-
sidecar_state: {
|
|
367
|
+
sidecar_state: {
|
|
368
|
+
...(pauseMeta.sidecar_state || {}),
|
|
369
|
+
verb_index: i,
|
|
370
|
+
session: {
|
|
371
|
+
vendor: session.vendor,
|
|
372
|
+
name: sessionName,
|
|
373
|
+
sid: session.sid,
|
|
374
|
+
cdpUrl: session.cdpUrl,
|
|
375
|
+
liveUrl: session.liveUrl,
|
|
376
|
+
},
|
|
377
|
+
},
|
|
315
378
|
});
|
|
316
379
|
return;
|
|
317
380
|
}
|
|
@@ -325,26 +388,44 @@ async function handleSlice(msg) {
|
|
|
325
388
|
} catch (e) {
|
|
326
389
|
const duration_ms = Date.now() - verbStart;
|
|
327
390
|
const clean = scrubSecrets(e.message, sliceCtx.secrets);
|
|
391
|
+
const code = classifyError(e, name);
|
|
392
|
+
const diagnostics = await captureFailureDiagnostics({
|
|
393
|
+
page: session.page,
|
|
394
|
+
artifactsDir: (process.env.WB_ARTIFACTS_DIR || "").trim() || null,
|
|
395
|
+
verbIndex: i,
|
|
396
|
+
consoleBuffer: session.consoleBuffer,
|
|
397
|
+
scrubSecrets,
|
|
398
|
+
secrets: sliceCtx.secrets,
|
|
399
|
+
});
|
|
328
400
|
send({
|
|
329
401
|
type: "verb.failed",
|
|
330
402
|
verb: name,
|
|
331
403
|
verb_index: i,
|
|
404
|
+
code,
|
|
332
405
|
error: clean,
|
|
333
406
|
duration_ms,
|
|
407
|
+
screenshot_path: diagnostics.screenshot_path,
|
|
408
|
+
console_tail: diagnostics.console_tail,
|
|
334
409
|
});
|
|
335
410
|
send({
|
|
336
411
|
type: "slice.failed",
|
|
412
|
+
code,
|
|
337
413
|
error: `verb ${name} (index ${i}): ${clean}`,
|
|
338
414
|
});
|
|
339
415
|
return;
|
|
340
416
|
}
|
|
341
417
|
}
|
|
418
|
+
// Slice ended cleanly — clear the listener's "currently running verb"
|
|
419
|
+
// pointer so a stray late-arriving download doesn't get stamped with
|
|
420
|
+
// the last verb's name.
|
|
421
|
+
session.currentVerb = null;
|
|
342
422
|
send({ type: "slice.complete" });
|
|
343
423
|
} catch (e) {
|
|
344
424
|
log(`[slice] unhandled: ${e.stack || e.message}`);
|
|
345
425
|
try {
|
|
346
426
|
send({
|
|
347
427
|
type: "slice.failed",
|
|
428
|
+
code: classifyError(e, "sidecar"),
|
|
348
429
|
error: `sidecar error: ${scrubSecrets(e.message, sliceCtx.secrets)}`,
|
|
349
430
|
});
|
|
350
431
|
} catch {}
|
|
@@ -391,6 +472,32 @@ async function shutdown() {
|
|
|
391
472
|
process.exit(0);
|
|
392
473
|
}
|
|
393
474
|
|
|
475
|
+
async function suspend() {
|
|
476
|
+
if (shuttingDown) return;
|
|
477
|
+
shuttingDown = true;
|
|
478
|
+
// Flush recordings while CDP is still connected, but intentionally leave
|
|
479
|
+
// browser contexts and vendor sessions open. The operator needs the live
|
|
480
|
+
// inspector after wb exits 42, and wb resume reconnects using the persisted
|
|
481
|
+
// cdpUrl/liveUrl in sidecar_state.
|
|
482
|
+
for (const [name, info] of sessions) {
|
|
483
|
+
try {
|
|
484
|
+
await recording.flush(info, name);
|
|
485
|
+
} catch (e) {
|
|
486
|
+
log(`[suspend] flush recording ${name}: ${e.message}`);
|
|
487
|
+
try {
|
|
488
|
+
send({
|
|
489
|
+
type: "slice.recording.failed",
|
|
490
|
+
session: name,
|
|
491
|
+
run_id: recording.runId,
|
|
492
|
+
reason: `suspend_finalize_error: ${e.message}`,
|
|
493
|
+
});
|
|
494
|
+
} catch {}
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
log("[suspend] leaving browser session alive for external resume");
|
|
498
|
+
process.exit(0);
|
|
499
|
+
}
|
|
500
|
+
|
|
394
501
|
// --- Main loop --------------------------------------------------------------
|
|
395
502
|
|
|
396
503
|
const rl = readline.createInterface({ input: process.stdin, terminal: false });
|
|
@@ -426,6 +533,15 @@ async function drainAndShutdown() {
|
|
|
426
533
|
await shutdown();
|
|
427
534
|
}
|
|
428
535
|
|
|
536
|
+
async function drainAndSuspend() {
|
|
537
|
+
try {
|
|
538
|
+
await sessions.drainAll();
|
|
539
|
+
} catch (e) {
|
|
540
|
+
log(`[suspend] drain failed: ${e.message}`);
|
|
541
|
+
}
|
|
542
|
+
await suspend();
|
|
543
|
+
}
|
|
544
|
+
|
|
429
545
|
rl.on("line", (line) => {
|
|
430
546
|
const trimmed = line.trim();
|
|
431
547
|
if (!trimmed) return;
|
|
@@ -453,6 +569,9 @@ rl.on("line", (line) => {
|
|
|
453
569
|
case "shutdown":
|
|
454
570
|
drainAndShutdown();
|
|
455
571
|
break;
|
|
572
|
+
case "suspend":
|
|
573
|
+
drainAndSuspend();
|
|
574
|
+
break;
|
|
456
575
|
default:
|
|
457
576
|
log(`[warn] unknown message type: ${msg.type}`);
|
|
458
577
|
}
|
package/lib/download-capture.js
ADDED
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
// download-capture — passive capture of any file the browser downloads
|
|
2
|
+
// during a session, regardless of which verb (or page redirect, or popup)
|
|
3
|
+
// triggered it.
|
|
4
|
+
//
|
|
5
|
+
// The runbook author doesn't have to predict downloads. We attach a
|
|
6
|
+
// `download` listener to the BrowserContext at session start; every file
|
|
7
|
+
// the browser saves lands in `$WB_ARTIFACTS_DIR` and gets announced via a
|
|
8
|
+
// `slice.artifact_saved` frame so wb's existing R2 uploader picks it up
|
|
9
|
+
// for free. Provenance (page URL, source URL, which verb was running, ts)
|
|
10
|
+
// rides along on the frame so the run-page event feed can show *why* a
|
|
11
|
+
// given file appeared.
|
|
12
|
+
//
|
|
13
|
+
// Filtering: if WB_BROWSER_DOWNLOAD_EXTENSIONS is set, only files whose
|
|
14
|
+
// extension matches the allowlist are kept. Skipped downloads still get a
|
|
15
|
+
// `slice.download_skipped` frame so the operator sees what was discarded
|
|
16
|
+
// (rare in practice — `download` events fire on real attachments, not
|
|
17
|
+
// inline analytics pings — but useful when a SPA emits noisy JSON blobs).
|
|
18
|
+
//
|
|
19
|
+
// Big files: there is no size cap. R2 is bottomless and the runbook's own
|
|
20
|
+
// timeout governs hung downloads — `download.saveAs()` only resolves once
|
|
21
|
+
// bytes are fully streamed, so a stuck download will trip the cell deadline
|
|
22
|
+
// and surface as a normal cell failure.
|
|
23
|
+
//
|
|
24
|
+
// Cloud vs local: `download.saveAs(absPath)` works for both. Playwright
|
|
25
|
+
// streams the bytes back over CDP for cloud-attached browsers, so the file
|
|
26
|
+
// always lands on the sidecar machine where $WB_ARTIFACTS_DIR lives.
|
|
27
|
+
|
|
28
|
+
import path from "node:path";
|
|
29
|
+
import { promises as fsPromises } from "node:fs";
|
|
30
|
+
import { send, log, logWarn } from "./io.js";
|
|
31
|
+
import {
|
|
32
|
+
uniquePathInside,
|
|
33
|
+
parseExtensionAllowlist,
|
|
34
|
+
extensionAllowed,
|
|
35
|
+
} from "./util.js";
|
|
36
|
+
|
|
37
|
+
// Marker that the explicit (future) `download:` gating verb sets on a
|
|
38
|
+
// Download object once it's claimed it. The passive listener checks for
|
|
39
|
+
// this and skips, so the same file isn't saved twice.
|
|
40
|
+
export const HANDLED_MARK = Symbol.for("wb.download.handled");
|
|
41
|
+
|
|
42
|
+
// Sentinel filename used when Playwright reports an empty suggestedFilename
|
|
43
|
+
// (rare, but theoretically possible for downloads with no Content-
|
|
44
|
+
// Disposition header and an empty URL path).
|
|
45
|
+
const FALLBACK_NAME = "download.bin";
|
|
46
|
+
|
|
47
|
+
// Install the always-on download listener on `context`. Returns a no-op
|
|
48
|
+
// when WB_ARTIFACTS_DIR isn't set — without an artifacts dir there's
|
|
49
|
+
// nowhere to put the file, and bailing here is preferable to inventing a
|
|
50
|
+
// temp dir that wb's uploader doesn't watch.
|
|
51
|
+
//
|
|
52
|
+
// `getCurrentVerb()` is a callback the entry point updates each iteration
|
|
53
|
+
// of the slice loop, so the listener can attach `verb_index` / `verb_name`
|
|
54
|
+
// to the announcement without the slice loop having to reach back into
|
|
55
|
+
// this module.
|
|
56
|
+
export function installDownloadCapture(context, getCurrentVerb) {
|
|
57
|
+
const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
|
|
58
|
+
if (!artifactsDir) {
|
|
59
|
+
log("[download-capture] WB_ARTIFACTS_DIR not set; auto-capture disabled");
|
|
60
|
+
return;
|
|
61
|
+
}
|
|
62
|
+
const allowlist = parseExtensionAllowlist(
|
|
63
|
+
process.env.WB_BROWSER_DOWNLOAD_EXTENSIONS,
|
|
64
|
+
);
|
|
65
|
+
if (allowlist) {
|
|
66
|
+
log(
|
|
67
|
+
`[download-capture] enabled; extension allowlist: ${[...allowlist].join(",")}`,
|
|
68
|
+
);
|
|
69
|
+
} else {
|
|
70
|
+
log("[download-capture] enabled; capturing all downloads");
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
context.on("download", (download) => {
|
|
74
|
+
captureOne({ download, artifactsDir, allowlist, getCurrentVerb }).catch(
|
|
75
|
+
(e) => {
|
|
76
|
+
// Never let a failed capture take down the slice — emit a frame
|
|
77
|
+
// so the operator sees the failure, then drop it.
|
|
78
|
+
logWarn(`[download-capture] ${e.stack || e.message}`);
|
|
79
|
+
try {
|
|
80
|
+
send({
|
|
81
|
+
type: "slice.download_failed",
|
|
82
|
+
error: String(e.message || e),
|
|
83
|
+
url: safeUrl(download),
|
|
84
|
+
suggested_filename: safeSuggested(download),
|
|
85
|
+
});
|
|
86
|
+
} catch {}
|
|
87
|
+
},
|
|
88
|
+
);
|
|
89
|
+
});
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
async function captureOne({
|
|
93
|
+
download,
|
|
94
|
+
artifactsDir,
|
|
95
|
+
allowlist,
|
|
96
|
+
getCurrentVerb,
|
|
97
|
+
}) {
|
|
98
|
+
if (download[HANDLED_MARK]) return;
|
|
99
|
+
|
|
100
|
+
const suggested = safeSuggested(download);
|
|
101
|
+
const sourceUrl = safeUrl(download);
|
|
102
|
+
const pageUrl = (() => {
|
|
103
|
+
try {
|
|
104
|
+
return download.page().url();
|
|
105
|
+
} catch {
|
|
106
|
+
return null;
|
|
107
|
+
}
|
|
108
|
+
})();
|
|
109
|
+
const verb = (typeof getCurrentVerb === "function" && getCurrentVerb()) || {};
|
|
110
|
+
|
|
111
|
+
if (!extensionAllowed(suggested, allowlist)) {
|
|
112
|
+
send({
|
|
113
|
+
type: "slice.download_skipped",
|
|
114
|
+
reason: "extension_not_in_allowlist",
|
|
115
|
+
suggested_filename: suggested,
|
|
116
|
+
url: sourceUrl,
|
|
117
|
+
page_url: pageUrl,
|
|
118
|
+
verb_index: verb.index ?? null,
|
|
119
|
+
verb_name: verb.name ?? null,
|
|
120
|
+
ts: Date.now(),
|
|
121
|
+
});
|
|
122
|
+
// Cancel the download so Playwright doesn't keep the temp file alive.
|
|
123
|
+
try {
|
|
124
|
+
await download.cancel();
|
|
125
|
+
} catch {}
|
|
126
|
+
return;
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
await fsPromises.mkdir(artifactsDir, { recursive: true });
|
|
130
|
+
const target = uniquePathInside(artifactsDir, suggested);
|
|
131
|
+
if (!target) {
|
|
132
|
+
throw new Error(
|
|
133
|
+
`download-capture: refusing to save "${suggested}" — resolves outside $WB_ARTIFACTS_DIR`,
|
|
134
|
+
);
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
await download.saveAs(target);
|
|
138
|
+
|
|
139
|
+
let bytes = null;
|
|
140
|
+
try {
|
|
141
|
+
bytes = (await fsPromises.stat(target)).size;
|
|
142
|
+
} catch {
|
|
143
|
+
// saveAs resolved successfully so the file should exist; if stat fails
|
|
144
|
+
// we still announce, just without size. Better partial info than no
|
|
145
|
+
// event at all.
|
|
146
|
+
}
|
|
147
|
+
|
|
148
|
+
send({
|
|
149
|
+
type: "slice.artifact_saved",
|
|
150
|
+
filename: path.basename(target),
|
|
151
|
+
path: target,
|
|
152
|
+
bytes,
|
|
153
|
+
source: "download",
|
|
154
|
+
provenance: {
|
|
155
|
+
url: sourceUrl,
|
|
156
|
+
suggested_filename: suggested,
|
|
157
|
+
page_url: pageUrl,
|
|
158
|
+
verb_index: verb.index ?? null,
|
|
159
|
+
verb_name: verb.name ?? null,
|
|
160
|
+
ts: Date.now(),
|
|
161
|
+
},
|
|
162
|
+
});
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
function safeSuggested(download) {
|
|
166
|
+
try {
|
|
167
|
+
const s = download.suggestedFilename();
|
|
168
|
+
return s && s.trim() ? s : FALLBACK_NAME;
|
|
169
|
+
} catch {
|
|
170
|
+
return FALLBACK_NAME;
|
|
171
|
+
}
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
function safeUrl(download) {
|
|
175
|
+
try {
|
|
176
|
+
return download.url();
|
|
177
|
+
} catch {
|
|
178
|
+
return null;
|
|
179
|
+
}
|
|
180
|
+
}
|
package/lib/failure.js
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
// Failure-event helpers — classifier + screenshot/console capture.
|
|
2
|
+
//
|
|
3
|
+
// `verb.failed` and `slice.failed` carry a stable `code` field so agents can
|
|
4
|
+
// switch on category instead of regex-matching English. Verb failures also
|
|
5
|
+
// snapshot a screenshot (best-effort) and the recent console buffer so
|
|
6
|
+
// post-hoc debugging doesn't depend on a single line of stderr.
|
|
7
|
+
//
|
|
8
|
+
// All capture is best-effort: a failed screenshot or a missing artifacts dir
|
|
9
|
+
// must NOT prevent the failure event from emitting.
|
|
10
|
+
|
|
11
|
+
import { promises as fs } from "node:fs";
|
|
12
|
+
import path from "node:path";
|
|
13
|
+
import { randomUUID } from "node:crypto";
|
|
14
|
+
|
|
15
|
+
const MAX_CONSOLE_ENTRIES = 50;
|
|
16
|
+
const MAX_LINE_CHARS = 512;
|
|
17
|
+
|
|
18
|
+
// Map a verb-execution error to a stable code. Order matters: an explicit
|
|
19
|
+
// `err.code` (e.g. set by a provider for AUTH_FAILED) wins over inference.
|
|
20
|
+
export function classifyError(err, verbName) {
|
|
21
|
+
if (err && typeof err.code === "string" && err.code) return err.code;
|
|
22
|
+
if (!err) return "INTERNAL_ERROR";
|
|
23
|
+
const name = err.name || "";
|
|
24
|
+
const msg = String(err.message || "");
|
|
25
|
+
if (name === "TimeoutError") {
|
|
26
|
+
if (verbName === "goto") return "NAV_TIMEOUT";
|
|
27
|
+
if (/load\s*state|networkidle|navigation|wait\s+for\s+url/i.test(msg)) {
|
|
28
|
+
return "NAV_TIMEOUT";
|
|
29
|
+
}
|
|
30
|
+
return "SELECTOR_NOT_FOUND";
|
|
31
|
+
}
|
|
32
|
+
if (verbName === "eval" || verbName === "extract") return "SCRIPT_ERROR";
|
|
33
|
+
return "INTERNAL_ERROR";
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
// Attach console + pageerror listeners to a Page. Returns the buffer object
|
|
37
|
+
// (FIFO-capped) so callers can stash it next to the Page (e.g. on the
|
|
38
|
+
// SessionManager `info`). Calling twice on the same Page would double-record;
|
|
39
|
+
// callers are expected to only invoke once per page.
|
|
40
|
+
export function attachConsoleBuffer(page) {
|
|
41
|
+
const buffer = [];
|
|
42
|
+
const push = (entry) => {
|
|
43
|
+
const text = String(entry.text ?? "");
|
|
44
|
+
buffer.push({
|
|
45
|
+
type: entry.type,
|
|
46
|
+
text: text.length > MAX_LINE_CHARS ? text.slice(0, MAX_LINE_CHARS) : text,
|
|
47
|
+
at: entry.at ?? Date.now(),
|
|
48
|
+
});
|
|
49
|
+
while (buffer.length > MAX_CONSOLE_ENTRIES) buffer.shift();
|
|
50
|
+
};
|
|
51
|
+
page.on("console", (msg) => {
|
|
52
|
+
push({ type: msg.type(), text: msg.text() });
|
|
53
|
+
});
|
|
54
|
+
page.on("pageerror", (err) => {
|
|
55
|
+
push({ type: "pageerror", text: err?.message ?? String(err) });
|
|
56
|
+
});
|
|
57
|
+
return buffer;
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
// Snapshot console buffer (with secret scrubbing) and capture a screenshot.
|
|
61
|
+
// Returns `{ screenshot_path, console_tail }`. Both fields may be null/empty;
|
|
62
|
+
// caller decides whether to attach them to the failure event.
|
|
63
|
+
export async function captureFailureDiagnostics({
|
|
64
|
+
page,
|
|
65
|
+
artifactsDir,
|
|
66
|
+
verbIndex,
|
|
67
|
+
consoleBuffer,
|
|
68
|
+
scrubSecrets,
|
|
69
|
+
secrets,
|
|
70
|
+
}) {
|
|
71
|
+
const out = { screenshot_path: null, console_tail: [] };
|
|
72
|
+
|
|
73
|
+
if (Array.isArray(consoleBuffer)) {
|
|
74
|
+
const scrub = typeof scrubSecrets === "function" ? scrubSecrets : null;
|
|
75
|
+
out.console_tail = consoleBuffer.map((entry) => ({
|
|
76
|
+
type: entry.type,
|
|
77
|
+
text: scrub ? scrub(entry.text, secrets) : String(entry.text),
|
|
78
|
+
at: entry.at,
|
|
79
|
+
}));
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
if (page && artifactsDir) {
|
|
83
|
+
try {
|
|
84
|
+
const filename = `wb-failure-${verbIndex}-${Date.now()}.png`;
|
|
85
|
+
const fullPath = path.join(artifactsDir, filename);
|
|
86
|
+
const tmp = `${fullPath}.${process.pid}.${randomUUID().slice(0, 8)}.tmp`;
|
|
87
|
+
const buf = await page.screenshot({ type: "png" });
|
|
88
|
+
await fs.mkdir(path.dirname(fullPath), { recursive: true });
|
|
89
|
+
await fs.writeFile(tmp, buf);
|
|
90
|
+
await fs.rename(tmp, fullPath);
|
|
91
|
+
out.screenshot_path = filename;
|
|
92
|
+
} catch {
|
|
93
|
+
// Screenshot capture is best-effort; don't let a Page crash or a
|
|
94
|
+
// permission error mask the underlying verb failure.
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
return out;
|
|
99
|
+
}
|
package/lib/providers/browser-use.js
CHANGED
|
@@ -77,15 +77,22 @@ export function createBrowserUseProvider() {
|
|
|
77
77
|
"bu.create",
|
|
78
78
|
);
|
|
79
79
|
if (!res.ok) {
|
|
80
|
-
|
|
80
|
+
const err = new Error(
|
|
81
81
|
`browser-use create failed (${res.status}): ${await safeText(res)}`,
|
|
82
82
|
);
|
|
83
|
+
err.code =
|
|
84
|
+
res.status === 401 || res.status === 403
|
|
85
|
+
? "AUTH_FAILED"
|
|
86
|
+
: "SESSION_ALLOCATE_FAILED";
|
|
87
|
+
throw err;
|
|
83
88
|
}
|
|
84
89
|
const created = await res.json();
|
|
85
90
|
if (!created.cdpUrl) {
|
|
86
|
-
|
|
91
|
+
const err = new Error(
|
|
87
92
|
`browser-use create returned no cdpUrl (status=${created.status ?? "?"}); session unusable`,
|
|
88
93
|
);
|
|
94
|
+
err.code = "SESSION_ALLOCATE_FAILED";
|
|
95
|
+
throw err;
|
|
89
96
|
}
|
|
90
97
|
return {
|
|
91
98
|
sid: created.id,
|
package/lib/providers/browserbase.js
CHANGED
|
@@ -65,9 +65,14 @@ export function createBrowserbaseProvider() {
|
|
|
65
65
|
"bb.create",
|
|
66
66
|
);
|
|
67
67
|
if (!res.ok) {
|
|
68
|
-
|
|
68
|
+
const err = new Error(
|
|
69
69
|
`Browserbase create failed (${res.status}): ${await safeText(res)}`,
|
|
70
70
|
);
|
|
71
|
+
err.code =
|
|
72
|
+
res.status === 401 || res.status === 403
|
|
73
|
+
? "AUTH_FAILED"
|
|
74
|
+
: "SESSION_ALLOCATE_FAILED";
|
|
75
|
+
throw err;
|
|
71
76
|
}
|
|
72
77
|
const created = await res.json();
|
|
73
78
|
return { sid: created.id, cdpUrl: created.connectUrl };
|
|
@@ -81,9 +86,14 @@ export function createBrowserbaseProvider() {
|
|
|
81
86
|
"bb.debug",
|
|
82
87
|
);
|
|
83
88
|
if (!res.ok) {
|
|
84
|
-
|
|
89
|
+
const err = new Error(
|
|
85
90
|
`Browserbase debug fetch failed (${res.status}): ${await safeText(res)}`,
|
|
86
91
|
);
|
|
92
|
+
err.code =
|
|
93
|
+
res.status === 401 || res.status === 403
|
|
94
|
+
? "AUTH_FAILED"
|
|
95
|
+
: "SESSION_ALLOCATE_FAILED";
|
|
96
|
+
throw err;
|
|
87
97
|
}
|
|
88
98
|
const body = await res.json();
|
|
89
99
|
return body.debuggerFullscreenUrl;
|
package/lib/providers/index.js
CHANGED
|
@@ -21,10 +21,12 @@
|
|
|
21
21
|
//
|
|
22
22
|
// Vendor selection is a single env var, resolved once at sidecar boot:
|
|
23
23
|
// WB_BROWSER_VENDOR=browserbase (default)
|
|
24
|
-
// WB_BROWSER_VENDOR=browser-use
|
|
24
|
+
// WB_BROWSER_VENDOR=browser-use
|
|
25
|
+
// WB_BROWSER_VENDOR=local — host-installed Chromium (dev iteration)
|
|
25
26
|
|
|
26
27
|
import { createBrowserbaseProvider } from "./browserbase.js";
|
|
27
28
|
import { createBrowserUseProvider } from "./browser-use.js";
|
|
29
|
+
import { createLocalProvider } from "./local.js";
|
|
28
30
|
|
|
29
31
|
export function getProvider() {
|
|
30
32
|
const raw = (process.env.WB_BROWSER_VENDOR || "browserbase")
|
|
@@ -35,9 +37,11 @@ export function getProvider() {
|
|
|
35
37
|
return createBrowserbaseProvider();
|
|
36
38
|
case "browser-use":
|
|
37
39
|
return createBrowserUseProvider();
|
|
40
|
+
case "local":
|
|
41
|
+
return createLocalProvider();
|
|
38
42
|
default:
|
|
39
43
|
throw new Error(
|
|
40
|
-
`WB_BROWSER_VENDOR="${raw}" is not a known vendor (expected: browserbase | browser-use)`,
|
|
44
|
+
`WB_BROWSER_VENDOR="${raw}" is not a known vendor (expected: browserbase | browser-use | local)`,
|
|
41
45
|
);
|
|
42
46
|
}
|
|
43
47
|
}
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
// Local provider — drives a host-installed Playwright Chromium directly
|
|
2
|
+
// instead of going to a cloud vendor. Use for dev iteration without
|
|
3
|
+
// Browserbase / browser-use cost or latency. Selected via
|
|
4
|
+
// WB_BROWSER_VENDOR=local.
|
|
5
|
+
//
|
|
6
|
+
// Differences from cloud providers:
|
|
7
|
+
// 1. allocate() launches a real Chromium via `playwright-core`'s
|
|
8
|
+
// chromium.launch() and returns a pre-built Browser handle in
|
|
9
|
+
// `_browser`. The entry point checks for it and skips the
|
|
10
|
+
// connectOverCDP step that cloud providers require.
|
|
11
|
+
// 2. getLiveUrl() returns null — there's no public live-inspector URL
|
|
12
|
+
// for a locally-launched browser. The Rust side just renders the
|
|
13
|
+
// "session started" line without a clickable URL.
|
|
14
|
+
// 3. release() is a no-op. The shutdown path already does
|
|
15
|
+
// `info.browser.close()` on every cached session, which terminates
|
|
16
|
+
// the local Chromium process.
|
|
17
|
+
// 4. Profile binding is not supported (logged + ignored). For persistent
|
|
18
|
+
// auth across runs use a vendor with profile support, or pin
|
|
19
|
+
// WB_BROWSER_LOCAL_EXECUTABLE_PATH at a Chrome instance with a
|
|
20
|
+
// pre-warmed user-data-dir (advanced; not the supported path).
|
|
21
|
+
//
|
|
22
|
+
// Resume-after-pause: not supported. The Browser is process-local memory
|
|
23
|
+
// and dies with the sidecar; on resume the sidecar re-allocates a fresh
|
|
24
|
+
// session. This matches the dev-iteration use case (you're running the
|
|
25
|
+
// runbook end-to-end, not pausing on a real wait fence).
|
|
26
|
+
|
|
27
|
+
import { chromium } from "playwright-core";
|
|
28
|
+
import { log } from "../io.js";
|
|
29
|
+
|
|
30
|
+
// Env-knob helper for switches that are ON unless explicitly turned off.
// Only "0" / "false" / "no" / "off" (case-insensitive, surrounding
// whitespace ignored) count as "off"; an unset knob (undefined / null) and
// every other value leave the switch enabled.
function isOff(v) {
  if (v == null) return false;
  const normalized = String(v).trim().toLowerCase();
  return ["0", "false", "no", "off"].includes(normalized);
}
|
|
37
|
+
|
|
38
|
+
/**
 * Build the "local" browser provider: launches a Chromium via
 * `chromium.launch()` instead of allocating a cloud session.
 *
 * @returns {{name: string, allocate: Function, getLiveUrl: Function, release: Function}}
 *   Provider object with the same method names as the cloud providers.
 */
export function createLocalProvider() {
  return {
    name: "local",

    /**
     * Launch a browser and return its handle.
     *
     * Reads WB_BROWSER_LOCAL_HEADLESS, WB_BROWSER_LOCAL_EXECUTABLE_PATH and
     * WB_BROWSER_LOCAL_CHANNEL from the environment on every call.
     *
     * @param {{profile?: string, sessionName?: string}} [opts]
     *   `profile` is logged and ignored; `sessionName` is unused.
     * @returns {Promise<{sid: string, cdpUrl: null, _liveUrl: null, _browser: object}>}
     * @throws {Error} with `code === "SESSION_ALLOCATE_FAILED"` when the
     *   launch fails; the underlying launch error is attached as `cause`.
     */
    async allocate({ profile, sessionName: _sessionName } = {}) {
      if (profile) {
        log(
          `[local] profile="${profile}" ignored — local vendor has no profile binding. ` +
            `Use a cloud vendor or persist auth via WB_BROWSER_LOCAL_EXECUTABLE_PATH on a pre-warmed Chrome.`,
        );
      }

      // Headless ON by default; flip with WB_BROWSER_LOCAL_HEADLESS=0 for
      // visible-window dev. Operators debugging a brittle workbook can flip
      // to headed without touching the runbook.
      const headless = !isOff(process.env.WB_BROWSER_LOCAL_HEADLESS);

      // executablePath: explicit override for system Chrome / Chromium.
      // channel: "chrome" / "msedge" / "chrome-beta" — Playwright's named
      // channels for OS-installed browsers (no separate download). At most
      // one of executablePath / channel should be set; if both arrive,
      // executablePath wins (Playwright honors it).
      // Empty-string env values are treated as unset.
      const executablePath =
        process.env.WB_BROWSER_LOCAL_EXECUTABLE_PATH || undefined;
      const channel = process.env.WB_BROWSER_LOCAL_CHANNEL || undefined;

      log(
        `[local] launching chromium headless=${headless}` +
          ` executablePath=${executablePath ?? "<bundled>"}` +
          ` channel=${channel ?? "<none>"}`,
      );

      let browser;
      try {
        browser = await chromium.launch({
          headless,
          executablePath,
          channel,
        });
      } catch (e) {
        // Most common cause: Playwright's chromium binary not installed.
        // playwright-core ships the API but no browser; the user runs
        // `npx playwright install chromium` once to fetch it. Surface the
        // hint inline so this isn't a guessing game on first run.
        //
        // Guard against non-Error rejections so building the message can't
        // itself throw, and keep the original error (and its stack) as
        // `cause` for anyone debugging the wrapped failure.
        const reason = e?.message ?? String(e);
        const err = new Error(
          `local browser launch failed: ${reason}\n` +
            `Hint: install Chromium with \`npx playwright install chromium\`, ` +
            `or set WB_BROWSER_LOCAL_EXECUTABLE_PATH / WB_BROWSER_LOCAL_CHANNEL to use a system browser.`,
          { cause: e },
        );
        err.code = "SESSION_ALLOCATE_FAILED";
        throw err;
      }

      // sid is for telemetry only — there's no remote session to release.
      // Format: `local-<ms>-<rand>` so it's distinguishable from vendor sids
      // in callback streams and logs.
      const sid = `local-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

      return {
        sid,
        // No CDP URL — the entry point sees `_browser` and skips the
        // connectOverCDP path that cloud providers go through.
        cdpUrl: null,
        // Stashed so getLiveUrl() is a sync property read like browser-use.
        _liveUrl: null,
        _browser: browser,
      };
    },

    /**
     * Always resolves to null: there is no public inspector URL for a
     * locally launched Chromium.
     */
    async getLiveUrl(_allocated) {
      return null;
    },

    /**
     * Intentionally a no-op. Browser teardown happens in the entry-point
     * shutdown loop via `info.browser.close()`; cloud providers need a
     * separate vendor REST call here, local doesn't.
     */
    async release(_sid) {},
  };
}
|
package/lib/stub-page.js
CHANGED
|
@@ -73,6 +73,22 @@ export function createStubPage(opts = {}) {
|
|
|
73
73
|
record({ verb: "evaluate", script });
|
|
74
74
|
return evalResult;
|
|
75
75
|
},
|
|
76
|
+
async waitForLoadState(state, options) {
|
|
77
|
+
record({ verb: "waitForLoadState", state, options });
|
|
78
|
+
},
|
|
79
|
+
getByText(text, options) {
|
|
80
|
+
record({ verb: "getByText", text, options });
|
|
81
|
+
const locator = {
|
|
82
|
+
first() {
|
|
83
|
+
return {
|
|
84
|
+
async click(opts) {
|
|
85
|
+
record({ verb: "getByText.first.click", text, options: opts });
|
|
86
|
+
},
|
|
87
|
+
};
|
|
88
|
+
},
|
|
89
|
+
};
|
|
90
|
+
return locator;
|
|
91
|
+
},
|
|
76
92
|
};
|
|
77
93
|
}
|
|
78
94
|
|
package/lib/util.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import path from "node:path";
|
|
2
2
|
import { randomUUID } from "node:crypto";
|
|
3
|
+
import { existsSync } from "node:fs";
|
|
3
4
|
|
|
4
5
|
// Resolve `candidate` inside `dir`, rejecting traversal and absolute paths.
|
|
5
6
|
// Returns null when the resolved path escapes `dir` (or is `dir` itself).
|
|
@@ -14,6 +15,63 @@ export function resolveInside(dir, candidate) {
|
|
|
14
15
|
return resolved;
|
|
15
16
|
}
|
|
16
17
|
|
|
18
|
+
// Pick a path inside `dir` for `name` that does not exist on disk yet.
//
// Candidates are tried in order: `<stem><ext>`, then `<stem>-2<ext>` up to
// `<stem>-999<ext>`; the first one with no file behind it wins. If every
// numbered slot is taken, a random 8-character suffix is used instead
// (without an existence check) so the caller still gets a path rather than
// an exception. Returns null whenever a candidate fails `resolveInside`,
// i.e. would land outside `dir`.
//
// Why this exists: Playwright's `download.saveAs(path)` overwrites blindly,
// so two same-named downloads in one session (e.g. two `report.pdf` saves)
// would otherwise silently clobber each other.
//
// The exists-then-write sequence is racy — two concurrent downloads with
// the same suggested name can both see the same free slot. That is accepted:
// downloads in one session serialize through the same page in practice, and
// the worst case is one overwritten file, not corrupted state.
export function uniquePathInside(dir, name) {
  const cleaned = sanitizeArtifactName(name);

  const plain = resolveInside(dir, cleaned);
  if (!plain) return null;
  if (!existsSync(plain)) return plain;

  const ext = path.extname(cleaned);
  const stem = ext ? cleaned.slice(0, -ext.length) : cleaned;
  const withSuffix = (suffix) => resolveInside(dir, `${stem}-${suffix}${ext}`);

  let counter = 2;
  while (counter < 1000) {
    const numbered = withSuffix(counter);
    if (!numbered) return null;
    if (!existsSync(numbered)) return numbered;
    counter += 1;
  }

  // 1000 collisions on one name in a session is unrealistic; degrade to a
  // random suffix rather than throwing.
  return withSuffix(randomUUID().slice(0, 8));
}
|
|
47
|
+
|
|
48
|
+
// Turn a raw comma-separated extension list (as found in an env var, e.g.
// "pdf, xlsx,CSV") into a Set of lowercase extensions with any single
// leading dot removed. Blank entries are dropped. Returns null when the
// input is unset, empty, or contains no usable entries — callers read null
// as "no filter, capture everything".
export function parseExtensionAllowlist(raw) {
  const text = raw == null ? "" : String(raw).trim();
  if (text === "") return null;

  const extensions = new Set();
  for (const piece of text.split(",")) {
    const ext = piece.trim().toLowerCase().replace(/^\./, "");
    if (ext) extensions.add(ext);
  }
  return extensions.size > 0 ? extensions : null;
}
|
|
63
|
+
|
|
64
|
+
// Decide whether `filename` passes an extension allowlist (a Set of
// lowercase, dot-less extensions). A null/absent allowlist disables
// filtering, so everything passes. With a real allowlist, a file that has
// no extension is always rejected: the caller asked for specific types and
// an untyped blob is not one of them.
export function extensionAllowed(filename, allowlist) {
  if (!allowlist) return true;
  const dotted = path.extname(String(filename || "")).toLowerCase();
  const bare = dotted.replace(/^\./, "");
  return bare !== "" && allowlist.has(bare);
}
|
|
74
|
+
|
|
17
75
|
export function sanitizeArtifactName(s) {
|
|
18
76
|
// Keep author-chosen names readable but safe as filenames. Drop anything
|
|
19
77
|
// that could escape the artifacts dir (slashes, NULs, etc.).
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wb-browser-runtime",
|
|
3
|
-
"version": "0.10.0",
|
|
3
|
+
"version": "0.12.0",
|
|
4
4
|
"description": "Browser sidecar runtime for wb — Playwright over CDP (Browserbase, browser-use) via the wb-sidecar/1 line-framed JSON protocol.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"wb-browser-runtime": "bin/wb-browser-runtime.js"
|
package/verbs/click.js
CHANGED
|
@@ -2,7 +2,29 @@ export default {
|
|
|
2
2
|
name: "click",
|
|
3
3
|
primaryKey: "selector",
|
|
4
4
|
async execute(page, args) {
|
|
5
|
-
|
|
6
|
-
|
|
5
|
+
const timeout = args.timeout ?? 10_000;
|
|
6
|
+
try {
|
|
7
|
+
await page.click(args.selector, { timeout });
|
|
8
|
+
return `${args.selector}`;
|
|
9
|
+
} catch (err) {
|
|
10
|
+
// Text-fallback: when the selector times out (typically a brittle
|
|
11
|
+
// class/id rename), retry against visible text. We DELIBERATELY
|
|
12
|
+
// re-throw the ORIGINAL error if the fallback also fails — the
|
|
13
|
+
// selector failure is the actionable signal for error classification
|
|
14
|
+
// upstream; the fallback's failure would obscure it.
|
|
15
|
+
const isTimeout = err && err.name === "TimeoutError";
|
|
16
|
+
if (isTimeout && args.text_fallback) {
|
|
17
|
+
try {
|
|
18
|
+
await page
|
|
19
|
+
.getByText(args.text_fallback, { exact: false })
|
|
20
|
+
.first()
|
|
21
|
+
.click({ timeout });
|
|
22
|
+
return `${args.selector} (via text="${args.text_fallback}")`;
|
|
23
|
+
} catch {
|
|
24
|
+
throw err;
|
|
25
|
+
}
|
|
26
|
+
}
|
|
27
|
+
throw err;
|
|
28
|
+
}
|
|
7
29
|
},
|
|
8
30
|
};
|
package/verbs/index.js
CHANGED
|
@@ -13,6 +13,7 @@ import fillVerb from "./fill.js";
|
|
|
13
13
|
import clickVerb from "./click.js";
|
|
14
14
|
import pressVerb from "./press.js";
|
|
15
15
|
import waitForVerb from "./wait_for.js";
|
|
16
|
+
import waitForNetworkIdleVerb from "./wait_for_network_idle.js";
|
|
16
17
|
import screenshotVerb from "./screenshot.js";
|
|
17
18
|
import extractVerb from "./extract.js";
|
|
18
19
|
import assertVerb from "./assert.js";
|
|
@@ -28,6 +29,7 @@ const VERBS = [
|
|
|
28
29
|
clickVerb,
|
|
29
30
|
pressVerb,
|
|
30
31
|
waitForVerb,
|
|
32
|
+
waitForNetworkIdleVerb,
|
|
31
33
|
screenshotVerb,
|
|
32
34
|
extractVerb,
|
|
33
35
|
assertVerb,
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
// Wait until the page reports "networkidle" — no network connections
|
|
2
|
+
// for >=500ms. SPA flows that don't have a stable selector to wait on need
|
|
3
|
+
// this; otherwise the next verb fires before async XHRs settle and reads
|
|
4
|
+
// stale DOM.
|
|
5
|
+
|
|
6
|
+
// Timeout used when the caller gives none, or gives one we can't parse.
const DEFAULT_TIMEOUT_MS = 30_000;

/**
 * Convert a user-supplied timeout into milliseconds.
 *
 * Accepted forms: a number (taken as ms), or a string such as "500ms",
 * "30s", "2m", "1h", "1.5s", or a bare numeric string like "5000" (ms).
 * The unit is case-insensitive and may be separated from the number by
 * whitespace.
 *
 * Anything malformed — including negative or non-finite values, which can
 * never be a meaningful wait bound — falls back to DEFAULT_TIMEOUT_MS: the
 * sidecar would rather time out at a known bound than throw on a typo.
 *
 * @param {unknown} value Raw timeout argument.
 * @returns {number} Non-negative, finite timeout in milliseconds.
 */
function parseTimeoutMs(value) {
  // A usable timeout is a finite number >= 0; everything else is "malformed".
  const orDefault = (ms) =>
    Number.isFinite(ms) && ms >= 0 ? ms : DEFAULT_TIMEOUT_MS;

  if (value == null) return DEFAULT_TIMEOUT_MS;
  if (typeof value === "number") return orDefault(value);
  if (typeof value !== "string") return DEFAULT_TIMEOUT_MS;

  const trimmed = value.trim();
  if (trimmed === "") return DEFAULT_TIMEOUT_MS;

  const m = trimmed.match(/^(\d+(?:\.\d+)?)\s*(ms|s|m|h)?$/i);
  if (!m) {
    // Not "<digits><unit>": still accept other numeric spellings ("1e3"),
    // but never a negative or non-finite result ("-5", "Infinity").
    return orDefault(Number(trimmed));
  }

  const n = Number(m[1]);
  const unit = (m[2] || "ms").toLowerCase();
  switch (unit) {
    case "ms":
      return orDefault(n);
    case "s":
      return orDefault(n * 1000);
    case "m":
      return orDefault(n * 60_000);
    case "h":
      return orDefault(n * 3_600_000);
    default:
      return DEFAULT_TIMEOUT_MS;
  }
}
|
|
37
|
+
|
|
38
|
+
// Verb: wait_for_network_idle. Blocks until the page reaches Playwright's
// "networkidle" load state or the timeout elapses. `args.timeout` is run
// through parseTimeoutMs before being handed to waitForLoadState.
export default {
  name: "wait_for_network_idle",
  primaryKey: "timeout",
  async execute(page, args) {
    const { timeout: raw } = args;
    const timeout = parseTimeoutMs(raw);

    await page.waitForLoadState("networkidle", { timeout });

    // Echo the author's own spelling ("30s") when one was given as a
    // non-blank string; otherwise report the effective value in ms.
    if (typeof raw === "string") {
      const label = raw.trim();
      if (label !== "") {
        return `network idle (timeout=${label})`;
      }
    }
    return `network idle (timeout=${timeout}ms)`;
  },
};
|