wb-browser-runtime 0.6.1 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +71 -7
- package/bin/wb-browser-runtime.js +184 -1003
- package/lib/http.js +63 -0
- package/lib/io.js +56 -0
- package/lib/providers/browser-use.js +133 -0
- package/lib/providers/browserbase.js +120 -0
- package/lib/providers/index.js +43 -0
- package/lib/recording-manager.js +620 -0
- package/lib/session-manager.js +101 -0
- package/lib/stub-page.js +112 -0
- package/lib/util.js +33 -0
- package/package.json +25 -3
- package/verbs/announce_artifact.js +74 -0
- package/verbs/assert.js +23 -0
- package/verbs/click.js +8 -0
- package/verbs/eval.js +20 -0
- package/verbs/extract.js +38 -0
- package/verbs/fill.js +13 -0
- package/verbs/goto.js +10 -0
- package/verbs/index.js +81 -0
- package/verbs/pause_for_human.js +60 -0
- package/verbs/press.js +9 -0
- package/verbs/save.js +55 -0
- package/verbs/screenshot.js +48 -0
- package/verbs/wait_for.js +13 -0
- package/verbs/wait_for_drop.js +235 -0
|
@@ -1,18 +1,19 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
// wb-browser-runtime —
|
|
2
|
+
// wb-browser-runtime — CDP + Playwright sidecar for `wb`.
|
|
3
3
|
//
|
|
4
4
|
// Speaks wb's line-framed JSON protocol on stdio (see ../README.md). Each
|
|
5
5
|
// `browser` fenced block in a workbook arrives as one `slice` message; this
|
|
6
6
|
// sidecar dispatches its verbs against a Playwright `Page` connected to a
|
|
7
|
-
//
|
|
7
|
+
// vendor-provided CDP endpoint.
|
|
8
|
+
//
|
|
9
|
+
// The vendor (Browserbase, browser-use, ...) is selected by WB_BROWSER_VENDOR
|
|
10
|
+
// and lives behind a provider in ../lib/providers/. Verbs, recording, session
|
|
11
|
+
// cache, and substitutions are all vendor-agnostic — they run against a
|
|
12
|
+
// Playwright Page regardless of whose chromium is on the other end.
|
|
8
13
|
//
|
|
9
14
|
// Sessions are cached by `session:` name across slices for the lifetime of
|
|
10
15
|
// this process, so a runbook with multiple browser blocks against the same
|
|
11
|
-
// vendor reuses one
|
|
12
|
-
//
|
|
13
|
-
// Env required for real runs:
|
|
14
|
-
// BROWSERBASE_API_KEY
|
|
15
|
-
// BROWSERBASE_PROJECT_ID
|
|
16
|
+
// vendor reuses one session (and one logged-in browser context).
|
|
16
17
|
//
|
|
17
18
|
// Verb args support two substitutions, expanded recursively at dispatch time:
|
|
18
19
|
// {{ env.NAME }} → process.env.NAME
|
|
@@ -24,742 +25,98 @@
|
|
|
24
25
|
|
|
25
26
|
import readline from "node:readline";
|
|
26
27
|
import { chromium } from "playwright-core";
|
|
27
|
-
import {
|
|
28
|
-
import {
|
|
29
|
-
import {
|
|
30
|
-
import
|
|
31
|
-
import
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
const
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
"wait_for",
|
|
44
|
-
"screenshot",
|
|
45
|
-
"extract",
|
|
46
|
-
"assert",
|
|
47
|
-
"eval",
|
|
48
|
-
"save",
|
|
49
|
-
];
|
|
50
|
-
|
|
51
|
-
const BB_BASE = "https://api.browserbase.com";
|
|
52
|
-
const VERSION = "0.6.0";
|
|
53
|
-
|
|
54
|
-
// --- Recording config -------------------------------------------------------
|
|
28
|
+
import { readFileSync } from "node:fs";
|
|
29
|
+
import { send, log } from "../lib/io.js";
|
|
30
|
+
import { resolveInside } from "../lib/util.js";
|
|
31
|
+
import { SessionManager } from "../lib/session-manager.js";
|
|
32
|
+
import {
|
|
33
|
+
RecordingManager,
|
|
34
|
+
loadRecordingConfig,
|
|
35
|
+
} from "../lib/recording-manager.js";
|
|
36
|
+
import { getProvider } from "../lib/providers/index.js";
|
|
37
|
+
import { SUPPORTS, runVerb, verbName } from "../verbs/index.js";
|
|
38
|
+
|
|
39
|
+
const VERSION = "0.8.0";
|
|
40
|
+
const provider = getProvider();
|
|
41
|
+
log(`[provider] ${provider.name}`);
|
|
42
|
+
|
|
43
|
+
// --- Recording --------------------------------------------------------------
|
|
55
44
|
//
|
|
56
|
-
// Feature is off unless WB_RECORDING_UPLOAD_URL is set.
|
|
57
|
-
//
|
|
58
|
-
// artifacts are POSTed to the upload URL at session close.
|
|
59
|
-
//
|
|
60
|
-
// URL template supports `{run_id}` and `{kind}` placeholders, e.g.
|
|
61
|
-
// https://host/api/runs/{run_id}/recording/{kind}
|
|
62
|
-
// kind ∈ {"rrweb", "video"}. Auth: `Authorization: Bearer <SECRET>`.
|
|
63
|
-
|
|
64
|
-
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
65
|
-
const RRWEB_VENDOR_PATH = path.join(
|
|
66
|
-
__dirname,
|
|
67
|
-
"..",
|
|
68
|
-
"vendor",
|
|
69
|
-
"rrweb-record.min.js",
|
|
70
|
-
);
|
|
71
|
-
|
|
72
|
-
function checkFfmpeg() {
|
|
73
|
-
try {
|
|
74
|
-
const res = spawnSync("ffmpeg", ["-version"], { stdio: "ignore" });
|
|
75
|
-
return res.status === 0;
|
|
76
|
-
} catch {
|
|
77
|
-
return false;
|
|
78
|
-
}
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
function loadRecordingConfig() {
|
|
82
|
-
const uploadUrl = (process.env.WB_RECORDING_UPLOAD_URL || "").trim();
|
|
83
|
-
if (!uploadUrl) return { enabled: false, reason: "no-upload-url" };
|
|
84
|
-
const secret = (process.env.WB_RECORDING_UPLOAD_SECRET || "").trim();
|
|
85
|
-
if (!secret) {
|
|
86
|
-
log(
|
|
87
|
-
"[recording] WB_RECORDING_UPLOAD_URL is set but WB_RECORDING_UPLOAD_SECRET is empty — refusing to upload unauthenticated. Recording disabled.",
|
|
88
|
-
);
|
|
89
|
-
return { enabled: false, reason: "no-secret" };
|
|
90
|
-
}
|
|
91
|
-
|
|
92
|
-
const runId =
|
|
93
|
-
(process.env.WB_RECORDING_RUN_ID || "").trim() ||
|
|
94
|
-
(process.env.TRIGGER_RUN_ID || "").trim() ||
|
|
95
|
-
`wb-${randomUUID()}`;
|
|
45
|
+
// Feature is off unless WB_RECORDING_UPLOAD_URL is set. See
|
|
46
|
+
// runtimes/browser/lib/recording-manager.js for the full lifecycle.
|
|
96
47
|
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
// + log so operators see the effective value.
|
|
100
|
-
const clamp = (v, lo, hi) => Math.max(lo, Math.min(hi, v));
|
|
101
|
-
const rawFps =
|
|
102
|
-
Number.parseInt(process.env.WB_RECORDING_SCREENCAST_FPS || "", 10) || 5;
|
|
103
|
-
const rawQuality =
|
|
104
|
-
Number.parseInt(process.env.WB_RECORDING_SCREENCAST_QUALITY || "", 10) ||
|
|
105
|
-
60;
|
|
106
|
-
const fps = clamp(rawFps, 1, 30);
|
|
107
|
-
const quality = clamp(rawQuality, 10, 95);
|
|
108
|
-
if (fps !== rawFps) {
|
|
109
|
-
log(`[recording] fps=${rawFps} clamped to ${fps} (valid range 1..30)`);
|
|
110
|
-
}
|
|
111
|
-
if (quality !== rawQuality) {
|
|
112
|
-
log(
|
|
113
|
-
`[recording] quality=${rawQuality} clamped to ${quality} (valid range 10..95)`,
|
|
114
|
-
);
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
const rrwebRequested = process.env.WB_RECORDING_RRWEB !== "0";
|
|
118
|
-
const videoRequested = process.env.WB_RECORDING_VIDEO !== "0";
|
|
119
|
-
|
|
120
|
-
let rrwebSource = null;
|
|
121
|
-
if (rrwebRequested) {
|
|
122
|
-
if (!existsSync(RRWEB_VENDOR_PATH)) {
|
|
123
|
-
log(
|
|
124
|
-
`[recording] rrweb vendor file missing at ${RRWEB_VENDOR_PATH} — disabling rrweb capture`,
|
|
125
|
-
);
|
|
126
|
-
} else {
|
|
127
|
-
rrwebSource = readFileSync(RRWEB_VENDOR_PATH, "utf8");
|
|
128
|
-
}
|
|
129
|
-
}
|
|
130
|
-
|
|
131
|
-
const hasFfmpeg = videoRequested ? checkFfmpeg() : false;
|
|
132
|
-
if (videoRequested && !hasFfmpeg) {
|
|
133
|
-
log(
|
|
134
|
-
"[recording] ffmpeg not found on $PATH — disabling video capture (rrweb will continue if enabled)",
|
|
135
|
-
);
|
|
136
|
-
}
|
|
137
|
-
|
|
138
|
-
const kinds = {
|
|
139
|
-
rrweb: rrwebRequested && !!rrwebSource,
|
|
140
|
-
video: videoRequested && hasFfmpeg,
|
|
141
|
-
};
|
|
142
|
-
|
|
143
|
-
if (!kinds.rrweb && !kinds.video) {
|
|
144
|
-
log("[recording] no usable kinds — recording disabled");
|
|
145
|
-
return { enabled: false, reason: "all-kinds-disabled" };
|
|
146
|
-
}
|
|
147
|
-
|
|
148
|
-
const rrwebMaxEvents =
|
|
149
|
-
Number.parseInt(process.env.WB_RECORDING_RRWEB_MAX_EVENTS || "", 10) ||
|
|
150
|
-
50_000;
|
|
151
|
-
|
|
152
|
-
return {
|
|
153
|
-
enabled: true,
|
|
154
|
-
uploadUrl,
|
|
155
|
-
secret,
|
|
156
|
-
runId,
|
|
157
|
-
fps,
|
|
158
|
-
quality,
|
|
159
|
-
kinds,
|
|
160
|
-
rrwebSource,
|
|
161
|
-
rrwebMaxEvents,
|
|
162
|
-
};
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
const RECORDING = loadRecordingConfig();
|
|
166
|
-
if (RECORDING.enabled) {
|
|
167
|
-
const activeKinds = Object.entries(RECORDING.kinds)
|
|
168
|
-
.filter(([, v]) => v)
|
|
169
|
-
.map(([k]) => k)
|
|
170
|
-
.join(",");
|
|
48
|
+
const recording = new RecordingManager(loadRecordingConfig());
|
|
49
|
+
if (recording.enabled) {
|
|
171
50
|
log(
|
|
172
|
-
`[recording] enabled run_id=${
|
|
173
|
-
);
|
|
174
|
-
}
|
|
175
|
-
|
|
176
|
-
function send(obj) {
|
|
177
|
-
process.stdout.write(JSON.stringify(obj) + "\n");
|
|
178
|
-
}
|
|
179
|
-
|
|
180
|
-
function log(...args) {
|
|
181
|
-
process.stderr.write(args.join(" ") + "\n");
|
|
182
|
-
}
|
|
183
|
-
|
|
184
|
-
// --- Browserbase REST -------------------------------------------------------
|
|
185
|
-
|
|
186
|
-
async function bbCreateSession() {
|
|
187
|
-
const apiKey = process.env.BROWSERBASE_API_KEY;
|
|
188
|
-
const projectId = process.env.BROWSERBASE_PROJECT_ID;
|
|
189
|
-
if (!apiKey || !projectId) {
|
|
190
|
-
throw new Error(
|
|
191
|
-
"BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID must be set",
|
|
192
|
-
);
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
// Both flags opt-in per session. advancedStealth is Scale-plan-gated on
|
|
196
|
-
// Browserbase's side; proxies adds residential-IP cost. Default off so a
|
|
197
|
-
// misconfigured plan doesn't break unrelated runs (HN, Google Sheets, etc.);
|
|
198
|
-
// flip per vendor when the target sits behind Cloudflare / similar bot
|
|
199
|
-
// detection (e.g., Airbase).
|
|
200
|
-
const envBool = (v) => v === "1" || (typeof v === "string" && v.toLowerCase() === "true");
|
|
201
|
-
const advancedStealth = envBool(process.env.BROWSERBASE_ADVANCED_STEALTH);
|
|
202
|
-
const proxies = envBool(process.env.BROWSERBASE_PROXIES);
|
|
203
|
-
|
|
204
|
-
// keepAlive:false — slice lifetime is tied to wb process; on shutdown
|
|
205
|
-
// we explicitly REQUEST_RELEASE so quota isn't burned by orphans.
|
|
206
|
-
const body = { projectId, keepAlive: false };
|
|
207
|
-
if (advancedStealth) {
|
|
208
|
-
body.browserSettings = { advancedStealth: true };
|
|
209
|
-
}
|
|
210
|
-
if (proxies) {
|
|
211
|
-
body.proxies = true;
|
|
212
|
-
}
|
|
213
|
-
|
|
214
|
-
log(`[bb] session create advancedStealth=${advancedStealth} proxies=${proxies}`);
|
|
215
|
-
|
|
216
|
-
const res = await retryableFetch(
|
|
217
|
-
`${BB_BASE}/v1/sessions`,
|
|
218
|
-
{
|
|
219
|
-
method: "POST",
|
|
220
|
-
headers: {
|
|
221
|
-
"X-BB-API-Key": apiKey,
|
|
222
|
-
"Content-Type": "application/json",
|
|
223
|
-
},
|
|
224
|
-
body: JSON.stringify(body),
|
|
225
|
-
},
|
|
226
|
-
"bb.create",
|
|
227
|
-
);
|
|
228
|
-
if (!res.ok) {
|
|
229
|
-
throw new Error(
|
|
230
|
-
`Browserbase create failed (${res.status}): ${await safeText(res)}`,
|
|
231
|
-
);
|
|
232
|
-
}
|
|
233
|
-
return await res.json();
|
|
234
|
-
}
|
|
235
|
-
|
|
236
|
-
async function bbGetLiveUrl(sessionId) {
|
|
237
|
-
const apiKey = process.env.BROWSERBASE_API_KEY;
|
|
238
|
-
const res = await retryableFetch(
|
|
239
|
-
`${BB_BASE}/v1/sessions/${sessionId}/debug`,
|
|
240
|
-
{ headers: { "X-BB-API-Key": apiKey } },
|
|
241
|
-
"bb.debug",
|
|
51
|
+
`[recording] enabled run_id=${recording.runId} kinds=${recording.activeKinds.join(",")} fps=${recording.fps} quality=${recording.quality}`,
|
|
242
52
|
);
|
|
243
|
-
if (!res.ok) {
|
|
244
|
-
throw new Error(
|
|
245
|
-
`Browserbase debug fetch failed (${res.status}): ${await safeText(res)}`,
|
|
246
|
-
);
|
|
247
|
-
}
|
|
248
|
-
const body = await res.json();
|
|
249
|
-
return body.debuggerFullscreenUrl;
|
|
250
|
-
}
|
|
251
|
-
|
|
252
|
-
async function bbReleaseSession(sessionId) {
|
|
253
|
-
const apiKey = process.env.BROWSERBASE_API_KEY;
|
|
254
|
-
const projectId = process.env.BROWSERBASE_PROJECT_ID;
|
|
255
|
-
try {
|
|
256
|
-
await retryableFetch(
|
|
257
|
-
`${BB_BASE}/v1/sessions/${sessionId}`,
|
|
258
|
-
{
|
|
259
|
-
method: "POST",
|
|
260
|
-
headers: { "X-BB-API-Key": apiKey, "Content-Type": "application/json" },
|
|
261
|
-
body: JSON.stringify({ projectId, status: "REQUEST_RELEASE" }),
|
|
262
|
-
},
|
|
263
|
-
"bb.release",
|
|
264
|
-
);
|
|
265
|
-
} catch (e) {
|
|
266
|
-
log(`[shutdown] release session ${sessionId} failed: ${e.message}`);
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
|
|
270
|
-
async function safeText(res) {
|
|
271
|
-
try {
|
|
272
|
-
return (await res.text()).slice(0, 200);
|
|
273
|
-
} catch {
|
|
274
|
-
return "<unreadable>";
|
|
275
|
-
}
|
|
276
|
-
}
|
|
277
|
-
|
|
278
|
-
// Retry transient network + 5xx/429 failures with short exponential backoff.
|
|
279
|
-
// Each attempt gets its own AbortController + timeout; caller-passed signals
|
|
280
|
-
// are not plumbed through since we don't have a cancellation story above this
|
|
281
|
-
// layer. Non-retryable statuses (4xx except 429) are returned immediately for
|
|
282
|
-
// the caller to handle.
|
|
283
|
-
async function retryableFetch(url, opts = {}, label, { timeoutMs = 30_000 } = {}) {
|
|
284
|
-
const delays = [100, 500];
|
|
285
|
-
let lastErr = null;
|
|
286
|
-
let lastRes = null;
|
|
287
|
-
for (let attempt = 0; attempt <= delays.length; attempt++) {
|
|
288
|
-
if (attempt > 0) {
|
|
289
|
-
await new Promise((r) => setTimeout(r, delays[attempt - 1]));
|
|
290
|
-
const prev = lastRes
|
|
291
|
-
? `status=${lastRes.status}`
|
|
292
|
-
: `err=${lastErr?.message || lastErr}`;
|
|
293
|
-
log(`[retry] ${label} attempt ${attempt + 1}/3 (${prev})`);
|
|
294
|
-
}
|
|
295
|
-
const controller = new AbortController();
|
|
296
|
-
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
297
|
-
try {
|
|
298
|
-
const res = await fetch(url, { ...opts, signal: controller.signal });
|
|
299
|
-
if (res.ok) return res;
|
|
300
|
-
if (res.status === 429 || (res.status >= 500 && res.status < 600)) {
|
|
301
|
-
lastRes = res;
|
|
302
|
-
continue;
|
|
303
|
-
}
|
|
304
|
-
return res;
|
|
305
|
-
} catch (e) {
|
|
306
|
-
lastErr = e;
|
|
307
|
-
continue;
|
|
308
|
-
} finally {
|
|
309
|
-
clearTimeout(timer);
|
|
310
|
-
}
|
|
311
|
-
}
|
|
312
|
-
if (lastRes) return lastRes;
|
|
313
|
-
throw lastErr;
|
|
314
53
|
}
|
|
315
54
|
|
|
316
55
|
// --- Session cache ----------------------------------------------------------
|
|
317
56
|
|
|
318
|
-
const sessions = new
|
|
319
|
-
|
|
320
|
-
async function ensureSession(name) {
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
const
|
|
333
|
-
const
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
sid: created.id,
|
|
337
|
-
browser,
|
|
338
|
-
context,
|
|
339
|
-
page,
|
|
340
|
-
liveUrl,
|
|
341
|
-
recording: null,
|
|
342
|
-
};
|
|
343
|
-
sessions.set(name, info);
|
|
344
|
-
|
|
345
|
-
send({
|
|
346
|
-
type: "slice.session_started",
|
|
347
|
-
session: name,
|
|
348
|
-
session_id: created.id,
|
|
349
|
-
live_url: liveUrl,
|
|
350
|
-
started_at: new Date().toISOString(),
|
|
351
|
-
});
|
|
352
|
-
|
|
353
|
-
await startRecording(info, name);
|
|
354
|
-
return info;
|
|
355
|
-
} catch (e) {
|
|
356
|
-
if (browser) {
|
|
357
|
-
try {
|
|
358
|
-
await browser.close();
|
|
359
|
-
} catch {}
|
|
360
|
-
}
|
|
361
|
-
sessions.delete(name);
|
|
362
|
-
await bbReleaseSession(created.id);
|
|
363
|
-
throw e;
|
|
364
|
-
}
|
|
365
|
-
}
|
|
366
|
-
|
|
367
|
-
// --- Recording (rrweb + CDP screencast) ------------------------------------
|
|
368
|
-
//
|
|
369
|
-
// rrweb — vendored record bundle injected via context.addInitScript. Events
|
|
370
|
-
// are emitted to window.__wbRrwebBuffer and flushed every 500ms (and
|
|
371
|
-
// on beforeunload) to a sidecar-side buffer via exposeBinding. This
|
|
372
|
-
// survives cross-origin navigations because the init script reruns on
|
|
373
|
-
// every new document.
|
|
374
|
-
// video — per-page CDPSession.startScreencast streams JPEG frames; each frame
|
|
375
|
-
// is piped into a long-lived `ffmpeg` subprocess that encodes to VP9
|
|
376
|
-
// WebM on disk. At session end we close the stdin, wait for ffmpeg to
|
|
377
|
-
// exit, and read the file.
|
|
378
|
-
//
|
|
379
|
-
// Both artifacts are POSTed with Bearer auth to the upload URL. Failure is
|
|
380
|
-
// soft — slice.recording.failed events are emitted but the run still succeeds.
|
|
381
|
-
|
|
382
|
-
async function startRecording(info, sessionName) {
|
|
383
|
-
if (!RECORDING.enabled) return;
|
|
384
|
-
info.recording = {
|
|
385
|
-
kinds: { ...RECORDING.kinds },
|
|
386
|
-
rrwebEvents: [],
|
|
387
|
-
rrwebDropped: 0,
|
|
388
|
-
rrwebOverflowLogged: false,
|
|
389
|
-
cdp: null,
|
|
390
|
-
ffmpeg: null,
|
|
391
|
-
ffmpegDone: null,
|
|
392
|
-
videoPath: null,
|
|
393
|
-
};
|
|
394
|
-
const rec = info.recording;
|
|
395
|
-
|
|
396
|
-
// Drop oldest events once the buffer exceeds the cap — keeps the tail of a
|
|
397
|
-
// long run (usually the interesting bit) rather than failing the upload or
|
|
398
|
-
// OOMing the sidecar. One warning per session so ops can spot it.
|
|
399
|
-
const pushRrweb = (e) => {
|
|
400
|
-
if (rec.rrwebEvents.length >= RECORDING.rrwebMaxEvents) {
|
|
401
|
-
rec.rrwebEvents.shift();
|
|
402
|
-
rec.rrwebDropped++;
|
|
403
|
-
if (!rec.rrwebOverflowLogged) {
|
|
404
|
-
rec.rrwebOverflowLogged = true;
|
|
405
|
-
log(
|
|
406
|
-
`[recording] rrweb buffer hit cap (${RECORDING.rrwebMaxEvents}); dropping oldest events`,
|
|
407
|
-
);
|
|
408
|
-
}
|
|
409
|
-
}
|
|
410
|
-
rec.rrwebEvents.push(e);
|
|
411
|
-
};
|
|
412
|
-
|
|
413
|
-
if (rec.kinds.rrweb) {
|
|
57
|
+
const sessions = new SessionManager();
|
|
58
|
+
|
|
59
|
+
async function ensureSession(name, { profile } = {}) {
|
|
60
|
+
return sessions.ensure(name, async () => {
|
|
61
|
+
// Vendors charge for the session the moment allocate() returns; if
|
|
62
|
+
// anything after this point throws (getLiveUrl, CDP connect, newContext,
|
|
63
|
+
// recording setup) we must release it explicitly or quota leaks until
|
|
64
|
+
// the vendor's idle timeout. SessionManager only caches a successful
|
|
65
|
+
// return, so on throw there's no half-populated entry to clean up here.
|
|
66
|
+
//
|
|
67
|
+
// Lifecycle timings attached to `slice.session_started` tell operators
|
|
68
|
+
// which step dominated when startup feels slow — usually connectOverCDP
|
|
69
|
+
// against a cold vendor region, but the live-URL fetch and
|
|
70
|
+
// newContext/newPage can each stall independently.
|
|
71
|
+
const t0 = Date.now();
|
|
72
|
+
const allocated = await provider.allocate({ profile, sessionName: name });
|
|
73
|
+
const tAllocated = Date.now();
|
|
74
|
+
let browser = null;
|
|
414
75
|
try {
|
|
415
|
-
await
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
const
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
});
|
|
431
|
-
} catch (e) { /* rrweb unavailable on this page (e.g. chrome://) */ }
|
|
432
|
-
var flush = function(){
|
|
433
|
-
var buf = window.__wbRrwebBuffer;
|
|
434
|
-
if (buf && buf.length && typeof window.__wbRrwebFlush === 'function') {
|
|
435
|
-
window.__wbRrwebBuffer = [];
|
|
436
|
-
try { window.__wbRrwebFlush(buf); } catch (e) {}
|
|
437
|
-
}
|
|
438
|
-
};
|
|
439
|
-
setInterval(flush, 500);
|
|
440
|
-
window.addEventListener('beforeunload', flush);
|
|
441
|
-
})();
|
|
442
|
-
`;
|
|
443
|
-
await info.context.addInitScript({
|
|
444
|
-
content: RECORDING.rrwebSource + "\n" + bootstrap,
|
|
445
|
-
});
|
|
446
|
-
} catch (e) {
|
|
447
|
-
log(`[recording] rrweb setup failed: ${e.message}`);
|
|
448
|
-
rec.kinds.rrweb = false;
|
|
449
|
-
}
|
|
450
|
-
}
|
|
76
|
+
const liveUrl = await provider.getLiveUrl(allocated);
|
|
77
|
+
browser = await chromium.connectOverCDP(allocated.cdpUrl);
|
|
78
|
+
const tConnected = Date.now();
|
|
79
|
+
const context = browser.contexts()[0] ?? (await browser.newContext());
|
|
80
|
+
const page = context.pages()[0] ?? (await context.newPage());
|
|
81
|
+
const tPageReady = Date.now();
|
|
82
|
+
|
|
83
|
+
const info = {
|
|
84
|
+
sid: allocated.sid,
|
|
85
|
+
browser,
|
|
86
|
+
context,
|
|
87
|
+
page,
|
|
88
|
+
liveUrl,
|
|
89
|
+
recording: null,
|
|
90
|
+
};
|
|
451
91
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
"-y",
|
|
466
|
-
"-f",
|
|
467
|
-
"image2pipe",
|
|
468
|
-
"-vcodec",
|
|
469
|
-
"mjpeg",
|
|
470
|
-
"-framerate",
|
|
471
|
-
String(RECORDING.fps),
|
|
472
|
-
"-i",
|
|
473
|
-
"pipe:0",
|
|
474
|
-
"-c:v",
|
|
475
|
-
"libvpx-vp9",
|
|
476
|
-
"-b:v",
|
|
477
|
-
"1M",
|
|
478
|
-
"-deadline",
|
|
479
|
-
"realtime",
|
|
480
|
-
"-pix_fmt",
|
|
481
|
-
"yuv420p",
|
|
482
|
-
outPath,
|
|
483
|
-
],
|
|
484
|
-
{ stdio: ["pipe", "ignore", "pipe"] },
|
|
485
|
-
);
|
|
486
|
-
ff.stderr.on("data", (d) => {
|
|
487
|
-
const s = d.toString().trim();
|
|
488
|
-
if (s) log(`[ffmpeg] ${s.slice(0, 240)}`);
|
|
489
|
-
});
|
|
490
|
-
// Broken pipe on shutdown is normal — swallow it so it doesn't crash the
|
|
491
|
-
// node process via the default 'error' handler.
|
|
492
|
-
ff.stdin.on("error", (e) => {
|
|
493
|
-
if (e.code !== "EPIPE") log(`[ffmpeg stdin] ${e.message}`);
|
|
494
|
-
});
|
|
495
|
-
rec.ffmpeg = ff;
|
|
496
|
-
rec.ffmpegDone = new Promise((resolve) => {
|
|
497
|
-
ff.on("close", (code) => resolve(code));
|
|
92
|
+
send({
|
|
93
|
+
type: "slice.session_started",
|
|
94
|
+
session: name,
|
|
95
|
+
session_id: allocated.sid,
|
|
96
|
+
live_url: liveUrl,
|
|
97
|
+
vendor: provider.name,
|
|
98
|
+
started_at: new Date().toISOString(),
|
|
99
|
+
timings: {
|
|
100
|
+
allocate_ms: tAllocated - t0,
|
|
101
|
+
connect_ms: tConnected - tAllocated,
|
|
102
|
+
page_ready_ms: tPageReady - tConnected,
|
|
103
|
+
total_ms: tPageReady - t0,
|
|
104
|
+
},
|
|
498
105
|
});
|
|
499
106
|
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
// Dedup identical consecutive frames. CDP emits repeats when nothing
|
|
503
|
-
// changed on screen; encoding them as distinct frames bloats the WebM
|
|
504
|
-
// and mis-paces playback. Compare the base64 string directly — it's
|
|
505
|
-
// cheaper than hashing and equivalent for exact equality.
|
|
506
|
-
let lastFrameData = null;
|
|
507
|
-
let dedupCount = 0;
|
|
508
|
-
let dedupLogged = false;
|
|
509
|
-
|
|
510
|
-
cdp.on("Page.screencastFrame", async (frame) => {
|
|
511
|
-
try {
|
|
512
|
-
if (ff.stdin.writable && !ff.killed) {
|
|
513
|
-
if (frame.data === lastFrameData) {
|
|
514
|
-
dedupCount++;
|
|
515
|
-
if (!dedupLogged && dedupCount >= 100) {
|
|
516
|
-
dedupLogged = true;
|
|
517
|
-
log(
|
|
518
|
-
`[recording] dedup active (${dedupCount} duplicate frames skipped so far)`,
|
|
519
|
-
);
|
|
520
|
-
}
|
|
521
|
-
// Still ack — Chrome needs it to keep streaming.
|
|
522
|
-
await cdp.send("Page.screencastFrameAck", {
|
|
523
|
-
sessionId: frame.sessionId,
|
|
524
|
-
});
|
|
525
|
-
return;
|
|
526
|
-
}
|
|
527
|
-
lastFrameData = frame.data;
|
|
528
|
-
const buf = Buffer.from(frame.data, "base64");
|
|
529
|
-
const ok = ff.stdin.write(buf);
|
|
530
|
-
// Backpressure: if ffmpeg's stdin buffer is full, wait for drain
|
|
531
|
-
// before acking so Chrome slows frame production instead of
|
|
532
|
-
// piling JPEG frames in Node heap. 5s fail-open so a wedged
|
|
533
|
-
// ffmpeg can't stall the protocol indefinitely.
|
|
534
|
-
if (!ok) {
|
|
535
|
-
await new Promise((resolve) => {
|
|
536
|
-
let fired = false;
|
|
537
|
-
const done = () => {
|
|
538
|
-
if (fired) return;
|
|
539
|
-
fired = true;
|
|
540
|
-
ff.stdin.off("drain", done);
|
|
541
|
-
ff.stdin.off("close", done);
|
|
542
|
-
ff.stdin.off("error", done);
|
|
543
|
-
clearTimeout(timer);
|
|
544
|
-
resolve();
|
|
545
|
-
};
|
|
546
|
-
const timer = setTimeout(done, 5000);
|
|
547
|
-
ff.stdin.once("drain", done);
|
|
548
|
-
ff.stdin.once("close", done);
|
|
549
|
-
ff.stdin.once("error", done);
|
|
550
|
-
});
|
|
551
|
-
}
|
|
552
|
-
}
|
|
553
|
-
// Must ack each frame or Chrome stops streaming.
|
|
554
|
-
await cdp.send("Page.screencastFrameAck", {
|
|
555
|
-
sessionId: frame.sessionId,
|
|
556
|
-
});
|
|
557
|
-
} catch {
|
|
558
|
-
// Session tearing down — safe to ignore.
|
|
559
|
-
}
|
|
560
|
-
});
|
|
561
|
-
await cdp.send("Page.startScreencast", {
|
|
562
|
-
format: "jpeg",
|
|
563
|
-
quality: RECORDING.quality,
|
|
564
|
-
everyNthFrame: 1,
|
|
565
|
-
});
|
|
107
|
+
await recording.start(info, name);
|
|
108
|
+
return info;
|
|
566
109
|
} catch (e) {
|
|
567
|
-
|
|
568
|
-
rec.kinds.video = false;
|
|
569
|
-
if (rec.ffmpeg) {
|
|
110
|
+
if (browser) {
|
|
570
111
|
try {
|
|
571
|
-
|
|
112
|
+
await browser.close();
|
|
572
113
|
} catch {}
|
|
573
114
|
}
|
|
115
|
+
await provider.release(allocated.sid);
|
|
116
|
+
throw e;
|
|
574
117
|
}
|
|
575
|
-
}
|
|
576
|
-
|
|
577
|
-
const active = Object.entries(rec.kinds)
|
|
578
|
-
.filter(([, v]) => v)
|
|
579
|
-
.map(([k]) => k);
|
|
580
|
-
if (active.length) {
|
|
581
|
-
send({
|
|
582
|
-
type: "slice.recording.started",
|
|
583
|
-
session: sessionName,
|
|
584
|
-
run_id: RECORDING.runId,
|
|
585
|
-
kinds: active,
|
|
586
|
-
});
|
|
587
|
-
}
|
|
588
|
-
}
|
|
589
|
-
|
|
590
|
-
async function flushRecording(info, sessionName) {
|
|
591
|
-
if (!info.recording) return;
|
|
592
|
-
const rec = info.recording;
|
|
593
|
-
|
|
594
|
-
let rrwebBody = null;
|
|
595
|
-
if (rec.kinds.rrweb) {
|
|
596
|
-
try {
|
|
597
|
-
const tail = await info.page.evaluate(() => {
|
|
598
|
-
if (!Array.isArray(window.__wbRrwebBuffer)) return [];
|
|
599
|
-
const out = window.__wbRrwebBuffer;
|
|
600
|
-
window.__wbRrwebBuffer = [];
|
|
601
|
-
return out;
|
|
602
|
-
});
|
|
603
|
-
if (Array.isArray(tail)) {
|
|
604
|
-
for (const e of tail) {
|
|
605
|
-
if (rec.rrwebEvents.length >= RECORDING.rrwebMaxEvents) {
|
|
606
|
-
rec.rrwebEvents.shift();
|
|
607
|
-
rec.rrwebDropped++;
|
|
608
|
-
}
|
|
609
|
-
rec.rrwebEvents.push(e);
|
|
610
|
-
}
|
|
611
|
-
}
|
|
612
|
-
} catch (e) {
|
|
613
|
-
log(`[recording] rrweb final drain failed: ${e.message}`);
|
|
614
|
-
}
|
|
615
|
-
if (rec.rrwebEvents.length > 0) {
|
|
616
|
-
try {
|
|
617
|
-
const json = JSON.stringify({
|
|
618
|
-
run_id: RECORDING.runId,
|
|
619
|
-
session: sessionName,
|
|
620
|
-
event_count: rec.rrwebEvents.length,
|
|
621
|
-
dropped: rec.rrwebDropped,
|
|
622
|
-
events: rec.rrwebEvents,
|
|
623
|
-
});
|
|
624
|
-
rrwebBody = await gzip(Buffer.from(json, "utf8"));
|
|
625
|
-
} catch (e) {
|
|
626
|
-
log(`[recording] rrweb gzip failed: ${e.message}`);
|
|
627
|
-
}
|
|
628
|
-
}
|
|
629
|
-
}
|
|
630
|
-
|
|
631
|
-
let videoBody = null;
|
|
632
|
-
let videoFailure = null;
|
|
633
|
-
if (rec.kinds.video && rec.cdp && rec.ffmpeg) {
|
|
634
|
-
try {
|
|
635
|
-
await rec.cdp.send("Page.stopScreencast");
|
|
636
|
-
} catch {
|
|
637
|
-
// Browser may already be tearing down.
|
|
638
|
-
}
|
|
639
|
-
const timeoutMs =
|
|
640
|
-
Number.parseInt(process.env.WB_RECORDING_FFMPEG_TIMEOUT_MS || "", 10) ||
|
|
641
|
-
30_000;
|
|
642
|
-
try {
|
|
643
|
-
rec.ffmpeg.stdin.end();
|
|
644
|
-
const settled = await Promise.race([
|
|
645
|
-
rec.ffmpegDone,
|
|
646
|
-
new Promise((r) =>
|
|
647
|
-
setTimeout(() => r({ __timeout: true }), timeoutMs),
|
|
648
|
-
),
|
|
649
|
-
]);
|
|
650
|
-
if (settled && typeof settled === "object" && settled.__timeout) {
|
|
651
|
-
log(`[recording] ffmpeg did not exit within ${timeoutMs}ms; killing`);
|
|
652
|
-
try {
|
|
653
|
-
rec.ffmpeg.kill("SIGKILL");
|
|
654
|
-
} catch {}
|
|
655
|
-
videoFailure = `ffmpeg_timeout_${timeoutMs}ms`;
|
|
656
|
-
} else if (typeof settled === "number" && settled !== 0) {
|
|
657
|
-
// ff.on('close') resolves with the exit code — non-zero means ffmpeg
|
|
658
|
-
// produced a corrupt/partial webm that we should not upload.
|
|
659
|
-
videoFailure = `ffmpeg_exit_code_${settled}`;
|
|
660
|
-
log(`[recording] ffmpeg exited with code ${settled}`);
|
|
661
|
-
}
|
|
662
|
-
if (!videoFailure && rec.videoPath && existsSync(rec.videoPath)) {
|
|
663
|
-
videoBody = await fsPromises.readFile(rec.videoPath);
|
|
664
|
-
}
|
|
665
|
-
if (rec.videoPath && existsSync(rec.videoPath)) {
|
|
666
|
-
try {
|
|
667
|
-
await fsPromises.unlink(rec.videoPath);
|
|
668
|
-
} catch {}
|
|
669
|
-
}
|
|
670
|
-
} catch (e) {
|
|
671
|
-
videoFailure = `finalize_error: ${e.message}`;
|
|
672
|
-
log(`[recording] video finalize failed: ${e.message}`);
|
|
673
|
-
}
|
|
674
|
-
}
|
|
675
|
-
|
|
676
|
-
const uploads = [];
|
|
677
|
-
if (rrwebBody) {
|
|
678
|
-
uploads.push(
|
|
679
|
-
uploadArtifact(
|
|
680
|
-
"rrweb",
|
|
681
|
-
rrwebBody,
|
|
682
|
-
"application/json+gzip",
|
|
683
|
-
sessionName,
|
|
684
|
-
{ event_count: rec.rrwebEvents.length },
|
|
685
|
-
),
|
|
686
|
-
);
|
|
687
|
-
}
|
|
688
|
-
if (videoBody) {
|
|
689
|
-
uploads.push(
|
|
690
|
-
uploadArtifact("video", videoBody, "video/webm", sessionName, {
|
|
691
|
-
fps: RECORDING.fps,
|
|
692
|
-
}),
|
|
693
|
-
);
|
|
694
|
-
} else if (videoFailure) {
|
|
695
|
-
// Surface a terminal recording failure to the callback stream so the
|
|
696
|
-
// consumer knows the video was lost rather than silently missing.
|
|
697
|
-
send({
|
|
698
|
-
type: "slice.recording.failed",
|
|
699
|
-
session: sessionName,
|
|
700
|
-
run_id: RECORDING.runId,
|
|
701
|
-
kind: "video",
|
|
702
|
-
reason: videoFailure,
|
|
703
|
-
});
|
|
704
|
-
}
|
|
705
|
-
await Promise.allSettled(uploads);
|
|
706
|
-
}
|
|
707
|
-
|
|
708
|
-
async function uploadArtifact(kind, body, contentType, sessionName, extra) {
|
|
709
|
-
const url = RECORDING.uploadUrl
|
|
710
|
-
.replace("{run_id}", encodeURIComponent(RECORDING.runId))
|
|
711
|
-
.replace("{kind}", encodeURIComponent(kind));
|
|
712
|
-
try {
|
|
713
|
-
const res = await retryableFetch(
|
|
714
|
-
url,
|
|
715
|
-
{
|
|
716
|
-
method: "POST",
|
|
717
|
-
headers: {
|
|
718
|
-
Authorization: `Bearer ${RECORDING.secret}`,
|
|
719
|
-
"Content-Type": contentType,
|
|
720
|
-
"X-WB-Run-Id": RECORDING.runId,
|
|
721
|
-
"X-WB-Recording-Kind": kind,
|
|
722
|
-
"X-WB-Session": sessionName,
|
|
723
|
-
},
|
|
724
|
-
body,
|
|
725
|
-
},
|
|
726
|
-
`upload.${kind}`,
|
|
727
|
-
{ timeoutMs: 30_000 },
|
|
728
|
-
);
|
|
729
|
-
if (!res.ok) {
|
|
730
|
-
send({
|
|
731
|
-
type: "slice.recording.failed",
|
|
732
|
-
session: sessionName,
|
|
733
|
-
run_id: RECORDING.runId,
|
|
734
|
-
kind,
|
|
735
|
-
status: res.status,
|
|
736
|
-
reason: (await safeText(res)) || res.statusText || "upload rejected",
|
|
737
|
-
});
|
|
738
|
-
return;
|
|
739
|
-
}
|
|
740
|
-
send({
|
|
741
|
-
type: "slice.recording.uploaded",
|
|
742
|
-
session: sessionName,
|
|
743
|
-
run_id: RECORDING.runId,
|
|
744
|
-
kind,
|
|
745
|
-
bytes: body.length,
|
|
746
|
-
...(extra || {}),
|
|
747
|
-
});
|
|
748
|
-
} catch (e) {
|
|
749
|
-
send({
|
|
750
|
-
type: "slice.recording.failed",
|
|
751
|
-
session: sessionName,
|
|
752
|
-
run_id: RECORDING.runId,
|
|
753
|
-
kind,
|
|
754
|
-
reason: e.name === "AbortError" ? "timeout" : e.message,
|
|
755
|
-
});
|
|
756
|
-
}
|
|
757
|
-
}
|
|
758
|
-
|
|
759
|
-
function sanitize(s) {
|
|
760
|
-
return String(s || "default").replace(/[^A-Za-z0-9_-]+/g, "_");
|
|
118
|
+
});
|
|
761
119
|
}
|
|
762
|
-
|
|
763
120
|
// --- {{ env.X }} / {{ artifacts.X }} substitution --------------------------
|
|
764
121
|
|
|
765
122
|
const ENV_RE = /\{\{\s*env\.([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
@@ -767,14 +124,6 @@ const ENV_RE = /\{\{\s*env\.([A-Za-z_][A-Za-z0-9_]*)\s*\}\}/g;
|
|
|
767
124
|
// exotic would invite path traversal once composed with WB_ARTIFACTS_DIR.
|
|
768
125
|
const ARTIFACT_RE = /\{\{\s*artifacts\.([A-Za-z_][A-Za-z0-9_-]*)\s*\}\}/g;
|
|
769
126
|
|
|
770
|
-
function resolveInside(dir, candidate) {
|
|
771
|
-
const resolvedDir = path.resolve(dir);
|
|
772
|
-
const resolved = path.resolve(resolvedDir, candidate);
|
|
773
|
-
const rel = path.relative(resolvedDir, resolved);
|
|
774
|
-
if (rel === "" || rel.startsWith("..") || path.isAbsolute(rel)) return null;
|
|
775
|
-
return resolved;
|
|
776
|
-
}
|
|
777
|
-
|
|
778
127
|
// Resolved once at module load. `warn` matches historical behavior
|
|
779
128
|
// (log + empty string, runbook continues). `error` throws so a missing OTP
|
|
780
129
|
// or env var fails the slice instead of silently sending an empty value
|
|
@@ -871,251 +220,6 @@ function scrubSecrets(msg, secrets) {
|
|
|
871
220
|
return out;
|
|
872
221
|
}
|
|
873
222
|
|
|
874
|
-
// --- Verb dispatch ----------------------------------------------------------
|
|
875
|
-
|
|
876
|
-
function verbName(verb) {
|
|
877
|
-
if (!verb || typeof verb !== "object") return String(verb);
|
|
878
|
-
return Object.keys(verb)[0] || "verb";
|
|
879
|
-
}
|
|
880
|
-
|
|
881
|
-
// Most verbs accept either a bare string ("goto: https://...") or a structured
|
|
882
|
-
// object ("goto: { url: ..., wait_until: ... }"). This pulls the canonical
|
|
883
|
-
// field out of either shape.
|
|
884
|
-
function arg(value, primaryKey) {
|
|
885
|
-
if (typeof value === "string") return { [primaryKey]: value };
|
|
886
|
-
if (value && typeof value === "object") return value;
|
|
887
|
-
return {};
|
|
888
|
-
}
|
|
889
|
-
|
|
890
|
-
async function runVerb(page, verb, index, ctx) {
|
|
891
|
-
const name = verbName(verb);
|
|
892
|
-
const raw = verb[name];
|
|
893
|
-
const a = expand(
|
|
894
|
-
arg(raw, defaultKey(name)),
|
|
895
|
-
ctx?.secrets,
|
|
896
|
-
ctx?.artifactCache,
|
|
897
|
-
);
|
|
898
|
-
|
|
899
|
-
switch (name) {
|
|
900
|
-
case "goto": {
|
|
901
|
-
const url = a.url ?? "";
|
|
902
|
-
const waitUntil = a.wait_until ?? "domcontentloaded";
|
|
903
|
-
await page.goto(url, { waitUntil, timeout: a.timeout ?? 30_000 });
|
|
904
|
-
return `→ ${page.url()}`;
|
|
905
|
-
}
|
|
906
|
-
case "fill": {
|
|
907
|
-
// Don't echo the value into the summary — could be a credential.
|
|
908
|
-
await page.fill(a.selector, String(a.value ?? ""), {
|
|
909
|
-
timeout: a.timeout ?? 10_000,
|
|
910
|
-
});
|
|
911
|
-
return `${a.selector} = «${redact(a.value)}»`;
|
|
912
|
-
}
|
|
913
|
-
case "click": {
|
|
914
|
-
await page.click(a.selector, { timeout: a.timeout ?? 10_000 });
|
|
915
|
-
return `${a.selector}`;
|
|
916
|
-
}
|
|
917
|
-
case "press": {
|
|
918
|
-
const target = a.selector ?? "body";
|
|
919
|
-
await page.press(target, a.key, { timeout: a.timeout ?? 5_000 });
|
|
920
|
-
return `${target} ⌨ ${a.key}`;
|
|
921
|
-
}
|
|
922
|
-
case "wait_for": {
|
|
923
|
-
const selector = a.selector;
|
|
924
|
-
const state = a.state ?? "visible";
|
|
925
|
-
await page.waitForSelector(selector, {
|
|
926
|
-
state,
|
|
927
|
-
timeout: a.timeout ?? 15_000,
|
|
928
|
-
});
|
|
929
|
-
return `${selector} (${state})`;
|
|
930
|
-
}
|
|
931
|
-
case "screenshot": {
|
|
932
|
-
// Always resolve inside $WB_ARTIFACTS_DIR (or cwd when unset). Absolute
|
|
933
|
-
// paths and traversals are rejected — screenshots are controlled by
|
|
934
|
-
// runbook authors whose content we don't want to grant arbitrary-write.
|
|
935
|
-
const requested = a.path ?? `screenshot-${Date.now()}.png`;
|
|
936
|
-
const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim() || ".";
|
|
937
|
-
if (path.isAbsolute(requested)) {
|
|
938
|
-
throw new Error(
|
|
939
|
-
`screenshot: absolute paths are not allowed (got ${requested})`,
|
|
940
|
-
);
|
|
941
|
-
}
|
|
942
|
-
const full = resolveInside(artifactsDir, requested);
|
|
943
|
-
if (!full) {
|
|
944
|
-
throw new Error(
|
|
945
|
-
`screenshot: path escapes artifacts dir (got ${requested})`,
|
|
946
|
-
);
|
|
947
|
-
}
|
|
948
|
-
await fsPromises.mkdir(path.dirname(full), { recursive: true });
|
|
949
|
-
// Atomic write via tmp + rename so a crash mid-capture can't leave a
|
|
950
|
-
// truncated PNG that's already been announced via slice.artifact_saved
|
|
951
|
-
// and uploaded to R2. We capture to a Buffer (with `type` derived from
|
|
952
|
-
// the requested extension) and write it ourselves — passing a `.tmp`
|
|
953
|
-
// path directly to Playwright fails because it infers format from the
|
|
954
|
-
// file extension and rejects unknown ones.
|
|
955
|
-
const ext = path.extname(full).toLowerCase();
|
|
956
|
-
const type = ext === ".jpg" || ext === ".jpeg" ? "jpeg" : "png";
|
|
957
|
-
const tmp = `${full}.${process.pid}.${randomUUID().slice(0, 8)}.tmp`;
|
|
958
|
-
try {
|
|
959
|
-
const buf = await page.screenshot({ type, fullPage: !!a.full_page });
|
|
960
|
-
await fsPromises.writeFile(tmp, buf);
|
|
961
|
-
await fsPromises.rename(tmp, full);
|
|
962
|
-
} catch (e) {
|
|
963
|
-
try {
|
|
964
|
-
await fsPromises.unlink(tmp);
|
|
965
|
-
} catch {}
|
|
966
|
-
throw e;
|
|
967
|
-
}
|
|
968
|
-
return `→ ${requested}`;
|
|
969
|
-
}
|
|
970
|
-
case "extract": {
|
|
971
|
-
// Pull structured rows out of the page. Each `field` entry is either:
|
|
972
|
-
// string — CSS selector relative to row, take textContent
|
|
973
|
-
// { selector, attr } — CSS selector relative to row, take attribute
|
|
974
|
-
// { selector, text: true } — explicit textContent (default)
|
|
975
|
-
const rowSelector = a.selector;
|
|
976
|
-
const fields = a.fields ?? {};
|
|
977
|
-
const items = await page.$$eval(
|
|
978
|
-
rowSelector,
|
|
979
|
-
(rows, fieldSpec) =>
|
|
980
|
-
rows.map((row) => {
|
|
981
|
-
const out = {};
|
|
982
|
-
for (const [name, spec] of Object.entries(fieldSpec)) {
|
|
983
|
-
const sel = typeof spec === "string" ? spec : spec.selector;
|
|
984
|
-
const attr = typeof spec === "string" ? null : spec.attr ?? null;
|
|
985
|
-
const el = sel ? row.querySelector(sel) : row;
|
|
986
|
-
if (!el) {
|
|
987
|
-
out[name] = null;
|
|
988
|
-
continue;
|
|
989
|
-
}
|
|
990
|
-
out[name] = attr
|
|
991
|
-
? el.getAttribute(attr)
|
|
992
|
-
: (el.textContent || "").trim();
|
|
993
|
-
}
|
|
994
|
-
return out;
|
|
995
|
-
}),
|
|
996
|
-
fields,
|
|
997
|
-
);
|
|
998
|
-
// Emit as JSON to stdout so wb captures it in step.complete.stdout.
|
|
999
|
-
// Pretty-printed for readability when a runbook surfaces the output.
|
|
1000
|
-
console.log(JSON.stringify(items, null, 2));
|
|
1001
|
-
if (ctx) ctx.lastResult = items;
|
|
1002
|
-
return `${rowSelector} → ${items.length} rows`;
|
|
1003
|
-
}
|
|
1004
|
-
case "assert": {
|
|
1005
|
-
const sel = a.selector;
|
|
1006
|
-
const handle = await page.$(sel);
|
|
1007
|
-
if (!handle) throw new Error(`assert: selector not found: ${sel}`);
|
|
1008
|
-
if (a.text_contains) {
|
|
1009
|
-
const txt = (await handle.textContent()) ?? "";
|
|
1010
|
-
if (!txt.includes(a.text_contains)) {
|
|
1011
|
-
throw new Error(
|
|
1012
|
-
`assert: text "${a.text_contains}" not in ${sel} (got "${txt.slice(0, 80)}")`,
|
|
1013
|
-
);
|
|
1014
|
-
}
|
|
1015
|
-
}
|
|
1016
|
-
if (a.url_contains && !page.url().includes(a.url_contains)) {
|
|
1017
|
-
throw new Error(
|
|
1018
|
-
`assert: url does not contain "${a.url_contains}" (got ${page.url()})`,
|
|
1019
|
-
);
|
|
1020
|
-
}
|
|
1021
|
-
return `${sel}`;
|
|
1022
|
-
}
|
|
1023
|
-
case "eval": {
|
|
1024
|
-
// Run arbitrary JS in the page; result is JSON-serialized to stdout.
|
|
1025
|
-
const result = await page.evaluate(a.script);
|
|
1026
|
-
console.log(JSON.stringify(result, null, 2));
|
|
1027
|
-
if (ctx) ctx.lastResult = result;
|
|
1028
|
-
return `script ran`;
|
|
1029
|
-
}
|
|
1030
|
-
case "save": {
|
|
1031
|
-
// Persist a JSON artifact into $WB_ARTIFACTS_DIR so later cells can read
|
|
1032
|
-
// it and wb can upload it. Captures the previous verb's output unless
|
|
1033
|
-
// the author provides an explicit `value:`.
|
|
1034
|
-
const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
|
|
1035
|
-
if (!artifactsDir) {
|
|
1036
|
-
throw new Error(
|
|
1037
|
-
"save: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
|
|
1038
|
-
);
|
|
1039
|
-
}
|
|
1040
|
-
const explicitValue = a.value !== undefined;
|
|
1041
|
-
const payload = explicitValue ? a.value : ctx?.lastResult;
|
|
1042
|
-
if (payload === undefined) {
|
|
1043
|
-
throw new Error(
|
|
1044
|
-
"save: no value provided and no prior extract/eval result to capture",
|
|
1045
|
-
);
|
|
1046
|
-
}
|
|
1047
|
-
const name =
|
|
1048
|
-
typeof a.name === "string" && a.name.trim().length > 0
|
|
1049
|
-
? sanitizeArtifactName(a.name)
|
|
1050
|
-
: autoArtifactName(ctx?.blockIndex ?? index);
|
|
1051
|
-
const filename = name.endsWith(".json") ? name : `${name}.json`;
|
|
1052
|
-
const full = path.join(artifactsDir, filename);
|
|
1053
|
-
await fsPromises.mkdir(artifactsDir, { recursive: true });
|
|
1054
|
-
// Atomic write: serialize to .tmp, then rename. Announce the artifact
|
|
1055
|
-
// AFTER rename so a partial write can never be seen by wb's uploader.
|
|
1056
|
-
const serialized = JSON.stringify(payload, null, 2);
|
|
1057
|
-
const tmp = `${full}.${process.pid}.${randomUUID().slice(0, 8)}.tmp`;
|
|
1058
|
-
try {
|
|
1059
|
-
await fsPromises.writeFile(tmp, serialized, "utf8");
|
|
1060
|
-
await fsPromises.rename(tmp, full);
|
|
1061
|
-
} catch (e) {
|
|
1062
|
-
try {
|
|
1063
|
-
await fsPromises.unlink(tmp);
|
|
1064
|
-
} catch {}
|
|
1065
|
-
throw e;
|
|
1066
|
-
}
|
|
1067
|
-
send({
|
|
1068
|
-
type: "slice.artifact_saved",
|
|
1069
|
-
filename,
|
|
1070
|
-
path: full,
|
|
1071
|
-
bytes: Buffer.byteLength(serialized),
|
|
1072
|
-
});
|
|
1073
|
-
return `→ ${filename}`;
|
|
1074
|
-
}
|
|
1075
|
-
default:
|
|
1076
|
-
throw new Error(`unsupported verb: ${name}`);
|
|
1077
|
-
}
|
|
1078
|
-
}
|
|
1079
|
-
|
|
1080
|
-
function sanitizeArtifactName(s) {
|
|
1081
|
-
// Keep author-chosen names readable but safe as filenames. Drop anything
|
|
1082
|
-
// that could escape the artifacts dir (slashes, NULs, etc.).
|
|
1083
|
-
return String(s).replace(/[^A-Za-z0-9_.-]+/g, "_").slice(0, 200);
|
|
1084
|
-
}
|
|
1085
|
-
|
|
1086
|
-
function autoArtifactName(blockIndex) {
|
|
1087
|
-
const rand = randomUUID().replace(/-/g, "").slice(0, 8);
|
|
1088
|
-
const n = Number.isFinite(blockIndex) ? blockIndex : 0;
|
|
1089
|
-
return `cell-${n}-${rand}`;
|
|
1090
|
-
}
|
|
1091
|
-
|
|
1092
|
-
function defaultKey(name) {
|
|
1093
|
-
switch (name) {
|
|
1094
|
-
case "goto":
|
|
1095
|
-
return "url";
|
|
1096
|
-
case "click":
|
|
1097
|
-
case "wait_for":
|
|
1098
|
-
case "assert":
|
|
1099
|
-
return "selector";
|
|
1100
|
-
case "screenshot":
|
|
1101
|
-
return "path";
|
|
1102
|
-
case "press":
|
|
1103
|
-
return "key";
|
|
1104
|
-
case "eval":
|
|
1105
|
-
return "script";
|
|
1106
|
-
case "save":
|
|
1107
|
-
return "name";
|
|
1108
|
-
default:
|
|
1109
|
-
return "value";
|
|
1110
|
-
}
|
|
1111
|
-
}
|
|
1112
|
-
|
|
1113
|
-
function redact(value) {
|
|
1114
|
-
if (typeof value !== "string") return "";
|
|
1115
|
-
if (value.length <= 4) return "***";
|
|
1116
|
-
return `${value.slice(0, 2)}***`;
|
|
1117
|
-
}
|
|
1118
|
-
|
|
1119
223
|
// --- Slice handler ----------------------------------------------------------
|
|
1120
224
|
|
|
1121
225
|
async function handleSlice(msg) {
|
|
@@ -1133,6 +237,15 @@ async function handleSlice(msg) {
|
|
|
1133
237
|
// slices is seen on the next slice's first read.
|
|
1134
238
|
artifactCache: new Map(),
|
|
1135
239
|
};
|
|
240
|
+
// Per-slice wall-clock cap. Rust's SLICE_EVENT_TIMEOUT is per-event (resets
|
|
241
|
+
// on every verb.complete), so a chain of 25 × 15s wait_fors that each emit
|
|
242
|
+
// a frame never trips it — the sidecar just runs for 375s while the Rust
|
|
243
|
+
// parent assumes progress. Cap aggregate slice time so we terminate cleanly
|
|
244
|
+
// instead. Default 120s; operators who legitimately need longer can bump
|
|
245
|
+
// via WB_SLICE_DEADLINE_MS.
|
|
246
|
+
const sliceDeadlineMs =
|
|
247
|
+
Number.parseInt(process.env.WB_SLICE_DEADLINE_MS || "", 10) || 120_000;
|
|
248
|
+
const sliceDeadline = Date.now() + sliceDeadlineMs;
|
|
1136
249
|
// Top-level guard: any unhandled error must emit slice.failed so the Rust
|
|
1137
250
|
// side sees a terminal frame instead of waiting out SLICE_EVENT_TIMEOUT.
|
|
1138
251
|
try {
|
|
@@ -1142,7 +255,7 @@ async function handleSlice(msg) {
|
|
|
1142
255
|
|
|
1143
256
|
let session;
|
|
1144
257
|
try {
|
|
1145
|
-
session = await ensureSession(sessionName);
|
|
258
|
+
session = await ensureSession(sessionName, { profile: msg.profile });
|
|
1146
259
|
} catch (e) {
|
|
1147
260
|
send({
|
|
1148
261
|
type: "slice.failed",
|
|
@@ -1151,29 +264,73 @@ async function handleSlice(msg) {
|
|
|
1151
264
|
return;
|
|
1152
265
|
}
|
|
1153
266
|
|
|
1154
|
-
// Restore-from-pause
|
|
1155
|
-
//
|
|
1156
|
-
//
|
|
1157
|
-
|
|
267
|
+
// Restore-from-pause: when the Rust side resumes us after a
|
|
268
|
+
// `slice.paused` frame, `restore.state.verb_index` is the index of the
|
|
269
|
+
// verb that paused. We skip *past* it — the verb has no post-resume
|
|
270
|
+
// work (any payload from the operator is already in
|
|
271
|
+
// $WB_ARTIFACTS_DIR/pause_result.json, written by `wb resume` before
|
|
272
|
+
// it re-boots the sidecar). Skipping keeps pause verbs pure: their
|
|
273
|
+
// only job is "halt now," not "halt, then continue."
|
|
274
|
+
const startAt =
|
|
275
|
+
restore?.state?.verb_index !== undefined
|
|
276
|
+
? Number(restore.state.verb_index) + 1
|
|
277
|
+
: 0;
|
|
1158
278
|
|
|
1159
279
|
for (let i = startAt; i < verbs.length; i++) {
|
|
280
|
+
if (Date.now() >= sliceDeadline) {
|
|
281
|
+
send({
|
|
282
|
+
type: "slice.failed",
|
|
283
|
+
error: `slice exceeded deadline (${sliceDeadlineMs}ms); aborted before verb index ${i} of ${verbs.length}`,
|
|
284
|
+
});
|
|
285
|
+
return;
|
|
286
|
+
}
|
|
1160
287
|
const v = verbs[i];
|
|
1161
288
|
const name = verbName(v);
|
|
289
|
+
const verbStart = Date.now();
|
|
1162
290
|
try {
|
|
1163
|
-
const summary = await runVerb(session.page, v, i, sliceCtx);
|
|
291
|
+
const summary = await runVerb(session.page, v, i, sliceCtx, expand);
|
|
292
|
+
// Pause-sentinel escape hatch: a verb signals a mid-slice halt by
|
|
293
|
+
// returning `{ __pause: {...} }`. We translate that into a
|
|
294
|
+
// `slice.paused` frame (so the Rust side writes a pending
|
|
295
|
+
// descriptor and exits 42) and bail out of the verb loop without
|
|
296
|
+
// firing `slice.complete`. Non-pause verbs hand back a plain
|
|
297
|
+
// summary and the loop proceeds normally.
|
|
298
|
+
if (summary && typeof summary === "object" && summary.__pause) {
|
|
299
|
+
const pauseMeta = summary.__pause;
|
|
300
|
+
send({
|
|
301
|
+
type: "slice.paused",
|
|
302
|
+
reason: pauseMeta.reason || "slice.paused",
|
|
303
|
+
message: pauseMeta.message || "",
|
|
304
|
+
context_url: pauseMeta.context_url ?? null,
|
|
305
|
+
resume_on: pauseMeta.resume_on || "operator_click",
|
|
306
|
+
timeout: pauseMeta.timeout ?? null,
|
|
307
|
+
actions: pauseMeta.actions || [{ label: "Resume", value: null }],
|
|
308
|
+
verb: name,
|
|
309
|
+
verb_index: i,
|
|
310
|
+
// `sidecar_state` is forwarded verbatim into the Rust pending
|
|
311
|
+
// descriptor and handed back on resume. The verb can stash
|
|
312
|
+
// whatever it needs here; we always ensure verb_index is set
|
|
313
|
+
// so the dispatcher can compute startAt on re-entry.
|
|
314
|
+
sidecar_state: { ...(pauseMeta.sidecar_state || {}), verb_index: i },
|
|
315
|
+
});
|
|
316
|
+
return;
|
|
317
|
+
}
|
|
1164
318
|
send({
|
|
1165
319
|
type: "verb.complete",
|
|
1166
320
|
verb: name,
|
|
1167
321
|
verb_index: i,
|
|
1168
322
|
summary,
|
|
323
|
+
duration_ms: Date.now() - verbStart,
|
|
1169
324
|
});
|
|
1170
325
|
} catch (e) {
|
|
326
|
+
const duration_ms = Date.now() - verbStart;
|
|
1171
327
|
const clean = scrubSecrets(e.message, sliceCtx.secrets);
|
|
1172
328
|
send({
|
|
1173
329
|
type: "verb.failed",
|
|
1174
330
|
verb: name,
|
|
1175
331
|
verb_index: i,
|
|
1176
332
|
error: clean,
|
|
333
|
+
duration_ms,
|
|
1177
334
|
});
|
|
1178
335
|
send({
|
|
1179
336
|
type: "slice.failed",
|
|
@@ -1204,9 +361,19 @@ async function shutdown() {
|
|
|
1204
361
|
// live page.evaluate() and CDP screencast needs a live CDPSession.
|
|
1205
362
|
for (const [name, info] of sessions) {
|
|
1206
363
|
try {
|
|
1207
|
-
await
|
|
364
|
+
await recording.flush(info, name);
|
|
1208
365
|
} catch (e) {
|
|
1209
366
|
log(`[shutdown] flush recording ${name}: ${e.message}`);
|
|
367
|
+
// Unhandled flush error → consumer would otherwise see neither an
|
|
368
|
+
// uploaded nor a failed event and have to infer loss from absence.
|
|
369
|
+
try {
|
|
370
|
+
send({
|
|
371
|
+
type: "slice.recording.failed",
|
|
372
|
+
session: name,
|
|
373
|
+
run_id: recording.runId,
|
|
374
|
+
reason: `finalize_error: ${e.message}`,
|
|
375
|
+
});
|
|
376
|
+
} catch {}
|
|
1210
377
|
}
|
|
1211
378
|
}
|
|
1212
379
|
for (const [name, info] of sessions) {
|
|
@@ -1216,10 +383,10 @@ async function shutdown() {
|
|
|
1216
383
|
log(`[shutdown] close ${name}: ${e.message}`);
|
|
1217
384
|
}
|
|
1218
385
|
}
|
|
1219
|
-
// Ask
|
|
386
|
+
// Ask the vendor to release sessions explicitly so quota isn't held by
|
|
1220
387
|
// orphans waiting for their idle timeout.
|
|
1221
388
|
await Promise.all(
|
|
1222
|
-
Array.from(sessions.values()).map((s) =>
|
|
389
|
+
Array.from(sessions.values()).map((s) => provider.release(s.sid)),
|
|
1223
390
|
);
|
|
1224
391
|
process.exit(0);
|
|
1225
392
|
}
|
|
@@ -1228,21 +395,35 @@ async function shutdown() {
|
|
|
1228
395
|
|
|
1229
396
|
const rl = readline.createInterface({ input: process.stdin, terminal: false });
|
|
1230
397
|
|
|
1231
|
-
//
|
|
1232
|
-
//
|
|
1233
|
-
|
|
1234
|
-
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1238
|
-
|
|
1239
|
-
|
|
398
|
+
// Per-session dispatch: slices against the same session name serialize
|
|
399
|
+
// (shared Playwright page), slices against different names run in parallel.
|
|
400
|
+
// SessionManager owns the chain map + the in-flight-create dedup that makes
|
|
401
|
+
// this safe — two concurrent slices for "vendor-a" share one provider.allocate
|
|
402
|
+
// instead of racing to create two vendor sessions.
|
|
403
|
+
function dispatchSlice(msg) {
|
|
404
|
+
const sessionName = msg.session || "default";
|
|
405
|
+
return sessions
|
|
406
|
+
.enqueueOn(sessionName, () => handleSlice(msg))
|
|
407
|
+
.catch((e) => {
|
|
408
|
+
// handleSlice has its own top-level guard that emits slice.failed;
|
|
409
|
+
// this is the last-resort net for a bug that throws past that guard,
|
|
410
|
+
// so the Rust parent never strands waiting on SLICE_EVENT_TIMEOUT.
|
|
411
|
+
log(`[loop] ${e.stack || e.message}`);
|
|
1240
412
|
try {
|
|
1241
413
|
send({ type: "slice.failed", error: `sidecar loop error: ${e.message}` });
|
|
1242
414
|
} catch {}
|
|
1243
|
-
}
|
|
1244
|
-
|
|
1245
|
-
|
|
415
|
+
});
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
// Shutdown drains all pending per-session work, then tears down. Guarded
|
|
419
|
+
// against repeat entries via `shuttingDown` inside shutdown() itself.
|
|
420
|
+
async function drainAndShutdown() {
|
|
421
|
+
try {
|
|
422
|
+
await sessions.drainAll();
|
|
423
|
+
} catch (e) {
|
|
424
|
+
log(`[shutdown] drain failed: ${e.message}`);
|
|
425
|
+
}
|
|
426
|
+
await shutdown();
|
|
1246
427
|
}
|
|
1247
428
|
|
|
1248
429
|
rl.on("line", (line) => {
|
|
@@ -1267,10 +448,10 @@ rl.on("line", (line) => {
|
|
|
1267
448
|
});
|
|
1268
449
|
break;
|
|
1269
450
|
case "slice":
|
|
1270
|
-
|
|
451
|
+
dispatchSlice(msg);
|
|
1271
452
|
break;
|
|
1272
453
|
case "shutdown":
|
|
1273
|
-
|
|
454
|
+
drainAndShutdown();
|
|
1274
455
|
break;
|
|
1275
456
|
default:
|
|
1276
457
|
log(`[warn] unknown message type: ${msg.type}`);
|
|
@@ -1279,7 +460,7 @@ rl.on("line", (line) => {
|
|
|
1279
460
|
|
|
1280
461
|
rl.on("close", () => {
|
|
1281
462
|
// stdin closed — drain pending work then exit.
|
|
1282
|
-
|
|
463
|
+
drainAndShutdown();
|
|
1283
464
|
});
|
|
1284
465
|
|
|
1285
466
|
// If the Rust parent SIGTERMs us (timeout, abort, crash), Node's default is
|
|
@@ -1288,7 +469,7 @@ rl.on("close", () => {
|
|
|
1288
469
|
for (const sig of ["SIGTERM", "SIGINT", "SIGHUP"]) {
|
|
1289
470
|
process.on(sig, () => {
|
|
1290
471
|
log(`[shutdown] received ${sig}`);
|
|
1291
|
-
|
|
472
|
+
drainAndShutdown();
|
|
1292
473
|
});
|
|
1293
474
|
}
|
|
1294
475
|
|