wb-browser-runtime 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +104 -2
- package/bin/wb-browser-runtime.js +29 -106
- package/lib/recording-manager.js +39 -4
- package/lib/signed-url-capture.js +275 -0
- package/lib/substitution.js +128 -0
- package/package.json +1 -1
- package/verbs/download.js +596 -0
- package/verbs/index.js +2 -0
package/README.md
CHANGED
|
@@ -122,6 +122,8 @@ Verb arguments support two substitutions at dispatch time:
|
|
|
122
122
|
|
|
123
123
|
Both forms are redacted in stdout summaries — only the verb name + selector make it into the log. Expanded values are also scrubbed from `verb.failed` / `slice.failed` error messages before they cross the stdio boundary.
|
|
124
124
|
|
|
125
|
+
**Escaping.** To emit a literal `{{ … }}` that should *not* be substituted, prefix it with a backslash: `\{{ env.X }}` round-trips to the literal text `{{ env.X }}`. The escape is a single left-to-right pass, so the braces it produces are not re-scanned.
|
|
126
|
+
|
|
125
127
|
**Missing-value policy.** Set `WB_SUBSTITUTION_ON_MISSING` to choose how a missing `env.X` or `artifacts.X` is handled:
|
|
126
128
|
|
|
127
129
|
- `warn` (default) — log a stderr warning and substitute an empty string; the verb continues.
|
|
@@ -166,10 +168,16 @@ endpoint at session close. Recording is **off by default** — set
|
|
|
166
168
|
| `WB_RECORDING_SCREENCAST_QUALITY` | `60` | JPEG quality (0–100). |
|
|
167
169
|
| `WB_RECORDING_RRWEB` | `1` | Set `0` to skip rrweb even if recording is on. |
|
|
168
170
|
| `WB_RECORDING_VIDEO` | `0` if no `ffmpeg` | Set `0` to skip video even if `ffmpeg` is present. |
|
|
171
|
+
| `WB_RECORDING_MASK_ALL_INPUTS` | `1` | rrweb `maskAllInputs`; typed input values are masked by default. Set `0` to record input *values* (value recording is off by default for safety). |
|
|
172
|
+
| `WB_RECORDING_MASK_TEXT_SELECTOR` | *(unset)* | CSS selector whose **text content** rrweb masks (e.g. `.ssn, .acct-balance`). |
|
|
173
|
+
| `WB_RECORDING_BLOCK_SELECTOR` | *(unset)* | CSS selector rrweb records as an inert placeholder (contents never captured). |
|
|
174
|
+
| `WB_RECORDING_IGNORE_SELECTOR` | *(unset)* | CSS selector whose input events rrweb drops entirely. |
|
|
169
175
|
|
|
170
176
|
Artifacts are two parallel POSTs per session, `kind ∈ {rrweb, video}`:
|
|
171
177
|
|
|
172
|
-
- **rrweb** — gzipped JSON (`application/json+gzip`) — `{ run_id, session, event_count, events: [...] }`. DOM mutations + input events captured from every page
|
|
178
|
+
- **rrweb** — gzipped JSON (`application/json+gzip`) — `{ run_id, session, event_count, events: [...] }`. DOM mutations + input events captured from every page.
|
|
179
|
+
|
|
180
|
+
**PII scope — read this.** `maskAllInputs` (on by default) only redacts the *values* a user types into form fields. It does **not** mask field labels, placeholders, `aria-label`s, `<option>` text, or any other rendered text, and it does not alter the recorded DOM structure. A displayed account number, balance, or name that is page text — not an input value — is captured verbatim. For those, point rrweb at the sensitive nodes with `WB_RECORDING_MASK_TEXT_SELECTOR` (mask the text) or `WB_RECORDING_BLOCK_SELECTOR` (omit the subtree). When in doubt, block the region.
|
|
173
181
|
- **video** — VP9 WebM (`video/webm`) — encoded from JPEG screencast frames via `ffmpeg`. Requires `ffmpeg` on `$PATH` (droplet install: `apt-get install -y ffmpeg`). If `ffmpeg` is missing the video kind silently disables and rrweb continues alone.
|
|
174
182
|
|
|
175
183
|
Each POST carries headers `Authorization: Bearer <secret>`,
|
|
@@ -208,6 +216,7 @@ example, see the `browserbase-hn-upvoted-probe` runbook in the xatabase repo.
|
|
|
208
216
|
| `assert` | `assert: <selector>` | `selector`, `text_contains`, `url_contains` |
|
|
209
217
|
| `eval` | `eval: <js>` | `script` |
|
|
210
218
|
| `save` | `save: <name>` | `name`, `value` (captures prior `extract`/`eval` when omitted) |
|
|
219
|
+
| `download` | `download: <selector>` | `selector`, `path`, `timeout`, `text_fallback`, `signed_url` (clicks + races Playwright `download` event, in-page blob/anchor capture, and signed-URL export capture; saves into `$WB_ARTIFACTS_DIR/<path>`) |
|
|
211
220
|
|
|
212
221
|
`extract`'s `fields` entries are either a CSS selector string (returns
|
|
213
222
|
`textContent`), or `{ selector, attr }` to read an attribute.
|
|
@@ -280,6 +289,82 @@ emitted as `slice.download_skipped` (with `reason:
|
|
|
280
289
|
"extension_not_in_allowlist"`) so the operator sees what was discarded.
|
|
281
290
|
Unset = capture everything.
|
|
282
291
|
|
|
292
|
+
### Explicit `download:` verb
|
|
293
|
+
|
|
294
|
+
The passive listener handles "any file the browser saves" but gives the
|
|
295
|
+
runbook no control over the filename or timing. Use the `download:` verb
|
|
296
|
+
when the runbook needs to click a specific button, save the result at a
|
|
297
|
+
specific path, and fail loudly within ~10s if no file appears:
|
|
298
|
+
|
|
299
|
+
```yaml
|
|
300
|
+
- download:
|
|
301
|
+
selector: 'button:has-text("Download as xlsx")'
|
|
302
|
+
path: pilot-profit-loss.xlsx # written to $WB_ARTIFACTS_DIR/<path>
|
|
303
|
+
timeout: 10s # default
|
|
304
|
+
text_fallback: "Download as xlsx" # like click — fallback when selector is brittle
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
Behaviour:
|
|
308
|
+
|
|
309
|
+
- Installs a page-side blob/anchor capture hook **before** the click so a
|
|
310
|
+
synchronously-dispatched `URL.createObjectURL(blob) + <a download>.click()`
|
|
311
|
+
is observed even when Playwright's own `download` event misses it
|
|
312
|
+
(e.g. `window.location = blobUrl`).
|
|
313
|
+
- Races `page.waitForEvent("download")` against the in-page hook; whichever
|
|
314
|
+
fires first wins.
|
|
315
|
+
- Sets `HANDLED_MARK` on the `Download` so the always-on passive listener
|
|
316
|
+
doesn't double-save.
|
|
317
|
+
- Emits `slice.artifact_saved` with `source: "download"` and
|
|
318
|
+
`provenance.verb_name: "download"`.
|
|
319
|
+
- On timeout: throws with diagnostics (page URL, selector, all
|
|
320
|
+
failure reasons) AND emits a `slice.download_failed` frame.
|
|
321
|
+
|
|
322
|
+
#### Signed-URL export capture
|
|
323
|
+
|
|
324
|
+
Some SaaS "Download" buttons never trip a Playwright `download` event or an
|
|
325
|
+
in-page Blob. Instead the click calls a same-origin API that returns JSON like
|
|
326
|
+
`{ "download_url": "https://bucket.s3.amazonaws.com/…?<signed>" }` and then
|
|
327
|
+
navigates to that URL — and a page-side `fetch(signedUrl)` fails because the
|
|
328
|
+
object store's CORS policy won't let the app origin read the bytes.
|
|
329
|
+
|
|
330
|
+
The `download:` verb adds a **third** capture racer for this: it wraps the
|
|
331
|
+
page's `fetch`/`XHR` around the click, inspects small same-origin JSON
|
|
332
|
+
responses for URL-looking fields, and when it finds one pointing at a
|
|
333
|
+
recognized object-store host (S3, GCS, CloudFront, Azure Blob, R2), it
|
|
334
|
+
downloads the bytes **from the sidecar process** (where CORS doesn't apply)
|
|
335
|
+
and saves them like any other artifact.
|
|
336
|
+
|
|
337
|
+
This is **on by default in `auto` mode** — it only fires when a recognized
|
|
338
|
+
signed host appears in a JSON response around the click, so a normal
|
|
339
|
+
Playwright/blob download is unaffected. Tune or disable it per verb:
|
|
340
|
+
|
|
341
|
+
```yaml
|
|
342
|
+
- download:
|
|
343
|
+
selector: 'button:has-text("Download as xlsx")'
|
|
344
|
+
path: pilot-profit-loss.xlsx
|
|
345
|
+
timeout: 10s
|
|
346
|
+
signed_url:
|
|
347
|
+
enabled: true # true | false | auto (default auto)
|
|
348
|
+
hosts: # extra non-recognized hosts to accept
|
|
349
|
+
- pilot-report-downloads.s3.amazonaws.com
|
|
350
|
+
json_fields: # restrict to these response field names
|
|
351
|
+
- download_url
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
- Set `signed_url: false` to turn the capture off entirely for a verb.
|
|
355
|
+
- In `auto` mode only recognized object-store hosts (or an explicit `hosts:`
|
|
356
|
+
entry) are fetched. With `enabled: true` an explicit `hosts:`/`json_fields:`
|
|
357
|
+
match is honored even for an unrecognized host, since you named it.
|
|
358
|
+
- The captured URL's **query string (where signed credentials live) is
|
|
359
|
+
redacted** everywhere it crosses the stdio boundary — `provenance.signed_url`
|
|
360
|
+
is `origin+path?<redacted>`; the full URL stays only in sidecar memory for the
|
|
361
|
+
fetch. Honors `WB_BROWSER_DOWNLOAD_EXTENSIONS`.
|
|
362
|
+
- The saved frame carries `provenance.capture: "signed_url"` plus `api_url`,
|
|
363
|
+
`field`, `content_type`, and `content_disposition`.
|
|
364
|
+
- A 403 on the signed URL (expired token) emits `slice.download_failed` with
|
|
365
|
+
`expired: true` and `http_status: 403` so the operator knows to shorten the
|
|
366
|
+
click→fetch gap.
|
|
367
|
+
|
|
283
368
|
## Protocol
|
|
284
369
|
|
|
285
370
|
Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
|
|
@@ -290,9 +375,26 @@ opaque diagnostics by `wb` and printed dimmed to the user's terminal.
|
|
|
290
375
|
```
|
|
291
376
|
wb → {"type": "hello", "wb_version": "...", "protocol": "wb-sidecar/1"}
|
|
292
377
|
wb ← {"type": "ready", "runtime": "wb-browser-runtime", "version": "...",
|
|
293
|
-
"protocol": "wb-sidecar/1", "
|
|
378
|
+
"protocol": "wb-sidecar/1", "min_protocol": "wb-sidecar/1",
|
|
379
|
+
"supports": ["goto", "click", "fill", ...],
|
|
380
|
+
"features": ["recording", "pause", "substitution",
|
|
381
|
+
"substitution_escape", "download_capture",
|
|
382
|
+
"signed_url_download"]}
|
|
294
383
|
```
|
|
295
384
|
|
|
385
|
+
The `ready` frame advertises capabilities so a client can feature-detect
|
|
386
|
+
without a hard-coded version→capability map:
|
|
387
|
+
|
|
388
|
+
- `protocol` — the wire version this runtime speaks.
|
|
389
|
+
- `min_protocol` — the oldest protocol version it can still interoperate with
|
|
390
|
+
(equal to `protocol` until a breaking frame change ships). A client speaking
|
|
391
|
+
an older protocol than `min_protocol` should refuse rather than guess.
|
|
392
|
+
- `supports` — the per-verb list (derived from the verb registry).
|
|
393
|
+
- `features` — coarse capability tokens above the verb list.
|
|
394
|
+
|
|
395
|
+
`version` is read from `package.json` at boot, so it can never drift from the
|
|
396
|
+
published version.
|
|
397
|
+
|
|
296
398
|
### Slice
|
|
297
399
|
|
|
298
400
|
```
|
|
@@ -25,9 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
import readline from "node:readline";
|
|
27
27
|
import { chromium } from "playwright-core";
|
|
28
|
-
import { readFileSync } from "node:fs";
|
|
29
28
|
import { send, log } from "../lib/io.js";
|
|
30
|
-
import { resolveInside } from "../lib/util.js";
|
|
31
29
|
import { SessionManager } from "../lib/session-manager.js";
|
|
32
30
|
import {
|
|
33
31
|
RecordingManager,
|
|
@@ -40,9 +38,31 @@ import {
|
|
|
40
38
|
classifyError,
|
|
41
39
|
} from "../lib/failure.js";
|
|
42
40
|
import { installDownloadCapture } from "../lib/download-capture.js";
|
|
41
|
+
import { expand, scrubSecrets } from "../lib/substitution.js";
|
|
43
42
|
import { SUPPORTS, runVerb, verbName } from "../verbs/index.js";
|
|
43
|
+
import pkg from "../package.json" with { type: "json" };
|
|
44
|
+
|
|
45
|
+
// Read the version from package.json so the `ready` frame can never drift from
|
|
46
|
+
// the published version (it used to be a hand-maintained literal that fell out
|
|
47
|
+
// of sync). Node >=24 supports JSON import attributes natively.
|
|
48
|
+
const VERSION = pkg.version;
|
|
49
|
+
|
|
50
|
+
// Protocol capability advertisement. `protocol` is the wire version we speak;
|
|
51
|
+
// `min_protocol` is the oldest version a peer may speak and still interoperate
|
|
52
|
+
// (we keep it equal to `protocol` until we ship a breaking frame change).
|
|
53
|
+
// `features` is a coarse capability list above the per-verb `supports` array —
|
|
54
|
+
// a client can feature-detect without hard-coding a version→capability map.
|
|
55
|
+
const PROTOCOL = "wb-sidecar/1";
|
|
56
|
+
const MIN_PROTOCOL = "wb-sidecar/1";
|
|
57
|
+
const FEATURES = [
|
|
58
|
+
"recording", // rrweb DOM capture + CDP screencast video
|
|
59
|
+
"pause", // pause_for_human operator handoff
|
|
60
|
+
"substitution", // {{ env.X }} / {{ artifacts.X }}
|
|
61
|
+
"substitution_escape", // \{{ literal-brace escape
|
|
62
|
+
"download_capture", // passive + explicit download artifact capture
|
|
63
|
+
"signed_url_download", // server-side fetch of in-JSON signed export URLs
|
|
64
|
+
];
|
|
44
65
|
|
|
45
|
-
const VERSION = "0.8.0";
|
|
46
66
|
const provider = getProvider();
|
|
47
67
|
log(`[provider] ${provider.name}`);
|
|
48
68
|
|
|
@@ -158,108 +178,9 @@ async function ensureSession(name, { profile, restoreSession } = {}) {
|
|
|
158
178
|
}
|
|
159
179
|
});
|
|
160
180
|
}
|
|
161
|
-
//
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
// Artifact names are bare identifiers — no dots, no slashes. Anything more
|
|
165
|
-
// exotic would invite path traversal once composed with WB_ARTIFACTS_DIR.
|
|
166
|
-
const ARTIFACT_RE = /\{\{\s*artifacts\.([A-Za-z_][A-Za-z0-9_-]*)\s*\}\}/g;
|
|
167
|
-
|
|
168
|
-
// Resolved once at module load. `warn` matches historical behavior
|
|
169
|
-
// (log + empty string, runbook continues). `error` throws so a missing OTP
|
|
170
|
-
// or env var fails the slice instead of silently sending an empty value
|
|
171
|
-
// into a Playwright action. `empty` is the silent variant.
|
|
172
|
-
const ON_MISSING = (() => {
|
|
173
|
-
const raw = (process.env.WB_SUBSTITUTION_ON_MISSING || "warn")
|
|
174
|
-
.trim()
|
|
175
|
-
.toLowerCase();
|
|
176
|
-
if (raw === "error" || raw === "empty" || raw === "warn") return raw;
|
|
177
|
-
log(
|
|
178
|
-
`[warn] WB_SUBSTITUTION_ON_MISSING=${raw} is not valid (warn|error|empty); defaulting to warn`,
|
|
179
|
-
);
|
|
180
|
-
return "warn";
|
|
181
|
-
})();
|
|
182
|
-
|
|
183
|
-
function handleMissingSubstitution(kind, name) {
|
|
184
|
-
const msg = `${kind}.${name} is not set`;
|
|
185
|
-
if (ON_MISSING === "error") {
|
|
186
|
-
throw new Error(`substitution: ${msg}`);
|
|
187
|
-
}
|
|
188
|
-
if (ON_MISSING === "warn") {
|
|
189
|
-
log(`[warn] ${msg}; substituting empty string`);
|
|
190
|
-
}
|
|
191
|
-
return "";
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
function readArtifactRaw(name) {
|
|
195
|
-
const dir = (process.env.WB_ARTIFACTS_DIR || "").trim();
|
|
196
|
-
if (!dir) {
|
|
197
|
-
log(`[warn] artifacts.${name} referenced but WB_ARTIFACTS_DIR is not set`);
|
|
198
|
-
return null;
|
|
199
|
-
}
|
|
200
|
-
for (const candidate of [`${name}.txt`, name]) {
|
|
201
|
-
const full = resolveInside(dir, candidate);
|
|
202
|
-
if (!full) continue;
|
|
203
|
-
try {
|
|
204
|
-
return readFileSync(full, "utf8").trimEnd();
|
|
205
|
-
} catch {
|
|
206
|
-
// try next candidate
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
return null;
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
function readArtifact(name, cache) {
|
|
213
|
-
if (cache && cache.has(name)) {
|
|
214
|
-
const hit = cache.get(name);
|
|
215
|
-
if (hit === null) return handleMissingSubstitution("artifacts", name);
|
|
216
|
-
return hit;
|
|
217
|
-
}
|
|
218
|
-
const v = readArtifactRaw(name);
|
|
219
|
-
if (cache) cache.set(name, v);
|
|
220
|
-
if (v === null) return handleMissingSubstitution("artifacts", name);
|
|
221
|
-
return v;
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
function expand(value, collected, artifactCache) {
|
|
225
|
-
if (typeof value === "string") {
|
|
226
|
-
return value
|
|
227
|
-
.replace(ENV_RE, (_, name) => {
|
|
228
|
-
const v = process.env[name];
|
|
229
|
-
if (v === undefined) return handleMissingSubstitution("env", name);
|
|
230
|
-
if (collected && v.length >= 3) collected.add(v);
|
|
231
|
-
return v;
|
|
232
|
-
})
|
|
233
|
-
.replace(ARTIFACT_RE, (_, name) => {
|
|
234
|
-
const v = readArtifact(name, artifactCache);
|
|
235
|
-
if (collected && v && v.length >= 3) collected.add(v);
|
|
236
|
-
return v;
|
|
237
|
-
});
|
|
238
|
-
}
|
|
239
|
-
if (Array.isArray(value))
|
|
240
|
-
return value.map((v) => expand(v, collected, artifactCache));
|
|
241
|
-
if (value && typeof value === "object") {
|
|
242
|
-
const out = {};
|
|
243
|
-
for (const [k, v] of Object.entries(value))
|
|
244
|
-
out[k] = expand(v, collected, artifactCache);
|
|
245
|
-
return out;
|
|
246
|
-
}
|
|
247
|
-
return value;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
// Scrub any values that came from {{ env.X }} / {{ artifacts.X }} expansion
|
|
251
|
-
// out of error messages before they cross the stdio boundary — Playwright and
|
|
252
|
-
// fetch errors sometimes echo their inputs (URLs, script bodies, assertion
|
|
253
|
-
// text) and those inputs may contain credentials.
|
|
254
|
-
function scrubSecrets(msg, secrets) {
|
|
255
|
-
let out = String(msg == null ? "" : msg);
|
|
256
|
-
if (!secrets) return out;
|
|
257
|
-
for (const s of secrets) {
|
|
258
|
-
if (!s) continue;
|
|
259
|
-
out = out.split(s).join("«***»");
|
|
260
|
-
}
|
|
261
|
-
return out;
|
|
262
|
-
}
|
|
181
|
+
// {{ env.X }} / {{ artifacts.X }} substitution + `\{{` escape + secret scrubbing
|
|
182
|
+
// live in lib/substitution.js (extracted so they're unit-testable without
|
|
183
|
+
// booting the sidecar).
|
|
263
184
|
|
|
264
185
|
// --- Slice handler ----------------------------------------------------------
|
|
265
186
|
|
|
@@ -559,8 +480,10 @@ rl.on("line", (line) => {
|
|
|
559
480
|
type: "ready",
|
|
560
481
|
runtime: "wb-browser-runtime",
|
|
561
482
|
version: VERSION,
|
|
562
|
-
protocol:
|
|
483
|
+
protocol: PROTOCOL,
|
|
484
|
+
min_protocol: MIN_PROTOCOL,
|
|
563
485
|
supports: SUPPORTS,
|
|
486
|
+
features: FEATURES,
|
|
564
487
|
});
|
|
565
488
|
break;
|
|
566
489
|
case "slice":
|
package/lib/recording-manager.js
CHANGED
|
@@ -148,6 +148,29 @@ export function loadRecordingConfig() {
|
|
|
148
148
|
kinds,
|
|
149
149
|
rrwebSource,
|
|
150
150
|
rrwebMaxEvents,
|
|
151
|
+
mask: loadMaskConfig(),
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// rrweb's `maskAllInputs` only redacts input *values* — labels, placeholders,
|
|
156
|
+
// aria-labels, option text, and the full DOM structure are still recorded. For
|
|
157
|
+
// genuinely sensitive regions (a displayed SSN, an account balance) the author
|
|
158
|
+
// must point rrweb at the offending nodes with a CSS selector. These knobs
|
|
159
|
+
// expose rrweb's selector options without hard-coding them:
|
|
160
|
+
// WB_RECORDING_MASK_ALL_INPUTS default on; set "0" to record input values
|
|
161
|
+
// WB_RECORDING_MASK_TEXT_SELECTOR text content of matches → asterisks
|
|
162
|
+
// WB_RECORDING_BLOCK_SELECTOR matches recorded as inert placeholders
|
|
163
|
+
// WB_RECORDING_IGNORE_SELECTOR matches' input events dropped entirely
|
|
164
|
+
/**
 * Resolve rrweb masking options from the environment.
 *
 * @returns {{maskAllInputs: boolean, maskTextSelector: (string|null),
 *   blockSelector: (string|null), ignoreSelector: (string|null)}}
 *   `maskAllInputs` is true unless WB_RECORDING_MASK_ALL_INPUTS is exactly "0";
 *   each selector is the trimmed env value, or null when unset or blank.
 */
export function loadMaskConfig() {
  const env = process.env;
  // Blank or whitespace-only selectors collapse to null.
  const readSelector = (key) => (env[key] || "").trim() || null;
  const maskAllInputs = env.WB_RECORDING_MASK_ALL_INPUTS !== "0";
  return {
    maskAllInputs,
    maskTextSelector: readSelector("WB_RECORDING_MASK_TEXT_SELECTOR"),
    blockSelector: readSelector("WB_RECORDING_BLOCK_SELECTOR"),
    ignoreSelector: readSelector("WB_RECORDING_IGNORE_SELECTOR"),
  };
}
|
|
153
176
|
|
|
@@ -221,17 +244,29 @@ export class RecordingManager {
|
|
|
221
244
|
for (const e of batch) pushRrweb(e);
|
|
222
245
|
}
|
|
223
246
|
});
|
|
247
|
+
// Build rrweb record options from the resolved mask config. Selector
|
|
248
|
+
// options are omitted entirely when unset so we don't pass `null` into
|
|
249
|
+
// rrweb (which would match nothing but still allocate a matcher).
|
|
250
|
+
const mask = cfg.mask || { maskAllInputs: true };
|
|
251
|
+
const recordOpts = {
|
|
252
|
+
maskAllInputs: mask.maskAllInputs !== false,
|
|
253
|
+
};
|
|
254
|
+
if (mask.maskTextSelector)
|
|
255
|
+
recordOpts.maskTextSelector = mask.maskTextSelector;
|
|
256
|
+
if (mask.blockSelector) recordOpts.blockSelector = mask.blockSelector;
|
|
257
|
+
if (mask.ignoreSelector)
|
|
258
|
+
recordOpts.ignoreSelector = mask.ignoreSelector;
|
|
259
|
+
const recordOptsJson = JSON.stringify(recordOpts);
|
|
224
260
|
const bootstrap = `
|
|
225
261
|
;(function(){
|
|
226
262
|
if (window.__wbRrwebActive) return;
|
|
227
263
|
window.__wbRrwebActive = true;
|
|
228
264
|
window.__wbRrwebBuffer = [];
|
|
229
265
|
try {
|
|
230
|
-
rrwebRecord({
|
|
266
|
+
rrwebRecord(Object.assign({
|
|
231
267
|
emit: function(event){ window.__wbRrwebBuffer.push(event); },
|
|
232
|
-
sampling: { scroll: 150, media: 800, input: 'last' }
|
|
233
|
-
|
|
234
|
-
});
|
|
268
|
+
sampling: { scroll: 150, media: 800, input: 'last' }
|
|
269
|
+
}, ${recordOptsJson}));
|
|
235
270
|
} catch (e) { /* rrweb unavailable on this page (e.g. chrome://) */ }
|
|
236
271
|
var flush = function(){
|
|
237
272
|
var buf = window.__wbRrwebBuffer;
|
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
// Signed-URL export capture.
|
|
2
|
+
//
|
|
3
|
+
// Some SaaS "Download" buttons never produce a Playwright `download` event or
|
|
4
|
+
// an in-page Blob. Instead the app calls a same-origin API that returns JSON
|
|
5
|
+
// like `{ "download_url": "https://bucket.s3.amazonaws.com/...?<signed>" }`
|
|
6
|
+
// and then navigates to that URL. A page-side `fetch(signedUrl)` usually fails
|
|
7
|
+
// because the object store's CORS policy doesn't allow the app origin to read
|
|
8
|
+
// the bytes — so the only reliable place to fetch it is the sidecar process,
|
|
9
|
+
// where CORS doesn't apply.
|
|
10
|
+
//
|
|
11
|
+
// This module supplies:
|
|
12
|
+
// - SIGNED_PAGE_HOOK / SIGNED_POLL_SCRIPT — page-side instrumentation that
|
|
13
|
+
// wraps fetch + XHR, inspects small same-origin JSON responses, and stashes
|
|
14
|
+
// any http(s) URL fields it finds on `window.__wbSignedCandidates`.
|
|
15
|
+
// - pure helpers (isSignedHost, redactSignedUrl, extractUrlFields,
|
|
16
|
+
// pickSignedCandidate, parseSignedConfig) that the `download` verb uses to
|
|
17
|
+
// decide which captured URL to fetch server-side.
|
|
18
|
+
//
|
|
19
|
+
// The bytes are downloaded by the verb via lib/http.js's retryableFetch. Signed
|
|
20
|
+
// query credentials are redacted everywhere they cross the stdio boundary; the
|
|
21
|
+
// full URL lives only in sidecar memory for the duration of the fetch.
|
|
22
|
+
|
|
23
|
+
// Object-store / CDN hosts that hand out pre-signed, credential-bearing URLs.
|
|
24
|
+
// A match means "this looks like an export download, not a normal app API call"
|
|
25
|
+
// — the gate that keeps auto-mode from fetching arbitrary URLs that merely appear in same-origin API responses.
|
|
26
|
+
// Host patterns for object stores / CDNs that serve pre-signed URLs. All are
// anchored at the end of the host so "…amazonaws.com.attacker.io" cannot match.
const SIGNED_HOST_PATTERNS = [
  // S3: s3.amazonaws.com, s3.us-east-1.amazonaws.com, s3-us-west-2.amazonaws.com,
  // bucket.s3.amazonaws.com, bucket.s3.us-east-1.amazonaws.com, and dual-stack
  // endpoints such as bucket.s3.dualstack.us-east-1.amazonaws.com. At most two
  // dot-separated labels may follow "s3" so the pattern stays linear-time.
  /(^|\.)s3(-[a-z0-9-]+)?(\.[a-z0-9-]+){0,2}\.amazonaws\.com$/i,
  // Google Cloud Storage
  /(^|\.)storage\.googleapis\.com$/i,
  /(^|\.)storage\.cloud\.google\.com$/i,
  // CloudFront
  /(^|\.)cloudfront\.net$/i,
  // Azure Blob Storage
  /\.blob\.core\.windows\.net$/i,
  // Cloudflare R2
  /\.r2\.cloudflarestorage\.com$/i,
];

/**
 * Report whether a host belongs to a recognized object-store / CDN family.
 *
 * @param {string} host - Hostname, optionally with a `:port` suffix (as in
 *   `URL.host`) and/or a trailing FQDN dot; case-insensitive.
 * @returns {boolean} true when the normalized host matches a known pattern;
 *   false for empty / nullish input.
 */
export function isSignedHost(host) {
  if (!host) return false;
  // Strip a ":port" suffix and a trailing root dot so the end-anchored
  // patterns see the bare hostname.
  const bare = String(host)
    .trim()
    .toLowerCase()
    .replace(/:\d+$/, "")
    .replace(/\.$/, "");
  return SIGNED_HOST_PATTERNS.some((re) => re.test(bare));
}
|
|
46
|
+
|
|
47
|
+
// Drop the query string (where signed credentials live) but keep origin + path
|
|
48
|
+
// so diagnostics stay useful. Falls back to a naive split when URL parsing
|
|
49
|
+
// fails so a malformed value still can't leak its query.
|
|
50
|
+
/**
 * Strip the query string (and fragment / userinfo) from a URL for logging.
 *
 * @param {*} url - Value to redact; coerced with `String(url || "")`.
 * @returns {string} `scheme://host/path`, followed by `?<redacted>` when the
 *   input carried a query string. Unparseable input is cut at the first `?`.
 */
export function redactSignedUrl(url) {
  const s = String(url || "");
  try {
    const u = new URL(s);
    // `origin` serializes to the literal "null" for non-special schemes
    // (s3://, gs://, …); rebuild scheme + host there so the redacted value
    // still identifies where the URL pointed.
    const base =
      u.origin !== "null"
        ? u.origin
        : `${u.protocol}${u.host ? `//${u.host}` : ""}`;
    const bare = `${base}${u.pathname}`;
    return u.search ? `${bare}?<redacted>` : bare;
  } catch {
    // Malformed value: naive split so the query still cannot leak.
    const i = s.indexOf("?");
    return i >= 0 ? `${s.slice(0, i)}?<redacted>` : s;
  }
}
|
|
60
|
+
|
|
61
|
+
// Recursively collect every http(s) string value in a parsed JSON object.
|
|
62
|
+
// Bounded in depth and count so a pathological response can't blow the stack
|
|
63
|
+
// or the buffer. Returns `[{ field, url }]` with dotted field paths.
|
|
64
|
+
/**
 * Walk a parsed JSON value and gather every string that starts with http(s)://.
 *
 * @param {*} data - Parsed JSON value (object, array, or scalar).
 * @param {number} [maxDepth=6] - Nodes deeper than this are not visited.
 * @param {number} [maxUrls=30] - Walking stops adding once this many are found.
 * @returns {Array<{field: string, url: string}>} Matches in traversal order,
 *   with dotted / indexed field paths (`a.b`, `files[0].url`).
 */
export function extractUrlFields(data, maxDepth = 6, maxUrls = 30) {
  const found = [];
  const httpUrl = /^https?:\/\//i;

  function walk(value, path, level) {
    if (level > maxDepth) return;
    if (found.length >= maxUrls) return;

    if (typeof value === "string") {
      if (httpUrl.test(value)) found.push({ field: path, url: value });
    } else if (Array.isArray(value)) {
      for (const [idx, item] of value.entries()) {
        walk(item, `${path}[${idx}]`, level + 1);
      }
    } else if (value && typeof value === "object") {
      for (const key of Object.keys(value)) {
        const childPath = path ? `${path}.${key}` : key;
        walk(value[key], childPath, level + 1);
      }
    }
  }

  walk(data, "", 0);
  return found;
}
|
|
86
|
+
|
|
87
|
+
// Leaf field name from a dotted/indexed path: "data.export.download_url" →
|
|
88
|
+
// "download_url", "files[0].url" → "url". Used to match against a caller's
|
|
89
|
+
// `json_fields` allowlist.
|
|
90
|
+
/**
 * Return the last segment of a dotted field path, minus one trailing `[n]`.
 *
 * @param {string} fieldPath - e.g. "data.export.download_url" or "files[0]".
 * @returns {string} The leaf name; "" for empty / nullish input.
 */
function leafField(fieldPath) {
  const path = String(fieldPath || "");
  // Everything after the final dot (the whole string when there is none).
  const tail = path.slice(path.lastIndexOf(".") + 1);
  return tail.replace(/\[\d+\]$/, "");
}
|
|
96
|
+
|
|
97
|
+
// Normalize `args.signed_url` into a resolved policy.
|
|
98
|
+
// undefined / "auto" → { enabled: "auto" } (recognized hosts only)
|
|
99
|
+
// false / { enabled: false } → { enabled: false } (feature off)
|
|
100
|
+
// true → { enabled: true }
|
|
101
|
+
// { enabled, hosts, json_fields } → that, normalized
|
|
102
|
+
// In "auto" mode only recognized signed hosts (or an explicit `hosts` entry)
|
|
103
|
+
// are captured. In forced mode (`enabled: true`) an explicit `hosts` or
|
|
104
|
+
// `json_fields` match is honored even for an unrecognized host, since the
|
|
105
|
+
// author asked for it by name.
|
|
106
|
+
/**
 * Normalize the `signed_url` verb argument into a resolved policy.
 *
 * Accepts booleans, nullish, the strings "true" / "false" / "auto"
 * (case-insensitive — values that went through quoting or `{{ env.X }}`
 * expansion arrive as strings), or an object
 * `{ enabled, hosts, json_fields | jsonFields }`. Anything unrecognized
 * resolves to "auto".
 *
 * @param {*} raw - The raw `signed_url` argument.
 * @returns {{enabled: (boolean|"auto"), hosts: string[], jsonFields: (string[]|null)}}
 *   `hosts` is lower-cased and trimmed; `jsonFields` is null when no field
 *   names were given. A disabled policy always has empty hosts / null fields.
 */
export function parseSignedConfig(raw) {
  // Fresh object per call so callers can't share (and mutate) one instance.
  const off = () => ({ enabled: false, hosts: [], jsonFields: null });

  const toMode = (value) => {
    if (value === true || value === false) return value;
    if (typeof value === "string") {
      const word = value.trim().toLowerCase();
      if (word === "true") return true;
      if (word === "false") return false;
    }
    return "auto";
  };

  // Accept an array or a single string; drop blank entries.
  const toList = (value) => {
    const items = Array.isArray(value)
      ? value
      : typeof value === "string"
        ? [value]
        : [];
    return items.map((item) => String(item).trim()).filter(Boolean);
  };

  const norm = ({ enabled, hosts, jsonFields }) => {
    const fields = toList(jsonFields);
    return {
      enabled,
      hosts: toList(hosts).map((h) => h.toLowerCase()),
      jsonFields: fields.length ? fields : null,
    };
  };

  if (raw == null) return norm({ enabled: "auto" });

  if (typeof raw !== "object") {
    const mode = toMode(raw);
    return mode === false ? off() : norm({ enabled: mode });
  }

  const enabled = toMode(raw.enabled);
  if (enabled === false) return off();

  // Prefer the documented snake_case key; fall back to camelCase.
  const usable = (v) => Array.isArray(v) || typeof v === "string";
  const jsonFields = usable(raw.json_fields) ? raw.json_fields : raw.jsonFields;
  return norm({ enabled, hosts: raw.hosts, jsonFields });
}
|
|
134
|
+
|
|
135
|
+
// Choose the best signed-URL candidate from the page-captured list, or null.
|
|
136
|
+
// `candidates` is the shape pushed by SIGNED_PAGE_HOOK:
|
|
137
|
+
// [{ api_url, urls: [{ field, url }], ts }]
|
|
138
|
+
/**
 * Choose the first acceptable signed-URL candidate from the page-captured list.
 *
 * A URL is accepted when (after the optional `jsonFields` filter):
 *   - its host equals, or is a subdomain of, an entry in `opts.hosts`; or
 *   - its host is a recognized object-store host (`isSignedHost`); or
 *   - `opts.enabled === true` and the author supplied `jsonFields` — the field
 *     already matched that allowlist, so the author selected it by name.
 *
 * A non-empty `hosts` list never widens acceptance on its own: in forced mode
 * a URL whose host is NOT in `hosts` is rejected unless one of the other
 * rules applies. (Previously any URL was accepted once `hosts` was non-empty,
 * which defeated the allowlist.)
 *
 * @param {Array<{api_url?: string, urls?: Array<{field: string, url: string}>}>} candidates
 *   Captured responses, in capture order.
 * @param {{enabled?: (boolean|"auto"), hosts?: string[], jsonFields?: (string[]|null)}} [opts]
 *   Resolved policy; `hosts` entries are expected lower-cased.
 * @returns {{url: string, field: string, api_url: (string|null), host: string}|null}
 *   The first accepted candidate, or null when none qualifies.
 */
export function pickSignedCandidate(candidates, opts = {}) {
  const hosts = opts.hosts || [];
  const jsonFields = opts.jsonFields || null;
  const forced = opts.enabled === true;

  for (const cand of candidates || []) {
    for (const u of cand.urls || []) {
      // Field allowlist: match on either the leaf name or the full path.
      if (
        jsonFields &&
        !jsonFields.includes(leafField(u.field)) &&
        !jsonFields.includes(u.field)
      ) {
        continue;
      }

      let host = "";
      try {
        host = new URL(u.url).host.toLowerCase();
      } catch {
        continue; // not a parseable absolute URL
      }

      const hostAllowed = hosts.some((h) => host === h || host.endsWith(`.${h}`));
      const looksSigned = isSignedHost(host);
      // Forced mode honors a field the author named, even on an unrecognized
      // host. A hosts match is already covered by `hostAllowed`.
      const fieldSelected = forced && Boolean(jsonFields);

      if (hostAllowed || looksSigned || fieldSelected) {
        return { url: u.url, field: u.field, api_url: cand.api_url || null, host };
      }
    }
  }
  return null;
}
|
|
169
|
+
|
|
170
|
+
// Page-side hook installed BEFORE the click. Wraps fetch + XHR to inspect small
|
|
171
|
+
// same-origin JSON responses and stash any http(s) URL fields. Idempotent and
|
|
172
|
+
// fail-open — any error in the wrapper falls through to the original call so the
|
|
173
|
+
// app keeps working. Mirrors the blob hook's "never uninstall" contract.
|
|
174
|
+
// Source text of the page-side hook (evaluated in the page, not in Node).
// Installs once per page (`window.__wbSignedInstalled`), wraps `fetch` and
// `XMLHttpRequest`, and pushes `{ api_url, urls: [{ field, url }], ts }` onto
// `window.__wbSignedCandidates` for same-origin responses whose body parses as
// JSON, is at most 64 KiB, and contains http(s) string values. Every wrapper
// path is wrapped in try/catch and always delegates to the original call.
//
// The fetch wrapper resolves the request URL from a string, a Request-like
// object (`.url`), or any other input via String() — e.g. a `URL` instance.
// If no URL can be derived the response is NOT inspected; an empty URL must
// never be mistaken for "same origin" (it resolves to the current page).
export const SIGNED_PAGE_HOOK = `(() => {
  if (window.__wbSignedInstalled) return;
  window.__wbSignedInstalled = true;
  window.__wbSignedCandidates = [];
  var MAX_BODY = 64 * 1024;
  var MAX_CAND = 50;

  var sameOrigin = function(u){
    try { return new URL(u, location.href).origin === location.origin; }
    catch (e) { return false; }
  };

  var collect = function(apiUrl, text){
    try {
      if (!text || text.length > MAX_BODY) return;
      var data;
      try { data = JSON.parse(text); } catch (e) { return; }
      var urls = [];
      var visit = function(node, fp, depth){
        if (depth > 6 || urls.length >= 30) return;
        if (typeof node === 'string') {
          if (/^https?:\\/\\//i.test(node)) urls.push({ field: fp, url: node });
          return;
        }
        if (Array.isArray(node)) {
          for (var i = 0; i < node.length; i++) visit(node[i], fp + '[' + i + ']', depth + 1);
          return;
        }
        if (node && typeof node === 'object') {
          for (var k in node) {
            if (Object.prototype.hasOwnProperty.call(node, k)) {
              visit(node[k], fp ? fp + '.' + k : k, depth + 1);
            }
          }
        }
      };
      visit(data, '', 0);
      if (urls.length && window.__wbSignedCandidates.length < MAX_CAND) {
        window.__wbSignedCandidates.push({ api_url: apiUrl, urls: urls, ts: Date.now() });
      }
    } catch (e) {}
  };

  var origFetch = window.fetch;
  if (typeof origFetch === 'function') {
    window.fetch = function(){
      var args = arguments;
      var reqUrl = '';
      var haveUrl = false;
      try {
        var input = args[0];
        if (typeof input === 'string') { reqUrl = input; haveUrl = true; }
        else if (input && typeof input.url === 'string') { reqUrl = input.url; haveUrl = true; }
        else if (input != null) { reqUrl = String(input); haveUrl = true; }
      } catch (e) {}
      var p = origFetch.apply(this, args);
      try {
        if (haveUrl && sameOrigin(reqUrl) && p && typeof p.then === 'function') {
          p.then(function(res){
            try {
              var ct = (res && res.headers && res.headers.get && res.headers.get('content-type')) || '';
              if (/json|text/i.test(ct) || ct === '') {
                res.clone().text().then(function(t){ collect(reqUrl, t); }).catch(function(){});
              }
            } catch (e) {}
            return res;
          }).catch(function(){});
        }
      } catch (e) {}
      return p;
    };
  }

  var XHR = window.XMLHttpRequest;
  if (XHR && XHR.prototype) {
    var origOpen = XHR.prototype.open;
    var origSend = XHR.prototype.send;
    XHR.prototype.open = function(method, url){
      try { this.__wbUrl = url; } catch (e) {}
      return origOpen.apply(this, arguments);
    };
    XHR.prototype.send = function(){
      try {
        var self = this;
        this.addEventListener('load', function(){
          try {
            if (sameOrigin(self.__wbUrl)) {
              var rt = '';
              try {
                if (self.responseType === '' || self.responseType === 'text') rt = self.responseText;
              } catch (e) {}
              if (rt) collect(self.__wbUrl, rt);
            }
          } catch (e) {}
        });
      } catch (e) {}
      return origSend.apply(this, arguments);
    };
  }
})()`;
|
|
268
|
+
|
|
269
|
+
// Read-and-clear of the candidate buffer so successive polls only see new
|
|
270
|
+
// responses (matches the blob hook's read-and-clear contract).
|
|
271
|
+
export const SIGNED_POLL_SCRIPT = `(() => {
|
|
272
|
+
var c = window.__wbSignedCandidates || [];
|
|
273
|
+
window.__wbSignedCandidates = [];
|
|
274
|
+
return c;
|
|
275
|
+
})()`;
|