wb-browser-runtime 0.13.0 → 0.14.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +75 -4
- package/bin/wb-browser-runtime.js +29 -106
- package/lib/http.js +200 -3
- package/lib/recording-manager.js +55 -4
- package/lib/signed-url-capture.js +276 -0
- package/lib/substitution.js +128 -0
- package/package.json +1 -1
- package/verbs/download.js +341 -21
- package/verbs/goto.js +8 -2
package/README.md
CHANGED
|
@@ -122,6 +122,8 @@ Verb arguments support two substitutions at dispatch time:
|
|
|
122
122
|
|
|
123
123
|
Both forms are redacted in stdout summaries — only the verb name + selector make it into the log. Expanded values are also scrubbed from `verb.failed` / `slice.failed` error messages before they cross the stdio boundary.
|
|
124
124
|
|
|
125
|
+
**Escaping.** To emit a literal `{{ … }}` that should *not* be substituted, prefix it with a backslash: `\{{ env.X }}` round-trips to the literal text `{{ env.X }}`. The escape is a single left-to-right pass, so the braces it produces are not re-scanned.
|
|
126
|
+
|
|
125
127
|
**Missing-value policy.** Set `WB_SUBSTITUTION_ON_MISSING` to choose how a missing `env.X` or `artifacts.X` is handled:
|
|
126
128
|
|
|
127
129
|
- `warn` (default) — log a stderr warning and substitute an empty string; the verb continues.
|
|
@@ -166,10 +168,16 @@ endpoint at session close. Recording is **off by default** — set
|
|
|
166
168
|
| `WB_RECORDING_SCREENCAST_QUALITY` | `60` | JPEG quality (0–100). |
|
|
167
169
|
| `WB_RECORDING_RRWEB` | `1` | Set `0` to skip rrweb even if recording is on. |
|
|
168
170
|
| `WB_RECORDING_VIDEO` | `0` if no `ffmpeg` | Set `0` to skip video even if `ffmpeg` is present. |
|
|
171
|
+
| `WB_RECORDING_MASK_ALL_INPUTS` | `1` | rrweb `maskAllInputs`. Input *values* are masked by default; set `0` to record them (recording input values is off by default for safety). |
|
|
172
|
+
| `WB_RECORDING_MASK_TEXT_SELECTOR` | *(unset)* | CSS selector whose **text content** rrweb masks (e.g. `.ssn, .acct-balance`). |
|
|
173
|
+
| `WB_RECORDING_BLOCK_SELECTOR` | *(unset)* | CSS selector rrweb records as an inert placeholder (contents never captured). |
|
|
174
|
+
| `WB_RECORDING_IGNORE_SELECTOR` | *(unset)* | CSS selector for elements to exclude from the recording. **In this build it is applied as a block selector** (unioned with `WB_RECORDING_BLOCK_SELECTOR`): the matching element is recorded as an inert placeholder and its subtree/inputs are never captured. The vendored rrweb bundle does not support rrweb's `ignoreSelector` (which only drops input *events*), so we map this knob onto the supported, stronger `blockSelector` to honor the "drop this field" intent. |
|
|
169
175
|
|
|
170
176
|
Artifacts are two parallel POSTs per session, `kind ∈ {rrweb, video}`:
|
|
171
177
|
|
|
172
|
-
- **rrweb** — gzipped JSON (`application/json+gzip`) — `{ run_id, session, event_count, events: [...] }`. DOM mutations + input events captured from every page
|
|
178
|
+
- **rrweb** — gzipped JSON (`application/json+gzip`) — `{ run_id, session, event_count, events: [...] }`. DOM mutations + input events captured from every page.
|
|
179
|
+
|
|
180
|
+
**PII scope — read this.** `maskAllInputs` (on by default) only redacts the *values* a user types into form fields. It does **not** mask field labels, placeholders, `aria-label`s, `<option>` text, or any other rendered text, and it does not alter the recorded DOM structure. A displayed account number, balance, or name that is page text — not an input value — is captured verbatim. For those, point rrweb at the sensitive nodes with `WB_RECORDING_MASK_TEXT_SELECTOR` (mask the text) or `WB_RECORDING_BLOCK_SELECTOR` (omit the subtree). `WB_RECORDING_IGNORE_SELECTOR` is treated as an alias for `WB_RECORDING_BLOCK_SELECTOR` in this build (the vendored rrweb bundle has no `ignoreSelector` support), so a field named there is excluded from the recording entirely rather than merely having its input events dropped. When in doubt, block the region.
|
|
173
181
|
- **video** — VP9 WebM (`video/webm`) — encoded from JPEG screencast frames via `ffmpeg`. Requires `ffmpeg` on `$PATH` (droplet install: `apt-get install -y ffmpeg`). If `ffmpeg` is missing the video kind silently disables and rrweb continues alone.
|
|
174
182
|
|
|
175
183
|
Each POST carries headers `Authorization: Bearer <secret>`,
|
|
@@ -208,7 +216,7 @@ example, see the `browserbase-hn-upvoted-probe` runbook in the xatabase repo.
|
|
|
208
216
|
| `assert` | `assert: <selector>` | `selector`, `text_contains`, `url_contains` |
|
|
209
217
|
| `eval` | `eval: <js>` | `script` |
|
|
210
218
|
| `save` | `save: <name>` | `name`, `value` (captures prior `extract`/`eval` when omitted) |
|
|
211
|
-
| `download` | `download: <selector>` | `selector`, `path`, `timeout`, `text_fallback` (clicks + races Playwright `download` event
|
|
219
|
+
| `download` | `download: <selector>` | `selector`, `path`, `timeout`, `text_fallback`, `signed_url` (clicks + races Playwright `download` event, in-page blob/anchor capture, and signed-URL export capture; saves into `$WB_ARTIFACTS_DIR/<path>`) |
|
|
212
220
|
|
|
213
221
|
`extract`'s `fields` entries are either a CSS selector string (returns
|
|
214
222
|
`textContent`), or `{ selector, attr }` to read an attribute.
|
|
@@ -308,9 +316,55 @@ Behaviour:
|
|
|
308
316
|
doesn't double-save.
|
|
309
317
|
- Emits `slice.artifact_saved` with `source: "download"` and
|
|
310
318
|
`provenance.verb_name: "download"`.
|
|
311
|
-
- On timeout: throws with diagnostics (page URL, selector,
|
|
319
|
+
- On timeout: throws with diagnostics (page URL, selector, all
|
|
312
320
|
failure reasons) AND emits a `slice.download_failed` frame.
|
|
313
321
|
|
|
322
|
+
#### Signed-URL export capture
|
|
323
|
+
|
|
324
|
+
Some SaaS "Download" buttons never trip a Playwright `download` event or an
|
|
325
|
+
in-page Blob. Instead the click calls a same-origin API that returns JSON like
|
|
326
|
+
`{ "download_url": "https://bucket.s3.amazonaws.com/…?<signed>" }` and then
|
|
327
|
+
navigates to that URL — and a page-side `fetch(signedUrl)` fails because the
|
|
328
|
+
object store's CORS policy won't let the app origin read the bytes.
|
|
329
|
+
|
|
330
|
+
The `download:` verb adds a **third** capture racer for this: it wraps the
|
|
331
|
+
page's `fetch`/`XHR` around the click, inspects small same-origin JSON
|
|
332
|
+
responses for URL-looking fields, and when it finds one pointing at a
|
|
333
|
+
recognized object-store host (S3, GCS, CloudFront, Azure Blob, R2), it
|
|
334
|
+
downloads the bytes **from the sidecar process** (where CORS doesn't apply)
|
|
335
|
+
and saves them like any other artifact.
|
|
336
|
+
|
|
337
|
+
This is **on by default in `auto` mode** — it only fires when a recognized
|
|
338
|
+
signed host appears in a JSON response around the click, so a normal
|
|
339
|
+
Playwright/blob download is unaffected. Tune or disable it per verb:
|
|
340
|
+
|
|
341
|
+
```yaml
|
|
342
|
+
- download:
|
|
343
|
+
selector: 'button:has-text("Download as xlsx")'
|
|
344
|
+
path: pilot-profit-loss.xlsx
|
|
345
|
+
timeout: 10s
|
|
346
|
+
signed_url:
|
|
347
|
+
enabled: true # true | false | auto (default auto)
|
|
348
|
+
hosts: # extra non-recognized hosts to accept
|
|
349
|
+
- pilot-report-downloads.s3.amazonaws.com
|
|
350
|
+
json_fields: # restrict to these response field names
|
|
351
|
+
- download_url
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
- Set `signed_url: false` to turn the capture off entirely for a verb.
|
|
355
|
+
- In `auto` mode only recognized object-store hosts (or an explicit `hosts:`
|
|
356
|
+
entry) are fetched. With `enabled: true` an explicit `hosts:`/`json_fields:`
|
|
357
|
+
match is honored even for an unrecognized host, since you named it.
|
|
358
|
+
- The captured URL's **query string (where signed credentials live) is
|
|
359
|
+
redacted** everywhere it crosses the stdio boundary — `provenance.signed_url`
|
|
360
|
+
is `origin+path?<redacted>`; the full URL stays only in sidecar memory for the
|
|
361
|
+
fetch. Honors `WB_BROWSER_DOWNLOAD_EXTENSIONS`.
|
|
362
|
+
- The saved frame carries `provenance.capture: "signed_url"` plus `api_url`,
|
|
363
|
+
`field`, `content_type`, and `content_disposition`.
|
|
364
|
+
- A 403 on the signed URL (expired token) emits `slice.download_failed` with
|
|
365
|
+
`expired: true` and `http_status: 403` so the operator knows to shorten the
|
|
366
|
+
click→fetch gap.
|
|
367
|
+
|
|
314
368
|
## Protocol
|
|
315
369
|
|
|
316
370
|
Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
|
|
@@ -321,9 +375,26 @@ opaque diagnostics by `wb` and printed dimmed to the user's terminal.
|
|
|
321
375
|
```
|
|
322
376
|
wb → {"type": "hello", "wb_version": "...", "protocol": "wb-sidecar/1"}
|
|
323
377
|
wb ← {"type": "ready", "runtime": "wb-browser-runtime", "version": "...",
|
|
324
|
-
"protocol": "wb-sidecar/1", "
|
|
378
|
+
"protocol": "wb-sidecar/1", "min_protocol": "wb-sidecar/1",
|
|
379
|
+
"supports": ["goto", "click", "fill", ...],
|
|
380
|
+
"features": ["recording", "pause", "substitution",
|
|
381
|
+
"substitution_escape", "download_capture",
|
|
382
|
+
"signed_url_download"]}
|
|
325
383
|
```
|
|
326
384
|
|
|
385
|
+
The `ready` frame advertises capabilities so a client can feature-detect
|
|
386
|
+
without a hard-coded version→capability map:
|
|
387
|
+
|
|
388
|
+
- `protocol` — the wire version this runtime speaks.
|
|
389
|
+
- `min_protocol` — the oldest protocol version it can still interoperate with
|
|
390
|
+
(equal to `protocol` until a breaking frame change ships). A client speaking
|
|
391
|
+
an older protocol than `min_protocol` should refuse rather than guess.
|
|
392
|
+
- `supports` — the per-verb list (derived from the verb registry).
|
|
393
|
+
- `features` — coarse capability tokens above the verb list.
|
|
394
|
+
|
|
395
|
+
`version` is read from `package.json` at boot, so it can never drift from the
|
|
396
|
+
published version.
|
|
397
|
+
|
|
327
398
|
### Slice
|
|
328
399
|
|
|
329
400
|
```
|
|
@@ -25,9 +25,7 @@
|
|
|
25
25
|
|
|
26
26
|
import readline from "node:readline";
|
|
27
27
|
import { chromium } from "playwright-core";
|
|
28
|
-
import { readFileSync } from "node:fs";
|
|
29
28
|
import { send, log } from "../lib/io.js";
|
|
30
|
-
import { resolveInside } from "../lib/util.js";
|
|
31
29
|
import { SessionManager } from "../lib/session-manager.js";
|
|
32
30
|
import {
|
|
33
31
|
RecordingManager,
|
|
@@ -40,9 +38,31 @@ import {
|
|
|
40
38
|
classifyError,
|
|
41
39
|
} from "../lib/failure.js";
|
|
42
40
|
import { installDownloadCapture } from "../lib/download-capture.js";
|
|
41
|
+
import { expand, scrubSecrets } from "../lib/substitution.js";
|
|
43
42
|
import { SUPPORTS, runVerb, verbName } from "../verbs/index.js";
|
|
43
|
+
import pkg from "../package.json" with { type: "json" };
|
|
44
|
+
|
|
45
|
+
// Read the version from package.json so the `ready` frame can never drift from
|
|
46
|
+
// the published version (it used to be a hand-maintained literal that fell out
|
|
47
|
+
// of sync). Node >=24 supports JSON import attributes natively.
|
|
48
|
+
const VERSION = pkg.version;
|
|
49
|
+
|
|
50
|
+
// Protocol capability advertisement. `protocol` is the wire version we speak;
|
|
51
|
+
// `min_protocol` is the oldest version a peer may speak and still interoperate
|
|
52
|
+
// (we keep it equal to `protocol` until we ship a breaking frame change).
|
|
53
|
+
// `features` is a coarse capability list above the per-verb `supports` array —
|
|
54
|
+
// a client can feature-detect without hard-coding a version→capability map.
|
|
55
|
+
const PROTOCOL = "wb-sidecar/1";
|
|
56
|
+
const MIN_PROTOCOL = "wb-sidecar/1";
|
|
57
|
+
const FEATURES = [
|
|
58
|
+
"recording", // rrweb DOM capture + CDP screencast video
|
|
59
|
+
"pause", // pause_for_human operator handoff
|
|
60
|
+
"substitution", // {{ env.X }} / {{ artifacts.X }}
|
|
61
|
+
"substitution_escape", // \{{ literal-brace escape
|
|
62
|
+
"download_capture", // passive + explicit download artifact capture
|
|
63
|
+
"signed_url_download", // server-side fetch of in-JSON signed export URLs
|
|
64
|
+
];
|
|
44
65
|
|
|
45
|
-
const VERSION = "0.8.0";
|
|
46
66
|
const provider = getProvider();
|
|
47
67
|
log(`[provider] ${provider.name}`);
|
|
48
68
|
|
|
@@ -158,108 +178,9 @@ async function ensureSession(name, { profile, restoreSession } = {}) {
|
|
|
158
178
|
}
|
|
159
179
|
});
|
|
160
180
|
}
|
|
161
|
-
//
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
// Artifact names are bare identifiers — no dots, no slashes. Anything more
|
|
165
|
-
// exotic would invite path traversal once composed with WB_ARTIFACTS_DIR.
|
|
166
|
-
const ARTIFACT_RE = /\{\{\s*artifacts\.([A-Za-z_][A-Za-z0-9_-]*)\s*\}\}/g;
|
|
167
|
-
|
|
168
|
-
// Resolved once at module load. `warn` matches historical behavior
|
|
169
|
-
// (log + empty string, runbook continues). `error` throws so a missing OTP
|
|
170
|
-
// or env var fails the slice instead of silently sending an empty value
|
|
171
|
-
// into a Playwright action. `empty` is the silent variant.
|
|
172
|
-
const ON_MISSING = (() => {
|
|
173
|
-
const raw = (process.env.WB_SUBSTITUTION_ON_MISSING || "warn")
|
|
174
|
-
.trim()
|
|
175
|
-
.toLowerCase();
|
|
176
|
-
if (raw === "error" || raw === "empty" || raw === "warn") return raw;
|
|
177
|
-
log(
|
|
178
|
-
`[warn] WB_SUBSTITUTION_ON_MISSING=${raw} is not valid (warn|error|empty); defaulting to warn`,
|
|
179
|
-
);
|
|
180
|
-
return "warn";
|
|
181
|
-
})();
|
|
182
|
-
|
|
183
|
-
function handleMissingSubstitution(kind, name) {
|
|
184
|
-
const msg = `${kind}.${name} is not set`;
|
|
185
|
-
if (ON_MISSING === "error") {
|
|
186
|
-
throw new Error(`substitution: ${msg}`);
|
|
187
|
-
}
|
|
188
|
-
if (ON_MISSING === "warn") {
|
|
189
|
-
log(`[warn] ${msg}; substituting empty string`);
|
|
190
|
-
}
|
|
191
|
-
return "";
|
|
192
|
-
}
|
|
193
|
-
|
|
194
|
-
function readArtifactRaw(name) {
|
|
195
|
-
const dir = (process.env.WB_ARTIFACTS_DIR || "").trim();
|
|
196
|
-
if (!dir) {
|
|
197
|
-
log(`[warn] artifacts.${name} referenced but WB_ARTIFACTS_DIR is not set`);
|
|
198
|
-
return null;
|
|
199
|
-
}
|
|
200
|
-
for (const candidate of [`${name}.txt`, name]) {
|
|
201
|
-
const full = resolveInside(dir, candidate);
|
|
202
|
-
if (!full) continue;
|
|
203
|
-
try {
|
|
204
|
-
return readFileSync(full, "utf8").trimEnd();
|
|
205
|
-
} catch {
|
|
206
|
-
// try next candidate
|
|
207
|
-
}
|
|
208
|
-
}
|
|
209
|
-
return null;
|
|
210
|
-
}
|
|
211
|
-
|
|
212
|
-
function readArtifact(name, cache) {
|
|
213
|
-
if (cache && cache.has(name)) {
|
|
214
|
-
const hit = cache.get(name);
|
|
215
|
-
if (hit === null) return handleMissingSubstitution("artifacts", name);
|
|
216
|
-
return hit;
|
|
217
|
-
}
|
|
218
|
-
const v = readArtifactRaw(name);
|
|
219
|
-
if (cache) cache.set(name, v);
|
|
220
|
-
if (v === null) return handleMissingSubstitution("artifacts", name);
|
|
221
|
-
return v;
|
|
222
|
-
}
|
|
223
|
-
|
|
224
|
-
function expand(value, collected, artifactCache) {
|
|
225
|
-
if (typeof value === "string") {
|
|
226
|
-
return value
|
|
227
|
-
.replace(ENV_RE, (_, name) => {
|
|
228
|
-
const v = process.env[name];
|
|
229
|
-
if (v === undefined) return handleMissingSubstitution("env", name);
|
|
230
|
-
if (collected && v.length >= 3) collected.add(v);
|
|
231
|
-
return v;
|
|
232
|
-
})
|
|
233
|
-
.replace(ARTIFACT_RE, (_, name) => {
|
|
234
|
-
const v = readArtifact(name, artifactCache);
|
|
235
|
-
if (collected && v && v.length >= 3) collected.add(v);
|
|
236
|
-
return v;
|
|
237
|
-
});
|
|
238
|
-
}
|
|
239
|
-
if (Array.isArray(value))
|
|
240
|
-
return value.map((v) => expand(v, collected, artifactCache));
|
|
241
|
-
if (value && typeof value === "object") {
|
|
242
|
-
const out = {};
|
|
243
|
-
for (const [k, v] of Object.entries(value))
|
|
244
|
-
out[k] = expand(v, collected, artifactCache);
|
|
245
|
-
return out;
|
|
246
|
-
}
|
|
247
|
-
return value;
|
|
248
|
-
}
|
|
249
|
-
|
|
250
|
-
// Scrub any values that came from {{ env.X }} / {{ artifacts.X }} expansion
|
|
251
|
-
// out of error messages before they cross the stdio boundary — Playwright and
|
|
252
|
-
// fetch errors sometimes echo their inputs (URLs, script bodies, assertion
|
|
253
|
-
// text) and those inputs may contain credentials.
|
|
254
|
-
function scrubSecrets(msg, secrets) {
|
|
255
|
-
let out = String(msg == null ? "" : msg);
|
|
256
|
-
if (!secrets) return out;
|
|
257
|
-
for (const s of secrets) {
|
|
258
|
-
if (!s) continue;
|
|
259
|
-
out = out.split(s).join("«***»");
|
|
260
|
-
}
|
|
261
|
-
return out;
|
|
262
|
-
}
|
|
181
|
+
// {{ env.X }} / {{ artifacts.X }} substitution + `\{{` escape + secret scrubbing
|
|
182
|
+
// live in lib/substitution.js (extracted so they're unit-testable without
|
|
183
|
+
// booting the sidecar).
|
|
263
184
|
|
|
264
185
|
// --- Slice handler ----------------------------------------------------------
|
|
265
186
|
|
|
@@ -559,8 +480,10 @@ rl.on("line", (line) => {
|
|
|
559
480
|
type: "ready",
|
|
560
481
|
runtime: "wb-browser-runtime",
|
|
561
482
|
version: VERSION,
|
|
562
|
-
protocol:
|
|
483
|
+
protocol: PROTOCOL,
|
|
484
|
+
min_protocol: MIN_PROTOCOL,
|
|
563
485
|
supports: SUPPORTS,
|
|
486
|
+
features: FEATURES,
|
|
564
487
|
});
|
|
565
488
|
break;
|
|
566
489
|
case "slice":
|
package/lib/http.js
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
import dns from "node:dns";
|
|
2
|
+
import { isIP } from "node:net";
|
|
1
3
|
import { log } from "./io.js";
|
|
2
4
|
|
|
3
5
|
export async function safeText(res) {
|
|
@@ -8,6 +10,138 @@ export async function safeText(res) {
|
|
|
8
10
|
}
|
|
9
11
|
}
|
|
10
12
|
|
|
13
|
+
// --- Body-read timeout handoff ---------------------------------------------
|
|
14
|
+
//
|
|
15
|
+
// retryableFetch's AbortController timer normally stays armed only until fetch() resolves
|
|
16
|
+
// (headers received) and is then cleared in `finally`. That leaves the *body*
|
|
17
|
+
// read unbounded: a server can dribble bytes forever after sending headers.
|
|
18
|
+
//
|
|
19
|
+
// `keepBodyTimeout: true` is an opt-in for callers that consume the body
|
|
20
|
+
// themselves (the signed-URL download path). When set, on a successful (2xx)
|
|
21
|
+
// response we do NOT clear the timer — instead we stash the timer + controller
|
|
22
|
+
// in a WeakMap keyed by the Response so the caller can either:
|
|
23
|
+
// - releaseBodyTimeout(res): clear it once the body is fully consumed, or
|
|
24
|
+
// - abortBody(res): abort the in-flight body read (e.g. size cap tripped).
|
|
25
|
+
// If the caller never releases, the timer still fires and aborts the socket,
|
|
26
|
+
// so a hung body read can't wedge the process. Other callers (default
|
|
27
|
+
// keepBodyTimeout=false) are unaffected — their timer is cleared as before.
|
|
28
|
+
// Response -> { timer, controller } for responses whose abort timer was handed
// to the caller (keepBodyTimeout). A WeakMap so an entry never keeps a
// discarded Response alive.
const bodyTimers = new WeakMap();

/**
 * Disarm the body-read timeout that was handed off for `res`.
 *
 * Call this once the body has been fully consumed. It is a no-op when `res`
 * has no handed-off timer (for example, it was already released or aborted).
 *
 * @param {object} res - The Response the timer was registered under.
 * @returns {void}
 */
export function releaseBodyTimeout(res) {
  const entry = bodyTimers.get(res);
  if (!entry) return;
  clearTimeout(entry.timer);
  bodyTimers.delete(res);
}

/**
 * Abort the in-flight body read for `res` (e.g. a size cap tripped).
 *
 * Besides aborting the controller, this also clears the handed-off timer and
 * drops the bookkeeping entry. Previously the timer was left armed after an
 * abort, so it lingered (and kept the event loop alive) for the remainder of
 * the timeout even though there was nothing left to abort. A second call, or a
 * later releaseBodyTimeout(res), is a harmless no-op.
 *
 * @param {object} res - The Response the timer was registered under.
 * @returns {void}
 */
export function abortBody(res) {
  const entry = bodyTimers.get(res);
  if (!entry) return;
  try {
    entry.controller.abort();
  } catch {
    // Best-effort: aborting must never throw into the caller's error path.
  } finally {
    clearTimeout(entry.timer);
    bodyTimers.delete(res);
  }
}
|
|
46
|
+
|
|
47
|
+
// Best-effort cancel/drain of a response body so a non-OK or redirect response
|
|
48
|
+
// doesn't leak the underlying socket while we throw or follow a redirect.
|
|
49
|
+
/**
 * Best-effort release of a response body we are not going to read.
 *
 * Prefers cancelling the body stream; when the body has no `cancel`, falls
 * back to reading it to completion. Every failure is swallowed on purpose —
 * this runs while the caller is already throwing or following a redirect.
 *
 * @param {object} res - A fetch Response (or anything response-shaped / nullish).
 * @returns {Promise<void>}
 */
export async function drainResponseBody(res) {
  try {
    const stream = res?.body;
    if (!stream) return;
    if (stream.cancel) {
      await stream.cancel();
      return;
    }
    // No cancel() available: consume the body instead.
    await res.arrayBuffer().catch(() => {});
  } catch {
    // Draining is best-effort; never surface an error from here.
  }
}
|
|
59
|
+
|
|
60
|
+
// --- SSRF guard: private/loopback/link-local IP detection ------------------
|
|
61
|
+
|
|
62
|
+
/**
 * True when a dotted-quad IPv4 string is in a range we refuse to fetch from:
 * 0.0.0.0/8, loopback, RFC 1918 private space, or link-local.
 *
 * @param {string} addr - Candidate dotted-quad string.
 * @returns {boolean} false for anything that is not four dot-separated numbers.
 */
function isPrivateIPv4(addr) {
  const m = /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/.exec(addr);
  if (!m) return false;
  const a = Number(m[1]);
  const b = Number(m[2]);
  if (a === 0) return true; // 0.0.0.0/8 (includes the unspecified address)
  if (a === 127) return true; // 127.0.0.0/8 loopback
  if (a === 10) return true; // 10.0.0.0/8
  if (a === 172 && b >= 16 && b <= 31) return true; // 172.16.0.0/12
  if (a === 192 && b === 168) return true; // 192.168.0.0/16
  if (a === 169 && b === 254) return true; // 169.254.0.0/16 link-local
  return false;
}

/**
 * Expand an IPv6 textual address into its eight 16-bit groups.
 *
 * Handles a zone id suffix (`%eth0`), `::` compression, and a dotted-quad
 * tail (`::ffff:1.2.3.4`).
 *
 * @param {string} addr
 * @returns {{ groups: number[], dottedTail: boolean } | null} null when the
 *   text cannot be expanded to exactly eight groups.
 */
function parseIPv6(addr) {
  let s = String(addr).toLowerCase();
  const pct = s.indexOf("%");
  if (pct >= 0) s = s.slice(0, pct); // strip zone id

  // Rewrite a dotted-quad tail as two hex groups so the rest is uniform.
  let dottedTail = false;
  const tail = /^(.*:)(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3})$/.exec(s);
  if (tail) {
    const octets = tail.slice(2).map(Number);
    if (octets.some((o) => o > 255)) return null;
    const hi = ((octets[0] << 8) | octets[1]).toString(16);
    const lo = ((octets[2] << 8) | octets[3]).toString(16);
    s = `${tail[1]}${hi}:${lo}`;
    dottedTail = true;
  }

  const halves = s.split("::");
  if (halves.length > 2) return null;
  const compressed = halves.length === 2;
  const toGroups = (part) => (part === "" ? [] : part.split(":"));
  const head = toGroups(halves[0]);
  const rest = compressed ? toGroups(halves[1]) : [];
  const missing = 8 - head.length - rest.length;
  if (compressed ? missing < 1 : missing !== 0) return null;

  const text = [...head, ...Array(compressed ? missing : 0).fill("0"), ...rest];
  if (!text.every((g) => /^[0-9a-f]{1,4}$/.test(g))) return null;
  return { groups: text.map((g) => Number.parseInt(g, 16)), dottedTail };
}

/**
 * True when an IPv6 address is loopback, unspecified, unique-local (fc00::/7),
 * link-local (fe80::/10), or embeds a private IPv4 address.
 *
 * The address is expanded to numeric groups first, so an embedded IPv4 address
 * is recognised in hex form too (`::ffff:7f00:1`), not only in dotted form —
 * the WHATWG URL parser serialises IPv4-mapped literals in hex. Prefix checks
 * are numeric, so a short group such as `fc::1` (0x00fc) is no longer mistaken
 * for fc00::/7.
 *
 * @param {string} addr - An IPv6 address; callers gate this with net.isIP().
 * @returns {boolean} Unparseable input is reported as private (fail closed).
 */
function isPrivateIPv6(addr) {
  const parsed = parseIPv6(addr);
  if (!parsed) return true;
  const { groups, dottedTail } = parsed;
  const [g0, g1, , , , g5, g6, g7] = groups;
  const first80Zero = groups.slice(0, 5).every((g) => g === 0);

  // :: (unspecified) and ::1 (loopback)
  if (first80Zero && g5 === 0 && g6 === 0 && (g7 === 0 || g7 === 1)) return true;

  // Embedded IPv4: IPv4-mapped (::ffff:0:0/96), IPv4-compatible (::/96),
  // NAT64 well-known prefix (64:ff9b::/96), or any address written with a
  // dotted-quad tail.
  const nat64 =
    g0 === 0x64 && g1 === 0xff9b && groups.slice(2, 6).every((g) => g === 0);
  const embedsV4 =
    dottedTail || nat64 || (first80Zero && (g5 === 0xffff || g5 === 0));
  if (embedsV4) {
    const v4 = `${g6 >> 8}.${g6 & 0xff}.${g7 >> 8}.${g7 & 0xff}`;
    if (isPrivateIPv4(v4)) return true;
  }

  if ((g0 & 0xfe00) === 0xfc00) return true; // fc00::/7 unique-local
  if ((g0 & 0xffc0) === 0xfe80) return true; // fe80::/10 link-local
  return false;
}

// True if a literal IP address falls in a private/loopback/link-local range.
// Non-IP input (including a bracketed `[::1]`) returns false — strip brackets
// before calling.
export function isPrivateIp(addr) {
  const fam = isIP(addr);
  if (fam === 4) return isPrivateIPv4(addr);
  if (fam === 6) return isPrivateIPv6(addr);
  return false;
}

/**
 * Validate a single URL as an allowed download target.
 *
 * Applies the caller's host allowlist (the same check used on the initial URL,
 * which is what makes per-hop redirect validation meaningful) and, unless
 * `allowPrivateIp` is set, rejects any host that is a private IP literal or
 * resolves to one.
 *
 * IPv6 literals: `URL.hostname` keeps the surrounding brackets (`[::1]`),
 * which net.isIP() does not recognise. The brackets are stripped before
 * classification so an IPv6 literal is checked as an IP instead of being sent
 * to DNS as a bracketed name.
 *
 * NOTE: this resolves the hostname for validation only; a later fetch of the
 * same URL string resolves it again, so this check alone does not pin the
 * address that is ultimately connected to.
 *
 * @param {string} urlStr - Absolute URL to validate.
 * @param {object} [options]
 * @param {((host: string) => boolean) | null} [options.validateHost] -
 *   Allowlist predicate; receives the lowercased `host` (port included).
 * @param {boolean} [options.allowPrivateIp=false] - Skip the private-IP checks.
 * @returns {Promise<void>} Resolves when the target is allowed.
 * @throws {Error} `blocked target: …` on any rejection, including a hostname
 *   that cannot be resolved (fail closed).
 */
export async function assertAllowedTarget(
  urlStr,
  { validateHost = null, allowPrivateIp = false } = {},
) {
  let u;
  try {
    u = new URL(urlStr);
  } catch {
    throw new Error(`blocked target: unparseable URL`);
  }
  if (u.protocol !== "http:" && u.protocol !== "https:") {
    throw new Error(`blocked target: unsupported scheme "${u.protocol}"`);
  }
  const host = u.host.toLowerCase(); // includes port — matches the picker
  const rawHostname = u.hostname.toLowerCase();
  // Unwrap `[v6-literal]` so isIP()/isPrivateIp() can classify it.
  const hostname =
    rawHostname.startsWith("[") && rawHostname.endsWith("]")
      ? rawHostname.slice(1, -1)
      : rawHostname;
  if (validateHost && !validateHost(host)) {
    throw new Error(`blocked target: host not allowed: ${host}`);
  }
  if (allowPrivateIp) return;
  if (isIP(hostname)) {
    if (isPrivateIp(hostname)) {
      throw new Error(`blocked target: private/loopback IP ${hostname}`);
    }
    return;
  }
  let results;
  try {
    results = await dns.promises.lookup(hostname, { all: true });
  } catch (e) {
    // Fail closed: a host we can't resolve isn't a host we should fetch.
    throw new Error(`blocked target: could not resolve ${hostname}: ${e?.message || e}`);
  }
  for (const r of results) {
    if (isPrivateIp(r.address)) {
      throw new Error(
        `blocked target: ${hostname} resolves to private/loopback IP ${r.address}`,
      );
    }
  }
}
|
|
144
|
+
|
|
11
145
|
// Retry transient network + 5xx/429 failures with short exponential backoff.
|
|
12
146
|
// Each attempt gets its own AbortController + timeout; caller-passed signals
|
|
13
147
|
// are not plumbed through since we don't have a cancellation story above this
|
|
@@ -17,11 +151,15 @@ export async function safeText(res) {
|
|
|
17
151
|
// `bodyFactory`, when set, is invoked per attempt to produce a fresh body —
|
|
18
152
|
// required for streaming uploads where the previous attempt consumed the
|
|
19
153
|
// stream. Takes precedence over opts.body.
|
|
154
|
+
//
|
|
155
|
+
// `keepBodyTimeout`, when set, hands the attempt's abort timer to the caller on
|
|
156
|
+
// a successful (2xx) response instead of clearing it, so the body-read window
|
|
157
|
+
// stays bounded. See releaseBodyTimeout / abortBody above.
|
|
20
158
|
export async function retryableFetch(
|
|
21
159
|
url,
|
|
22
160
|
opts = {},
|
|
23
161
|
label,
|
|
24
|
-
{ timeoutMs = 30_000, bodyFactory = null } = {},
|
|
162
|
+
{ timeoutMs = 30_000, bodyFactory = null, keepBodyTimeout = false } = {},
|
|
25
163
|
) {
|
|
26
164
|
const delays = [100, 500];
|
|
27
165
|
let lastErr = null;
|
|
@@ -36,6 +174,7 @@ export async function retryableFetch(
|
|
|
36
174
|
}
|
|
37
175
|
const controller = new AbortController();
|
|
38
176
|
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
177
|
+
let handedOff = false;
|
|
39
178
|
try {
|
|
40
179
|
const fetchOpts = { ...opts, signal: controller.signal };
|
|
41
180
|
if (bodyFactory) {
|
|
@@ -45,7 +184,14 @@ export async function retryableFetch(
|
|
|
45
184
|
fetchOpts.duplex = "half";
|
|
46
185
|
}
|
|
47
186
|
const res = await fetch(url, fetchOpts);
|
|
48
|
-
if (res.ok)
|
|
187
|
+
if (res.ok) {
|
|
188
|
+
if (keepBodyTimeout) {
|
|
189
|
+
// Keep the timer armed until the caller consumes the body.
|
|
190
|
+
handedOff = true;
|
|
191
|
+
bodyTimers.set(res, { timer, controller });
|
|
192
|
+
}
|
|
193
|
+
return res;
|
|
194
|
+
}
|
|
49
195
|
if (res.status === 429 || (res.status >= 500 && res.status < 600)) {
|
|
50
196
|
lastRes = res;
|
|
51
197
|
continue;
|
|
@@ -55,9 +201,60 @@ export async function retryableFetch(
|
|
|
55
201
|
lastErr = e;
|
|
56
202
|
continue;
|
|
57
203
|
} finally {
|
|
58
|
-
clearTimeout(timer);
|
|
204
|
+
if (!handedOff) clearTimeout(timer);
|
|
59
205
|
}
|
|
60
206
|
}
|
|
61
207
|
if (lastRes) return lastRes;
|
|
62
208
|
throw lastErr;
|
|
63
209
|
}
|
|
210
|
+
|
|
211
|
+
export const MAX_DOWNLOAD_REDIRECTS = 5;
|
|
212
|
+
|
|
213
|
+
// Fetch a download target with manual redirect handling and an SSRF guard.
|
|
214
|
+
// Every hop (the initial URL and each Location target) is re-validated with
|
|
215
|
+
// assertAllowedTarget before it is fetched, so a 3xx to an unvalidated or
|
|
216
|
+
// private host is rejected instead of silently followed. Redirect bodies are
|
|
217
|
+
// drained between hops. Returns the final (non-redirect) Response; the caller
|
|
218
|
+
// owns the body (use keepBodyTimeout semantics: releaseBodyTimeout when done).
|
|
219
|
+
/**
 * GET a download target, following redirects by hand so that every hop is
 * validated before it is requested.
 *
 * Each URL (the initial one and every `Location` target) goes through
 * assertAllowedTarget first. A 3xx response carrying a `Location` header has
 * its body drained and is followed, up to `maxRedirects` times; any other
 * response — including a 3xx without `Location` — is returned as-is and the
 * caller owns its body.
 *
 * @param {string} url - Initial download URL.
 * @param {object} [options]
 * @param {number} [options.timeoutMs=30000] - Per-attempt timeout passed to retryableFetch.
 * @param {((host: string) => boolean) | null} [options.validateHost] - Host allowlist predicate.
 * @param {boolean} [options.allowPrivateIp=false] - Skip the private-IP guard.
 * @param {number} [options.maxRedirects=MAX_DOWNLOAD_REDIRECTS] - Redirect budget.
 * @param {string} [options.label] - Label forwarded to retryableFetch.
 * @returns {Promise<Response>} The final, non-followed response.
 * @throws {Error} When a hop is blocked, the redirect budget is exceeded, or a
 *   `Location` value cannot be resolved against the current URL.
 */
export async function guardedDownloadFetch(
  url,
  {
    timeoutMs = 30_000,
    validateHost = null,
    allowPrivateIp = false,
    maxRedirects = MAX_DOWNLOAD_REDIRECTS,
    label,
  } = {},
) {
  let target = url;
  let hop = 0;
  while (true) {
    await assertAllowedTarget(target, { validateHost, allowPrivateIp });
    const res = await retryableFetch(
      target,
      { method: "GET", redirect: "manual" },
      label,
      { timeoutMs, keepBodyTimeout: true },
    );

    const { status } = res;
    const isRedirect = status >= 300 && status < 400;
    const loc = isRedirect ? res.headers?.get?.("location") : null;
    if (!loc) return res;

    // Only a successful (ok) response has its timer handed off, so a redirect
    // has nothing to release — just drain its body before moving on.
    await drainResponseBody(res);
    if (hop >= maxRedirects) {
      throw new Error(`too many redirects (> ${maxRedirects})`);
    }
    try {
      target = new URL(loc, target).toString();
    } catch {
      throw new Error(`blocked target: unparseable redirect Location`);
    }
    hop += 1;
  }
}
|
package/lib/recording-manager.js
CHANGED
|
@@ -148,6 +148,44 @@ export function loadRecordingConfig() {
|
|
|
148
148
|
kinds,
|
|
149
149
|
rrwebSource,
|
|
150
150
|
rrwebMaxEvents,
|
|
151
|
+
mask: loadMaskConfig(),
|
|
152
|
+
};
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
// rrweb's `maskAllInputs` only redacts input *values* — labels, placeholders,
|
|
156
|
+
// aria-labels, option text, and the full DOM structure are still recorded. For
|
|
157
|
+
// genuinely sensitive regions (a displayed SSN, an account balance) the author
|
|
158
|
+
// must point rrweb at the offending nodes with a CSS selector. These knobs
|
|
159
|
+
// expose rrweb's selector options without hard-coding them:
|
|
160
|
+
// WB_RECORDING_MASK_ALL_INPUTS default on; set "0" to record input values
|
|
161
|
+
// WB_RECORDING_MASK_TEXT_SELECTOR text content of matches → asterisks
|
|
162
|
+
// WB_RECORDING_BLOCK_SELECTOR matches recorded as inert placeholders
|
|
163
|
+
// WB_RECORDING_IGNORE_SELECTOR matches excluded from the recording
|
|
164
|
+
//
|
|
165
|
+
// Privacy note: the vendored `vendor/rrweb-record.min.js` bundle supports
|
|
166
|
+
// `blockSelector` but NOT `ignoreSelector` — passing `ignoreSelector` to this
|
|
167
|
+
// build is a silent no-op, so an operator relying on it to drop a sensitive
|
|
168
|
+
// field would have that value recorded verbatim. To keep the "drop this field"
|
|
169
|
+
// promise on the shipped binary, WB_RECORDING_IGNORE_SELECTOR is folded into the
|
|
170
|
+
// (supported, and strictly stronger) `blockSelector`: a blocked element is not
|
|
171
|
+
// recorded at all — its subtree and inputs are never captured. The env var name
|
|
172
|
+
// is kept for compatibility; both selectors are unioned so neither is lost.
|
|
173
|
+
/**
 * Resolve rrweb masking options from the environment.
 *
 * - `maskAllInputs` is true unless WB_RECORDING_MASK_ALL_INPUTS is exactly "0".
 * - `maskTextSelector` is the trimmed WB_RECORDING_MASK_TEXT_SELECTOR, or null.
 * - `blockSelector` is WB_RECORDING_BLOCK_SELECTOR and
 *   WB_RECORDING_IGNORE_SELECTOR (each trimmed) joined with ", ", or whichever
 *   one is set, or null when neither is. The ignore selector is folded into the
 *   block selector because that is the mechanism the vendored bundle supports.
 *
 * @returns {{ maskAllInputs: boolean, maskTextSelector: (string|null), blockSelector: (string|null) }}
 */
export function loadMaskConfig() {
  const { env } = process;

  // Trimmed value of an env var, or null when it is unset or blank.
  function readSelector(name) {
    const raw = env[name];
    const trimmed = raw ? raw.trim() : "";
    return trimmed === "" ? null : trimmed;
  }

  const block = readSelector("WB_RECORDING_BLOCK_SELECTOR");
  const ignore = readSelector("WB_RECORDING_IGNORE_SELECTOR");

  let blockSelector;
  if (block !== null && ignore !== null) {
    blockSelector = `${block}, ${ignore}`;
  } else {
    blockSelector = block ?? ignore;
  }

  return {
    maskAllInputs: env.WB_RECORDING_MASK_ALL_INPUTS !== "0",
    maskTextSelector: readSelector("WB_RECORDING_MASK_TEXT_SELECTOR"),
    blockSelector,
  };
}
|
|
153
191
|
|
|
@@ -221,17 +259,30 @@ export class RecordingManager {
|
|
|
221
259
|
for (const e of batch) pushRrweb(e);
|
|
222
260
|
}
|
|
223
261
|
});
|
|
262
|
+
// Build rrweb record options from the resolved mask config. Selector
|
|
263
|
+
// options are omitted entirely when unset so we don't pass `null` into
|
|
264
|
+
// rrweb (which would match nothing but still allocate a matcher).
|
|
265
|
+
const mask = cfg.mask || { maskAllInputs: true };
|
|
266
|
+
const recordOpts = {
|
|
267
|
+
maskAllInputs: mask.maskAllInputs !== false,
|
|
268
|
+
};
|
|
269
|
+
if (mask.maskTextSelector)
|
|
270
|
+
recordOpts.maskTextSelector = mask.maskTextSelector;
|
|
271
|
+
// `blockSelector` already folds in WB_RECORDING_IGNORE_SELECTOR (see
|
|
272
|
+
// loadMaskConfig). We never pass `ignoreSelector` — the vendored rrweb
|
|
273
|
+
// bundle does not support it, so it would be silently dropped.
|
|
274
|
+
if (mask.blockSelector) recordOpts.blockSelector = mask.blockSelector;
|
|
275
|
+
const recordOptsJson = JSON.stringify(recordOpts);
|
|
224
276
|
const bootstrap = `
|
|
225
277
|
;(function(){
|
|
226
278
|
if (window.__wbRrwebActive) return;
|
|
227
279
|
window.__wbRrwebActive = true;
|
|
228
280
|
window.__wbRrwebBuffer = [];
|
|
229
281
|
try {
|
|
230
|
-
rrwebRecord({
|
|
282
|
+
rrwebRecord(Object.assign({
|
|
231
283
|
emit: function(event){ window.__wbRrwebBuffer.push(event); },
|
|
232
|
-
sampling: { scroll: 150, media: 800, input: 'last' }
|
|
233
|
-
|
|
234
|
-
});
|
|
284
|
+
sampling: { scroll: 150, media: 800, input: 'last' }
|
|
285
|
+
}, ${recordOptsJson}));
|
|
235
286
|
} catch (e) { /* rrweb unavailable on this page (e.g. chrome://) */ }
|
|
236
287
|
var flush = function(){
|
|
237
288
|
var buf = window.__wbRrwebBuffer;
|