wb-browser-runtime 0.4.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -3
- package/bin/wb-browser-runtime.js +85 -6
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -29,6 +29,21 @@ Verb arguments support `{{ env.NAME }}` substitution at dispatch time, so any
|
|
|
29
29
|
secrets your runbook needs (e.g. `HACKERNEWS_PASSWORD`) get pulled from the
|
|
30
30
|
sidecar process env without ever appearing on stdout.
|
|
31
31
|
|
|
32
|
+
## Optional: anti-detection
|
|
33
|
+
|
|
34
|
+
Targets behind Cloudflare / Kasada / DataDome (e.g. Airbase) will reject the
|
|
35
|
+
default Browserbase session fingerprint and serve a non-interactive challenge
|
|
36
|
+
page. Flip either flag on for the affected runs.
|
|
37
|
+
|
|
38
|
+
| Env var | Default | Purpose |
|
|
39
|
+
|------------------------------------|---------|--------------------------------------------------|
|
|
40
|
+
| `BROWSERBASE_ADVANCED_STEALTH`     | *(off)* | Send `browserSettings.advancedStealth: true`. Gated to the Browserbase Scale plan; the API returns an error on lower plans. |
|
|
41
|
+
| `BROWSERBASE_PROXIES` | *(off)* | Send `proxies: true`. Routes through Browserbase residential proxy pool. Incurs extra per-session cost. |
|
|
42
|
+
|
|
43
|
+
Set either variable to `1` or `true` (case-insensitive) to enable it; any other value leaves it off. `proxies: true` alone clears most Cloudflare
|
|
44
|
+
challenges; add `advancedStealth: true` on top when the target still blocks.
|
|
45
|
+
The sidecar logs the resolved config at session create.
|
|
46
|
+
|
|
32
47
|
## Optional: session recording (rrweb + CDP screencast)
|
|
33
48
|
|
|
34
49
|
Each browser session can be recorded two ways and uploaded to a consumer
|
|
@@ -85,10 +100,45 @@ example, see the `browserbase-hn-upvoted-probe` runbook in the xatabase repo.
|
|
|
85
100
|
| `extract` | — | `selector` (rows), `fields: { name → spec }` |
|
|
86
101
|
| `assert` | `assert: <selector>` | `selector`, `text_contains`, `url_contains` |
|
|
87
102
|
| `eval` | `eval: <js>` | `script` |
|
|
103
|
+
| `save` | `save: <name>` | `name`, `value` (captures prior `extract`/`eval` when omitted) |
|
|
88
104
|
|
|
89
105
|
`extract`'s `fields` entries are either a CSS selector string (returns
|
|
90
106
|
`textContent`), or `{ selector, attr }` to read an attribute.
|
|
91
107
|
|
|
108
|
+
## Artifacts
|
|
109
|
+
|
|
110
|
+
`wb` exports `$WB_ARTIFACTS_DIR` to every cell — a per-run directory
|
|
111
|
+
(`~/.wb/runs/<run_id>/artifacts/` by default) where any cell can drop files
|
|
112
|
+
that later cells will read back. The browser `save:` verb is the
|
|
113
|
+
sidecar-side equivalent:
|
|
114
|
+
|
|
115
|
+
```yaml
|
|
116
|
+
- extract:
|
|
117
|
+
selector: .order-row
|
|
118
|
+
fields:
|
|
119
|
+
id: .order-id
|
|
120
|
+
total: .total
|
|
121
|
+
- save: orders # writes $WB_ARTIFACTS_DIR/orders.json
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
Forms:
|
|
125
|
+
|
|
126
|
+
- `save: <name>` — captures the previous verb's JSON output (from
|
|
127
|
+
`extract` or `eval`) into `<name>.json`.
|
|
128
|
+
- `save: { name: orders, value: { ... } }` — writes an inline value.
|
|
129
|
+
- `save: {}` — auto-names the file `cell-<block_index>-<rand>.json`.
|
|
130
|
+
|
|
131
|
+
Downstream bash/python cells read the file directly:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
jq '.[0].id' "$WB_ARTIFACTS_DIR/orders.json"
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
When `WB_ARTIFACTS_UPLOAD_URL` is set (template supports `{run_id}` and
|
|
138
|
+
`{filename}`), `wb` POSTs each new artifact file after the cell that
|
|
139
|
+
produced it completes. Auth reuses `WB_RECORDING_UPLOAD_SECRET`
|
|
140
|
+
(`Authorization: Bearer <…>`); failures are logged and non-fatal.
|
|
141
|
+
|
|
92
142
|
## Protocol
|
|
93
143
|
|
|
94
144
|
Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
|
|
@@ -139,7 +189,8 @@ Sidecar exits 0.
|
|
|
139
189
|
- v0.1 — protocol skeleton (echo only)
|
|
140
190
|
- v0.2 — `slice.session_started` event with stub URL
|
|
141
191
|
- v0.3 — Browserbase + playwright-core, real `goto/fill/click/wait_for/extract/assert`
|
|
142
|
-
- v0.4 — rrweb + CDP screencast recording, uploaded to a consumer endpoint
|
|
143
|
-
- v0.5 — `
|
|
144
|
-
- v0.6 — `
|
|
192
|
+
- v0.4 — rrweb + CDP screencast recording, uploaded to a consumer endpoint
|
|
193
|
+
- v0.5 — `save:` verb + shared `$WB_ARTIFACTS_DIR` for cross-cell data (this release)
|
|
194
|
+
- v0.6 — `act:` recovery via Stagehand, `slice.recovered` events
|
|
195
|
+
- v0.7 — `wait_for_mfa` / `wait_for_email_otp` emitting `slice.paused` with
|
|
145
196
|
`resume_url`
|
|
@@ -41,10 +41,11 @@ const SUPPORTS = [
|
|
|
41
41
|
"extract",
|
|
42
42
|
"assert",
|
|
43
43
|
"eval",
|
|
44
|
+
"save",
|
|
44
45
|
];
|
|
45
46
|
|
|
46
47
|
const BB_BASE = "https://api.browserbase.com";
|
|
47
|
-
const VERSION = "0.
|
|
48
|
+
// Sidecar version string. Keep in lockstep with the "version" field in
// package.json: this release is published as 0.5.1, so the constant must not
// lag behind at 0.5.0.
const VERSION = "0.5.1";
|
|
48
49
|
|
|
49
50
|
// --- Recording config -------------------------------------------------------
|
|
50
51
|
//
|
|
@@ -167,15 +168,35 @@ async function bbCreateSession() {
|
|
|
167
168
|
"BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID must be set",
|
|
168
169
|
);
|
|
169
170
|
}
|
|
171
|
+
|
|
172
|
+
// Both flags opt-in per session. advancedStealth is Scale-plan-gated on
|
|
173
|
+
// Browserbase's side; proxies adds residential-IP cost. Default off so a
|
|
174
|
+
// misconfigured plan doesn't break unrelated runs (HN, Google Sheets, etc.);
|
|
175
|
+
// flip per vendor when the target sits behind Cloudflare / similar bot
|
|
176
|
+
// detection (e.g., Airbase).
|
|
177
|
+
const envBool = (v) => v === "1" || (typeof v === "string" && v.toLowerCase() === "true");
|
|
178
|
+
const advancedStealth = envBool(process.env.BROWSERBASE_ADVANCED_STEALTH);
|
|
179
|
+
const proxies = envBool(process.env.BROWSERBASE_PROXIES);
|
|
180
|
+
|
|
181
|
+
// keepAlive:false — slice lifetime is tied to wb process; on shutdown
|
|
182
|
+
// we explicitly REQUEST_RELEASE so quota isn't burned by orphans.
|
|
183
|
+
const body = { projectId, keepAlive: false };
|
|
184
|
+
if (advancedStealth) {
|
|
185
|
+
body.browserSettings = { advancedStealth: true };
|
|
186
|
+
}
|
|
187
|
+
if (proxies) {
|
|
188
|
+
body.proxies = true;
|
|
189
|
+
}
|
|
190
|
+
|
|
191
|
+
log(`[bb] session create advancedStealth=${advancedStealth} proxies=${proxies}`);
|
|
192
|
+
|
|
170
193
|
const res = await fetch(`${BB_BASE}/v1/sessions`, {
|
|
171
194
|
method: "POST",
|
|
172
195
|
headers: {
|
|
173
196
|
"X-BB-API-Key": apiKey,
|
|
174
197
|
"Content-Type": "application/json",
|
|
175
198
|
},
|
|
176
|
-
|
|
177
|
-
// we explicitly REQUEST_RELEASE so quota isn't burned by orphans.
|
|
178
|
-
body: JSON.stringify({ projectId, keepAlive: false }),
|
|
199
|
+
body: JSON.stringify(body),
|
|
179
200
|
});
|
|
180
201
|
if (!res.ok) {
|
|
181
202
|
throw new Error(
|
|
@@ -597,7 +618,7 @@ function arg(value, primaryKey) {
|
|
|
597
618
|
return {};
|
|
598
619
|
}
|
|
599
620
|
|
|
600
|
-
async function runVerb(page, verb, index) {
|
|
621
|
+
async function runVerb(page, verb, index, ctx) {
|
|
601
622
|
const name = verbName(verb);
|
|
602
623
|
const raw = verb[name];
|
|
603
624
|
const a = expand(arg(raw, defaultKey(name)));
|
|
@@ -670,6 +691,7 @@ async function runVerb(page, verb, index) {
|
|
|
670
691
|
// Emit as JSON to stdout so wb captures it in step.complete.stdout.
|
|
671
692
|
// Pretty-printed for readability when a runbook surfaces the output.
|
|
672
693
|
console.log(JSON.stringify(items, null, 2));
|
|
694
|
+
if (ctx) ctx.lastResult = items;
|
|
673
695
|
return `${rowSelector} → ${items.length} rows`;
|
|
674
696
|
}
|
|
675
697
|
case "assert": {
|
|
@@ -695,13 +717,63 @@ async function runVerb(page, verb, index) {
|
|
|
695
717
|
// Run arbitrary JS in the page; result is JSON-serialized to stdout.
|
|
696
718
|
const result = await page.evaluate(a.script);
|
|
697
719
|
console.log(JSON.stringify(result, null, 2));
|
|
720
|
+
if (ctx) ctx.lastResult = result;
|
|
698
721
|
return `script ran`;
|
|
699
722
|
}
|
|
723
|
+
case "save": {
|
|
724
|
+
// Persist a JSON artifact into $WB_ARTIFACTS_DIR so later cells can read
|
|
725
|
+
// it and wb can upload it. Captures the previous verb's output unless
|
|
726
|
+
// the author provides an explicit `value:`.
|
|
727
|
+
const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
|
|
728
|
+
if (!artifactsDir) {
|
|
729
|
+
throw new Error(
|
|
730
|
+
"save: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
|
|
731
|
+
);
|
|
732
|
+
}
|
|
733
|
+
const explicitValue = a.value !== undefined;
|
|
734
|
+
const payload = explicitValue ? a.value : ctx?.lastResult;
|
|
735
|
+
if (payload === undefined) {
|
|
736
|
+
throw new Error(
|
|
737
|
+
"save: no value provided and no prior extract/eval result to capture",
|
|
738
|
+
);
|
|
739
|
+
}
|
|
740
|
+
const name =
|
|
741
|
+
typeof a.name === "string" && a.name.trim().length > 0
|
|
742
|
+
? sanitizeArtifactName(a.name)
|
|
743
|
+
: autoArtifactName(ctx?.blockIndex ?? index);
|
|
744
|
+
const filename = name.endsWith(".json") ? name : `${name}.json`;
|
|
745
|
+
const full = path.join(artifactsDir, filename);
|
|
746
|
+
await fsPromises.mkdir(artifactsDir, { recursive: true });
|
|
747
|
+
await fsPromises.writeFile(
|
|
748
|
+
full,
|
|
749
|
+
JSON.stringify(payload, null, 2),
|
|
750
|
+
"utf8",
|
|
751
|
+
);
|
|
752
|
+
send({
|
|
753
|
+
type: "slice.artifact_saved",
|
|
754
|
+
filename,
|
|
755
|
+
path: full,
|
|
756
|
+
bytes: Buffer.byteLength(JSON.stringify(payload)),
|
|
757
|
+
});
|
|
758
|
+
return `→ ${filename}`;
|
|
759
|
+
}
|
|
700
760
|
default:
|
|
701
761
|
throw new Error(`unsupported verb: ${name}`);
|
|
702
762
|
}
|
|
703
763
|
}
|
|
704
764
|
|
|
765
|
+
/**
 * Turn an author-supplied artifact name into a single safe filename segment.
 *
 * Surrounding whitespace is stripped first so that a name such as " orders "
 * becomes "orders" rather than "_orders_". Every remaining run of characters
 * outside `[A-Za-z0-9_.-]` (slashes, NULs, spaces, etc.) is then collapsed
 * into one underscore, which keeps path separators out of the result. The
 * result is capped at 200 characters.
 *
 * NOTE(review): dot-only names such as ".." pass through unchanged; this is
 * only safe as long as the caller appends an extension before joining the
 * name onto a directory — confirm at every call site.
 *
 * @param {*} s - Requested name; coerced with `String()`.
 * @returns {string} The sanitized name, at most 200 characters long.
 */
function sanitizeArtifactName(s) {
  return String(s)
    .trim()
    .replace(/[^A-Za-z0-9_.-]+/g, "_")
    .slice(0, 200);
}
|
|
770
|
+
|
|
771
|
+
/**
 * Build a default artifact name of the form `cell-<index>-<rand>` for a
 * `save:` verb that did not specify one.
 *
 * The index originates from an incoming protocol message, so it is only used
 * when it is a non-negative safe integer. Anything else (null, NaN, negative,
 * fractional, or values large enough to stringify in exponent notation such
 * as `1e+21`) falls back to 0 so the generated name stays within the
 * `[A-Za-z0-9_.-]` filename alphabet.
 *
 * @param {number|null|undefined} blockIndex - Index of the originating cell.
 * @returns {string} Name without extension, e.g. `cell-3-1a2b3c4d`.
 */
function autoArtifactName(blockIndex) {
  // Eight hex characters taken from a fresh UUID keep repeated saves within
  // the same cell from colliding.
  const suffix = randomUUID().replace(/-/g, "").slice(0, 8);
  const cell =
    Number.isSafeInteger(blockIndex) && blockIndex >= 0 ? blockIndex : 0;
  return `cell-${cell}-${suffix}`;
}
|
|
776
|
+
|
|
705
777
|
function defaultKey(name) {
|
|
706
778
|
switch (name) {
|
|
707
779
|
case "goto":
|
|
@@ -716,6 +788,8 @@ function defaultKey(name) {
|
|
|
716
788
|
return "key";
|
|
717
789
|
case "eval":
|
|
718
790
|
return "script";
|
|
791
|
+
case "save":
|
|
792
|
+
return "name";
|
|
719
793
|
default:
|
|
720
794
|
return "value";
|
|
721
795
|
}
|
|
@@ -733,6 +807,8 @@ async function handleSlice(msg) {
|
|
|
733
807
|
const verbs = Array.isArray(msg.verbs) ? msg.verbs : [];
|
|
734
808
|
const sessionName = msg.session || "default";
|
|
735
809
|
const restore = msg.restore || null;
|
|
810
|
+
const blockIndex =
|
|
811
|
+
typeof msg.block_index === "number" ? msg.block_index : null;
|
|
736
812
|
|
|
737
813
|
let session;
|
|
738
814
|
try {
|
|
@@ -750,11 +826,14 @@ async function handleSlice(msg) {
|
|
|
750
826
|
// is where we'd jump to verbs[restore.state.verb_index].
|
|
751
827
|
const startAt = restore?.state?.verb_index ?? 0;
|
|
752
828
|
|
|
829
|
+
// Per-slice scratch so `save:` can capture the prior verb's JSON output.
|
|
830
|
+
const sliceCtx = { lastResult: undefined, blockIndex };
|
|
831
|
+
|
|
753
832
|
for (let i = startAt; i < verbs.length; i++) {
|
|
754
833
|
const v = verbs[i];
|
|
755
834
|
const name = verbName(v);
|
|
756
835
|
try {
|
|
757
|
-
const summary = await runVerb(session.page, v, i);
|
|
836
|
+
const summary = await runVerb(session.page, v, i, sliceCtx);
|
|
758
837
|
send({
|
|
759
838
|
type: "verb.complete",
|
|
760
839
|
verb: name,
|
package/package.json
CHANGED