wb-browser-runtime 0.4.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -29,6 +29,21 @@ Verb arguments support `{{ env.NAME }}` substitution at dispatch time, so any
29
29
  secrets your runbook needs (e.g. `HACKERNEWS_PASSWORD`) get pulled from the
30
30
  sidecar process env without ever appearing on stdout.
31
31
 
32
+ ## Optional: anti-detection
33
+
34
+ Targets behind Cloudflare / Kasada / DataDome (e.g. Airbase) will reject the
35
+ default Browserbase session fingerprint and serve a non-interactive challenge
36
+ page. Enable one or both of the flags below for the affected runs.
37
+
38
+ | Env var | Default | Purpose |
39
+ |------------------------------------|---------|--------------------------------------------------|
40
+ | `BROWSERBASE_ADVANCED_STEALTH` | *(off)* | Send `browserSettings.advancedStealth: true`. Gated to Browserbase's Scale plan — the API returns an error on lower plans. |
41
+ | `BROWSERBASE_PROXIES` | *(off)* | Send `proxies: true`. Routes through Browserbase residential proxy pool. Incurs extra per-session cost. |
42
+
43
+ Set a variable to `1` or `true` (case-insensitive) to enable it. `proxies: true` alone clears most Cloudflare
44
+ challenges; add `advancedStealth: true` on top when the target still blocks.
45
+ The sidecar logs the resolved config at session create.
46
+
32
47
  ## Optional: session recording (rrweb + CDP screencast)
33
48
 
34
49
  Each browser session can be recorded two ways and uploaded to a consumer
@@ -85,10 +100,45 @@ example, see the `browserbase-hn-upvoted-probe` runbook in the xatabase repo.
85
100
  | `extract` | — | `selector` (rows), `fields: { name → spec }` |
86
101
  | `assert` | `assert: <selector>` | `selector`, `text_contains`, `url_contains` |
87
102
  | `eval` | `eval: <js>` | `script` |
103
+ | `save` | `save: <name>` | `name`, `value` (captures prior `extract`/`eval` when omitted) |
88
104
 
89
105
  `extract`'s `fields` entries are either a CSS selector string (returns
90
106
  `textContent`), or `{ selector, attr }` to read an attribute.
91
107
 
108
+ ## Artifacts
109
+
110
+ `wb` exports `$WB_ARTIFACTS_DIR` to every cell — a per-run directory
111
+ (`~/.wb/runs/<run_id>/artifacts/` by default) where any cell can drop files
112
+ that later cells can read back. The browser `save:` verb is the
113
+ sidecar-side equivalent:
114
+
115
+ ```yaml
116
+ - extract:
117
+ selector: .order-row
118
+ fields:
119
+ id: .order-id
120
+ total: .total
121
+ - save: orders # writes $WB_ARTIFACTS_DIR/orders.json
122
+ ```
123
+
124
+ Forms:
125
+
126
+ - `save: <name>` — captures the most recent `extract` or `eval` JSON output
127
+ from the same slice (intervening verbs do not clear it) into `<name>.json`.
128
+ - `save: { name: orders, value: { ... } }` — writes an inline value.
129
+ - `save: {}` — auto-names the file `cell-<block_index>-<rand>.json`.
130
+
131
+ Downstream bash/python cells read the file directly:
132
+
133
+ ```bash
134
+ jq '.[0].id' "$WB_ARTIFACTS_DIR/orders.json"
135
+ ```
136
+
137
+ When `WB_ARTIFACTS_UPLOAD_URL` is set (template supports `{run_id}` and
138
+ `{filename}`), `wb` POSTs each new artifact file after the cell that
139
+ produced it completes. Auth reuses `WB_RECORDING_UPLOAD_SECRET`
140
+ (`Authorization: Bearer <…>`); failures are logged and non-fatal.
141
+
92
142
  ## Protocol
93
143
 
94
144
  Line-framed JSON, one message per line, on stdin/stdout. `stderr` is treated as
@@ -139,7 +189,8 @@ Sidecar exits 0.
139
189
  - v0.1 — protocol skeleton (echo only)
140
190
  - v0.2 — `slice.session_started` event with stub URL
141
191
  - v0.3 — Browserbase + playwright-core, real `goto/fill/click/wait_for/extract/assert`
142
- - v0.4 — rrweb + CDP screencast recording, uploaded to a consumer endpoint (this)
143
- - v0.5 — `act:` recovery via Stagehand, `slice.recovered` events
144
- - v0.6 — `wait_for_mfa` / `wait_for_email_otp` emitting `slice.paused` with
192
+ - v0.4 — rrweb + CDP screencast recording, uploaded to a consumer endpoint
193
+ - v0.5 — `save:` verb + shared `$WB_ARTIFACTS_DIR` for cross-cell data (this)
194
+ - v0.6 — `act:` recovery via Stagehand, `slice.recovered` events
195
+ - v0.7 — `wait_for_mfa` / `wait_for_email_otp` emitting `slice.paused` with
145
196
  `resume_url`
@@ -41,10 +41,11 @@ const SUPPORTS = [
41
41
  "extract",
42
42
  "assert",
43
43
  "eval",
44
+ "save",
44
45
  ];
45
46
 
46
47
  const BB_BASE = "https://api.browserbase.com";
47
- const VERSION = "0.4.0";
48
+ const VERSION = "0.5.0";
48
49
 
49
50
  // --- Recording config -------------------------------------------------------
50
51
  //
@@ -167,15 +168,35 @@ async function bbCreateSession() {
167
168
  "BROWSERBASE_API_KEY and BROWSERBASE_PROJECT_ID must be set",
168
169
  );
169
170
  }
171
+
172
+ // Both flags opt-in per session. advancedStealth is Scale-plan-gated on
173
+ // Browserbase's side; proxies adds residential-IP cost. Default off so a
174
+ // misconfigured plan doesn't break unrelated runs (HN, Google Sheets, etc.);
175
+ // flip per vendor when the target sits behind Cloudflare / similar bot
176
+ // detection (e.g., Airbase).
177
+ const envBool = (v) => v === "1" || (typeof v === "string" && v.toLowerCase() === "true");
178
+ const advancedStealth = envBool(process.env.BROWSERBASE_ADVANCED_STEALTH);
179
+ const proxies = envBool(process.env.BROWSERBASE_PROXIES);
180
+
181
+ // keepAlive:false — slice lifetime is tied to wb process; on shutdown
182
+ // we explicitly REQUEST_RELEASE so quota isn't burned by orphans.
183
+ const body = { projectId, keepAlive: false };
184
+ if (advancedStealth) {
185
+ body.browserSettings = { advancedStealth: true };
186
+ }
187
+ if (proxies) {
188
+ body.proxies = true;
189
+ }
190
+
191
+ log(`[bb] session create advancedStealth=${advancedStealth} proxies=${proxies}`);
192
+
170
193
  const res = await fetch(`${BB_BASE}/v1/sessions`, {
171
194
  method: "POST",
172
195
  headers: {
173
196
  "X-BB-API-Key": apiKey,
174
197
  "Content-Type": "application/json",
175
198
  },
176
- // keepAlive:false — slice lifetime is tied to wb process; on shutdown
177
- // we explicitly REQUEST_RELEASE so quota isn't burned by orphans.
178
- body: JSON.stringify({ projectId, keepAlive: false }),
199
+ body: JSON.stringify(body),
179
200
  });
180
201
  if (!res.ok) {
181
202
  throw new Error(
@@ -597,7 +618,7 @@ function arg(value, primaryKey) {
597
618
  return {};
598
619
  }
599
620
 
600
- async function runVerb(page, verb, index) {
621
+ async function runVerb(page, verb, index, ctx) {
601
622
  const name = verbName(verb);
602
623
  const raw = verb[name];
603
624
  const a = expand(arg(raw, defaultKey(name)));
@@ -670,6 +691,7 @@ async function runVerb(page, verb, index) {
670
691
  // Emit as JSON to stdout so wb captures it in step.complete.stdout.
671
692
  // Pretty-printed for readability when a runbook surfaces the output.
672
693
  console.log(JSON.stringify(items, null, 2));
694
+ if (ctx) ctx.lastResult = items;
673
695
  return `${rowSelector} → ${items.length} rows`;
674
696
  }
675
697
  case "assert": {
@@ -695,13 +717,63 @@ async function runVerb(page, verb, index) {
695
717
  // Run arbitrary JS in the page; result is JSON-serialized to stdout.
696
718
  const result = await page.evaluate(a.script);
697
719
  console.log(JSON.stringify(result, null, 2));
720
+ if (ctx) ctx.lastResult = result;
698
721
  return `script ran`;
699
722
  }
723
+ case "save": {
724
+ // Persist a JSON artifact into $WB_ARTIFACTS_DIR so later cells can read
725
+ // it and wb can upload it. Captures the previous verb's output unless
726
+ // the author provides an explicit `value:`.
727
+ const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
728
+ if (!artifactsDir) {
729
+ throw new Error(
730
+ "save: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
731
+ );
732
+ }
733
+ const explicitValue = a.value !== undefined;
734
+ const payload = explicitValue ? a.value : ctx?.lastResult;
735
+ if (payload === undefined) {
736
+ throw new Error(
737
+ "save: no value provided and no prior extract/eval result to capture",
738
+ );
739
+ }
740
+ const name =
741
+ typeof a.name === "string" && a.name.trim().length > 0
742
+ ? sanitizeArtifactName(a.name)
743
+ : autoArtifactName(ctx?.blockIndex ?? index);
744
+ const filename = name.endsWith(".json") ? name : `${name}.json`;
745
+ const full = path.join(artifactsDir, filename);
746
+ await fsPromises.mkdir(artifactsDir, { recursive: true });
747
+ await fsPromises.writeFile(
748
+ full,
749
+ JSON.stringify(payload, null, 2),
750
+ "utf8",
751
+ );
752
+ send({
753
+ type: "slice.artifact_saved",
754
+ filename,
755
+ path: full,
756
+ bytes: Buffer.byteLength(JSON.stringify(payload)),
757
+ });
758
+ return `→ ${filename}`;
759
+ }
700
760
  default:
701
761
  throw new Error(`unsupported verb: ${name}`);
702
762
  }
703
763
  }
704
764
 
765
+ function sanitizeArtifactName(s) {
766
+ // Keep author-chosen names readable but safe as filenames. Drop anything
767
+ // that could escape the artifacts dir (slashes, NULs, etc.).
768
+ return String(s).replace(/[^A-Za-z0-9_.-]+/g, "_").slice(0, 200);
769
+ }
770
+
771
+ function autoArtifactName(blockIndex) {
772
+ const rand = randomUUID().replace(/-/g, "").slice(0, 8);
773
+ const n = Number.isFinite(blockIndex) ? blockIndex : 0;
774
+ return `cell-${n}-${rand}`;
775
+ }
776
+
705
777
  function defaultKey(name) {
706
778
  switch (name) {
707
779
  case "goto":
@@ -716,6 +788,8 @@ function defaultKey(name) {
716
788
  return "key";
717
789
  case "eval":
718
790
  return "script";
791
+ case "save":
792
+ return "name";
719
793
  default:
720
794
  return "value";
721
795
  }
@@ -733,6 +807,8 @@ async function handleSlice(msg) {
733
807
  const verbs = Array.isArray(msg.verbs) ? msg.verbs : [];
734
808
  const sessionName = msg.session || "default";
735
809
  const restore = msg.restore || null;
810
+ const blockIndex =
811
+ typeof msg.block_index === "number" ? msg.block_index : null;
736
812
 
737
813
  let session;
738
814
  try {
@@ -750,11 +826,14 @@ async function handleSlice(msg) {
750
826
  // is where we'd jump to verbs[restore.state.verb_index].
751
827
  const startAt = restore?.state?.verb_index ?? 0;
752
828
 
829
+ // Per-slice scratch so `save:` can capture the prior verb's JSON output.
830
+ const sliceCtx = { lastResult: undefined, blockIndex };
831
+
753
832
  for (let i = startAt; i < verbs.length; i++) {
754
833
  const v = verbs[i];
755
834
  const name = verbName(v);
756
835
  try {
757
- const summary = await runVerb(session.page, v, i);
836
+ const summary = await runVerb(session.page, v, i, sliceCtx);
758
837
  send({
759
838
  type: "verb.complete",
760
839
  verb: name,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "wb-browser-runtime",
3
- "version": "0.4.0",
3
+ "version": "0.5.1",
4
4
  "description": "Browser sidecar runtime for wb — Browserbase + Playwright over the wb-sidecar/1 line-framed JSON protocol.",
5
5
  "bin": {
6
6
  "wb-browser-runtime": "bin/wb-browser-runtime.js"