wb-browser-runtime 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +58 -5
- package/bin/wb-browser-runtime.js +147 -999
- package/lib/http.js +63 -0
- package/lib/io.js +56 -0
- package/lib/providers/browser-use.js +133 -0
- package/lib/providers/browserbase.js +120 -0
- package/lib/providers/index.js +43 -0
- package/lib/recording-manager.js +620 -0
- package/lib/session-manager.js +101 -0
- package/lib/stub-page.js +112 -0
- package/lib/util.js +33 -0
- package/package.json +8 -3
- package/verbs/assert.js +23 -0
- package/verbs/click.js +8 -0
- package/verbs/eval.js +20 -0
- package/verbs/extract.js +38 -0
- package/verbs/fill.js +13 -0
- package/verbs/goto.js +10 -0
- package/verbs/index.js +70 -0
- package/verbs/press.js +9 -0
- package/verbs/save.js +55 -0
- package/verbs/screenshot.js +48 -0
- package/verbs/wait_for.js +13 -0
|
package/lib/session-manager.js
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
// Per-name session cache with in-flight create dedup and per-name
// serialization chains.
//
// Work is serialized per session name via `enqueueOn()`, while distinct
// names run in parallel. That means `ensure()` can be reached concurrently,
// so the in-flight create promise is deduped here: concurrent `ensure()`
// calls for the same name share one factory invocation instead of each
// creating their own remote browser session.
//
// The manager is creation-logic-free on purpose: callers hand an async
// factory to `ensure()`, which is invoked at most once per name while a
// session is cached or being created. The factory is responsible for its
// own cleanup on throw — on rejection the in-flight entry is dropped so a
// subsequent caller can retry.

export class SessionManager {
  constructor() {
    // name -> session info; only populated after the factory resolved.
    this._sessions = new Map();
    // name -> promise of a create that is still running.
    this._inFlight = new Map();
    // name -> tail promise of that name's serialized work chain.
    this._chains = new Map();
  }

  // Number of fully created sessions (in-flight creates are not counted).
  get size() {
    return this._sessions.size;
  }

  has(name) {
    return this._sessions.has(name);
  }

  get(name) {
    return this._sessions.get(name);
  }

  // Drops the cached session entry. Returns true when an entry existed.
  delete(name) {
    return this._sessions.delete(name);
  }

  entries() {
    return this._sessions.entries();
  }

  values() {
    return this._sessions.values();
  }

  // Match Map's default iterator (yields [name, info] pairs) so callers can
  // write `for (const [name, info] of manager)` the same way they would
  // against the underlying Map.
  [Symbol.iterator]() {
    return this._sessions.entries();
  }

  // Serialize work against a single session name (so two slices against
  // the same page don't race), but let distinct names run in parallel.
  //
  // `fn` should return a promise; errors propagate to the returned promise
  // and don't poison the next link in the chain. As with a plain
  // `.then(fn)`, `fn` receives the previous link's fulfillment value (or
  // undefined after a failure) as its only argument.
  //
  // Once the tail link for a name has finished running, its entry is
  // released so settled promises (and the results they retain) do not
  // accumulate for every name ever used.
  enqueueOn(name, fn) {
    const prev = this._chains.get(name) ?? Promise.resolve();
    const next = prev
      .catch(() => {})
      .then(async (value) => {
        try {
          return await fn(value);
        } finally {
          // Only release when nothing newer was queued behind this link.
          if (this._chains.get(name) === next) this._chains.delete(name);
        }
      });
    this._chains.set(name, next);
    return next;
  }

  // Resolve once every currently-queued chain has settled. Only observes
  // chains that exist at call time — later enqueues aren't awaited. Never
  // rejects: failures of individual chains are absorbed by allSettled.
  async drainAll() {
    const chains = Array.from(this._chains.values());
    await Promise.allSettled(chains);
  }

  // Return the cached session for `name`, creating it with `createFn` when
  // absent. Concurrent callers for the same name share one `createFn` call.
  // Rejects with whatever `createFn` rejects with; nothing is cached then.
  async ensure(name, createFn) {
    if (this._sessions.has(name)) return this._sessions.get(name);
    const pending = this._inFlight.get(name);
    if (pending) return pending;
    // Only set the cached entry after createFn returns successfully — a
    // failure inside createFn must not leave a half-constructed entry
    // visible to iterators.
    const creation = (async () => {
      const info = await createFn();
      this._sessions.set(name, info);
      return info;
    })();
    this._inFlight.set(name, creation);
    try {
      return await creation;
    } finally {
      this._inFlight.delete(name);
    }
  }
}
|
package/lib/stub-page.js
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
// Minimal in-memory fake of the Playwright `Page` API that the verbs
// exercise. Tests configure return values via the factory's options and
// assert against the recorded `calls` log.
//
// Intentionally does NOT simulate real browser behavior — a verb that
// would throw against a real page (e.g. `fill` on a non-existent selector)
// resolves cleanly here. Tests asserting error paths use options like
// `handles: { "#nope": null }` to wire explicit misses.
//
// Options (all optional):
//   initialUrl    — value returned by `url()` before any `goto`
//   screenshotBuf — Buffer returned by `screenshot()`
//   extractResult — value returned by `$$eval()`
//   evalResult    — value returned by `evaluate()`
//   handles       — selector -> { textContent } | null

export function createStubPage(opts = {}) {
  const calls = [];
  let currentUrl = opts.initialUrl ?? "about:blank";

  // Default is the first six bytes of the PNG signature.
  const screenshotBuf =
    opts.screenshotBuf ?? Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x0d, 0x0a]);
  const extractResult = opts.extractResult ?? [];
  const evalResult = opts.evalResult ?? null;
  // handles: selector -> { textContent } | null (null = selector not found).
  // Missing keys (undefined) also count as "not found" so tests only need
  // to wire what matters.
  const handles = opts.handles ?? {};

  // Append to the call log; returns the entry for convenience.
  const record = (call) => {
    calls.push(call);
    return call;
  };

  return {
    calls,
    // Test-only hook to change the reported URL without logging a call.
    _setUrl(u) {
      currentUrl = u;
    },
    url() {
      return currentUrl;
    },

    async goto(url, options) {
      record({ verb: "goto", url, options });
      currentUrl = url;
    },
    async fill(selector, value, options) {
      record({ verb: "fill", selector, value, options });
    },
    async click(selector, options) {
      record({ verb: "click", selector, options });
    },
    async press(selector, key, options) {
      record({ verb: "press", selector, key, options });
    },
    async waitForSelector(selector, options) {
      record({ verb: "waitForSelector", selector, options });
    },
    async screenshot(options) {
      record({ verb: "screenshot", options });
      return screenshotBuf;
    },
    // The page function is never executed; the canned result is returned.
    async $$eval(selector, fn, fieldSpec) {
      record({ verb: "$$eval", selector, fieldSpec });
      return extractResult;
    },
    async $(selector) {
      record({ verb: "$", selector });
      // Own-property lookup only: a selector such as "toString" or
      // "constructor" must not resolve to an Object.prototype member and
      // be reported as a found element.
      const h = Object.hasOwn(handles, selector) ? handles[selector] : undefined;
      if (h == null) return null;
      return {
        async textContent() {
          return h.textContent ?? null;
        },
      };
    },
    async evaluate(script) {
      record({ verb: "evaluate", script });
      return evalResult;
    },
  };
}
|
|
78
|
+
|
|
79
|
+
// Capture JSON frames written to stdout during a test. Returns
// `{ frames, dispose }`: `frames` collects the parsed objects and
// `dispose` restores `process.stdout.write`. Non-JSON writes pass through,
// so a test reporter's own output is still visible. Use inside a test with
// `t.after(dispose)`.
export function captureSendFrames() {
  // Keep the original function itself so dispose() can put back exactly
  // what was there, not a bound copy of it.
  const original = process.stdout.write;
  const passthrough = original.bind(process.stdout);
  const frames = [];
  process.stdout.write = (chunk, encoding, cb) => {
    const str = typeof chunk === "string" ? chunk : chunk?.toString?.();
    // A frame is a single line starting with "{" whose only newline is the
    // trailing one. Non-JSON or multi-line writes fall through to the real
    // stdout so reporter output isn't swallowed.
    if (
      str &&
      str.startsWith("{") &&
      str.endsWith("\n") &&
      str.indexOf("\n") === str.length - 1
    ) {
      try {
        frames.push(JSON.parse(str.trim()));
        // write(chunk, cb) and write(chunk, encoding, cb) are both valid
        // call shapes; honor whichever callback was supplied.
        if (typeof encoding === "function") encoding();
        else if (typeof cb === "function") cb();
        return true;
      } catch {
        // Fall through to real write — wasn't actually a JSON frame.
      }
    }
    return passthrough(chunk, encoding, cb);
  };
  const dispose = () => {
    process.stdout.write = original;
  };
  return { frames, dispose };
}
|
package/lib/util.js
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { randomUUID } from "node:crypto";
|
|
3
|
+
|
|
4
|
+
// Resolve `candidate` inside `dir`, rejecting traversal and absolute paths.
// Returns the absolute resolved path, or null when the resolved path
// escapes `dir` (or is `dir` itself). Used anywhere an author-controlled
// string could compose with a trusted directory into an arbitrary
// filesystem write.
export function resolveInside(dir, candidate) {
  const resolvedDir = path.resolve(dir);
  const resolved = path.resolve(resolvedDir, candidate);
  const rel = path.relative(resolvedDir, resolved);
  // Escaping means the first path SEGMENT is "..". A plain prefix test
  // would also reject legitimate in-directory names such as "..foo.png".
  const escapes = rel === ".." || rel.startsWith(`..${path.sep}`);
  // An absolute `rel` means no relative route exists (e.g. another drive
  // on Windows); an empty `rel` means the candidate is `dir` itself.
  if (rel === "" || escapes || path.isAbsolute(rel)) return null;
  return resolved;
}
|
|
16
|
+
|
|
17
|
+
// Turn an author-chosen name into a safe filename stem. Each run of
// characters outside [A-Za-z0-9_.-] (slashes, NULs, spaces, ...) collapses
// to a single "_", and the result is capped at 200 characters. Non-string
// input is stringified first.
export function sanitizeArtifactName(s) {
  return String(s).replace(/[^A-Za-z0-9_.-]+/g, "_").slice(0, 200);
}
|
|
22
|
+
|
|
23
|
+
// Build a default artifact name of the form `cell-<index>-<8 hex chars>`.
// The random suffix comes from a fresh UUID; a non-finite or non-numeric
// `blockIndex` falls back to 0.
export function autoArtifactName(blockIndex) {
  const rand = randomUUID().replace(/-/g, "").slice(0, 8);
  const n = Number.isFinite(blockIndex) ? blockIndex : 0;
  return `cell-${n}-${rand}`;
}
|
|
28
|
+
|
|
29
|
+
// Mask a value for display in summaries. Non-strings yield "", strings of
// four characters or fewer yield "***", and longer strings keep only their
// first two characters followed by "***".
export function redact(value) {
  if (typeof value !== "string") return "";
  if (value.length <= 4) return "***";
  return `${value.slice(0, 2)}***`;
}
|
package/package.json
CHANGED
|
@@ -1,19 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "wb-browser-runtime",
|
|
3
|
-
"version": "0.6.1",
|
|
4
|
-
"description": "Browser sidecar runtime for wb — Browserbase
|
|
3
|
+
"version": "0.7.0",
|
|
4
|
+
"description": "Browser sidecar runtime for wb — Playwright over CDP (Browserbase, browser-use) via the wb-sidecar/1 line-framed JSON protocol.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"wb-browser-runtime": "bin/wb-browser-runtime.js"
|
|
7
7
|
},
|
|
8
8
|
"type": "module",
|
|
9
9
|
"engines": {
|
|
10
|
-
"node": ">=
|
|
10
|
+
"node": ">=24"
|
|
11
|
+
},
|
|
12
|
+
"scripts": {
|
|
13
|
+
"test": "node --test"
|
|
11
14
|
},
|
|
12
15
|
"dependencies": {
|
|
13
16
|
"playwright-core": "^1.49.0"
|
|
14
17
|
},
|
|
15
18
|
"files": [
|
|
16
19
|
"bin",
|
|
20
|
+
"lib",
|
|
21
|
+
"verbs",
|
|
17
22
|
"vendor",
|
|
18
23
|
"README.md"
|
|
19
24
|
]
|
package/verbs/assert.js
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
// `assert` verb: fail the slice unless `selector` matches an element and,
// when given, the optional `text_contains` / `url_contains` checks hold.
// Returns the selector as the step summary.
const assertVerb = {
  name: "assert",
  primaryKey: "selector",
  async execute(page, args) {
    const sel = args.selector;
    // The selector lookup always runs first, so `selector` is required
    // even when only `url_contains` is of interest.
    const handle = await page.$(sel);
    if (!handle) throw new Error(`assert: selector not found: ${sel}`);
    if (args.text_contains) {
      const txt = (await handle.textContent()) ?? "";
      if (!txt.includes(args.text_contains)) {
        // Cap the echoed text at 80 characters to keep the message short.
        throw new Error(
          `assert: text "${args.text_contains}" not in ${sel} (got "${txt.slice(0, 80)}")`,
        );
      }
    }
    if (args.url_contains && !page.url().includes(args.url_contains)) {
      throw new Error(
        `assert: url does not contain "${args.url_contains}" (got ${page.url()})`,
      );
    }
    return `${sel}`;
  },
};

export default assertVerb;
|
package/verbs/click.js
ADDED
package/verbs/eval.js
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
// `eval` verb: run an author-supplied script in the page and print its
// result as pretty JSON. The result is also stored on `ctx.lastResult`
// when a context object is supplied. Summary is always "script ran".
const evalVerb = {
  name: "eval",
  primaryKey: "script",
  async execute(page, args, ctx) {
    // Wrap the script in an async IIFE so authors can write function-body
    // style: top-level `return X` works, top-level `await X` works, and
    // multi-statement scripts read naturally. Trade-off: bare-expression
    // scripts (`script: "1 + 1"`) do not return their value — authors must
    // say `return 1 + 1` explicitly.
    //
    // The script sits on its own lines: if the closing `})()` shared a
    // line with it, a script ending in a `//` comment would comment the
    // wrapper's tail out and fail with a syntax error.
    const wrapped = `(async () => {\n${args.script}\n})()`;
    const result = await page.evaluate(wrapped);
    console.log(JSON.stringify(result, null, 2));
    if (ctx) ctx.lastResult = result;
    return `script ran`;
  },
};

export default evalVerb;
|
package/verbs/extract.js
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
// `extract` verb: pull structured rows out of the page. Every element
// matching `selector` becomes one output object keyed by the names in
// `fields`. The rows are printed as pretty JSON and stored on
// `ctx.lastResult` when a context object is supplied.
const extractVerb = {
  name: "extract",
  primaryKey: "selector",
  async execute(page, args, ctx) {
    // Each `fields` entry is either:
    //   string                    — CSS selector relative to row, take textContent
    //   { selector, attr }        — CSS selector relative to row, take attribute
    //   { selector, text: true }  — explicit textContent (default)
    // A missing/empty selector targets the row element itself.
    const rowSelector = args.selector;
    const fields = args.fields ?? {};
    // The callback runs in the page, so it must be self-contained: it can
    // only use its own arguments, not variables from this module.
    const items = await page.$$eval(
      rowSelector,
      (rows, fieldSpec) =>
        rows.map((row) => {
          const out = {};
          for (const [name, spec] of Object.entries(fieldSpec)) {
            const sel = typeof spec === "string" ? spec : spec.selector;
            const attr = typeof spec === "string" ? null : spec.attr ?? null;
            const el = sel ? row.querySelector(sel) : row;
            if (!el) {
              // No match inside this row: keep the key, value null.
              out[name] = null;
              continue;
            }
            out[name] = attr
              ? el.getAttribute(attr)
              : (el.textContent || "").trim();
          }
          return out;
        }),
      fields,
    );
    // Emit as JSON on stdout, pretty-printed for readability.
    console.log(JSON.stringify(items, null, 2));
    if (ctx) ctx.lastResult = items;
    return `${rowSelector} → ${items.length} rows`;
  },
};

export default extractVerb;
|
package/verbs/fill.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { redact } from "../lib/util.js";
|
|
2
|
+
|
|
3
|
+
// `fill` verb: type `value` into the element matching `selector`. The
// value is stringified (null/undefined become ""), and the fill times out
// after `timeout` ms (default 10 000).
const fillVerb = {
  name: "fill",
  primaryKey: "selector",
  async execute(page, args) {
    await page.fill(args.selector, String(args.value ?? ""), {
      timeout: args.timeout ?? 10_000,
    });
    // Don't echo the full value into the summary — could be a credential.
    // `redact` receives the raw (unstringified) value.
    return `${args.selector} = «${redact(args.value)}»`;
  },
};

export default fillVerb;
|
package/verbs/goto.js
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
// `goto` verb: navigate the page to `url`. Waits for `wait_until`
// (default "domcontentloaded") with a `timeout` in ms (default 30 000).
// The summary reports the URL the page ended up on, which can differ from
// the requested one.
const gotoVerb = {
  name: "goto",
  primaryKey: "url",
  async execute(page, args) {
    const url = args.url ?? "";
    const waitUntil = args.wait_until ?? "domcontentloaded";
    await page.goto(url, { waitUntil, timeout: args.timeout ?? 30_000 });
    return `→ ${page.url()}`;
  },
};

export default gotoVerb;
|
package/verbs/index.js
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// Verb registry. Each verb module exports a default { name, primaryKey,
|
|
2
|
+
// execute(page, args, ctx) } object. The registry is the single source of
|
|
3
|
+
// truth for the SUPPORTS list (shipped in the ready frame), the default-key
|
|
4
|
+
// lookup used by the bare-string arg form, and the dispatch table consumed
|
|
5
|
+
// by runVerb.
|
|
6
|
+
//
|
|
7
|
+
// Adding a verb: drop a new file next to these, import it here, append to
|
|
8
|
+
// VERBS. SUPPORTS/DEFAULT_KEYS/VERB_REGISTRY all derive automatically — no
|
|
9
|
+
// third list to keep in sync.
|
|
10
|
+
|
|
11
|
+
import gotoVerb from "./goto.js";
|
|
12
|
+
import fillVerb from "./fill.js";
|
|
13
|
+
import clickVerb from "./click.js";
|
|
14
|
+
import pressVerb from "./press.js";
|
|
15
|
+
import waitForVerb from "./wait_for.js";
|
|
16
|
+
import screenshotVerb from "./screenshot.js";
|
|
17
|
+
import extractVerb from "./extract.js";
|
|
18
|
+
import assertVerb from "./assert.js";
|
|
19
|
+
import evalVerb from "./eval.js";
|
|
20
|
+
import saveVerb from "./save.js";
|
|
21
|
+
|
|
22
|
+
// Every supported verb, in the order it is advertised in SUPPORTS.
const VERBS = [
  gotoVerb,
  fillVerb,
  clickVerb,
  pressVerb,
  waitForVerb,
  screenshotVerb,
  extractVerb,
  assertVerb,
  evalVerb,
  saveVerb,
];

// Dispatch table: verb name -> verb module.
export const VERB_REGISTRY = Object.fromEntries(VERBS.map((v) => [v.name, v]));
// Names of all supported verbs, derived from VERBS.
export const SUPPORTS = VERBS.map((v) => v.name);
|
|
37
|
+
|
|
38
|
+
// Name of a verb step, i.e. the first key of its single-key object form
// (`{ goto: ... }` -> "goto"). Non-object input is stringified, and an
// object without keys yields the placeholder "verb".
export function verbName(verb) {
  if (!verb || typeof verb !== "object") return String(verb);
  return Object.keys(verb)[0] || "verb";
}
|
|
42
|
+
|
|
43
|
+
// Field that a bare-string argument maps to for the verb `name`; falls
// back to "value" for unknown verbs or verbs without a primaryKey.
export function defaultKey(name) {
  // Own-property check so names like "constructor" are treated as unknown
  // instead of resolving to an Object.prototype member.
  if (!Object.hasOwn(VERB_REGISTRY, name)) return "value";
  return VERB_REGISTRY[name]?.primaryKey ?? "value";
}
|
|
46
|
+
|
|
47
|
+
// Most verbs accept either a bare string ("goto: https://...") or a
// structured object ("goto: { url: ..., wait_until: ... }"). This pulls the
// canonical shape out of either: a string is wrapped under `primaryKey`,
// an object is returned as-is (not copied), and anything else (numbers,
// booleans, null, undefined) becomes an empty object.
export function arg(value, primaryKey) {
  if (typeof value === "string") return { [primaryKey]: value };
  if (value && typeof value === "object") return value;
  return {};
}
|
|
55
|
+
|
|
56
|
+
// Dispatch a single verb. `expand` is injected by the caller so the
// substitution/secrets machinery stays in the entry point instead of
// leaking into this module; it is called as
// `expand(args, ctx?.secrets, ctx?.artifactCache)` and its return value is
// what the verb receives. The verb's context is a shallow copy of `ctx`
// with `index` added. Throws "unsupported verb: <name>" for unknown verbs.
export async function runVerb(page, verb, index, ctx, expand) {
  const name = verbName(verb);
  // Own-property check: names such as "constructor" or "toString" would
  // otherwise resolve to Object.prototype members, slip past the guard and
  // fail later with an unrelated TypeError.
  const handler = Object.hasOwn(VERB_REGISTRY, name)
    ? VERB_REGISTRY[name]
    : undefined;
  if (!handler) throw new Error(`unsupported verb: ${name}`);
  const raw = verb[name];
  const args = expand(
    arg(raw, handler.primaryKey),
    ctx?.secrets,
    ctx?.artifactCache,
  );
  return handler.execute(page, args, { ...ctx, index });
}
|
package/verbs/press.js
ADDED
package/verbs/save.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { promises as fsPromises } from "node:fs";
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import { send } from "../lib/io.js";
|
|
5
|
+
import { sanitizeArtifactName, autoArtifactName } from "../lib/util.js";
|
|
6
|
+
|
|
7
|
+
// `save` verb: persist a JSON artifact into $WB_ARTIFACTS_DIR and announce
// it with a `slice.artifact_saved` frame. Saves the explicit `value:` when
// given, otherwise `ctx.lastResult` (the previous extract/eval output).
const saveVerb = {
  name: "save",
  primaryKey: "name",
  async execute(_page, args, ctx) {
    const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim();
    if (!artifactsDir) {
      throw new Error(
        "save: $WB_ARTIFACTS_DIR is not set — run this workbook via `wb run` (wb exports the dir for you)",
      );
    }
    // `value: null` counts as explicit; only `undefined` means "capture".
    const explicitValue = args.value !== undefined;
    const payload = explicitValue ? args.value : ctx?.lastResult;
    if (payload === undefined) {
      throw new Error(
        "save: no value provided and no prior extract/eval result to capture",
      );
    }
    // Author-chosen names are sanitized; otherwise a random name is
    // generated from the block (or verb) index.
    const name =
      typeof args.name === "string" && args.name.trim().length > 0
        ? sanitizeArtifactName(args.name)
        : autoArtifactName(ctx?.blockIndex ?? ctx?.index ?? 0);
    const filename = name.endsWith(".json") ? name : `${name}.json`;
    const full = path.join(artifactsDir, filename);
    await fsPromises.mkdir(artifactsDir, { recursive: true });
    const serialized = JSON.stringify(payload, null, 2);
    // JSON.stringify yields undefined (not a string) for functions and
    // symbols; fail with a clear message instead of a writeFile TypeError.
    if (serialized === undefined) {
      throw new Error("save: value is not JSON-serializable");
    }
    // Atomic write: serialize to .tmp, then rename. Announce the artifact
    // AFTER rename so a partial write can never be observed at `full`.
    const tmp = `${full}.${process.pid}.${randomUUID().slice(0, 8)}.tmp`;
    try {
      await fsPromises.writeFile(tmp, serialized, "utf8");
      await fsPromises.rename(tmp, full);
    } catch (e) {
      // Best-effort cleanup; the temp file may not exist if writeFile
      // itself failed, so a failing unlink is deliberately ignored.
      try {
        await fsPromises.unlink(tmp);
      } catch {}
      throw e;
    }
    send({
      type: "slice.artifact_saved",
      filename,
      path: full,
      bytes: Buffer.byteLength(serialized),
    });
    return `→ ${filename}`;
  },
};

export default saveVerb;
|
|
package/verbs/screenshot.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import path from "node:path";
|
|
2
|
+
import { promises as fsPromises } from "node:fs";
|
|
3
|
+
import { randomUUID } from "node:crypto";
|
|
4
|
+
import { resolveInside } from "../lib/util.js";
|
|
5
|
+
|
|
6
|
+
// `screenshot` verb: capture the page and write it to `path` inside
// $WB_ARTIFACTS_DIR (or the current directory when unset). `full_page`
// requests a full-page capture. The summary echoes the requested path.
const screenshotVerb = {
  name: "screenshot",
  primaryKey: "path",
  async execute(page, args) {
    // Always resolve inside the artifacts dir. Absolute paths and
    // traversals are rejected — paths are author-controlled and must not
    // grant arbitrary-write.
    const requested = args.path ?? `screenshot-${Date.now()}.png`;
    const artifactsDir = (process.env.WB_ARTIFACTS_DIR || "").trim() || ".";
    if (path.isAbsolute(requested)) {
      throw new Error(
        `screenshot: absolute paths are not allowed (got ${requested})`,
      );
    }
    const full = resolveInside(artifactsDir, requested);
    if (!full) {
      throw new Error(
        `screenshot: path escapes artifacts dir (got ${requested})`,
      );
    }
    // `requested` may contain subdirectories; create them on demand.
    await fsPromises.mkdir(path.dirname(full), { recursive: true });
    // Atomic write via tmp + rename so a crash mid-capture can't leave a
    // truncated image at the final path. The capture goes to a Buffer
    // (with `type` derived from the requested extension) and is written
    // here, rather than handing Playwright the `.tmp` path directly.
    // Anything other than .jpg/.jpeg is captured as PNG.
    const ext = path.extname(full).toLowerCase();
    const type = ext === ".jpg" || ext === ".jpeg" ? "jpeg" : "png";
    const tmp = `${full}.${process.pid}.${randomUUID().slice(0, 8)}.tmp`;
    try {
      const buf = await page.screenshot({ type, fullPage: !!args.full_page });
      await fsPromises.writeFile(tmp, buf);
      await fsPromises.rename(tmp, full);
    } catch (e) {
      // Best-effort cleanup; the temp file may not exist yet, so a
      // failing unlink is deliberately ignored.
      try {
        await fsPromises.unlink(tmp);
      } catch {}
      throw e;
    }
    return `→ ${requested}`;
  },
};

export default screenshotVerb;
|
|
package/verbs/wait_for.js
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
// `wait_for` verb: wait until `selector` reaches `state` (default
// "visible"), giving up after `timeout` ms (default 15 000). The summary
// names the selector and the awaited state.
const waitForVerb = {
  name: "wait_for",
  primaryKey: "selector",
  async execute(page, args) {
    const selector = args.selector;
    const state = args.state ?? "visible";
    await page.waitForSelector(selector, {
      state,
      timeout: args.timeout ?? 15_000,
    });
    return `${selector} (${state})`;
  },
};

export default waitForVerb;
|