chromiumfish 0.1.4 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -14,7 +14,7 @@ import { ChromiumFish } from "chromiumfish";
14
14
 
15
15
  const browser = await ChromiumFish({ personaSeed: "alpha-7", headless: true });
16
16
  const page = await browser.newPage();
17
- await page.goto("https://abrahamjuliot.github.io/creepjs/");
17
+ await page.goto("https://example.com");
18
18
  await page.screenshot({ path: "fp.png" });
19
19
  await browser.close();
20
20
  ```
@@ -53,6 +53,37 @@ The DB auto-updates: it tracks the `latest` monthly build (cached, re-checked
53
53
  weekly), so you get fresh data without upgrading the SDK. Pin a fixed version
54
54
  with `CHROMIUMFISH_GEOIP_VERSION=2026.06` for reproducibility.
55
55
 
56
+ ## AI agent
57
+
58
+ ChromiumFish ships a native in-browser agent (perceive → think → act, driven by
59
+ an OpenAI-compatible LLM). `launchAgent` starts the browser with the agent layer
60
+ and connects over CDP; `runTask` drives it from a plain-language goal.
61
+
62
+ ```ts
63
+ import { withAgent } from "chromiumfish";
64
+
65
+ // LLM config: a nearby .env (OPENAI_API_*), or pass apiKey/apiBase/model here.
66
+ const result = await withAgent({ typing: "human" }, (agent) =>
67
+ agent.runTask("Search DuckDuckGo for 'chromiumfish' and give me the first result's URL."),
68
+ );
69
+ console.log(result.finalText);
70
+ ```
71
+
72
+ `withAgent` shuts the browser down for you; use `launchAgent` directly if you
73
+ want to manage the lifecycle (`const { agent, close } = await launchAgent()`).
74
+
75
+ | Option | Default | Description |
76
+ |--------|---------|-------------|
77
+ | `typing` | `"human"` | Typing speed: `"human"` (~75 WPM, natural), `"fast"`, `"instant"`, or a custom `[keyDown, keyUp, longMultiplier]` triple (numeric `keyDown`/`keyUp` values are milliseconds; `longMultiplier` is a unitless factor). |
78
+ | `model` | env | Model for the session (overrides `OPENAI_API_MODEL`); `runTask({ model })` overrides per task. |
79
+ | `apiKey` / `apiBase` | env | LLM key / base URL (override `OPENAI_API_KEY` / `OPENAI_API_BASE`). |
80
+ | `chrome` | `CHROME_BIN` / cached build | Path to the ChromiumFish binary. |
81
+ | `port` | `9222` | DevTools remote-debugging port. |
82
+ | `extraArgs` | — | Extra Chromium flags. |
83
+
84
+ > Needs a WebSocket: the Node 22+ global `WebSocket` is used automatically; on
85
+ > Node <22 install the optional `ws` package (`npm install ws`).
86
+
56
87
  ## CLI
57
88
 
58
89
  ```bash
package/dist/agent.d.ts ADDED
@@ -0,0 +1,94 @@
1
+ export declare const AGENT_SYSTEM_PROMPT: string;
2
+ export declare const TYPING_PROFILES: Record<string, [string, string, string]>;
3
/**
 * Custom typing cadence as a `[keyDown, keyUp, longMultiplier]` triple.
 * `typingFlag` appends "ms" to numeric `keyDown` / `keyUp` entries and passes
 * strings (and the multiplier) through unchanged.
 */
type Cadence = [string | number, string | number, string | number];
/**
 * Typing-speed setting: a built-in profile name, any other profile name as a
 * string, or a custom {@link Cadence} triple.
 *
 * The built-in names are spelled out as literals rather than derived with
 * `keyof typeof TYPING_PROFILES`: that constant is declared as
 * `Record<string, …>`, whose key type is `string | number`, which would let a
 * bare number type-check even though it is neither a profile name nor a triple.
 * `(string & {})` keeps arbitrary strings assignable while preserving editor
 * completion for the literals.
 */
export type TypingSpeed = "human" | "fast" | "instant" | (string & {}) | Cadence;
5
+ /** Build the GlicActorIncrementalTyping switch for a typing-speed setting. */
6
+ export declare function typingFlag(typing?: TypingSpeed): string;
7
+ export interface AgentStep {
8
+ action?: string;
9
+ status?: string;
10
+ [k: string]: unknown;
11
+ }
12
+ /** Outcome of one agent task plus the resolved step plan. */
13
+ export declare class AgentResult {
14
+ success: boolean;
15
+ finalText: string;
16
+ steps: AgentStep[];
17
+ replayed: boolean;
18
+ constructor(success: boolean, finalText: string, steps: AgentStep[], replayed?: boolean);
19
+ /** Number of steps replayed deterministically (no LLM call). */
20
+ get fromCache(): number;
21
+ /** Number of steps the page had drifted on, re-resolved via the LLM. */
22
+ get healed(): number;
23
+ /** Number of steps resolved by the LLM on a fresh run. */
24
+ get recorded(): number;
25
+ summary(): string;
26
+ }
27
+ export interface RunTaskOptions {
28
+ /** Navigate here before the agent loop (the agent can also navigate itself). */
29
+ url?: string;
30
+ /** Max perceive→act iterations. Defaults to 25. */
31
+ maxSteps?: number;
32
+ /** Per-task model override. */
33
+ model?: string;
34
+ /** A resolved plan to REPLAY (descriptor match per step, LLM only to heal). */
35
+ plan?: AgentStep[];
36
+ /** System prompt override (honored by builds whose agent layer reads it). */
37
+ systemPrompt?: string | null;
38
+ }
39
+ /** Connects to a running ChromiumFish CDP endpoint and runs agent tasks. */
40
+ export declare class AgentClient {
41
+ port: number;
42
+ private host;
43
+ private timeoutMs;
44
+ constructor(port?: number, host?: string, timeoutMs?: number);
45
+ private httpGet;
46
+ /** Return {targetId, wsUrl}, reusing a real page or opening one. */
47
+ private pickPage;
48
+ runTask(goal: string, opts?: RunTaskOptions): Promise<AgentResult>;
49
+ }
50
+ export interface LaunchAgentOptions {
51
+ /** DevTools remote-debugging port. Defaults to 9222. */
52
+ port?: number;
53
+ /** Path to the ChromiumFish binary; defaults to CHROME_BIN env or the cached build. */
54
+ chrome?: string;
55
+ /** LLM API key (overrides OPENAI_API_KEY). */
56
+ apiKey?: string;
57
+ /** LLM base URL (overrides OPENAI_API_BASE). */
58
+ apiBase?: string;
59
+ /** Model for this session (overrides OPENAI_API_MODEL). */
60
+ model?: string;
61
+ /** Typing cadence: "human" (default), "fast", "instant", or a [keyDown, keyUp, multiplier] triple. */
62
+ typing?: TypingSpeed;
63
+ /** Load a nearby .env for OPENAI_* config. Defaults to true. */
64
+ loadDotenv?: boolean;
65
+ /** Extra Chromium flags. */
66
+ extraArgs?: string[];
67
+ /** How long to wait for the CDP endpoint to come up (ms). Defaults to 30000. */
68
+ timeoutMs?: number;
69
+ }
70
+ /** A launched agent session: the connected client plus a cleanup function. */
71
+ export interface AgentSession {
72
+ agent: AgentClient;
73
+ /** Shut the browser down and remove its temp profile. */
74
+ close: () => Promise<void>;
75
+ }
76
+ /**
77
+ * Launch a local ChromiumFish with the AI agent layer and connect to it.
78
+ *
79
+ * LLM config can be passed in-script (`apiKey` / `apiBase` / `model`) or left to
80
+ * OPENAI_API_KEY / OPENAI_API_BASE / OPENAI_API_MODEL (a nearby .env is loaded
81
+ * automatically); an explicit option wins over the env var. Prefer {@link withAgent}
82
+ * for automatic cleanup, or remember to call the returned `close()`.
83
+ */
84
+ export declare function launchAgent(opts?: LaunchAgentOptions): Promise<AgentSession>;
85
+ /**
86
+ * Run `fn` against a freshly launched agent, shutting the browser down and
87
+ * cleaning up afterwards — the ergonomic equivalent of Python's
88
+ * `with launch_agent() as agent:`.
89
+ *
90
+ * const url = await withAgent({ typing: "fast" }, (agent) =>
91
+ * agent.runTask("...").then((r) => r.finalText));
92
+ */
93
+ export declare function withAgent<T>(opts: LaunchAgentOptions, fn: (agent: AgentClient) => Promise<T>): Promise<T>;
94
+ export {};
package/dist/agent.js ADDED
@@ -0,0 +1,453 @@
1
+ /**
2
+ * Native in-browser AI agent client (TypeScript port of the Python SDK's
3
+ * ``chromiumfish.agent``).
4
+ *
5
+ * Drives the fork's native ``Browser.agentRunTask`` CDP command on a running
6
+ * ChromiumFish (launched with ``--remote-debugging-port`` and the ``--agent-*``
7
+ * switches). Talks raw CDP over a WebSocket — the same path the Python client
8
+ * uses — which avoids Playwright's ``connectOverCDP`` context setup that this
9
+ * fork's CDP surface doesn't fully support.
10
+ *
11
+ * import { launchAgent } from "chromiumfish";
12
+ *
13
+ * const { agent, close } = await launchAgent({ typing: "fast" });
14
+ * try {
15
+ * const r = await agent.runTask("search for 'automation' and open the first result",
16
+ * { url: "http://127.0.0.1:8000/search" });
17
+ * console.log(r.success, r.finalText);
18
+ * } finally {
19
+ * await close();
20
+ * }
21
+ *
22
+ * Needs a WebSocket implementation: the Node 22+ global ``WebSocket`` is used if
23
+ * present, otherwise the optional ``ws`` package (``npm install ws``) on Node <22.
24
+ */
25
+ import { spawn } from "node:child_process";
26
+ import { mkdtempSync, rmSync, existsSync, readFileSync } from "node:fs";
27
+ import { tmpdir } from "node:os";
28
+ import path from "node:path";
29
+ import { binaryPath } from "./fetch.js";
30
+ const sleep = (ms) => new Promise((r) => setTimeout(r, ms));
31
// System ("master") prompt for the agent. `AgentClient.runTask` sends it as the
// `systemPrompt` parameter of Browser.agentRunTask unless the caller overrides it.
// NOTE: per the original author's note, currently shipped builds use the prompt
// compiled into the binary (C++ `kSystemPrompt`) and ignore this parameter, so
// against those builds it is effectively a no-op. It is kept for parity with the
// Python SDK and to state the intended contract: the final `done` answer holds
// ONLY the value the task asked for (no prose, no labels).
// The text is assembled from fragments purely for line length; the fragments are
// joined with no separator.
export const AGENT_SYSTEM_PROMPT = [
    "You are an autonomous web-browsing agent operating directly inside the ",
    "browser. Each turn you receive the interactive elements currently visible ",
    "on the page, one per line:\n",
    " [<index>]<role>label\n",
    "Roles: a (link), button, input, textarea, select, checkbox. Each input ",
    'shows its STATE in brackets: [EMPTY; placeholder hint "..."] = empty, the ',
    'hint is NOT a value, type into it; [value: "..."] = already contains that ',
    "text; [filled]/[empty] = a password field; [checked]/[unchecked] = a ",
    'checkbox; [selected: "..."]/[no selection] = a select (its choices follow ',
    "as opts:[value=text, ...] — use one exact value). Disabled controls are ",
    "marked (disabled). You also get a one-line note saying whether the page ",
    "changed since your last action.\n\n",
    "Respond with ONLY a JSON object (no prose, no markdown):\n",
    "{\n",
    ' "thought": "brief reasoning",\n',
    ' "actions": [ <one or more action steps, run in order, in one shot> ]\n',
    "}\n",
    'Each step is an object: {"action": ',
    '"click|type|scroll|navigate|select|read|wait|done", "index": <element ',
    'index>, "text": "to type", "enter": true (type: press Enter), ',
    '"url": "https://..." (navigate), "value": "exact option value" ',
    '(select), "direction": "down|up|left|right" (scroll), "seconds": 1 ',
    '(wait), "success": true and "final": "<answer>" (done)}.\n',
    "Put MULTIPLE steps in the array to do them in ONE shot (strongly preferred ",
    "for speed): whenever you know the values, fill ALL fields AND click submit ",
    "in a single response. Use a single step only when the next step truly ",
    "depends on this one's result.\n\n",
    "Rules:\n",
    "- If a cookie/consent dialog or modal is open — its elements are marked ",
    '(modal), or you see text like "Accept all" / "Reject all" / ',
    '"consent" / "Before you continue" — DISMISS it FIRST by clicking its ',
    "accept/agree/continue button, before attempting anything else. Elements ",
    "behind a modal are still listed but are visually covered and cannot be ",
    "used until it is gone.\n",
    "- Use only indices in the CURRENT list; they are renumbered every step.\n",
    '- Link items (role a) show their destination URL after " -> ". To report ',
    "or use a link's URL, READ it from the list — do not click/navigate to the ",
    "link just to find its address.\n",
    "- The element list shows only INTERACTIVE controls, NOT the article/body ",
    "text. To read or summarize page CONTENT (article, blog post, paragraphs), ",
    'issue {"action":"read"} by itself; the page\'s text appears in your NEXT ',
    "observation as PAGE TEXT — then answer from it.\n",
    "- To submit a search or form, set \"enter\": true when typing, or click the ",
    "submit/Continue button. Typing alone does not submit.\n",
    "- For a select, use action \"select\" with an exact value from its opts.\n",
    "- AVOID LOOPS: do each step ONCE. Never re-issue the same action (or the ",
    "same type+type+click batch) just because the page still looks unfinished. ",
    "A submit/save that clears or reloads the form almost always SUCCEEDED — ",
    "treat the reset/empty form as success, NOT a reason to submit again. ",
    "Created items and counts are non-interactive and invisible here; confirm ",
    'with {"action":"read"} then finish. Repeat an action only if the task TEXT ',
    "explicitly asks for it.\n",
    "- RETURN ONLY WHAT WAS ASKED. When you are done, the \"final\" field must ",
    "contain EXACTLY the value the task requested and NOTHING else — no ",
    "sentences, no labels, no explanation, no quotes, no markdown. Only when the ",
    "task asks for a description/summary should \"final\" be a sentence.\n",
    "- Finish with a {\"action\":\"done\",\"success\":true,\"final\":\"...\"} ",
    "step by itself; use success false only if the goal is genuinely impossible.",
].join("");
95
+ // Per-keystroke typing cadence for the agent's incremental typing, as
96
+ // [key-down, key-up, long-text-multiplier]. The Actor framework default is
97
+ // ~25ms/25ms/0.2 (~240 WPM — superhuman); "human" slows it to ~75 WPM so the
98
+ // typing looks natural. "fast"/"instant" trade realism for speed.
99
+ export const TYPING_PROFILES = {
100
+ human: ["45ms", "110ms", "0.7"], // ~75 WPM — natural, the default
101
+ fast: ["10ms", "18ms", "0.3"], // brisk, still per-keystroke
102
+ instant: ["0ms", "0ms", "0"], // no inter-key delay (fastest)
103
+ };
104
+ /** Build the GlicActorIncrementalTyping switch for a typing-speed setting. */
105
+ export function typingFlag(typing = "human") {
106
+ let kd, ku, mult;
107
+ if (typeof typing === "string") {
108
+ const prof = TYPING_PROFILES[typing];
109
+ if (!prof) {
110
+ throw new Error(`unknown typing speed '${typing}'; use one of ` +
111
+ `${Object.keys(TYPING_PROFILES).join(", ")} or a [keyDown, keyUp, multiplier] triple`);
112
+ }
113
+ [kd, ku, mult] = prof;
114
+ }
115
+ else {
116
+ [kd, ku, mult] = typing;
117
+ }
118
+ const ms = (v) => (typeof v === "string" ? v : `${v}ms`);
119
+ return ("--enable-features=GlicActorIncrementalTyping:" +
120
+ `glic-actor-incremental-typing-key-down-duration/${ms(kd)}/` +
121
+ `glic-actor-incremental-typing-key-up-duration/${ms(ku)}/` +
122
+ `glic-actor-incremental-typing-long-multiplier/${mult}`);
123
+ }
124
/**
 * Result of a single agent task: the success flag, the final answer text, the
 * resolved step list, and whether the run was given a plan to replay.
 */
export class AgentResult {
    success;
    finalText;
    steps;
    replayed;
    constructor(success, finalText, steps, replayed = false) {
        this.success = success;
        this.finalText = finalText;
        this.steps = steps;
        this.replayed = replayed;
    }
    /** How many steps carry status "replayed" (replayed deterministically, no LLM call). */
    get fromCache() {
        return this.steps.reduce((count, step) => (step.status === "replayed" ? count + 1 : count), 0);
    }
    /** How many steps carry status "healed" (page drifted; re-resolved via the LLM). */
    get healed() {
        return this.steps.reduce((count, step) => (step.status === "healed" ? count + 1 : count), 0);
    }
    /** How many steps carry status "recorded" (resolved by the LLM on a fresh run). */
    get recorded() {
        return this.steps.reduce((count, step) => (step.status === "recorded" ? count + 1 : count), 0);
    }
    /** One-line, human-readable digest: verdict, step count, and per-status tallies. */
    summary() {
        const verdict = this.success ? "ok" : "fail";
        const breakdown = [
            `${this.fromCache} replayed`,
            `${this.healed} healed`,
            `${this.recorded} recorded`,
        ].join(", ");
        return `${verdict} | ${this.steps.length} steps (${breakdown})`;
    }
}
153
+ async function getWebSocketCtor() {
154
+ const g = globalThis.WebSocket;
155
+ if (g)
156
+ return g;
157
+ try {
158
+ const spec = "ws"; // variable specifier: keeps tsc from requiring `ws` at build
159
+ const mod = await import(spec);
160
+ return mod.default ?? mod.WebSocket ?? mod;
161
+ }
162
+ catch {
163
+ throw new Error("the AI agent client needs a WebSocket implementation; on Node <22 install " +
164
+ "it with `npm install ws`");
165
+ }
166
+ }
167
+ /** Minimal CDP-over-WebSocket client. */
168
+ class CDP {
169
+ ws;
170
+ id = 0;
171
+ pending = new Map();
172
+ waiters = [];
173
+ constructor(ws) {
174
+ this.ws = ws;
175
+ ws.addEventListener("message", (ev) => {
176
+ const text = typeof ev.data === "string" ? ev.data : ev.data?.toString?.() ?? "";
177
+ let msg;
178
+ try {
179
+ msg = JSON.parse(text);
180
+ }
181
+ catch {
182
+ return;
183
+ }
184
+ if (msg.id != null && this.pending.has(msg.id)) {
185
+ const p = this.pending.get(msg.id);
186
+ this.pending.delete(msg.id);
187
+ if (msg.error)
188
+ p.reject(new Error(msg.error.message ?? JSON.stringify(msg.error)));
189
+ else
190
+ p.resolve(msg.result ?? {});
191
+ }
192
+ else if (msg.method) {
193
+ for (let i = this.waiters.length - 1; i >= 0; i--) {
194
+ if (this.waiters[i].method === msg.method) {
195
+ const w = this.waiters.splice(i, 1)[0];
196
+ clearTimeout(w.timer);
197
+ w.resolve();
198
+ }
199
+ }
200
+ }
201
+ });
202
+ }
203
+ static async connect(url) {
204
+ const WS = await getWebSocketCtor();
205
+ const ws = new WS(url);
206
+ await new Promise((resolve, reject) => {
207
+ ws.addEventListener("open", () => resolve(), { once: true });
208
+ ws.addEventListener("error", () => reject(new Error("CDP WebSocket error")), { once: true });
209
+ });
210
+ return new CDP(ws);
211
+ }
212
+ send(method, params = {}, timeoutMs) {
213
+ const id = ++this.id;
214
+ return new Promise((resolve, reject) => {
215
+ let timer;
216
+ if (timeoutMs) {
217
+ timer = setTimeout(() => {
218
+ this.pending.delete(id);
219
+ reject(new Error(`${method}: timed out after ${timeoutMs}ms`));
220
+ }, timeoutMs);
221
+ }
222
+ this.pending.set(id, {
223
+ resolve: (v) => {
224
+ if (timer)
225
+ clearTimeout(timer);
226
+ resolve(v);
227
+ },
228
+ reject,
229
+ });
230
+ this.ws.send(JSON.stringify({ id, method, params }));
231
+ });
232
+ }
233
+ waitEvent(method, timeoutMs) {
234
+ return new Promise((resolve) => {
235
+ const timer = setTimeout(() => {
236
+ const i = this.waiters.findIndex((w) => w.timer === timer);
237
+ if (i >= 0)
238
+ this.waiters.splice(i, 1);
239
+ resolve();
240
+ }, timeoutMs);
241
+ this.waiters.push({ method, resolve, timer });
242
+ });
243
+ }
244
+ close() {
245
+ try {
246
+ this.ws.close();
247
+ }
248
+ catch {
249
+ /* ignore */
250
+ }
251
+ }
252
+ }
253
/**
 * Connects to a running ChromiumFish CDP endpoint and runs agent tasks.
 *
 * Page discovery goes through the DevTools HTTP endpoints (`/json`,
 * `/json/version`) on `host:port`; the task itself is one
 * `Browser.agentRunTask` command sent over the page's WebSocket.
 */
export class AgentClient {
    port;
    host;
    timeoutMs;
    /**
     * @param port DevTools remote-debugging port (default 9222).
     * @param host Host of the DevTools endpoint (default "localhost").
     * @param timeoutMs Timeout for a whole `Browser.agentRunTask` call (default 420000).
     */
    constructor(port = 9222, host = "localhost", timeoutMs = 420_000) {
        this.port = port;
        this.host = host;
        this.timeoutMs = timeoutMs;
    }
    /** GET a DevTools HTTP path and parse the JSON body; throws on a non-2xx status. */
    async httpGet(p) {
        const res = await fetch(`http://${this.host}:${this.port}${p}`);
        if (!res.ok)
            throw new Error(`GET ${p} -> ${res.status}`);
        return res.json();
    }
    /** Return {targetId, wsUrl}, reusing a real page or opening one. */
    async pickPage() {
        const targets = await this.httpGet("/json");
        // A usable page is a "page" target that is not chrome:// and exposes a
        // debugger WebSocket URL.
        const pages = targets.filter((t) => t.type === "page" && !String(t.url ?? "").startsWith("chrome://") && t.webSocketDebuggerUrl);
        if (pages.length)
            return { targetId: pages[0].id, wsUrl: pages[0].webSocketDebuggerUrl };
        // No usable page: create one via the browser endpoint. (GET /json/new 405s on
        // recent builds, so go through Target.createTarget instead.)
        const ver = await this.httpGet("/json/version");
        const browser = await CDP.connect(ver.webSocketDebuggerUrl);
        try {
            const { targetId } = await browser.send("Target.createTarget", { url: "about:blank" });
            const again = await this.httpGet("/json");
            const pg = again.find((t) => t.id === targetId);
            if (!pg?.webSocketDebuggerUrl)
                throw new Error("could not open a page target");
            return { targetId, wsUrl: pg.webSocketDebuggerUrl };
        }
        finally {
            browser.close();
        }
    }
    /**
     * Run one agent task described by a plain-language `goal`.
     *
     * @param goal The task for the agent.
     * @param opts Optional settings: `url` to open first, `maxSteps` (default 25),
     *   per-task `model`, a `plan` to replay, and a `systemPrompt` override
     *   (defaults to AGENT_SYSTEM_PROMPT; `null` or "" omits the parameter).
     * @returns An AgentResult built from the command's `success`, `finalText`
     *   and `stepsJson` fields; `replayed` is true when a `plan` was supplied.
     */
    async runTask(goal, opts = {}) {
        const { url, maxSteps = 25, model = "", plan, systemPrompt = AGENT_SYSTEM_PROMPT } = opts;
        const { targetId, wsUrl } = await this.pickPage();
        const cdp = await CDP.connect(wsUrl);
        try {
            await cdp.send("Page.enable");
            if (url && url !== "about:blank") {
                // Arm the load waiter BEFORE navigating. Registering it only after
                // the Page.navigate reply leaves a window in which a load event
                // dispatched right behind the reply is missed, turning the wait
                // into the full 20 s timeout.
                const loaded = cdp.waitEvent("Page.loadEventFired", 20_000);
                await cdp.send("Page.navigate", { url });
                await loaded;
                await sleep(500); // brief settle time after the load event
            }
            // Optional parameters are only sent when they carry a value.
            const params = { targetId, goal, maxSteps };
            if (model)
                params.model = model;
            if (plan)
                params.planJson = JSON.stringify(plan);
            if (systemPrompt)
                params.systemPrompt = systemPrompt;
            const res = (await cdp.send("Browser.agentRunTask", params, this.timeoutMs)) ?? {};
            let steps = [];
            try {
                const parsed = JSON.parse(res.stepsJson ?? "[]");
                if (Array.isArray(parsed))
                    steps = parsed;
            }
            catch {
                /* malformed stepsJson: report the task result with no steps */
            }
            return new AgentResult(Boolean(res.success), res.finalText ?? "", steps, Boolean(plan));
        }
        finally {
            cdp.close();
        }
    }
}
326
/**
 * Load KEY=VALUE lines from the nearest .env (cwd or a parent) without override.
 *
 * Walks up from `process.cwd()` and stops at the first directory containing a
 * `.env`. Blank lines, `#` comments and lines without a key are skipped.
 * A leading `export ` and one pair of matching surrounding quotes are
 * stripped, so common forms like `export OPENAI_API_KEY="sk-..."` give the
 * bare value rather than a key named "export OPENAI_API_KEY" or a value that
 * still carries its quotes. Variables already in the environment are never
 * overwritten. No escape-sequence or inline-comment processing is done.
 */
function loadDotenv() {
    let dir = process.cwd();
    for (;;) {
        const envFile = path.join(dir, ".env");
        if (existsSync(envFile)) {
            // Drop a UTF-8 BOM so it does not become part of the first key.
            const content = readFileSync(envFile, "utf8").replace(/^\uFEFF/, "");
            for (const raw of content.split(/\r?\n/)) {
                const trimmed = raw.trim();
                if (!trimmed || trimmed.startsWith("#"))
                    continue;
                const line = trimmed.replace(/^export\s+/, "");
                const idx = line.indexOf("=");
                if (idx <= 0)
                    continue; // no "=" at all, or an empty key
                const key = line.slice(0, idx).trim();
                if (!key)
                    continue;
                let val = line.slice(idx + 1).trim();
                const quote = val[0];
                if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote))
                    val = val.slice(1, -1);
                if (!(key in process.env))
                    process.env[key] = val;
            }
            return; // only the nearest .env is consulted
        }
        const parent = path.dirname(dir);
        if (parent === dir)
            return; // reached the filesystem root
        dir = parent;
    }
}
350
/**
 * Launch a local ChromiumFish with the AI agent layer and connect to it.
 *
 * LLM config can be passed in-script (`apiKey` / `apiBase` / `model`) or left to
 * OPENAI_API_KEY / OPENAI_API_BASE / OPENAI_API_MODEL (a nearby .env is loaded
 * automatically); an explicit option wins over the env var. Prefer {@link withAgent}
 * for automatic cleanup, or remember to call the returned `close()`.
 *
 * @param opts See LaunchAgentOptions; every field is optional.
 * @returns `{ agent, close }`: a client bound to `port`, and a function that
 *   stops the browser and removes its temporary profile directory.
 * @throws Error if the typing setting is invalid, the binary cannot be started,
 *   the browser exits with a failure status during startup, or the CDP endpoint
 *   does not answer within `timeoutMs`.
 */
export async function launchAgent(opts = {}) {
    const { port = 9222, apiKey = "", apiBase = "", model = "", typing = "human", loadDotenv: doDotenv = true, extraArgs = [], timeoutMs = 30_000 } = opts;
    if (doDotenv)
        loadDotenv();
    let chrome = opts.chrome ?? process.env.CHROME_BIN;
    if (!chrome)
        chrome = await binaryPath();
    // Validate the typing setting before creating anything on disk, so a bad
    // value cannot leave a stray temp profile behind.
    const typingSwitch = typingFlag(typing);
    const profile = mkdtempSync(path.join(tmpdir(), "cf-agent-"));
    // Best-effort removal: retry briefly in case the browser is still releasing
    // files, and never let a cleanup failure mask the caller's own result/error.
    const cleanup = () => {
        try {
            rmSync(profile, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
        }
        catch {
            /* temp profile could not be removed; leave it for the OS to reap */
        }
    };
    // NOTE(review): the LLM key is passed as a command-line switch, and process
    // arguments are commonly visible to other local users via process listings.
    // Confirm whether the binary offers an env/file-based alternative.
    const args = [
        `--remote-debugging-port=${port}`,
        "--remote-allow-origins=*",
        `--user-data-dir=${profile}`,
        "--disable-actor-safety-checks", // let the agent act unattended
        // Typing cadence (see TYPING_PROFILES). Default "human" ~75 WPM so the
        // agent's keystrokes look natural; "fast"/"instant" go quicker.
        typingSwitch,
        "--no-first-run",
        "--no-default-browser-check",
        `--agent-llm-url=${apiBase || (process.env.OPENAI_API_BASE ?? "")}`,
        `--agent-llm-key=${apiKey || (process.env.OPENAI_API_KEY ?? "")}`,
        `--agent-model=${model || (process.env.OPENAI_API_MODEL ?? "")}`,
        ...extraArgs,
    ];
    let proc;
    try {
        proc = spawn(chrome, args, { stdio: "ignore" });
    }
    catch (err) {
        cleanup();
        throw err;
    }
    // Track the child's fate. Without an "error" listener a failed spawn (e.g. a
    // missing binary) surfaces as an unhandled "error" event on the child.
    let exited = false;
    let startFailure = null;
    const exitPromise = new Promise((resolve) => {
        proc.once("error", (err) => {
            exited = true;
            startFailure = new Error(`failed to start ChromiumFish (${chrome}): ${err.message}`);
            resolve();
        });
        proc.once("exit", (code, signal) => {
            exited = true;
            // A zero exit is tolerated (the binary may be a launcher that hands
            // off to the real browser); anything else means startup failed.
            if (code !== 0 && !startFailure) {
                startFailure = new Error(`ChromiumFish exited during startup (${signal ? `signal ${signal}` : `code ${code}`})`);
            }
            resolve();
        });
    });
    /** Resolve once the child has exited or `ms` has elapsed, whichever is first. */
    const waitForExit = (ms) => new Promise((resolve) => {
        if (exited) {
            resolve();
            return;
        }
        const timer = setTimeout(resolve, ms);
        exitPromise.then(() => {
            clearTimeout(timer);
            resolve();
        });
    });
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        // Fail fast instead of polling a dead process until the deadline.
        if (startFailure) {
            cleanup();
            throw startFailure;
        }
        try {
            const r = await fetch(`http://localhost:${port}/json/version`);
            if (r.ok)
                break;
        }
        catch {
            /* not up yet */
        }
        if (Date.now() > deadline) {
            try {
                proc.kill("SIGKILL");
            }
            catch {
                /* ignore */
            }
            cleanup();
            throw new Error("ChromiumFish did not expose its CDP endpoint in time");
        }
        await sleep(500);
    }
    // Open an initial page target so runTask can find one immediately.
    try {
        const ver = await (await fetch(`http://localhost:${port}/json/version`)).json();
        const browser = await CDP.connect(ver.webSocketDebuggerUrl);
        try {
            await browser.send("Target.createTarget", { url: "about:blank" });
        }
        finally {
            browser.close();
        }
    }
    catch {
        /* runTask.pickPage will retry if needed */
    }
    const close = async () => {
        if (!exited) {
            try {
                proc.kill("SIGTERM");
            }
            catch {
                /* ignore */
            }
            await waitForExit(3000); // grace period; returns as soon as it exits
        }
        if (!exited) {
            try {
                proc.kill("SIGKILL");
            }
            catch {
                /* ignore */
            }
            await waitForExit(2000);
        }
        // Delete the profile only after the browser has had the chance to exit,
        // so it is not still writing into the directory being removed.
        cleanup();
    };
    return { agent: new AgentClient(port), close };
}
437
/**
 * Launch an agent, hand it to `fn`, and always shut the browser down afterwards,
 * whether `fn` resolves or throws. This is the counterpart of Python's
 * `with launch_agent() as agent:`.
 *
 *     const url = await withAgent({ typing: "fast" }, (agent) =>
 *       agent.runTask("...").then((r) => r.finalText));
 *
 * @returns Whatever `fn` resolves to.
 */
export async function withAgent(opts, fn) {
    const { agent: client, close: shutdown } = await launchAgent(opts);
    let outcome;
    try {
        outcome = await fn(client);
    }
    finally {
        await shutdown();
    }
    return outcome;
}
package/dist/index.d.ts CHANGED
@@ -1,5 +1,7 @@
1
1
  export { ChromiumFish, buildArgs, BASE_ARGS } from "./launcher.js";
2
2
  export type { ChromiumFishOptions } from "./launcher.js";
3
+ export { launchAgent, withAgent, AgentClient, AgentResult, typingFlag, TYPING_PROFILES, AGENT_SYSTEM_PROMPT, } from "./agent.js";
4
+ export type { LaunchAgentOptions, AgentSession, RunTaskOptions, AgentStep, TypingSpeed, } from "./agent.js";
3
5
  export { fetchBrowser, binaryPath, installDir, cacheRoot, platformSlug, findBinary } from "./fetch.js";
4
6
  export { Ip2TzDB, fetchDb, lookupTimezone, resolveTimezone, resolveVersion as resolveGeoipVersion, egressIp, assetName as ip2tzAssetName, dbPath as ip2tzDbPath, } from "./ip2tz.js";
5
7
  export { SDK_VERSION, DEFAULT_BROWSER_VERSION, DEFAULT_GEOIP_VERSION, GEOIP_FALLBACK_VERSION, RELEASE_REPO, browserVersion, releaseBaseUrl, geoipVersion, geoipBaseUrl, } from "./version.js";
package/dist/index.js CHANGED
@@ -1,4 +1,5 @@
1
1
  export { ChromiumFish, buildArgs, BASE_ARGS } from "./launcher.js";
2
+ export { launchAgent, withAgent, AgentClient, AgentResult, typingFlag, TYPING_PROFILES, AGENT_SYSTEM_PROMPT, } from "./agent.js";
2
3
  export { fetchBrowser, binaryPath, installDir, cacheRoot, platformSlug, findBinary } from "./fetch.js";
3
4
  export { Ip2TzDB, fetchDb, lookupTimezone, resolveTimezone, resolveVersion as resolveGeoipVersion, egressIp, assetName as ip2tzAssetName, dbPath as ip2tzDbPath, } from "./ip2tz.js";
4
5
  export { SDK_VERSION, DEFAULT_BROWSER_VERSION, DEFAULT_GEOIP_VERSION, GEOIP_FALLBACK_VERSION, RELEASE_REPO, browserVersion, releaseBaseUrl, geoipVersion, geoipBaseUrl, } from "./version.js";
package/dist/version.d.ts CHANGED
@@ -6,9 +6,9 @@
6
6
  * SDK downloads by default; override it with `CHROMIUMFISH_VERSION`.
7
7
  */
8
8
  /** SDK package version (kept in sync with package.json). */
9
- export declare const SDK_VERSION = "0.1.4";
9
+ export declare const SDK_VERSION = "0.2.1";
10
10
  /** Default ChromiumFish browser build to fetch. Matches src/chrome/VERSION. */
11
- export declare const DEFAULT_BROWSER_VERSION = "150.0.7844";
11
+ export declare const DEFAULT_BROWSER_VERSION = "149.0.7827.115";
12
12
  /** Public repo hosting the release assets. */
13
13
  export declare const RELEASE_REPO = "arman-bd/chromiumfish";
14
14
  /**
@@ -30,7 +30,7 @@ export declare const GEOIP_FALLBACK_VERSION = "2026.06";
30
30
  * Reject version strings that aren't a plain build tag. Versions are
31
31
  * interpolated into filesystem cache paths and release URLs, so a crafted
32
32
  * value like `../../../etc` would escape the cache dir (path traversal).
33
- * Real tags are digits, dots, and hyphens (e.g. "150.0.7844", "2026.06",
33
+ * Real tags are digits, dots, and hyphens (e.g. "149.0.7827.115", "2026.06",
34
34
  * "latest").
35
35
  */
36
36
  export declare function assertSafeVersion(version: string): string;
package/dist/version.js CHANGED
@@ -6,9 +6,9 @@
6
6
  * SDK downloads by default; override it with `CHROMIUMFISH_VERSION`.
7
7
  */
8
8
  /** SDK package version (kept in sync with package.json). */
9
- export const SDK_VERSION = "0.1.4";
9
+ export const SDK_VERSION = "0.2.1";
10
10
  /** Default ChromiumFish browser build to fetch. Matches src/chrome/VERSION. */
11
- export const DEFAULT_BROWSER_VERSION = "150.0.7844";
11
+ export const DEFAULT_BROWSER_VERSION = "149.0.7827.115";
12
12
  /** Public repo hosting the release assets. */
13
13
  export const RELEASE_REPO = "arman-bd/chromiumfish";
14
14
  /**
@@ -30,7 +30,7 @@ export const GEOIP_FALLBACK_VERSION = "2026.06";
30
30
  * Reject version strings that aren't a plain build tag. Versions are
31
31
  * interpolated into filesystem cache paths and release URLs, so a crafted
32
32
  * value like `../../../etc` would escape the cache dir (path traversal).
33
- * Real tags are digits, dots, and hyphens (e.g. "150.0.7844", "2026.06",
33
+ * Real tags are digits, dots, and hyphens (e.g. "149.0.7827.115", "2026.06",
34
34
  * "latest").
35
35
  */
36
36
  export function assertSafeVersion(version) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "chromiumfish",
3
- "version": "0.1.4",
3
+ "version": "0.2.1",
4
4
  "description": "Stealth Chromium build with a drop-in Playwright harness — fetches and launches the ChromiumFish browser.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
@@ -45,7 +45,13 @@
45
45
  "node": ">=18"
46
46
  },
47
47
  "peerDependencies": {
48
- "playwright-core": ">=1.40"
48
+ "playwright-core": ">=1.40",
49
+ "ws": ">=8"
50
+ },
51
+ "peerDependenciesMeta": {
52
+ "ws": {
53
+ "optional": true
54
+ }
49
55
  },
50
56
  "devDependencies": {
51
57
  "@types/node": "^20.0.0",