chromiumfish 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -3
- package/dist/agent.d.ts +89 -0
- package/dist/agent.js +452 -0
- package/dist/fetch.js +56 -21
- package/dist/index.d.ts +2 -0
- package/dist/index.js +1 -0
- package/dist/ip2tz.js +137 -30
- package/dist/launcher.d.ts +3 -3
- package/dist/launcher.js +7 -3
- package/dist/version.d.ts +9 -1
- package/dist/version.js +16 -3
- package/package.json +8 -2
package/README.md
CHANGED
|
@@ -12,9 +12,9 @@ npx chromiumfish fetch # download + cache the browser build
|
|
|
12
12
|
```javascript
|
|
13
13
|
import { ChromiumFish } from "chromiumfish";
|
|
14
14
|
|
|
15
|
-
const browser = await ChromiumFish({ personaSeed:
|
|
15
|
+
const browser = await ChromiumFish({ personaSeed: "alpha-7", headless: true });
|
|
16
16
|
const page = await browser.newPage();
|
|
17
|
-
await page.goto("https://
|
|
17
|
+
await page.goto("https://example.com");
|
|
18
18
|
await page.screenshot({ path: "fp.png" });
|
|
19
19
|
await browser.close();
|
|
20
20
|
```
|
|
@@ -26,7 +26,7 @@ await browser.close();
|
|
|
26
26
|
|
|
27
27
|
| Option | Default | Description |
|
|
28
28
|
|--------|---------|-------------|
|
|
29
|
-
| `personaSeed` | — |
|
|
29
|
+
| `personaSeed` | — | String id for a stable, internally-consistent fingerprint persona (any string; a number works too). |
|
|
30
30
|
| `headless` | `true` | Run headless (SwiftShader). |
|
|
31
31
|
| `proxy` | — | Playwright proxy object, e.g. `{ server, username, password }`. |
|
|
32
32
|
| `windowSize` | `[1920, 1080]` | Window dimensions (`null` to omit). |
|
|
@@ -53,6 +53,38 @@ The DB auto-updates: it tracks the `latest` monthly build (cached, re-checked
|
|
|
53
53
|
weekly), so you get fresh data without upgrading the SDK. Pin a fixed version
|
|
54
54
|
with `CHROMIUMFISH_GEOIP_VERSION=2026.06` for reproducibility.
|
|
55
55
|
|
|
56
|
+
## AI agent
|
|
57
|
+
|
|
58
|
+
ChromiumFish ships a native in-browser agent (perceive → think → act, driven by
|
|
59
|
+
an OpenAI-compatible LLM). `launchAgent` starts the browser with the agent layer
|
|
60
|
+
and connects over CDP; `runTask` drives it from a plain-language goal.
|
|
61
|
+
|
|
62
|
+
```ts
|
|
63
|
+
import { withAgent } from "chromiumfish";
|
|
64
|
+
|
|
65
|
+
// LLM config from a nearby .env: OPENAI_API_BASE / OPENAI_API_KEY / OPENAI_API_MODEL
|
|
66
|
+
const url = await withAgent({ typing: "human" }, (agent) =>
|
|
67
|
+
agent
|
|
68
|
+
.runTask("Search DuckDuckGo for 'chromiumfish' and give me the first result's URL.")
|
|
69
|
+
.then((r) => r.finalText),
|
|
70
|
+
);
|
|
71
|
+
console.log(url);
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
`withAgent` shuts the browser down for you; use `launchAgent` directly if you
|
|
75
|
+
want to manage the lifecycle (`const { agent, close } = await launchAgent()`).
|
|
76
|
+
|
|
77
|
+
| Option | Default | Description |
|
|
78
|
+
|--------|---------|-------------|
|
|
79
|
+
| `typing` | `"human"` | Typing speed: `"human"` (~75 WPM, natural), `"fast"`, `"instant"`, or a custom `[keyDown, keyUp, longMultiplier]` triple (numeric `keyDown`/`keyUp` values are milliseconds; the multiplier is a unitless factor). |
|
|
80
|
+
| `model` | env | Model for the session (overrides `OPENAI_API_MODEL`); `runTask({ model })` overrides per task. |
|
|
81
|
+
| `chrome` | `CHROME_BIN` / cached build | Path to the ChromiumFish binary. |
|
|
82
|
+
| `port` | `9222` | DevTools remote-debugging port. |
|
|
83
|
+
| `extraArgs` | — | Extra Chromium flags. |
|
|
84
|
+
|
|
85
|
+
> Needs a WebSocket: the Node 22+ global `WebSocket` is used automatically; on
|
|
86
|
+
> Node <22 install the optional `ws` package (`npm install ws`).
|
|
87
|
+
|
|
56
88
|
## CLI
|
|
57
89
|
|
|
58
90
|
```bash
|
package/dist/agent.d.ts
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
/** Default system prompt; `runTask` sends it unless `RunTaskOptions.systemPrompt` overrides it. */
export declare const AGENT_SYSTEM_PROMPT: string;
/** Named typing cadences, each a `[keyDown, keyUp, longMultiplier]` triple of strings. */
export declare const TYPING_PROFILES: Record<string, [string, string, string]>;
/** Custom cadence triple; `typingFlag` appends "ms" to numeric keyDown/keyUp entries. */
type Cadence = [string | number, string | number, string | number];
/**
 * A typing-speed setting: a built-in profile name, or a custom cadence triple.
 *
 * The built-in names are spelled out because `keyof typeof TYPING_PROFILES`
 * is just `string` for a `Record<string, …>`, which hides them from editor
 * completion. `(string & {})` keeps arbitrary strings assignable, as before;
 * `typingFlag` throws at runtime for a name that is not a known profile.
 */
export type TypingSpeed = "human" | "fast" | "instant" | (string & {}) | Cadence;
/** Build the GlicActorIncrementalTyping switch for a typing-speed setting. */
export declare function typingFlag(typing?: TypingSpeed): string;
|
|
7
|
+
/**
 * One step of an agent run or plan. Only `action` and `status` are typed;
 * every other field is carried through as `unknown`.
 */
export interface AgentStep {
    /** Name of the action taken in this step. */
    action?: string;
    /** Step status; `AgentResult` counts "replayed", "healed" and "recorded". */
    status?: string;
    [field: string]: unknown;
}
|
|
12
|
+
/** Result of a single agent task, together with the step list it resolved. */
export declare class AgentResult {
    /** Whether the task reported success. */
    success: boolean;
    /** Final answer text reported by the task (empty string when none). */
    finalText: string;
    /** Steps reported for the run. */
    steps: AgentStep[];
    /** True when the task was run against a supplied plan. */
    replayed: boolean;
    constructor(success: boolean, finalText: string, steps: AgentStep[], replayed?: boolean);
    /** Count of steps replayed deterministically (no LLM call). */
    get fromCache(): number;
    /** Count of steps where the page had drifted and the LLM re-resolved them. */
    get healed(): number;
    /** Count of steps the LLM resolved on a fresh run. */
    get recorded(): number;
    /** One-line human-readable digest: outcome, step total, per-status counts. */
    summary(): string;
}
|
|
27
|
+
/** Per-task settings accepted by `AgentClient.runTask`. */
export interface RunTaskOptions {
    /** Page to open before the agent loop starts (the agent can also navigate on its own). */
    url?: string;
    /** Upper bound on perceive→act iterations; 25 when omitted. */
    maxSteps?: number;
    /** Model to use for this task only. */
    model?: string;
    /** Previously resolved plan to REPLAY (descriptor match per step; the LLM is used only to heal). */
    plan?: AgentStep[];
    /** Replacement system prompt (only honored by builds whose agent layer reads it). */
    systemPrompt?: string | null;
}
|
|
39
|
+
/** Client for a running ChromiumFish CDP endpoint; runs agent tasks against it. */
export declare class AgentClient {
    /** DevTools port this client talks to. */
    port: number;
    private host;
    private timeoutMs;
    /**
     * @param port DevTools remote-debugging port (9222 when omitted).
     * @param host Host of the endpoint ("localhost" when omitted).
     * @param timeoutMs Time limit for one agent task, in ms (420000 when omitted).
     */
    constructor(port?: number, host?: string, timeoutMs?: number);
    private httpGet;
    /** Yields {targetId, wsUrl}: an existing real page if there is one, else a newly opened one. */
    private pickPage;
    /** Run one plain-language task and resolve with its outcome. */
    runTask(goal: string, opts?: RunTaskOptions): Promise<AgentResult>;
}
|
|
50
|
+
/** Settings accepted by `launchAgent` / `withAgent`. */
export interface LaunchAgentOptions {
    /** Remote-debugging (DevTools) port; 9222 when omitted. */
    port?: number;
    /** ChromiumFish binary to run; falls back to the CHROME_BIN env var, then the cached build. */
    chrome?: string;
    /** Session model (takes precedence over OPENAI_API_MODEL). */
    model?: string;
    /** Typing cadence: "human" (default), "fast", "instant", or a [keyDown, keyUp, multiplier] triple. */
    typing?: TypingSpeed;
    /** Whether to read OPENAI_* config from a nearby .env; true when omitted. */
    loadDotenv?: boolean;
    /** Additional Chromium command-line flags. */
    extraArgs?: string[];
    /** Time allowed for the CDP endpoint to come up, in ms; 30000 when omitted. */
    timeoutMs?: number;
}
|
|
66
|
+
/** What `launchAgent` resolves to: a connected client and the matching teardown. */
export interface AgentSession {
    /** Client bound to the launched browser. */
    agent: AgentClient;
    /** Stops the browser and deletes its temporary profile. */
    close: () => Promise<void>;
}
|
|
72
|
+
/**
 * Start a local ChromiumFish with the AI agent layer enabled and connect to it.
 *
 * The LLM endpoint, key and model come from OPENAI_API_BASE / OPENAI_API_KEY /
 * OPENAI_API_MODEL; a nearby .env is loaded automatically. The caller owns the
 * returned `close()` — call it when done, or use {@link withAgent}, which
 * cleans up for you.
 */
export declare function launchAgent(opts?: LaunchAgentOptions): Promise<AgentSession>;
|
|
80
|
+
/**
 * Launch an agent, hand it to `fn`, then shut the browser down and clean up —
 * the counterpart of Python's `with launch_agent() as agent:`.
 *
 * Resolves with whatever `fn` resolves with.
 *
 *   const url = await withAgent({ typing: "fast" }, (agent) =>
 *     agent.runTask("...").then((r) => r.finalText));
 */
export declare function withAgent<T>(opts: LaunchAgentOptions, fn: (agent: AgentClient) => Promise<T>): Promise<T>;
|
|
89
|
+
export {};
|
package/dist/agent.js
ADDED
|
@@ -0,0 +1,452 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Native in-browser AI agent client (TypeScript port of the Python SDK's
|
|
3
|
+
* ``chromiumfish.agent``).
|
|
4
|
+
*
|
|
5
|
+
* Drives the fork's native ``Browser.agentRunTask`` CDP command on a running
|
|
6
|
+
* ChromiumFish (launched with ``--remote-debugging-port`` and the ``--agent-*``
|
|
7
|
+
* switches). Talks raw CDP over a WebSocket — the same path the Python client
|
|
8
|
+
* uses — which avoids Playwright's ``connectOverCDP`` context setup that this
|
|
9
|
+
* fork's CDP surface doesn't fully support.
|
|
10
|
+
*
|
|
11
|
+
* import { launchAgent } from "chromiumfish";
|
|
12
|
+
*
|
|
13
|
+
* const { agent, close } = await launchAgent({ typing: "fast" });
|
|
14
|
+
* try {
|
|
15
|
+
* const r = await agent.runTask("search for 'automation' and open the first result",
|
|
16
|
+
* { url: "http://127.0.0.1:8000/search" });
|
|
17
|
+
* console.log(r.success, r.finalText);
|
|
18
|
+
* } finally {
|
|
19
|
+
* await close();
|
|
20
|
+
* }
|
|
21
|
+
*
|
|
22
|
+
* Needs a WebSocket implementation: the Node 22+ global ``WebSocket`` is used if
|
|
23
|
+
* present, otherwise the optional ``ws`` package (``npm install ws``) on Node <22.
|
|
24
|
+
*/
|
|
25
|
+
import { spawn } from "node:child_process";
|
|
26
|
+
import { mkdtempSync, rmSync, existsSync, readFileSync } from "node:fs";
|
|
27
|
+
import { tmpdir } from "node:os";
|
|
28
|
+
import path from "node:path";
|
|
29
|
+
import { binaryPath } from "./fetch.js";
|
|
30
|
+
/** Promise-based delay: settles (with no value) once `ms` milliseconds have elapsed. */
const sleep = (ms) => new Promise((wake) => {
    setTimeout(wake, ms);
});
|
|
31
|
+
// Default system ("master") prompt for the agent; runTask sends it per task as
// the `systemPrompt` param of Browser.agentRunTask. NOTE: currently shipped
// builds use the prompt compiled into the binary (C++ `kSystemPrompt`) and
// ignore this param, so against them it has no effect. It is kept for parity
// with the Python SDK and to state the intended contract: the final `done`
// answer is ONLY the value the task asked for — no prose, no labels.
//
// The fragments below are joined with no separator; each one carries its own
// trailing space or newline.
export const AGENT_SYSTEM_PROMPT = [
    "You are an autonomous web-browsing agent operating directly inside the ",
    "browser. Each turn you receive the interactive elements currently visible ",
    "on the page, one per line:\n",
    " [<index>]<role>label\n",
    "Roles: a (link), button, input, textarea, select, checkbox. Each input ",
    'shows its STATE in brackets: [EMPTY; placeholder hint "..."] = empty, the ',
    'hint is NOT a value, type into it; [value: "..."] = already contains that ',
    "text; [filled]/[empty] = a password field; [checked]/[unchecked] = a ",
    'checkbox; [selected: "..."]/[no selection] = a select (its choices follow ',
    "as opts:[value=text, ...] — use one exact value). Disabled controls are ",
    "marked (disabled). You also get a one-line note saying whether the page ",
    "changed since your last action.\n\n",
    "Respond with ONLY a JSON object (no prose, no markdown):\n",
    "{\n",
    ' "thought": "brief reasoning",\n',
    ' "actions": [ <one or more action steps, run in order, in one shot> ]\n',
    "}\n",
    'Each step is an object: {"action": ',
    '"click|type|scroll|navigate|select|read|wait|done", "index": <element ',
    'index>, "text": "to type", "enter": true (type: press Enter), ',
    '"url": "https://..." (navigate), "value": "exact option value" ',
    '(select), "direction": "down|up|left|right" (scroll), "seconds": 1 ',
    '(wait), "success": true and "final": "<answer>" (done)}.\n',
    "Put MULTIPLE steps in the array to do them in ONE shot (strongly preferred ",
    "for speed): whenever you know the values, fill ALL fields AND click submit ",
    "in a single response. Use a single step only when the next step truly ",
    "depends on this one's result.\n\n",
    "Rules:\n",
    "- If a cookie/consent dialog or modal is open — its elements are marked ",
    '(modal), or you see text like "Accept all" / "Reject all" / ',
    '"consent" / "Before you continue" — DISMISS it FIRST by clicking its ',
    "accept/agree/continue button, before attempting anything else. Elements ",
    "behind a modal are still listed but are visually covered and cannot be ",
    "used until it is gone.\n",
    "- Use only indices in the CURRENT list; they are renumbered every step.\n",
    '- Link items (role a) show their destination URL after " -> ". To report ',
    "or use a link's URL, READ it from the list — do not click/navigate to the ",
    "link just to find its address.\n",
    "- The element list shows only INTERACTIVE controls, NOT the article/body ",
    "text. To read or summarize page CONTENT (article, blog post, paragraphs), ",
    'issue {"action":"read"} by itself; the page\'s text appears in your NEXT ',
    "observation as PAGE TEXT — then answer from it.\n",
    "- To submit a search or form, set \"enter\": true when typing, or click the ",
    "submit/Continue button. Typing alone does not submit.\n",
    "- For a select, use action \"select\" with an exact value from its opts.\n",
    "- AVOID LOOPS: do each step ONCE. Never re-issue the same action (or the ",
    "same type+type+click batch) just because the page still looks unfinished. ",
    "A submit/save that clears or reloads the form almost always SUCCEEDED — ",
    "treat the reset/empty form as success, NOT a reason to submit again. ",
    "Created items and counts are non-interactive and invisible here; confirm ",
    'with {"action":"read"} then finish. Repeat an action only if the task TEXT ',
    "explicitly asks for it.\n",
    "- RETURN ONLY WHAT WAS ASKED. When you are done, the \"final\" field must ",
    "contain EXACTLY the value the task requested and NOTHING else — no ",
    "sentences, no labels, no explanation, no quotes, no markdown. Only when the ",
    "task asks for a description/summary should \"final\" be a sentence.\n",
    "- Finish with a {\"action\":\"done\",\"success\":true,\"final\":\"...\"} ",
    "step by itself; use success false only if the goal is genuinely impossible.",
].join("");
|
|
95
|
+
// Per-keystroke typing cadence for the agent's incremental typing, as
|
|
96
|
+
// [key-down, key-up, long-text-multiplier]. The Actor framework default is
|
|
97
|
+
// ~25ms/25ms/0.2 (~240 WPM — superhuman); "human" slows it to ~75 WPM so the
|
|
98
|
+
// typing looks natural. "fast"/"instant" trade realism for speed.
|
|
99
|
+
export const TYPING_PROFILES = {
|
|
100
|
+
human: ["45ms", "110ms", "0.7"], // ~75 WPM — natural, the default
|
|
101
|
+
fast: ["10ms", "18ms", "0.3"], // brisk, still per-keystroke
|
|
102
|
+
instant: ["0ms", "0ms", "0"], // no inter-key delay (fastest)
|
|
103
|
+
};
|
|
104
|
+
/** Build the GlicActorIncrementalTyping switch for a typing-speed setting. */
|
|
105
|
+
export function typingFlag(typing = "human") {
|
|
106
|
+
let kd, ku, mult;
|
|
107
|
+
if (typeof typing === "string") {
|
|
108
|
+
const prof = TYPING_PROFILES[typing];
|
|
109
|
+
if (!prof) {
|
|
110
|
+
throw new Error(`unknown typing speed '${typing}'; use one of ` +
|
|
111
|
+
`${Object.keys(TYPING_PROFILES).join(", ")} or a [keyDown, keyUp, multiplier] triple`);
|
|
112
|
+
}
|
|
113
|
+
[kd, ku, mult] = prof;
|
|
114
|
+
}
|
|
115
|
+
else {
|
|
116
|
+
[kd, ku, mult] = typing;
|
|
117
|
+
}
|
|
118
|
+
const ms = (v) => (typeof v === "string" ? v : `${v}ms`);
|
|
119
|
+
return ("--enable-features=GlicActorIncrementalTyping:" +
|
|
120
|
+
`glic-actor-incremental-typing-key-down-duration/${ms(kd)}/` +
|
|
121
|
+
`glic-actor-incremental-typing-key-up-duration/${ms(ku)}/` +
|
|
122
|
+
`glic-actor-incremental-typing-long-multiplier/${mult}`);
|
|
123
|
+
}
|
|
124
|
+
/** Outcome of one agent task plus the resolved step plan. */
|
|
125
|
+
export class AgentResult {
|
|
126
|
+
success;
|
|
127
|
+
finalText;
|
|
128
|
+
steps;
|
|
129
|
+
replayed;
|
|
130
|
+
constructor(success, finalText, steps, replayed = false) {
|
|
131
|
+
this.success = success;
|
|
132
|
+
this.finalText = finalText;
|
|
133
|
+
this.steps = steps;
|
|
134
|
+
this.replayed = replayed;
|
|
135
|
+
}
|
|
136
|
+
/** Number of steps replayed deterministically (no LLM call). */
|
|
137
|
+
get fromCache() {
|
|
138
|
+
return this.steps.filter((s) => s.status === "replayed").length;
|
|
139
|
+
}
|
|
140
|
+
/** Number of steps the page had drifted on, re-resolved via the LLM. */
|
|
141
|
+
get healed() {
|
|
142
|
+
return this.steps.filter((s) => s.status === "healed").length;
|
|
143
|
+
}
|
|
144
|
+
/** Number of steps resolved by the LLM on a fresh run. */
|
|
145
|
+
get recorded() {
|
|
146
|
+
return this.steps.filter((s) => s.status === "recorded").length;
|
|
147
|
+
}
|
|
148
|
+
summary() {
|
|
149
|
+
return (`${this.success ? "ok" : "fail"} | ${this.steps.length} steps ` +
|
|
150
|
+
`(${this.fromCache} replayed, ${this.healed} healed, ${this.recorded} recorded)`);
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
async function getWebSocketCtor() {
|
|
154
|
+
const g = globalThis.WebSocket;
|
|
155
|
+
if (g)
|
|
156
|
+
return g;
|
|
157
|
+
try {
|
|
158
|
+
const spec = "ws"; // variable specifier: keeps tsc from requiring `ws` at build
|
|
159
|
+
const mod = await import(spec);
|
|
160
|
+
return mod.default ?? mod.WebSocket ?? mod;
|
|
161
|
+
}
|
|
162
|
+
catch {
|
|
163
|
+
throw new Error("the AI agent client needs a WebSocket implementation; on Node <22 install " +
|
|
164
|
+
"it with `npm install ws`");
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
/** Minimal CDP-over-WebSocket client. */
|
|
168
|
+
class CDP {
|
|
169
|
+
ws;
|
|
170
|
+
id = 0;
|
|
171
|
+
pending = new Map();
|
|
172
|
+
waiters = [];
|
|
173
|
+
constructor(ws) {
|
|
174
|
+
this.ws = ws;
|
|
175
|
+
ws.addEventListener("message", (ev) => {
|
|
176
|
+
const text = typeof ev.data === "string" ? ev.data : ev.data?.toString?.() ?? "";
|
|
177
|
+
let msg;
|
|
178
|
+
try {
|
|
179
|
+
msg = JSON.parse(text);
|
|
180
|
+
}
|
|
181
|
+
catch {
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
184
|
+
if (msg.id != null && this.pending.has(msg.id)) {
|
|
185
|
+
const p = this.pending.get(msg.id);
|
|
186
|
+
this.pending.delete(msg.id);
|
|
187
|
+
if (msg.error)
|
|
188
|
+
p.reject(new Error(msg.error.message ?? JSON.stringify(msg.error)));
|
|
189
|
+
else
|
|
190
|
+
p.resolve(msg.result ?? {});
|
|
191
|
+
}
|
|
192
|
+
else if (msg.method) {
|
|
193
|
+
for (let i = this.waiters.length - 1; i >= 0; i--) {
|
|
194
|
+
if (this.waiters[i].method === msg.method) {
|
|
195
|
+
const w = this.waiters.splice(i, 1)[0];
|
|
196
|
+
clearTimeout(w.timer);
|
|
197
|
+
w.resolve();
|
|
198
|
+
}
|
|
199
|
+
}
|
|
200
|
+
}
|
|
201
|
+
});
|
|
202
|
+
}
|
|
203
|
+
static async connect(url) {
|
|
204
|
+
const WS = await getWebSocketCtor();
|
|
205
|
+
const ws = new WS(url);
|
|
206
|
+
await new Promise((resolve, reject) => {
|
|
207
|
+
ws.addEventListener("open", () => resolve(), { once: true });
|
|
208
|
+
ws.addEventListener("error", () => reject(new Error("CDP WebSocket error")), { once: true });
|
|
209
|
+
});
|
|
210
|
+
return new CDP(ws);
|
|
211
|
+
}
|
|
212
|
+
send(method, params = {}, timeoutMs) {
|
|
213
|
+
const id = ++this.id;
|
|
214
|
+
return new Promise((resolve, reject) => {
|
|
215
|
+
let timer;
|
|
216
|
+
if (timeoutMs) {
|
|
217
|
+
timer = setTimeout(() => {
|
|
218
|
+
this.pending.delete(id);
|
|
219
|
+
reject(new Error(`${method}: timed out after ${timeoutMs}ms`));
|
|
220
|
+
}, timeoutMs);
|
|
221
|
+
}
|
|
222
|
+
this.pending.set(id, {
|
|
223
|
+
resolve: (v) => {
|
|
224
|
+
if (timer)
|
|
225
|
+
clearTimeout(timer);
|
|
226
|
+
resolve(v);
|
|
227
|
+
},
|
|
228
|
+
reject,
|
|
229
|
+
});
|
|
230
|
+
this.ws.send(JSON.stringify({ id, method, params }));
|
|
231
|
+
});
|
|
232
|
+
}
|
|
233
|
+
waitEvent(method, timeoutMs) {
|
|
234
|
+
return new Promise((resolve) => {
|
|
235
|
+
const timer = setTimeout(() => {
|
|
236
|
+
const i = this.waiters.findIndex((w) => w.timer === timer);
|
|
237
|
+
if (i >= 0)
|
|
238
|
+
this.waiters.splice(i, 1);
|
|
239
|
+
resolve();
|
|
240
|
+
}, timeoutMs);
|
|
241
|
+
this.waiters.push({ method, resolve, timer });
|
|
242
|
+
});
|
|
243
|
+
}
|
|
244
|
+
close() {
|
|
245
|
+
try {
|
|
246
|
+
this.ws.close();
|
|
247
|
+
}
|
|
248
|
+
catch {
|
|
249
|
+
/* ignore */
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
}
|
|
253
|
+
/** Connects to a running ChromiumFish CDP endpoint and runs agent tasks. */
|
|
254
|
+
export class AgentClient {
|
|
255
|
+
port;
|
|
256
|
+
host;
|
|
257
|
+
timeoutMs;
|
|
258
|
+
constructor(port = 9222, host = "localhost", timeoutMs = 420_000) {
|
|
259
|
+
this.port = port;
|
|
260
|
+
this.host = host;
|
|
261
|
+
this.timeoutMs = timeoutMs;
|
|
262
|
+
}
|
|
263
|
+
async httpGet(p) {
|
|
264
|
+
const res = await fetch(`http://${this.host}:${this.port}${p}`);
|
|
265
|
+
if (!res.ok)
|
|
266
|
+
throw new Error(`GET ${p} -> ${res.status}`);
|
|
267
|
+
return res.json();
|
|
268
|
+
}
|
|
269
|
+
/** Return {targetId, wsUrl}, reusing a real page or opening one. */
|
|
270
|
+
async pickPage() {
|
|
271
|
+
const targets = await this.httpGet("/json");
|
|
272
|
+
const pages = targets.filter((t) => t.type === "page" && !String(t.url ?? "").startsWith("chrome://") && t.webSocketDebuggerUrl);
|
|
273
|
+
if (pages.length)
|
|
274
|
+
return { targetId: pages[0].id, wsUrl: pages[0].webSocketDebuggerUrl };
|
|
275
|
+
// No usable page: create one via the browser endpoint. (GET /json/new 405s on
|
|
276
|
+
// recent builds, so go through Target.createTarget instead.)
|
|
277
|
+
const ver = await this.httpGet("/json/version");
|
|
278
|
+
const browser = await CDP.connect(ver.webSocketDebuggerUrl);
|
|
279
|
+
try {
|
|
280
|
+
const { targetId } = await browser.send("Target.createTarget", { url: "about:blank" });
|
|
281
|
+
const again = await this.httpGet("/json");
|
|
282
|
+
const pg = again.find((t) => t.id === targetId);
|
|
283
|
+
if (!pg?.webSocketDebuggerUrl)
|
|
284
|
+
throw new Error("could not open a page target");
|
|
285
|
+
return { targetId, wsUrl: pg.webSocketDebuggerUrl };
|
|
286
|
+
}
|
|
287
|
+
finally {
|
|
288
|
+
browser.close();
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
async runTask(goal, opts = {}) {
|
|
292
|
+
const { url, maxSteps = 25, model = "", plan, systemPrompt = AGENT_SYSTEM_PROMPT } = opts;
|
|
293
|
+
const { targetId, wsUrl } = await this.pickPage();
|
|
294
|
+
const cdp = await CDP.connect(wsUrl);
|
|
295
|
+
try {
|
|
296
|
+
await cdp.send("Page.enable");
|
|
297
|
+
if (url && url !== "about:blank") {
|
|
298
|
+
await cdp.send("Page.navigate", { url });
|
|
299
|
+
await cdp.waitEvent("Page.loadEventFired", 20_000);
|
|
300
|
+
await sleep(500);
|
|
301
|
+
}
|
|
302
|
+
const params = { targetId, goal, maxSteps };
|
|
303
|
+
if (model)
|
|
304
|
+
params.model = model;
|
|
305
|
+
if (plan)
|
|
306
|
+
params.planJson = JSON.stringify(plan);
|
|
307
|
+
if (systemPrompt)
|
|
308
|
+
params.systemPrompt = systemPrompt;
|
|
309
|
+
const res = (await cdp.send("Browser.agentRunTask", params, this.timeoutMs)) ?? {};
|
|
310
|
+
let steps = [];
|
|
311
|
+
try {
|
|
312
|
+
const parsed = JSON.parse(res.stepsJson ?? "[]");
|
|
313
|
+
if (Array.isArray(parsed))
|
|
314
|
+
steps = parsed;
|
|
315
|
+
}
|
|
316
|
+
catch {
|
|
317
|
+
/* leave steps empty */
|
|
318
|
+
}
|
|
319
|
+
return new AgentResult(Boolean(res.success), res.finalText ?? "", steps, Boolean(plan));
|
|
320
|
+
}
|
|
321
|
+
finally {
|
|
322
|
+
cdp.close();
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
}
|
|
326
|
+
/**
 * Load KEY=VALUE lines from the nearest .env (cwd or a parent) without override.
 *
 * Walks up from the current working directory and stops at the first `.env`
 * that can be read as a file. Variables already present in `process.env` are
 * never replaced. Per line:
 *   - blank lines, `#` comments and lines without a key are skipped;
 *   - an optional leading `export ` is dropped;
 *   - the value is everything after the first `=`, trimmed, with one pair of
 *     matching surrounding quotes ('…' or "…") removed.
 * A `.env` that cannot be read as a file (for example a directory of that
 * name) is ignored and the search continues in the parent directory.
 */
function loadDotenv() {
    let dir = process.cwd();
    for (;;) {
        const envFile = path.join(dir, ".env");
        let text = null;
        if (existsSync(envFile)) {
            try {
                text = readFileSync(envFile, "utf8");
            }
            catch {
                /* not a readable file (e.g. a directory named .env): keep walking up */
            }
        }
        if (text !== null) {
            // Strip a UTF-8 BOM so it cannot become part of the first key.
            for (const raw of text.replace(/^\uFEFF/, "").split(/\r?\n/)) {
                const line = raw.trim().replace(/^export\s+/, "");
                if (!line || line.startsWith("#"))
                    continue;
                const idx = line.indexOf("=");
                // idx < 0: no '=' on the line; idx === 0: empty key.
                if (idx <= 0)
                    continue;
                const key = line.slice(0, idx).trim();
                let val = line.slice(idx + 1).trim();
                const quote = val[0];
                // Only a matching pair is removed; a lone quote stays in the value.
                if (val.length >= 2 && (quote === '"' || quote === "'") && val.endsWith(quote))
                    val = val.slice(1, -1);
                if (!(key in process.env))
                    process.env[key] = val;
            }
            return;
        }
        const parent = path.dirname(dir);
        // dirname() of the filesystem root is the root itself: nothing left to search.
        if (parent === dir)
            return;
        dir = parent;
    }
}
|
|
350
|
+
/**
 * Launch a local ChromiumFish with the AI agent layer and connect to it.
 *
 * LLM config is read from OPENAI_API_BASE / OPENAI_API_KEY / OPENAI_API_MODEL
 * (a nearby .env is loaded automatically unless `loadDotenv` is false). Prefer
 * {@link withAgent} for automatic cleanup, or remember to call the returned
 * `close()`.
 *
 * @param opts See LaunchAgentOptions: `port` (9222), `chrome`, `model`,
 *   `typing` ("human"), `loadDotenv` (true), `extraArgs`, `timeoutMs` (30000).
 * @returns `{ agent, close }` — a client bound to the launched browser, and a
 *   function that stops the browser and removes its temp profile.
 * @throws {Error} when the typing setting is invalid, the binary cannot be
 *   started, or the CDP endpoint does not come up within `timeoutMs`.
 */
export async function launchAgent(opts = {}) {
    const { port = 9222, model = "", typing = "human", loadDotenv: doDotenv = true, extraArgs = [], timeoutMs = 30_000 } = opts;
    if (doDotenv)
        loadDotenv();
    // Resolve the typing switch first: an invalid setting then throws before
    // anything is downloaded or created on disk, so no temp profile is leaked.
    const typingSwitch = typingFlag(typing);
    let chrome = opts.chrome ?? process.env.CHROME_BIN;
    if (!chrome)
        chrome = await binaryPath();
    // Use the IPv4 loopback literal rather than "localhost", which can resolve
    // to ::1 first on some Node versions.
    // NOTE(review): assumes the DevTools server listens on 127.0.0.1 — confirm for this fork.
    const host = "127.0.0.1";
    const base = `http://${host}:${port}`;
    const profile = mkdtempSync(path.join(tmpdir(), "cf-agent-"));
    // Best effort: a leftover temp directory must not mask the caller's own
    // result or error. Retries cover files the exiting browser still holds.
    const cleanup = () => {
        try {
            rmSync(profile, { recursive: true, force: true, maxRetries: 5, retryDelay: 100 });
        }
        catch {
            /* best effort */
        }
    };
    // NOTE(review): the LLM key is passed as a command-line switch, which is
    // typically visible to other local users through process listings.
    const args = [
        `--remote-debugging-port=${port}`,
        "--remote-allow-origins=*",
        `--user-data-dir=${profile}`,
        "--disable-actor-safety-checks", // let the agent act unattended
        // Typing cadence (see TYPING_PROFILES). Default "human" ~75 WPM so the
        // agent's keystrokes look natural; "fast"/"instant" go quicker.
        typingSwitch,
        "--no-first-run",
        "--no-default-browser-check",
        `--agent-llm-url=${process.env.OPENAI_API_BASE ?? ""}`,
        `--agent-llm-key=${process.env.OPENAI_API_KEY ?? ""}`,
        `--agent-model=${model || (process.env.OPENAI_API_MODEL ?? "")}`,
        ...extraArgs,
    ];
    const proc = spawn(chrome, args, { stdio: "ignore" });
    let gone = false;
    let spawnError;
    // A ChildProcess 'error' event with no listener is thrown as an uncaught
    // exception, so always listen. Only a failed spawn (no pid) is fatal here.
    proc.on("error", (err) => {
        if (proc.pid === undefined) {
            spawnError = err;
            gone = true;
        }
    });
    proc.once("exit", () => {
        gone = true;
    });
    const deadline = Date.now() + timeoutMs;
    for (;;) {
        if (spawnError) {
            cleanup();
            throw new Error(`could not start ChromiumFish at '${chrome}': ${spawnError.message}`);
        }
        try {
            // Bound each probe so a peer that accepts but never answers cannot
            // stall the launch past the overall deadline.
            const probeMs = Math.max(250, Math.min(5_000, deadline - Date.now()));
            const r = await fetch(`${base}/json/version`, { signal: AbortSignal.timeout(probeMs) });
            if (r.ok)
                break;
        }
        catch {
            /* not up yet */
        }
        if (Date.now() > deadline) {
            try {
                proc.kill("SIGKILL");
            }
            catch {
                /* ignore */
            }
            cleanup();
            throw new Error("ChromiumFish did not expose its CDP endpoint in time");
        }
        await sleep(500);
    }
    // Open an initial page target so runTask can find one immediately.
    try {
        const ver = await (await fetch(`${base}/json/version`)).json();
        const browser = await CDP.connect(ver.webSocketDebuggerUrl);
        try {
            await browser.send("Target.createTarget", { url: "about:blank" });
        }
        finally {
            browser.close();
        }
    }
    catch {
        /* runTask.pickPage will retry if needed */
    }
    // Resolves true once the process has exited, false after `ms`. The timer and
    // listener are always released so nothing outlives the wait.
    const waitForExit = (ms) => new Promise((resolve) => {
        if (gone) {
            resolve(true);
            return;
        }
        const onExit = () => {
            clearTimeout(timer);
            resolve(true);
        };
        const timer = setTimeout(() => {
            proc.off("exit", onExit);
            resolve(false);
        }, ms);
        proc.once("exit", onExit);
    });
    let closed = false;
    const close = async () => {
        if (closed)
            return;
        closed = true;
        if (!gone) {
            try {
                proc.kill("SIGTERM");
            }
            catch {
                /* ignore */
            }
            // Give it a moment to exit on its own, then force it and wait for
            // the exit, so the profile is not removed under a running browser.
            if (!(await waitForExit(300))) {
                try {
                    proc.kill("SIGKILL");
                }
                catch {
                    /* ignore */
                }
                await waitForExit(2_000);
            }
        }
        cleanup();
    };
    return { agent: new AgentClient(port, host), close };
}
|
|
436
|
+
/**
 * Launch an agent, hand it to `fn`, then shut the browser down and clean up —
 * the counterpart of Python's `with launch_agent() as agent:`. Cleanup runs
 * whether `fn` resolves or rejects.
 *
 *   const url = await withAgent({ typing: "fast" }, (agent) =>
 *     agent.runTask("...").then((r) => r.finalText));
 */
export async function withAgent(opts, fn) {
    const session = await launchAgent(opts);
    try {
        const outcome = await fn(session.agent);
        return outcome;
    }
    finally {
        await session.close();
    }
}
|
package/dist/fetch.js
CHANGED
|
@@ -11,7 +11,7 @@ import * as fs from "node:fs";
|
|
|
11
11
|
import * as https from "node:https";
|
|
12
12
|
import * as os from "node:os";
|
|
13
13
|
import * as path from "node:path";
|
|
14
|
-
import { browserVersion, releaseBaseUrl } from "./version.js";
|
|
14
|
+
import { assertSafeVersion, browserVersion, releaseBaseUrl } from "./version.js";
|
|
15
15
|
export function cacheRoot() {
|
|
16
16
|
const env = process.env.CHROMIUMFISH_CACHE_DIR;
|
|
17
17
|
if (env)
|
|
@@ -35,12 +35,13 @@ export function platformSlug() {
|
|
|
35
35
|
throw new Error(`unsupported platform: ${process.platform}`);
|
|
36
36
|
}
|
|
37
37
|
function assetName(version) {
|
|
38
|
+
assertSafeVersion(version);
|
|
38
39
|
const slug = platformSlug();
|
|
39
40
|
const ext = slug.startsWith("win") ? "zip" : "tar.gz";
|
|
40
41
|
return `chromiumfish-${version}-${slug}.${ext}`;
|
|
41
42
|
}
|
|
42
43
|
export function installDir(version = browserVersion()) {
|
|
43
|
-
return path.join(cacheRoot(), version, platformSlug());
|
|
44
|
+
return path.join(cacheRoot(), assertSafeVersion(version), platformSlug());
|
|
44
45
|
}
|
|
45
46
|
const BINARY_NAMES = ["chromiumfish", "chrome", "chromiumfish.exe", "chrome.exe", "ChromiumFish"];
|
|
46
47
|
export function findBinary(root) {
|
|
@@ -48,8 +49,15 @@ export function findBinary(root) {
|
|
|
48
49
|
return null;
|
|
49
50
|
for (const name of BINARY_NAMES) {
|
|
50
51
|
const direct = path.join(root, name);
|
|
51
|
-
if
|
|
52
|
-
|
|
52
|
+
// statSync can throw if the file is removed between existsSync and stat
|
|
53
|
+
// (TOCTOU); treat any stat failure as "not a usable binary here".
|
|
54
|
+
try {
|
|
55
|
+
if (fs.statSync(direct).isFile())
|
|
56
|
+
return direct;
|
|
57
|
+
}
|
|
58
|
+
catch {
|
|
59
|
+
/* keep looking */
|
|
60
|
+
}
|
|
53
61
|
}
|
|
54
62
|
const stack = [root];
|
|
55
63
|
while (stack.length) {
|
|
@@ -64,11 +72,15 @@ export function findBinary(root) {
|
|
|
64
72
|
}
|
|
65
73
|
return null;
|
|
66
74
|
}
|
|
75
|
+
// Idle-timeout (ms) applied to every download/fetch socket. A stalled server
|
|
76
|
+
// (no bytes for this long) aborts instead of hanging the launch forever.
|
|
77
|
+
const DOWNLOAD_IDLE_TIMEOUT_MS = 60_000;
|
|
78
|
+
const FETCH_IDLE_TIMEOUT_MS = 30_000;
|
|
67
79
|
function download(url, dest) {
|
|
68
80
|
fs.mkdirSync(path.dirname(dest), { recursive: true });
|
|
69
81
|
return new Promise((resolve, reject) => {
|
|
70
82
|
const get = (u) => {
|
|
71
|
-
https.get(u, (res) => {
|
|
83
|
+
const req = https.get(u, (res) => {
|
|
72
84
|
if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
|
|
73
85
|
res.resume();
|
|
74
86
|
return get(res.headers.location);
|
|
@@ -80,15 +92,31 @@ function download(url, dest) {
|
|
|
80
92
|
const total = Number(res.headers["content-length"] || 0);
|
|
81
93
|
let read = 0;
|
|
82
94
|
const out = fs.createWriteStream(dest);
|
|
95
|
+
// On any failure: tear down both streams and remove the partial file
|
|
96
|
+
// so a later run doesn't trip over a truncated/corrupt download.
|
|
97
|
+
const fail = (err) => {
|
|
98
|
+
res.destroy();
|
|
99
|
+
out.destroy();
|
|
100
|
+
try {
|
|
101
|
+
fs.rmSync(dest, { force: true });
|
|
102
|
+
}
|
|
103
|
+
catch { /* best effort */ }
|
|
104
|
+
reject(err);
|
|
105
|
+
};
|
|
83
106
|
res.on("data", (c) => {
|
|
84
107
|
read += c.length;
|
|
85
108
|
if (total)
|
|
86
109
|
process.stderr.write(`\r[chromiumfish] ${Math.floor((read / total) * 100)}%`);
|
|
87
110
|
});
|
|
111
|
+
res.on("error", fail);
|
|
88
112
|
res.pipe(out);
|
|
89
113
|
out.on("finish", () => { process.stderr.write("\n"); out.close(() => resolve()); });
|
|
90
|
-
out.on("error",
|
|
91
|
-
})
|
|
114
|
+
out.on("error", fail);
|
|
115
|
+
});
|
|
116
|
+
req.on("error", reject);
|
|
117
|
+
req.setTimeout(DOWNLOAD_IDLE_TIMEOUT_MS, () => {
|
|
118
|
+
req.destroy(new Error(`download timed out (no data for ${DOWNLOAD_IDLE_TIMEOUT_MS}ms) for ${u}`));
|
|
119
|
+
});
|
|
92
120
|
};
|
|
93
121
|
process.stderr.write(`[chromiumfish] downloading ${url}\n`);
|
|
94
122
|
get(url);
|
|
@@ -96,20 +124,27 @@ function download(url, dest) {
|
|
|
96
124
|
}
|
|
97
125
|
function fetchText(url) {
|
|
98
126
|
return new Promise((resolve, reject) => {
|
|
99
|
-
const get = (u) =>
|
|
100
|
-
|
|
101
|
-
res.
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
res.
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
127
|
+
const get = (u) => {
|
|
128
|
+
const req = https.get(u, (res) => {
|
|
129
|
+
if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
|
|
130
|
+
res.resume();
|
|
131
|
+
return get(res.headers.location);
|
|
132
|
+
}
|
|
133
|
+
if (res.statusCode !== 200) {
|
|
134
|
+
res.resume();
|
|
135
|
+
return reject(new Error(`HTTP ${res.statusCode}`));
|
|
136
|
+
}
|
|
137
|
+
let data = "";
|
|
138
|
+
res.setEncoding("utf8");
|
|
139
|
+
res.on("data", (c) => (data += c));
|
|
140
|
+
res.on("end", () => resolve(data));
|
|
141
|
+
res.on("error", reject);
|
|
142
|
+
});
|
|
143
|
+
req.on("error", reject);
|
|
144
|
+
req.setTimeout(FETCH_IDLE_TIMEOUT_MS, () => {
|
|
145
|
+
req.destroy(new Error(`request timed out for ${u}`));
|
|
146
|
+
});
|
|
147
|
+
};
|
|
113
148
|
get(url);
|
|
114
149
|
});
|
|
115
150
|
}
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
export { ChromiumFish, buildArgs, BASE_ARGS } from "./launcher.js";
|
|
2
2
|
export type { ChromiumFishOptions } from "./launcher.js";
|
|
3
|
+
export { launchAgent, withAgent, AgentClient, AgentResult, typingFlag, TYPING_PROFILES, AGENT_SYSTEM_PROMPT, } from "./agent.js";
|
|
4
|
+
export type { LaunchAgentOptions, AgentSession, RunTaskOptions, AgentStep, TypingSpeed, } from "./agent.js";
|
|
3
5
|
export { fetchBrowser, binaryPath, installDir, cacheRoot, platformSlug, findBinary } from "./fetch.js";
|
|
4
6
|
export { Ip2TzDB, fetchDb, lookupTimezone, resolveTimezone, resolveVersion as resolveGeoipVersion, egressIp, assetName as ip2tzAssetName, dbPath as ip2tzDbPath, } from "./ip2tz.js";
|
|
5
7
|
export { SDK_VERSION, DEFAULT_BROWSER_VERSION, DEFAULT_GEOIP_VERSION, GEOIP_FALLBACK_VERSION, RELEASE_REPO, browserVersion, releaseBaseUrl, geoipVersion, geoipBaseUrl, } from "./version.js";
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
export { ChromiumFish, buildArgs, BASE_ARGS } from "./launcher.js";
|
|
2
|
+
export { launchAgent, withAgent, AgentClient, AgentResult, typingFlag, TYPING_PROFILES, AGENT_SYSTEM_PROMPT, } from "./agent.js";
|
|
2
3
|
export { fetchBrowser, binaryPath, installDir, cacheRoot, platformSlug, findBinary } from "./fetch.js";
|
|
3
4
|
export { Ip2TzDB, fetchDb, lookupTimezone, resolveTimezone, resolveVersion as resolveGeoipVersion, egressIp, assetName as ip2tzAssetName, dbPath as ip2tzDbPath, } from "./ip2tz.js";
|
|
4
5
|
export { SDK_VERSION, DEFAULT_BROWSER_VERSION, DEFAULT_GEOIP_VERSION, GEOIP_FALLBACK_VERSION, RELEASE_REPO, browserVersion, releaseBaseUrl, geoipVersion, geoipBaseUrl, } from "./version.js";
|
package/dist/ip2tz.js
CHANGED
|
@@ -18,10 +18,12 @@
|
|
|
18
18
|
*/
|
|
19
19
|
import { createHash } from "node:crypto";
|
|
20
20
|
import * as fs from "node:fs";
|
|
21
|
+
import * as http from "node:http";
|
|
21
22
|
import * as https from "node:https";
|
|
22
23
|
import * as path from "node:path";
|
|
24
|
+
import * as tls from "node:tls";
|
|
23
25
|
import { cacheRoot } from "./fetch.js";
|
|
24
|
-
import { GEOIP_FALLBACK_VERSION, geoipBaseUrl, geoipLatestManifestUrl, geoipVersion } from "./version.js";
|
|
26
|
+
import { assertSafeVersion, GEOIP_FALLBACK_VERSION, geoipBaseUrl, geoipLatestManifestUrl, geoipVersion, } from "./version.js";
|
|
25
27
|
const MAGIC = Buffer.from("IP2TZ\x01", "latin1");
|
|
26
28
|
const V4_REC = 6; // uint32 start + uint16 tz_idx
|
|
27
29
|
const V6_REC = 18; // 16-byte start + uint16 tz_idx
|
|
@@ -89,33 +91,42 @@ export async function resolveVersion(version = geoipVersion(), download = true)
|
|
|
89
91
|
return cached?.version || GEOIP_FALLBACK_VERSION;
|
|
90
92
|
}
|
|
91
93
|
export function assetName(version = geoipVersion()) {
|
|
92
|
-
return `ip2tz-${resolveVersionSync(version)}.bin`;
|
|
94
|
+
return `ip2tz-${assertSafeVersion(resolveVersionSync(version))}.bin`;
|
|
93
95
|
}
|
|
94
96
|
export function dbPath(version = geoipVersion()) {
|
|
95
|
-
return path.join(geoipDir(), `ip2tz-${resolveVersionSync(version)}.bin`);
|
|
97
|
+
return path.join(geoipDir(), `ip2tz-${assertSafeVersion(resolveVersionSync(version))}.bin`);
|
|
96
98
|
}
|
|
99
|
+
// Idle-timeout (ms) for the geoip manifest / DB / checksum fetches so a
|
|
100
|
+
// stalled server can't hang resolution or download forever.
|
|
101
|
+
const GET_IDLE_TIMEOUT_MS = 30_000;
|
|
97
102
|
function get(url) {
|
|
98
103
|
return new Promise((resolve, reject) => {
|
|
99
|
-
const go = (u) =>
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
res.
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
res.
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
104
|
+
const go = (u) => {
|
|
105
|
+
const req = https
|
|
106
|
+
.get(u, (res) => {
|
|
107
|
+
if (res.statusCode && res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) {
|
|
108
|
+
res.resume();
|
|
109
|
+
return go(res.headers.location);
|
|
110
|
+
}
|
|
111
|
+
if (res.statusCode !== 200) {
|
|
112
|
+
res.resume();
|
|
113
|
+
return reject(new Error(`HTTP ${res.statusCode}`));
|
|
114
|
+
}
|
|
115
|
+
const chunks = [];
|
|
116
|
+
res.on("data", (c) => chunks.push(c));
|
|
117
|
+
res.on("end", () => resolve(Buffer.concat(chunks)));
|
|
118
|
+
res.on("error", reject);
|
|
119
|
+
})
|
|
120
|
+
.on("error", reject);
|
|
121
|
+
req.setTimeout(GET_IDLE_TIMEOUT_MS, () => {
|
|
122
|
+
req.destroy(new Error(`request timed out for ${u}`));
|
|
123
|
+
});
|
|
124
|
+
};
|
|
114
125
|
go(url);
|
|
115
126
|
});
|
|
116
127
|
}
|
|
117
128
|
export async function fetchDb(version = geoipVersion(), force = false) {
|
|
118
|
-
const v = await resolveVersion(version); // concrete, e.g. "2026.06"
|
|
129
|
+
const v = assertSafeVersion(await resolveVersion(version)); // concrete, e.g. "2026.06"
|
|
119
130
|
const dest = path.join(geoipDir(), `ip2tz-${v}.bin`);
|
|
120
131
|
if (fs.existsSync(dest) && !force)
|
|
121
132
|
return dest;
|
|
@@ -256,28 +267,29 @@ function parseV6(ip) {
|
|
|
256
267
|
}
|
|
257
268
|
return b;
|
|
258
269
|
}
|
|
259
|
-
|
|
270
|
+
// Keyed by *resolved* concrete version so a later lookup with a different
|
|
271
|
+
// version doesn't silently reuse the first DB loaded.
|
|
272
|
+
const cache = new Map();
|
|
260
273
|
async function getDb(version = geoipVersion(), download = true) {
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
274
|
+
const v = assertSafeVersion(await resolveVersion(version, download));
|
|
275
|
+
const existing = cache.get(v);
|
|
276
|
+
if (existing)
|
|
277
|
+
return existing;
|
|
264
278
|
let p = path.join(geoipDir(), `ip2tz-${v}.bin`);
|
|
265
279
|
if (!fs.existsSync(p)) {
|
|
266
280
|
if (!download)
|
|
267
281
|
throw new Error("ip2tz DB not installed. Call fetchDb().");
|
|
268
282
|
p = await fetchDb(v);
|
|
269
283
|
}
|
|
270
|
-
|
|
271
|
-
|
|
284
|
+
const db = Ip2TzDB.load(p);
|
|
285
|
+
cache.set(v, db);
|
|
286
|
+
return db;
|
|
272
287
|
}
|
|
273
288
|
export async function lookupTimezone(ip, version = geoipVersion(), download = true) {
|
|
274
289
|
return (await getDb(version, download)).lookup(ip);
|
|
275
290
|
}
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
// a proxy is configured we currently probe direct and rely on the caller to
|
|
279
|
-
// pass an explicit IP if egress differs.
|
|
280
|
-
void proxy;
|
|
291
|
+
/** Probe the egress IP directly (no proxy). */
|
|
292
|
+
function egressDirect(timeoutMs) {
|
|
281
293
|
return new Promise((resolve) => {
|
|
282
294
|
const req = https.get(EGRESS_PROBE, { headers: { "User-Agent": "chromiumfish" } }, (res) => {
|
|
283
295
|
let data = "";
|
|
@@ -299,6 +311,101 @@ export function egressIp(proxy, timeoutMs = 8000) {
|
|
|
299
311
|
});
|
|
300
312
|
});
|
|
301
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Probe the egress IP *through an HTTP(S) proxy* by CONNECT-tunnelling to the
|
|
316
|
+
* probe host, so the resolved timezone matches the proxy's exit — the whole
|
|
317
|
+
* point of timezone:"auto". Mirrors what the Python SDK gets from urllib's
|
|
318
|
+
* ProxyHandler. Returns null on any failure (never falls back to a direct
|
|
319
|
+
* probe, which would report the machine's real-IP timezone).
|
|
320
|
+
*/
|
|
321
|
+
function egressViaProxy(proxy, timeoutMs) {
|
|
322
|
+
return new Promise((resolve) => {
|
|
323
|
+
let pu;
|
|
324
|
+
let tu;
|
|
325
|
+
try {
|
|
326
|
+
pu = new URL(proxy);
|
|
327
|
+
tu = new URL(EGRESS_PROBE);
|
|
328
|
+
}
|
|
329
|
+
catch {
|
|
330
|
+
return resolve(null);
|
|
331
|
+
}
|
|
332
|
+
let settled = false;
|
|
333
|
+
const done = (ip) => {
|
|
334
|
+
if (!settled) {
|
|
335
|
+
settled = true;
|
|
336
|
+
resolve(ip);
|
|
337
|
+
}
|
|
338
|
+
};
|
|
339
|
+
const headers = {};
|
|
340
|
+
if (pu.username) {
|
|
341
|
+
const creds = `${decodeURIComponent(pu.username)}:${decodeURIComponent(pu.password)}`;
|
|
342
|
+
headers["Proxy-Authorization"] = "Basic " + Buffer.from(creds).toString("base64");
|
|
343
|
+
}
|
|
344
|
+
const connectReq = http.request({
|
|
345
|
+
host: pu.hostname,
|
|
346
|
+
port: Number(pu.port) || (pu.protocol === "https:" ? 443 : 80),
|
|
347
|
+
method: "CONNECT",
|
|
348
|
+
path: `${tu.hostname}:443`,
|
|
349
|
+
headers,
|
|
350
|
+
timeout: timeoutMs,
|
|
351
|
+
});
|
|
352
|
+
connectReq.on("connect", (res, socket) => {
|
|
353
|
+
if (res.statusCode !== 200) {
|
|
354
|
+
socket.destroy();
|
|
355
|
+
return done(null);
|
|
356
|
+
}
|
|
357
|
+
const tlsSock = tls.connect({ socket, servername: tu.hostname }, () => {
|
|
358
|
+
// HTTP/1.0 so the response isn't chunk-encoded (simpler to parse).
|
|
359
|
+
tlsSock.write(`GET ${tu.pathname} HTTP/1.0\r\nHost: ${tu.hostname}\r\n` +
|
|
360
|
+
`User-Agent: chromiumfish\r\nAccept: application/json\r\nConnection: close\r\n\r\n`);
|
|
361
|
+
});
|
|
362
|
+
let raw = "";
|
|
363
|
+
tlsSock.setEncoding("utf8");
|
|
364
|
+
tlsSock.setTimeout(timeoutMs, () => {
|
|
365
|
+
tlsSock.destroy();
|
|
366
|
+
done(null);
|
|
367
|
+
});
|
|
368
|
+
tlsSock.on("data", (d) => (raw += d));
|
|
369
|
+
tlsSock.on("end", () => {
|
|
370
|
+
const body = raw.split("\r\n\r\n").slice(1).join("\r\n\r\n");
|
|
371
|
+
const m = body.match(/\{[\s\S]*\}/);
|
|
372
|
+
try {
|
|
373
|
+
done(m ? JSON.parse(m[0]).ip || null : null);
|
|
374
|
+
}
|
|
375
|
+
catch {
|
|
376
|
+
done(null);
|
|
377
|
+
}
|
|
378
|
+
});
|
|
379
|
+
tlsSock.on("error", () => done(null));
|
|
380
|
+
});
|
|
381
|
+
connectReq.on("error", () => done(null));
|
|
382
|
+
connectReq.on("timeout", () => {
|
|
383
|
+
connectReq.destroy();
|
|
384
|
+
done(null);
|
|
385
|
+
});
|
|
386
|
+
connectReq.end();
|
|
387
|
+
});
|
|
388
|
+
}
|
|
389
|
+
export function egressIp(proxy, timeoutMs = 8000) {
|
|
390
|
+
if (proxy) {
|
|
391
|
+
let scheme = "";
|
|
392
|
+
try {
|
|
393
|
+
scheme = new URL(proxy).protocol;
|
|
394
|
+
}
|
|
395
|
+
catch {
|
|
396
|
+
/* invalid proxy URL */
|
|
397
|
+
}
|
|
398
|
+
if (scheme === "http:" || scheme === "https:")
|
|
399
|
+
return egressViaProxy(proxy, timeoutMs);
|
|
400
|
+
// SOCKS / unknown schemes aren't supported for the probe. Return null
|
|
401
|
+
// (leave the timezone unset) rather than probing the direct connection
|
|
402
|
+
// and reporting the machine's real-IP timezone — the incoherence we want
|
|
403
|
+
// to avoid.
|
|
404
|
+
process.stderr.write(`[chromiumfish] egress probe: unsupported proxy scheme '${scheme || proxy}'; skipping timezone resolution\n`);
|
|
405
|
+
return Promise.resolve(null);
|
|
406
|
+
}
|
|
407
|
+
return egressDirect(timeoutMs);
|
|
408
|
+
}
|
|
302
409
|
export async function resolveTimezone(opts = {}) {
|
|
303
410
|
const { proxy, version = geoipVersion(), download = true } = opts;
|
|
304
411
|
let ip = opts.ip;
|
package/dist/launcher.d.ts
CHANGED
|
@@ -6,8 +6,8 @@ import { type Browser, type LaunchOptions } from "playwright-core";
|
|
|
6
6
|
*/
|
|
7
7
|
export declare const BASE_ARGS: string[];
|
|
8
8
|
export interface ChromiumFishOptions extends Omit<LaunchOptions, "executablePath"> {
|
|
9
|
-
/**
|
|
10
|
-
personaSeed?:
|
|
9
|
+
/** String id for a stable, internally-consistent fingerprint persona. */
|
|
10
|
+
personaSeed?: string;
|
|
11
11
|
/** Run headless (SwiftShader). Defaults to true. */
|
|
12
12
|
headless?: boolean;
|
|
13
13
|
/** Window dimensions; defaults to [1920, 1080]. Pass null to omit. */
|
|
@@ -28,6 +28,6 @@ export declare function buildArgs(opts: ChromiumFishOptions): string[];
|
|
|
28
28
|
* Launch ChromiumFish and return a standard Playwright `Browser`.
|
|
29
29
|
*
|
|
30
30
|
* import { ChromiumFish } from "chromiumfish";
|
|
31
|
-
* const browser = await ChromiumFish({ personaSeed:
|
|
31
|
+
* const browser = await ChromiumFish({ personaSeed: "alpha-7", headless: true });
|
|
32
32
|
*/
|
|
33
33
|
export declare function ChromiumFish(opts?: ChromiumFishOptions): Promise<Browser>;
|
package/dist/launcher.js
CHANGED
|
@@ -20,8 +20,12 @@ function proxyToUrl(proxy) {
|
|
|
20
20
|
return undefined;
|
|
21
21
|
const { server, username, password } = proxy;
|
|
22
22
|
if (username && server.includes("://")) {
|
|
23
|
-
const
|
|
24
|
-
|
|
23
|
+
const idx = server.indexOf("://");
|
|
24
|
+
const scheme = server.slice(0, idx);
|
|
25
|
+
const host = server.slice(idx + 3);
|
|
26
|
+
// Percent-encode credentials so a password containing ':' / '@' / '/'
|
|
27
|
+
// can't corrupt the URL; the egress probe decodes them again.
|
|
28
|
+
return `${scheme}://${encodeURIComponent(username)}:${encodeURIComponent(password ?? "")}@${host}`;
|
|
25
29
|
}
|
|
26
30
|
return server;
|
|
27
31
|
}
|
|
@@ -40,7 +44,7 @@ export function buildArgs(opts) {
|
|
|
40
44
|
* Launch ChromiumFish and return a standard Playwright `Browser`.
|
|
41
45
|
*
|
|
42
46
|
* import { ChromiumFish } from "chromiumfish";
|
|
43
|
-
* const browser = await ChromiumFish({ personaSeed:
|
|
47
|
+
* const browser = await ChromiumFish({ personaSeed: "alpha-7", headless: true });
|
|
44
48
|
*/
|
|
45
49
|
export async function ChromiumFish(opts = {}) {
|
|
46
50
|
const { personaSeed, headless = true, windowSize, version, download = true, timezone, args, ...launch } = opts;
|
package/dist/version.d.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* SDK downloads by default; override it with `CHROMIUMFISH_VERSION`.
|
|
7
7
|
*/
|
|
8
8
|
/** SDK package version (kept in sync with package.json). */
|
|
9
|
-
export declare const SDK_VERSION = "0.1.
|
|
9
|
+
export declare const SDK_VERSION = "0.1.4";
|
|
10
10
|
/** Default ChromiumFish browser build to fetch. Matches src/chrome/VERSION. */
|
|
11
11
|
export declare const DEFAULT_BROWSER_VERSION = "150.0.7844";
|
|
12
12
|
/** Public repo hosting the release assets. */
|
|
@@ -26,6 +26,14 @@ export declare const DEFAULT_GEOIP_VERSION = "latest";
|
|
|
26
26
|
* pointer). Bump occasionally so the offline floor stays recent.
|
|
27
27
|
*/
|
|
28
28
|
export declare const GEOIP_FALLBACK_VERSION = "2026.06";
|
|
29
|
+
/**
|
|
30
|
+
* Reject version strings that aren't a plain build tag. Versions are
|
|
31
|
+
* interpolated into filesystem cache paths and release URLs, so a crafted
|
|
32
|
+
* value like `../../../etc` would escape the cache dir (path traversal).
|
|
33
|
+
* Real tags are digits, dots, and hyphens (e.g. "150.0.7844", "2026.06",
|
|
34
|
+
* "latest").
|
|
35
|
+
*/
|
|
36
|
+
export declare function assertSafeVersion(version: string): string;
|
|
29
37
|
export declare function browserVersion(): string;
|
|
30
38
|
export declare function releaseBaseUrl(version?: string): string;
|
|
31
39
|
export declare function geoipVersion(): string;
|
package/dist/version.js
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* SDK downloads by default; override it with `CHROMIUMFISH_VERSION`.
|
|
7
7
|
*/
|
|
8
8
|
/** SDK package version (kept in sync with package.json). */
|
|
9
|
-
export const SDK_VERSION = "0.1.
|
|
9
|
+
export const SDK_VERSION = "0.1.4";
|
|
10
10
|
/** Default ChromiumFish browser build to fetch. Matches src/chrome/VERSION. */
|
|
11
11
|
export const DEFAULT_BROWSER_VERSION = "150.0.7844";
|
|
12
12
|
/** Public repo hosting the release assets. */
|
|
@@ -26,14 +26,27 @@ export const DEFAULT_GEOIP_VERSION = "latest";
|
|
|
26
26
|
* pointer). Bump occasionally so the offline floor stays recent.
|
|
27
27
|
*/
|
|
28
28
|
export const GEOIP_FALLBACK_VERSION = "2026.06";
|
|
29
|
+
/**
|
|
30
|
+
* Reject version strings that aren't a plain build tag. Versions are
|
|
31
|
+
* interpolated into filesystem cache paths and release URLs, so a crafted
|
|
32
|
+
* value like `../../../etc` would escape the cache dir (path traversal).
|
|
33
|
+
* Real tags are digits, dots, and hyphens (e.g. "150.0.7844", "2026.06",
|
|
34
|
+
* "latest").
|
|
35
|
+
*/
|
|
36
|
+
export function assertSafeVersion(version) {
|
|
37
|
+
if (!/^[A-Za-z0-9._-]+$/.test(version) || version === "." || version === "..") {
|
|
38
|
+
throw new Error(`invalid version string: ${JSON.stringify(version)}`);
|
|
39
|
+
}
|
|
40
|
+
return version;
|
|
41
|
+
}
|
|
29
42
|
export function browserVersion() {
|
|
30
|
-
return process.env.CHROMIUMFISH_VERSION || DEFAULT_BROWSER_VERSION;
|
|
43
|
+
return assertSafeVersion(process.env.CHROMIUMFISH_VERSION || DEFAULT_BROWSER_VERSION);
|
|
31
44
|
}
|
|
32
45
|
export function releaseBaseUrl(version = browserVersion()) {
|
|
33
46
|
return `https://github.com/${RELEASE_REPO}/releases/download/v${version}`;
|
|
34
47
|
}
|
|
35
48
|
export function geoipVersion() {
|
|
36
|
-
return process.env.CHROMIUMFISH_GEOIP_VERSION || DEFAULT_GEOIP_VERSION;
|
|
49
|
+
return assertSafeVersion(process.env.CHROMIUMFISH_GEOIP_VERSION || DEFAULT_GEOIP_VERSION);
|
|
37
50
|
}
|
|
38
51
|
export function geoipBaseUrl(version = geoipVersion()) {
|
|
39
52
|
return `https://github.com/${RELEASE_REPO}/releases/download/geoip-${version}`;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "chromiumfish",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Stealth Chromium build with a drop-in Playwright harness — fetches and launches the ChromiumFish browser.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -45,7 +45,13 @@
|
|
|
45
45
|
"node": ">=18"
|
|
46
46
|
},
|
|
47
47
|
"peerDependencies": {
|
|
48
|
-
"playwright-core": ">=1.40"
|
|
48
|
+
"playwright-core": ">=1.40",
|
|
49
|
+
"ws": ">=8"
|
|
50
|
+
},
|
|
51
|
+
"peerDependenciesMeta": {
|
|
52
|
+
"ws": {
|
|
53
|
+
"optional": true
|
|
54
|
+
}
|
|
49
55
|
},
|
|
50
56
|
"devDependencies": {
|
|
51
57
|
"@types/node": "^20.0.0",
|