browser-autopilot 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/README.md +251 -0
  2. package/dist/agent/history.d.ts +41 -0
  3. package/dist/agent/history.js +98 -0
  4. package/dist/agent/history.js.map +1 -0
  5. package/dist/agent/loop.d.ts +34 -0
  6. package/dist/agent/loop.js +278 -0
  7. package/dist/agent/loop.js.map +1 -0
  8. package/dist/agent/run.d.ts +4 -0
  9. package/dist/agent/run.js +67 -0
  10. package/dist/agent/run.js.map +1 -0
  11. package/dist/agent/state.d.ts +37 -0
  12. package/dist/agent/state.js +82 -0
  13. package/dist/agent/state.js.map +1 -0
  14. package/dist/agent/tools.d.ts +414 -0
  15. package/dist/agent/tools.js +399 -0
  16. package/dist/agent/tools.js.map +1 -0
  17. package/dist/browser/cdp.d.ts +91 -0
  18. package/dist/browser/cdp.js +470 -0
  19. package/dist/browser/cdp.js.map +1 -0
  20. package/dist/browser/dom.d.ts +30 -0
  21. package/dist/browser/dom.js +79 -0
  22. package/dist/browser/dom.js.map +1 -0
  23. package/dist/browser/snapshot.d.ts +19 -0
  24. package/dist/browser/snapshot.js +70 -0
  25. package/dist/browser/snapshot.js.map +1 -0
  26. package/dist/captcha/solver.d.ts +20 -0
  27. package/dist/captcha/solver.js +101 -0
  28. package/dist/captcha/solver.js.map +1 -0
  29. package/dist/config.d.ts +36 -0
  30. package/dist/config.js +44 -0
  31. package/dist/config.js.map +1 -0
  32. package/dist/index.d.ts +20 -0
  33. package/dist/index.js +43 -0
  34. package/dist/index.js.map +1 -0
  35. package/dist/orchestrator.d.ts +33 -0
  36. package/dist/orchestrator.js +197 -0
  37. package/dist/orchestrator.js.map +1 -0
  38. package/dist/viewer/server.d.ts +14 -0
  39. package/dist/viewer/server.js +93 -0
  40. package/dist/viewer/server.js.map +1 -0
  41. package/dist/x11/agent.d.ts +34 -0
  42. package/dist/x11/agent.js +103 -0
  43. package/dist/x11/agent.js.map +1 -0
  44. package/dist/x11/chrome.d.ts +9 -0
  45. package/dist/x11/chrome.js +107 -0
  46. package/dist/x11/chrome.js.map +1 -0
  47. package/dist/x11/input.d.ts +13 -0
  48. package/dist/x11/input.js +75 -0
  49. package/dist/x11/input.js.map +1 -0
  50. package/dist/x11/login.d.ts +6 -0
  51. package/dist/x11/login.js +76 -0
  52. package/dist/x11/login.js.map +1 -0
  53. package/package.json +79 -0
package/README.md ADDED
@@ -0,0 +1,251 @@
1
+ # browser-autopilot
2
+
3
+ A general-purpose autonomous browser agent that can log into any website and perform complex tasks — even on sites with aggressive bot detection (Twitter/X, TikTok, LinkedIn, etc.).
4
+
5
+ Built in TypeScript. No Playwright. No Puppeteer. Raw CDP + xdotool + LLM.
6
+
7
+ ## The Problem
8
+
9
+ Modern websites detect browser automation at the login gate. Twitter's `LoginJsInstrumentationSubtask`, Cloudflare's challenge pages, DataDome — they all fingerprint the browser and block the session if they detect CDP (Chrome DevTools Protocol) domains being enabled. Every existing automation tool (Playwright, Puppeteer, Selenium, browser-use) gets caught because it attaches CDP before the page loads.
10
+
11
+ ## The Solution
12
+
13
+ Automatic mode selection — tries the fast path first, falls back to stealth when blocked:
14
+
15
+ ```
16
+ orchestrate({ credentials, task })
17
+
18
+ ├─ Cached session (cookies)? ──────────────→ CDP Agent (instant)
19
+
20
+ ├─ Try CDP login (fast, 15 steps)
21
+ │ ├─ Works? ─────────────────────────────→ CDP Agent
22
+ │ └─ Bot detected? ("could not log you in")
23
+ │ │
24
+ │ └─ Relaunch Chrome, X11 login ─────→ CDP Agent
25
+ │ (xdotool keyboard, no CDP,
26
+ │ undetectable by any JS)
27
+
28
+ └─ Returns: { result, success, loginMethod: "cached"|"cdp"|"x11" }
29
+ ```
30
+
31
+ **CDP mode** (default): Raw Chrome DevTools Protocol. Fast, full DOM indexing with `[1] [2] [3]` element refs, screenshot + accessibility tree per step, 25+ tools.
32
+
33
+ **X11 mode** (fallback): `xdotool` keyboard input through the X Window System — identical to a physical keyboard. LLM sees X11 screenshots via ImageMagick. No CDP client attached. Completely undetectable by any in-page JavaScript. Used only when bot detection blocks CDP.
34
+
35
+ **The user never chooses.** The orchestrator tries CDP, detects failure signals, and switches automatically.
36
+
37
+ ## Features
38
+
39
+ | | |
40
+ |---|---|
41
+ | **Stealth login** | X11 keyboard input (xdotool) — undetectable by any in-page JavaScript |
42
+ | **LLM-driven** | Claude sees screenshots and decides actions — adapts to any UI change |
43
+ | **Index-based DOM** | Elements indexed as `[1] button "Next"` — LLM references by number |
44
+ | **25+ browser tools** | navigate, click, input, scroll, search, extract, tabs, dialogs, file I/O, JS eval |
45
+ | **Step-based agent** | Each step: screenshot + DOM → LLM reasoning → tool execution → history |
46
+ | **Error recovery** | Consecutive failure tracking, max_failures threshold, graceful degradation |
47
+ | **Loop detection** | Action hash comparison across sliding window, escalating nudges |
48
+ | **History management** | Truncation (first + last N steps), formatted context for LLM |
49
+ | **Prompt caching** | System prompt + tool definitions cached via Anthropic's `cache_control` |
50
+ | **Captcha solving** | Capsolver + 2Captcha (reCAPTCHA v2/v3, hCaptcha, Turnstile, DataDome) |
51
+ | **Proxy support** | SOCKS5 chaining via gost (handles auth that Chromium can't) |
52
+ | **Session persistence** | Browser profile + cookies survive across runs |
53
+ | **Sensitive data masking** | Passwords/secrets replaced with `<secret:key>` in LLM context |
54
+ | **Dockerized** | Runs headful Chrome in Docker via Xvfb — designed for containers/TEEs |
55
+
56
+ ## Quick Start
57
+
58
+ ### As a library
59
+
60
+ ```typescript
61
+ import { orchestrate } from "browser-autopilot";
62
+
63
+ // One call — handles login (auto CDP/X11) + task
64
+ const { result, success, loginMethod } = await orchestrate({
65
+ credentials: {
66
+ username: "myuser",
67
+ password: "mypass",
68
+ email: "me@example.com",
69
+ totpKey: "ABCDEF123456",
70
+ },
71
+ loginUrl: "https://x.com/login",
72
+ successUrlContains: "/home",
73
+ task: "Go to settings and export my data as CSV",
74
+ });
75
+
76
+ console.log(`${loginMethod} login → ${result}`);
77
+ ```
78
+
79
+ ### CDP agent only (already logged in or no auth needed)
80
+
81
+ ```typescript
82
+ import { CDPBrowser, runAgent } from "browser-autopilot";
83
+
84
+ const browser = new CDPBrowser();
85
+ await browser.connect();
86
+
87
+ const { result } = await runAgent({
88
+ task: "Go to wikipedia.org and find the population of Tokyo",
89
+ browser,
90
+ });
91
+ ```
92
+
93
+ ### As Docker (for servers/TEEs)
94
+
95
+ ```bash
96
+ docker build -f docker/Dockerfile -t browser-autopilot .
97
+
98
+ docker run --rm \
99
+ -e ANTHROPIC_API_KEY=sk-ant-... \
100
+ -e TWITTER_USER=myuser \
101
+ -e TWITTER_PASS=mypass \
102
+ -e TWITTER_EMAIL=me@example.com \
103
+ -e TWITTER_TOTP_KEY=ABCDEF123456 \
104
+ -e PROXY_HOST=1.2.3.4 \
105
+ -e PROXY_PORT=45001 \
106
+ -e PROXY_USER=proxyuser \
107
+ -e PROXY_PASS=proxypass \
108
+ -v mydata:/data \
109
+ browser-autopilot
110
+ ```
111
+
112
+ ### Log in to any site
113
+
114
+ ```typescript
115
+ import { X11Agent } from "browser-autopilot";
116
+ import * as chrome from "browser-autopilot/x11/chrome";
117
+
118
+ chrome.launch("https://tiktok.com/login", "tiktok-profile");
119
+
120
+ const agent = new X11Agent();
121
+ await agent.run({
122
+ systemPrompt: `Login to TikTok. Username: foo, Password: bar.
123
+ TYPE the username, KEY Return, TYPE the password, KEY Return.
124
+ When you see the For You page, say ACTION: DONE.`,
125
+ successCheck: () => chrome.pageUrlContains("/foryou"),
126
+ });
127
+ ```
128
+
129
+ ### Custom tools
130
+
131
+ ```typescript
132
+ import { z } from "zod";
133
+ import { tool } from "ai";
134
+ import { runAgent, CDPBrowser } from "browser-autopilot";
135
+
136
+ const browser = new CDPBrowser();
137
+ await browser.connect();
138
+
139
+ await runAgent({
140
+ task: "Login and download my invoice",
141
+ browser,
142
+ extraTools: {
143
+ get_2fa: tool({
144
+ description: "Get 2FA code from authenticator",
145
+ parameters: z.object({}),
146
+ execute: async () => "123456",
147
+ }),
148
+ },
149
+ });
150
+ ```
151
+
152
+ ## How It Works (Per Step)
153
+
154
+ Each step of the CDP agent:
155
+
156
+ 1. **Capture state** — screenshot (CDP `Page.captureScreenshot`) + accessibility tree (`Accessibility.getFullAXTree`) + URL + title + tabs + scroll position + pending dialogs
157
+ 2. **Index DOM** — interactive elements get sequential numbers: `[1] button "Submit"`, `[2] textbox "Email"`. The LLM references elements by these numbers.
158
+ 3. **Build context** — format state as text, append action history (truncated), add loop detection nudges if needed
159
+ 4. **Send to LLM** — screenshot as vision input + state text + tool definitions (all prompt-cached)
160
+ 5. **Parse response** — extract reasoning (evaluation, memory, next goal) + tool calls
161
+ 6. **Execute tools** — up to `maxActionsPerStep` tool calls per step (click, type, navigate, etc.)
162
+ 7. **Record history** — step added to history with all actions + results + errors
163
+ 8. **Check termination** — done tool called? max steps? max failures?
164
+
165
+ ## Project Structure
166
+
167
+ ```
168
+ browser-autopilot/
169
+ ├── src/
170
+ │ ├── config.ts # All env vars
171
+ │ ├── index.ts # CLI entrypoint + library exports
172
+ │ ├── orchestrator.ts # Auto mode selection (cached → CDP → X11)
173
+ │ ├── x11/
174
+ │ │ ├── agent.ts # Generic X11Agent (works for any site)
175
+ │ │ ├── chrome.ts # Chrome launch, focus, status check
176
+ │ │ ├── input.ts # xdotool: type, key, click, screenshot
177
+ │ │ └── login.ts # Twitter login built on X11Agent
178
+ │ ├── browser/
179
+ │ │ ├── cdp.ts # Raw CDP client (nav, click, type, tabs, cookies, dialogs)
180
+ │ │ ├── dom.ts # DOM indexer — [1] [2] [3] element refs
181
+ │ │ └── snapshot.ts # AX tree serialization utilities
182
+ │ ├── agent/
183
+ │ │ ├── loop.ts # Step-based agent loop (the core)
184
+ │ │ ├── tools.ts # 25+ browser tools for the agent
185
+ │ │ ├── state.ts # Per-step browser state capture
186
+ │ │ ├── history.ts # History tracking, loop detection, truncation
187
+ │ │ └── run.ts # CLI entrypoint (Twitter dev portal example)
188
+ │ └── captcha/
189
+ │ └── solver.ts # Capsolver + 2Captcha unified solver
190
+ ├── docker/
191
+ │ ├── Dockerfile # Production image (amd64, Google Chrome, gost, xdotool)
192
+ │ └── entrypoint.sh # dbus → Xvfb → openbox → gost → login → agent
193
+ ├── tests/
194
+ │ ├── dom.test.ts # DOM indexer tests
195
+ │ ├── history.test.ts # History + loop detection tests
196
+ │ ├── state.test.ts # Browser state formatting tests
197
+ │ └── config.test.ts # Config tests
198
+ ├── package.json
199
+ └── tsconfig.json
200
+
201
+ ```
202
+
203
+ ## Key Constraints
204
+
205
+ | Constraint | Why |
206
+ |---|---|
207
+ | **Must use headful Chrome** | Headless Chrome sends a `HeadlessChrome` user-agent string — instantly blocked |
208
+ | **No CDP during login** | Twitter's JS instrumentation detects `Runtime.enable` and friends |
209
+ | **xdotool for login input** | X11 keyboard events are identical to physical keyboard — undetectable |
210
+ | **Timezone must match proxy geo** | `Intl.DateTimeFormat` timezone vs IP geolocation mismatch = flagged |
211
+ | **SOCKS5 auth needs gost** | Chromium doesn't support SOCKS5 username/password natively |
212
+ | **TOTP must be on-demand** | Codes expire in 30s — never bake into prompts, always generate fresh |
213
+ | **Clean profile locks** | Stale `SingletonLock` files from previous runs prevent Chrome startup |
214
+
215
+ ## Environment Variables
216
+
217
+ | Variable | Required | Description |
218
+ |---|---|---|
219
+ | `ANTHROPIC_API_KEY` | Yes | Claude API key |
220
+ | `TWITTER_USER` | For Twitter | Username |
221
+ | `TWITTER_PASS` | For Twitter | Password |
222
+ | `TWITTER_EMAIL` | For Twitter | Email for identity verification |
223
+ | `TWITTER_TOTP_KEY` | For Twitter | TOTP secret key |
224
+ | `PROXY_HOST` | No | SOCKS5 proxy host |
225
+ | `PROXY_PORT` | No | SOCKS5 proxy port |
226
+ | `PROXY_USER` | No | SOCKS5 proxy username |
227
+ | `PROXY_PASS` | No | SOCKS5 proxy password |
228
+ | `CAPSOLVER_KEY` | No | Capsolver API key |
229
+ | `TWOCAPTCHA_KEY` | No | 2Captcha API key |
230
+ | `CDP_PORT` | No | Chrome debugging port (default: 9222) |
231
+ | `PROFILE_DIR` | No | Browser profile directory |
232
+ | `DATA_DIR` | No | Data output directory |
233
+ | `AGENT_TASK` | No | Custom task for the CDP agent |
234
+ | `MAX_STEPS` | No | Max agent steps (default: 80) |
235
+
236
+ ## Stack
237
+
238
+ | Component | Purpose |
239
+ |---|---|
240
+ | TypeScript + Node.js | Runtime |
241
+ | Anthropic SDK | LLM (Claude, model `claude-sonnet-4-6`) with prompt caching |
242
+ | chrome-remote-interface | Raw CDP — no Playwright/Puppeteer |
243
+ | xdotool | X11 keyboard simulation |
244
+ | ImageMagick | X11 screenshot capture |
245
+ | Google Chrome | Real browser (not Chromium) |
246
+ | Xvfb | Virtual X11 display |
247
+ | openbox | Minimal window manager |
248
+ | gost | SOCKS5 proxy chaining |
249
+ | zod | Tool parameter validation |
250
+ | otplib | TOTP generation |
251
+ | vitest | Testing |
@@ -0,0 +1,41 @@
1
+ /**
2
+ * Agent history — tracks steps, manages conversation, detects loops.
3
+ */
4
+ export interface StepRecord { // One agent step as stored by AgentHistory.add().
5
+ step: number; // Step number; printed as "Step <n>" by AgentHistory.formatForLLM().
6
+ url: string; // URL recorded for this step; part of every action's loop-detection hash.
7
+ evaluation: string; // Text printed after "Step <n>:" in the formatted history.
8
+ memory: string; // Printed as a "Memory:" line in the formatted history when non-empty.
9
+ nextGoal: string; // NOTE(review): not read by history.js — presumably the LLM's stated next goal; confirm in loop.js.
10
+ actions: Array<{ // Tool calls recorded for this step, in order.
11
+ name: string; // Tool name; an action named "done" supplies AgentHistory.finalResult.
12
+ params: Record<string, any>; // Tool arguments; JSON-serialized for display and for loop hashing.
13
+ result?: string; // Tool output, if any.
14
+ error?: string; // Failure message, if any; shown instead of result when both are set.
15
+ }>;
16
+ timestamp: number; // NOTE(review): units are not visible here — presumably epoch milliseconds; confirm.
17
+ }
18
+ export declare class AgentHistory { // Accumulates StepRecords and derives prompt text, loop signals and failure counts from them.
19
+ private steps; // Every recorded step, oldest first.
20
+ private maxHistory; // Step budget used by formatForLLM() when truncating.
21
+ private actionHashes; // One "name:params:url" string per recorded action, oldest first.
22
+ constructor(maxHistory?: number); // maxHistory defaults to 20 in history.js.
23
+ add(record: StepRecord): void; // Appends the step and one hash per action it contains.
24
+ /**
25
+ * Format history for LLM context.
26
+ * Keeps first step + last N steps, omits middle with summary.
27
+ */
28
+ formatForLLM(): string;
29
+ /**
30
+ * Detect behavioral loops — same actions on same page state.
31
+ */
32
+ detectLoop(windowSize?: number): { // windowSize defaults to 4 in history.js.
33
+ isLoop: boolean;
34
+ loopCount: number;
35
+ };
36
+ get consecutiveFailures(): number; // Number of trailing steps in which no action has a result without an error.
37
+ get lastStep(): StepRecord | undefined; // Most recently added step; undefined when nothing was recorded.
38
+ get totalSteps(): number; // Number of steps added so far.
39
+ get allUrls(): string[]; // Distinct step URLs in first-seen order.
40
+ get finalResult(): string | null; // Result of the "done" action in the last step, otherwise null.
41
+ }
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Agent history — tracks steps, manages conversation, detects loops.
3
+ */
4
/**
 * Records every agent step and derives from that record the text shown to
 * the LLM, a repeated-action (loop) signal, and a consecutive-failure count.
 */
export class AgentHistory {
    /** Every recorded step, oldest first. */
    steps = [];
    /** Maximum number of steps rendered by formatForLLM(). */
    maxHistory;
    /** One `name:params:url` fingerprint per recorded action, oldest first. */
    actionHashes = [];

    /**
     * @param maxHistory Step budget for formatForLLM(); defaults to 20.
     */
    constructor(maxHistory = 20) {
        this.maxHistory = maxHistory;
    }

    /**
     * Append a step and fingerprint each of its actions for loop detection.
     *
     * @param record The step to store.
     */
    add(record) {
        this.steps.push(record);
        for (const action of record.actions) {
            this.actionHashes.push(`${action.name}:${JSON.stringify(action.params)}:${record.url}`);
        }
    }

    /**
     * Format history for LLM context.
     *
     * When more than `maxHistory` steps exist, the first step and the last
     * `maxHistory - 1` steps are kept, and a single "steps omitted" marker is
     * placed in the gap between them. At least the first step is always shown.
     *
     * @returns The formatted history, or "" when no step has been recorded.
     */
    formatForLLM() {
        const total = this.steps.length;
        if (total === 0)
            return "";
        const lines = ["Previous steps:"];
        const renderStep = (s) => {
            lines.push(` Step ${s.step}: ${s.evaluation}`);
            for (const a of s.actions) {
                // JSON.stringify(undefined) is undefined; fall back to "" instead of throwing.
                const call = `${a.name}(${(JSON.stringify(a.params) ?? "").slice(0, 80)})`;
                if (a.error)
                    lines.push(` ${call} → FAILED: ${a.error.slice(0, 150)}`);
                else if (a.result)
                    lines.push(` ${call} → ${a.result.slice(0, 300)}`);
                else
                    lines.push(` ${call}`);
            }
            if (s.memory)
                lines.push(` Memory: ${s.memory}`);
        };
        if (total <= this.maxHistory) {
            this.steps.forEach(renderStep);
            return lines.join("\n");
        }
        // slice(-0) is slice(0) and would return the whole array, so a tail
        // budget of zero (maxHistory <= 1) has to be handled explicitly.
        const tailCount = Math.max(Math.trunc(this.maxHistory) - 1, 0);
        const tail = tailCount > 0 ? this.steps.slice(-tailCount) : [];
        renderStep(this.steps[0]);
        lines.push(`[... ${total - 1 - tail.length} steps omitted ...]`);
        tail.forEach(renderStep);
        return lines.join("\n");
    }

    /**
     * Detect behavioral loops — same actions on same page state.
     *
     * Compares the last `windowSize` action fingerprints with the
     * `windowSize` before them. One mismatch is tolerated for windows of two
     * or more actions; a window of one must match exactly.
     *
     * @param windowSize Number of actions per compared window; defaults to 4.
     *   Values that are not positive integers never report a loop.
     * @returns `isLoop`, plus `loopCount`: how many consecutive windows,
     *   counting back from the most recent, are identical to the most recent.
     */
    detectLoop(windowSize = 4) {
        // A window of 0 would make the counting loop below step by 0 and never end.
        if (!Number.isInteger(windowSize) || windowSize < 1)
            return { isLoop: false, loopCount: 0 };
        const total = this.actionHashes.length;
        if (total < windowSize * 2)
            return { isLoop: false, loopCount: 0 };
        const recent = this.actionHashes.slice(-windowSize);
        const prev = this.actionHashes.slice(-windowSize * 2, -windowSize);
        const matches = recent.filter((hash, i) => hash === prev[i]).length;
        // Require at least one match, otherwise windowSize 1 would flag any two actions.
        const isLoop = matches >= Math.max(windowSize - 1, 1);
        let loopCount = 0;
        if (isLoop) {
            for (let i = total - windowSize; i >= 0; i -= windowSize) {
                const candidate = this.actionHashes.slice(i, i + windowSize);
                if (candidate.every((hash, j) => hash === recent[j]))
                    loopCount++;
                else
                    break;
            }
        }
        return { isLoop, loopCount };
    }

    /**
     * Number of most recent steps, counted backwards, in which no action
     * produced a result without an error.
     */
    get consecutiveFailures() {
        let count = 0;
        for (let i = this.steps.length - 1; i >= 0; i--) {
            const hasSuccess = this.steps[i].actions.some((a) => a.result && !a.error);
            // Only count as failure if no action succeeded
            if (hasSuccess)
                break;
            count++;
        }
        return count;
    }

    /** The most recently added step, or undefined when none was recorded. */
    get lastStep() {
        return this.steps[this.steps.length - 1];
    }

    /** Number of steps recorded so far. */
    get totalSteps() {
        return this.steps.length;
    }

    /** Distinct step URLs in first-seen order. */
    get allUrls() {
        return [...new Set(this.steps.map((s) => s.url))];
    }

    /** Result of the "done" action in the last step, or null if there is none. */
    get finalResult() {
        const last = this.lastStep;
        if (!last)
            return null;
        const doneAction = last.actions.find((a) => a.name === "done");
        return doneAction?.result ?? null;
    }
}
98
+ //# sourceMappingURL=history.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"history.js","sourceRoot":"","sources":["../../src/agent/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAYH,MAAM,OAAO,YAAY;IAChB,KAAK,GAAiB,EAAE,CAAC;IACzB,UAAU,CAAS;IACnB,YAAY,GAAa,EAAE,CAAC;IAEpC,YAAY,UAAU,GAAG,EAAE;QAC1B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC9B,CAAC;IAED,GAAG,CAAC,MAAkB;QACrB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACxB,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAChC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QAC/E,CAAC;IACF,CAAC;IAED;;;OAGG;IACH,YAAY;QACX,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEvC,MAAM,KAAK,GAAa,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU;YAChD,CAAC,CAAC,IAAI,CAAC,KAAK;YACZ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC;QAE9D,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;YACzC,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,qBAAqB,CAAC,CAAC;QAC9E,CAAC;QAED,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YACtB,MAAM,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;gBACtC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC;gBACnE,IAAI,CAAC,CAAC,KAAK;oBAAE,OAAO,GAAG,IAAI,cAAc,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBACjE,IAAI,CAAC,CAAC,MAAM;oBAAE,OAAO,GAAG,IAAI,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBAC3D,OAAO,IAAI,CAAC;YACb,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;YAChD,KAAK,MAAM,CAAC,IAAI,UAAU;gBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACnD,IAAI,CAAC,CAAC,MAAM;gBAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,UAAU,GAAG,CAAC;QACxB,IAAI,IAAI,CAAC,YAAY,CAAC,M
AAM,GAAG,UAAU,GAAG,CAAC;YAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QAEtF,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC;QACpD,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAE9D,MAAM,MAAM,GAAG,OAAO,IAAI,UAAU,GAAG,CAAC,CAAC;QACzC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,MAAM,EAAE,CAAC;YACZ,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;gBAC7E,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;gBAC1D,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC;oBAAE,SAAS,EAAE,CAAC;;oBACpD,MAAM;YACZ,CAAC;QACF,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IAC9B,CAAC;IAED,IAAI,mBAAmB;QACtB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACtC,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YAC7D,+CAA+C;YAC/C,IAAI,CAAC,UAAU;gBAAE,KAAK,EAAE,CAAC;;gBACpB,MAAM;QACZ,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;IAED,IAAI,QAAQ;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,IAAI,UAAU,KAAa,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAEtD,IAAI,OAAO;QACV,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,WAAW;QACd,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC3B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QACvB,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;QAC/D,OAAO,UAAU,EAAE,MAAM,IAAI,IAAI,CAAC;IACnC,CAAC;CACD"}
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Step-based LLM agent loop with multi-turn tool use.
3
+ *
4
+ * Each step:
5
+ * 1. Capture browser state (screenshot + indexed DOM + URL + tabs)
6
+ * 2. Send to LLM with history + state + screenshot
7
+ * 3. LLM returns tool calls
8
+ * 4. Execute tools, feed results back to LLM
9
+ * 5. LLM can chain more tool calls (seeing previous results)
10
+ * 6. Repeat tool loop until LLM emits end_turn or hits maxActionsPerStep
11
+ * 7. Record step in history, advance to next step
12
+ *
13
+ * The inner tool loop means the LLM can call get_buyer_profile,
14
+ * see the result, then call input(text="Marcus") — all within one step.
15
+ */
16
+ import { CDPBrowser } from "../browser/cdp.js";
17
+ import { AgentHistory, type StepRecord } from "./history.js";
18
+ export interface AgentOptions { // Options accepted by runAgent().
19
+ task: string; // Natural-language task for the agent, e.g. the README's "Go to wikipedia.org and find the population of Tokyo".
20
+ browser: CDPBrowser; // Browser to drive; the README examples call connect() on it before runAgent().
21
+ model?: string; // NOTE(review): default model is not visible here — confirm in loop.js.
22
+ maxSteps?: number; // NOTE(review): presumably the step limit; the README lists MAX_STEPS with default 80 — confirm it maps to this option.
23
+ maxFailures?: number; // NOTE(review): presumably the consecutive-failure threshold mentioned in the README; default not visible here.
24
+ maxActionsPerStep?: number; // Upper bound on tool calls within one step, per the README and this file's header comment.
25
+ extraTools?: Record<string, any>; // Additional tools keyed by tool name; the README example builds them with tool() from "ai".
26
+ systemPrompt?: string; // NOTE(review): whether this replaces or extends the built-in prompt is not visible here.
27
+ onStep?: (step: StepRecord) => void; // Callback receiving a StepRecord — presumably once per recorded step; confirm in loop.js.
28
+ sensitiveData?: Record<string, string>; // NOTE(review): presumably the secrets masked as <secret:key> in LLM context (see README); confirm.
29
+ }
30
+ export declare function runAgent(opts: AgentOptions): Promise<{ // Runs the step-based agent loop described in this file's header comment.
31
+ result: string | null; // NOTE(review): presumably the "done" tool's result (cf. AgentHistory.finalResult); confirm in loop.js.
32
+ success: boolean; // NOTE(review): the exact success criterion is not visible here.
33
+ history: AgentHistory; // Step history for the run.
34
+ }>;