browser-autopilot 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +251 -0
- package/dist/agent/history.d.ts +41 -0
- package/dist/agent/history.js +98 -0
- package/dist/agent/history.js.map +1 -0
- package/dist/agent/loop.d.ts +34 -0
- package/dist/agent/loop.js +278 -0
- package/dist/agent/loop.js.map +1 -0
- package/dist/agent/run.d.ts +4 -0
- package/dist/agent/run.js +67 -0
- package/dist/agent/run.js.map +1 -0
- package/dist/agent/state.d.ts +37 -0
- package/dist/agent/state.js +82 -0
- package/dist/agent/state.js.map +1 -0
- package/dist/agent/tools.d.ts +414 -0
- package/dist/agent/tools.js +399 -0
- package/dist/agent/tools.js.map +1 -0
- package/dist/browser/cdp.d.ts +91 -0
- package/dist/browser/cdp.js +470 -0
- package/dist/browser/cdp.js.map +1 -0
- package/dist/browser/dom.d.ts +30 -0
- package/dist/browser/dom.js +79 -0
- package/dist/browser/dom.js.map +1 -0
- package/dist/browser/snapshot.d.ts +19 -0
- package/dist/browser/snapshot.js +70 -0
- package/dist/browser/snapshot.js.map +1 -0
- package/dist/captcha/solver.d.ts +20 -0
- package/dist/captcha/solver.js +101 -0
- package/dist/captcha/solver.js.map +1 -0
- package/dist/config.d.ts +36 -0
- package/dist/config.js +44 -0
- package/dist/config.js.map +1 -0
- package/dist/index.d.ts +20 -0
- package/dist/index.js +43 -0
- package/dist/index.js.map +1 -0
- package/dist/orchestrator.d.ts +33 -0
- package/dist/orchestrator.js +197 -0
- package/dist/orchestrator.js.map +1 -0
- package/dist/viewer/server.d.ts +14 -0
- package/dist/viewer/server.js +93 -0
- package/dist/viewer/server.js.map +1 -0
- package/dist/x11/agent.d.ts +34 -0
- package/dist/x11/agent.js +103 -0
- package/dist/x11/agent.js.map +1 -0
- package/dist/x11/chrome.d.ts +9 -0
- package/dist/x11/chrome.js +107 -0
- package/dist/x11/chrome.js.map +1 -0
- package/dist/x11/input.d.ts +13 -0
- package/dist/x11/input.js +75 -0
- package/dist/x11/input.js.map +1 -0
- package/dist/x11/login.d.ts +6 -0
- package/dist/x11/login.js +76 -0
- package/dist/x11/login.js.map +1 -0
- package/package.json +79 -0
package/README.md
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
# browser-autopilot
|
|
2
|
+
|
|
3
|
+
A general-purpose autonomous browser agent that can log into any website and perform complex tasks — even on sites with aggressive bot detection (Twitter/X, TikTok, LinkedIn, etc.).
|
|
4
|
+
|
|
5
|
+
Built in TypeScript. No Playwright. No Puppeteer. Raw CDP + xdotool + LLM.
|
|
6
|
+
|
|
7
|
+
## The Problem
|
|
8
|
+
|
|
9
|
+
Modern websites detect browser automation at the login gate. Twitter's `LoginJsInstrumentationSubtask`, Cloudflare's challenge pages, DataDome — they all fingerprint the browser and block it if they detect CDP (Chrome DevTools Protocol) domains being enabled. Every existing automation tool (Playwright, Puppeteer, Selenium, browser-use) gets caught because it attaches CDP before the page loads.
|
|
10
|
+
|
|
11
|
+
## The Solution
|
|
12
|
+
|
|
13
|
+
Automatic mode selection — tries the fast path first, falls back to stealth when blocked:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
orchestrate({ credentials, task })
|
|
17
|
+
│
|
|
18
|
+
├─ Cached session (cookies)? ──────────────→ CDP Agent (instant)
|
|
19
|
+
│
|
|
20
|
+
├─ Try CDP login (fast, 15 steps)
|
|
21
|
+
│ ├─ Works? ─────────────────────────────→ CDP Agent
|
|
22
|
+
│ └─ Bot detected? ("could not log you in")
|
|
23
|
+
│ │
|
|
24
|
+
│ └─ Relaunch Chrome, X11 login ─────→ CDP Agent
|
|
25
|
+
│ (xdotool keyboard, no CDP,
|
|
26
|
+
│ undetectable by any JS)
|
|
27
|
+
│
|
|
28
|
+
└─ Returns: { result, success, loginMethod: "cached"|"cdp"|"x11" }
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
**CDP mode** (default): Raw Chrome DevTools Protocol. Fast, full DOM indexing with `[1] [2] [3]` element refs, screenshot + accessibility tree per step, 25+ tools.
|
|
32
|
+
|
|
33
|
+
**X11 mode** (fallback): `xdotool` keyboard input through the X Window System — identical to a physical keyboard. LLM sees X11 screenshots via ImageMagick. No CDP client attached. Completely undetectable by any in-page JavaScript. Used only when bot detection blocks CDP.
|
|
34
|
+
|
|
35
|
+
**The user never chooses.** The orchestrator tries CDP, detects failure signals, and switches automatically.
|
|
36
|
+
|
|
37
|
+
## Features
|
|
38
|
+
|
|
39
|
+
| | |
|
|
40
|
+
|---|---|
|
|
41
|
+
| **Stealth login** | X11 keyboard input (xdotool) — undetectable by any in-page JavaScript |
|
|
42
|
+
| **LLM-driven** | Claude sees screenshots and decides actions — adapts to any UI change |
|
|
43
|
+
| **Index-based DOM** | Elements indexed as `[1] button "Next"` — LLM references by number |
|
|
44
|
+
| **25+ browser tools** | navigate, click, input, scroll, search, extract, tabs, dialogs, file I/O, JS eval |
|
|
45
|
+
| **Step-based agent** | Each step: screenshot + DOM → LLM reasoning → tool execution → history |
|
|
46
|
+
| **Error recovery** | Consecutive failure tracking, max_failures threshold, graceful degradation |
|
|
47
|
+
| **Loop detection** | Action hash comparison across sliding window, escalating nudges |
|
|
48
|
+
| **History management** | Truncation (first + last N steps), formatted context for LLM |
|
|
49
|
+
| **Prompt caching** | System prompt + tool definitions cached via Anthropic's `cache_control` |
|
|
50
|
+
| **Captcha solving** | Capsolver + 2Captcha (reCAPTCHA v2/v3, hCaptcha, Turnstile, DataDome) |
|
|
51
|
+
| **Proxy support** | SOCKS5 chaining via gost (handles auth that Chromium can't) |
|
|
52
|
+
| **Session persistence** | Browser profile + cookies survive across runs |
|
|
53
|
+
| **Sensitive data masking** | Passwords/secrets replaced with `<secret:key>` in LLM context |
|
|
54
|
+
| **Dockerized** | Runs headful Chrome in Docker via Xvfb — designed for containers/TEEs |
|
|
55
|
+
|
|
56
|
+
## Quick Start
|
|
57
|
+
|
|
58
|
+
### As a library
|
|
59
|
+
|
|
60
|
+
```typescript
|
|
61
|
+
import { orchestrate } from "browser-autopilot";
|
|
62
|
+
|
|
63
|
+
// One call — handles login (auto CDP/X11) + task
|
|
64
|
+
const { result, success, loginMethod } = await orchestrate({
|
|
65
|
+
credentials: {
|
|
66
|
+
username: "myuser",
|
|
67
|
+
password: "mypass",
|
|
68
|
+
email: "me@example.com",
|
|
69
|
+
totpKey: "ABCDEF123456",
|
|
70
|
+
},
|
|
71
|
+
loginUrl: "https://x.com/login",
|
|
72
|
+
successUrlContains: "/home",
|
|
73
|
+
task: "Go to settings and export my data as CSV",
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
console.log(`${loginMethod} login → ${result}`);
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
### CDP agent only (already logged in or no auth needed)
|
|
80
|
+
|
|
81
|
+
```typescript
|
|
82
|
+
import { CDPBrowser, runAgent } from "browser-autopilot";
|
|
83
|
+
|
|
84
|
+
const browser = new CDPBrowser();
|
|
85
|
+
await browser.connect();
|
|
86
|
+
|
|
87
|
+
const { result } = await runAgent({
|
|
88
|
+
task: "Go to wikipedia.org and find the population of Tokyo",
|
|
89
|
+
browser,
|
|
90
|
+
});
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### As a Docker container (for servers/TEEs)
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
docker build -f docker/Dockerfile -t browser-autopilot .
|
|
97
|
+
|
|
98
|
+
docker run --rm \
|
|
99
|
+
-e ANTHROPIC_API_KEY=sk-ant-... \
|
|
100
|
+
-e TWITTER_USER=myuser \
|
|
101
|
+
-e TWITTER_PASS=mypass \
|
|
102
|
+
-e TWITTER_EMAIL=me@example.com \
|
|
103
|
+
-e TWITTER_TOTP_KEY=ABCDEF123456 \
|
|
104
|
+
-e PROXY_HOST=1.2.3.4 \
|
|
105
|
+
-e PROXY_PORT=45001 \
|
|
106
|
+
-e PROXY_USER=proxyuser \
|
|
107
|
+
-e PROXY_PASS=proxypass \
|
|
108
|
+
-v mydata:/data \
|
|
109
|
+
browser-autopilot
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Log in to any site
|
|
113
|
+
|
|
114
|
+
```typescript
|
|
115
|
+
import { X11Agent } from "browser-autopilot";
|
|
116
|
+
import * as chrome from "browser-autopilot/x11/chrome";
|
|
117
|
+
|
|
118
|
+
chrome.launch("https://tiktok.com/login", "tiktok-profile");
|
|
119
|
+
|
|
120
|
+
const agent = new X11Agent();
|
|
121
|
+
await agent.run({
|
|
122
|
+
systemPrompt: `Login to TikTok. Username: foo, Password: bar.
|
|
123
|
+
TYPE the username, KEY Return, TYPE the password, KEY Return.
|
|
124
|
+
When you see the For You page, say ACTION: DONE.`,
|
|
125
|
+
successCheck: () => chrome.pageUrlContains("/foryou"),
|
|
126
|
+
});
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Custom tools
|
|
130
|
+
|
|
131
|
+
```typescript
|
|
132
|
+
import { z } from "zod";
|
|
133
|
+
import { tool } from "ai";
|
|
134
|
+
import { runAgent, CDPBrowser } from "browser-autopilot";
|
|
135
|
+
|
|
136
|
+
const browser = new CDPBrowser();
|
|
137
|
+
await browser.connect();
|
|
138
|
+
|
|
139
|
+
await runAgent({
|
|
140
|
+
task: "Login and download my invoice",
|
|
141
|
+
browser,
|
|
142
|
+
extraTools: {
|
|
143
|
+
get_2fa: tool({
|
|
144
|
+
description: "Get 2FA code from authenticator",
|
|
145
|
+
parameters: z.object({}),
|
|
146
|
+
execute: async () => "123456",
|
|
147
|
+
}),
|
|
148
|
+
},
|
|
149
|
+
});
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## How It Works (Per Step)
|
|
153
|
+
|
|
154
|
+
Each step of the CDP agent:
|
|
155
|
+
|
|
156
|
+
1. **Capture state** — screenshot (CDP `Page.captureScreenshot`) + accessibility tree (`Accessibility.getFullAXTree`) + URL + title + tabs + scroll position + pending dialogs
|
|
157
|
+
2. **Index DOM** — interactive elements get sequential numbers: `[1] button "Submit"`, `[2] textbox "Email"`. The LLM references elements by these numbers.
|
|
158
|
+
3. **Build context** — format state as text, append action history (truncated), add loop detection nudges if needed
|
|
159
|
+
4. **Send to LLM** — screenshot as vision input + state text + tool definitions (all prompt-cached)
|
|
160
|
+
5. **Parse response** — extract reasoning (evaluation, memory, next goal) + tool calls
|
|
161
|
+
6. **Execute tools** — up to `maxActionsPerStep` tool calls per step (click, type, navigate, etc.)
|
|
162
|
+
7. **Record history** — step added to history with all actions + results + errors
|
|
163
|
+
8. **Check termination** — done tool called? max steps? max failures?
|
|
164
|
+
|
|
165
|
+
## Project Structure
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
browser-autopilot/
|
|
169
|
+
├── src/
|
|
170
|
+
│ ├── config.ts # All env vars
|
|
171
|
+
│ ├── index.ts # CLI entrypoint + library exports
|
|
172
|
+
│ ├── orchestrator.ts # Auto mode selection (cached → CDP → X11)
|
|
173
|
+
│ ├── x11/
|
|
174
|
+
│ │ ├── agent.ts # Generic X11Agent (works for any site)
|
|
175
|
+
│ │ ├── chrome.ts # Chrome launch, focus, status check
|
|
176
|
+
│ │ ├── input.ts # xdotool: type, key, click, screenshot
|
|
177
|
+
│ │ └── login.ts # Twitter login built on X11Agent
|
|
178
|
+
│ ├── browser/
|
|
179
|
+
│ │ ├── cdp.ts # Raw CDP client (nav, click, type, tabs, cookies, dialogs)
|
|
180
|
+
│ │ ├── dom.ts # DOM indexer — [1] [2] [3] element refs
|
|
181
|
+
│ │ └── snapshot.ts # AX tree serialization utilities
|
|
182
|
+
│ ├── agent/
|
|
183
|
+
│ │ ├── loop.ts # Step-based agent loop (the core)
|
|
184
|
+
│ │ ├── tools.ts # 25+ browser tools for the agent
|
|
185
|
+
│ │ ├── state.ts # Per-step browser state capture
|
|
186
|
+
│ │ ├── history.ts # History tracking, loop detection, truncation
|
|
187
|
+
│ │ └── run.ts # CLI entrypoint (Twitter dev portal example)
|
|
188
|
+
│ └── captcha/
|
|
189
|
+
│ └── solver.ts # Capsolver + 2Captcha unified solver
|
|
190
|
+
├── docker/
|
|
191
|
+
│ ├── Dockerfile # Production image (amd64, Google Chrome, gost, xdotool)
|
|
192
|
+
│ └── entrypoint.sh # dbus → Xvfb → openbox → gost → login → agent
|
|
193
|
+
├── tests/
|
|
194
|
+
│ ├── dom.test.ts # DOM indexer tests
|
|
195
|
+
│ ├── history.test.ts # History + loop detection tests
|
|
196
|
+
│ ├── state.test.ts # Browser state formatting tests
|
|
197
|
+
│ └── config.test.ts # Config tests
|
|
198
|
+
├── package.json
|
|
199
|
+
└── tsconfig.json
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Key Constraints
|
|
204
|
+
|
|
205
|
+
| Constraint | Why |
|
|
206
|
+
|---|---|
|
|
207
|
+
| **Must use headful Chrome** | Headless Chrome reports `HeadlessChrome` in its user-agent string — instantly blocked |
|
|
208
|
+
| **No CDP during login** | Twitter's JS instrumentation detects `Runtime.enable` and friends |
|
|
209
|
+
| **xdotool for login input** | X11 keyboard events are identical to physical keyboard — undetectable |
|
|
210
|
+
| **Timezone must match proxy geo** | `Intl.DateTimeFormat` timezone vs IP geolocation mismatch = flagged |
|
|
211
|
+
| **SOCKS5 auth needs gost** | Chromium doesn't support SOCKS5 username/password natively |
|
|
212
|
+
| **TOTP must be on-demand** | Codes expire in 30s — never bake into prompts, always generate fresh |
|
|
213
|
+
| **Clean profile locks** | Stale `SingletonLock` files from previous runs prevent Chrome startup |
|
|
214
|
+
|
|
215
|
+
## Environment Variables
|
|
216
|
+
|
|
217
|
+
| Variable | Required | Description |
|
|
218
|
+
|---|---|---|
|
|
219
|
+
| `ANTHROPIC_API_KEY` | Yes | Claude API key |
|
|
220
|
+
| `TWITTER_USER` | For Twitter | Username |
|
|
221
|
+
| `TWITTER_PASS` | For Twitter | Password |
|
|
222
|
+
| `TWITTER_EMAIL` | For Twitter | Email for identity verification |
|
|
223
|
+
| `TWITTER_TOTP_KEY` | For Twitter | TOTP secret key |
|
|
224
|
+
| `PROXY_HOST` | No | SOCKS5 proxy host |
|
|
225
|
+
| `PROXY_PORT` | No | SOCKS5 proxy port |
|
|
226
|
+
| `PROXY_USER` | No | SOCKS5 proxy username |
|
|
227
|
+
| `PROXY_PASS` | No | SOCKS5 proxy password |
|
|
228
|
+
| `CAPSOLVER_KEY` | No | Capsolver API key |
|
|
229
|
+
| `TWOCAPTCHA_KEY` | No | 2Captcha API key |
|
|
230
|
+
| `CDP_PORT` | No | Chrome debugging port (default: 9222) |
|
|
231
|
+
| `PROFILE_DIR` | No | Browser profile directory |
|
|
232
|
+
| `DATA_DIR` | No | Data output directory |
|
|
233
|
+
| `AGENT_TASK` | No | Custom task for the CDP agent |
|
|
234
|
+
| `MAX_STEPS` | No | Max agent steps (default: 80) |
|
|
235
|
+
|
|
236
|
+
## Stack
|
|
237
|
+
|
|
238
|
+
| Component | Purpose |
|
|
239
|
+
|---|---|
|
|
240
|
+
| TypeScript + Node.js | Runtime |
|
|
241
|
+
| Anthropic SDK | LLM (Claude claude-sonnet-4-6) with prompt caching |
|
|
242
|
+
| chrome-remote-interface | Raw CDP — no Playwright/Puppeteer |
|
|
243
|
+
| xdotool | X11 keyboard simulation |
|
|
244
|
+
| ImageMagick | X11 screenshot capture |
|
|
245
|
+
| Google Chrome | Real browser (not Chromium) |
|
|
246
|
+
| Xvfb | Virtual X11 display |
|
|
247
|
+
| openbox | Minimal window manager |
|
|
248
|
+
| gost | SOCKS5 proxy chaining |
|
|
249
|
+
| zod | Tool parameter validation |
|
|
250
|
+
| otplib | TOTP generation |
|
|
251
|
+
| vitest | Testing |
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent history — tracks steps, manages conversation, detects loops.
|
|
3
|
+
*/
|
|
4
|
+
export interface StepRecord { // one recorded agent step, as stored by AgentHistory.add()
|
|
5
|
+
step: number; // step number; printed as "Step N" in the formatted history
|
|
6
|
+
url: string; // URL recorded for the step; part of every action's loop-detection hash
|
|
7
|
+
evaluation: string; // evaluation text; printed on the step's headline in the formatted history
|
|
8
|
+
memory: string; // memory note; printed as a "Memory:" line only when non-empty
|
|
9
|
+
nextGoal: string; // next-goal text; stored on the record but not rendered by AgentHistory.formatForLLM()
|
|
10
|
+
actions: Array<{ // actions recorded for this step
|
|
11
|
+
name: string; // action name; an action named "done" supplies AgentHistory.finalResult
|
|
12
|
+
params: Record<string, any>; // call arguments; JSON-stringified for display and for loop hashing
|
|
13
|
+
result?: string; // result text; truncated to 300 characters when formatted
|
|
14
|
+
error?: string; // error text; when set the action is rendered as FAILED (truncated to 150 characters)
|
|
15
|
+
}>;
|
|
16
|
+
timestamp: number; // NOTE(review): presumably epoch milliseconds — confirm against the code that builds the record
|
|
17
|
+
}
|
|
18
|
+
export declare class AgentHistory { // step log with LLM formatting, loop detection and failure counting
|
|
19
|
+
private steps; // recorded StepRecords in insertion order
|
|
20
|
+
private maxHistory; // step-count limit applied by formatForLLM()
|
|
21
|
+
private actionHashes; // one "name:params:url" string per recorded action
|
|
22
|
+
constructor(maxHistory?: number); // maxHistory defaults to 20
|
|
23
|
+
add(record: StepRecord): void; // appends the step and one hash per action in it
|
|
24
|
+
/**
|
|
25
|
+
* Format history as plain text for the LLM context ("" when no steps exist).
|
|
26
|
+
* When more than maxHistory steps exist, keeps the first step plus the most recent ones and adds a line stating how many steps were omitted.
|
|
27
|
+
*/
|
|
28
|
+
formatForLLM(): string;
|
|
29
|
+
/**
|
|
30
|
+
* Detect behavioral loops — the same actions (name + params) repeated on the same URL, compared over the last two windows of action hashes.
|
|
31
|
+
*/
|
|
32
|
+
detectLoop(windowSize?: number): { // windowSize defaults to 4
|
|
33
|
+
isLoop: boolean; // true when at least windowSize - 1 positions match between the last two windows
|
|
34
|
+
loopCount: number; // consecutive trailing windows identical to the most recent one; 0 when isLoop is false
|
|
35
|
+
};
|
|
36
|
+
get consecutiveFailures(): number; // trailing steps in which no action has a non-empty result without an error
|
|
37
|
+
get lastStep(): StepRecord | undefined; // most recent step; undefined when nothing has been recorded
|
|
38
|
+
get totalSteps(): number; // number of recorded steps
|
|
39
|
+
get allUrls(): string[]; // distinct step URLs, in first-seen order
|
|
40
|
+
get finalResult(): string | null; // result of the last step's "done" action, or null when there is none
|
|
41
|
+
}
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Agent history — tracks steps, manages conversation, detects loops.
|
|
3
|
+
*/
|
|
4
|
+
export class AgentHistory {
|
|
5
|
+
steps = [];
|
|
6
|
+
maxHistory;
|
|
7
|
+
actionHashes = [];
|
|
8
|
+
constructor(maxHistory = 20) {
|
|
9
|
+
this.maxHistory = maxHistory;
|
|
10
|
+
}
|
|
11
|
+
add(record) {
|
|
12
|
+
this.steps.push(record);
|
|
13
|
+
for (const a of record.actions) {
|
|
14
|
+
this.actionHashes.push(`${a.name}:${JSON.stringify(a.params)}:${record.url}`);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Format history for LLM context.
|
|
19
|
+
* Keeps first step + last N steps, omits middle with summary.
|
|
20
|
+
*/
|
|
21
|
+
formatForLLM() {
|
|
22
|
+
if (this.steps.length === 0)
|
|
23
|
+
return "";
|
|
24
|
+
const lines = ["Previous steps:"];
|
|
25
|
+
const keep = this.steps.length <= this.maxHistory
|
|
26
|
+
? this.steps
|
|
27
|
+
: [this.steps[0], ...this.steps.slice(-this.maxHistory + 1)];
|
|
28
|
+
if (this.steps.length > this.maxHistory) {
|
|
29
|
+
lines.push(`[... ${this.steps.length - this.maxHistory} steps omitted ...]`);
|
|
30
|
+
}
|
|
31
|
+
for (const s of keep) {
|
|
32
|
+
const actionStrs = s.actions.map((a) => {
|
|
33
|
+
const call = `${a.name}(${JSON.stringify(a.params).slice(0, 80)})`;
|
|
34
|
+
if (a.error)
|
|
35
|
+
return `${call} → FAILED: ${a.error.slice(0, 150)}`;
|
|
36
|
+
if (a.result)
|
|
37
|
+
return `${call} → ${a.result.slice(0, 300)}`;
|
|
38
|
+
return call;
|
|
39
|
+
});
|
|
40
|
+
lines.push(` Step ${s.step}: ${s.evaluation}`);
|
|
41
|
+
for (const a of actionStrs)
|
|
42
|
+
lines.push(` ${a}`);
|
|
43
|
+
if (s.memory)
|
|
44
|
+
lines.push(` Memory: ${s.memory}`);
|
|
45
|
+
}
|
|
46
|
+
return lines.join("\n");
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Detect behavioral loops — same actions on same page state.
|
|
50
|
+
*/
|
|
51
|
+
detectLoop(windowSize = 4) {
|
|
52
|
+
if (this.actionHashes.length < windowSize * 2)
|
|
53
|
+
return { isLoop: false, loopCount: 0 };
|
|
54
|
+
const recent = this.actionHashes.slice(-windowSize);
|
|
55
|
+
const prev = this.actionHashes.slice(-windowSize * 2, -windowSize);
|
|
56
|
+
const matches = recent.filter((h, i) => h === prev[i]).length;
|
|
57
|
+
const isLoop = matches >= windowSize - 1;
|
|
58
|
+
let loopCount = 0;
|
|
59
|
+
if (isLoop) {
|
|
60
|
+
for (let i = this.actionHashes.length - windowSize; i >= 0; i -= windowSize) {
|
|
61
|
+
const window = this.actionHashes.slice(i, i + windowSize);
|
|
62
|
+
if (window.every((h, j) => h === recent[j]))
|
|
63
|
+
loopCount++;
|
|
64
|
+
else
|
|
65
|
+
break;
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
return { isLoop, loopCount };
|
|
69
|
+
}
|
|
70
|
+
get consecutiveFailures() {
|
|
71
|
+
let count = 0;
|
|
72
|
+
for (let i = this.steps.length - 1; i >= 0; i--) {
|
|
73
|
+
const actions = this.steps[i].actions;
|
|
74
|
+
const hasSuccess = actions.some((a) => a.result && !a.error);
|
|
75
|
+
// Only count as failure if no action succeeded
|
|
76
|
+
if (!hasSuccess)
|
|
77
|
+
count++;
|
|
78
|
+
else
|
|
79
|
+
break;
|
|
80
|
+
}
|
|
81
|
+
return count;
|
|
82
|
+
}
|
|
83
|
+
get lastStep() {
|
|
84
|
+
return this.steps[this.steps.length - 1];
|
|
85
|
+
}
|
|
86
|
+
get totalSteps() { return this.steps.length; }
|
|
87
|
+
get allUrls() {
|
|
88
|
+
return [...new Set(this.steps.map((s) => s.url))];
|
|
89
|
+
}
|
|
90
|
+
get finalResult() {
|
|
91
|
+
const last = this.lastStep;
|
|
92
|
+
if (!last)
|
|
93
|
+
return null;
|
|
94
|
+
const doneAction = last.actions.find((a) => a.name === "done");
|
|
95
|
+
return doneAction?.result ?? null;
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
//# sourceMappingURL=history.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"history.js","sourceRoot":"","sources":["../../src/agent/history.ts"],"names":[],"mappings":"AAAA;;GAEG;AAYH,MAAM,OAAO,YAAY;IAChB,KAAK,GAAiB,EAAE,CAAC;IACzB,UAAU,CAAS;IACnB,YAAY,GAAa,EAAE,CAAC;IAEpC,YAAY,UAAU,GAAG,EAAE;QAC1B,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC9B,CAAC;IAED,GAAG,CAAC,MAAkB;QACrB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACxB,KAAK,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;YAChC,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,MAAM,CAAC,GAAG,EAAE,CAAC,CAAC;QAC/E,CAAC;IACF,CAAC;IAED;;;OAGG;IACH,YAAY;QACX,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,EAAE,CAAC;QAEvC,MAAM,KAAK,GAAa,CAAC,iBAAiB,CAAC,CAAC;QAC5C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,IAAI,IAAI,CAAC,UAAU;YAChD,CAAC,CAAC,IAAI,CAAC,KAAK;YACZ,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC,CAAC;QAE9D,IAAI,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;YACzC,KAAK,CAAC,IAAI,CAAC,QAAQ,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,UAAU,qBAAqB,CAAC,CAAC;QAC9E,CAAC;QAED,KAAK,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YACtB,MAAM,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE;gBACtC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,IAAI,IAAI,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC;gBACnE,IAAI,CAAC,CAAC,KAAK;oBAAE,OAAO,GAAG,IAAI,cAAc,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBACjE,IAAI,CAAC,CAAC,MAAM;oBAAE,OAAO,GAAG,IAAI,MAAM,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;gBAC3D,OAAO,IAAI,CAAC;YACb,CAAC,CAAC,CAAC;YACH,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,KAAK,CAAC,CAAC,UAAU,EAAE,CAAC,CAAC;YAChD,KAAK,MAAM,CAAC,IAAI,UAAU;gBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;YACnD,IAAI,CAAC,CAAC,MAAM;gBAAE,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,CAAC,MAAM,EAAE,CAAC,CAAC;QACrD,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,UAAU,GAAG,CAAC;QACxB,IAAI,IAAI,CAAC,YAAY,CAAC,MAA
M,GAAG,UAAU,GAAG,CAAC;YAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC;QAEtF,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,UAAU,CAAC,CAAC;QACpD,MAAM,IAAI,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,UAAU,GAAG,CAAC,EAAE,CAAC,UAAU,CAAC,CAAC;QACnE,MAAM,OAAO,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAE9D,MAAM,MAAM,GAAG,OAAO,IAAI,UAAU,GAAG,CAAC,CAAC;QACzC,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,MAAM,EAAE,CAAC;YACZ,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,YAAY,CAAC,MAAM,GAAG,UAAU,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,UAAU,EAAE,CAAC;gBAC7E,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,CAAC;gBAC1D,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,CAAC;oBAAE,SAAS,EAAE,CAAC;;oBACpD,MAAM;YACZ,CAAC;QACF,CAAC;QAED,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;IAC9B,CAAC;IAED,IAAI,mBAAmB;QACtB,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,IAAI,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACjD,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC;YACtC,MAAM,UAAU,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YAC7D,+CAA+C;YAC/C,IAAI,CAAC,UAAU;gBAAE,KAAK,EAAE,CAAC;;gBACpB,MAAM;QACZ,CAAC;QACD,OAAO,KAAK,CAAC;IACd,CAAC;IAED,IAAI,QAAQ;QACX,OAAO,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;IAED,IAAI,UAAU,KAAa,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC;IAEtD,IAAI,OAAO;QACV,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;IACnD,CAAC;IAED,IAAI,WAAW;QACd,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,CAAC;QAC3B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QACvB,MAAM,UAAU,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,MAAM,CAAC,CAAC;QAC/D,OAAO,UAAU,EAAE,MAAM,IAAI,IAAI,CAAC;IACnC,CAAC;CACD"}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Step-based LLM agent loop with multi-turn tool use.
|
|
3
|
+
*
|
|
4
|
+
* Each step:
|
|
5
|
+
* 1. Capture browser state (screenshot + indexed DOM + URL + tabs)
|
|
6
|
+
* 2. Send to LLM with history + state + screenshot
|
|
7
|
+
* 3. LLM returns tool calls
|
|
8
|
+
* 4. Execute tools, feed results back to LLM
|
|
9
|
+
* 5. LLM can chain more tool calls (seeing previous results)
|
|
10
|
+
* 6. Repeat tool loop until LLM emits end_turn or hits maxActionsPerStep
|
|
11
|
+
* 7. Record step in history, advance to next step
|
|
12
|
+
*
|
|
13
|
+
* The inner tool loop means the LLM can call get_buyer_profile,
|
|
14
|
+
* see the result, then call input(text="Marcus") — all within one step.
|
|
15
|
+
*/
|
|
16
|
+
import { CDPBrowser } from "../browser/cdp.js";
|
|
17
|
+
import { AgentHistory, type StepRecord } from "./history.js";
|
|
18
|
+
export interface AgentOptions { // options accepted by runAgent()
|
|
19
|
+
task: string; // natural-language task for the agent, e.g. "Go to wikipedia.org and find the population of Tokyo"
|
|
20
|
+
browser: CDPBrowser; // CDP browser client; the README examples call browser.connect() before passing it in
|
|
21
|
+
model?: string; // NOTE(review): presumably the LLM model identifier — default not visible here, confirm in loop.js
|
|
22
|
+
maxSteps?: number; // upper bound on agent steps; NOTE(review): README lists MAX_STEPS default 80 — confirm it applies to this option
|
|
23
|
+
maxFailures?: number; // NOTE(review): presumably the consecutive-failure threshold that stops the run — confirm in loop.js
|
|
24
|
+
maxActionsPerStep?: number; // cap on tool calls executed within a single step
|
|
25
|
+
extraTools?: Record<string, any>; // additional tools keyed by tool name (README example: get_2fa)
|
|
26
|
+
systemPrompt?: string; // NOTE(review): unclear from here whether this replaces or extends the built-in prompt — confirm
|
|
27
|
+
onStep?: (step: StepRecord) => void; // callback receiving a StepRecord; NOTE(review): presumably called once per recorded step
|
|
28
|
+
sensitiveData?: Record<string, string>; // NOTE(review): presumably key → secret value, shown to the LLM as <secret:key> — confirm
|
|
29
|
+
}
|
|
30
|
+
export declare function runAgent(opts: AgentOptions): Promise<{ // runs the step-based agent loop described in this file's header comment
|
|
31
|
+
result: string | null; // NOTE(review): presumably the "done" action's result (AgentHistory.finalResult), null if none — confirm in loop.js
|
|
32
|
+
success: boolean; // NOTE(review): exact success criterion is not visible in this declaration — confirm in loop.js
|
|
33
|
+
history: AgentHistory; // history object for the run
|
|
34
|
+
}>;
|