usertester 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +219 -0
- package/dist/browser/agent.d.ts +33 -0
- package/dist/browser/agent.js +393 -0
- package/dist/browser/agent.js.map +1 -0
- package/dist/cli/cleanup.d.ts +5 -0
- package/dist/cli/cleanup.js +75 -0
- package/dist/cli/cleanup.js.map +1 -0
- package/dist/cli/harness.d.ts +10 -0
- package/dist/cli/harness.js +108 -0
- package/dist/cli/harness.js.map +1 -0
- package/dist/cli/index.d.ts +5 -0
- package/dist/cli/index.js +31 -0
- package/dist/cli/index.js.map +1 -0
- package/dist/cli/kill.d.ts +5 -0
- package/dist/cli/kill.js +46 -0
- package/dist/cli/kill.js.map +1 -0
- package/dist/cli/logs.d.ts +5 -0
- package/dist/cli/logs.js +64 -0
- package/dist/cli/logs.js.map +1 -0
- package/dist/cli/profiles.d.ts +5 -0
- package/dist/cli/profiles.js +67 -0
- package/dist/cli/profiles.js.map +1 -0
- package/dist/cli/send.d.ts +5 -0
- package/dist/cli/send.js +46 -0
- package/dist/cli/send.js.map +1 -0
- package/dist/cli/setup.d.ts +6 -0
- package/dist/cli/setup.js +168 -0
- package/dist/cli/setup.js.map +1 -0
- package/dist/cli/spawn.d.ts +5 -0
- package/dist/cli/spawn.js +52 -0
- package/dist/cli/spawn.js.map +1 -0
- package/dist/cli/status.d.ts +5 -0
- package/dist/cli/status.js +85 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/harness/applier.d.ts +38 -0
- package/dist/harness/applier.js +152 -0
- package/dist/harness/applier.js.map +1 -0
- package/dist/harness/index.d.ts +14 -0
- package/dist/harness/index.js +110 -0
- package/dist/harness/index.js.map +1 -0
- package/dist/harness/patterns.d.ts +14 -0
- package/dist/harness/patterns.js +96 -0
- package/dist/harness/patterns.js.map +1 -0
- package/dist/harness/proposer.d.ts +26 -0
- package/dist/harness/proposer.js +181 -0
- package/dist/harness/proposer.js.map +1 -0
- package/dist/harness/traces.d.ts +29 -0
- package/dist/harness/traces.js +65 -0
- package/dist/harness/traces.js.map +1 -0
- package/dist/harness/validator.d.ts +6 -0
- package/dist/harness/validator.js +112 -0
- package/dist/harness/validator.js.map +1 -0
- package/dist/inbox/agentmail.d.ts +11 -0
- package/dist/inbox/agentmail.js +36 -0
- package/dist/inbox/agentmail.js.map +1 -0
- package/dist/llm/provider.d.ts +15 -0
- package/dist/llm/provider.js +65 -0
- package/dist/llm/provider.js.map +1 -0
- package/dist/orchestrator/agent.d.ts +17 -0
- package/dist/orchestrator/agent.js +195 -0
- package/dist/orchestrator/agent.js.map +1 -0
- package/dist/orchestrator/index.d.ts +7 -0
- package/dist/orchestrator/index.js +92 -0
- package/dist/orchestrator/index.js.map +1 -0
- package/dist/orchestrator/retry.d.ts +27 -0
- package/dist/orchestrator/retry.js +145 -0
- package/dist/orchestrator/retry.js.map +1 -0
- package/dist/orchestrator/session.d.ts +13 -0
- package/dist/orchestrator/session.js +55 -0
- package/dist/orchestrator/session.js.map +1 -0
- package/dist/output/events.d.ts +12 -0
- package/dist/output/events.js +81 -0
- package/dist/output/events.js.map +1 -0
- package/dist/profiles/learner.d.ts +4 -0
- package/dist/profiles/learner.js +168 -0
- package/dist/profiles/learner.js.map +1 -0
- package/dist/tools/captcha.d.ts +19 -0
- package/dist/tools/captcha.js +76 -0
- package/dist/tools/captcha.js.map +1 -0
- package/dist/tools/inbox.d.ts +30 -0
- package/dist/tools/inbox.js +65 -0
- package/dist/tools/inbox.js.map +1 -0
- package/dist/types.d.ts +121 -0
- package/dist/types.js +30 -0
- package/dist/types.js.map +1 -0
- package/package.json +60 -0
- package/tasks.example.json +5 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 usertester contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
1
|
+
# usertester
|
|
2
|
+
|
|
3
|
+
Spawn N AI agents as simulated users to test your web app flows — signup, onboarding, checkout, email verification — in parallel, with real email inboxes and natural language control.
|
|
4
|
+
|
|
5
|
+
```
|
|
6
|
+
usertester spawn --url https://myapp.com --n 3 --message "Sign up as a new user"
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
Each agent gets a unique email inbox, runs a headless browser, and executes your task as a first-time user. You watch a live NDJSON event stream. When an agent finishes, send it a follow-up task — the browser session stays open.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npm install -g usertester
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Or run without installing:
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
npx usertester setup
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
You need Node.js 20+ and Google Chrome installed locally.
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
## Quick start
|
|
30
|
+
|
|
31
|
+
**Step 1 — Get two API keys**
|
|
32
|
+
|
|
33
|
+
- Anthropic API key: https://console.anthropic.com/settings/keys
|
|
34
|
+
- AgentMail API key: https://agentmail.to/dashboard
|
|
35
|
+
|
|
36
|
+
**Step 2 — Configure**
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
usertester setup
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
Prompts for both keys, validates them live, writes `.env`.
|
|
43
|
+
|
|
44
|
+
**Step 3 — Spawn agents**
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
usertester spawn --url https://yourapp.com --n 1 --message "Sign up as a new user"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
Output (NDJSON, one event per line):
|
|
51
|
+
|
|
52
|
+
```jsonl
|
|
53
|
+
{"event":"session_start","sessionId":"abc123","url":"https://yourapp.com","n":1}
|
|
54
|
+
{"event":"spawned","agent":"agent-01","inbox":"abc@agentmail.to"}
|
|
55
|
+
{"event":"state","agent":"agent-01","from":"SIGNING_UP","to":"RUNNING"}
|
|
56
|
+
{"event":"ready","agent":"agent-01","message_completed":"Sign up as a new user","summary":"Filled the registration form and clicked Register. Signup succeeded and was redirected to the dashboard.","screenshot":"/Users/you/.usertester/abc123/agent-01/screenshots/001.png"}
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
**Step 4 — Send follow-up tasks**
|
|
60
|
+
|
|
61
|
+
While an agent is in `WAITING` state, send it a new task — the browser session stays open:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
usertester send agent-01 "Go to the pricing page and try to upgrade to the Pro plan"
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
---
|
|
68
|
+
|
|
69
|
+
## Commands
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
usertester setup # First-run API key configuration
|
|
73
|
+
usertester spawn --url URL --n N --message M # Spawn N agents with a shared task
|
|
74
|
+
usertester spawn --url URL --messages-file tasks.json # Per-agent tasks from file
|
|
75
|
+
usertester status # Show all agents + current state
|
|
76
|
+
usertester send <agent-id> <message> # Resume a waiting agent with a new task
|
|
77
|
+
usertester kill <agent-id> # Kill a running or waiting agent
|
|
78
|
+
usertester logs <agent-id> [--follow] # Tail an agent's log
|
|
79
|
+
usertester cleanup # Delete all AgentMail inboxes for current session
|
|
80
|
+
usertester cleanup --all # Clean up all sessions
|
|
81
|
+
usertester profiles list # Show learned profile hints per URL/scenario
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
---
|
|
85
|
+
|
|
86
|
+
## Per-agent task file
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
[
|
|
90
|
+
{ "message": "Sign up as a new user and complete onboarding" },
|
|
91
|
+
{ "message": "Sign up, then try to upgrade to the paid plan" },
|
|
92
|
+
{ "message": "Sign up using Google OAuth if available" }
|
|
93
|
+
]
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
If the file has fewer entries than `--n`, tasks cycle.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## How it works
|
|
101
|
+
|
|
102
|
+
1. **Inbox provisioning** — each agent gets a unique `@agentmail.to` email address (~135ms)
|
|
103
|
+
2. **Browser agent** — headless Chrome via Stagehand v3, controlled by `claude-opus-4-6`
|
|
104
|
+
3. **Multi-step execution** — `agent().execute()` runs an observe→act→check loop until the task completes
|
|
105
|
+
4. **RLM memory** — session history is queried in chunks rather than fed whole into context. Cost stays near-flat as sessions grow.
|
|
106
|
+
5. **Profile learning** — after each session, failures are extracted into `facts.json` per URL/scenario. Next run, the agent starts with those hints.
|
|
107
|
+
6. **NDJSON event stream** — every state transition and result is a JSON line to stdout. Calling agents (Claude Code, etc.) parse this to decide next steps.
|
|
108
|
+
|
|
109
|
+
---
|
|
110
|
+
|
|
111
|
+
## Bypassing bot detection (Cloudflare, CAPTCHA)
|
|
112
|
+
|
|
113
|
+
When `USERTESTER_BYPASS_TOKEN` is set, usertester sends an `x-usertester-bypass: <your-token>` header with the agent's browser requests. Configure your app to allow this traffic through.
|
|
114
|
+
|
|
115
|
+
### Option A: Cloudflare WAF bypass (recommended, free)
|
|
116
|
+
|
|
117
|
+
**Step 1 — Generate a secret bypass token:**
|
|
118
|
+
```bash
|
|
119
|
+
openssl rand -hex 20 # → e.g. a3f9c2b8d7e14f6a9c2b8d7e14f6a9c2b8d7e14f
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
**Step 2 — Add it to your `.env`:**
|
|
123
|
+
```
|
|
124
|
+
USERTESTER_BYPASS_TOKEN=a3f9c2b8d7e14f6a9c2b8d7e14f6a9c2b8d7e14f
|
|
125
|
+
```
|
|
126
|
+
|
|
127
|
+
**Step 3 — Add a WAF rule in Cloudflare dashboard → Security → WAF → Custom rules:**
|
|
128
|
+
```
|
|
129
|
+
Field: Request Header
|
|
130
|
+
Header: x-usertester-bypass
|
|
131
|
+
Operator: equals
|
|
132
|
+
Value: a3f9c2b8d7e14f6a9c2b8d7e14f6a9c2b8d7e14f ← your secret
|
|
133
|
+
Action: Skip → All remaining custom rules
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
The token is never in source code — only in your `.env` and Cloudflare dashboard. Rotate it anytime by generating a new one and updating both places.
|
|
137
|
+
|
|
138
|
+
### Option B: Supabase Auth — use Cloudflare test keys
|
|
139
|
+
|
|
140
|
+
If your app uses Supabase Auth with Cloudflare Turnstile:
|
|
141
|
+
|
|
142
|
+
1. Supabase dashboard → **Authentication → Security → CAPTCHA protection**
|
|
143
|
+
2. Switch site key to: `1x00000000000000000000AA` (Cloudflare's official test key — always passes)
|
|
144
|
+
3. Switch secret key to: `1x0000000000000000000000000000000AA`
|
|
145
|
+
|
|
146
|
+
Use only in dev/staging — not production.
|
|
147
|
+
|
|
148
|
+
### Option C: Automatic CAPTCHA solving (no app changes, paid)
|
|
149
|
+
|
|
150
|
+
Add `CAPSOLVER_API_KEY` to `.env` and usertester will automatically solve Cloudflare Turnstile via [CapSolver](https://capsolver.com) (~$1.20/1K solves, ~85-90% success rate).
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
CAPSOLVER_API_KEY=CAP-...
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
## Calling from a coding agent
|
|
159
|
+
|
|
160
|
+
usertester is designed to be orchestrated by a coding agent (Claude Code, Codex) as well as used directly. Parse the NDJSON stream:
|
|
161
|
+
|
|
162
|
+
```typescript
|
|
163
|
+
import { spawn } from 'node:child_process'
|
|
164
|
+
import * as readline from 'node:readline'
|
|
165
|
+
|
|
166
|
+
const proc = spawn('usertester', ['spawn', '--url', url, '--n', '3', '--message', task])
|
|
167
|
+
const rl = readline.createInterface({ input: proc.stdout })
|
|
168
|
+
|
|
169
|
+
rl.on('line', (line) => {
|
|
170
|
+
const event = JSON.parse(line)
|
|
171
|
+
if (event.event === 'ready') {
|
|
172
|
+
// event.summary tells you what happened
|
|
173
|
+
// send next task:
|
|
174
|
+
spawn('usertester', ['send', event.agent, 'Next task here'])
|
|
175
|
+
}
|
|
176
|
+
})
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
---
|
|
180
|
+
|
|
181
|
+
## Limits
|
|
182
|
+
|
|
183
|
+
| | Free plan | Paid plan |
|
|
184
|
+
|---|---|---|
|
|
185
|
+
| AgentMail inboxes | 3 simultaneous | Unlimited |
|
|
186
|
+
| Agents per session | 3 | Up to 20 (configurable) |
|
|
187
|
+
|
|
188
|
+
Always run `usertester cleanup` between sessions to free inbox slots on the free plan.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## Results
|
|
193
|
+
|
|
194
|
+
After a session, results are saved to `~/.usertester/<session-id>/`:
|
|
195
|
+
|
|
196
|
+
```
|
|
197
|
+
~/.usertester/<session-id>/
|
|
198
|
+
├── state.json # Live session + agent states
|
|
199
|
+
├── agent-01/
|
|
200
|
+
│ ├── agent.log # Full agent activity log
|
|
201
|
+
│ ├── events.ndjson # Structured event history
|
|
202
|
+
│ └── screenshots/ # Screenshots per task
|
|
203
|
+
└── ...
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Requirements
|
|
209
|
+
|
|
210
|
+
- Node.js 20+
|
|
211
|
+
- Google Chrome (for local browser automation)
|
|
212
|
+
- Anthropic API key
|
|
213
|
+
- AgentMail API key
|
|
214
|
+
|
|
215
|
+
---
|
|
216
|
+
|
|
217
|
+
## License
|
|
218
|
+
|
|
219
|
+
MIT
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import type { SessionMemory, ProfileFacts, UsertesterConfig } from '../types.js';
|
|
2
|
+
import type { RetryAttempt } from '../orchestrator/retry.js';
|
|
3
|
+
/** Value resolved by BrowserAgent.resume() once a follow-up task has been run. */
export interface ResumeResult {
|
|
4
|
+
/** Short natural-language summary of what happened during the task. */
summary: string;
|
|
5
|
+
/** Path of the screenshot taken after the task (under `<agentDir>/screenshots/`). */
screenshotPath: string;
|
|
6
|
+
}
|
|
7
|
+
/**
 * Stagehand wrapper that drives one simulated user in a browser and keeps a
 * bounded history of the actions it took ("RLM memory").
 */
export declare class BrowserAgent {
|
|
8
|
+
/** Stagehand instance; null until start() and again after destroy(). */
private stagehand;
|
|
9
|
+
private config;
|
|
10
|
+
/** Session memory: task description, start URL, recorded actions, archived count, recovery tips. */
private memory;
|
|
11
|
+
/** Directory that receives this agent's log, events and screenshots. */
private agentDir;
|
|
12
|
+
/** Counter used to name screenshots 001.png, 002.png, ... */
private screenshotIndex;
|
|
13
|
+
private rlmRecentActions;
|
|
14
|
+
private rlmMaxFailedActions;
|
|
15
|
+
/** Failed attempts recorded by the retry loop of the most recent start(). */
private retryHistory;
|
|
16
|
+
/**
 * @param opts.config usertester configuration (model, API keys, bypass token, Browserbase credentials).
 * @param opts.agentDir directory for this agent's log, events and screenshots.
 * @param opts.rlmRecentActions how many of the latest actions are summarised on resume (default 10).
 * @param opts.rlmMaxFailedActions how many of the latest failed actions are summarised on resume (default 5).
 */
constructor(opts: {
|
|
17
|
+
config: Partial<UsertesterConfig>;
|
|
18
|
+
agentDir: string;
|
|
19
|
+
rlmRecentActions?: number;
|
|
20
|
+
rlmMaxFailedActions?: number;
|
|
21
|
+
});
|
|
22
|
+
/**
 * Launches the browser (Browserbase when both Browserbase credentials are configured,
 * otherwise local headless Chrome), navigates to `url` and runs `initialTask`.
 * An incomplete first attempt is classified and retried, up to five attempts in total.
 * @param inbox email address the agent is told to use.
 * @param profileHints hints learned from previous runs, added to the agent's instructions.
 */
start(url: string, inbox: string, initialTask: string, profileHints?: ProfileFacts): Promise<void>;
|
|
23
|
+
/**
 * Runs a follow-up task in the already-open browser session, then takes a
 * screenshot and summarises the task. Rejects if start() has not been called.
 */
resume(task: string): Promise<ResumeResult>;
|
|
24
|
+
/** Returns a copy of the session memory. */
exportMemory(): SessionMemory;
|
|
25
|
+
/** Returns a copy of the recorded retry attempts. */
exportRetryHistory(): RetryAttempt[];
|
|
26
|
+
/** Closes the browser session if one is open. */
destroy(): Promise<void>;
|
|
27
|
+
private buildRLMContext;
|
|
28
|
+
private llmBatch;
|
|
29
|
+
private executeTask;
|
|
30
|
+
private recordAction;
|
|
31
|
+
private takeScreenshot;
|
|
32
|
+
private summarizeLastTask;
|
|
33
|
+
}
|
|
@@ -0,0 +1,393 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* BrowserAgent: Stagehand v3 wrapper with RLM memory loop
|
|
3
|
+
*
|
|
4
|
+
* Implements the BrowserAgent interface from the design doc:
|
|
5
|
+
* start(url, inbox, initialTask, profileHints?) → void
|
|
6
|
+
* resume(task) → ResumeResult
|
|
7
|
+
* exportMemory() → SessionMemory
|
|
8
|
+
* destroy() → void
|
|
9
|
+
*/
|
|
10
|
+
import { Stagehand } from '@browserbasehq/stagehand';
|
|
11
|
+
import path from 'node:path';
|
|
12
|
+
import { appendAgentEvent, appendAgentLog } from '../output/events.js';
|
|
13
|
+
import { cheapCall, cheapBatch } from '../llm/provider.js';
|
|
14
|
+
import { classifyFailure, selectToolsForRecovery, buildRetryInstruction } from '../orchestrator/retry.js';
|
|
15
|
+
// Once more than this many actions are held in memory, the oldest ones are archived.
const ARCHIVE_THRESHOLD = 50;
|
|
16
|
+
// Number of oldest actions moved out of memory in one archive pass.
const ARCHIVE_BATCH = 10;
|
|
17
|
+
/**
 * Stagehand wrapper that drives one simulated user in a browser and keeps a
 * bounded history of the actions it took ("RLM memory").
 *
 * Lifecycle: construct → start() once → resume() any number of times → destroy().
 */
export class BrowserAgent {
    /** Stagehand instance; null until start() and again after destroy(). */
    stagehand = null;
    config;
    memory;
    agentDir;
    /** Counter used to name screenshots 001.png, 002.png, ... */
    screenshotIndex = 0;
    rlmRecentActions;
    rlmMaxFailedActions;
    /** Failed attempts recorded by the retry loop of the most recent start(). */
    retryHistory = [];

    /**
     * @param {object} opts
     * @param {object} opts.config usertester configuration (model, API keys, bypass token, ...).
     * @param {string} opts.agentDir directory receiving this agent's log, events and screenshots.
     * @param {number} [opts.rlmRecentActions=10] number of latest actions summarised on resume.
     * @param {number} [opts.rlmMaxFailedActions=5] number of latest failed actions summarised on resume.
     */
    constructor(opts) {
        this.config = opts.config;
        this.agentDir = opts.agentDir;
        this.rlmRecentActions = opts.rlmRecentActions ?? 10;
        this.rlmMaxFailedActions = opts.rlmMaxFailedActions ?? 5;
        this.memory = {
            taskDescription: '',
            startUrl: '',
            actions: [],
            archivedActionCount: 0,
            recoveryTips: [],
        };
    }

    /**
     * Launches the browser, navigates to `url` and runs `initialTask`.
     * An incomplete first attempt is classified and retried (five attempts at most);
     * a retry that succeeds is stored as a recovery tip in the session memory.
     *
     * @param {string} url page the agent starts on.
     * @param {string} inbox email address the agent is told to use.
     * @param {string} initialTask natural-language task.
     * @param {object} [profileHints] hints learned from previous runs (`harnessHints` array).
     * @returns {Promise<void>}
     */
    async start(url, inbox, initialTask, profileHints) {
        this.memory.taskDescription = initialTask;
        this.memory.startUrl = url;
        this.stagehand = this.createStagehand();
        await this.stagehand.init();
        const page = this.stagehand.context.pages()[0];
        // Inject customer-specific bypass token if configured.
        // Customers add a WAF rule: (http.request.headers["x-usertester-bypass"] eq "<their-token>") → Skip
        // The token is secret — it comes from configuration, never hardcoded.
        const bypassToken = this.config.bypass_token;
        if (bypassToken) {
            await page.setExtraHTTPHeaders({ 'x-usertester-bypass': bypassToken });
        }
        appendAgentLog(this.agentDir, `Browser started. Navigating to ${url}`);
        appendAgentEvent(this.agentDir, { event: 'browser_started', url });
        await page.goto(url, { waitUntil: 'load' });

        // Tolerate profiles that carry no harnessHints at all.
        const harnessHints = profileHints?.harnessHints ?? [];
        // If a high-confidence proven approach exists, use it exclusively —
        // contradictory lower-confidence hints are excluded to avoid confusing the agent.
        const provenApproach = harnessHints.find(h => h.confidence >= 0.95 && h.observation.startsWith('PROVEN APPROACH'));
        const hintLines = provenApproach
            ? `- ${provenApproach.observation}`
            : harnessHints
                .filter(h => h.confidence > 0.5)
                .map(h => `- ${h.observation}`)
                .join('\n');
        const systemContext = [
            `You are testing this web app as a first-time user.`,
            `Your email address is: ${inbox}`,
            `Your task: ${initialTask}`,
            `Navigate the app, complete the task, and note anything confusing, broken, or unclear.`,
            `Do not skip steps. Use the email ${inbox} when asked for an email.`,
            `If verification fails and you need to resend a code, wait for any cooldown timer shown before clicking Resend. Then call readInboxEmail again to get the new code.`,
            hintLines ? `\nKnown context from previous runs:\n${hintLines}` : '',
        ]
            .filter(Boolean)
            .join('\n');
        appendAgentLog(this.agentDir, `Starting task: ${initialTask}`);

        // Pre-inject tools named by the proven approach so the agent does not
        // waste attempt 1 discovering that it needs them.
        const attempt1Tools = {};
        if (provenApproach?.observation.includes('readInboxEmail')) {
            const { readInboxEmail } = await import('../tools/inbox.js');
            attempt1Tools['readInboxEmail'] = readInboxEmail;
            appendAgentLog(this.agentDir, `Pre-injecting readInboxEmail from profile recovery tip`);
        }

        const maxAttempts = 5;
        this.retryHistory = [];
        // Tools handed to the attempt whose failure is being recorded next.
        let lastTools = attempt1Tools;
        let result = await this.executeTask(systemContext, initialTask, attempt1Tools);
        if (result.completed) {
            return;
        }
        for (let attempt = 2; attempt <= maxAttempts; attempt++) {
            const classification = await classifyFailure(result.message, this.config);
            appendAgentLog(this.agentDir, `Retry ${attempt}: classified as ${classification.type} — ${classification.recoveryHint}`);
            this.retryHistory.push({
                attempt: attempt - 1,
                instruction: initialTask,
                toolsInjected: Object.keys(lastTools),
                result: 'failed',
                failureType: classification.type,
                agentMessage: result.message,
                finalUrl: result.finalUrl,
            });
            if (classification.type === 'COMPLETE' || classification.type === 'ESCALATE') {
                break;
            }
            // RATE_LIMITED: wait the app's specified cooldown then retry
            if (classification.type === 'RATE_LIMITED') {
                const secondsMatch = result.message.match(/only request this after (\d+)|wait (\d+) second/i);
                const waitSeconds = secondsMatch
                    ? Number.parseInt(secondsMatch[1] ?? secondsMatch[2], 10)
                    : 90; // default to 90s if we can't parse
                appendAgentLog(this.agentDir, ` Rate limited — waiting ${waitSeconds}s before retry`);
                await new Promise(r => setTimeout(r, waitSeconds * 1000));
            }
            const tools = selectToolsForRecovery(classification);
            // ENVIRONMENT_BLOCK: only give up if no solver tool is available for it
            if (classification.type === 'ENVIRONMENT_BLOCK' && Object.keys(tools).length === 0) {
                break;
            }
            if (classification.type === 'TRANSIENT' && attempt > 3) {
                break;
            }
            const retryInstruction = buildRetryInstruction(initialTask, this.retryHistory, this.memory, url);
            appendAgentLog(this.agentDir, ` injecting tools: ${Object.keys(tools).join(', ') || 'none'}`);
            result = await this.executeTask(systemContext, retryInstruction, tools);
            lastTools = tools;
            if (result.completed) {
                appendAgentLog(this.agentDir, `✓ Retry ${attempt} succeeded`);
                const tip = {
                    url: this.memory.startUrl,
                    scenario: 'signup',
                    failedApproaches: this.retryHistory
                        .filter(a => a.result === 'failed')
                        .map(a => a.agentMessage.slice(0, 150)),
                    successApproach: result.message.slice(0, 400),
                    toolsUsed: Object.keys(tools),
                    finalUrl: result.finalUrl,
                    confidence: 0.95,
                    ts: Date.now(),
                };
                this.memory.recoveryTips.push(tip);
                appendAgentEvent(this.agentDir, { event: 'recovery_tip_written', tip });
                appendAgentLog(this.agentDir, `Recovery tip stored: ${tip.successApproach.slice(0, 80)}`);
                break;
            }
        }
    }

    /**
     * Runs a follow-up task in the already-open browser session.
     *
     * @param {string} task natural-language task.
     * @returns {Promise<{summary: string, screenshotPath: string}>} `screenshotPath` is ''
     *   when no screenshot could be captured.
     * @throws {Error} if start() has not been called.
     */
    async resume(task) {
        if (!this.stagehand)
            throw new Error('BrowserAgent not started');
        const context = await this.buildRLMContext(task);
        appendAgentLog(this.agentDir, `Resuming with task: ${task}`);
        appendAgentLog(this.agentDir, `RLM context: ${context.slice(0, 200)}...`);
        await this.executeTask(context, task, {});
        const screenshotPath = await this.takeScreenshot();
        const summary = await this.summarizeLastTask(task);
        return { summary, screenshotPath };
    }

    /**
     * Returns a snapshot of the session memory. The `actions` and `recoveryTips`
     * arrays are copied so callers cannot mutate the agent's internal state
     * (the entries themselves are shared).
     */
    exportMemory() {
        return {
            ...this.memory,
            actions: [...this.memory.actions],
            recoveryTips: [...this.memory.recoveryTips],
        };
    }

    /** Returns a copy of the recorded retry attempts. */
    exportRetryHistory() {
        return [...this.retryHistory];
    }

    /**
     * Closes the browser session if one is open. Safe to call repeatedly: the
     * Stagehand reference is dropped even when close() throws (the error still
     * propagates), so a second call does not touch the dead instance again.
     */
    async destroy() {
        const stagehand = this.stagehand;
        if (!stagehand)
            return;
        try {
            await stagehand.close();
        }
        finally {
            this.stagehand = null;
        }
    }

    // --- Private: browser construction ---

    /** Builds (but does not init) the Stagehand instance for the configured environment. */
    createStagehand() {
        // Use Stagehand's native model config (provider/model format + apiKey).
        const cuaModelString = this.config.cua_model ?? 'anthropic/claude-opus-4-6';
        // Strip 'openrouter/' prefix — Stagehand uses provider/model directly
        const routerPrefix = 'openrouter/';
        const modelName = cuaModelString.startsWith(routerPrefix)
            ? cuaModelString.slice(routerPrefix.length)
            : cuaModelString;
        const apiKey = this.config.anthropic_api_key
            ?? this.config.openrouter_api_key
            ?? this.config.openai_api_key
            ?? process.env.ANTHROPIC_API_KEY
            ?? process.env.OPENROUTER_API_KEY
            ?? '';
        const sharedOptions = {
            verbose: 0,
            model: { modelName, apiKey },
            logger: () => { },
            experimental: true,
            disableAPI: true,
        };
        if (this.config.browserbase_api_key && this.config.browserbase_project_id) {
            appendAgentLog(this.agentDir, `Using Browserbase (project: ${this.config.browserbase_project_id})`);
            return new Stagehand({
                env: 'BROWSERBASE',
                apiKey: this.config.browserbase_api_key,
                projectId: this.config.browserbase_project_id,
                ...sharedOptions,
            });
        }
        appendAgentLog(this.agentDir, `Using local Chrome (headless)`);
        return new Stagehand({
            env: 'LOCAL',
            localBrowserLaunchOptions: { headless: true },
            ...sharedOptions,
        });
    }

    // --- Private: RLM context builder ---

    /**
     * Builds the instruction preamble for a resumed task from the current URL
     * and two short LLM summaries: the recent actions and the recent failures.
     */
    async buildRLMContext(nextTask) {
        const page = this.stagehand.context.pages()[0];
        const currentUrl = page.url();
        const recentWindow = this.memory.actions.slice(-this.rlmRecentActions);
        const failedWindow = this.memory.actions
            .filter(a => a.result === 'failed')
            .slice(-this.rlmMaxFailedActions);
        const [recentContext, failureContext] = await this.llmBatch([
            {
                data: recentWindow,
                prompt: 'What is the current browser state and what has the agent done most recently?',
            },
            {
                data: failedWindow,
                prompt: 'What has failed before that the agent should avoid repeating?',
            },
        ]);
        return [
            `You are a browser automation agent testing a web app.`,
            `Current URL: ${currentUrl}`,
            `Next task: ${nextTask}`,
            `Total actions taken so far: ${this.memory.actions.length + this.memory.archivedActionCount}`,
            `Recent state: ${recentContext}`,
            failureContext !== '(no data)' ? `Things to avoid: ${failureContext}` : null,
        ]
            .filter(Boolean)
            .join('\n');
    }

    /**
     * Answers each `{ data, prompt }` query with a 1-2 sentence LLM summary of
     * its actions. Queries with no actions yield '(no data)' without an LLM call.
     */
    async llmBatch(queries) {
        const prompts = queries.map(({ data, prompt }) => {
            if (data.length === 0)
                return null;
            const dataStr = data
                .map(a => `${a.action} → ${a.result}${a.observation ? ` | ${a.observation}` : ''}`)
                .join('\n');
            return `${prompt}\n\nActions:\n${dataStr}\n\nAnswer in 1-2 sentences.`;
        });
        return Promise.all(prompts.map(async (p) => {
            if (p === null)
                return '(no data)';
            const text = await cheapBatch([p], this.config, 150);
            return text[0] || '(no data)';
        }));
    }

    // --- Private: task execution ---

    /**
     * Runs one task through Stagehand's agent and records every step in memory.
     * If agent.execute() throws, falls back to at most five individual act()
     * calls on whatever observe() reports, and reports the task as not completed.
     *
     * @returns {Promise<{completed: boolean, message: string, finalUrl: string}>}
     */
    async executeTask(systemContext, task, tools = {}) {
        if (!this.stagehand)
            throw new Error('Stagehand not initialized');
        const page = this.stagehand.context.pages()[0];
        const startUrl = page.url();
        const fullInstruction = systemContext ? `${systemContext}\n\nTask: ${task}` : task;
        try {
            // Tools are passed to stagehand.agent() config, not to execute()
            const agentConfig = {};
            if (Object.keys(tools).length > 0) {
                agentConfig.tools = tools;
            }
            const agent = this.stagehand.agent(agentConfig);
            const result = await agent.execute({ instruction: fullInstruction, maxSteps: 15 });
            // Best effort: a page that never reaches 'load' must not fail the task.
            await page.waitForLoadState('load').catch(() => { });
            const newUrl = page.url();
            appendAgentLog(this.agentDir, `agent.execute() completed: ${result.completed ? 'done' : 'incomplete'}`);
            appendAgentLog(this.agentDir, ` steps: ${result.actions?.length ?? 0}, tools injected: ${Object.keys(tools).join(', ') || 'none'}`);
            appendAgentLog(this.agentDir, ` message: ${result.message}`);
            appendAgentLog(this.agentDir, ` final url: ${newUrl}`);
            // Record each step as an ActionRecord for RLM memory
            for (const action of (result.actions ?? [])) {
                this.recordAction({
                    ts: Date.now(),
                    action: action.type ?? 'unknown',
                    result: 'success',
                    observation: action.reasoning ?? undefined,
                    url: startUrl,
                });
            }
            // Record overall outcome with agent's message as observation
            this.recordAction({
                ts: Date.now(),
                action: task.slice(0, 100),
                result: result.completed ? 'success' : 'failed',
                observation: result.message ?? (newUrl !== startUrl ? `Navigated to ${newUrl}` : `Stayed on ${startUrl}`),
                url: startUrl,
            });
            return { completed: result.completed, message: result.message ?? '', finalUrl: newUrl };
        }
        catch (err) {
            appendAgentLog(this.agentDir, `agent.execute() failed: ${err}`);
            // Fallback: individual act() calls per observed action
            let allActions = [];
            try {
                allActions = await this.stagehand.observe();
            }
            catch (observeErr) {
                // Still best effort, but leave a trace instead of swallowing silently.
                appendAgentLog(this.agentDir, `observe() fallback failed: ${observeErr}`);
            }
            if (allActions.length > 0) {
                appendAgentLog(this.agentDir, `Falling back to ${allActions.length} individual act() calls`);
                for (const action of allActions.slice(0, 5)) {
                    try {
                        await this.stagehand.act(action.description);
                        await page.waitForLoadState('load').catch(() => { });
                        this.recordAction({
                            ts: Date.now(),
                            action: action.description,
                            selector: action.selector,
                            result: 'success',
                            url: startUrl,
                        });
                        appendAgentLog(this.agentDir, ` ok ${action.description}`);
                    }
                    catch (err2) {
                        this.recordAction({
                            ts: Date.now(),
                            action: action.description,
                            selector: action.selector,
                            result: 'failed',
                            observation: String(err2),
                            url: startUrl,
                        });
                        appendAgentLog(this.agentDir, ` fail ${action.description}: ${err2}`);
                    }
                }
            }
            else {
                this.recordAction({
                    ts: Date.now(),
                    action: task.slice(0, 100),
                    result: 'failed',
                    observation: String(err),
                    url: startUrl,
                });
            }
            return { completed: false, message: String(err), finalUrl: page.url() };
        }
    }

    /**
     * Appends an action to memory and the event log. When memory grows past
     * ARCHIVE_THRESHOLD, the oldest ARCHIVE_BATCH actions are moved out of
     * memory and written to the event log as an 'actions_archived' event.
     */
    recordAction(action) {
        this.memory.actions.push(action);
        appendAgentEvent(this.agentDir, { event: 'action', ...action });
        if (this.memory.actions.length > ARCHIVE_THRESHOLD) {
            const archived = this.memory.actions.splice(0, ARCHIVE_BATCH);
            this.memory.archivedActionCount += archived.length;
            appendAgentEvent(this.agentDir, {
                event: 'actions_archived',
                count: archived.length,
                total_archived: this.memory.archivedActionCount,
                actions: archived,
            });
        }
    }

    /**
     * Captures the first page into `<agentDir>/screenshots/NNN.png`.
     *
     * @returns {Promise<string>} the file path, or '' when the browser is not
     *   running or the capture failed — never a path to a file that was not written.
     */
    async takeScreenshot() {
        if (!this.stagehand)
            return '';
        // Reserve the index up front so overlapping calls never share a filename.
        this.screenshotIndex++;
        const filename = `${String(this.screenshotIndex).padStart(3, '0')}.png`;
        const screenshotPath = path.join(this.agentDir, 'screenshots', filename);
        try {
            const page = this.stagehand.context.pages()[0];
            await page.screenshot({ path: screenshotPath });
        }
        catch (err) {
            appendAgentLog(this.agentDir, `Screenshot failed: ${err}`);
            return '';
        }
        appendAgentLog(this.agentDir, `Screenshot saved: ${filename}`);
        return screenshotPath;
    }

    /**
     * Produces a 1-2 sentence summary of the last (at most ten) recorded
     * actions. Falls back to a success count if the LLM call fails.
     */
    async summarizeLastTask(task) {
        const recentActions = this.memory.actions.slice(-10);
        if (recentActions.length === 0)
            return 'No actions recorded.';
        const actionsStr = recentActions
            .map(a => `${a.action} → ${a.result}${a.observation ? ` (${a.observation})` : ''}`)
            .join('\n');
        const prompt = `Task: "${task}"\n\nActions taken:\n${actionsStr}\n\nSummarize in 1-2 sentences: what happened, did the task complete, and anything confusing or broken?`;
        try {
            const text = await cheapCall(prompt, this.config, 200);
            return text || 'Task execution complete.';
        }
        catch {
            // The summary is a convenience; degrade to a count rather than fail the task.
            return `Completed ${recentActions.filter(a => a.result === 'success').length}/${recentActions.length} actions.`;
        }
    }
}
|
|
393
|
+
//# sourceMappingURL=agent.js.map
|