@bonyadnouri/autoend 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +101 -0
- package/dist/auth/session.d.ts +16 -0
- package/dist/auth/session.js +14 -0
- package/dist/auth/session.js.map +1 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +118 -0
- package/dist/cli.js.map +1 -0
- package/dist/config.d.ts +14 -0
- package/dist/config.js +65 -0
- package/dist/config.js.map +1 -0
- package/dist/explore/explorer.d.ts +42 -0
- package/dist/explore/explorer.js +232 -0
- package/dist/explore/explorer.js.map +1 -0
- package/dist/explore/hands.d.ts +13 -0
- package/dist/explore/hands.js +35 -0
- package/dist/explore/hands.js.map +1 -0
- package/dist/map/flow-map.d.ts +26 -0
- package/dist/map/flow-map.js +34 -0
- package/dist/map/flow-map.js.map +1 -0
- package/dist/replay/replay.d.ts +36 -0
- package/dist/replay/replay.js +110 -0
- package/dist/replay/replay.js.map +1 -0
- package/dist/report/artifact.d.ts +10 -0
- package/dist/report/artifact.js +20 -0
- package/dist/report/artifact.js.map +1 -0
- package/dist/report/types.d.ts +39 -0
- package/dist/report/types.js +2 -0
- package/dist/report/types.js.map +1 -0
- package/dist/run/effort.d.ts +21 -0
- package/dist/run/effort.js +23 -0
- package/dist/run/effort.js.map +1 -0
- package/dist/run/run.d.ts +17 -0
- package/dist/run/run.js +39 -0
- package/dist/run/run.js.map +1 -0
- package/dist/setup/wizard.d.ts +2 -0
- package/dist/setup/wizard.js +81 -0
- package/dist/setup/wizard.js.map +1 -0
- package/dist/viewer/html.d.ts +7 -0
- package/dist/viewer/html.js +240 -0
- package/dist/viewer/html.js.map +1 -0
- package/dist/viewer/server.d.ts +13 -0
- package/dist/viewer/server.js +54 -0
- package/dist/viewer/server.js.map +1 -0
- package/package.json +50 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Bonyad Nouri
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# autoend
|
|
2
|
+
|
|
3
|
+
**Agent-powered end-to-end testing.** Point it at your app, walk away, get back a video-backed report.
|
|
4
|
+
|
|
5
|
+
```sh
|
|
6
|
+
npx autoend init # guided setup — takes a minute
|
|
7
|
+
npx autoend # agents test your app, a report opens
|
|
8
|
+
```
|
|
9
|
+
|
|
10
|
+
Straight from GitHub also works: `npx github:bonyadnouri/autoend init`
|
|
11
|
+
|
|
12
|
+
No test suite to write. No selectors to maintain. Agents discover your app's flows, guard them on every run, and hand you the evidence on film.
|
|
13
|
+
|
|
14
|
+

|
|
15
|
+
|
|
16
|
+
## How a Run works
|
|
17
|
+
|
|
18
|
+
1. **Replay** — every flow agents have ever verified is re-executed: headless, parallel, deterministic, with a WebM recording per flow. No LLM in the loop, so it's fast and free.
|
|
19
|
+
2. **Explore** — within the effort budget you chose, agents probe new surface: clicking, filling, navigating — looking for flows nobody wrote down and failures nobody noticed.
|
|
20
|
+
3. **Report** — a local page opens with a one-glance verdict. Click into any finding and *watch* what the agent saw.
|
|
21
|
+
|
|
22
|
+
Findings arrive in tiers, so signal never drowns in noise:
|
|
23
|
+
|
|
24
|
+
| Tier | Meaning | Your move |
|
|
25
|
+
|---|---|---|
|
|
26
|
+
| **Hard failure** | Objectively broken — 5xx, crashes, console errors | Fix it |
|
|
27
|
+
| **Regression** | Worked in a previous run, failed now | Fix it — or dismiss if the removal was intentional |
|
|
28
|
+
| **Heal** | UI changed, goal still works; the flow script was rewritten | Watch the video, confirm the heal |
|
|
29
|
+
| **Advisory** | Agent judgment: UX, accessibility, speed | Your call |
|
|
30
|
+
|
|
31
|
+

|
|
32
|
+
|
|
33
|
+
## The Flow Map is yours
|
|
34
|
+
|
|
35
|
+
Everything agents learn lives in your repo, in plain Playwright:
|
|
36
|
+
|
|
37
|
+
```
|
|
38
|
+
.autoend/
|
|
39
|
+
├── flows/ # commit this — your team's shared baseline
|
|
40
|
+
│ └── checkout/
|
|
41
|
+
│ ├── flow.json # metadata: title, last passed, ...
|
|
42
|
+
│ └── flow.mts # an ordinary Playwright script
|
|
43
|
+
└── runs/ # gitignored — reports + evidence videos
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Flows are ordinary Playwright scripts. Branches carry their own baseline, teammates share it through git, map changes show up in PRs like any other diff — and if you ever leave autoend, you walk away with a working Playwright suite. **No lock-in, by design.**
|
|
47
|
+
|
|
48
|
+
## Effort: you choose how hard it tests
|
|
49
|
+
|
|
50
|
+
```sh
|
|
51
|
+
npx autoend # your configured default
|
|
52
|
+
npx autoend -e low # quick pass (~1-2 min)
|
|
53
|
+
npx autoend -e high # thorough sweep (~5 min)
|
|
54
|
+
npx autoend -e ultra # leave it running (~35 min)
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
Replay always completes at any effort — regression coverage is never sacrificed to a small budget. Effort only scales exploration.
|
|
58
|
+
|
|
59
|
+
## Requirements
|
|
60
|
+
|
|
61
|
+
- **Node.js ≥ 23.6** (flow scripts run via native TypeScript type-stripping)
|
|
62
|
+
- **A Cursor API key** — exploring agents run on the [Cursor SDK](https://cursor.com/docs/sdk/typescript) ([get a key](https://cursor.com/dashboard))
|
|
63
|
+
- Browsers install on first use via Playwright; [agent-browser](https://github.com/vercel-labs/agent-browser) ships as a dependency
|
|
64
|
+
|
|
65
|
+
## Status
|
|
66
|
+
|
|
67
|
+
Early. The architecture is settled, documented, and de-risked end-to-end; the explorer fleet is the active front.
|
|
68
|
+
|
|
69
|
+
- [x] Run pipeline: replay → explore → report artifact
|
|
70
|
+
- [x] Replay engine — parallel headless Playwright with per-flow video
|
|
71
|
+
- [x] Report viewer — verdict, tiers, embedded evidence
|
|
72
|
+
- [x] Guided setup (`autoend init`)
|
|
73
|
+
- [x] Cursor SDK harness verified (agents driving agent-browser via shell)
|
|
74
|
+
- [ ] Explorer fleet — flow discovery, hard-failure detection, advisories
|
|
75
|
+
- [ ] Heal-and-notify on replay failures
|
|
76
|
+
- [ ] Report resolution actions (dismiss / reject / suppress)
|
|
77
|
+
- [ ] Fleet auth — login once, share session storage state
|
|
78
|
+
|
|
79
|
+
## Under the hood
|
|
80
|
+
|
|
81
|
+
Every load-bearing decision is written down — start with [`CONTEXT.md`](./CONTEXT.md) (the project glossary) and [`docs/adr/`](./docs/adr/):
|
|
82
|
+
|
|
83
|
+
1. [Fly-generated tests over an accumulated Flow Map](./docs/adr/0001-fly-generated-tests-over-accumulated-flow-map.md)
|
|
84
|
+
2. [agent-browser hands + Playwright artifacts](./docs/adr/0002-agent-browser-hands-playwright-artifact.md) — why exploration and replay use different engines
|
|
85
|
+
3. [Cursor SDK as the agent harness](./docs/adr/0003-cursor-sdk-as-agent-harness.md)
|
|
86
|
+
4. [Report as a static artifact + thin viewer](./docs/adr/0004-report-as-static-artifact.md)
|
|
87
|
+
|
|
88
|
+
The short version: exploration is LLM-latency-bound, so agents drive the browser through the most token-efficient hands available (agent-browser, ~200–400 tokens per snapshot). Replay is reliability-bound, so flows persist as plain Playwright with auto-waiting and native video. The report is static files served by a dumb local viewer — portable to CI by construction.
|
|
89
|
+
|
|
90
|
+
## Development
|
|
91
|
+
|
|
92
|
+
```sh
|
|
93
|
+
npm install
|
|
94
|
+
npm test # vitest — includes a real browser replay integration test
|
|
95
|
+
npm run build # tsc → dist/
|
|
96
|
+
npm run dev -- http://localhost:3000 -e low --no-open
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## License
|
|
100
|
+
|
|
101
|
+
[MIT](./LICENSE)
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Fleet auth (decided in the grilling session): login ONCE with the Test
|
|
3
|
+
* Account, capture the session as storage state, inject it into every
|
|
4
|
+
* agent's browser context. One login per Run — no parallel-login lockouts.
|
|
5
|
+
*
|
|
6
|
+
* v1 supports form-based login only; OAuth/SSO/MFA Targets fail early with a
|
|
7
|
+
* clear message (CONTEXT.md: Test Account). v1.1 adds a second *producer*
|
|
8
|
+
* (user-captured session) — the consuming side stays unchanged.
|
|
9
|
+
*/
|
|
10
|
+
export interface TestAccount {
|
|
11
|
+
username: string;
|
|
12
|
+
password: string;
|
|
13
|
+
}
|
|
14
|
+
export declare function testAccountFromEnv(): TestAccount | undefined;
|
|
15
|
+
/** Perform the single form login and return the path to the captured storage state. */
|
|
16
|
+
export declare function produceStorageState(_target: URL, _account: TestAccount): Promise<string>;
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
/**
 * Read the Test Account credentials from the environment.
 *
 * @returns {{ username: string, password: string } | undefined} the pair taken
 *   from AUTOEND_USER / AUTOEND_PASS, or undefined when either one is unset
 *   or empty.
 */
export function testAccountFromEnv() {
    const { AUTOEND_USER: username, AUTOEND_PASS: password } = process.env;
    // Both halves are required; a lone username or password is treated as "no account".
    return username && password ? { username, password } : undefined;
}
|
|
8
|
+
/**
 * Perform the single form login and return the path to the captured storage state.
 *
 * Scaffold only: the returned promise currently always rejects.
 *
 * @param {URL} _target - the Target to log in to (unused until implemented).
 * @param {{ username: string, password: string }} _account - Test Account credentials (unused until implemented).
 * @returns {Promise<string>} path of the saved storage state, once implemented.
 * @throws {Error} always, until fleet auth is implemented.
 */
export async function produceStorageState(_target, _account) {
    // TODO: drive the Target's login form once (Playwright), save storageState to
    // a Run-scoped temp file, return its path for every explorer/replay context.
    const notImplemented = new Error('fleet auth is not implemented yet (scaffold)');
    throw notImplemented;
}
|
|
14
|
+
//# sourceMappingURL=session.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"session.js","sourceRoot":"","sources":["../../src/auth/session.ts"],"names":[],"mappings":"AAcA,MAAM,UAAU,kBAAkB;IAChC,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC;IAC1C,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,YAAY,CAAC;IAC1C,IAAI,CAAC,QAAQ,IAAI,CAAC,QAAQ;QAAE,OAAO,SAAS,CAAC;IAC7C,OAAO,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC;AAChC,CAAC;AAED,uFAAuF;AACvF,MAAM,CAAC,KAAK,UAAU,mBAAmB,CAAC,OAAY,EAAE,QAAqB;IAC3E,8EAA8E;IAC9E,6EAA6E;IAC7E,MAAM,IAAI,KAAK,CAAC,8CAA8C,CAAC,CAAC;AAClE,CAAC"}
|
package/dist/cli.d.ts
ADDED
package/dist/cli.js
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import { spawn } from 'node:child_process';
|
|
3
|
+
import { rm } from 'node:fs/promises';
|
|
4
|
+
import { parseArgs } from 'node:util';
|
|
5
|
+
import pc from 'picocolors';
|
|
6
|
+
import { loadConfig, loadDotEnv } from './config.js';
|
|
7
|
+
import { handsAvailable } from './explore/hands.js';
|
|
8
|
+
import { runsDir } from './report/artifact.js';
|
|
9
|
+
import { EFFORT_LEVELS, isEffort } from './run/effort.js';
|
|
10
|
+
import { executeRun } from './run/run.js';
|
|
11
|
+
import { runSetupWizard } from './setup/wizard.js';
|
|
12
|
+
import { serveReport } from './viewer/server.js';
|
|
13
|
+
const USAGE = `Usage:
|
|
14
|
+
autoend init guided setup (target, effort, API key)
|
|
15
|
+
autoend [target-url] start a Run (falls back to your configured target)
|
|
16
|
+
autoend clean delete all local Run artifacts
|
|
17
|
+
|
|
18
|
+
Options:
|
|
19
|
+
-e, --effort <level> ${EFFORT_LEVELS.join(' | ')} (default: from config, else mid)
|
|
20
|
+
--no-open don't open the Report in a browser
|
|
21
|
+
--no-serve write the Run artifact and exit (CI-style)
|
|
22
|
+
--port <n> viewer port (default: random)
|
|
23
|
+
-h, --help show this help
|
|
24
|
+
`;
|
|
25
|
+
/**
 * CLI entry point.
 *
 * Dispatches the `init` and `clean` sub-commands; otherwise starts a Run
 * against the positional URL (or the configured target), prints a one-line
 * summary, and — unless `--no-serve` is given — serves the Report and opens
 * it in a browser.
 *
 * Usage errors set `process.exitCode = 2` and return without throwing.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const { values, positionals } = parseArgs({
        allowPositionals: true,
        options: {
            effort: { type: 'string', short: 'e' },
            'no-open': { type: 'boolean', default: false },
            'no-serve': { type: 'boolean', default: false },
            port: { type: 'string', default: '0' },
            help: { type: 'boolean', short: 'h', default: false },
        },
    });
    if (values.help) {
        process.stdout.write(USAGE);
        return;
    }
    const repoRoot = process.cwd();
    const command = positionals[0];
    if (command === 'init') {
        await runSetupWizard(repoRoot);
        return;
    }
    if (command === 'clean') {
        await rm(runsDir(repoRoot), { recursive: true, force: true });
        console.log('Local Run artifacts deleted.');
        return;
    }
    if (positionals.length > 1) {
        process.stdout.write(USAGE);
        process.exitCode = 2;
        return;
    }
    await loadDotEnv(repoRoot);
    const config = await loadConfig(repoRoot);
    // First contact with no target and no config: hand over to the wizard.
    if (!command && !config) {
        if (process.stdout.isTTY) {
            await runSetupWizard(repoRoot);
            return;
        }
        console.error('error: no target configured — run `autoend init` or pass a URL');
        process.exitCode = 2;
        return;
    }
    // Remember which string is being parsed so the error names the real
    // culprit: the positional argument, or the target stored in the config.
    const targetInput = command ?? config.target;
    let target;
    try {
        target = new URL(targetInput);
    }
    catch {
        console.error(command === undefined
            ? `error: configured target "${targetInput}" is not a valid URL — run \`autoend init\``
            : `error: "${command}" is not a valid URL`);
        process.exitCode = 2;
        return;
    }
    const effortInput = values.effort ?? config?.effort ?? 'mid';
    if (!isEffort(effortInput)) {
        console.error(`error: unknown effort "${effortInput}" (expected ${EFFORT_LEVELS.join(', ')})`);
        process.exitCode = 2;
        return;
    }
    const effort = effortInput;
    // Validate the viewer port before the Run starts: a typo should fail now,
    // not after the whole Run has already been executed. The port is only
    // used when the viewer is served, so `--no-serve` skips the check.
    const port = Number(values.port);
    if (!values['no-serve'] && !(Number.isInteger(port) && port >= 0 && port <= 65535)) {
        console.error(`error: invalid port "${values.port}" (expected an integer between 0 and 65535)`);
        process.exitCode = 2;
        return;
    }
    if (!process.env.CURSOR_API_KEY) {
        console.warn(pc.yellow('warning: CURSOR_API_KEY not set — exploration will be skipped (run `autoend init`)'));
    }
    if (!(await handsAvailable())) {
        console.warn(pc.yellow('warning: agent-browser not found on PATH — exploration will be skipped'));
    }
    console.log(`${pc.cyan('Run starting')} ${target.href} ${pc.dim(`· effort ${effort}`)}`);
    const startedMs = Date.now();
    const { artifactDir, artifact } = await executeRun({ target, effort, repoRoot });
    const seconds = ((Date.now() - startedMs) / 1000).toFixed(1);
    const failures = artifact.findings.filter((f) => f.kind === 'hard-failure').length;
    const regressions = artifact.findings.filter((f) => f.kind === 'regression').length;
    const advisories = artifact.findings.filter((f) => f.kind === 'advisory').length;
    // Only hard failures and regressions turn the verdict red; heals and
    // advisories are reported as a dimmed suffix.
    const verdict = failures + regressions > 0
        ? pc.red(`${failures} hard failures, ${regressions} regressions`)
        : pc.green('all clear');
    console.log(`${pc.cyan(`Run finished in ${seconds}s`)} · ${artifact.flowsReplayed} replayed · ${artifact.flowsDiscovered} discovered · ${verdict}` +
        (artifact.heals.length + advisories > 0 ? pc.dim(` · ${artifact.heals.length} heals, ${advisories} advisories`) : ''));
    console.log(pc.dim(`Artifact: ${artifactDir}`));
    if (values['no-serve'])
        return;
    const viewer = await serveReport(artifactDir, port);
    console.log(`Report: ${pc.underline(viewer.url)} ${pc.dim('(Ctrl+C to stop)')}`);
    if (!values['no-open'])
        openInBrowser(viewer.url);
}
|
|
110
|
+
/**
 * Open `url` in the user's default browser, best-effort.
 *
 * Uses the platform opener (`open` on macOS, `start` via the shell on
 * Windows, `xdg-open` elsewhere) as a detached child that does not keep the
 * CLI alive.
 *
 * @param {string} url - the address to open.
 * @returns {void}
 */
function openInBrowser(url) {
    const command = process.platform === 'darwin' ? 'open' : process.platform === 'win32' ? 'start' : 'xdg-open';
    const child = spawn(command, [url], { detached: true, stdio: 'ignore', shell: process.platform === 'win32' });
    // A missing opener (e.g. no xdg-open on a headless box) is reported through
    // the child's 'error' event; without a listener that event is thrown as an
    // uncaught exception and would take the report viewer down with it.
    child.on('error', (error) => {
        console.warn(`warning: could not open a browser (${error.message}) — open ${url} manually`);
    });
    child.unref();
}
|
|
114
|
+
// Run the CLI. Any rejection escaping main() is reported as a single line
// (the message only, for Error instances) and marks the process as failed.
main().catch((error) => {
    const reason = error instanceof Error ? error.message : error;
    console.error(reason);
    process.exitCode = 1;
});
|
|
118
|
+
//# sourceMappingURL=cli.js.map
|
package/dist/cli.js.map
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../src/cli.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,KAAK,EAAE,MAAM,oBAAoB,CAAC;AAC3C,OAAO,EAAE,EAAE,EAAE,MAAM,kBAAkB,CAAC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,WAAW,CAAC;AACtC,OAAO,EAAE,MAAM,YAAY,CAAC;AAC5B,OAAO,EAAE,UAAU,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACrD,OAAO,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACpD,OAAO,EAAE,OAAO,EAAE,MAAM,sBAAsB,CAAC;AAC/C,OAAO,EAAE,aAAa,EAAE,QAAQ,EAAe,MAAM,iBAAiB,CAAC;AACvE,OAAO,EAAE,UAAU,EAAE,MAAM,cAAc,CAAC;AAC1C,OAAO,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAC;AACnD,OAAO,EAAE,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAEjD,MAAM,KAAK,GAAG;;;;;;2BAMa,aAAa,CAAC,IAAI,CAAC,KAAK,CAAC;;;;;CAKnD,CAAC;AAEF,KAAK,UAAU,IAAI;IACjB,MAAM,EAAE,MAAM,EAAE,WAAW,EAAE,GAAG,SAAS,CAAC;QACxC,gBAAgB,EAAE,IAAI;QACtB,OAAO,EAAE;YACP,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,GAAG,EAAE;YACtC,SAAS,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE;YAC9C,UAAU,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,OAAO,EAAE,KAAK,EAAE;YAC/C,IAAI,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,GAAG,EAAE;YACtC,IAAI,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,GAAG,EAAE,OAAO,EAAE,KAAK,EAAE;SACtD;KACF,CAAC,CAAC;IAEH,IAAI,MAAM,CAAC,IAAI,EAAE,CAAC;QAChB,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC5B,OAAO;IACT,CAAC;IAED,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;IAC/B,MAAM,OAAO,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;IAE/B,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;QACvB,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;QAC/B,OAAO;IACT,CAAC;IACD,IAAI,OAAO,KAAK,OAAO,EAAE,CAAC;QACxB,MAAM,EAAE,CAAC,OAAO,CAAC,QAAQ,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;QAC9D,OAAO,CAAC,GAAG,CAAC,8BAA8B,CAAC,CAAC;QAC5C,OAAO;IACT,CAAC;IACD,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAC5B,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,MAAM,UAAU,CAAC,QAAQ,CAAC,CAAC;IAC3B,MAAM,MAAM,GAAG,MAAM,UAAU,CAAC,QAAQ,CAAC,CAAC;IAE1C,uEAAuE;IACvE,IAAI,CAAC,OAAO,IAAI,CAAC,MAAM,EAAE,CAAC;QACxB,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;YACzB,MAAM,cAAc,CAAC,QAAQ,CAAC,CAAC;YAC/B,OAAO;QACT,CAAC;QACD,
OAAO,CAAC,KAAK,CAAC,gEAAgE,CAAC,CAAC;QAChF,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,IAAI,MAAW,CAAC;IAChB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,OAAO,IAAI,MAAO,CAAC,MAAM,CAAC,CAAC;IAC9C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,CAAC,KAAK,CAAC,WAAW,OAAO,sBAAsB,CAAC,CAAC;QACxD,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,IAAI,MAAM,EAAE,MAAM,IAAI,KAAK,CAAC;IAC7D,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,EAAE,CAAC;QAC3B,OAAO,CAAC,KAAK,CAAC,0BAA0B,WAAW,eAAe,aAAa,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/F,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;QACrB,OAAO;IACT,CAAC;IACD,MAAM,MAAM,GAAW,WAAW,CAAC;IAEnC,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,EAAE,CAAC;QAChC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,oFAAoF,CAAC,CAAC,CAAC;IAChH,CAAC;IACD,IAAI,CAAC,CAAC,MAAM,cAAc,EAAE,CAAC,EAAE,CAAC;QAC9B,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,wEAAwE,CAAC,CAAC,CAAC;IACpG,CAAC;IAED,OAAO,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC,GAAG,CAAC,YAAY,MAAM,EAAE,CAAC,EAAE,CAAC,CAAC;IACzF,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,GAAG,MAAM,UAAU,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC,CAAC;IACjF,MAAM,OAAO,GAAG,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC,GAAG,IAAI,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;IAE7D,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,cAAc,CAAC,CAAC,MAAM,CAAC;IACnF,MAAM,WAAW,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,YAAY,CAAC,CAAC,MAAM,CAAC;IACpF,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,UAAU,CAAC,CAAC,MAAM,CAAC;IACjF,MAAM,OAAO,GACX,QAAQ,GAAG,WAAW,GAAG,CAAC;QACxB,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,QAAQ,mBAAmB,WAAW,cAAc,CAAC;QACjE,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IAC5B,OAAO,CAAC,GAAG,CACT,GAAG,EAAE,CAAC,IAAI,CAAC,mBAAmB,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,aAAa,eAAe,QAAQ,CAAC,eAAe,iBAAiB,OAAO,EAAE;QACpI,CAAC,QAAQ,CAAC,KAAK,CAAC,MAAM,GAAG,UAAU,GAAG,CAAC,CAAC,CAA
C,CAAC,EAAE,CAAC,GAAG,CAAC,MAAM,QAAQ,CAAC,KAAK,CAAC,MAAM,WAAW,UAAU,aAAa,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CACxH,CAAC;IACF,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,aAAa,WAAW,EAAE,CAAC,CAAC,CAAC;IAEhD,IAAI,MAAM,CAAC,UAAU,CAAC;QAAE,OAAO;IAE/B,MAAM,MAAM,GAAG,MAAM,WAAW,CAAC,WAAW,EAAE,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;IACnE,OAAO,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC,SAAS,CAAC,MAAM,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC,GAAG,CAAC,kBAAkB,CAAC,EAAE,CAAC,CAAC;IACjF,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;QAAE,aAAa,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;AACpD,CAAC;AAED,SAAS,aAAa,CAAC,GAAW;IAChC,MAAM,OAAO,GACX,OAAO,CAAC,QAAQ,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,QAAQ,KAAK,OAAO,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC;IAC/F,KAAK,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;AAC1G,CAAC;AAED,IAAI,EAAE,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACrB,OAAO,CAAC,KAAK,CAAC,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;IAC9D,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAC;AACvB,CAAC,CAAC,CAAC"}
|
package/dist/config.d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { Effort } from './run/effort.js';
|
|
2
|
+
/** Persisted by `autoend init`; flags on `autoend <url>` override it. */
|
|
3
|
+
export interface AutoendConfig {
|
|
4
|
+
target: string;
|
|
5
|
+
effort: Effort;
|
|
6
|
+
}
|
|
7
|
+
export declare function configPath(repoRoot: string): string;
|
|
8
|
+
export declare function loadConfig(repoRoot: string): Promise<AutoendConfig | undefined>;
|
|
9
|
+
export declare function saveConfig(repoRoot: string, config: AutoendConfig): Promise<void>;
|
|
10
|
+
/** Minimal .env loader — sets vars that aren't already in the environment. */
|
|
11
|
+
export declare function loadDotEnv(repoRoot: string): Promise<void>;
|
|
12
|
+
export declare function appendDotEnv(repoRoot: string, key: string, value: string): Promise<void>;
|
|
13
|
+
/** Ensure the host repo's .gitignore contains each line (creates the file if missing). */
|
|
14
|
+
export declare function ensureGitignore(repoRoot: string, lines: string[]): Promise<string[]>;
|
package/dist/config.js
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
import { mkdir, readFile, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
/**
 * Location of the persisted autoend config inside a repo.
 *
 * @param {string} repoRoot - root directory of the host repository.
 * @returns {string} `<repoRoot>/.autoend/config.json`, joined for the current platform.
 */
export function configPath(repoRoot) {
    const segments = [repoRoot, '.autoend', 'config.json'];
    return join(...segments);
}
|
|
6
|
+
/**
 * Load the config persisted by `autoend init`.
 *
 * A missing file is the normal "not set up yet" case and yields `undefined`
 * silently. A file that exists but cannot be read or parsed also yields
 * `undefined` (so callers keep working), but a warning is printed so a
 * corrupt config is not mistaken for an absent one.
 *
 * @param {string} repoRoot - root directory of the host repository.
 * @returns {Promise<object | undefined>} the parsed config, or undefined.
 */
export async function loadConfig(repoRoot) {
    const path = configPath(repoRoot);
    let raw;
    try {
        raw = await readFile(path, 'utf8');
    }
    catch (error) {
        if (error?.code !== 'ENOENT') {
            console.warn(`warning: could not read ${path}: ${error instanceof Error ? error.message : error}`);
        }
        return undefined;
    }
    try {
        return JSON.parse(raw);
    }
    catch (error) {
        console.warn(`warning: ignoring malformed ${path}: ${error instanceof Error ? error.message : error}`);
        return undefined;
    }
}
|
|
14
|
+
/**
 * Persist `config` as pretty-printed JSON at the repo's config path,
 * creating the `.autoend` directory first when it does not exist yet.
 *
 * @param {string} repoRoot - root directory of the host repository.
 * @param {object} config - the config object to serialize.
 * @returns {Promise<void>}
 */
export async function saveConfig(repoRoot, config) {
    const directory = join(repoRoot, '.autoend');
    await mkdir(directory, { recursive: true });
    const serialized = JSON.stringify(config, null, 2);
    await writeFile(configPath(repoRoot), serialized);
}
|
|
18
|
+
/**
 * Minimal .env loader — sets vars that aren't already in the environment.
 *
 * Reads `<repoRoot>/.env` and, for every `KEY=value` line, assigns
 * `process.env.KEY` unless it is already defined. Lines that are not simple
 * assignments (blank lines, `#` comments) are skipped. Whitespace around the
 * key and the value is trimmed, and one pair of matching surrounding quotes
 * (`"…"` or `'…'`) is removed. A missing or unreadable file is a no-op.
 *
 * @param {string} repoRoot - root directory of the host repository.
 * @returns {Promise<void>}
 */
export async function loadDotEnv(repoRoot) {
    let raw;
    try {
        raw = await readFile(join(repoRoot, '.env'), 'utf8');
    }
    catch {
        // No .env (or not readable): nothing to load, by design.
        return;
    }
    // The value group is lazy so the trailing `\s*` really trims trailing
    // whitespace; a greedy group would swallow it into the value.
    const assignment = /^\s*([A-Za-z_][A-Za-z0-9_]*)\s*=\s*(.*?)\s*$/;
    for (const line of raw.split(/\r?\n/)) {
        const match = assignment.exec(line);
        if (!match)
            continue;
        const [, key, value] = match;
        if (process.env[key] !== undefined)
            continue;
        // Strip quotes only when they form a matching pair, so a value that
        // merely starts or ends with a quote character is kept intact.
        const quote = value[0];
        const quoted = value.length >= 2 && (quote === '"' || quote === "'") && value.endsWith(quote);
        process.env[key] = quoted ? value.slice(1, -1) : value;
    }
}
|
|
34
|
+
/**
 * Store `key=value` in `<repoRoot>/.env`.
 *
 * If the file already assigns `key`, the first such line is rewritten in
 * place; the .env loader in this package lets the first assignment win, so a
 * blindly appended duplicate would never take effect. Otherwise the
 * assignment is appended, adding a separating newline first when the file
 * does not end with one. The file is created when it does not exist.
 *
 * @param {string} repoRoot - root directory of the host repository.
 * @param {string} key - variable name.
 * @param {string} value - variable value, written verbatim.
 * @returns {Promise<void>}
 * @throws {Error} when an existing .env cannot be read — rethrown rather than
 *   overwriting a file whose contents are unknown.
 */
export async function appendDotEnv(repoRoot, key, value) {
    const path = join(repoRoot, '.env');
    let raw = '';
    try {
        raw = await readFile(path, 'utf8');
    }
    catch (error) {
        // Only "no .env yet" may fall through to creating the file.
        if (error?.code !== 'ENOENT')
            throw error;
    }
    const line = `${key}=${value}`;
    const lines = raw.split('\n');
    const index = lines.findIndex((existing) => {
        const eq = existing.indexOf('=');
        return eq !== -1 && existing.slice(0, eq).trim() === key;
    });
    let next;
    if (index !== -1) {
        // Keep the file's line-ending style when replacing a CRLF line.
        lines[index] = lines[index].endsWith('\r') ? `${line}\r` : line;
        next = lines.join('\n');
    }
    else if (raw.length === 0 || raw.endsWith('\n')) {
        next = `${raw}${line}\n`;
    }
    else {
        next = `${raw}\n${line}\n`;
    }
    await writeFile(path, next);
}
|
|
47
|
+
/**
 * Ensure the host repo's .gitignore contains each line (creates the file if missing).
 *
 * Existing entries are compared after trimming, so CRLF files and indented
 * entries are recognised. Requested lines are de-duplicated before being
 * appended, so the same entry is never written twice in one call.
 *
 * @param {string} repoRoot - root directory of the host repository.
 * @param {string[]} lines - entries that must be present.
 * @returns {Promise<string[]>} the entries that were actually appended
 *   (empty when nothing was missing, in which case the file is untouched).
 * @throws {Error} when an existing .gitignore cannot be read — rethrown
 *   rather than overwriting a file whose contents are unknown.
 */
export async function ensureGitignore(repoRoot, lines) {
    const path = join(repoRoot, '.gitignore');
    let raw = '';
    try {
        raw = await readFile(path, 'utf8');
    }
    catch (error) {
        // Only "no .gitignore yet" may fall through to creating the file.
        if (error?.code !== 'ENOENT')
            throw error;
    }
    const existing = new Set(raw.split('\n').map((l) => l.trim()));
    const missing = [...new Set(lines)].filter((l) => !existing.has(l));
    if (missing.length > 0) {
        const base = raw.length === 0 || raw.endsWith('\n') ? raw : `${raw}\n`;
        await writeFile(path, `${base}${missing.join('\n')}\n`);
    }
    return missing;
}
|
|
65
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAC9D,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AASjC,MAAM,UAAU,UAAU,CAAC,QAAgB;IACzC,OAAO,IAAI,CAAC,QAAQ,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;AACnD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,IAAI,CAAC;QACH,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,MAAM,CAAC,CAAkB,CAAC;IACnF,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,SAAS,CAAC;IACnB,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB,EAAE,MAAqB;IACtE,MAAM,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,UAAU,CAAC,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;IAC7D,MAAM,SAAS,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,CAAC;AACzE,CAAC;AAED,8EAA8E;AAC9E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,IAAI,GAAW,CAAC;IAChB,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,MAAM,CAAC,CAAC;IACvD,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;IACT,CAAC;IACD,KAAK,MAAM,IAAI,IAAI,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC;QACnC,MAAM,KAAK,GAAG,6CAA6C,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACvE,IAAI,KAAK,IAAI,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,SAAS,EAAE,CAAC;YACjD,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;QAC/D,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB,EAAE,GAAW,EAAE,KAAa;IAC7E,MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACpC,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,cAAc;IAChB,CAAC;IACD,MAAM,IAAI,GAAG,GAAG,GAAG,IAAI,KAAK,EAAE,CAAC;IAC/B,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,GAAG,GAAG,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,GAAG,KAAK,IAAI,IAAI,CAAC;IAC3G,MAAM,SAAS,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;AAC9B,CAAC;AAED,0FAA0F;AAC1F,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,QAAgB,EAAE,KAAe;IACrE,
MAAM,IAAI,GAAG,IAAI,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAC;IAC1C,IAAI,GAAG,GAAG,EAAE,CAAC;IACb,IAAI,CAAC;QACH,GAAG,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IACrC,CAAC;IAAC,MAAM,CAAC;QACP,oBAAoB;IACtB,CAAC;IACD,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAC/D,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IACtD,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACvB,MAAM,IAAI,GAAG,GAAG,CAAC,MAAM,KAAK,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,GAAG,IAAI,CAAC;QACvE,MAAM,SAAS,CAAC,IAAI,EAAE,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAC1D,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import { type FlowMeta } from '../map/flow-map.js';
|
|
2
|
+
import type { ExplorationBudget } from '../run/effort.js';
|
|
3
|
+
import type { Finding } from '../report/types.js';
|
|
4
|
+
export interface ExploreOptions {
|
|
5
|
+
repoRoot: string;
|
|
6
|
+
target: URL;
|
|
7
|
+
budget: ExplorationBudget;
|
|
8
|
+
/** This Run's artifact directory — explorers get a scratch dir inside it. */
|
|
9
|
+
runDir: string;
|
|
10
|
+
evidenceDir: string;
|
|
11
|
+
knownFlows: FlowMeta[];
|
|
12
|
+
}
|
|
13
|
+
export interface ExplorationResult {
|
|
14
|
+
discovered: number;
|
|
15
|
+
findings: Finding[];
|
|
16
|
+
}
|
|
17
|
+
export interface ProposedFlow {
|
|
18
|
+
id: string;
|
|
19
|
+
title: string;
|
|
20
|
+
script: string;
|
|
21
|
+
}
|
|
22
|
+
export interface ExplorerReport {
|
|
23
|
+
flows: ProposedFlow[];
|
|
24
|
+
findings: Array<{
|
|
25
|
+
kind: 'hard-failure' | 'advisory';
|
|
26
|
+
title: string;
|
|
27
|
+
detail: string;
|
|
28
|
+
}>;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Phase 2 of a Run (ADR-0001/0003): spawn Cursor local agents that hold the
|
|
32
|
+
* agent-browser CLI through the SDK's shell tool, each in an isolated browser
|
|
33
|
+
* session, time-boxed by the Effort budget. Flows they propose only enter the
|
|
34
|
+
* Flow Map after verify-by-running via the replay engine (ADR-0002).
|
|
35
|
+
*/
|
|
36
|
+
export declare function explore(opts: ExploreOptions): Promise<ExplorationResult>;
|
|
37
|
+
/** Parse an explorer's final message. Exported for tests. */
|
|
38
|
+
export declare function parseExplorerReport(text: string): ExplorerReport | undefined;
|
|
39
|
+
/** Dedupe proposed flows against the map and each other. Exported for tests. */
|
|
40
|
+
export declare function collectProposedFlows(reports: Array<ExplorerReport | undefined>, knownFlows: FlowMeta[]): ProposedFlow[];
|
|
41
|
+
/** Kebab-case a title into a flow id. Exported for tests. */
|
|
42
|
+
export declare function slugify(value: string): string | undefined;
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import { access, mkdir, writeFile } from 'node:fs/promises';
|
|
2
|
+
import { join } from 'node:path';
|
|
3
|
+
import { Agent } from '@cursor/sdk';
|
|
4
|
+
import { chromium } from 'playwright';
|
|
5
|
+
import { addFlow } from '../map/flow-map.js';
|
|
6
|
+
import { runFlowScript } from '../replay/replay.js';
|
|
7
|
+
import { closeSession } from './hands.js';
|
|
8
|
+
/** Each explorer gets a distinct lens so the fleet doesn't converge on one path. */
|
|
9
|
+
const LENSES = [
|
|
10
|
+
'Walk the primary user journey end to end, starting from the landing page — the path a first-time visitor is meant to take.',
|
|
11
|
+
'Exercise forms and inputs: search, signup, login, contact. Fill them with plausible test data and submit.',
|
|
12
|
+
'Explore navigation: menus, headers, footers, secondary pages. Verify links lead somewhere real.',
|
|
13
|
+
'Probe edge behavior: empty states, browser back/forward, repeating an action twice, unusual but legitimate input.',
|
|
14
|
+
];
|
|
15
|
+
/**
 * Extra wall-clock time, in milliseconds, that the hard stop allows past the
 * soft deadline given to agents. Live-run data: explorers on a multi-page
 * target need 60-100s wall clock at low effort ('auto' model, 5-15s/turn) — a
 * tight grace starves them right before they report, losing all their work.
 * The same value (in seconds) is quoted to the agents in their prompt.
 */
const GRACE_MS = 60_000;
|
|
22
|
+
/**
 * Phase 2 of a Run (ADR-0001/0003): spawn Cursor local agents that hold the
 * agent-browser CLI through the SDK's shell tool, each in an isolated browser
 * session, time-boxed by the Effort budget. Flows they propose only enter the
 * Flow Map after verify-by-running via the replay engine (ADR-0002).
 *
 * Steps:
 *  1. Bail out (with a warning) when CURSOR_API_KEY is not set.
 *  2. Run `opts.budget.explorers` explorers concurrently. An explorer that
 *     throws is logged and treated as "no report" so it cannot discard the
 *     reports of its siblings.
 *  3. Turn every reported finding into a run finding, attaching the
 *     explorer's video as evidence only if the file actually exists.
 *  4. Replay each deduplicated proposed flow; only flows whose replay
 *     succeeds are added to the Flow Map.
 *
 * @param opts Exploration options; this function reads `runDir`,
 *   `evidenceDir`, `repoRoot`, `target`, `knownFlows` and `budget.explorers`.
 * @returns `{ discovered, findings }` where `discovered` counts the flows that
 *   passed verification and were added to the Flow Map.
 */
export async function explore(opts) {
    const apiKey = process.env.CURSOR_API_KEY;
    if (!apiKey) {
        console.warn('exploration skipped: CURSOR_API_KEY not set — run `autoend init`');
        return { discovered: 0, findings: [] };
    }
    const workDir = join(opts.runDir, 'explore');
    await mkdir(workDir, { recursive: true });
    // Promise.all rejects as soon as one input rejects, which would throw away
    // the reports of every other explorer. Convert a crash into "no report".
    const reports = await Promise.all(Array.from({ length: opts.budget.explorers }, (_, i) => runExplorer(i, apiKey, workDir, opts).catch((err) => {
        console.warn(`explorer ${i} crashed and was skipped: ${err instanceof Error ? err.message : String(err)}`);
        return undefined;
    })));
    const findings = [];
    for (const [i, report] of reports.entries()) {
        if (!report)
            continue;
        const evidence = `explore-${i}.webm`;
        // The agent is asked to record this file; only reference it if it is really there.
        const recorded = await access(join(opts.evidenceDir, evidence)).then(() => true, () => false);
        for (const [n, f] of report.findings.entries()) {
            findings.push({
                id: `explore-${i}-${n}`,
                kind: f.kind,
                title: f.title,
                detail: f.detail,
                evidence: recorded ? evidence : undefined,
            });
        }
    }
    const proposed = collectProposedFlows(reports, opts.knownFlows);
    let discovered = 0;
    if (proposed.length > 0) {
        // One browser is shared by all verification replays and always closed afterwards.
        const browser = await chromium.launch();
        try {
            for (const flow of proposed) {
                const scriptPath = join(workDir, `${flow.id}.mts`);
                await writeFile(scriptPath, flow.script);
                const outcome = await runFlowScript(browser, scriptPath, opts.target, opts.evidenceDir, `discovered-${flow.id}`);
                if (outcome.ok) {
                    const now = new Date().toISOString();
                    await addFlow(opts.repoRoot, { id: flow.id, title: flow.title, discoveredAt: now, lastPassedAt: now }, flow.script);
                    discovered += 1;
                }
                else {
                    console.warn(`proposed flow "${flow.id}" failed verification and was discarded: ${outcome.error}`);
                }
            }
        }
        finally {
            await browser.close();
        }
    }
    return { discovered, findings };
}
|
|
78
|
+
/**
 * Run a single explorer agent to completion or until the hard stop.
 *
 * Creates a Cursor agent working in `workDir`, sends it the explorer prompt
 * and races the run against a hard-stop timer of
 * `opts.budget.seconds * 1000 + GRACE_MS` milliseconds.
 *
 * @param index Zero-based explorer number; used for the agent name, the
 *   browser session name and log messages.
 * @param apiKey Cursor API key passed to `Agent.create`.
 * @param workDir Working directory handed to the local agent.
 * @param opts Exploration options (forwarded to the prompt builder;
 *   `budget.seconds` is read here).
 * @returns The parsed report, or `undefined` when the run timed out, did not
 *   finish with a string result, or returned something unparseable.
 */
async function runExplorer(index, apiKey, workDir, opts) {
    const session = `autoend-x${index}`;
    const agent = await Agent.create({
        name: `autoend-explorer-${index}`,
        model: { id: 'auto' },
        apiKey,
        local: { cwd: workDir },
    });
    // Handle of the hard-stop timer. It is cleared in `finally` so that a run
    // which finishes early does not leave a pending timer keeping the Node
    // process alive for the remainder of the budget.
    let hardStop;
    try {
        const run = await agent.send(explorerPrompt(index, session, opts));
        const deadline = new Promise((resolve) => {
            hardStop = setTimeout(() => resolve('timeout'), opts.budget.seconds * 1000 + GRACE_MS);
        });
        const outcome = await Promise.race([run.wait(), deadline]);
        if (outcome === 'timeout') {
            // Best-effort cancel: the run is abandoned either way.
            await run.cancel().catch(() => { });
            console.warn(`explorer ${index} hit the time budget before reporting`);
            return undefined;
        }
        if (outcome.status !== 'finished' || typeof outcome.result !== 'string') {
            console.warn(`explorer ${index} ended with status "${outcome.status}"${outcome.error?.message ? `: ${outcome.error.message}` : ''}`);
            return undefined;
        }
        const report = parseExplorerReport(outcome.result);
        if (!report)
            console.warn(`explorer ${index} returned an unparseable report`);
        return report;
    }
    finally {
        clearTimeout(hardStop);
        try {
            agent.close();
        }
        finally {
            // Runs even if agent.close() throws, so the browser session is not leaked.
            await closeSession(session);
        }
    }
}
|
|
111
|
+
/**
 * Build the instruction prompt for one explorer agent.
 *
 * The prompt tells the agent its time budget and hard-kill grace period, the
 * target URL, its lens, how to drive the browser through the agent-browser
 * CLI under its own session, the hard rules, and the strict JSON shape of the
 * final message.
 *
 * @param index Explorer number; selects the lens and names the video file.
 * @param session agent-browser session name the agent must pass on every command.
 * @param opts Exploration options; reads `target`, `budget.seconds`,
 *   `evidenceDir` and `knownFlows`.
 * @returns The full prompt text.
 */
function explorerPrompt(index, session, opts) {
    const href = opts.target.href;
    const origin = opts.target.origin;
    const seconds = opts.budget.seconds;
    const graceSeconds = Math.round(GRACE_MS / 1000);
    const lens = LENSES[index % LENSES.length];
    const videoPath = join(opts.evidenceDir, `explore-${index}.webm`);
    // Known flows are listed by title so the agent does not re-propose them.
    let known = '(none yet — this is the first exploration of this app)';
    if (opts.knownFlows.length > 0) {
        known = opts.knownFlows.map((flow) => `- ${flow.title}`).join('\n');
    }
    return `You are autoend explorer #${index}, part of a fleet testing a web app end-to-end. You have ${seconds} seconds of exploration; ${graceSeconds}s after that deadline you are hard-killed and any unreported work is LOST — so report early rather than perfectly.

TARGET: ${href}
Your lens: ${lens}

## Your browser
Drive the browser with the agent-browser CLI via shell. EVERY command MUST include \`--session ${session}\` (other agents share the daemon; the flag isolates your browser).

SPEED MATTERS: every shell call costs you a turn. BATCH commands whenever possible.

Protocol — first shell call (one batch):
agent-browser --session ${session} batch "open ${href}" "record start ${videoPath}" "snapshot -i -c"
Work loop (batch an action with the checks that follow it):
agent-browser --session ${session} batch "click @e12" "get url" "snapshot -i -c" "console" "errors"
agent-browser --session ${session} batch "fill @e5 test@example.com" "click @e7" "snapshot -i -c"
Protocol — last shell call (NEVER skip, even when out of time):
agent-browser --session ${session} batch "record stop" "close"

## Hard rules
- NEVER navigate off the origin ${origin} — if a click leaves it, go back immediately.
- Avoid destructive or irreversible actions (deleting data, real purchases, sending messages to third parties) unless a flow cannot be completed otherwise.
- Do not read or modify files outside your working directory. Your only tools are agent-browser and trivial shell.

## What to produce
1. FLOWS — user-meaningful paths you verified work (e.g. "Visitor completes checkout"). Known flows, do NOT re-propose:
${known}
For each NEW flow, write a Playwright script:
- default-export \`async function flow(page, target)\` — no imports; use only the \`page\` (Playwright Page) and \`target\` (URL) arguments
- navigate target-relative: \`await page.goto(new URL('/pricing', target).href)\` — page.goto() takes a STRING, so \`.href\` is MANDATORY (passing the URL object is the #1 script bug)
- prefer role/text locators: \`page.getByRole('link', { name: 'Pricing' })\`
- assert by throwing: \`if (!heading?.includes('Pricing')) throw new Error('expected Pricing, got ' + heading)\`
- keep it under ~25 lines; it must complete in under 60s
2. FINDINGS —
- kind "hard-failure": objective breakage only (console/page errors, HTTP >= 400 responses, crashes, blank pages). Include the exact error output in detail.
- kind "advisory": your judgment on UX, accessibility, or speed. Be sparing; only what a developer would thank you for.

## Final message — STRICT
Reply with ONLY one JSON object, no prose, no markdown fences:
{
"flows": [
{ "id": "kebab-case-id", "title": "Visitor does something meaningful", "script": "export default async function flow(page, target) { ... }" }
],
"findings": [
{ "kind": "hard-failure", "title": "Short statement", "detail": "Exact evidence: error text, URL, HTTP status" }
]
}
Empty arrays are fine. An honest empty report beats an invented one.`;
}
|
|
165
|
+
/**
 * Parse an explorer's final message. Exported for tests.
 *
 * The message is expected to contain one JSON object; everything from the
 * first `{` to the last `}` is parsed, so surrounding prose is tolerated.
 *
 * Flows are kept only when they have a usable id and a string `script`
 * containing `export default`. The id is the slug of `id`; when `id` is
 * missing, empty, or slugifies to nothing, the slug of `title` is used
 * instead. Findings are kept only when `title` is a string; any `kind` other
 * than "hard-failure" becomes "advisory" and a non-string `detail` becomes ''.
 *
 * @param text Raw final message from the agent.
 * @returns `{ flows, findings }`, or `undefined` when `text` is not a string
 *   or contains no parseable JSON object.
 */
export function parseExplorerReport(text) {
    // Exported API: guard against non-string input instead of throwing on indexOf.
    if (typeof text !== 'string')
        return undefined;
    const start = text.indexOf('{');
    const end = text.lastIndexOf('}');
    if (start < 0 || end <= start)
        return undefined;
    let parsed;
    try {
        parsed = JSON.parse(text.slice(start, end + 1));
    }
    catch {
        return undefined;
    }
    if (typeof parsed !== 'object' || parsed === null)
        return undefined;
    const raw = parsed;
    const flows = [];
    if (Array.isArray(raw.flows)) {
        for (const f of raw.flows) {
            // `??` alone only falls back on null/undefined, so an empty or
            // unsluggable id (e.g. "" or "!!!") used to drop a flow that had
            // a perfectly good title. Fall back on the slug result instead.
            const id = slugify(String(f?.id ?? '')) ?? slugify(String(f?.title ?? ''));
            const script = typeof f?.script === 'string' ? f.script : undefined;
            if (id && script && script.includes('export default')) {
                flows.push({ id, title: String(f.title ?? id), script });
            }
        }
    }
    const findings = [];
    if (Array.isArray(raw.findings)) {
        for (const f of raw.findings) {
            if (typeof f?.title !== 'string')
                continue;
            findings.push({
                kind: f.kind === 'hard-failure' ? 'hard-failure' : 'advisory',
                title: f.title,
                detail: typeof f.detail === 'string' ? f.detail : '',
            });
        }
    }
    return { flows, findings };
}
|
|
205
|
+
/**
 * Dedupe proposed flows against the map and each other. Exported for tests.
 *
 * Flows are visited in report order; a flow is kept only if its id is not a
 * known flow id and has not already been kept. Missing reports are skipped.
 *
 * @param reports Explorer reports (entries may be `undefined`).
 * @param knownFlows Flows already in the map; only `id` is read.
 * @returns The first occurrence of each new flow id, in encounter order.
 */
export function collectProposedFlows(reports, knownFlows) {
    const seenIds = new Set(knownFlows.map((known) => known.id));
    return reports
        .flatMap((report) => report?.flows ?? [])
        .filter((candidate) => {
            if (seenIds.has(candidate.id)) {
                return false;
            }
            seenIds.add(candidate.id);
            return true;
        });
}
|
|
219
|
+
/**
 * Kebab-case a title into a flow id. Exported for tests.
 *
 * Lower-cases the input, collapses every run of characters outside `a-z0-9`
 * into a single dash, strips leading/trailing dashes, cuts the result to 40
 * characters and strips any dash left dangling by the cut.
 *
 * @param value Text to convert.
 * @returns The slug, or `undefined` when nothing usable remains.
 */
export function slugify(value) {
    const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, '-');
    const trimmed = dashed.replace(/^-+|-+$/g, '');
    // Cutting at 40 characters can leave a trailing dash; remove it again.
    const clipped = trimmed.slice(0, 40).replace(/-+$/, '');
    if (clipped.length === 0) {
        return undefined;
    }
    return clipped;
}
|
|
229
|
+
/**
 * Resolve after a delay.
 *
 * @param ms Delay in milliseconds, passed straight to `setTimeout`.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms) {
    return new Promise((wake) => {
        setTimeout(wake, ms);
    });
}
|
|
232
|
+
//# sourceMappingURL=explorer.js.map
|