agex 0.2.7 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +176 -0
- package/package.json +5 -5
package/README.md
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# agex
|
|
2
|
+
|
|
3
|
+
**Let AI agents prove their work — with video.**
|
|
4
|
+
|
|
5
|
+
AI agents work in the background. They write code, fix bugs, ship features. But how do they prove their work?
|
|
6
|
+
|
|
7
|
+
No screenshots. No recordings. No proof. You just have to trust the logs — or worse, manually test it yourself. A bottleneck.
|
|
8
|
+
|
|
9
|
+
agex lets any AI agent (Cursor, Claude, Codex) open a browser, take screenshots, record video, and deliver visual evidence that the work is done. Run it locally. Run it in CI. Ship with proof.
|
|
10
|
+
|
|
11
|
+
Built on [agent-browser](https://github.com/vercel-labs/agent-browser) by Vercel.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
npx agex prove "the homepage has a search bar and a sign up button" --url https://github.com
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
In your PR CI:
|
|
20
|
+
```bash
|
|
21
|
+
npx agex prove-pr --url http://localhost:3000 # your dev server URL; start your app in CI before running this
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
## Install
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
npm install -g agex
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
## Commands
|
|
32
|
+
|
|
33
|
+
### `agex prove` - Visual assertion testing
|
|
34
|
+
|
|
35
|
+
Prove visual assertions on any URL with screenshots and video evidence.
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# Is there a dog in Google's logo? (spoiler: no — and the agent catches it)
|
|
39
|
+
agex prove "google home page has a dog in the logo" \
|
|
40
|
+
--url "https://www.google.com/?hl=en&gl=us" --agent codex
|
|
41
|
+
|
|
42
|
+
# Verify GitHub's Code dropdown has HTTPS/SSH/GitHub CLI tabs
|
|
43
|
+
agex prove "clicking the Code dropdown opens a panel with HTTPS/SSH/GitHub CLI tabs" \
|
|
44
|
+
--url https://github.com/facebook/react
|
|
45
|
+
|
|
46
|
+
# Multi-element verification
|
|
47
|
+
agex prove "github.com homepage has a search bar, a sign up button, and at least 3 navigation links" \
|
|
48
|
+
--url https://github.com --agent claude
|
|
49
|
+
|
|
50
|
+
# Complex page structure
|
|
51
|
+
agex prove "the wikipedia page for JavaScript has a History section, a Syntax section, and an infobox sidebar" \
|
|
52
|
+
--url https://en.wikipedia.org/wiki/JavaScript --agent claude
|
|
53
|
+
|
|
54
|
+
# E-commerce flow
|
|
55
|
+
agex prove "search for 'mechanical keyboard', filter by 4+ stars, verify the first result shows product image, price, and rating" \
|
|
56
|
+
--url https://amazon.com --agent claude
|
|
57
|
+
|
|
58
|
+
# Video content
|
|
59
|
+
agex prove "youtube homepage shows at least 6 video thumbnails with titles and view counts" \
|
|
60
|
+
--url https://www.youtube.com --agent codex
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
| Flag | Description | Default |
|
|
64
|
+
|------|-------------|---------|
|
|
65
|
+
| `--agent, -a` | Agent to use: `cursor`, `claude`, `codex` | `cursor` |
|
|
66
|
+
| `--url, -u` | URL to navigate to | - |
|
|
67
|
+
| `--output, -o` | Output directory | `./prove-results` |
|
|
68
|
+
| `--video` | Enable video recording | `true` |
|
|
69
|
+
| `--screenshots` | Enable screenshots | `true` |
|
|
70
|
+
| `--model, -m` | Model to use | - |
|
|
71
|
+
| `--timeout, -t` | Timeout in ms | `300000` |
|
|
72
|
+
| `--viewport` | Viewport size (WxH) | `1920x1080` |
|
|
73
|
+
| `--headless` | Run headless | `true` |
|
|
74
|
+
| `--browser, -b` | Browser mode: `mcp` or `cli` | `mcp` |
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
### `agex prove-pr` - PR verification in CI
|
|
79
|
+
|
|
80
|
+
Run it in your CI. It reads the diff, thinks, acts, and reports autonomously.
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
agex prove-pr --base main --url http://localhost:3000
|
|
84
|
+
agex prove-pr --base HEAD~3 --agent claude --hypotheses 10 --hint "focus on mobile layout"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
| Flag | Description | Default |
|
|
88
|
+
|------|-------------|---------|
|
|
89
|
+
| `--base` | Base branch/commit to compare against | auto-detected |
|
|
90
|
+
| `--agent, -a` | Agent to use: `cursor`, `claude`, `codex` | `cursor` |
|
|
91
|
+
| `--url, -u` | Dev server URL for visual testing | - |
|
|
92
|
+
| `--output, -o` | Output directory | `./prove-pr-results` |
|
|
93
|
+
| `--hypotheses` | Number of hypotheses to generate | `5` |
|
|
94
|
+
| `--model, -m` | Model to use | - |
|
|
95
|
+
| `--viewport` | Viewport size (WxH) | `1920x1080` |
|
|
96
|
+
| `--hint` | Additional prompt hint for hypothesis generation | - |
|
|
97
|
+
| `--timeout, -t` | Timeout in ms | `300000` |
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
### `agex review` - AI code review
|
|
102
|
+
|
|
103
|
+
Review code changes using an AI agent.
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
agex review --base main --agent cursor
|
|
107
|
+
agex review --base HEAD~3 --agent claude --hint "focus on security"
|
|
108
|
+
agex review --agent codex --hypotheses 3 --no-worktree
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
| Flag | Description | Default |
|
|
112
|
+
|------|-------------|---------|
|
|
113
|
+
| `--base` | Base branch/commit to compare against | auto-detected |
|
|
114
|
+
| `--agent, -a` | Agent to use: `cursor`, `claude`, `codex` | required |
|
|
115
|
+
| `--model, -m` | Model to use | - |
|
|
116
|
+
| `--worktree` | Include worktree changes | `true` |
|
|
117
|
+
| `--hypotheses` | Number of hypotheses to generate | - |
|
|
118
|
+
| `--hint` | Additional prompt hint | - |
|
|
119
|
+
| `--timeout, -t` | Timeout in ms | `300000` |
|
|
120
|
+
|
|
121
|
+
---
|
|
122
|
+
|
|
123
|
+
### `agex run` - Execute AI agent tasks
|
|
124
|
+
|
|
125
|
+
Run a prompt through any supported AI agent.
|
|
126
|
+
|
|
127
|
+
```bash
|
|
128
|
+
agex run "build a landing page" --agent cursor
|
|
129
|
+
agex run "refactor this function" --agent claude --mode json
|
|
130
|
+
agex run "fix the bug" --approval on-request --timeout 600000
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
| Flag | Description | Default |
|
|
134
|
+
|------|-------------|---------|
|
|
135
|
+
| `--agent, -a` | Agent to use: `cursor`, `claude`, `codex` | auto-detected |
|
|
136
|
+
| `--model, -m` | Model to use | - |
|
|
137
|
+
| `--mode` | Output mode: `text`, `json`, `debug` | `text` |
|
|
138
|
+
| `--approval` | Approval policy: `never`, `on-request`, `on-failure`, `untrusted` | - |
|
|
139
|
+
| `--timeout` | Timeout in ms | `300000` |
|
|
140
|
+
| `--install` | Run install command before agent | `false` |
|
|
141
|
+
| `--install-command` | Custom install command | - |
|
|
142
|
+
| `--stream` | Enable streaming output | `true` |
|
|
143
|
+
| `--browser` | Enable browser | `true` |
|
|
144
|
+
|
|
145
|
+
---
|
|
146
|
+
|
|
147
|
+
### `agex browse` - Browser automation
|
|
148
|
+
|
|
149
|
+
Low-level browser automation via agent-browser.
|
|
150
|
+
|
|
151
|
+
```bash
|
|
152
|
+
agex browse open https://example.com
|
|
153
|
+
agex browse snapshot -i
|
|
154
|
+
agex browse click @e1
|
|
155
|
+
agex browse fill @e2 "hello@example.com"
|
|
156
|
+
agex browse screenshot page.png
|
|
157
|
+
agex browse close
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
| Flag | Description | Default |
|
|
161
|
+
|------|-------------|---------|
|
|
162
|
+
| `--install` | Force reinstall browser | `false` |
|
|
163
|
+
|
|
164
|
+
---
|
|
165
|
+
|
|
166
|
+
## Supported Agents
|
|
167
|
+
|
|
168
|
+
| Agent | Description |
|
|
169
|
+
|-------|-------------|
|
|
170
|
+
| `cursor` | Cursor IDE agent |
|
|
171
|
+
| `claude` | Anthropic Claude CLI |
|
|
172
|
+
| `codex` | OpenAI Codex CLI |
|
|
173
|
+
|
|
174
|
+
## Ship with proof.
|
|
175
|
+
|
|
176
|
+
MIT License
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agex",
|
|
3
|
-
"version": "0.2.7",
|
|
3
|
+
"version": "0.2.8",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -26,15 +26,15 @@
|
|
|
26
26
|
"typescript": "^5.9.3",
|
|
27
27
|
"vitest": "^2.1.9",
|
|
28
28
|
"agex-agent": "0.1.0",
|
|
29
|
-
"agex-browse": "0.1.0",
|
|
30
29
|
"agex-core": "0.1.0",
|
|
31
|
-
"agex-
|
|
30
|
+
"agex-browse": "0.1.0",
|
|
32
31
|
"agex-prove": "0.1.0",
|
|
33
|
-
"agex-prove-pr": "0.1.0"
|
|
32
|
+
"agex-prove-pr": "0.1.0",
|
|
33
|
+
"agex-review": "0.1.0"
|
|
34
34
|
},
|
|
35
35
|
"scripts": {
|
|
36
36
|
"build": "tsc -p tsconfig.json",
|
|
37
|
-
"bundle": "rm -rf dist assets && tsup && mkdir -p ./assets/assets ./assets/scripts && cp -r ../browse/assets/ ./assets/assets/ && cp -r ../browse/scripts/ ./assets/scripts/",
|
|
37
|
+
"bundle": "rm -rf dist assets && tsup && mkdir -p ./assets/assets ./assets/scripts && cp -r ../browse/assets/ ./assets/assets/ && cp -r ../browse/scripts/ ./assets/scripts/ && cp ../../README.md ./README.md",
|
|
38
38
|
"test": "vitest run"
|
|
39
39
|
}
|
|
40
40
|
}
|