@sparkleideas/browser 3.0.0-alpha.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +730 -0
- package/agents/architect.yaml +11 -0
- package/agents/coder.yaml +11 -0
- package/agents/reviewer.yaml +10 -0
- package/agents/security-architect.yaml +10 -0
- package/agents/tester.yaml +10 -0
- package/docker/Dockerfile +22 -0
- package/docker/docker-compose.yml +52 -0
- package/docker/test-fixtures/index.html +61 -0
- package/package.json +56 -0
- package/skills/browser/SKILL.md +204 -0
- package/src/agent/index.ts +35 -0
- package/src/application/browser-service.ts +570 -0
- package/src/domain/types.ts +324 -0
- package/src/index.ts +156 -0
- package/src/infrastructure/agent-browser-adapter.ts +654 -0
- package/src/infrastructure/hooks-integration.ts +170 -0
- package/src/infrastructure/memory-integration.ts +449 -0
- package/src/infrastructure/reasoningbank-adapter.ts +282 -0
- package/src/infrastructure/security-integration.ts +528 -0
- package/src/infrastructure/workflow-templates.ts +479 -0
- package/src/mcp-tools/browser-tools.ts +1210 -0
- package/src/mcp-tools/index.ts +6 -0
- package/src/skill/index.ts +24 -0
- package/tests/agent-browser-adapter.test.ts +328 -0
- package/tests/browser-service.test.ts +137 -0
- package/tests/e2e/browser-e2e.test.ts +175 -0
- package/tests/memory-integration.test.ts +277 -0
- package/tests/reasoningbank-adapter.test.ts +219 -0
- package/tests/security-integration.test.ts +194 -0
- package/tests/workflow-templates.test.ts +231 -0
- package/tmp.json +0 -0
- package/tsconfig.json +20 -0
- package/vitest.config.ts +15 -0
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Browser Automation Test Container
|
|
2
|
+
# Uses Playwright with all browsers pre-installed
|
|
3
|
+
|
|
4
|
+
FROM mcr.microsoft.com/playwright:v1.40.0-jammy
|
|
5
|
+
|
|
6
|
+
WORKDIR /app
|
|
7
|
+
|
|
8
|
+
# Install Node.js dependencies
|
|
9
|
+
COPY package*.json ./
|
|
10
|
+
RUN npm install
|
|
11
|
+
|
|
12
|
+
# Copy source
|
|
13
|
+
COPY . .
|
|
14
|
+
|
|
15
|
+
# Build
|
|
16
|
+
RUN npm run build
|
|
17
|
+
|
|
18
|
+
# Install agent-browser globally
|
|
19
|
+
RUN npm install -g "agent-browser@^0.6.0"
|
|
20
|
+
|
|
21
|
+
# Run tests
|
|
22
|
+
CMD ["npm", "test"]
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
version: '3.8'
|
|
2
|
+
|
|
3
|
+
services:
|
|
4
|
+
browser-tests:
|
|
5
|
+
build:
|
|
6
|
+
context: ..
|
|
7
|
+
dockerfile: docker/Dockerfile
|
|
8
|
+
environment:
|
|
9
|
+
- NODE_ENV=test
|
|
10
|
+
- PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
|
|
11
|
+
volumes:
|
|
12
|
+
- ../tests:/app/tests
|
|
13
|
+
- ../src:/app/src
|
|
14
|
+
command: npm test
|
|
15
|
+
|
|
16
|
+
# Interactive browser session for debugging
|
|
17
|
+
browser-debug:
|
|
18
|
+
build:
|
|
19
|
+
context: ..
|
|
20
|
+
dockerfile: docker/Dockerfile
|
|
21
|
+
environment:
|
|
22
|
+
- NODE_ENV=development
|
|
23
|
+
- PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD=1
|
|
24
|
+
ports:
|
|
25
|
+
- "9222:9222" # Chrome DevTools Protocol
|
|
26
|
+
volumes:
|
|
27
|
+
- ../:/app
|
|
28
|
+
command: tail -f /dev/null # Keep container running
|
|
29
|
+
profiles:
|
|
30
|
+
- debug
|
|
31
|
+
|
|
32
|
+
# E2E integration tests with real browser
|
|
33
|
+
browser-e2e:
|
|
34
|
+
build:
|
|
35
|
+
context: ..
|
|
36
|
+
dockerfile: docker/Dockerfile
|
|
37
|
+
environment:
|
|
38
|
+
- NODE_ENV=test
|
|
39
|
+
- TEST_URL=http://test-server:80  # nginx:alpine (test-server) listens on port 80, not 3000
|
|
40
|
+
depends_on:
|
|
41
|
+
- test-server
|
|
42
|
+
command: npm run test:e2e
|
|
43
|
+
profiles:
|
|
44
|
+
- e2e
|
|
45
|
+
|
|
46
|
+
# Simple test server for E2E tests
|
|
47
|
+
test-server:
|
|
48
|
+
image: nginx:alpine
|
|
49
|
+
volumes:
|
|
50
|
+
- ./test-fixtures:/usr/share/nginx/html:ro
|
|
51
|
+
profiles:
|
|
52
|
+
- e2e
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<html lang="en">
|
|
3
|
+
<head>
|
|
4
|
+
<meta charset="UTF-8">
|
|
5
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
6
|
+
<title>Browser Test Page</title>
|
|
7
|
+
<style>
|
|
8
|
+
body { font-family: system-ui, sans-serif; padding: 2rem; }
|
|
9
|
+
.form-group { margin: 1rem 0; }
|
|
10
|
+
label { display: block; margin-bottom: 0.5rem; }
|
|
11
|
+
input, textarea { padding: 0.5rem; width: 100%; max-width: 300px; }
|
|
12
|
+
button { padding: 0.5rem 1rem; cursor: pointer; }
|
|
13
|
+
#result { margin-top: 1rem; padding: 1rem; background: #f0f0f0; }
|
|
14
|
+
</style>
|
|
15
|
+
</head>
|
|
16
|
+
<body>
|
|
17
|
+
<h1>Browser Automation Test Page</h1>
|
|
18
|
+
|
|
19
|
+
<form id="test-form">
|
|
20
|
+
<div class="form-group">
|
|
21
|
+
<label for="email">Email</label>
|
|
22
|
+
<input type="email" id="email" name="email" placeholder="Enter email">
|
|
23
|
+
</div>
|
|
24
|
+
|
|
25
|
+
<div class="form-group">
|
|
26
|
+
<label for="password">Password</label>
|
|
27
|
+
<input type="password" id="password" name="password" placeholder="Enter password">
|
|
28
|
+
</div>
|
|
29
|
+
|
|
30
|
+
<div class="form-group">
|
|
31
|
+
<label for="message">Message</label>
|
|
32
|
+
<textarea id="message" name="message" rows="3" placeholder="Enter message"></textarea>
|
|
33
|
+
</div>
|
|
34
|
+
|
|
35
|
+
<div class="form-group">
|
|
36
|
+
<label>
|
|
37
|
+
<input type="checkbox" id="agree" name="agree">
|
|
38
|
+
I agree to terms
|
|
39
|
+
</label>
|
|
40
|
+
</div>
|
|
41
|
+
|
|
42
|
+
<button type="submit" id="submit-btn">Submit</button>
|
|
43
|
+
</form>
|
|
44
|
+
|
|
45
|
+
<div id="result" style="display: none;">
|
|
46
|
+
<h2>Form Submitted!</h2>
|
|
47
|
+
<pre id="result-data"></pre>
|
|
48
|
+
</div>
|
|
49
|
+
|
|
50
|
+
<script>
|
|
51
|
+
document.getElementById('test-form').addEventListener('submit', function(e) {
|
|
52
|
+
e.preventDefault();
|
|
53
|
+
const formData = new FormData(this);
|
|
54
|
+
const data = Object.fromEntries(formData.entries());
|
|
55
|
+
|
|
56
|
+
document.getElementById('result').style.display = 'block';
|
|
57
|
+
document.getElementById('result-data').textContent = JSON.stringify(data, null, 2);
|
|
58
|
+
});
|
|
59
|
+
</script>
|
|
60
|
+
</body>
|
|
61
|
+
</html>
|
package/package.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@sparkleideas/browser",
|
|
3
|
+
"version": "3.0.0-alpha.3",
|
|
4
|
+
"description": "Browser automation for AI agents - integrates agent-browser with claude-flow swarms",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": "./dist/index.js",
|
|
10
|
+
"./mcp-tools": "./dist/mcp-tools/index.js",
|
|
11
|
+
"./skill": "./dist/skill/index.js",
|
|
12
|
+
"./agent": "./dist/agent/index.js"
|
|
13
|
+
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsc",
|
|
16
|
+
"dev": "tsc --watch",
|
|
17
|
+
"test": "vitest run",
|
|
18
|
+
"test:watch": "vitest",
|
|
19
|
+
"test:e2e": "vitest run tests/e2e",
|
|
20
|
+
"lint": "eslint src/",
|
|
21
|
+
"typecheck": "tsc --noEmit",
|
|
22
|
+
"postinstall": "node -e \"const{execSync}=require('child_process');try{execSync('agent-browser --version',{stdio:'ignore'});}catch{console.log('\\n📦 Installing agent-browser globally...');try{execSync('npm install -g agent-browser@latest',{stdio:'inherit'});console.log('✅ agent-browser installed successfully!');}catch(e){console.log('⚠️ Could not install agent-browser globally. Run manually:\\n npm install -g agent-browser@latest');}}\""
|
|
23
|
+
},
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"agent-browser": "^0.6.0",
|
|
26
|
+
"zod": "^3.22.4",
|
|
27
|
+
"@sparkleideas/agentic-flow": "^2.0.3"
|
|
28
|
+
},
|
|
29
|
+
"peerDependencies": {
|
|
30
|
+
"@sparkleideas/cli": "^3.0.0-alpha.140"
|
|
31
|
+
},
|
|
32
|
+
"devDependencies": {
|
|
33
|
+
"@types/node": "^20.10.0",
|
|
34
|
+
"typescript": "^5.3.0",
|
|
35
|
+
"vitest": "^2.0.0"
|
|
36
|
+
},
|
|
37
|
+
"keywords": [
|
|
38
|
+
"browser",
|
|
39
|
+
"automation",
|
|
40
|
+
"ai-agent",
|
|
41
|
+
"claude-flow",
|
|
42
|
+
"playwright",
|
|
43
|
+
"web-scraping",
|
|
44
|
+
"testing"
|
|
45
|
+
],
|
|
46
|
+
"author": "ruvnet",
|
|
47
|
+
"license": "MIT",
|
|
48
|
+
"repository": {
|
|
49
|
+
"type": "git",
|
|
50
|
+
"url": "https://github.com/ruvnet/claude-flow.git",
|
|
51
|
+
"directory": "v3/@claude-flow/browser"
|
|
52
|
+
},
|
|
53
|
+
"engines": {
|
|
54
|
+
"node": ">=18.0.0"
|
|
55
|
+
}
|
|
56
|
+
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: browser
|
|
3
|
+
description: Web browser automation with AI-optimized snapshots for claude-flow agents
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
triggers:
|
|
6
|
+
- /browser
|
|
7
|
+
- browse
|
|
8
|
+
- web automation
|
|
9
|
+
- scrape
|
|
10
|
+
- navigate
|
|
11
|
+
- screenshot
|
|
12
|
+
tools:
|
|
13
|
+
- browser/open
|
|
14
|
+
- browser/snapshot
|
|
15
|
+
- browser/click
|
|
16
|
+
- browser/fill
|
|
17
|
+
- browser/screenshot
|
|
18
|
+
- browser/close
|
|
19
|
+
---
|
|
20
|
+
|
|
21
|
+
# Browser Automation Skill
|
|
22
|
+
|
|
23
|
+
Web browser automation using agent-browser with AI-optimized snapshots. Reduces context by 93% using element refs (@e1, @e2) instead of full DOM.
|
|
24
|
+
|
|
25
|
+
## Core Workflow
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# 1. Navigate to page
|
|
29
|
+
agent-browser open <url>
|
|
30
|
+
|
|
31
|
+
# 2. Get accessibility tree with element refs
|
|
32
|
+
agent-browser snapshot -i # -i = interactive elements only
|
|
33
|
+
|
|
34
|
+
# 3. Interact using refs from snapshot
|
|
35
|
+
agent-browser click @e2
|
|
36
|
+
agent-browser fill @e3 "text"
|
|
37
|
+
|
|
38
|
+
# 4. Re-snapshot after page changes
|
|
39
|
+
agent-browser snapshot -i
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Quick Reference
|
|
43
|
+
|
|
44
|
+
### Navigation
|
|
45
|
+
| Command | Description |
|
|
46
|
+
|---------|-------------|
|
|
47
|
+
| `open <url>` | Navigate to URL |
|
|
48
|
+
| `back` | Go back |
|
|
49
|
+
| `forward` | Go forward |
|
|
50
|
+
| `reload` | Reload page |
|
|
51
|
+
| `close` | Close browser |
|
|
52
|
+
|
|
53
|
+
### Snapshots (AI-Optimized)
|
|
54
|
+
| Command | Description |
|
|
55
|
+
|---------|-------------|
|
|
56
|
+
| `snapshot` | Full accessibility tree |
|
|
57
|
+
| `snapshot -i` | Interactive elements only (buttons, links, inputs) |
|
|
58
|
+
| `snapshot -c` | Compact (remove empty elements) |
|
|
59
|
+
| `snapshot -d 3` | Limit depth to 3 levels |
|
|
60
|
+
| `screenshot [path]` | Capture screenshot (base64 if no path) |
|
|
61
|
+
|
|
62
|
+
### Interaction
|
|
63
|
+
| Command | Description |
|
|
64
|
+
|---------|-------------|
|
|
65
|
+
| `click <sel>` | Click element |
|
|
66
|
+
| `fill <sel> <text>` | Clear and fill input |
|
|
67
|
+
| `type <sel> <text>` | Type with key events |
|
|
68
|
+
| `press <key>` | Press key (Enter, Tab, etc.) |
|
|
69
|
+
| `hover <sel>` | Hover element |
|
|
70
|
+
| `select <sel> <val>` | Select dropdown option |
|
|
71
|
+
| `check/uncheck <sel>` | Toggle checkbox |
|
|
72
|
+
| `scroll <dir> [px]` | Scroll page |
|
|
73
|
+
|
|
74
|
+
### Get Info
|
|
75
|
+
| Command | Description |
|
|
76
|
+
|---------|-------------|
|
|
77
|
+
| `get text <sel>` | Get text content |
|
|
78
|
+
| `get html <sel>` | Get innerHTML |
|
|
79
|
+
| `get value <sel>` | Get input value |
|
|
80
|
+
| `get attr <sel> <attr>` | Get attribute |
|
|
81
|
+
| `get title` | Get page title |
|
|
82
|
+
| `get url` | Get current URL |
|
|
83
|
+
|
|
84
|
+
### Wait
|
|
85
|
+
| Command | Description |
|
|
86
|
+
|---------|-------------|
|
|
87
|
+
| `wait <selector>` | Wait for element |
|
|
88
|
+
| `wait <ms>` | Wait milliseconds |
|
|
89
|
+
| `wait --text "text"` | Wait for text |
|
|
90
|
+
| `wait --url "pattern"` | Wait for URL |
|
|
91
|
+
| `wait --load networkidle` | Wait for load state |
|
|
92
|
+
|
|
93
|
+
### Sessions
|
|
94
|
+
| Command | Description |
|
|
95
|
+
|---------|-------------|
|
|
96
|
+
| `--session <name>` | Use isolated session |
|
|
97
|
+
| `session list` | List active sessions |
|
|
98
|
+
|
|
99
|
+
## Selectors
|
|
100
|
+
|
|
101
|
+
### Element Refs (Recommended)
|
|
102
|
+
```bash
|
|
103
|
+
# Get refs from snapshot
|
|
104
|
+
agent-browser snapshot -i
|
|
105
|
+
# Output: button "Submit" [ref=e2]
|
|
106
|
+
|
|
107
|
+
# Use ref to interact
|
|
108
|
+
agent-browser click @e2
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### CSS Selectors
|
|
112
|
+
```bash
|
|
113
|
+
agent-browser click "#submit"
|
|
114
|
+
agent-browser fill ".email-input" "test@test.com"
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Semantic Locators
|
|
118
|
+
```bash
|
|
119
|
+
agent-browser find role button click --name "Submit"
|
|
120
|
+
agent-browser find label "Email" fill "test@test.com"
|
|
121
|
+
agent-browser find testid "login-btn" click
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## Examples
|
|
125
|
+
|
|
126
|
+
### Login Flow
|
|
127
|
+
```bash
|
|
128
|
+
agent-browser open https://example.com/login
|
|
129
|
+
agent-browser snapshot -i
|
|
130
|
+
agent-browser fill @e2 "user@example.com"
|
|
131
|
+
agent-browser fill @e3 "password123"
|
|
132
|
+
agent-browser click @e4
|
|
133
|
+
agent-browser wait --url "**/dashboard"
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### Form Submission
|
|
137
|
+
```bash
|
|
138
|
+
agent-browser open https://example.com/contact
|
|
139
|
+
agent-browser snapshot -i
|
|
140
|
+
agent-browser fill @e1 "John Doe"
|
|
141
|
+
agent-browser fill @e2 "john@example.com"
|
|
142
|
+
agent-browser fill @e3 "Hello, this is my message"
|
|
143
|
+
agent-browser click @e4
|
|
144
|
+
agent-browser wait --text "Thank you"
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Data Extraction
|
|
148
|
+
```bash
|
|
149
|
+
agent-browser open https://example.com/products
|
|
150
|
+
agent-browser snapshot -i
|
|
151
|
+
# Iterate through product refs
|
|
152
|
+
agent-browser get text @e1 # Product name
|
|
153
|
+
agent-browser get text @e2 # Price
|
|
154
|
+
agent-browser get attr @e3 href # Link
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
### Multi-Session (Swarm)
|
|
158
|
+
```bash
|
|
159
|
+
# Session 1: Navigator
|
|
160
|
+
agent-browser --session nav open https://example.com
|
|
161
|
+
agent-browser --session nav state save auth.json
|
|
162
|
+
|
|
163
|
+
# Session 2: Scraper (uses same auth)
|
|
164
|
+
agent-browser --session scrape state load auth.json
|
|
165
|
+
agent-browser --session scrape open https://example.com/data
|
|
166
|
+
agent-browser --session scrape snapshot -i
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Integration with Claude Flow
|
|
170
|
+
|
|
171
|
+
### MCP Tools
|
|
172
|
+
All browser operations are available as MCP tools with `browser/` prefix:
|
|
173
|
+
- `browser/open`
|
|
174
|
+
- `browser/snapshot`
|
|
175
|
+
- `browser/click`
|
|
176
|
+
- `browser/fill`
|
|
177
|
+
- `browser/screenshot`
|
|
178
|
+
- etc.
|
|
179
|
+
|
|
180
|
+
### Memory Integration
|
|
181
|
+
```bash
|
|
182
|
+
# Store successful patterns
|
|
183
|
+
npx @claude-flow/cli memory store --namespace browser-patterns --key "login-flow" --value "snapshot->fill->click->wait"
|
|
184
|
+
|
|
185
|
+
# Retrieve before similar task
|
|
186
|
+
npx @claude-flow/cli memory search --query "login automation"
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Hooks
|
|
190
|
+
```bash
|
|
191
|
+
# Pre-browse hook (get context)
|
|
192
|
+
npx @claude-flow/cli hooks pre-edit --file "browser-task.ts"
|
|
193
|
+
|
|
194
|
+
# Post-browse hook (record success)
|
|
195
|
+
npx @claude-flow/cli hooks post-task --task-id "browse-1" --success true
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Tips
|
|
199
|
+
|
|
200
|
+
1. **Always use snapshots** - They're optimized for AI with refs
|
|
201
|
+
2. **Prefer `-i` flag** - Gets only interactive elements, smaller output
|
|
202
|
+
3. **Use refs, not selectors** - More reliable, deterministic
|
|
203
|
+
4. **Re-snapshot after navigation** - Page state changes
|
|
204
|
+
5. **Use sessions for parallel work** - Each session is isolated
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Agent Exports
|
|
3
|
+
* Re-exports agent-related functionality for swarm integration
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export { BrowserSwarmCoordinator, createBrowserSwarm } from '../application/browser-service.js';
|
|
7
|
+
export { ReasoningBankAdapter, getReasoningBank } from '../infrastructure/reasoningbank-adapter.js';
|
|
8
|
+
export type { BrowserPattern, PatternStep } from '../infrastructure/reasoningbank-adapter.js';
|
|
9
|
+
|
|
10
|
+
// Agent metadata
|
|
11
|
+
export const AGENT_METADATA = {
|
|
12
|
+
name: 'browser-agent',
|
|
13
|
+
description: 'Web automation specialist using agent-browser with AI-optimized snapshots',
|
|
14
|
+
version: '1.0.0',
|
|
15
|
+
routing: {
|
|
16
|
+
complexity: 'medium',
|
|
17
|
+
model: 'sonnet',
|
|
18
|
+
priority: 'normal',
|
|
19
|
+
keywords: ['browser', 'web', 'scrape', 'screenshot', 'navigate', 'login', 'form', 'click', 'automate'],
|
|
20
|
+
},
|
|
21
|
+
capabilities: [
|
|
22
|
+
'web-navigation',
|
|
23
|
+
'form-interaction',
|
|
24
|
+
'screenshot-capture',
|
|
25
|
+
'data-extraction',
|
|
26
|
+
'network-interception',
|
|
27
|
+
'session-management',
|
|
28
|
+
'multi-tab-coordination',
|
|
29
|
+
],
|
|
30
|
+
swarm: {
|
|
31
|
+
roles: ['navigator', 'scraper', 'validator', 'tester', 'monitor'],
|
|
32
|
+
topology: 'hierarchical',
|
|
33
|
+
maxSessions: 5,
|
|
34
|
+
},
|
|
35
|
+
};
|