@dyyz1993/agent-browser 0.11.0 → 0.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/agent-browser-linux-x64 +0 -0
- package/dist/cli/help.js +1 -1
- package/dist/openapi.js +1 -1
- package/dist/stream-server-standalone.js +3 -3
- package/dist/viewer-script.js +54 -54
- package/package.json +1 -1
- package/skills/agent-browser/SKILL.md +279 -229
- package/skills/agent-browser/references/mobile-viewer.md +188 -0
- package/skills/agent-browser/references/viewer-mode.md +148 -0
- package/skills/agent-browser/templates/api-interception.sh +3 -1
- package/skills/agent-browser/templates/data-extraction.sh +8 -4
- package/skills/agent-browser/templates/form-automation.sh +18 -23
- package/skills/agent-browser/templates/network-intercept-crawl.sh +1 -0
- package/skills/agent-browser/templates/recorder-workflow.sh +51 -0
- package/skills/agent-browser/templates/viewer-remote.sh +41 -0
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/scripts/check_goods_container.js +0 -35
- package/scripts/check_page_content.js +0 -36
- package/scripts/click_applause_rate.js +0 -30
- package/scripts/e2e-test-recorder.ts +0 -584
- package/scripts/explore_jd_page.js +0 -31
- package/scripts/extract_all_jd_data.js +0 -80
- package/scripts/extract_jd_product_detail.js +0 -62
- package/scripts/extract_jd_products_correct_links.js +0 -78
- package/scripts/extract_jd_products_final.js +0 -80
- package/scripts/extract_jd_reviews.js +0 -48
- package/scripts/extract_jd_seafood_final.js +0 -78
- package/scripts/extract_multiple_products.js +0 -77
- package/scripts/extract_products_no_scroll.js +0 -68
- package/scripts/extract_products_simple.js +0 -68
- package/scripts/find_applause_rate.js +0 -26
- package/scripts/find_jd_links.js +0 -28
- package/scripts/find_main_content.js +0 -20
- package/scripts/find_product_cards.js +0 -38
- package/scripts/find_root_content.js +0 -26
- package/scripts/find_unique_products.js +0 -55
- package/scripts/get_jd_product_detail.js +0 -16
- package/scripts/get_jd_products.js +0 -23
- package/scripts/get_jd_seafood_products.js +0 -44
- package/scripts/get_product_details_from_images.js +0 -54
- package/scripts/scroll_and_get_products.js +0 -47
- package/scripts/scroll_deep_and_find.js +0 -45
- package/scripts/verify-baidu-enter.ts +0 -116
- package/scripts/verify-form.sh +0 -67
- package/scripts/verify-login.sh +0 -65
- package/scripts/verify-recording.sh +0 -80
- package/scripts/verify-upload.sh +0 -41
- package/skills/agent-browser/references/profiling.md +0 -120
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Mobile Remote Control (Viewer Mode)
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
When the agent-browser viewer is opened on a **touch device** (phone, tablet), it automatically enters **mobile mode** with a touch-optimized UI. This is distinct from iOS Simulator mode — it works on ANY phone/tablet browser via the web viewer, requiring no simulator installation.
|
|
6
|
+
|
|
7
|
+
## Touchpad System
|
|
8
|
+
|
|
9
|
+
The touchpad occupies the bottom portion of the viewer screen and simulates mouse input on the remote browser:
|
|
10
|
+
|
|
11
|
+
| Gesture | Action | Visual Feedback |
|
|
12
|
+
| ------------------- | ------------------------------------ | --------------------------------------- |
|
|
13
|
+
| Single tap | Click at virtual cursor position | Cursor flashes red briefly |
|
|
14
|
+
| Single finger drag | Move virtual cursor on remote screen | Cursor follows finger |
|
|
15
|
+
| Long press (~800ms) | Enter drag mode (hold mouse down) | Cursor turns orange, shows "DRAG" badge |
|
|
16
|
+
| Two-finger drag | Scroll wheel (vertical/horizontal) | Shows "SCROLL" badge |
|
|
17
|
+
| Two-finger release | Momentum scroll (deceleration) | Smooth deceleration after release |
|
|
18
|
+
|
|
19
|
+
**Implementation details:**
|
|
20
|
+
|
|
21
|
+
- All touch listeners use `{ passive: false }` + `preventDefault()` to prevent browser gestures
|
|
22
|
+
- Movement uses acceleration curve for natural feel (`computeAcceleration()`)
|
|
23
|
+
- Scroll uses separate wheel acceleration (`computeWheelAccel()`)
|
|
24
|
+
- Cooldown period after two-finger scroll prevents accidental clicks
|
|
25
|
+
- Momentum scroll uses RAF loop with 0.92 decay factor
|
|
26
|
+
|
|
27
|
+
## Virtual Keyboard Toolbar
|
|
28
|
+
|
|
29
|
+
Collapsible toolbar at the top of the touchpad area:
|
|
30
|
+
|
|
31
|
+
| Button | Key Sent | Code |
|
|
32
|
+
| ----------- | ----------- | ------------ |
|
|
33
|
+
| Tab | Tab | `Tab` |
|
|
34
|
+
| Up Arrow | Arrow Up | `ArrowUp` |
|
|
35
|
+
| Left Arrow | Arrow Left | `ArrowLeft` |
|
|
36
|
+
| Down Arrow | Arrow Down | `ArrowDown` |
|
|
37
|
+
| Right Arrow | Arrow Right | `ArrowRight` |
|
|
38
|
+
| Enter | Enter | `Enter` |
|
|
39
|
+
| Backspace | Backspace | `Backspace` |
|
|
40
|
+
| Escape | Escape | `Escape` |
|
|
41
|
+
|
|
42
|
+
- **Collapsed state** (default): Shows only expand button (+ icon)
|
|
43
|
+
- **Expanded state**: Shows all 8 keys in wrapped layout
|
|
44
|
+
- Tap any key to send immediately to remote browser (no need to switch to keyboard app)
|
|
45
|
+
|
|
46
|
+
## Text Input (Input Panel)
|
|
47
|
+
|
|
48
|
+
This is the key innovation for mobile remote control — typing text into remote input fields from your phone.
|
|
49
|
+
|
|
50
|
+
### Flow Diagram
|
|
51
|
+
|
|
52
|
+
```
|
|
53
|
+
User taps remote <input> on viewer screen
|
|
54
|
+
↓
|
|
55
|
+
Daemon detects focus event via injected listener
|
|
56
|
+
↓
|
|
57
|
+
Daemon sends {type: "input_focused", value: "...", ...} to viewer
|
|
58
|
+
↓
|
|
59
|
+
Viewer enters INPUT MODE:
|
|
60
|
+
- Hides virtual cursor
|
|
61
|
+
- Shows #input-panel at screen bottom
|
|
62
|
+
- Pre-fills local input field with current value
|
|
63
|
+
- Sets window._currentTargetSelector for fill targeting
|
|
64
|
+
↓
|
|
65
|
+
User types in local input field (with IME if needed)
|
|
66
|
+
↓
|
|
67
|
+
Text syncs to remote via {type: "input_fill", text: "...", selector: "..."}
|
|
68
|
+
↓
|
|
69
|
+
User taps Send (arrow icon) or presses Enter:
|
|
70
|
+
- Sends final input_fill + Enter keydown/keyup
|
|
71
|
+
- Exits input mode
|
|
72
|
+
↓
|
|
73
|
+
OR user taps Escape or clicks outside panel:
|
|
74
|
+
- Sends input_blur_element to remote
|
|
75
|
+
- Exits input mode, restores touchpad
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### IME / CJK Composition Support
|
|
79
|
+
|
|
80
|
+
Critical for Chinese, Japanese, Korean input methods:
|
|
81
|
+
|
|
82
|
+
| Event | Handling | Prevents |
|
|
83
|
+
| ------------------------ | -------------------------------------------------------- | ------------------------------------------- |
|
|
84
|
+
| `compositionstart` | Sets `_fieldComposing = true` | Intermediate pinyin sent to remote |
|
|
85
|
+
| `compositionupdate` | (ignored while composing) | Garbage characters |
|
|
86
|
+
| `compositionend` | Sets `_fieldComposing = false`, double-RAF deferred sync | Partial commits sent early |
|
|
87
|
+
| RAF poll (30ms interval) | Skips sync while `_fieldComposing === true` | Race condition with IME candidate selection |
|
|
88
|
+
|
|
89
|
+
**Key insight:** Only fully committed characters (after user selects from IME candidate list) are synced to the remote browser. Intermediate pinyin/kana composition is completely filtered out.
|
|
90
|
+
|
|
91
|
+
### Input Panel Layout
|
|
92
|
+
|
|
93
|
+
```
|
|
94
|
+
┌─────────────────────────────────────────┐
|
|
95
|
+
│ target: input[type="email"] │ <- label row
|
|
96
|
+
├─────────────────────────────────────────┤
|
|
97
|
+
│ [________________________] [>] │ <- input + send button
|
|
98
|
+
└─────────────────────────────────────────┘
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
- Label shows: input type + placeholder (if different from value)
|
|
102
|
+
- Input field: `border-radius: 18px`, `font-size:16px` (prevents iOS zoom)
|
|
103
|
+
- Send button: Blue circle with arrow SVG icon
|
|
104
|
+
- Dismissal: Tap outside panel or press Escape
|
|
105
|
+
|
|
106
|
+
### Keyboard Awareness on Mobile
|
|
107
|
+
|
|
108
|
+
On mobile devices, the viewer intentionally suppresses keyboard-related events to prevent interference:
|
|
109
|
+
|
|
110
|
+
- `hiddenInput` (#hidden-input) is **NOT created** on touch devices (unlike desktop mode)
|
|
111
|
+
- Document-level `keydown`/`keyup` listeners check `event.target` — ignores events from `#input-field`
|
|
112
|
+
- This allows the native mobile keyboard to work normally for text input without conflicting with remote keyboard forwarding
|
|
113
|
+
|
|
114
|
+
## DeviceMode Dynamic Switching
|
|
115
|
+
|
|
116
|
+
The viewer does not detect the device type just once at startup; it uses a reactive architecture that can switch modes at runtime:
|
|
117
|
+
|
|
118
|
+
### Detection Function
|
|
119
|
+
|
|
120
|
+
```javascript
|
|
121
|
+
function detectDeviceMode() {
|
|
122
|
+
var uaMatch = /iphone|ipod|android(?=.*mobile)|mobile|tablet|ipad/i.test(navigator.userAgent);
|
|
123
|
+
var hasTouch = 'ontouchstart' in window || navigator.maxTouchPoints > 0;
|
|
124
|
+
return uaMatch || hasTouch ? 'mobile' : 'desktop';
|
|
125
|
+
}
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Singleton Architecture
|
|
129
|
+
|
|
130
|
+
```javascript
|
|
131
|
+
const DeviceMode = {
|
|
132
|
+
_current: detectDeviceMode(), // Initial detection
|
|
133
|
+
_listeners: [], // Change callbacks
|
|
134
|
+
|
|
135
|
+
get current() {
|
|
136
|
+
return this._current;
|
|
137
|
+
},
|
|
138
|
+
|
|
139
|
+
onModeChange(fn) {
|
|
140
|
+
this._listeners.push(fn);
|
|
141
|
+
},
|
|
142
|
+
|
|
143
|
+
switchTo(mode) {
|
|
144
|
+
if (mode === this._current) return; // No-op for same mode
|
|
145
|
+
var prev = this._current;
|
|
146
|
+
this._current = mode;
|
|
147
|
+
if (mode === 'desktop') {
|
|
148
|
+
MobileModule.detach(); // Hide touchpad, show cursor
|
|
149
|
+
DesktopModule.attach(); // Create hiddenInput, focus it
|
|
150
|
+
} else {
|
|
151
|
+
DesktopModule.detach(); // Remove hiddenInput
|
|
152
|
+
MobileModule.attach(); // Show touchpad, init cursor
|
|
153
|
+
}
|
|
154
|
+
this._listeners.forEach((fn) => fn(mode, prev));
|
|
155
|
+
},
|
|
156
|
+
};
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
### Module Lifecycle
|
|
160
|
+
|
|
161
|
+
**DesktopModule** (PC mode):
|
|
162
|
+
|
|
163
|
+
- `attach()`: Creates invisible `#hidden-input`, focuses it (captures keyboard for remote forwarding)
|
|
164
|
+
- `detach()`: Blurs and removes hiddenInput
|
|
165
|
+
|
|
166
|
+
**MobileModule** (touch mode):
|
|
167
|
+
|
|
168
|
+
- `attach()`: Shows touchpad (display:flex), initializes virtual cursor, sets up toolbar
|
|
169
|
+
- `detach()`: Hides input-panel, shows cursor again
|
|
170
|
+
|
|
171
|
+
### Auto-Switching Triggers
|
|
172
|
+
|
|
173
|
+
| Trigger | Handler | Use Case |
|
|
174
|
+
| --------------------------------------- | ------------------------- | ------------------------------------------------- |
|
|
175
|
+
| `resize` event | Debounced 100ms re-detect | Phone rotation, window resize |
|
|
176
|
+
| `orientationchange` | Delayed 200ms re-detect | Portrait<->Landscape |
|
|
177
|
+
| `matchMedia("(pointer:coarse)")` change | Immediate switch | Stylus connect/disconnect, tablet keyboard attach |
|
|
178
|
+
|
|
179
|
+
## Mobile-Specific CSS Considerations
|
|
180
|
+
|
|
181
|
+
| Issue | Solution |
|
|
182
|
+
| -------------------------------------- | --------------------------------------------------------- |
|
|
183
|
+
| iOS keyboard pushes content up | `min-height`/`max-height: 100dvh` on html/body, `position: fixed` |
|
|
184
|
+
| VisualViewport API for keyboard height | Listener resizes input panel above keyboard |
|
|
185
|
+
| iOS auto-scroll during input | `setInterval` scroll guard (100ms) fights browser scroll |
|
|
186
|
+
| Browser gesture conflicts | `touch-action: none` on body during input mode |
|
|
187
|
+
| Safe area (notch phones) | `padding-bottom: env(safe-area-inset-bottom)` on touchpad |
|
|
188
|
+
| Small tap targets | Minimum 44px height on buttons (iOS guideline) |
|
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
# Viewer / Streaming Mode
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The viewer mode provides a **real-time visual remote browser interface**. It streams browser frames (JPEG/WebP) over WebSocket and forwards user input (mouse, keyboard, touch) back to the daemon. This enables:
|
|
6
|
+
|
|
7
|
+
- **Remote debugging** — see what the browser sees in real time
|
|
8
|
+
- **Mobile device control** — operate a desktop browser from your phone
|
|
9
|
+
- **Presentation/demo** — show browser activity to an audience
|
|
10
|
+
- **Collaboration** — share a browser session with others
|
|
11
|
+
|
|
12
|
+
## Starting the Viewer
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# Prerequisite: have a browser session running
|
|
16
|
+
agent-browser open https://example.com
|
|
17
|
+
|
|
18
|
+
# Start viewer (opens URL in default browser)
|
|
19
|
+
agent-browser viewer
|
|
20
|
+
|
|
21
|
+
# Get connection details as JSON (for scripting/embedding)
|
|
22
|
+
agent-browser viewer --json
|
|
23
|
+
# Output: {"url":"http://localhost:5005/view?session=default","ws":"ws://...","port":5005}
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
## Viewer URL Parameters
|
|
27
|
+
|
|
28
|
+
| Parameter | Description |
|
|
29
|
+
| ------------------ | -------------------------------------- |
|
|
30
|
+
| `?session=<id>` | Connect to a specific named session |
|
|
31
|
+
| `?instanceId=<id>` | Connect to a specific browser instance |
|
|
32
|
+
|
|
33
|
+
## Architecture
|
|
34
|
+
|
|
35
|
+
```
|
|
36
|
+
┌─────────────┐ IPC ┌───────────────────┐ WebSocket ┌──────────┐
|
|
37
|
+
│ Browser │ ───────→ │ Daemon Process │ ←────────────→ │ Viewer │
|
|
38
|
+
│ (Playwright) │ │ (:5000 socket) │ │ (Browser) │
|
|
39
|
+
└─────────────┘ └────────┬─────────┘ └──────────┘
|
|
40
|
+
│
|
|
41
|
+
standalone HTTP+WS server (:5005)
|
|
42
|
+
serves viewer.html + proxies messages
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
**Data flow:**
|
|
46
|
+
|
|
47
|
+
1. **Frames**: Browser -> Daemon -> Standalone Server -> Viewer (binary JPEG/WebP via WS)
|
|
48
|
+
2. **Input**: Viewer -> Standalone Server -> Daemon -> Browser (JSON messages)
|
|
49
|
+
|
|
50
|
+
## Viewer Page Features
|
|
51
|
+
|
|
52
|
+
### Desktop Mode (PC/Mac)
|
|
53
|
+
|
|
54
|
+
| Feature | Description |
|
|
55
|
+
| ------------ | ------------------------------------------------------------------- |
|
|
56
|
+
| Screen area | Shows streamed frame, click/drag/scroll sends input to remote |
|
|
57
|
+
| Toolbar | URL bar, connection status, quality badge, record button |
|
|
58
|
+
| Hidden input | Invisible capture field for keyboard events (auto-focused on click) |
|
|
59
|
+
| Cursor | Red dot showing remote mouse position |
|
|
60
|
+
|
|
61
|
+
### Mobile Mode (Touch Device)
|
|
62
|
+
|
|
63
|
+
Automatically activates on touch devices. See [mobile-viewer.md](mobile-viewer.md) for full details.
|
|
64
|
+
|
|
65
|
+
| Feature | Description |
|
|
66
|
+
| ---------------- | --------------------------------------------------- |
|
|
67
|
+
| Touchpad | Bottom gesture area for cursor simulation |
|
|
68
|
+
| Input Panel | Text input popup when tapping remote input fields |
|
|
69
|
+
| Keyboard toolbar | Virtual keys: Tab, Arrows, Enter, Backspace, Escape |
|
|
70
|
+
| IME support | Chinese/Japanese/Korean composition (pinyin, kana, hangul, etc.) |
|
|
71
|
+
|
|
72
|
+
## Element Selector / Crop Mode
|
|
73
|
+
|
|
74
|
+
Crop the video stream to a specific DOM element's bounds:
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# Via viewer UI: click element selector button, then click target element
|
|
78
|
+
# The stream is cropped to that element's rectangle
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
When element mode is active:
|
|
82
|
+
|
|
83
|
+
- Server crops frames to element bounds using Sharp
|
|
84
|
+
- Mouse coordinates auto-map to element-local space
|
|
85
|
+
- Falls back to "degraded mode" (full page) if element not found or disappears
|
|
86
|
+
- `deviceWidth`/`deviceHeight` in metadata reflect element dimensions
|
|
87
|
+
|
|
88
|
+
Use cases:
|
|
89
|
+
|
|
90
|
+
- Focus testing on a specific component
|
|
91
|
+
- Recording interactions within a widget
|
|
92
|
+
- Bandwidth savings (only stream the element, not full page)
|
|
93
|
+
|
|
94
|
+
## Message Types (Viewer <-> Server)
|
|
95
|
+
|
|
96
|
+
### Server → Viewer (over WebSocket)
|
|
97
|
+
|
|
98
|
+
| Type | Purpose |
|
|
99
|
+
| --------------- | ------------------------------------------------------------------ |
|
|
100
|
+
| `frame` | Binary frame data with metadata (dimensions, format, element info) |
|
|
101
|
+
| `status` | Connection status, viewport changes |
|
|
102
|
+
| `navigation` | URL/title changes |
|
|
103
|
+
| `input_focused` | Remote element received focus → triggers input panel (mobile) |
|
|
104
|
+
| `input_value` | Remote input value changed |
|
|
105
|
+
| `input_blur` | Remote element lost focus |
|
|
106
|
+
|
|
107
|
+
### Viewer → Server (over WebSocket)
|
|
108
|
+
|
|
109
|
+
| Type | Purpose |
|
|
110
|
+
| ---------------------- | --------------------------------------------------- |
|
|
111
|
+
| `input_mouse` | Mouse move/press/release/wheel |
|
|
112
|
+
| `input_keyboard` | Key down/up with modifiers |
|
|
113
|
+
| `input_fill` | Full text value sync (mobile input panel) |
|
|
114
|
+
| `input_blur_element` | Blur remote element (mobile input commit) |
|
|
115
|
+
| `keyboard_insert_text` | Character-by-character insert (desktop hiddenInput) |
|
|
116
|
+
| `user_activity` | Keep-alive signal (resumes streaming if paused) |
|
|
117
|
+
| `selector_element` | Request crop to specific element |
|
|
118
|
+
|
|
119
|
+
## Troubleshooting
|
|
120
|
+
|
|
121
|
+
### Black screen
|
|
122
|
+
|
|
123
|
+
- Check daemon is running: `agent-browser status`
|
|
124
|
+
- Verify browser launched: `agent-browser open https://example.com` should work first
|
|
125
|
+
|
|
126
|
+
### Connection refused
|
|
127
|
+
|
|
128
|
+
- The viewer command auto-starts the standalone server on port 5005
|
|
129
|
+
- If port conflicts, check: `lsof -i :5005`
|
|
130
|
+
- Kill stale process: `kill $(lsof -t -i :5005)`
|
|
131
|
+
|
|
132
|
+
### Laggy updates
|
|
133
|
+
|
|
134
|
+
- Frame compression is JPEG by default (adjustable)
|
|
135
|
+
- Quality badge shows current state: "interacting" / "static" / "compressed"
|
|
136
|
+
- Network latency between viewer and server affects frame rate
|
|
137
|
+
|
|
138
|
+
### Element not found (degraded mode)
|
|
139
|
+
|
|
140
|
+
- Yellow toast appears: "Element not found, showing full page"
|
|
141
|
+
- Element may have been removed by SPA navigation or animation
|
|
142
|
+
- Re-select the element or exit selector mode
|
|
143
|
+
|
|
144
|
+
### Viewer shows but no frame
|
|
145
|
+
|
|
146
|
+
- Check daemon log: `~/.agent-browser/default.log`
|
|
147
|
+
- Look for "Browser not launched" errors
|
|
148
|
+
- Ensure `agent-browser open` was called before `agent-browser viewer`
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/bin/bash
|
|
2
2
|
# API Interception Template - Passively capture API responses
|
|
3
|
+
set -euo pipefail
|
|
3
4
|
# Usage: ./api-interception.sh [target_url] [output_file]
|
|
4
5
|
|
|
5
6
|
TARGET_URL="${1:-https://example.com/user/profile}"
|
|
@@ -12,7 +13,8 @@ sleep 1
|
|
|
12
13
|
|
|
13
14
|
echo ""
|
|
14
15
|
echo "=== 2. Open blank page ==="
|
|
15
|
-
|
|
16
|
+
# Optional: set PROXY_URL if using a proxy
|
|
17
|
+
export https_proxy=${PROXY_URL:-}
|
|
16
18
|
agent-browser open "about:blank"
|
|
17
19
|
sleep 1
|
|
18
20
|
|
|
@@ -34,7 +34,8 @@ case "$MODE" in
|
|
|
34
34
|
api)
|
|
35
35
|
echo ""
|
|
36
36
|
echo "=== 2. API Interception Mode ==="
|
|
37
|
-
|
|
37
|
+
# Optional: set PROXY_URL if using a proxy
|
|
38
|
+
export https_proxy=${PROXY_URL:-}
|
|
38
39
|
agent-browser open "about:blank"
|
|
39
40
|
sleep 1
|
|
40
41
|
|
|
@@ -64,7 +65,8 @@ case "$MODE" in
|
|
|
64
65
|
scroll)
|
|
65
66
|
echo ""
|
|
66
67
|
echo "=== 2. Infinite Scroll Mode ==="
|
|
67
|
-
|
|
68
|
+
# Optional: set PROXY_URL if using a proxy
|
|
69
|
+
export https_proxy=${PROXY_URL:-}
|
|
68
70
|
agent-browser open "$TARGET_URL"
|
|
69
71
|
sleep 2
|
|
70
72
|
|
|
@@ -125,7 +127,8 @@ print(json.dumps(unique, ensure_ascii=False))
|
|
|
125
127
|
js)
|
|
126
128
|
echo ""
|
|
127
129
|
echo "=== 2. JS Variable Extraction Mode ==="
|
|
128
|
-
|
|
130
|
+
# Optional: set PROXY_URL if using a proxy
|
|
131
|
+
export https_proxy=${PROXY_URL:-}
|
|
129
132
|
agent-browser open "$TARGET_URL"
|
|
130
133
|
sleep 3
|
|
131
134
|
|
|
@@ -153,7 +156,8 @@ print(json.dumps(unique, ensure_ascii=False))
|
|
|
153
156
|
dom|*)
|
|
154
157
|
echo ""
|
|
155
158
|
echo "=== 2. DOM Extraction Mode ==="
|
|
156
|
-
|
|
159
|
+
# Optional: set PROXY_URL if using a proxy
|
|
160
|
+
export https_proxy=${PROXY_URL:-}
|
|
157
161
|
agent-browser open "$TARGET_URL"
|
|
158
162
|
sleep 2
|
|
159
163
|
|
|
@@ -1,32 +1,27 @@
|
|
|
1
|
-
#!/bin/bash
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
2
|
# Template: Form Automation Workflow
|
|
3
3
|
# Purpose: Fill and submit web forms with validation
|
|
4
4
|
# Usage: ./form-automation.sh <form-url>
|
|
5
5
|
#
|
|
6
|
-
#
|
|
7
|
-
# 1. Navigate to form
|
|
8
|
-
# 2. Snapshot to get element refs
|
|
9
|
-
# 3. Fill fields using refs
|
|
10
|
-
# 4. Submit and verify result
|
|
11
|
-
#
|
|
12
|
-
# Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output
|
|
6
|
+
# Demonstrates: snapshot -> interact -> verify pattern
|
|
13
7
|
|
|
14
8
|
set -euo pipefail
|
|
15
9
|
|
|
16
10
|
FORM_URL="${1:?Usage: $0 <form-url>}"
|
|
11
|
+
SESSION="form-$(date +%s)"
|
|
17
12
|
|
|
18
|
-
echo "Form
|
|
13
|
+
echo "=== Form Automation: $FORM_URL ==="
|
|
19
14
|
|
|
20
15
|
# Step 1: Navigate to form
|
|
21
|
-
agent-browser open "$FORM_URL"
|
|
22
|
-
agent-browser wait --load networkidle
|
|
16
|
+
agent-browser --session "$SESSION" open "$FORM_URL"
|
|
17
|
+
agent-browser --session "$SESSION" wait --load networkidle
|
|
23
18
|
|
|
24
19
|
# Step 2: Snapshot to discover form elements
|
|
25
20
|
echo ""
|
|
26
21
|
echo "Form structure:"
|
|
27
|
-
agent-browser snapshot -i
|
|
22
|
+
agent-browser --session "$SESSION" snapshot -i
|
|
28
23
|
|
|
29
|
-
# Step 3: Fill form fields (customize
|
|
24
|
+
# Step 3: Fill form fields (customize refs based on snapshot output above)
|
|
30
25
|
#
|
|
31
26
|
# Common field types:
|
|
32
27
|
# agent-browser fill @e1 "John Doe" # Text input
|
|
@@ -34,27 +29,27 @@ agent-browser snapshot -i
|
|
|
34
29
|
# agent-browser fill @e3 "SecureP@ss123" # Password input
|
|
35
30
|
# agent-browser select @e4 "Option Value" # Dropdown
|
|
36
31
|
# agent-browser check @e5 # Checkbox
|
|
37
|
-
# agent-browser click @e6 # Radio button
|
|
32
|
+
# agent-browser click @e6 # Radio button / Submit button
|
|
38
33
|
# agent-browser fill @e7 "Multi-line text" # Textarea
|
|
39
34
|
# agent-browser upload @e8 /path/to/file.pdf # File upload
|
|
40
35
|
#
|
|
41
36
|
# Uncomment and modify:
|
|
42
|
-
# agent-browser fill @e1 "Test User"
|
|
43
|
-
# agent-browser fill @e2 "test@example.com"
|
|
44
|
-
# agent-browser click @e3 # Submit button
|
|
37
|
+
# agent-browser --session "$SESSION" fill @e1 "Test User"
|
|
38
|
+
# agent-browser --session "$SESSION" fill @e2 "test@example.com"
|
|
39
|
+
# agent-browser --session "$SESSION" click @e3 # Submit button
|
|
45
40
|
|
|
46
|
-
# Step 4: Wait for submission
|
|
47
|
-
|
|
48
|
-
# agent-browser wait --url "**/success" # Or wait for redirect
|
|
41
|
+
# Step 4: Wait for submission to complete
|
|
42
|
+
agent-browser --session "$SESSION" wait --load networkidle
|
|
43
|
+
# agent-browser --session "$SESSION" wait --url "**/success" # Or wait for redirect
|
|
49
44
|
|
|
50
45
|
# Step 5: Verify result
|
|
51
46
|
echo ""
|
|
52
47
|
echo "Result:"
|
|
53
|
-
agent-browser get url
|
|
54
|
-
agent-browser snapshot -i
|
|
48
|
+
agent-browser --session "$SESSION" get url
|
|
49
|
+
agent-browser --session "$SESSION" snapshot -i
|
|
55
50
|
|
|
56
51
|
# Optional: Capture evidence
|
|
57
|
-
agent-browser screenshot /tmp/form-result.png
|
|
52
|
+
agent-browser --session "$SESSION" screenshot /tmp/form-result.png
|
|
58
53
|
echo "Screenshot saved: /tmp/form-result.png"
|
|
59
54
|
|
|
60
55
|
# Cleanup
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Template: Recorder Workflow
|
|
3
|
+
# Purpose: Record browser actions, save as YAML, replay later
|
|
4
|
+
# Usage: ./recorder-workflow.sh [url] [output.yaml]
|
|
5
|
+
#
|
|
6
|
+
# Records your interactions into a replayable YAML workflow file.
|
|
7
|
+
# Useful for test automation, demo creation, and regression testing.
|
|
8
|
+
|
|
9
|
+
set -euo pipefail
|
|
10
|
+
|
|
11
|
+
URL="${1:-https://example.com/form}"
|
|
12
|
+
OUTPUT="${2:-recording-$(date +%Y%m%d-%H%M%S).yaml}"
|
|
13
|
+
SESSION="record-$(date +%s)"
|
|
14
|
+
|
|
15
|
+
echo "=== Recorder Workflow: $URL ==="
|
|
16
|
+
|
|
17
|
+
# Step 1: Start recording
|
|
18
|
+
agent-browser recorder start --session "$SESSION"
|
|
19
|
+
echo "Recording started on session: $SESSION"
|
|
20
|
+
|
|
21
|
+
# Step 2: Navigate and perform workflow
|
|
22
|
+
agent-browser --session "$SESSION" open "$URL"
|
|
23
|
+
agent-browser --session "$SESSION" wait --load networkidle
|
|
24
|
+
|
|
25
|
+
echo ""
|
|
26
|
+
echo "Form structure (copy refs from below):"
|
|
27
|
+
agent-browser --session "$SESSION" snapshot -i
|
|
28
|
+
|
|
29
|
+
# Step 3: Perform your actions here (uncomment/customize):
|
|
30
|
+
#
|
|
31
|
+
# agent-browser --session "$SESSION" fill @e1 "user@example.com"
|
|
32
|
+
# agent-browser --session "$SESSION" fill @e2 "password123"
|
|
33
|
+
# agent-browser --session "$SESSION" click @e3
|
|
34
|
+
# agent-browser --session "$SESSION" wait --load networkidle
|
|
35
|
+
# agent-browser --session "$SESSION" snapshot -i # Verify
|
|
36
|
+
|
|
37
|
+
echo ""
|
|
38
|
+
echo "Waiting ${TIMEOUT:-10}s before stopping recording..."
|
|
39
|
+
sleep "${TIMEOUT:-10}"
|
|
40
|
+
|
|
41
|
+
# Step 4: Stop recording and save
|
|
42
|
+
agent-browser recorder stop --session "$SESSION" --output "$OUTPUT"
|
|
43
|
+
echo ""
|
|
44
|
+
echo "Saved recording to: $OUTPUT"
|
|
45
|
+
echo ""
|
|
46
|
+
echo "To replay:"
|
|
47
|
+
echo " agent-browser recorder replay $OUTPUT"
|
|
48
|
+
|
|
49
|
+
# Cleanup
|
|
50
|
+
agent-browser --session "$SESSION" close
|
|
51
|
+
echo "Done"
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/usr/bin/env bash
|
|
2
|
+
# Template: Viewer Remote Control Workflow
|
|
3
|
+
# Purpose: Open browser, start viewer, interact remotely via streaming UI
|
|
4
|
+
# Usage: ./viewer-remote.sh [url] [timeout-seconds]
|
|
5
|
+
#
|
|
6
|
+
# The viewer streams real-time browser frames to your local browser.
|
|
7
|
+
# On touch devices, you get touchpad + input panel for mobile control.
|
|
8
|
+
|
|
9
|
+
set -euo pipefail
|
|
10
|
+
|
|
11
|
+
URL="${1:-https://www.baidu.com}"
|
|
12
|
+
TIMEOUT="${2:-300}"
|
|
13
|
+
SESSION="viewer-$(date +%s)"
|
|
14
|
+
|
|
15
|
+
echo "=== Viewer Remote Control: $URL ==="
|
|
16
|
+
|
|
17
|
+
# Step 1: Launch browser (headed so you can see it locally too)
|
|
18
|
+
agent-browser --session "$SESSION" --headed open "$URL"
|
|
19
|
+
agent-browser --session "$SESSION" wait --load networkidle
|
|
20
|
+
|
|
21
|
+
# Step 2: Start viewer and get connection URL
|
|
22
|
+
VIEWER_INFO=$(agent-browser --session "$SESSION" viewer --json)
|
|
23
|
+
echo "Viewer info: $VIEWER_INFO"
|
|
24
|
+
|
|
25
|
+
# Extract URL (requires jq)
|
|
26
|
+
VIEWER_URL=$(echo "$VIEWER_INFO" | jq -er '.url // empty' 2>/dev/null || echo "Check port 5005 manually")
|
|
27
|
+
echo ""
|
|
28
|
+
echo "Open this URL in your browser:"
|
|
29
|
+
echo " $VIEWER_URL"
|
|
30
|
+
echo ""
|
|
31
|
+
echo "Tips:"
|
|
32
|
+
echo " - Desktop: Click/drag/scroll on the streamed screen area"
|
|
33
|
+
echo " - Mobile: Use touchpad at bottom for cursor, tap inputs for text panel"
|
|
34
|
+
echo " - Tap a remote input field to open mobile text input"
|
|
35
|
+
echo ""
|
|
36
|
+
echo "Viewer will auto-close after ${TIMEOUT}s..."
|
|
37
|
+
sleep "$TIMEOUT"
|
|
38
|
+
|
|
39
|
+
# Cleanup
|
|
40
|
+
agent-browser --session "$SESSION" close
|
|
41
|
+
echo "Done"
|
|
Binary file
|
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
(() => {
|
|
2
|
-
const result = {
|
|
3
|
-
goodsContainerCount: 0,
|
|
4
|
-
sampleContainer: null,
|
|
5
|
-
allClasses: []
|
|
6
|
-
};
|
|
7
|
-
|
|
8
|
-
const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
|
|
9
|
-
result.goodsContainerCount = containers.length;
|
|
10
|
-
|
|
11
|
-
if (containers.length > 0) {
|
|
12
|
-
result.sampleContainer = {
|
|
13
|
-
className: containers[0].className,
|
|
14
|
-
innerHTML: containers[0].innerHTML.substring(0, 500),
|
|
15
|
-
text: containers[0].textContent?.substring(0, 200)
|
|
16
|
-
};
|
|
17
|
-
}
|
|
18
|
-
|
|
19
|
-
const allDivs = document.querySelectorAll('div');
|
|
20
|
-
const classSet = new Set();
|
|
21
|
-
allDivs.forEach(div => {
|
|
22
|
-
if (div.className) {
|
|
23
|
-
const classes = String(div.className).split(' ');
|
|
24
|
-
classes.forEach(c => {
|
|
25
|
-
if (c.includes('goods') || c.includes('item') || c.includes('product')) {
|
|
26
|
-
classSet.add(c);
|
|
27
|
-
}
|
|
28
|
-
});
|
|
29
|
-
}
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
result.allClasses = Array.from(classSet).slice(0, 20);
|
|
33
|
-
|
|
34
|
-
return result;
|
|
35
|
-
})()
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
(() => {
|
|
2
|
-
const result = {
|
|
3
|
-
visibleElements: [],
|
|
4
|
-
linksWithProductInClass: []
|
|
5
|
-
};
|
|
6
|
-
|
|
7
|
-
const allElements = document.querySelectorAll('*');
|
|
8
|
-
let count = 0;
|
|
9
|
-
|
|
10
|
-
for (const el of allElements) {
|
|
11
|
-
if (count >= 50) break;
|
|
12
|
-
|
|
13
|
-
const className = el.className ? String(el.className) : '';
|
|
14
|
-
const text = el.textContent?.trim() || '';
|
|
15
|
-
|
|
16
|
-
if ((className.includes('product') || className.includes('item') || className.includes('card')) && text.length > 10) {
|
|
17
|
-
result.visibleElements.push({
|
|
18
|
-
tagName: el.tagName,
|
|
19
|
-
className: className.substring(0, 100),
|
|
20
|
-
text: text.substring(0, 100)
|
|
21
|
-
});
|
|
22
|
-
count++;
|
|
23
|
-
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
const productLinks = document.querySelectorAll('a[href*="item.jd"], a[href*="product.jd"]');
|
|
27
|
-
productLinks.forEach((link, index) => {
|
|
28
|
-
if (index >= 10) return;
|
|
29
|
-
result.linksWithProductInClass.push({
|
|
30
|
-
href: link.href.substring(0, 100),
|
|
31
|
-
text: link.textContent?.substring(0, 50)
|
|
32
|
-
});
|
|
33
|
-
});
|
|
34
|
-
|
|
35
|
-
return result;
|
|
36
|
-
})()
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
(async () => {
|
|
2
|
-
const applauseRateEl = document.querySelector('.applause-rate');
|
|
3
|
-
|
|
4
|
-
if (applauseRateEl) {
|
|
5
|
-
applauseRateEl.click();
|
|
6
|
-
|
|
7
|
-
await new Promise(resolve => setTimeout(resolve, 3000));
|
|
8
|
-
|
|
9
|
-
const result = {
|
|
10
|
-
elementFound: true,
|
|
11
|
-
elementText: applauseRateEl.textContent,
|
|
12
|
-
popup: null
|
|
13
|
-
};
|
|
14
|
-
|
|
15
|
-
const popup = document.querySelector('[class*="popup"], [class*="modal"], [class*="dialog"]');
|
|
16
|
-
if (popup) {
|
|
17
|
-
result.popup = {
|
|
18
|
-
className: popup.className,
|
|
19
|
-
text: popup.textContent?.substring(0, 500)
|
|
20
|
-
};
|
|
21
|
-
}
|
|
22
|
-
|
|
23
|
-
return result;
|
|
24
|
-
} else {
|
|
25
|
-
return {
|
|
26
|
-
elementFound: false,
|
|
27
|
-
message: 'applause-rate element not found'
|
|
28
|
-
};
|
|
29
|
-
}
|
|
30
|
-
})()
|