@dyyz1993/agent-browser 0.11.0 → 0.11.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/bin/agent-browser-linux-x64 +0 -0
  2. package/dist/cli/help.js +1 -1
  3. package/dist/openapi.js +1 -1
  4. package/dist/stream-server-standalone.js +3 -3
  5. package/dist/viewer-script.js +54 -54
  6. package/package.json +1 -1
  7. package/skills/agent-browser/SKILL.md +279 -229
  8. package/skills/agent-browser/references/mobile-viewer.md +188 -0
  9. package/skills/agent-browser/references/viewer-mode.md +148 -0
  10. package/skills/agent-browser/templates/api-interception.sh +3 -1
  11. package/skills/agent-browser/templates/data-extraction.sh +8 -4
  12. package/skills/agent-browser/templates/form-automation.sh +18 -23
  13. package/skills/agent-browser/templates/network-intercept-crawl.sh +1 -0
  14. package/skills/agent-browser/templates/recorder-workflow.sh +51 -0
  15. package/skills/agent-browser/templates/viewer-remote.sh +41 -0
  16. package/bin/agent-browser-darwin-arm64 +0 -0
  17. package/scripts/check_goods_container.js +0 -35
  18. package/scripts/check_page_content.js +0 -36
  19. package/scripts/click_applause_rate.js +0 -30
  20. package/scripts/e2e-test-recorder.ts +0 -584
  21. package/scripts/explore_jd_page.js +0 -31
  22. package/scripts/extract_all_jd_data.js +0 -80
  23. package/scripts/extract_jd_product_detail.js +0 -62
  24. package/scripts/extract_jd_products_correct_links.js +0 -78
  25. package/scripts/extract_jd_products_final.js +0 -80
  26. package/scripts/extract_jd_reviews.js +0 -48
  27. package/scripts/extract_jd_seafood_final.js +0 -78
  28. package/scripts/extract_multiple_products.js +0 -77
  29. package/scripts/extract_products_no_scroll.js +0 -68
  30. package/scripts/extract_products_simple.js +0 -68
  31. package/scripts/find_applause_rate.js +0 -26
  32. package/scripts/find_jd_links.js +0 -28
  33. package/scripts/find_main_content.js +0 -20
  34. package/scripts/find_product_cards.js +0 -38
  35. package/scripts/find_root_content.js +0 -26
  36. package/scripts/find_unique_products.js +0 -55
  37. package/scripts/get_jd_product_detail.js +0 -16
  38. package/scripts/get_jd_products.js +0 -23
  39. package/scripts/get_jd_seafood_products.js +0 -44
  40. package/scripts/get_product_details_from_images.js +0 -54
  41. package/scripts/scroll_and_get_products.js +0 -47
  42. package/scripts/scroll_deep_and_find.js +0 -45
  43. package/scripts/verify-baidu-enter.ts +0 -116
  44. package/scripts/verify-form.sh +0 -67
  45. package/scripts/verify-login.sh +0 -65
  46. package/scripts/verify-recording.sh +0 -80
  47. package/scripts/verify-upload.sh +0 -41
  48. package/skills/agent-browser/references/profiling.md +0 -120
@@ -0,0 +1,188 @@
1
+ # Mobile Remote Control (Viewer Mode)
2
+
3
+ ## Overview
4
+
5
+ When the agent-browser viewer is opened on a **touch device** (phone, tablet), it automatically enters **mobile mode** with a touch-optimized UI. This is distinct from iOS Simulator mode — it works on ANY phone/tablet browser via the web viewer, requiring no simulator installation.
6
+
7
+ ## Touchpad System
8
+
9
+ The touchpad occupies the bottom portion of the viewer screen and simulates mouse input on the remote browser:
10
+
11
+ | Gesture | Action | Visual Feedback |
12
+ | ------------------- | ------------------------------------ | --------------------------------------- |
13
+ | Single tap | Click at virtual cursor position | Cursor flashes red briefly |
14
+ | Single finger drag | Move virtual cursor on remote screen | Cursor follows finger |
15
+ | Long press (~800ms) | Enter drag mode (hold mouse down) | Cursor turns orange, shows "DRAG" badge |
16
+ | Two-finger drag | Scroll wheel (vertical/horizontal) | Shows "SCROLL" badge |
17
+ | Two-finger release | Momentum scroll (deceleration) | Smooth deceleration after release |
18
+
19
+ **Implementation details:**
20
+
21
+ - All touch listeners use `{ passive: false }` + `preventDefault()` to prevent browser gestures
22
+ - Movement uses acceleration curve for natural feel (`computeAcceleration()`)
23
+ - Scroll uses separate wheel acceleration (`computeWheelAccel()`)
24
+ - Cooldown period after two-finger scroll prevents accidental clicks
25
+ - Momentum scroll uses RAF loop with 0.92 decay factor
26
+
27
+ ## Virtual Keyboard Toolbar
28
+
29
+ Collapsible toolbar at the top of the touchpad area:
30
+
31
+ | Button | Key Sent | Code |
32
+ | ----------- | ----------- | ------------ |
33
+ | Tab | Tab | `Tab` |
34
+ | Up Arrow | Arrow Up | `ArrowUp` |
35
+ | Left Arrow | Arrow Left | `ArrowLeft` |
36
+ | Down Arrow | Arrow Down | `ArrowDown` |
37
+ | Right Arrow | Arrow Right | `ArrowRight` |
38
+ | Enter | Enter | `Enter` |
39
+ | Backspace | Backspace | `Backspace` |
40
+ | Escape | Escape | `Escape` |
41
+
42
+ - **Collapsed state** (default): Shows only expand button (+ icon)
43
+ - **Expanded state**: Shows all 8 keys in wrapped layout
44
+ - Tap any key to send immediately to remote browser (no need to switch to keyboard app)
45
+
46
+ ## Text Input (Input Panel)
47
+
48
+ This is the key innovation for mobile remote control — typing text into remote input fields from your phone.
49
+
50
+ ### Flow Diagram
51
+
52
+ ```
53
+ User taps remote <input> on viewer screen
54
+
55
+ Daemon detects focus event via injected listener
56
+
57
+ Daemon sends {type: "input_focused", value: "...", ...} to viewer
58
+
59
+ Viewer enters INPUT MODE:
60
+ - Hides virtual cursor
61
+ - Shows #input-panel at screen bottom
62
+ - Pre-fills local input field with current value
63
+ - Sets window._currentTargetSelector for fill targeting
64
+
65
+ User types in local input field (with IME if needed)
66
+
67
+ Text syncs to remote via {type: "input_fill", text: "...", selector: "..."}
68
+
69
+ User taps Send (arrow icon) or presses Enter:
70
+ - Sends final input_fill + Enter keydown/keyup
71
+ - Exits input mode
72
+
73
+ OR user taps Escape or clicks outside panel:
74
+ - Sends input_blur_element to remote
75
+ - Exits input mode, restores touchpad
76
+ ```
77
+
78
+ ### IME / CJK Composition Support
79
+
80
+ Critical for Chinese, Japanese, Korean input methods:
81
+
82
+ | Event | Handling | Prevents |
83
+ | ------------------------ | -------------------------------------------------------- | ------------------------------------------- |
84
+ | `compositionstart` | Sets `_fieldComposing = true` | Intermediate pinyin sent to remote |
85
+ | `compositionupdate` | (ignored while composing) | Garbage characters |
86
+ | `compositionend` | Sets `_fieldComposing = false`, double-RAF deferred sync | Partial commits sent early |
87
+ | RAF poll (30ms interval) | Skips sync while `_fieldComposing === true` | Race condition with IME candidate selection |
88
+
89
+ **Key insight:** Only fully committed characters (after user selects from IME candidate list) are synced to the remote browser. Intermediate pinyin/kana composition is completely filtered out.
90
+
91
+ ### Input Panel Layout
92
+
93
+ ```
94
+ ┌─────────────────────────────────────────┐
95
+ │ target: input[type="email"] │ <- label row
96
+ ├─────────────────────────────────────────┤
97
+ │ [________________________] [>] │ <- input + send button
98
+ └─────────────────────────────────────────┘
99
+ ```
100
+
101
+ - Label shows: input type + placeholder (if different from value)
102
+ - Input field: `border-radius: 18px`, `font-size:16px` (prevents iOS zoom)
103
+ - Send button: Blue circle with arrow SVG icon
104
+ - Dismissal: Tap outside panel or press Escape
105
+
106
+ ### Keyboard Awareness on Mobile
107
+
108
+ On mobile devices, the viewer intentionally suppresses keyboard-related events to prevent interference:
109
+
110
+ - `hiddenInput` (#hidden-input) is **NOT created** on touch devices (unlike desktop mode)
111
+ - Document-level `keydown`/`keyup` listeners check `event.target` — ignores events from `#input-field`
112
+ - This allows the native mobile keyboard to work normally for text input without conflicting with remote keyboard forwarding
113
+
114
+ ## DeviceMode Dynamic Switching
115
+
116
+ The viewer does not detect the device type only once at startup. Instead, it uses a reactive architecture that re-detects the device and can switch modes at runtime:
117
+
118
+ ### Detection Function
119
+
120
+ ```javascript
121
+ function detectDeviceMode() {
122
+ var uaMatch = /iphone|ipod|android(?=.*mobile)|mobile|tablet|ipad/i.test(navigator.userAgent);
123
+ var hasTouch = 'ontouchstart' in window || navigator.maxTouchPoints > 0;
124
+ return uaMatch || hasTouch ? 'mobile' : 'desktop';
125
+ }
126
+ ```
127
+
128
+ ### Singleton Architecture
129
+
130
+ ```javascript
131
+ const DeviceMode = {
132
+ _current: detectDeviceMode(), // Initial detection
133
+ _listeners: [], // Change callbacks
134
+
135
+ get current() {
136
+ return this._current;
137
+ },
138
+
139
+ onModeChange(fn) {
140
+ this._listeners.push(fn);
141
+ },
142
+
143
+ switchTo(mode) {
144
+ if (mode === this._current) return; // No-op for same mode
145
+ var prev = this._current;
146
+ this._current = mode;
147
+ if (mode === 'desktop') {
148
+ MobileModule.detach(); // Hide touchpad, show cursor
149
+ DesktopModule.attach(); // Create hiddenInput, focus it
150
+ } else {
151
+ DesktopModule.detach(); // Remove hiddenInput
152
+ MobileModule.attach(); // Show touchpad, init cursor
153
+ }
154
+ this._listeners.forEach((fn) => fn(mode, prev));
155
+ },
156
+ };
157
+ ```
158
+
159
+ ### Module Lifecycle
160
+
161
+ **DesktopModule** (PC mode):
162
+
163
+ - `attach()`: Creates invisible `#hidden-input`, focuses it (captures keyboard for remote forwarding)
164
+ - `detach()`: Blurs and removes hiddenInput
165
+
166
+ **MobileModule** (touch mode):
167
+
168
+ - `attach()`: Shows touchpad (display:flex), initializes virtual cursor, sets up toolbar
169
+ - `detach()`: Hides input-panel, shows cursor again
170
+
171
+ ### Auto-Switching Triggers
172
+
173
+ | Trigger | Handler | Use Case |
174
+ | --------------------------------------- | ------------------------- | ------------------------------------------------- |
175
+ | `resize` event | Debounced 100ms re-detect | Phone rotation, window resize |
176
+ | `orientationchange` | Delayed 200ms re-detect | Portrait<->Landscape |
177
+ | `matchMedia("(pointer:coarse)")` change | Immediate switch | Stylus connect/disconnect, tablet keyboard attach |
178
+
179
+ ## Mobile-Specific CSS Considerations
180
+
181
+ | Issue | Solution |
182
+ | -------------------------------------- | --------------------------------------------------------- |
183
+ | iOS keyboard pushes content up | `min/max-height: 100dvh` on html/body, `position: fixed` |
184
+ | VisualViewport API for keyboard height | Listener resizes input panel above keyboard |
185
+ | iOS auto-scroll during input | `setInterval` scroll guard (100ms) fights browser scroll |
186
+ | Browser gesture conflicts | `touch-action: none` on body during input mode |
187
+ | Safe area (notch phones) | `padding-bottom: env(safe-area-inset-bottom)` on touchpad |
188
+ | Small tap targets | Minimum 44px height on buttons (iOS guideline) |
@@ -0,0 +1,148 @@
1
+ # Viewer / Streaming Mode
2
+
3
+ ## Overview
4
+
5
+ The viewer mode provides a **real-time visual remote browser interface**. It streams browser frames (JPEG/WebP) over WebSocket and forwards user input (mouse, keyboard, touch) back to the daemon. This enables:
6
+
7
+ - **Remote debugging** — see what the browser sees in real time
8
+ - **Mobile device control** — operate a desktop browser from your phone
9
+ - **Presentation/demo** — show browser activity to an audience
10
+ - **Collaboration** — share a browser session with others
11
+
12
+ ## Starting the Viewer
13
+
14
+ ```bash
15
+ # Prerequisite: have a browser session running
16
+ agent-browser open https://example.com
17
+
18
+ # Start viewer (opens URL in default browser)
19
+ agent-browser viewer
20
+
21
+ # Get connection details as JSON (for scripting/embedding)
22
+ agent-browser viewer --json
23
+ # Output: {"url":"http://localhost:5005/view?session=default","ws":"ws://...","port":5005}
24
+ ```
25
+
26
+ ## Viewer URL Parameters
27
+
28
+ | Parameter | Description |
29
+ | ------------------ | -------------------------------------- |
30
+ | `?session=<id>` | Connect to a specific named session |
31
+ | `?instanceId=<id>` | Connect to a specific browser instance |
32
+
33
+ ## Architecture
34
+
35
+ ```
36
+ ┌─────────────┐ IPC ┌───────────────────┐ WebSocket ┌──────────┐
37
+ │ Browser │ ───────→ │ Daemon Process │ ←────────────→ │ Viewer │
38
+ │ (Playwright) │ │ (:5000 socket) │ │ (Browser) │
39
+ └─────────────┘ └────────┬─────────┘ └──────────┘
40
+
41
+ standalone HTTP+WS server (:5005)
42
+ serves viewer.html + proxies messages
43
+ ```
44
+
45
+ **Data flow:**
46
+
47
+ 1. **Frames**: Browser -> Daemon -> Standalone Server -> Viewer (binary JPEG/WebP via WS)
48
+ 2. **Input**: Viewer -> Standalone Server -> Daemon -> Browser (JSON messages)
49
+
50
+ ## Viewer Page Features
51
+
52
+ ### Desktop Mode (PC/Mac)
53
+
54
+ | Feature | Description |
55
+ | ------------ | ------------------------------------------------------------------- |
56
+ | Screen area | Shows streamed frame, click/drag/scroll sends input to remote |
57
+ | Toolbar | URL bar, connection status, quality badge, record button |
58
+ | Hidden input | Invisible capture field for keyboard events (auto-focused on click) |
59
+ | Cursor | Red dot showing remote mouse position |
60
+
61
+ ### Mobile Mode (Touch Device)
62
+
63
+ Automatically activates on touch devices. See [mobile-viewer.md](mobile-viewer.md) for full details.
64
+
65
+ | Feature | Description |
66
+ | ---------------- | --------------------------------------------------- |
67
+ | Touchpad | Bottom gesture area for cursor simulation |
68
+ | Input Panel | Text input popup when tapping remote input fields |
69
+ | Keyboard toolbar | Virtual keys: Tab, Arrows, Enter, Backspace, Escape |
70
+ | IME support | Chinese/Japanese composition (pinyin, kana, etc.) |
71
+
72
+ ## Element Selector / Crop Mode
73
+
74
+ Crop the video stream to a specific DOM element's bounds:
75
+
76
+ ```bash
77
+ # Via viewer UI: click element selector button, then click target element
78
+ # The stream is cropped to that element's rectangle
79
+ ```
80
+
81
+ When element mode is active:
82
+
83
+ - Server crops frames to element bounds using Sharp
84
+ - Mouse coordinates auto-map to element-local space
85
+ - Falls back to "degraded mode" (full page) if element not found or disappears
86
+ - `deviceWidth`/`deviceHeight` in metadata reflect element dimensions
87
+
88
+ Use cases:
89
+
90
+ - Focus testing on a specific component
91
+ - Recording interactions within a widget
92
+ - Bandwidth savings (only stream the element, not full page)
93
+
94
+ ## Message Types (Viewer <-> Server)
95
+
96
+ ### Server → Viewer (over WebSocket)
97
+
98
+ | Type | Purpose |
99
+ | --------------- | ------------------------------------------------------------------ |
100
+ | `frame` | Binary frame data with metadata (dimensions, format, element info) |
101
+ | `status` | Connection status, viewport changes |
102
+ | `navigation` | URL/title changes |
103
+ | `input_focused` | Remote element received focus → triggers input panel (mobile) |
104
+ | `input_value` | Remote input value changed |
105
+ | `input_blur` | Remote element lost focus |
106
+
107
+ ### Viewer → Server (over WebSocket)
108
+
109
+ | Type | Purpose |
110
+ | ---------------------- | --------------------------------------------------- |
111
+ | `input_mouse` | Mouse move/press/release/wheel |
112
+ | `input_keyboard` | Key down/up with modifiers |
113
+ | `input_fill` | Full text value sync (mobile input panel) |
114
+ | `input_blur_element` | Blur remote element (mobile input commit) |
115
+ | `keyboard_insert_text` | Character-by-character insert (desktop hiddenInput) |
116
+ | `user_activity` | Keep-alive signal (resumes streaming if paused) |
117
+ | `selector_element` | Request crop to specific element |
118
+
119
+ ## Troubleshooting
120
+
121
+ ### Black screen
122
+
123
+ - Check daemon is running: `agent-browser status`
124
+ - Verify browser launched: `agent-browser open https://example.com` should work first
125
+
126
+ ### Connection refused
127
+
128
+ - The viewer command auto-starts the standalone server on port 5005
129
+ - If port conflicts, check: `lsof -i :5005`
130
+ - Kill stale process: `kill $(lsof -t -i :5005)`
131
+
132
+ ### Laggy updates
133
+
134
+ - Frame compression is JPEG by default (adjustable)
135
+ - Quality badge shows current state: "interacting" / "static" / "compressed"
136
+ - Network latency between viewer and server affects frame rate
137
+
138
+ ### Element not found (degraded mode)
139
+
140
+ - Yellow toast appears: "Element not found, showing full page"
141
+ - Element may have been removed by SPA navigation or animation
142
+ - Re-select the element or exit selector mode
143
+
144
+ ### Viewer shows but no frame
145
+
146
+ - Check daemon log: `~/.agent-browser/default.log`
147
+ - Look for "Browser not launched" errors
148
+ - Ensure `agent-browser open` was called before `agent-browser viewer`
@@ -1,5 +1,6 @@
1
1
  #!/bin/bash
2
2
  # API Interception Template - Passively capture API responses
3
+ set -euo pipefail
3
4
  # Usage: ./api-interception.sh [target_url] [output_file]
4
5
 
5
6
  TARGET_URL="${1:-https://example.com/user/profile}"
@@ -12,7 +13,8 @@ sleep 1
12
13
 
13
14
  echo ""
14
15
  echo "=== 2. Open blank page ==="
15
- export https_proxy=http://127.0.0.1:7890
16
+ # Optional: set PROXY_URL if using a proxy
17
+ export https_proxy=${PROXY_URL:-}
16
18
  agent-browser open "about:blank"
17
19
  sleep 1
18
20
 
@@ -34,7 +34,8 @@ case "$MODE" in
34
34
  api)
35
35
  echo ""
36
36
  echo "=== 2. API Interception Mode ==="
37
- export https_proxy=http://127.0.0.1:7890
37
+ # Optional: set PROXY_URL if using a proxy
38
+ export https_proxy=${PROXY_URL:-}
38
39
  agent-browser open "about:blank"
39
40
  sleep 1
40
41
 
@@ -64,7 +65,8 @@ case "$MODE" in
64
65
  scroll)
65
66
  echo ""
66
67
  echo "=== 2. Infinite Scroll Mode ==="
67
- export https_proxy=http://127.0.0.1:7890
68
+ # Optional: set PROXY_URL if using a proxy
69
+ export https_proxy=${PROXY_URL:-}
68
70
  agent-browser open "$TARGET_URL"
69
71
  sleep 2
70
72
 
@@ -125,7 +127,8 @@ print(json.dumps(unique, ensure_ascii=False))
125
127
  js)
126
128
  echo ""
127
129
  echo "=== 2. JS Variable Extraction Mode ==="
128
- export https_proxy=http://127.0.0.1:7890
130
+ # Optional: set PROXY_URL if using a proxy
131
+ export https_proxy=${PROXY_URL:-}
129
132
  agent-browser open "$TARGET_URL"
130
133
  sleep 3
131
134
 
@@ -153,7 +156,8 @@ print(json.dumps(unique, ensure_ascii=False))
153
156
  dom|*)
154
157
  echo ""
155
158
  echo "=== 2. DOM Extraction Mode ==="
156
- export https_proxy=http://127.0.0.1:7890
159
+ # Optional: set PROXY_URL if using a proxy
160
+ export https_proxy=${PROXY_URL:-}
157
161
  agent-browser open "$TARGET_URL"
158
162
  sleep 2
159
163
 
@@ -1,32 +1,27 @@
1
- #!/bin/bash
1
+ #!/usr/bin/env bash
2
2
  # Template: Form Automation Workflow
3
3
  # Purpose: Fill and submit web forms with validation
4
4
  # Usage: ./form-automation.sh <form-url>
5
5
  #
6
- # This template demonstrates the snapshot-interact-verify pattern:
7
- # 1. Navigate to form
8
- # 2. Snapshot to get element refs
9
- # 3. Fill fields using refs
10
- # 4. Submit and verify result
11
- #
12
- # Customize: Update the refs (@e1, @e2, etc.) based on your form's snapshot output
6
+ # Demonstrates: snapshot -> interact -> verify pattern
13
7
 
14
8
  set -euo pipefail
15
9
 
16
10
  FORM_URL="${1:?Usage: $0 <form-url>}"
11
+ SESSION="form-$(date +%s)"
17
12
 
18
- echo "Form automation: $FORM_URL"
13
+ echo "=== Form Automation: $FORM_URL ==="
19
14
 
20
15
  # Step 1: Navigate to form
21
- agent-browser open "$FORM_URL"
22
- agent-browser wait --load networkidle
16
+ agent-browser --session "$SESSION" open "$FORM_URL"
17
+ agent-browser --session "$SESSION" wait --load networkidle
23
18
 
24
19
  # Step 2: Snapshot to discover form elements
25
20
  echo ""
26
21
  echo "Form structure:"
27
- agent-browser snapshot -i
22
+ agent-browser --session "$SESSION" snapshot -i
28
23
 
29
- # Step 3: Fill form fields (customize these refs based on snapshot output)
24
+ # Step 3: Fill form fields (customize refs based on snapshot output above)
30
25
  #
31
26
  # Common field types:
32
27
  # agent-browser fill @e1 "John Doe" # Text input
@@ -34,27 +29,27 @@ agent-browser snapshot -i
34
29
  # agent-browser fill @e3 "SecureP@ss123" # Password input
35
30
  # agent-browser select @e4 "Option Value" # Dropdown
36
31
  # agent-browser check @e5 # Checkbox
37
- # agent-browser click @e6 # Radio button
32
+ # agent-browser click @e6 # Radio button / Submit button
38
33
  # agent-browser fill @e7 "Multi-line text" # Textarea
39
34
  # agent-browser upload @e8 /path/to/file.pdf # File upload
40
35
  #
41
36
  # Uncomment and modify:
42
- # agent-browser fill @e1 "Test User"
43
- # agent-browser fill @e2 "test@example.com"
44
- # agent-browser click @e3 # Submit button
37
+ # agent-browser --session "$SESSION" fill @e1 "Test User"
38
+ # agent-browser --session "$SESSION" fill @e2 "test@example.com"
39
+ # agent-browser --session "$SESSION" click @e3 # Submit button
45
40
 
46
- # Step 4: Wait for submission
47
- # agent-browser wait --load networkidle
48
- # agent-browser wait --url "**/success" # Or wait for redirect
41
+ # Step 4: Wait for submission to complete
42
+ agent-browser --session "$SESSION" wait --load networkidle
43
+ # agent-browser --session "$SESSION" wait --url "**/success" # Or wait for redirect
49
44
 
50
45
  # Step 5: Verify result
51
46
  echo ""
52
47
  echo "Result:"
53
- agent-browser get url
54
- agent-browser snapshot -i
48
+ agent-browser --session "$SESSION" get url
49
+ agent-browser --session "$SESSION" snapshot -i
55
50
 
56
51
  # Optional: Capture evidence
57
- agent-browser screenshot /tmp/form-result.png
52
+ agent-browser --session "$SESSION" screenshot /tmp/form-result.png
58
53
  echo "Screenshot saved: /tmp/form-result.png"
59
54
 
60
55
  # Cleanup
@@ -1,5 +1,6 @@
1
1
  #!/bin/bash
2
2
  # Network Interception Data Collection Template
3
+ set -euo pipefail
3
4
  # Usage: ./network-intercept-crawl.sh <url> <output-dir> [max-scrolls]
4
5
  #
5
6
  # This script demonstrates how to capture API data using network interception.
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env bash
2
+ # Template: Recorder Workflow
3
+ # Purpose: Record browser actions, save as YAML, replay later
4
+ # Usage: ./recorder-workflow.sh [url] [output.yaml]
5
+ #
6
+ # Records your interactions into a replayable YAML workflow file.
7
+ # Useful for test automation, demo creation, and regression testing.
8
+
9
+ set -euo pipefail
10
+
11
+ URL="${1:-https://example.com/form}"
12
+ OUTPUT="${2:-recording-$(date +%Y%m%d-%H%M%S).yaml}"
13
+ SESSION="record-$(date +%s)"
14
+
15
+ echo "=== Recorder Workflow: $URL ==="
16
+
17
+ # Step 1: Start recording
18
+ agent-browser recorder start --session "$SESSION"
19
+ echo "Recording started on session: $SESSION"
20
+
21
+ # Step 2: Navigate and perform workflow
22
+ agent-browser --session "$SESSION" open "$URL"
23
+ agent-browser --session "$SESSION" wait --load networkidle
24
+
25
+ echo ""
26
+ echo "Form structure (copy refs from below):"
27
+ agent-browser --session "$SESSION" snapshot -i
28
+
29
+ # Step 3: Perform your actions here (uncomment/customize):
30
+ #
31
+ # agent-browser --session "$SESSION" fill @e1 "user@example.com"
32
+ # agent-browser --session "$SESSION" fill @e2 "password123"
33
+ # agent-browser --session "$SESSION" click @e3
34
+ # agent-browser --session "$SESSION" wait --load networkidle
35
+ # agent-browser --session "$SESSION" snapshot -i # Verify
36
+
37
+ echo ""
38
+ echo "Waiting ${TIMEOUT:-10}s before stopping recording..."
39
+ sleep "${TIMEOUT:-10}"
40
+
41
+ # Step 4: Stop recording and save
42
+ agent-browser recorder stop --session "$SESSION" --output "$OUTPUT"
43
+ echo ""
44
+ echo "Saved recording to: $OUTPUT"
45
+ echo ""
46
+ echo "To replay:"
47
+ echo " agent-browser recorder replay $OUTPUT"
48
+
49
+ # Cleanup
50
+ agent-browser --session "$SESSION" close
51
+ echo "Done"
@@ -0,0 +1,41 @@
1
+ #!/usr/bin/env bash
2
+ # Template: Viewer Remote Control Workflow
3
+ # Purpose: Open browser, start viewer, interact remotely via streaming UI
4
+ # Usage: ./viewer-remote.sh [url] [timeout-seconds]
5
+ #
6
+ # The viewer streams real-time browser frames to your local browser.
7
+ # On touch devices, you get touchpad + input panel for mobile control.
8
+
9
+ set -euo pipefail
10
+
11
+ URL="${1:-https://www.baidu.com}"
12
+ TIMEOUT="${2:-300}"
13
+ SESSION="viewer-$(date +%s)"
14
+
15
+ echo "=== Viewer Remote Control: $URL ==="
16
+
17
+ # Step 1: Launch browser (headed so you can see it locally too)
18
+ agent-browser --session "$SESSION" --headed open "$URL"
19
+ agent-browser --session "$SESSION" wait --load networkidle
20
+
21
+ # Step 2: Start viewer and get connection URL
22
+ VIEWER_INFO=$(agent-browser --session "$SESSION" viewer --json)
23
+ echo "Viewer info: $VIEWER_INFO"
24
+
25
+ # Extract URL (requires jq)
26
+ VIEWER_URL=$(echo "$VIEWER_INFO" | jq -r '.url // empty' 2>/dev/null || echo "Check port 5005 manually")
27
+ echo ""
28
+ echo "Open this URL in your browser:"
29
+ echo " $VIEWER_URL"
30
+ echo ""
31
+ echo "Tips:"
32
+ echo " - Desktop: Click/drag/scroll on the streamed screen area"
33
+ echo " - Mobile: Use touchpad at bottom for cursor, tap inputs for text panel"
34
+ echo " - Tap a remote input field to open mobile text input"
35
+ echo ""
36
+ echo "Viewer will auto-close after ${TIMEOUT}s..."
37
+ sleep "$TIMEOUT"
38
+
39
+ # Cleanup
40
+ agent-browser --session "$SESSION" close
41
+ echo "Done"
Binary file
@@ -1,35 +0,0 @@
1
- (() => {
2
- const result = {
3
- goodsContainerCount: 0,
4
- sampleContainer: null,
5
- allClasses: []
6
- };
7
-
8
- const containers = document.querySelectorAll('._goodsContainer_1p2ae_1');
9
- result.goodsContainerCount = containers.length;
10
-
11
- if (containers.length > 0) {
12
- result.sampleContainer = {
13
- className: containers[0].className,
14
- innerHTML: containers[0].innerHTML.substring(0, 500),
15
- text: containers[0].textContent?.substring(0, 200)
16
- };
17
- }
18
-
19
- const allDivs = document.querySelectorAll('div');
20
- const classSet = new Set();
21
- allDivs.forEach(div => {
22
- if (div.className) {
23
- const classes = String(div.className).split(' ');
24
- classes.forEach(c => {
25
- if (c.includes('goods') || c.includes('item') || c.includes('product')) {
26
- classSet.add(c);
27
- }
28
- });
29
- }
30
- });
31
-
32
- result.allClasses = Array.from(classSet).slice(0, 20);
33
-
34
- return result;
35
- })()
@@ -1,36 +0,0 @@
1
- (() => {
2
- const result = {
3
- visibleElements: [],
4
- linksWithProductInClass: []
5
- };
6
-
7
- const allElements = document.querySelectorAll('*');
8
- let count = 0;
9
-
10
- for (const el of allElements) {
11
- if (count >= 50) break;
12
-
13
- const className = el.className ? String(el.className) : '';
14
- const text = el.textContent?.trim() || '';
15
-
16
- if ((className.includes('product') || className.includes('item') || className.includes('card')) && text.length > 10) {
17
- result.visibleElements.push({
18
- tagName: el.tagName,
19
- className: className.substring(0, 100),
20
- text: text.substring(0, 100)
21
- });
22
- count++;
23
- }
24
- }
25
-
26
- const productLinks = document.querySelectorAll('a[href*="item.jd"], a[href*="product.jd"]');
27
- productLinks.forEach((link, index) => {
28
- if (index >= 10) return;
29
- result.linksWithProductInClass.push({
30
- href: link.href.substring(0, 100),
31
- text: link.textContent?.substring(0, 50)
32
- });
33
- });
34
-
35
- return result;
36
- })()
@@ -1,30 +0,0 @@
1
- (async () => {
2
- const applauseRateEl = document.querySelector('.applause-rate');
3
-
4
- if (applauseRateEl) {
5
- applauseRateEl.click();
6
-
7
- await new Promise(resolve => setTimeout(resolve, 3000));
8
-
9
- const result = {
10
- elementFound: true,
11
- elementText: applauseRateEl.textContent,
12
- popup: null
13
- };
14
-
15
- const popup = document.querySelector('[class*="popup"], [class*="modal"], [class*="dialog"]');
16
- if (popup) {
17
- result.popup = {
18
- className: popup.className,
19
- text: popup.textContent?.substring(0, 500)
20
- };
21
- }
22
-
23
- return result;
24
- } else {
25
- return {
26
- elementFound: false,
27
- message: 'applause-rate element not found'
28
- };
29
- }
30
- })()