surfagent 1.0.9 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/API.md +208 -0
- package/CLAUDE.md +22 -0
- package/README.md +1 -0
- package/dist/api/act.d.ts +8 -0
- package/dist/api/act.js +172 -44
- package/dist/api/recon.d.ts +2 -0
- package/dist/api/recon.js +28 -3
- package/dist/api/server.js +19 -1
- package/package.json +1 -1
- package/src/api/act.ts +177 -45
- package/src/api/recon.ts +30 -3
- package/src/api/server.ts +20 -1
package/API.md
CHANGED
|
@@ -447,6 +447,27 @@ Check if the API can connect to Chrome.
|
|
|
447
447
|
|
|
448
448
|
---
|
|
449
449
|
|
|
450
|
+
### POST /type
|
|
451
|
+
|
|
452
|
+
Raw CDP key typing without clearing the field first. Use this for apps like **Google Sheets**, contenteditable elements, or any context where `/fill`'s Ctrl+A clear step causes side effects (e.g., selecting all cells instead of clearing a field).
|
|
453
|
+
|
|
454
|
+
**Request:**
|
|
455
|
+
```json
|
|
456
|
+
{ "tab": "0", "keys": "Hello World", "submit": "tab" }
|
|
457
|
+
```
|
|
458
|
+
|
|
459
|
+
- `keys` (string) — characters to type via CDP `Input.dispatchKeyEvent`
|
|
460
|
+
- `submit` (optional) — `"enter"` or `"tab"` to press after typing
|
|
461
|
+
|
|
462
|
+
**Response:**
|
|
463
|
+
```json
|
|
464
|
+
{ "typed": 11, "submitted": true }
|
|
465
|
+
```
|
|
466
|
+
|
|
467
|
+
**Why not `/fill`?** The `/fill` endpoint focuses an element, does Ctrl+A + Backspace to clear it, then types. In Google Sheets, Ctrl+A selects all cells (not the text in the current cell), so the Backspace that follows wipes the entire sheet. `/type` skips the focus and clear steps — it types into whatever currently has focus.
|
|
468
|
+
|
|
469
|
+
---
|
|
470
|
+
|
|
450
471
|
## Tab Targeting
|
|
451
472
|
|
|
452
473
|
All POST endpoints accept a `tab` field. It resolves in this order:
|
|
@@ -548,6 +569,193 @@ POST /read { "tab": "0", "selector": "#main-content" }
|
|
|
548
569
|
→ Click inside the iframe
|
|
549
570
|
```
|
|
550
571
|
|
|
572
|
+
### Google Sheets workflow
|
|
573
|
+
|
|
574
|
+
Google Sheets requires a special approach because `/fill` uses Ctrl+A to clear fields, which selects all cells in Sheets. Use the **name box + `/type`** pattern instead.
|
|
575
|
+
|
|
576
|
+
**Navigate to a cell:** Use `/fill` on the name box (`#t-name-box`), then Enter to jump to the cell.
|
|
577
|
+
|
|
578
|
+
**Type into a cell:** Use `/type` with `"submit": "tab"` (moves to next cell) or `"submit": "enter"` (moves down).
|
|
579
|
+
|
|
580
|
+
```
|
|
581
|
+
1. POST /click { "tab": "sheets", "selector": "#t-name-box" }
|
|
582
|
+
→ Focus the name box
|
|
583
|
+
2. POST /fill { "tab": "sheets", "fields": [{ "selector": "#t-name-box", "value": "A1", "clear": true }], "submit": "enter" }
|
|
584
|
+
→ Navigate to cell A1
|
|
585
|
+
3. POST /type { "tab": "sheets", "keys": "Hello World", "submit": "tab" }
|
|
586
|
+
→ Type into A1, Tab moves to B1
|
|
587
|
+
4. POST /type { "tab": "sheets", "keys": "=SUM(A1:A10)", "submit": "enter" }
|
|
588
|
+
→ Type a formula, Enter commits and moves down
|
|
589
|
+
```
|
|
590
|
+
|
|
591
|
+
**Adding a new sheet tab:** The "+" button at the bottom does not respond to DOM `.click()`. Use CDP `Input.dispatchMouseEvent` at the button's coordinates:
|
|
592
|
+
|
|
593
|
+
```bash
|
|
594
|
+
# Get the Add Sheet button position
|
|
595
|
+
curl -s -X POST localhost:3456/eval -d '{"tab":"0","expression":"var els = document.querySelectorAll(\"div[data-tooltip]\"); var r = \"\"; for(var i=0;i<els.length;i++){if(els[i].dataset.tooltip===\"Add Sheet\"){var b=els[i].getBoundingClientRect(); r=b.x+\",\"+b.y+\",\"+b.width+\",\"+b.height}} r"}'
|
|
596
|
+
# Returns: "44,854,34,34" (x,y,width,height) — click the center: x+width/2, y+height/2 = 61,871
|
|
597
|
+
|
|
598
|
+
# Click it with a Node script using CDP mouse events
|
|
599
|
+
node -e "
|
|
600
|
+
const CDP = require('chrome-remote-interface');
|
|
601
|
+
(async () => {
|
|
602
|
+
const targets = await CDP.List({port: 9222});
|
|
603
|
+
const tab = targets.find(t => t.url.includes('docs.google.com'));
|
|
604
|
+
const client = await CDP({target: tab, port: 9222});
|
|
605
|
+
await client.Input.dispatchMouseEvent({type:'mousePressed', x:61, y:871, button:'left', clickCount:1});
|
|
606
|
+
await client.Input.dispatchMouseEvent({type:'mouseReleased', x:61, y:871, button:'left', clickCount:1});
|
|
607
|
+
await client.close();
|
|
608
|
+
})();
|
|
609
|
+
"
|
|
610
|
+
```
|
|
611
|
+
|
|
612
|
+
**Renaming a sheet tab:** Double-click the tab name via CDP mouse events at the tab's coordinates, then use `/type` to enter the new name and press Enter.
|
|
613
|
+
|
|
614
|
+
**Using the menu search:** Google Sheets has a menu search box (`input[aria-label="Menus"]` or `input[aria-label="Menus (Option+/)"]`). Use `/fill` to type a command (e.g., "Insert chart"), then `/click` on the matching result.
|
|
615
|
+
|
|
616
|
+
**Key gotchas:**
|
|
617
|
+
- Never use `/fill` directly on Google Sheets cells — it will wipe data via Ctrl+A
|
|
618
|
+
- Always navigate to a cell via the name box first, then `/type`
|
|
619
|
+
- Some buttons (Add Sheet, menu items) only respond to CDP mouse events, not DOM clicks
|
|
620
|
+
- Navigating away from unsaved Sheets triggers a native Chrome dialog — see the "Native Chrome Dialogs" section below
|
|
621
|
+
|
|
622
|
+
### CDP mouse clicks for unreachable elements
|
|
623
|
+
|
|
624
|
+
Some UI elements don't respond to JavaScript `.click()` or the `/click` endpoint — they only react to real mouse events at their coordinates. This is common for:
|
|
625
|
+
- Google Sheets buttons (Add Sheet, toolbar items)
|
|
626
|
+
- Canvas-rendered elements
|
|
627
|
+
- Custom widgets that listen for `mousedown`/`mouseup` events
|
|
628
|
+
|
|
629
|
+
**Pattern:**
|
|
630
|
+
```bash
|
|
631
|
+
# 1. Get the element's coordinates via /eval
|
|
632
|
+
curl -s -X POST localhost:3456/eval -d '{"tab":"0","expression":"document.querySelector(\"#my-button\").getBoundingClientRect().x"}'
|
|
633
|
+
# → {"result": 100}  (query .y the same way to get the vertical coordinate)
|
|
634
|
+
|
|
635
|
+
# 2. Click via CDP Input.dispatchMouseEvent (requires a Node script)
|
|
636
|
+
node -e "
|
|
637
|
+
const CDP = require('chrome-remote-interface');
|
|
638
|
+
(async () => {
|
|
639
|
+
const targets = await CDP.List({port: 9222});
|
|
640
|
+
const tab = targets.find(t => t.url.includes('your-site'));
|
|
641
|
+
const client = await CDP({target: tab, port: 9222});
|
|
642
|
+
await client.Input.dispatchMouseEvent({type:'mousePressed', x:100, y:200, button:'left', clickCount:1});
|
|
643
|
+
await client.Input.dispatchMouseEvent({type:'mouseReleased', x:100, y:200, button:'left', clickCount:1});
|
|
644
|
+
await client.close();
|
|
645
|
+
})();
|
|
646
|
+
"
|
|
647
|
+
```
|
|
648
|
+
|
|
649
|
+
**Double-click** (e.g., to rename a Google Sheets tab): use `clickCount: 2`.
|
|
650
|
+
|
|
651
|
+
---
|
|
652
|
+
|
|
653
|
+
## Native Chrome Dialogs (Not in DOM or CDP)
|
|
654
|
+
|
|
655
|
+
Chrome can show browser-level popups — like "Leave page?" (`beforeunload`) dialogs — that are **not in the DOM**, **not accessible via CDP**, and will **block all CDP commands** (`/eval`, `/recon`, `/read` will all hang or timeout).
|
|
656
|
+
|
|
657
|
+
**Symptoms of a stuck session:**
|
|
658
|
+
- API calls hang or timeout on a tab that was previously working
|
|
659
|
+
- `Page.handleJavaScriptDialog` returns "No dialog is showing" (because it's not a JS dialog — it's a native Chrome window)
|
|
660
|
+
- The agent appears frozen on a page
|
|
661
|
+
|
|
662
|
+
**How to detect it (macOS only):**
|
|
663
|
+
|
|
664
|
+
Use CoreGraphics to list windows belonging to the surfagent Chrome process. Native dialogs appear as small unnamed windows (~260x218px) that are not visible to CDP.
|
|
665
|
+
|
|
666
|
+
```bash
|
|
667
|
+
# 1. Find the surfagent Chrome PID
|
|
668
|
+
SURFAGENT_PID=$(ps aux | grep 'chrome.*surfagent' | grep -v grep | awk '{print $2}')
|
|
669
|
+
|
|
670
|
+
# 2. List all windows for that PID using CoreGraphics
|
|
671
|
+
swift -e "
|
|
672
|
+
import CoreGraphics
|
|
673
|
+
let windows = CGWindowListCopyWindowInfo(.optionAll, kCGNullWindowID) as! [[String: Any]]
|
|
674
|
+
for w in windows {
|
|
675
|
+
let pid = w[\"kCGWindowOwnerPID\"] as? Int ?? 0
|
|
676
|
+
if pid == ${SURFAGENT_PID} {
|
|
677
|
+
let name = w[\"kCGWindowName\"] as? String ?? \"(unnamed)\"
|
|
678
|
+
let bounds = w[\"kCGWindowBounds\"] as? [String: Any] ?? [:]
|
|
679
|
+
let width = bounds[\"Width\"] as? Int ?? 0
|
|
680
|
+
let height = bounds[\"Height\"] as? Int ?? 0
|
|
681
|
+
if width > 100 && height > 100 {
|
|
682
|
+
print(\"Window: \(name) | Size: \(width)x\(height)\")
|
|
683
|
+
}
|
|
684
|
+
}
|
|
685
|
+
}
|
|
686
|
+
"
|
|
687
|
+
```
|
|
688
|
+
|
|
689
|
+
**What to look for:** A small unnamed window (typically ~260x218) alongside the main browser window. That's the native dialog.
|
|
690
|
+
|
|
691
|
+
**How to dismiss it:**
|
|
692
|
+
|
|
693
|
+
Native Chrome dialogs cannot be dismissed via CDP or AppleScript's `tell process "Google Chrome"` (which only sees the personal Chrome, not the surfagent debug instance). You must use the **Swift Accessibility API targeting the surfagent PID directly**.
|
|
694
|
+
|
|
695
|
+
```bash
|
|
696
|
+
# Find the surfagent Chrome PID
|
|
697
|
+
SURFAGENT_PID=$(ps aux | grep 'chrome.*surfagent' | grep -v grep | awk '{print $2}')
|
|
698
|
+
|
|
699
|
+
# Click "Cancel" (stay on page) — to leave instead, set targetNames = leaveNames in the script below
|
|
700
|
+
swift -e "
|
|
701
|
+
import Cocoa
|
|
702
|
+
|
|
703
|
+
let pid: pid_t = ${SURFAGENT_PID}
|
|
704
|
+
let app = AXUIElementCreateApplication(pid)
|
|
705
|
+
|
|
706
|
+
var windowsRef: CFTypeRef?
|
|
707
|
+
AXUIElementCopyAttributeValue(app, \"AXWindows\" as CFString, &windowsRef)
|
|
708
|
+
|
|
709
|
+
if let windows = windowsRef as? [AXUIElement] {
|
|
710
|
+
for win in windows {
|
|
711
|
+
var subroleRef: CFTypeRef?
|
|
712
|
+
AXUIElementCopyAttributeValue(win, \"AXSubrole\" as CFString, &subroleRef)
|
|
713
|
+
let subrole = subroleRef as? String ?? \"\"
|
|
714
|
+
|
|
715
|
+
// Native dialogs have subrole AXDialog
|
|
716
|
+
if subrole == \"AXDialog\" {
|
|
717
|
+
var childrenRef: CFTypeRef?
|
|
718
|
+
AXUIElementCopyAttributeValue(win, \"AXChildren\" as CFString, &childrenRef)
|
|
719
|
+
if let children = childrenRef as? [AXUIElement] {
|
|
720
|
+
for child in children {
|
|
721
|
+
var roleRef: CFTypeRef?
|
|
722
|
+
AXUIElementCopyAttributeValue(child, \"AXRole\" as CFString, &roleRef)
|
|
723
|
+
var titleRef: CFTypeRef?
|
|
724
|
+
AXUIElementCopyAttributeValue(child, \"AXTitle\" as CFString, &titleRef)
|
|
725
|
+
let role = roleRef as? String ?? \"\"
|
|
726
|
+
let title = titleRef as? String ?? \"\"
|
|
727
|
+
|
|
728
|
+
// Match button by title — covers English and Norwegian labels; add the button titles for other Chrome UI languages as needed
|
|
729
|
+
// Cancel/Stay: \"Cancel\", \"Avbryt\" (Norwegian)
|
|
730
|
+
// Leave: \"Leave\", \"Gå ut\" (Norwegian)
|
|
731
|
+
let cancelNames = [\"Cancel\", \"Avbryt\"]
|
|
732
|
+
let leaveNames = [\"Leave\", \"Gå ut\"]
|
|
733
|
+
|
|
734
|
+
let targetNames = cancelNames // Change to leaveNames to leave
|
|
735
|
+
|
|
736
|
+
if role == \"AXButton\" && targetNames.contains(title) {
|
|
737
|
+
let result = AXUIElementPerformAction(child, \"AXPress\" as CFString)
|
|
738
|
+
print(\"Clicked \(title): \(result == .success ? \"SUCCESS\" : \"FAILED\")\")
|
|
739
|
+
}
|
|
740
|
+
}
|
|
741
|
+
}
|
|
742
|
+
}
|
|
743
|
+
}
|
|
744
|
+
}
|
|
745
|
+
"
|
|
746
|
+
```
|
|
747
|
+
|
|
748
|
+
**Why AppleScript doesn't work:** `tell process "Google Chrome"` sees all Chrome instances as one process, but only exposes the *personal* Chrome's windows. The surfagent Chrome (launched with `--user-data-dir=/tmp/surfagent-chrome`) is invisible to it. The Swift `AXUIElementCreateApplication(pid)` approach targets the exact process by PID, which is the only way to reach the surfagent Chrome's native dialogs.
|
|
749
|
+
|
|
750
|
+
**When to check:** If any API call hangs or times out unexpectedly on a tab that was previously responsive, check for a native dialog before retrying. Common triggers:
|
|
751
|
+
- Navigating away from pages with unsaved changes (Google Sheets, web editors, forms)
|
|
752
|
+
- `window.onbeforeunload` handlers
|
|
753
|
+
- Chrome permission prompts
|
|
754
|
+
|
|
755
|
+
**Decision logic for agents:**
|
|
756
|
+
- **Click "Leave"** if you intentionally navigated away and don't need the page anymore
|
|
757
|
+
- **Click "Cancel"** if the navigation was accidental and you want to keep working on the current page (e.g., Google Sheets with unsaved data)
|
|
758
|
+
|
|
551
759
|
---
|
|
552
760
|
|
|
553
761
|
## Important Notes
|
package/CLAUDE.md
CHANGED
|
@@ -76,6 +76,9 @@ curl -X POST localhost:3456/eval -H 'Content-Type: application/json' -d '{"tab":
|
|
|
76
76
|
# Bring tab to front
|
|
77
77
|
curl -X POST localhost:3456/focus -H 'Content-Type: application/json' -d '{"tab":"0"}'
|
|
78
78
|
|
|
79
|
+
# Raw key typing — no clear step, for Google Sheets / contenteditable / canvas
|
|
80
|
+
curl -X POST localhost:3456/type -H 'Content-Type: application/json' -d '{"tab":"0","keys":"Hello","submit":"tab"}'
|
|
81
|
+
|
|
79
82
|
# Captcha detection and interaction (experimental)
|
|
80
83
|
curl -X POST localhost:3456/captcha -H 'Content-Type: application/json' -d '{"tab":"0","action":"detect"}'
|
|
81
84
|
|
|
@@ -86,6 +89,25 @@ curl localhost:3456/tabs
|
|
|
86
89
|
curl localhost:3456/health
|
|
87
90
|
```
|
|
88
91
|
|
|
92
|
+
### Google Sheets
|
|
93
|
+
|
|
94
|
+
Google Sheets requires `/type` instead of `/fill` for cell input (because `/fill` does Ctrl+A which selects all cells). Use the name box to navigate, then `/type` to enter data:
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
# 1. Click the name box
|
|
98
|
+
curl -X POST localhost:3456/click -H 'Content-Type: application/json' -d '{"tab":"sheets","selector":"#t-name-box"}'
|
|
99
|
+
|
|
100
|
+
# 2. Navigate to a cell
|
|
101
|
+
curl -X POST localhost:3456/fill -H 'Content-Type: application/json' -d '{"tab":"sheets","fields":[{"selector":"#t-name-box","value":"A1","clear":true}],"submit":"enter"}'
|
|
102
|
+
|
|
103
|
+
# 3. Type into the cell (Tab moves right, Enter moves down)
|
|
104
|
+
curl -X POST localhost:3456/type -H 'Content-Type: application/json' -d '{"tab":"sheets","keys":"=SUM(B2:B10)","submit":"tab"}'
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
Some Sheets buttons (Add Sheet +, toolbar) only respond to CDP mouse events, not DOM clicks. See `API.md` for the CDP mouse click pattern.
|
|
108
|
+
|
|
109
|
+
**Warning:** Navigating away from unsaved Sheets triggers a native Chrome "Leave page?" dialog that blocks ALL CDP commands. See `API.md` > "Native Chrome Dialogs" for detection and dismissal via Swift AX API.
|
|
110
|
+
|
|
89
111
|
### Tab Targeting
|
|
90
112
|
|
|
91
113
|
All endpoints accept a `tab` field:
|
package/README.md
CHANGED
|
@@ -74,6 +74,7 @@ curl -X POST localhost:3456/read -H 'Content-Type: application/json' \
|
|
|
74
74
|
| `/navigate` | POST | Go to URL, back, or forward in the same tab |
|
|
75
75
|
| `/eval` | POST | Run JavaScript in any tab or cross-origin iframe |
|
|
76
76
|
| `/captcha` | POST | Detect and interact with captchas — Arkose, reCAPTCHA, hCaptcha (experimental) |
|
|
77
|
+
| `/type` | POST | Raw CDP key typing without clearing — for Google Sheets, contenteditable, canvas apps |
|
|
77
78
|
| `/focus` | POST | Bring a tab to the front in Chrome |
|
|
78
79
|
| `/tabs` | GET | List all open Chrome tabs |
|
|
79
80
|
| `/health` | GET | Check if Chrome and API are connected |
|
package/dist/api/act.d.ts
CHANGED
|
@@ -91,3 +91,11 @@ export declare function focusTab(tabPattern: string, options: {
|
|
|
91
91
|
title: string;
|
|
92
92
|
url: string;
|
|
93
93
|
}>;
|
|
94
|
+
export declare function typeKeys(tabPattern: string, keys: string, options: {
|
|
95
|
+
port?: number;
|
|
96
|
+
host?: string;
|
|
97
|
+
submit?: string;
|
|
98
|
+
}): Promise<{
|
|
99
|
+
typed: number;
|
|
100
|
+
submitted?: boolean;
|
|
101
|
+
}>;
|
package/dist/api/act.js
CHANGED
|
@@ -37,43 +37,80 @@ export async function fillFields(request, options) {
|
|
|
37
37
|
const results = [];
|
|
38
38
|
for (const field of request.fields) {
|
|
39
39
|
try {
|
|
40
|
-
//
|
|
41
|
-
await client.Runtime.evaluate({
|
|
42
|
-
expression: `
|
|
43
|
-
(
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
el.
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
}
|
|
52
|
-
`,
|
|
40
|
+
// Detect element type to choose fill strategy
|
|
41
|
+
const elInfo = await client.Runtime.evaluate({
|
|
42
|
+
expression: `(function() {
|
|
43
|
+
const el = document.querySelector(${JSON.stringify(field.selector)});
|
|
44
|
+
if (!el) return { found: false };
|
|
45
|
+
return {
|
|
46
|
+
found: true,
|
|
47
|
+
tag: el.tagName,
|
|
48
|
+
type: el.type || null,
|
|
49
|
+
contentEditable: el.isContentEditable || false,
|
|
50
|
+
maxLength: el.maxLength >= 0 ? el.maxLength : null
|
|
51
|
+
};
|
|
52
|
+
})()`,
|
|
53
53
|
returnByValue: true
|
|
54
54
|
});
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
55
|
+
const info = elInfo.result.value;
|
|
56
|
+
if (!info || !info.found) {
|
|
57
|
+
results.push({ selector: field.selector, success: false, error: `Element not found: ${field.selector}` });
|
|
58
|
+
continue;
|
|
59
|
+
}
|
|
60
|
+
const isDateTimeRange = ['date', 'time', 'datetime-local', 'month', 'week', 'range', 'color'].includes(info.type);
|
|
61
|
+
const isContentEditable = info.contentEditable && info.tag !== 'INPUT' && info.tag !== 'TEXTAREA';
|
|
62
|
+
if (isDateTimeRange) {
|
|
63
|
+
// Date/time/range inputs: set value programmatically + dispatch events
|
|
64
|
+
await client.Runtime.evaluate({
|
|
65
|
+
expression: `(function() {
|
|
66
|
+
const el = document.querySelector(${JSON.stringify(field.selector)});
|
|
67
|
+
const nativeSetter = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value').set;
|
|
68
|
+
nativeSetter.call(el, ${JSON.stringify(field.value)});
|
|
69
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
70
|
+
el.dispatchEvent(new Event('change', { bubbles: true }));
|
|
71
|
+
})()`,
|
|
72
|
+
returnByValue: true
|
|
66
73
|
});
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
74
|
+
}
|
|
75
|
+
else {
|
|
76
|
+
// Focus and clear
|
|
77
|
+
await client.Runtime.evaluate({
|
|
78
|
+
expression: `
|
|
79
|
+
(function() {
|
|
80
|
+
const el = document.querySelector(${JSON.stringify(field.selector)});
|
|
81
|
+
el.focus();
|
|
82
|
+
el.click();
|
|
83
|
+
if (el.select) el.select();
|
|
84
|
+
else if (el.setSelectionRange) el.setSelectionRange(0, el.value?.length || 0);
|
|
85
|
+
})()
|
|
86
|
+
`,
|
|
87
|
+
returnByValue: true
|
|
70
88
|
});
|
|
89
|
+
// Clear existing value with select-all + delete
|
|
90
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'a', code: 'KeyA', modifiers: 2 });
|
|
91
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'a', code: 'KeyA', modifiers: 2 });
|
|
92
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Backspace', code: 'Backspace' });
|
|
93
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Backspace', code: 'Backspace' });
|
|
94
|
+
// Type each character via CDP Input.dispatchKeyEvent
|
|
95
|
+
for (const char of field.value) {
|
|
96
|
+
if (char === '\n') {
|
|
97
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Enter', code: 'Enter', text: '\r', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
98
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
99
|
+
}
|
|
100
|
+
else if (char === '\t') {
|
|
101
|
+
await client.Runtime.evaluate({ expression: `document.execCommand('insertText', false, '\\t')` });
|
|
102
|
+
}
|
|
103
|
+
else {
|
|
104
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: char, text: char });
|
|
105
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: char });
|
|
106
|
+
}
|
|
107
|
+
}
|
|
71
108
|
}
|
|
72
|
-
// Verify
|
|
73
|
-
const
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
});
|
|
109
|
+
// Verify: use value for inputs, textContent for contenteditable
|
|
110
|
+
const verifyExpr = isContentEditable
|
|
111
|
+
? `document.querySelector(${JSON.stringify(field.selector)})?.textContent?.trim()`
|
|
112
|
+
: `document.querySelector(${JSON.stringify(field.selector)})?.value`;
|
|
113
|
+
const verify = await client.Runtime.evaluate({ expression: verifyExpr, returnByValue: true });
|
|
77
114
|
const actual = verify.result.value;
|
|
78
115
|
if (actual === field.value) {
|
|
79
116
|
results.push({ selector: field.selector, success: true });
|
|
@@ -81,6 +118,13 @@ export async function fillFields(request, options) {
|
|
|
81
118
|
else if (actual === undefined || actual === null) {
|
|
82
119
|
results.push({ selector: field.selector, success: false, error: `Element not found or has no value: ${field.selector}` });
|
|
83
120
|
}
|
|
121
|
+
else if (info.maxLength && actual === field.value.substring(0, info.maxLength)) {
|
|
122
|
+
// Maxlength truncation — fill worked within constraint
|
|
123
|
+
results.push({ selector: field.selector, success: true, error: `Truncated to maxlength=${info.maxLength}` });
|
|
124
|
+
}
|
|
125
|
+
else if (isContentEditable && actual.includes(field.value)) {
|
|
126
|
+
results.push({ selector: field.selector, success: true });
|
|
127
|
+
}
|
|
84
128
|
else {
|
|
85
129
|
results.push({ selector: field.selector, success: false, error: `Value mismatch: expected "${field.value}", got "${actual}"` });
|
|
86
130
|
}
|
|
@@ -138,13 +182,25 @@ export async function clickElement(request, options) {
|
|
|
138
182
|
}
|
|
139
183
|
if (!el && text) {
|
|
140
184
|
const lower = text.toLowerCase();
|
|
141
|
-
const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [role="option"], [role="menuitem"], [role="listitem"], [role="tab"], [role="link"], li[aria-label], [onclick]');
|
|
185
|
+
const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [role="option"], [role="menuitem"], [role="listitem"], [role="tab"], [role="link"], li[aria-label], [onclick], label');
|
|
186
|
+
let bestMatch = null;
|
|
187
|
+
let bestScore = Infinity; // lower is better
|
|
142
188
|
for (const candidate of all) {
|
|
143
189
|
const t = (candidate.innerText || candidate.textContent || candidate.value || candidate.getAttribute('aria-label') || '').trim();
|
|
144
|
-
|
|
190
|
+
const tLower = t.toLowerCase();
|
|
191
|
+
if (!tLower.includes(lower)) continue;
|
|
192
|
+
// Score: 0 = exact, 1 = starts-with, 2+ = contains (shorter text = better)
|
|
193
|
+
let score;
|
|
194
|
+
if (tLower === lower) score = 0;
|
|
195
|
+
else if (tLower.startsWith(lower)) score = 1;
|
|
196
|
+
else score = 2 + t.length;
|
|
197
|
+
if (score < bestScore) { bestMatch = candidate; bestScore = score; }
|
|
198
|
+
if (score === 0) break; // exact match, stop
|
|
145
199
|
}
|
|
200
|
+
el = bestMatch;
|
|
146
201
|
}
|
|
147
202
|
if (!el) return { success: false, error: 'Element not found' };
|
|
203
|
+
if (el.disabled || el.getAttribute('aria-disabled') === 'true') return { success: false, error: 'Element is disabled' };
|
|
148
204
|
|
|
149
205
|
el.scrollIntoView({ block: 'center' });
|
|
150
206
|
|
|
@@ -192,17 +248,39 @@ export async function scrollPage(request, options) {
|
|
|
192
248
|
const scrollY = Math.round(window.scrollY);
|
|
193
249
|
const scrollHeight = document.documentElement.scrollHeight;
|
|
194
250
|
const viewportHeight = window.innerHeight;
|
|
195
|
-
const atBottom = (scrollY + viewportHeight) >= (scrollHeight -
|
|
251
|
+
const atBottom = (scrollY + viewportHeight) >= (scrollHeight - 2);
|
|
196
252
|
|
|
197
|
-
// Get visible text content
|
|
198
|
-
const centerY = scrollY + viewportHeight / 2;
|
|
199
|
-
const elements = document.elementsFromPoint(window.innerWidth / 2, viewportHeight / 2);
|
|
253
|
+
// Get visible text content from elements in the current viewport
|
|
200
254
|
let contentPreview = '';
|
|
201
|
-
|
|
255
|
+
const visibleTexts = [];
|
|
256
|
+
const mainEl = document.querySelector('main, article, [role="main"]') || document.body;
|
|
257
|
+
const allEls = mainEl.querySelectorAll('p, li, td, th, h1, h2, h3, h4, h5, h6, dd, dt, blockquote, pre');
|
|
258
|
+
for (const el of allEls) {
|
|
259
|
+
if (visibleTexts.length >= 30) break;
|
|
260
|
+
const rect = el.getBoundingClientRect();
|
|
261
|
+
// Element must be within the viewport
|
|
262
|
+
if (rect.bottom < 0 || rect.top > viewportHeight || rect.height === 0) continue;
|
|
263
|
+
// Skip fixed/sticky elements (nav, TOC, sidebars)
|
|
264
|
+
const style = window.getComputedStyle(el.closest('nav, aside, [role="navigation"]') || el);
|
|
265
|
+
if (style.position === 'fixed' || style.position === 'sticky') continue;
|
|
202
266
|
const text = el.innerText?.trim();
|
|
203
|
-
if (text
|
|
204
|
-
|
|
205
|
-
|
|
267
|
+
if (!text || text.length < 5) continue;
|
|
268
|
+
// Skip if text is too long (likely a parent container)
|
|
269
|
+
if (text.length > 500) continue;
|
|
270
|
+
// Skip duplicates
|
|
271
|
+
if (visibleTexts.some(t => t.includes(text) || text.includes(t))) continue;
|
|
272
|
+
visibleTexts.push(text);
|
|
273
|
+
}
|
|
274
|
+
contentPreview = visibleTexts.join('\\n').substring(0, 1500);
|
|
275
|
+
if (!contentPreview) {
|
|
276
|
+
// Fallback: grab from center point
|
|
277
|
+
const elements = document.elementsFromPoint(window.innerWidth / 2, viewportHeight / 2);
|
|
278
|
+
for (const el of elements) {
|
|
279
|
+
const text = el.innerText?.trim();
|
|
280
|
+
if (text && text.length > 50 && text.length < 3000) {
|
|
281
|
+
contentPreview = text.substring(0, 1500);
|
|
282
|
+
break;
|
|
283
|
+
}
|
|
206
284
|
}
|
|
207
285
|
}
|
|
208
286
|
|
|
@@ -237,8 +315,16 @@ export async function navigatePage(request, options) {
|
|
|
237
315
|
await new Promise(resolve => setTimeout(resolve, waitMs));
|
|
238
316
|
}
|
|
239
317
|
else if (request.url) {
|
|
318
|
+
// Block dangerous URL schemes
|
|
319
|
+
const scheme = request.url.trim().toLowerCase().split(':')[0];
|
|
320
|
+
if (['javascript', 'vbscript'].includes(scheme)) {
|
|
321
|
+
await client.close();
|
|
322
|
+
throw new Error('Blocked: javascript: URLs are not allowed');
|
|
323
|
+
}
|
|
240
324
|
await client.Page.navigate({ url: request.url });
|
|
241
|
-
|
|
325
|
+
// Race loadEventFired against a timeout to prevent hanging on non-loading URLs
|
|
326
|
+
const loadTimeout = new Promise(resolve => setTimeout(resolve, Math.min(waitMs + 10000, 30000)));
|
|
327
|
+
await Promise.race([client.Page.loadEventFired(), loadTimeout]);
|
|
242
328
|
await new Promise(resolve => setTimeout(resolve, waitMs));
|
|
243
329
|
}
|
|
244
330
|
const result = await client.Runtime.evaluate({
|
|
@@ -262,9 +348,18 @@ export async function evalInTab(tab, expression, options) {
|
|
|
262
348
|
const timeout = new Promise((_, reject) => setTimeout(() => reject(new Error('Eval timed out after 30s')), 30000));
|
|
263
349
|
const evalPromise = client.Runtime.evaluate({
|
|
264
350
|
expression,
|
|
265
|
-
returnByValue: true
|
|
351
|
+
returnByValue: true,
|
|
352
|
+
awaitPromise: true
|
|
266
353
|
});
|
|
267
354
|
const result = await Promise.race([evalPromise, timeout]);
|
|
355
|
+
// Check for exceptions (syntax errors, thrown errors, etc.)
|
|
356
|
+
if (result.exceptionDetails) {
|
|
357
|
+
const desc = result.exceptionDetails.exception?.description
|
|
358
|
+
|| result.exceptionDetails.text
|
|
359
|
+
|| 'Unknown error';
|
|
360
|
+
await client.close();
|
|
361
|
+
return { __error: desc };
|
|
362
|
+
}
|
|
268
363
|
await client.close();
|
|
269
364
|
return result.result.value ?? null;
|
|
270
365
|
}
|
|
@@ -579,3 +674,36 @@ export async function focusTab(tabPattern, options) {
|
|
|
579
674
|
throw error;
|
|
580
675
|
}
|
|
581
676
|
}
|
|
677
|
+
// Raw CDP key typing — no clear step, no element focus. Types directly into whatever has focus.
|
|
678
|
+
// Designed for apps like Google Sheets where Ctrl+A/Backspace clear causes side effects.
|
|
679
|
+
export async function typeKeys(tabPattern, keys, options) {
|
|
680
|
+
const port = options.port || 9222;
|
|
681
|
+
const host = options.host || 'localhost';
|
|
682
|
+
const tab = await resolveTab(tabPattern, port, host);
|
|
683
|
+
const client = await connectToTab(tab.id, port, host);
|
|
684
|
+
const cdp = client;
|
|
685
|
+
try {
|
|
686
|
+
// Type each character via CDP Input.dispatchKeyEvent
|
|
687
|
+
for (const char of keys) {
|
|
688
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: char, text: char });
|
|
689
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: char });
|
|
690
|
+
}
|
|
691
|
+
let submitted = false;
|
|
692
|
+
if (options.submit === 'enter') {
|
|
693
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
694
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
695
|
+
submitted = true;
|
|
696
|
+
}
|
|
697
|
+
else if (options.submit === 'tab') {
|
|
698
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Tab', code: 'Tab', windowsVirtualKeyCode: 9, nativeVirtualKeyCode: 9 });
|
|
699
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Tab', code: 'Tab', windowsVirtualKeyCode: 9, nativeVirtualKeyCode: 9 });
|
|
700
|
+
submitted = true;
|
|
701
|
+
}
|
|
702
|
+
await client.close();
|
|
703
|
+
return { typed: keys.length, submitted };
|
|
704
|
+
}
|
|
705
|
+
catch (error) {
|
|
706
|
+
await client.close();
|
|
707
|
+
throw error;
|
|
708
|
+
}
|
|
709
|
+
}
|
package/dist/api/recon.d.ts
CHANGED
package/dist/api/recon.js
CHANGED
|
@@ -30,7 +30,14 @@ const EXTRACTION_SCRIPT = `
|
|
|
30
30
|
const tag = el.tagName.toLowerCase();
|
|
31
31
|
if (el.getAttribute('aria-label')) return tag + '[aria-label="' + el.getAttribute('aria-label') + '"]';
|
|
32
32
|
if (el.getAttribute('data-testid')) return '[data-testid="' + el.getAttribute('data-testid') + '"]';
|
|
33
|
-
if (el.getAttribute('name'))
|
|
33
|
+
if (el.getAttribute('name')) {
|
|
34
|
+
const nameSelector = tag + '[name="' + el.getAttribute('name') + '"]';
|
|
35
|
+
// Disambiguate radio/checkbox with same name by adding value
|
|
36
|
+
if ((el.type === 'radio' || el.type === 'checkbox') && el.value) {
|
|
37
|
+
return nameSelector + '[value="' + el.value + '"]';
|
|
38
|
+
}
|
|
39
|
+
return nameSelector;
|
|
40
|
+
}
|
|
34
41
|
// Positional fallback
|
|
35
42
|
const parent = el.parentElement;
|
|
36
43
|
if (!parent) return tag;
|
|
@@ -106,7 +113,12 @@ const EXTRACTION_SCRIPT = `
|
|
|
106
113
|
selector: buildSelector(el),
|
|
107
114
|
role: el.getAttribute('role'),
|
|
108
115
|
x: Math.round(rect.x),
|
|
109
|
-
y: Math.round(rect.y)
|
|
116
|
+
y: Math.round(rect.y),
|
|
117
|
+
...(el.dataset && Object.keys(el.dataset).length > 0 ? {
|
|
118
|
+
data: Object.fromEntries(
|
|
119
|
+
['date','iso','value','testid','id'].filter(k => el.dataset[k]).map(k => [k, el.dataset[k]])
|
|
120
|
+
)
|
|
121
|
+
} : {})
|
|
110
122
|
});
|
|
111
123
|
}
|
|
112
124
|
}
|
|
@@ -208,6 +220,16 @@ const EXTRACTION_SCRIPT = `
|
|
|
208
220
|
}
|
|
209
221
|
}
|
|
210
222
|
}
|
|
223
|
+
// Deduplicate: remove overlays that are descendants of other overlays
|
|
224
|
+
const deduped = overlays.filter((o, i) => {
|
|
225
|
+
const el = document.querySelector(o.selector);
|
|
226
|
+
if (!el) return true;
|
|
227
|
+
return !overlays.some((other, j) => {
|
|
228
|
+
if (i === j) return false;
|
|
229
|
+
const otherEl = document.querySelector(other.selector);
|
|
230
|
+
return otherEl && otherEl !== el && otherEl.contains(el);
|
|
231
|
+
});
|
|
232
|
+
});
|
|
211
233
|
|
|
212
234
|
// ---- Captcha detection ----
|
|
213
235
|
const captchas = [];
|
|
@@ -238,9 +260,10 @@ const EXTRACTION_SCRIPT = `
|
|
|
238
260
|
headings,
|
|
239
261
|
navigation: navigation.slice(0, 50),
|
|
240
262
|
elements: elements.slice(0, 150),
|
|
263
|
+
totalElements: elements.length,
|
|
241
264
|
forms,
|
|
242
265
|
landmarks,
|
|
243
|
-
overlays,
|
|
266
|
+
overlays: deduped,
|
|
244
267
|
captchas,
|
|
245
268
|
contentSummary
|
|
246
269
|
};
|
|
@@ -291,6 +314,7 @@ export async function reconUrl(url, options) {
|
|
|
291
314
|
headings: data.headings,
|
|
292
315
|
navigation: data.navigation,
|
|
293
316
|
elements: data.elements,
|
|
317
|
+
totalElements: data.totalElements || data.elements?.length || 0,
|
|
294
318
|
forms: data.forms,
|
|
295
319
|
contentSummary: data.contentSummary,
|
|
296
320
|
landmarks: data.landmarks,
|
|
@@ -348,6 +372,7 @@ export async function reconTab(tabPattern, options) {
|
|
|
348
372
|
headings: data.headings,
|
|
349
373
|
navigation: data.navigation,
|
|
350
374
|
elements: data.elements,
|
|
375
|
+
totalElements: data.totalElements || data.elements?.length || 0,
|
|
351
376
|
forms: data.forms,
|
|
352
377
|
contentSummary: data.contentSummary,
|
|
353
378
|
landmarks: data.landmarks,
|
package/dist/api/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import http from 'node:http';
|
|
3
3
|
import { reconUrl, reconTab } from './recon.js';
|
|
4
|
-
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays } from './act.js';
|
|
4
|
+
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays, typeKeys } from './act.js';
|
|
5
5
|
import { getAllTabs } from '../chrome/tabs.js';
|
|
6
6
|
const PORT = parseInt(process.env.API_PORT || '3456', 10);
|
|
7
7
|
const CDP_PORT = parseInt(process.env.CDP_PORT || '9222', 10);
|
|
@@ -136,14 +136,32 @@ const server = http.createServer(async (req, res) => {
|
|
|
136
136
|
return json(res, 400, { error: 'Provide "tab" and "expression"' });
|
|
137
137
|
}
|
|
138
138
|
const result = await evalInTab(body.tab, body.expression, { port: CDP_PORT, host: CDP_HOST });
|
|
139
|
+
if (result && result.__error) {
|
|
140
|
+
return json(res, 200, { result: null, error: result.__error });
|
|
141
|
+
}
|
|
139
142
|
return json(res, 200, { result });
|
|
140
143
|
}
|
|
144
|
+
// POST /type — raw CDP key typing, no clear step (for Google Sheets, contenteditable, etc.)
|
|
145
|
+
if (path === '/type' && req.method === 'POST') {
|
|
146
|
+
const body = parseBody(await readBody(req));
|
|
147
|
+
if (!body.tab || !body.keys) {
|
|
148
|
+
return json(res, 400, { error: 'Provide "tab" and "keys" (string to type), optional "submit": "enter"|"tab"' });
|
|
149
|
+
}
|
|
150
|
+
const result = await typeKeys(body.tab, body.keys, { port: CDP_PORT, host: CDP_HOST, submit: body.submit });
|
|
151
|
+
return json(res, 200, result);
|
|
152
|
+
}
|
|
141
153
|
// POST /navigate — go to url, back, or forward in same tab
|
|
142
154
|
if (path === '/navigate' && req.method === 'POST') {
|
|
143
155
|
const body = parseBody(await readBody(req));
|
|
144
156
|
if (!body.tab) {
|
|
145
157
|
return json(res, 400, { error: 'Provide "tab" and one of: "url", "back":true, "forward":true' });
|
|
146
158
|
}
|
|
159
|
+
if (!body.url && !body.back && !body.forward) {
|
|
160
|
+
return json(res, 400, { error: 'Provide one of: "url", "back":true, "forward":true' });
|
|
161
|
+
}
|
|
162
|
+
if ((body.url && body.back) || (body.url && body.forward) || (body.back && body.forward)) {
|
|
163
|
+
return json(res, 400, { error: 'Provide only one of: "url", "back", "forward"' });
|
|
164
|
+
}
|
|
147
165
|
const result = await navigatePage(body, { port: CDP_PORT, host: CDP_HOST });
|
|
148
166
|
return json(res, 200, result);
|
|
149
167
|
}
|
package/package.json
CHANGED
package/src/api/act.ts
CHANGED
|
@@ -60,52 +60,93 @@ export async function fillFields(
|
|
|
60
60
|
|
|
61
61
|
for (const field of request.fields) {
|
|
62
62
|
try {
|
|
63
|
-
//
|
|
64
|
-
await client.Runtime.evaluate({
|
|
65
|
-
expression: `
|
|
66
|
-
(
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
el.
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
}
|
|
75
|
-
`,
|
|
63
|
+
// Detect element type to choose fill strategy
|
|
64
|
+
const elInfo = await client.Runtime.evaluate({
|
|
65
|
+
expression: `(function() {
|
|
66
|
+
const el = document.querySelector(${JSON.stringify(field.selector)});
|
|
67
|
+
if (!el) return { found: false };
|
|
68
|
+
return {
|
|
69
|
+
found: true,
|
|
70
|
+
tag: el.tagName,
|
|
71
|
+
type: el.type || null,
|
|
72
|
+
contentEditable: el.isContentEditable || false,
|
|
73
|
+
maxLength: el.maxLength >= 0 ? el.maxLength : null
|
|
74
|
+
};
|
|
75
|
+
})()`,
|
|
76
76
|
returnByValue: true
|
|
77
77
|
});
|
|
78
|
+
const info = elInfo.result.value as any;
|
|
79
|
+
if (!info || !info.found) {
|
|
80
|
+
results.push({ selector: field.selector, success: false, error: `Element not found: ${field.selector}` });
|
|
81
|
+
continue;
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const isDateTimeRange = ['date', 'time', 'datetime-local', 'month', 'week', 'range', 'color'].includes(info.type);
|
|
85
|
+
const isContentEditable = info.contentEditable && info.tag !== 'INPUT' && info.tag !== 'TEXTAREA';
|
|
78
86
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
text: char,
|
|
87
|
+
if (isDateTimeRange) {
|
|
88
|
+
// Date/time/range inputs: set value programmatically + dispatch events
|
|
89
|
+
await client.Runtime.evaluate({
|
|
90
|
+
expression: `(function() {
|
|
91
|
+
const el = document.querySelector(${JSON.stringify(field.selector)});
|
|
92
|
+
const nativeSetter = Object.getOwnPropertyDescriptor(HTMLInputElement.prototype, 'value').set;
|
|
93
|
+
nativeSetter.call(el, ${JSON.stringify(field.value)});
|
|
94
|
+
el.dispatchEvent(new Event('input', { bubbles: true }));
|
|
95
|
+
el.dispatchEvent(new Event('change', { bubbles: true }));
|
|
96
|
+
})()`,
|
|
97
|
+
returnByValue: true
|
|
91
98
|
});
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
99
|
+
} else {
|
|
100
|
+
// Focus and clear
|
|
101
|
+
await client.Runtime.evaluate({
|
|
102
|
+
expression: `
|
|
103
|
+
(function() {
|
|
104
|
+
const el = document.querySelector(${JSON.stringify(field.selector)});
|
|
105
|
+
el.focus();
|
|
106
|
+
el.click();
|
|
107
|
+
if (el.select) el.select();
|
|
108
|
+
else if (el.setSelectionRange) el.setSelectionRange(0, el.value?.length || 0);
|
|
109
|
+
})()
|
|
110
|
+
`,
|
|
111
|
+
returnByValue: true
|
|
95
112
|
});
|
|
113
|
+
|
|
114
|
+
// Clear existing value with select-all + delete
|
|
115
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'a', code: 'KeyA', modifiers: 2 });
|
|
116
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'a', code: 'KeyA', modifiers: 2 });
|
|
117
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Backspace', code: 'Backspace' });
|
|
118
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Backspace', code: 'Backspace' });
|
|
119
|
+
|
|
120
|
+
// Type each character via CDP Input.dispatchKeyEvent
|
|
121
|
+
for (const char of field.value) {
|
|
122
|
+
if (char === '\n') {
|
|
123
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Enter', code: 'Enter', text: '\r', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
124
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
125
|
+
} else if (char === '\t') {
|
|
126
|
+
await client.Runtime.evaluate({ expression: `document.execCommand('insertText', false, '\\t')` });
|
|
127
|
+
} else {
|
|
128
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: char, text: char });
|
|
129
|
+
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: char });
|
|
130
|
+
}
|
|
131
|
+
}
|
|
96
132
|
}
|
|
97
133
|
|
|
98
|
-
// Verify
|
|
99
|
-
const
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
});
|
|
134
|
+
// Verify: use value for inputs, textContent for contenteditable
|
|
135
|
+
const verifyExpr = isContentEditable
|
|
136
|
+
? `document.querySelector(${JSON.stringify(field.selector)})?.textContent?.trim()`
|
|
137
|
+
: `document.querySelector(${JSON.stringify(field.selector)})?.value`;
|
|
138
|
+
const verify = await client.Runtime.evaluate({ expression: verifyExpr, returnByValue: true });
|
|
103
139
|
|
|
104
140
|
const actual = verify.result.value as string;
|
|
105
141
|
if (actual === field.value) {
|
|
106
142
|
results.push({ selector: field.selector, success: true });
|
|
107
143
|
} else if (actual === undefined || actual === null) {
|
|
108
144
|
results.push({ selector: field.selector, success: false, error: `Element not found or has no value: ${field.selector}` });
|
|
145
|
+
} else if (info.maxLength && actual === field.value.substring(0, info.maxLength)) {
|
|
146
|
+
// Maxlength truncation — fill worked within constraint
|
|
147
|
+
results.push({ selector: field.selector, success: true, error: `Truncated to maxlength=${info.maxLength}` });
|
|
148
|
+
} else if (isContentEditable && actual.includes(field.value)) {
|
|
149
|
+
results.push({ selector: field.selector, success: true });
|
|
109
150
|
} else {
|
|
110
151
|
results.push({ selector: field.selector, success: false, error: `Value mismatch: expected "${field.value}", got "${actual}"` });
|
|
111
152
|
}
|
|
@@ -176,13 +217,25 @@ export async function clickElement(
|
|
|
176
217
|
}
|
|
177
218
|
if (!el && text) {
|
|
178
219
|
const lower = text.toLowerCase();
|
|
179
|
-
const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [role="option"], [role="menuitem"], [role="listitem"], [role="tab"], [role="link"], li[aria-label], [onclick]');
|
|
220
|
+
const all = document.querySelectorAll('a, button, input[type="submit"], [role="button"], [role="option"], [role="menuitem"], [role="listitem"], [role="tab"], [role="link"], li[aria-label], [onclick], label');
|
|
221
|
+
let bestMatch = null;
|
|
222
|
+
let bestScore = Infinity; // lower is better
|
|
180
223
|
for (const candidate of all) {
|
|
181
224
|
const t = (candidate.innerText || candidate.textContent || candidate.value || candidate.getAttribute('aria-label') || '').trim();
|
|
182
|
-
|
|
225
|
+
const tLower = t.toLowerCase();
|
|
226
|
+
if (!tLower.includes(lower)) continue;
|
|
227
|
+
// Score: 0 = exact, 1 = starts-with, 2+ = contains (shorter text = better)
|
|
228
|
+
let score;
|
|
229
|
+
if (tLower === lower) score = 0;
|
|
230
|
+
else if (tLower.startsWith(lower)) score = 1;
|
|
231
|
+
else score = 2 + t.length;
|
|
232
|
+
if (score < bestScore) { bestMatch = candidate; bestScore = score; }
|
|
233
|
+
if (score === 0) break; // exact match, stop
|
|
183
234
|
}
|
|
235
|
+
el = bestMatch;
|
|
184
236
|
}
|
|
185
237
|
if (!el) return { success: false, error: 'Element not found' };
|
|
238
|
+
if (el.disabled || el.getAttribute('aria-disabled') === 'true') return { success: false, error: 'Element is disabled' };
|
|
186
239
|
|
|
187
240
|
el.scrollIntoView({ block: 'center' });
|
|
188
241
|
|
|
@@ -243,17 +296,39 @@ export async function scrollPage(
|
|
|
243
296
|
const scrollY = Math.round(window.scrollY);
|
|
244
297
|
const scrollHeight = document.documentElement.scrollHeight;
|
|
245
298
|
const viewportHeight = window.innerHeight;
|
|
246
|
-
const atBottom = (scrollY + viewportHeight) >= (scrollHeight -
|
|
299
|
+
const atBottom = (scrollY + viewportHeight) >= (scrollHeight - 2);
|
|
247
300
|
|
|
248
|
-
// Get visible text content
|
|
249
|
-
const centerY = scrollY + viewportHeight / 2;
|
|
250
|
-
const elements = document.elementsFromPoint(window.innerWidth / 2, viewportHeight / 2);
|
|
301
|
+
// Get visible text content from elements in the current viewport
|
|
251
302
|
let contentPreview = '';
|
|
252
|
-
|
|
303
|
+
const visibleTexts = [];
|
|
304
|
+
const mainEl = document.querySelector('main, article, [role="main"]') || document.body;
|
|
305
|
+
const allEls = mainEl.querySelectorAll('p, li, td, th, h1, h2, h3, h4, h5, h6, dd, dt, blockquote, pre');
|
|
306
|
+
for (const el of allEls) {
|
|
307
|
+
if (visibleTexts.length >= 30) break;
|
|
308
|
+
const rect = el.getBoundingClientRect();
|
|
309
|
+
// Element must be within the viewport
|
|
310
|
+
if (rect.bottom < 0 || rect.top > viewportHeight || rect.height === 0) continue;
|
|
311
|
+
// Skip fixed/sticky elements (nav, TOC, sidebars)
|
|
312
|
+
const style = window.getComputedStyle(el.closest('nav, aside, [role="navigation"]') || el);
|
|
313
|
+
if (style.position === 'fixed' || style.position === 'sticky') continue;
|
|
253
314
|
const text = el.innerText?.trim();
|
|
254
|
-
if (text
|
|
255
|
-
|
|
256
|
-
|
|
315
|
+
if (!text || text.length < 5) continue;
|
|
316
|
+
// Skip if text is too long (likely a parent container)
|
|
317
|
+
if (text.length > 500) continue;
|
|
318
|
+
// Skip duplicates
|
|
319
|
+
if (visibleTexts.some(t => t.includes(text) || text.includes(t))) continue;
|
|
320
|
+
visibleTexts.push(text);
|
|
321
|
+
}
|
|
322
|
+
contentPreview = visibleTexts.join('\\n').substring(0, 1500);
|
|
323
|
+
if (!contentPreview) {
|
|
324
|
+
// Fallback: grab from center point
|
|
325
|
+
const elements = document.elementsFromPoint(window.innerWidth / 2, viewportHeight / 2);
|
|
326
|
+
for (const el of elements) {
|
|
327
|
+
const text = el.innerText?.trim();
|
|
328
|
+
if (text && text.length > 50 && text.length < 3000) {
|
|
329
|
+
contentPreview = text.substring(0, 1500);
|
|
330
|
+
break;
|
|
331
|
+
}
|
|
257
332
|
}
|
|
258
333
|
}
|
|
259
334
|
|
|
@@ -301,8 +376,16 @@ export async function navigatePage(
|
|
|
301
376
|
await client.Runtime.evaluate({ expression: 'window.history.forward()' });
|
|
302
377
|
await new Promise(resolve => setTimeout(resolve, waitMs));
|
|
303
378
|
} else if (request.url) {
|
|
379
|
+
// Block dangerous URL schemes
|
|
380
|
+
const scheme = request.url.trim().toLowerCase().split(':')[0];
|
|
381
|
+
if (['javascript', 'vbscript'].includes(scheme)) {
|
|
382
|
+
await client.close();
|
|
383
|
+
throw new Error('Blocked: javascript: URLs are not allowed');
|
|
384
|
+
}
|
|
304
385
|
await (client.Page as any).navigate({ url: request.url });
|
|
305
|
-
|
|
386
|
+
// Race loadEventFired against a timeout to prevent hanging on non-loading URLs
|
|
387
|
+
const loadTimeout = new Promise<void>(resolve => setTimeout(resolve, Math.min(waitMs + 10000, 30000)));
|
|
388
|
+
await Promise.race([(client.Page as any).loadEventFired(), loadTimeout]);
|
|
306
389
|
await new Promise(resolve => setTimeout(resolve, waitMs));
|
|
307
390
|
}
|
|
308
391
|
|
|
@@ -336,9 +419,18 @@ export async function evalInTab(
|
|
|
336
419
|
);
|
|
337
420
|
const evalPromise = client.Runtime.evaluate({
|
|
338
421
|
expression,
|
|
339
|
-
returnByValue: true
|
|
422
|
+
returnByValue: true,
|
|
423
|
+
awaitPromise: true
|
|
340
424
|
});
|
|
341
425
|
const result = await Promise.race([evalPromise, timeout]);
|
|
426
|
+
// Check for exceptions (syntax errors, thrown errors, etc.)
|
|
427
|
+
if (result.exceptionDetails) {
|
|
428
|
+
const desc = result.exceptionDetails.exception?.description
|
|
429
|
+
|| result.exceptionDetails.text
|
|
430
|
+
|| 'Unknown error';
|
|
431
|
+
await client.close();
|
|
432
|
+
return { __error: desc };
|
|
433
|
+
}
|
|
342
434
|
await client.close();
|
|
343
435
|
return result.result.value ?? null;
|
|
344
436
|
} catch (error) {
|
|
@@ -683,3 +775,43 @@ export async function focusTab(
|
|
|
683
775
|
throw error;
|
|
684
776
|
}
|
|
685
777
|
}
|
|
778
|
+
|
|
779
|
+
/**
 * Raw CDP key typing — no clear step, no element focus.
 *
 * Dispatches one keyDown/keyUp pair per character through
 * `Input.dispatchKeyEvent`, so the text lands in whatever currently has
 * focus in the tab. Designed for apps like Google Sheets where the
 * Ctrl+A/Backspace clear used by `fillFields` causes side effects.
 *
 * @param tabPattern - Forwarded to `resolveTab` to pick the target tab.
 * @param keys - Text to type, one key event pair per character.
 * @param options - CDP `port` (default 9222) and `host` (default
 *   'localhost'); optional `submit` of "enter" or "tab" presses that key
 *   once after typing. Any other `submit` value is ignored.
 * @returns `typed` — number of characters dispatched; `submitted` — whether
 *   a submit key was pressed.
 * @throws Rethrows any error raised while resolving the tab, connecting,
 *   dispatching key events, or closing the client.
 */
export async function typeKeys(
  tabPattern: string,
  keys: string,
  options: { port?: number; host?: string; submit?: string }
): Promise<{ typed: number; submitted?: boolean }> {
  const port = options.port || 9222;
  const host = options.host || 'localhost';

  const tab = await resolveTab(tabPattern, port, host);
  const client = await connectToTab(tab.id, port, host);
  const cdp = client as any;

  // Press and release a named (non-text) key such as Enter or Tab.
  const pressKey = async (key: string, virtualKeyCode: number): Promise<void> => {
    const base = {
      key,
      code: key,
      windowsVirtualKeyCode: virtualKeyCode,
      nativeVirtualKeyCode: virtualKeyCode
    };
    await cdp.Input.dispatchKeyEvent({ type: 'keyDown', ...base });
    await cdp.Input.dispatchKeyEvent({ type: 'keyUp', ...base });
  };

  try {
    // Count what is actually dispatched: for...of walks code points, so the
    // total can differ from keys.length (UTF-16 units) for characters outside
    // the Basic Multilingual Plane (e.g. emoji).
    // NOTE(review): unlike fillFields, '\n' and '\t' inside `keys` are sent
    // as plain characters here, not as Enter/Tab presses — confirm intended.
    let typed = 0;
    for (const char of keys) {
      await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: char, text: char });
      await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: char });
      typed++;
    }

    let submitted = false;
    if (options.submit === 'enter') {
      await pressKey('Enter', 13);
      submitted = true;
    } else if (options.submit === 'tab') {
      await pressKey('Tab', 9);
      submitted = true;
    }

    return { typed, submitted };
  } finally {
    // Single cleanup point: runs on both success and failure, and never
    // attempts a second close() if the first one throws.
    await client.close();
  }
}
|
package/src/api/recon.ts
CHANGED
|
@@ -25,7 +25,9 @@ export interface ReconResult {
|
|
|
25
25
|
role: string | null;
|
|
26
26
|
x: number;
|
|
27
27
|
y: number;
|
|
28
|
+
data?: Record<string, string>;
|
|
28
29
|
}[];
|
|
30
|
+
totalElements: number;
|
|
29
31
|
forms: {
|
|
30
32
|
action: string | null;
|
|
31
33
|
method: string | null;
|
|
@@ -77,7 +79,14 @@ const EXTRACTION_SCRIPT = `
|
|
|
77
79
|
const tag = el.tagName.toLowerCase();
|
|
78
80
|
if (el.getAttribute('aria-label')) return tag + '[aria-label="' + el.getAttribute('aria-label') + '"]';
|
|
79
81
|
if (el.getAttribute('data-testid')) return '[data-testid="' + el.getAttribute('data-testid') + '"]';
|
|
80
|
-
if (el.getAttribute('name'))
|
|
82
|
+
if (el.getAttribute('name')) {
|
|
83
|
+
const nameSelector = tag + '[name="' + el.getAttribute('name') + '"]';
|
|
84
|
+
// Disambiguate radio/checkbox with same name by adding value
|
|
85
|
+
if ((el.type === 'radio' || el.type === 'checkbox') && el.value) {
|
|
86
|
+
return nameSelector + '[value="' + el.value + '"]';
|
|
87
|
+
}
|
|
88
|
+
return nameSelector;
|
|
89
|
+
}
|
|
81
90
|
// Positional fallback
|
|
82
91
|
const parent = el.parentElement;
|
|
83
92
|
if (!parent) return tag;
|
|
@@ -153,7 +162,12 @@ const EXTRACTION_SCRIPT = `
|
|
|
153
162
|
selector: buildSelector(el),
|
|
154
163
|
role: el.getAttribute('role'),
|
|
155
164
|
x: Math.round(rect.x),
|
|
156
|
-
y: Math.round(rect.y)
|
|
165
|
+
y: Math.round(rect.y),
|
|
166
|
+
...(el.dataset && Object.keys(el.dataset).length > 0 ? {
|
|
167
|
+
data: Object.fromEntries(
|
|
168
|
+
['date','iso','value','testid','id'].filter(k => el.dataset[k]).map(k => [k, el.dataset[k]])
|
|
169
|
+
)
|
|
170
|
+
} : {})
|
|
157
171
|
});
|
|
158
172
|
}
|
|
159
173
|
}
|
|
@@ -255,6 +269,16 @@ const EXTRACTION_SCRIPT = `
|
|
|
255
269
|
}
|
|
256
270
|
}
|
|
257
271
|
}
|
|
272
|
+
// Deduplicate: remove overlays that are descendants of other overlays
|
|
273
|
+
const deduped = overlays.filter((o, i) => {
|
|
274
|
+
const el = document.querySelector(o.selector);
|
|
275
|
+
if (!el) return true;
|
|
276
|
+
return !overlays.some((other, j) => {
|
|
277
|
+
if (i === j) return false;
|
|
278
|
+
const otherEl = document.querySelector(other.selector);
|
|
279
|
+
return otherEl && otherEl !== el && otherEl.contains(el);
|
|
280
|
+
});
|
|
281
|
+
});
|
|
258
282
|
|
|
259
283
|
// ---- Captcha detection ----
|
|
260
284
|
const captchas = [];
|
|
@@ -285,9 +309,10 @@ const EXTRACTION_SCRIPT = `
|
|
|
285
309
|
headings,
|
|
286
310
|
navigation: navigation.slice(0, 50),
|
|
287
311
|
elements: elements.slice(0, 150),
|
|
312
|
+
totalElements: elements.length,
|
|
288
313
|
forms,
|
|
289
314
|
landmarks,
|
|
290
|
-
overlays,
|
|
315
|
+
overlays: deduped,
|
|
291
316
|
captchas,
|
|
292
317
|
contentSummary
|
|
293
318
|
};
|
|
@@ -352,6 +377,7 @@ export async function reconUrl(
|
|
|
352
377
|
headings: data.headings,
|
|
353
378
|
navigation: data.navigation,
|
|
354
379
|
elements: data.elements,
|
|
380
|
+
totalElements: data.totalElements || data.elements?.length || 0,
|
|
355
381
|
forms: data.forms,
|
|
356
382
|
contentSummary: data.contentSummary,
|
|
357
383
|
landmarks: data.landmarks,
|
|
@@ -414,6 +440,7 @@ export async function reconTab(
|
|
|
414
440
|
headings: data.headings,
|
|
415
441
|
navigation: data.navigation,
|
|
416
442
|
elements: data.elements,
|
|
443
|
+
totalElements: data.totalElements || data.elements?.length || 0,
|
|
417
444
|
forms: data.forms,
|
|
418
445
|
contentSummary: data.contentSummary,
|
|
419
446
|
landmarks: data.landmarks,
|
package/src/api/server.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import http from 'node:http';
|
|
4
4
|
import { reconUrl, reconTab } from './recon.js';
|
|
5
|
-
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays } from './act.js';
|
|
5
|
+
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays, typeKeys } from './act.js';
|
|
6
6
|
import { getAllTabs } from '../chrome/tabs.js';
|
|
7
7
|
|
|
8
8
|
const PORT = parseInt(process.env.API_PORT || '3456', 10);
|
|
@@ -160,15 +160,34 @@ const server = http.createServer(async (req, res) => {
|
|
|
160
160
|
return json(res, 400, { error: 'Provide "tab" and "expression"' });
|
|
161
161
|
}
|
|
162
162
|
const result = await evalInTab(body.tab, body.expression, { port: CDP_PORT, host: CDP_HOST });
|
|
163
|
+
if (result && result.__error) {
|
|
164
|
+
return json(res, 200, { result: null, error: result.__error });
|
|
165
|
+
}
|
|
163
166
|
return json(res, 200, { result });
|
|
164
167
|
}
|
|
165
168
|
|
|
169
|
+
// POST /type — raw CDP key typing, no clear step (for Google Sheets, contenteditable, etc.)
|
|
170
|
+
if (path === '/type' && req.method === 'POST') {
|
|
171
|
+
const body = parseBody(await readBody(req));
|
|
172
|
+
if (!body.tab || !body.keys) {
|
|
173
|
+
return json(res, 400, { error: 'Provide "tab" and "keys" (string to type), optional "submit": "enter"|"tab"' });
|
|
174
|
+
}
|
|
175
|
+
const result = await typeKeys(body.tab, body.keys, { port: CDP_PORT, host: CDP_HOST, submit: body.submit });
|
|
176
|
+
return json(res, 200, result);
|
|
177
|
+
}
|
|
178
|
+
|
|
166
179
|
// POST /navigate — go to url, back, or forward in same tab
|
|
167
180
|
if (path === '/navigate' && req.method === 'POST') {
|
|
168
181
|
const body = parseBody(await readBody(req));
|
|
169
182
|
if (!body.tab) {
|
|
170
183
|
return json(res, 400, { error: 'Provide "tab" and one of: "url", "back":true, "forward":true' });
|
|
171
184
|
}
|
|
185
|
+
if (!body.url && !body.back && !body.forward) {
|
|
186
|
+
return json(res, 400, { error: 'Provide one of: "url", "back":true, "forward":true' });
|
|
187
|
+
}
|
|
188
|
+
if ((body.url && body.back) || (body.url && body.forward) || (body.back && body.forward)) {
|
|
189
|
+
return json(res, 400, { error: 'Provide only one of: "url", "back", "forward"' });
|
|
190
|
+
}
|
|
172
191
|
const result = await navigatePage(body, { port: CDP_PORT, host: CDP_HOST });
|
|
173
192
|
return json(res, 200, result);
|
|
174
193
|
}
|