@phnx-labs/agents-cli 1.17.3 → 1.17.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/commands/browser.js +60 -1
- package/dist/lib/browser/editor.d.ts +3 -0
- package/dist/lib/browser/editor.js +50 -0
- package/dist/lib/browser/input.js +42 -5
- package/dist/lib/browser/ipc.js +22 -3
- package/dist/lib/browser/refs.d.ts +1 -0
- package/dist/lib/browser/refs.js +36 -2
- package/dist/lib/browser/service.d.ts +11 -1
- package/dist/lib/browser/service.js +65 -4
- package/dist/lib/browser/types.d.ts +13 -1
- package/dist/lib/browser/upload.d.ts +54 -0
- package/dist/lib/browser/upload.js +264 -0
- package/package.json +2 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.17.4
|
|
4
|
+
|
|
5
|
+
**Browser**
|
|
6
|
+
|
|
7
|
+
- `agents browser type` now detects rich-text editor frameworks (Lexical, ProseMirror, Slate, Draft.js, Quill, CKEditor5, Trix) by walking up to 5 ancestor levels from each textbox and tagging refs with `[editor=<framework>]`. Editor-tagged refs route through the WHATWG `beforeinput` dispatch (`InputEvent('beforeinput', { inputType: 'insertText', ... })`) for Lexical/ProseMirror/Slate/Quill/CKEditor5/Draft and `el.editor.insertString()` for Trix. `agents browser refs --json` surfaces the new `editor` field, and `type --clear` prepends a select-all + `deleteContentBackward` dispatch before inserting.
|
|
8
|
+
- Plain-input reliability also improved: `typeText` now issues a single CDP `Input.insertText` instead of per-character `dispatchKeyEvent`, so framework-controlled inputs (React, Vue, Solid, MUI/Chakra/Mantine `TextField`, masked-number fields, Canva-style pickers) actually receive `beforeinput`/`input`/`textInput` events. `focusNode` falls back to the first focusable descendant when `DOM.focus` throws "Element is not focusable" — fixes wrapper-ref UIs like Slack composer, Linear comments, Notion blocks, and every MUI/Chakra/Mantine `TextField`. ([#12](https://github.com/phnx-labs/agents-cli/pull/12))
|
|
9
|
+
|
|
3
10
|
## 1.17.3
|
|
4
11
|
|
|
5
12
|
**Browser**
|
package/dist/commands/browser.js
CHANGED
|
@@ -754,6 +754,7 @@ function registerTaskCommands(browser) {
|
|
|
754
754
|
.option('-t, --tab <tabId>', 'Tab ID (defaults to current)')
|
|
755
755
|
.option('--all', 'Include non-interactive elements')
|
|
756
756
|
.option('-l, --limit <n>', 'Max elements (default 500)', '500')
|
|
757
|
+
.option('--json', 'Output machine-readable JSON')
|
|
757
758
|
.action(async (task, opts) => {
|
|
758
759
|
const response = await sendIPCRequest({
|
|
759
760
|
action: 'refs',
|
|
@@ -763,9 +764,18 @@ function registerTaskCommands(browser) {
|
|
|
763
764
|
limit: parseInt(opts.limit, 10),
|
|
764
765
|
});
|
|
765
766
|
if (!response.ok) {
|
|
766
|
-
|
|
767
|
+
if (opts.json) {
|
|
768
|
+
console.log(JSON.stringify({ ok: false, error: response.error }));
|
|
769
|
+
}
|
|
770
|
+
else {
|
|
771
|
+
console.error(response.error);
|
|
772
|
+
}
|
|
767
773
|
process.exit(1);
|
|
768
774
|
}
|
|
775
|
+
if (opts.json) {
|
|
776
|
+
console.log(JSON.stringify(response.nodes ?? [], null, 2));
|
|
777
|
+
return;
|
|
778
|
+
}
|
|
769
779
|
console.log(response.refs);
|
|
770
780
|
});
|
|
771
781
|
browser
|
|
@@ -789,6 +799,7 @@ function registerTaskCommands(browser) {
|
|
|
789
799
|
.command('type <task> <ref> <text>')
|
|
790
800
|
.description('Type text into an element by ref')
|
|
791
801
|
.option('-t, --tab <tabId>', 'Tab ID (defaults to current)')
|
|
802
|
+
.option('--clear', 'Clear editor content before typing')
|
|
792
803
|
.action(async (task, ref, text, opts) => {
|
|
793
804
|
const response = await sendIPCRequest({
|
|
794
805
|
action: 'type',
|
|
@@ -796,6 +807,7 @@ function registerTaskCommands(browser) {
|
|
|
796
807
|
tabId: opts.tab,
|
|
797
808
|
ref: parseInt(ref, 10),
|
|
798
809
|
text,
|
|
810
|
+
clear: opts.clear,
|
|
799
811
|
});
|
|
800
812
|
if (!response.ok) {
|
|
801
813
|
console.error(response.error);
|
|
@@ -859,6 +871,53 @@ function registerTaskCommands(browser) {
|
|
|
859
871
|
}
|
|
860
872
|
console.log('Scrolled');
|
|
861
873
|
});
|
|
874
|
+
// `agents browser upload <task>` — hands one or more local files to the page.
// The pattern is chosen from the flags:
//   --trigger <n>  -> 'chooser' (click the ref, intercept the OS file dialog)
//   --drop         -> 'drop'    (synthetic drag-drop onto --ref)
//   --input        -> 'input'   (DOM.setFileInputFiles on --ref)
//   otherwise      -> 'auto'    (the service inspects --ref and decides)
// `--trigger` takes precedence over `--drop`/`--input` when both are given.
browser
    .command('upload <task>')
    .description('Upload file(s) — supports hidden file inputs, drag-drop targets, and OS chooser interception')
    .option('-t, --tab <tabId>', 'Tab ID (defaults to current)')
    .option('-r, --ref <n>', 'Ref of the upload target element (file input or drop zone)', (v) => parseInt(v, 10))
    .option('--trigger <n>', 'Ref of a button that opens the OS file chooser (Pattern C)', (v) => parseInt(v, 10))
    .option('-f, --file <path...>', 'Absolute path(s) to file(s) to upload (repeatable)')
    .option('--drop', 'Force drag-drop pattern even if ref is an <input type=file>')
    .option('--input', 'Force file-input pattern (DOM.setFileInputFiles)')
    .option('--timeout <ms>', 'Timeout for chooser interception (Pattern C)', (v) => parseInt(v, 10))
    .action(async (task, opts) => {
        const files = opts.file ?? [];
        if (files.length === 0) {
            console.error('--file <path> is required (repeat for multiple files)');
            process.exit(1);
        }
        if (opts.ref === undefined && opts.trigger === undefined) {
            console.error('--ref <n> or --trigger <n> is required');
            process.exit(1);
        }
        // `parseInt` yields NaN for non-numeric input. NaN is not `undefined`,
        // so without this check it would satisfy the presence test above and
        // be forwarded as if it were a real ref/timeout.
        const numericFlags = [
            ['--ref', opts.ref],
            ['--trigger', opts.trigger],
            ['--timeout', opts.timeout],
        ];
        for (const [flag, value] of numericFlags) {
            if (value !== undefined && Number.isNaN(value)) {
                console.error(`${flag} must be a number`);
                process.exit(1);
            }
        }
        if (opts.drop && opts.input) {
            console.error('--drop and --input are mutually exclusive');
            process.exit(1);
        }
        let mode = 'auto';
        if (opts.trigger !== undefined) {
            mode = 'chooser';
        }
        else if (opts.drop) {
            mode = 'drop';
        }
        else if (opts.input) {
            mode = 'input';
        }
        const response = await sendIPCRequest({
            action: 'upload',
            task,
            tabId: opts.tab,
            ref: opts.ref,
            trigger: opts.trigger,
            files,
            uploadMode: mode,
            timeout: opts.timeout,
        });
        if (!response.ok) {
            console.error(response.error);
            process.exit(1);
        }
        // `response.uploadMode` is the pattern the service actually used,
        // which may differ from the requested 'auto'.
        console.log(`Uploaded ${files.length} file${files.length === 1 ? '' : 's'} (${response.uploadMode})`);
    });
|
|
862
921
|
// ─── Viewport & Device ───────────────────────────────────────────────────────
|
|
863
922
|
const setCmd = browser.command('set').description('Set browser emulation options');
|
|
864
923
|
setCmd
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
// The *_FN constants below are function sources that run inside the page via
// `Runtime.callFunctionOn`, with `this` bound to the resolved DOM element.

// Focuses the element, collapses the selection to the end of its contents and
// dispatches a `beforeinput` / `insertText` event carrying `text`.
const BEFOREINPUT_INSERT_FN = `(function(text) {
    this.focus();
    var sel = window.getSelection();
    var range = document.createRange();
    range.selectNodeContents(this);
    range.collapse(false);
    sel.removeAllRanges();
    sel.addRange(range);
    this.dispatchEvent(new InputEvent('beforeinput', {
        inputType: 'insertText',
        data: text,
        bubbles: true,
        cancelable: true,
        composed: true,
    }));
})`;
// Focuses the element, selects all of its contents and dispatches a
// `beforeinput` / `deleteContentBackward` event.
const BEFOREINPUT_CLEAR_FN = `(function() {
    this.focus();
    var sel = window.getSelection();
    var range = document.createRange();
    range.selectNodeContents(this);
    sel.removeAllRanges();
    sel.addRange(range);
    this.dispatchEvent(new InputEvent('beforeinput', {
        inputType: 'deleteContentBackward',
        bubbles: true,
        cancelable: true,
        composed: true,
    }));
})`;
// Trix exposes its editing API on the <trix-editor> element. The ref may be the
// element itself or a descendant of it, so resolve the host with closest().
const TRIX_INSERT_FN = `(function(text) {
    var host = this.closest ? this.closest('trix-editor') : null;
    if (!host || !host.editor) throw new Error('No <trix-editor> instance found for this element');
    host.editor.insertString(text);
})`;
// Selects everything except the trailing newline Trix always keeps at the end
// of a document, then deletes the selection.
const TRIX_CLEAR_FN = `(function() {
    var host = this.closest ? this.closest('trix-editor') : null;
    if (!host || !host.editor) throw new Error('No <trix-editor> instance found for this element');
    var editor = host.editor;
    var end = editor.getDocument().toString().length - 1;
    if (end > 0) {
        editor.setSelectedRange([0, end]);
        editor.deleteInDirection('backward');
    }
})`;
/**
 * Run one of the page-side scripts against `objectId`.
 *
 * `Runtime.callFunctionOn` resolves even when the page function throws and
 * reports the failure in `exceptionDetails`; convert that into a rejection so
 * a failed insert is not reported as success.
 */
async function callEditorScript(cdp, sessionId, objectId, functionDeclaration, args) {
    const response = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration, arguments: args, returnByValue: true }, sessionId);
    const details = response?.exceptionDetails;
    if (details) {
        const reason = details.exception?.description ?? details.text ?? 'unknown error';
        throw new Error(`Editor script failed: ${reason}`);
    }
}
/**
 * Type `text` into a rich-text editor element.
 *
 * @param cdp       CDP client exposing `send(method, params, sessionId)`.
 * @param sessionId CDP session the node belongs to.
 * @param node      Ref node; reads `backendNodeId`, `editor` and `ref`.
 * @param text      Text to insert.
 * @param clear     When true, remove existing content first (all editors,
 *                  including Trix).
 * @throws if the DOM node cannot be resolved or a page-side script throws.
 */
export async function typeEditorText(cdp, sessionId, node, text, clear = false) {
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId: node.backendNodeId }, sessionId);
    if (!object.objectId)
        throw new Error(`Could not resolve DOM node for ref ${node.ref}`);
    const objectId = object.objectId;
    const isTrix = node.editor === 'trix';
    try {
        if (clear) {
            await callEditorScript(cdp, sessionId, objectId, isTrix ? TRIX_CLEAR_FN : BEFOREINPUT_CLEAR_FN, []);
        }
        await callEditorScript(cdp, sessionId, objectId, isTrix ? TRIX_INSERT_FN : BEFOREINPUT_INSERT_FN, [{ value: text }]);
    }
    finally {
        // Always drop the remote object handle, even when a script failed.
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
|
|
@@ -8,11 +8,13 @@ export async function hoverAtCoords(cdp, sessionId, x, y) {
|
|
|
8
8
|
export async function scrollAtCoords(cdp, sessionId, x, y, deltaX, deltaY) {
|
|
9
9
|
await cdp.send('Input.dispatchMouseEvent', { type: 'mouseWheel', x, y, deltaX, deltaY }, sessionId);
|
|
10
10
|
}
|
|
11
|
+
// `Input.insertText` is the CDP native text-insertion method. It dispatches a
|
|
12
|
+
// real `beforeinput`/`input`/`textInput` sequence on the focused element, which
|
|
13
|
+
// is what framework-controlled inputs (React, Vue, Solid, contenteditable
|
|
14
|
+
// editors) actually listen for. Per-character `dispatchKeyEvent` only fires
|
|
15
|
+
// `keydown`/`keyup` with no input event, so controlled inputs ignore it.
|
|
11
16
|
export async function typeText(cdp, sessionId, text) {
|
|
12
|
-
|
|
13
|
-
await cdp.send('Input.dispatchKeyEvent', { type: 'keyDown', text: char }, sessionId);
|
|
14
|
-
await cdp.send('Input.dispatchKeyEvent', { type: 'keyUp', text: char }, sessionId);
|
|
15
|
-
}
|
|
17
|
+
await cdp.send('Input.insertText', { text }, sessionId);
|
|
16
18
|
}
|
|
17
19
|
const KEY_CODES = {
|
|
18
20
|
Enter: { key: 'Enter', code: 'Enter', keyCode: 13 },
|
|
@@ -50,6 +52,41 @@ export async function pressKey(cdp, sessionId, keyName) {
|
|
|
50
52
|
nativeVirtualKeyCode: keyInfo.keyCode,
|
|
51
53
|
}, sessionId);
|
|
52
54
|
}
|
|
55
|
+
// Page-side function (run via `Runtime.callFunctionOn`, `this` = wrapper
// element). Tries each candidate descendant in document order and returns true
// as soon as one actually becomes `document.activeElement`.
const FOCUS_DESCENDANT_FN = `(function() {
    const selector = 'input:not([disabled]):not([type=hidden]),textarea:not([disabled]),select:not([disabled]),[contenteditable=""],[contenteditable=true],[contenteditable="plaintext-only"],[tabindex]:not([tabindex="-1"])';
    const candidates = this.querySelectorAll(selector);
    for (const el of candidates) {
        el.focus();
        if (document.activeElement === el) return true;
    }
    return false;
})`;
// `DOM.focus` only works on natively focusable elements. UIs that wrap real
// inputs in styled containers (Slack composer, Linear comments, Notion blocks,
// Canva pickers, MUI/Chakra/Mantine TextField) often expose the wrapper as the
// accessible "ref" — focusing it throws "Element is not focusable". When that
// happens, walk the subtree for the first focusable descendant.
/**
 * Focus the node identified by `backendNodeId`, falling back to its first
 * focusable descendant when `DOM.focus` fails.
 *
 * @throws the original `DOM.focus` error when the fallback finds nothing or
 *         cannot run at all.
 */
export async function focusNode(cdp, sessionId, backendNodeId) {
    try {
        await cdp.send('DOM.focus', { backendNodeId }, sessionId);
        return;
    }
    catch (err) {
        let focused = false;
        try {
            focused = await focusFirstFocusableDescendant(cdp, sessionId, backendNodeId);
        }
        catch {
            // The fallback could not run (e.g. the node no longer resolves).
            // The DOM.focus failure is the root cause, so report that one
            // instead of letting the secondary error replace it.
        }
        if (!focused)
            throw err;
    }
}
/**
 * Resolve the node to a remote object and run FOCUS_DESCENDANT_FN on it.
 *
 * @returns true only when the page function reported that a descendant took
 *          focus; false when the node has no remote object or nothing focused.
 */
async function focusFirstFocusableDescendant(cdp, sessionId, backendNodeId) {
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    if (!object.objectId)
        return false;
    const objectId = object.objectId;
    try {
        const { result } = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: FOCUS_DESCENDANT_FN, returnByValue: true }, sessionId);
        return result.value === true;
    }
    finally {
        // Release the remote handle whether or not the call succeeded.
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
|
package/dist/lib/browser/ipc.js
CHANGED
|
@@ -164,11 +164,17 @@ export class BrowserIPCServer {
|
|
|
164
164
|
if (!request.task) {
|
|
165
165
|
return { ok: false, error: 'Task required' };
|
|
166
166
|
}
|
|
167
|
-
const { refs } = await this.service.refs(request.task, request.tabId, {
|
|
167
|
+
const { refs, nodeMap } = await this.service.refs(request.task, request.tabId, {
|
|
168
168
|
interactive: request.interactive ?? true,
|
|
169
169
|
limit: request.limit ?? 500,
|
|
170
170
|
});
|
|
171
|
-
|
|
171
|
+
const nodes = Array.from(nodeMap.values()).map(n => {
|
|
172
|
+
const entry = { ref: n.ref, role: n.role, name: n.name, attrs: n.attrs };
|
|
173
|
+
if (n.editor !== undefined)
|
|
174
|
+
entry.editor = n.editor;
|
|
175
|
+
return entry;
|
|
176
|
+
});
|
|
177
|
+
return { ok: true, refs, nodes };
|
|
172
178
|
}
|
|
173
179
|
case 'click': {
|
|
174
180
|
if (!request.task || request.ref === undefined) {
|
|
@@ -181,7 +187,7 @@ export class BrowserIPCServer {
|
|
|
181
187
|
if (!request.task || request.ref === undefined || !request.text) {
|
|
182
188
|
return { ok: false, error: 'Task, ref, and text required' };
|
|
183
189
|
}
|
|
184
|
-
await this.service.type(request.task, request.ref, request.text, request.tabId);
|
|
190
|
+
await this.service.type(request.task, request.ref, request.text, request.tabId, request.clear);
|
|
185
191
|
return { ok: true };
|
|
186
192
|
}
|
|
187
193
|
case 'press': {
|
|
@@ -290,6 +296,19 @@ export class BrowserIPCServer {
|
|
|
290
296
|
const downloadPath = await this.service.waitForDownload(request.task, request.timeout);
|
|
291
297
|
return { ok: true, downloadPath };
|
|
292
298
|
}
|
|
299
|
+
case 'upload': {
|
|
300
|
+
if (!request.task || !request.files || request.files.length === 0) {
|
|
301
|
+
return { ok: false, error: 'Task and at least one file required' };
|
|
302
|
+
}
|
|
303
|
+
const result = await this.service.upload(request.task, request.files, {
|
|
304
|
+
ref: request.ref,
|
|
305
|
+
trigger: request.trigger,
|
|
306
|
+
mode: request.uploadMode,
|
|
307
|
+
tabHint: request.tabId,
|
|
308
|
+
timeout: request.timeout,
|
|
309
|
+
});
|
|
310
|
+
return { ok: true, uploadMode: result.mode };
|
|
311
|
+
}
|
|
293
312
|
default:
|
|
294
313
|
return { ok: false, error: `Unknown action: ${request.action}` };
|
|
295
314
|
}
|
package/dist/lib/browser/refs.js
CHANGED
|
@@ -1,3 +1,31 @@
|
|
|
1
|
+
// Page-side function (run via `Runtime.callFunctionOn`, `this` = the textbox
// element). Checks the element and then its ancestors — at most 5 elements in
// total, stopping at <html> — for a marker of a known rich-text framework and
// returns the framework tag, or null when none matches.
const EDITOR_DETECT_FN = `(function() {
    let el = this;
    for (let i = 0; i < 5; i++) {
        if (!el || el === document.documentElement) break;
        if (el.hasAttribute && el.hasAttribute('data-lexical-editor')) return 'lexical';
        if (el.classList && el.classList.contains('ProseMirror')) return 'prosemirror';
        if (el.hasAttribute && el.hasAttribute('data-slate-editor')) return 'slate';
        if (el.classList && Array.from(el.classList).some(function(c) { return /^DraftEditor-/.test(c); })) return 'draft';
        if (el.classList && el.classList.contains('ql-editor')) return 'quill';
        if (el.classList && el.classList.contains('ck-editor__editable')) return 'ckeditor5';
        if (el.tagName === 'TRIX-EDITOR') return 'trix';
        el = el.parentElement;
    }
    return null;
})`;
/**
 * Detect which rich-text framework (if any) owns the given DOM node.
 *
 * @returns the framework tag string produced by EDITOR_DETECT_FN, or
 *          `undefined` when the node has no remote object, nothing matched,
 *          or the page-side function reported an exception.
 */
async function detectEditorForNode(cdp, sessionId, backendNodeId) {
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    if (!object.objectId)
        return undefined;
    const objectId = object.objectId;
    try {
        const { result, exceptionDetails } = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: EDITOR_DETECT_FN, returnByValue: true }, sessionId);
        // A page-side throw is reported here rather than as a rejection; in
        // that case `result` describes the exception, not a framework tag.
        if (exceptionDetails)
            return undefined;
        const tag = result?.value;
        // Only a string is a valid tag; anything else must not leak into the
        // ref listing as `[editor=...]`.
        return typeof tag === 'string' ? tag : undefined;
    }
    finally {
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
|
|
1
29
|
const INTERACTIVE_ROLES = new Set([
|
|
2
30
|
'button',
|
|
3
31
|
'link',
|
|
@@ -59,12 +87,18 @@ export async function getRefs(cdp, sessionId, opts = {}) {
|
|
|
59
87
|
attrs,
|
|
60
88
|
backendNodeId: node.backendDOMNodeId,
|
|
61
89
|
};
|
|
90
|
+
if (role === 'textbox' && node.backendDOMNodeId) {
|
|
91
|
+
const editor = await detectEditorForNode(cdp, sessionId, node.backendDOMNodeId);
|
|
92
|
+
if (editor)
|
|
93
|
+
refNode.editor = editor;
|
|
94
|
+
}
|
|
62
95
|
nodeMap.set(ref, refNode);
|
|
63
96
|
const attrStr = attrs.length > 0 ? ` [${attrs.join('] [')}]` : '';
|
|
97
|
+
const editorStr = refNode.editor ? ` [editor=${refNode.editor}]` : '';
|
|
64
98
|
const nameStr = name ? ` "${truncate(name, 50)}"` : '';
|
|
65
99
|
const line = compact
|
|
66
|
-
? `${role}${nameStr} [ref=${ref}]${attrStr}`
|
|
67
|
-
: `- ${role}${nameStr} [ref=${ref}]${attrStr}`;
|
|
100
|
+
? `${role}${nameStr} [ref=${ref}]${attrStr}${editorStr}`
|
|
101
|
+
: `- ${role}${nameStr} [ref=${ref}]${attrStr}${editorStr}`;
|
|
68
102
|
lines.push(line);
|
|
69
103
|
}
|
|
70
104
|
return { refs: lines.join('\n'), nodeMap };
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { type TabInfo, type ProfileStatus, type HistoricalTask } from './types.js';
|
|
2
2
|
import { type RefOpts, type RefNode } from './refs.js';
|
|
3
3
|
import type { TargetFilter } from './types.js';
|
|
4
|
+
export type UploadMode = 'auto' | 'input' | 'drop' | 'chooser';
|
|
4
5
|
/**
|
|
5
6
|
* Parse a `targetFilter` string into its kind + value, or return `null`
|
|
6
7
|
* when the input is missing or malformed. Filter syntax:
|
|
@@ -99,10 +100,19 @@ export declare class BrowserService {
|
|
|
99
100
|
nodeMap: Map<number, RefNode>;
|
|
100
101
|
}>;
|
|
101
102
|
click(taskId: string, ref: number, tabHint?: string): Promise<void>;
|
|
102
|
-
type(taskId: string, ref: number, text: string, tabHint?: string): Promise<void>;
|
|
103
|
+
type(taskId: string, ref: number, text: string, tabHint?: string, clear?: boolean): Promise<void>;
|
|
103
104
|
press(taskId: string, key: string, tabHint?: string): Promise<void>;
|
|
104
105
|
hover(taskId: string, ref: number, tabHint?: string): Promise<void>;
|
|
105
106
|
scroll(taskId: string, deltaX: number, deltaY: number, atX?: number, atY?: number, tabHint?: string): Promise<void>;
|
|
107
|
+
upload(taskId: string, files: string[], options: {
|
|
108
|
+
ref?: number;
|
|
109
|
+
trigger?: number;
|
|
110
|
+
mode?: UploadMode;
|
|
111
|
+
tabHint?: string;
|
|
112
|
+
timeout?: number;
|
|
113
|
+
}): Promise<{
|
|
114
|
+
mode: 'input' | 'drop' | 'chooser';
|
|
115
|
+
}>;
|
|
106
116
|
status(profileName?: string): Promise<ProfileStatus[]>;
|
|
107
117
|
private reconcileFromDisk;
|
|
108
118
|
setViewport(taskId: string, width: number, height: number, options?: {
|
|
@@ -8,6 +8,8 @@ import { connectSSH } from './drivers/ssh.js';
|
|
|
8
8
|
import { generateTaskId, generateShortId, generateFunName, } from './types.js';
|
|
9
9
|
import { getRefs, resolveRefToCoords } from './refs.js';
|
|
10
10
|
import { clickAtCoords, hoverAtCoords, scrollAtCoords, typeText, pressKey, focusNode } from './input.js';
|
|
11
|
+
import { typeEditorText } from './editor.js';
|
|
12
|
+
import { detectUploadPattern, uploadToDropTarget, uploadToFileInput, uploadViaFileChooser, } from './upload.js';
|
|
11
13
|
import { emit } from '../events.js';
|
|
12
14
|
/**
|
|
13
15
|
* Parse a `targetFilter` string into its kind + value, or return `null`
|
|
@@ -502,7 +504,7 @@ export class BrowserService {
|
|
|
502
504
|
const { x, y } = await resolveRefToCoords(conn.cdp, sessionId, nodeMap, ref);
|
|
503
505
|
await clickAtCoords(conn.cdp, sessionId, x, y);
|
|
504
506
|
}
|
|
505
|
-
async type(taskId, ref, text, tabHint) {
|
|
507
|
+
async type(taskId, ref, text, tabHint, clear) {
|
|
506
508
|
const { conn, task } = await this.findTask(taskId);
|
|
507
509
|
const shortId = tabHint ? await this.resolveTabHint(conn, task, tabHint) : this.resolveCurrentTab(task);
|
|
508
510
|
const cdpTargetId = this.getCdpTargetId(task, shortId);
|
|
@@ -514,10 +516,15 @@ export class BrowserService {
|
|
|
514
516
|
const node = nodeMap.get(ref);
|
|
515
517
|
if (!node)
|
|
516
518
|
throw new Error(`Ref ${ref} not found`);
|
|
517
|
-
if (node.
|
|
518
|
-
await
|
|
519
|
+
if (node.editor) {
|
|
520
|
+
await typeEditorText(conn.cdp, sessionId, node, text, clear);
|
|
521
|
+
}
|
|
522
|
+
else {
|
|
523
|
+
if (node.backendNodeId) {
|
|
524
|
+
await focusNode(conn.cdp, sessionId, node.backendNodeId);
|
|
525
|
+
}
|
|
526
|
+
await typeText(conn.cdp, sessionId, text);
|
|
519
527
|
}
|
|
520
|
-
await typeText(conn.cdp, sessionId, text);
|
|
521
528
|
}
|
|
522
529
|
async press(taskId, key, tabHint) {
|
|
523
530
|
const { conn, task } = await this.findTask(taskId);
|
|
@@ -551,6 +558,60 @@ export class BrowserService {
|
|
|
551
558
|
const sessionId = await this.getSessionId(conn, target.targetId);
|
|
552
559
|
await scrollAtCoords(conn.cdp, sessionId, atX ?? 0, atY ?? 0, deltaX, deltaY);
|
|
553
560
|
}
|
|
561
|
+
async upload(taskId, files, options) {
|
|
562
|
+
const { conn, task } = await this.findTask(taskId);
|
|
563
|
+
const shortId = options.tabHint
|
|
564
|
+
? await this.resolveTabHint(conn, task, options.tabHint)
|
|
565
|
+
: this.resolveCurrentTab(task);
|
|
566
|
+
const cdpTargetId = this.getCdpTargetId(task, shortId);
|
|
567
|
+
const target = await this.getTarget(conn, cdpTargetId);
|
|
568
|
+
if (!target)
|
|
569
|
+
throw new Error(`Tab ${shortId} not found`);
|
|
570
|
+
const sessionId = await this.getSessionId(conn, target.targetId);
|
|
571
|
+
// Match the user-facing ref numbering from `agents browser refs` (which
|
|
572
|
+
// defaults to interactive=true). The other action helpers in this file
|
|
573
|
+
// use interactive=false historically, but that produces ref numbers the
|
|
574
|
+
// user never sees — `--ref 1` then resolves to the RootWebArea instead of
|
|
575
|
+
// the first interactive element. Match the listing the user actually saw.
|
|
576
|
+
const { nodeMap } = await getRefs(conn.cdp, sessionId, { interactive: true, limit: 1000 });
|
|
577
|
+
const mode = options.mode ?? 'auto';
|
|
578
|
+
if (options.trigger !== undefined || mode === 'chooser') {
|
|
579
|
+
const ref = options.trigger ?? options.ref;
|
|
580
|
+
if (ref === undefined) {
|
|
581
|
+
throw new Error('chooser mode requires --trigger <ref> (or --ref) pointing at the button that opens the file dialog');
|
|
582
|
+
}
|
|
583
|
+
const node = nodeMap.get(ref);
|
|
584
|
+
if (!node)
|
|
585
|
+
throw new Error(`Ref ${ref} not found`);
|
|
586
|
+
await uploadViaFileChooser(conn.cdp, sessionId, { node, nodeMap }, files, options.timeout);
|
|
587
|
+
return { mode: 'chooser' };
|
|
588
|
+
}
|
|
589
|
+
if (options.ref === undefined) {
|
|
590
|
+
throw new Error('upload requires --ref <n> (target element) or --trigger <n> (button that opens chooser)');
|
|
591
|
+
}
|
|
592
|
+
const node = nodeMap.get(options.ref);
|
|
593
|
+
if (!node)
|
|
594
|
+
throw new Error(`Ref ${options.ref} not found`);
|
|
595
|
+
if (!node.backendNodeId)
|
|
596
|
+
throw new Error(`Ref ${options.ref} has no DOM node`);
|
|
597
|
+
let resolved;
|
|
598
|
+
if (mode === 'input') {
|
|
599
|
+
resolved = 'input';
|
|
600
|
+
}
|
|
601
|
+
else if (mode === 'drop') {
|
|
602
|
+
resolved = 'drop';
|
|
603
|
+
}
|
|
604
|
+
else {
|
|
605
|
+
resolved = await detectUploadPattern(conn.cdp, sessionId, node.backendNodeId);
|
|
606
|
+
}
|
|
607
|
+
if (resolved === 'input') {
|
|
608
|
+
await uploadToFileInput(conn.cdp, sessionId, node.backendNodeId, files);
|
|
609
|
+
}
|
|
610
|
+
else {
|
|
611
|
+
await uploadToDropTarget(conn.cdp, sessionId, node.backendNodeId, files);
|
|
612
|
+
}
|
|
613
|
+
return { mode: resolved };
|
|
614
|
+
}
|
|
554
615
|
async status(profileName) {
|
|
555
616
|
const seen = new Set();
|
|
556
617
|
const statuses = [];
|
|
@@ -83,7 +83,7 @@ export interface HistoricalTask {
|
|
|
83
83
|
domains: string[];
|
|
84
84
|
tabCount: number;
|
|
85
85
|
}
|
|
86
|
-
export type IPCAction = 'start' | 'launch-profile' | 'done' | 'stop' | 'status' | 'history' | 'navigate' | 'tab-add' | 'tab-focus' | 'tab-close' | 'tab-list' | 'evaluate' | 'screenshot' | 'refs' | 'click' | 'type' | 'press' | 'hover' | 'scroll' | 'set-viewport' | 'set-device' | 'console' | 'errors' | 'requests' | 'response-body' | 'wait' | 'set-download-path' | 'wait-download';
|
|
86
|
+
export type IPCAction = 'start' | 'launch-profile' | 'done' | 'stop' | 'status' | 'history' | 'navigate' | 'tab-add' | 'tab-focus' | 'tab-close' | 'tab-list' | 'evaluate' | 'screenshot' | 'refs' | 'click' | 'type' | 'press' | 'hover' | 'scroll' | 'set-viewport' | 'set-device' | 'console' | 'errors' | 'requests' | 'response-body' | 'wait' | 'set-download-path' | 'wait-download' | 'upload';
|
|
87
87
|
export interface IPCRequest {
|
|
88
88
|
action: IPCAction;
|
|
89
89
|
task?: string;
|
|
@@ -116,6 +116,9 @@ export interface IPCRequest {
|
|
|
116
116
|
waitValue?: string | number;
|
|
117
117
|
timeout?: number;
|
|
118
118
|
downloadPath?: string;
|
|
119
|
+
files?: string[];
|
|
120
|
+
trigger?: number;
|
|
121
|
+
uploadMode?: 'auto' | 'input' | 'drop' | 'chooser';
|
|
119
122
|
}
|
|
120
123
|
export interface IPCResponse {
|
|
121
124
|
ok: boolean;
|
|
@@ -129,6 +132,7 @@ export interface IPCResponse {
|
|
|
129
132
|
result?: unknown;
|
|
130
133
|
path?: string;
|
|
131
134
|
refs?: string;
|
|
135
|
+
nodes?: RefNodeJson[];
|
|
132
136
|
port?: number;
|
|
133
137
|
pid?: number;
|
|
134
138
|
logs?: ConsoleEntry[];
|
|
@@ -137,6 +141,7 @@ export interface IPCResponse {
|
|
|
137
141
|
body?: string;
|
|
138
142
|
downloadPath?: string;
|
|
139
143
|
devices?: string[];
|
|
144
|
+
uploadMode?: 'input' | 'drop' | 'chooser';
|
|
140
145
|
}
|
|
141
146
|
export interface ConsoleEntry {
|
|
142
147
|
level: 'log' | 'info' | 'warn' | 'error';
|
|
@@ -160,6 +165,13 @@ export interface NetworkRequest {
|
|
|
160
165
|
mimeType?: string;
|
|
161
166
|
timestamp: number;
|
|
162
167
|
}
|
|
168
|
+
export interface RefNodeJson {
|
|
169
|
+
ref: number;
|
|
170
|
+
role: string;
|
|
171
|
+
name: string;
|
|
172
|
+
attrs: string[];
|
|
173
|
+
editor?: string;
|
|
174
|
+
}
|
|
163
175
|
export interface DeviceDescriptor {
|
|
164
176
|
width: number;
|
|
165
177
|
height: number;
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import type { CDPClient } from './cdp.js';
|
|
2
|
+
import { type RefNode } from './refs.js';
|
|
3
|
+
/**
|
|
4
|
+
* File upload strategies for `agents browser upload`.
|
|
5
|
+
*
|
|
6
|
+
* Every uploader on the web is one of three patterns:
|
|
7
|
+
*
|
|
8
|
+
* A. Direct file input — the page exposes (or hides) `<input type=file>`.
|
|
9
|
+
* `DOM.setFileInputFiles` plants the paths directly. The cleanest
|
|
10
|
+
* path; works whenever the input is in the DOM, even when CSS-hidden
|
|
11
|
+
* or visually offscreen.
|
|
12
|
+
*
|
|
13
|
+
* B. Drag-drop target — the page listens for `drop` events on a region
|
|
14
|
+
* (Canva, Notion, Linear, GitHub PRs). We dispatch synthetic
|
|
15
|
+
* `dragenter`/`dragover`/`drop` events with a `DataTransfer` whose
|
|
16
|
+
* `files` list carries a `File` built from disk bytes. The dispatch
|
|
17
|
+
* uses real elementFromPoint coordinates so React/DOM listeners fire.
|
|
18
|
+
*
|
|
19
|
+
* C. Native chooser interception — the user clicks a button that calls
|
|
20
|
+
* `input.click()` and the only file input is dynamically created in
|
|
21
|
+
* response. `Page.setInterceptFileChooserDialog` flips the chooser
|
|
22
|
+
* from a blocking OS dialog into a CDP event; we click the trigger,
|
|
23
|
+
* wait for `Page.fileChooserOpened`, then satisfy it with
|
|
24
|
+
* `Page.handleFileChooser({ action: 'accept', files })`. Lifecycle:
|
|
25
|
+
* enable interception -> click -> wait -> accept -> disable.
|
|
26
|
+
*/
|
|
27
|
+
export interface UploadOptions {
|
|
28
|
+
files: string[];
|
|
29
|
+
}
|
|
30
|
+
/** Pattern A — direct file input via `DOM.setFileInputFiles`. */
|
|
31
|
+
export declare function uploadToFileInput(cdp: CDPClient, sessionId: string, backendNodeId: number, files: string[]): Promise<void>;
|
|
32
|
+
/** Pattern B — synthetic drag-drop onto a target node. */
|
|
33
|
+
export declare function uploadToDropTarget(cdp: CDPClient, sessionId: string, backendNodeId: number, files: string[]): Promise<void>;
|
|
34
|
+
/**
|
|
35
|
+
* Pattern C — click a trigger, intercept the OS file chooser, feed files.
|
|
36
|
+
*
|
|
37
|
+
* `Page.setInterceptFileChooserDialog` must be enabled before the click; the
|
|
38
|
+
* chooser only fires once, so we register a single-shot handler ahead of time.
|
|
39
|
+
* Auto-attached child sessions matter here: the chooser event arrives on the
|
|
40
|
+
* session whose page hosts the input, which is the same session we used for
|
|
41
|
+
* the click — so we filter event params by sessionId.
|
|
42
|
+
*/
|
|
43
|
+
export declare function uploadViaFileChooser(cdp: CDPClient, sessionId: string, triggerRef: {
|
|
44
|
+
node: RefNode;
|
|
45
|
+
nodeMap: Map<number, RefNode>;
|
|
46
|
+
}, files: string[], timeoutMs?: number): Promise<void>;
|
|
47
|
+
export declare function mimeFromExt(filePath: string): string;
|
|
48
|
+
/**
|
|
49
|
+
* Inspect a ref's DOM node to decide which pattern fits when the caller
|
|
50
|
+
* didn't specify. Returns 'input' if the node is `<input type=file>`,
|
|
51
|
+
* otherwise 'drop'. Chooser interception (Pattern C) is never auto-selected
|
|
52
|
+
* because it requires clicking the ref, which mutates page state — opt-in only.
|
|
53
|
+
*/
|
|
54
|
+
export declare function detectUploadPattern(cdp: CDPClient, sessionId: string, backendNodeId: number): Promise<'input' | 'drop'>;
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
import * as fs from 'fs';
|
|
2
|
+
import * as path from 'path';
|
|
3
|
+
import { clickAtCoords } from './input.js';
|
|
4
|
+
import { resolveRefToCoords } from './refs.js';
|
|
5
|
+
/**
 * Source text of a page-side function, sent verbatim as the
 * `functionDeclaration` of `Runtime.callFunctionOn`. It runs with `this`
 * bound to the node behind a ref and returns the `<input type=file>` that
 * node stands for, or `null` when none can be found.
 *
 * The body is a string, so it must stay self-contained: no references to
 * anything in this module, and no backticks or `${` sequences.
 */
const RESOLVE_FILE_INPUT_FN = `(function() {
    const start = this;
    const isFileInput = (n) => Boolean(n) && n.tagName === 'INPUT' && n.type === 'file';
    // Lookup order, starting at the ref node and climbing at most 8 levels:
    //   1. The node itself (when the AX backend node IS the input).
    //   2. closest('input[type=file]').
    //   3. A <label>: its labeled control. label.control covers both the
    //      for=... association and a file input nested inside the label
    //      (the common "styled upload button" markup, which has no for).
    //      The explicit htmlFor/getElementById lookup is kept as a fallback.
    //   4. Across shadow boundaries via getRootNode().host (Chromium's
    //      internal shadow button for file inputs).
    //   5. Otherwise step to parentElement and repeat.
    let el = start;
    for (let i = 0; i < 8 && el; i++) {
        if (isFileInput(el)) return el;
        if (el.closest) {
            const found = el.closest('input[type=file]');
            if (found) return found;
        }
        if (el.tagName === 'LABEL') {
            if (isFileInput(el.control)) return el.control;
            if (el.htmlFor) {
                const t = document.getElementById(el.htmlFor);
                if (isFileInput(t)) return t;
            }
        }
        const root = el.getRootNode && el.getRootNode();
        if (root && root.host && root !== document) {
            el = root.host;
            continue;
        }
        el = el.parentElement;
    }
    // Final fallback: if exactly one file input exists on the page, use it.
    // This handles cases where the AX tree exposes the input as an internal
    // pseudo-button whose parentElement is null, and triggers that merely
    // call input.click() from a click handler.
    const all = document.querySelectorAll('input[type=file]');
    if (all.length === 1) return all[0];
    return null;
})`;
|
|
45
|
+
/**
 * Pattern A — direct file input via `DOM.setFileInputFiles`.
 *
 * Validates the paths, maps the ref's backend node to the backend node of the
 * file input it resolves to, then assigns the files to that input.
 *
 * @param cdp CDP client used to send commands.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the ref the caller pointed at.
 * @param files File paths handed to `DOM.setFileInputFiles`.
 */
export async function uploadToFileInput(cdp, sessionId, backendNodeId, files) {
    validateFiles(files);
    const inputBackendNodeId = await resolveActualFileInput(cdp, sessionId, backendNodeId);
    const params = { files, backendNodeId: inputBackendNodeId };
    await cdp.send('DOM.setFileInputFiles', params, sessionId);
}
|
|
51
|
+
/**
 * Map a ref's backend node id to the backend node id of the
 * `<input type=file>` it resolves to, using the page-side lookup in
 * `RESOLVE_FILE_INPUT_FN`.
 *
 * @param cdp CDP client used to send commands.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the ref.
 * @returns Backend node id of the resolved file input. When the ref cannot be
 *   resolved to a remote object at all, the input `backendNodeId` is returned
 *   unchanged.
 * @throws {Error} When the page-side lookup throws, or finds no file input.
 */
async function resolveActualFileInput(cdp, sessionId, backendNodeId) {
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    if (!object.objectId)
        return backendNodeId;
    const objectId = object.objectId;
    try {
        const { result, exceptionDetails } = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: RESOLVE_FILE_INPUT_FN, returnByValue: false }, sessionId);
        const inputObjectId = result.objectId;
        try {
            // A page-side throw is reported through exceptionDetails, and the
            // accompanying result can carry a handle to the thrown value.
            // Checking this first keeps that handle from being treated as the
            // file input and surfaces the real failure.
            if (exceptionDetails) {
                throw new Error('File input lookup threw: ' + JSON.stringify(exceptionDetails));
            }
            if (!inputObjectId) {
                throw new Error('Ref is not (and is not contained in) an <input type=file>');
            }
            const { node } = await cdp.send('DOM.describeNode', { objectId: inputObjectId }, sessionId);
            return node.backendNodeId;
        }
        finally {
            // Release whatever handle the call produced, match or thrown value.
            if (inputObjectId) {
                await cdp.send('Runtime.releaseObject', { objectId: inputObjectId }, sessionId);
            }
        }
    }
    finally {
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
|
|
74
|
+
/**
 * Source text of a page-side function, sent verbatim as the
 * `functionDeclaration` of `Runtime.callFunctionOn`. It runs with `this`
 * bound to the drop target and receives `files`, an array of
 * `{ name, type, bytes }` where `bytes` is base64. It rebuilds each entry as
 * a `File`, then dispatches `dragenter`, `dragover` and `drop` on the target
 * with a `DataTransfer` holding those files, and returns
 * `{ dispatched: 3, files: <count> }`.
 *
 * The body is a string, so it must stay self-contained: no references to
 * anything in this module, and no backticks or `${` sequences.
 */
const DRAG_DROP_FN = `(function(files) {
    const el = this;
    const rect = el.getBoundingClientRect();
    // Events are positioned at the centre of the target.
    const x = rect.left + rect.width / 2;
    const y = rect.top + rect.height / 2;
    const dt = new DataTransfer();
    for (const f of files) {
        // Indexed decode: atob yields one char per byte, so a plain loop
        // avoids the iterator + callback overhead of Uint8Array.from on
        // multi-megabyte payloads.
        const bin = atob(f.bytes);
        const u8 = new Uint8Array(bin.length);
        for (let i = 0; i < bin.length; i++) {
            u8[i] = bin.charCodeAt(i);
        }
        // File accepts the byte array directly; no intermediate Blob copy.
        const file = new File([u8], f.name, { type: f.type || 'application/octet-stream' });
        dt.items.add(file);
    }
    function dispatch(type) {
        // Chromium does not honor the dataTransfer field in DragEventInit for
        // synthetic events. Build the DragEvent with no dataTransfer in the init,
        // then override the event dataTransfer getter via defineProperty so
        // page-level listeners see the File list we constructed.
        const ev = new DragEvent(type, {
            bubbles: true,
            cancelable: true,
            composed: true,
            clientX: x,
            clientY: y,
        });
        Object.defineProperty(ev, 'dataTransfer', { value: dt });
        el.dispatchEvent(ev);
    }
    dispatch('dragenter');
    dispatch('dragover');
    dispatch('drop');
    return { dispatched: 3, files: files.length };
})`;
|
|
106
|
+
/**
 * Pattern B — synthetic drag-drop onto a target node.
 *
 * Reads every file from disk, ships the contents base64-encoded to the page
 * as the argument of `DRAG_DROP_FN`, and lets that function dispatch the drag
 * events on the node behind `backendNodeId`. The whole payload travels in one
 * `Runtime.callFunctionOn` message.
 *
 * @param cdp CDP client used to send commands.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the drop target.
 * @param files File paths; checked by `validateFiles` before any read.
 * @throws {Error} When the target cannot be resolved to a remote object, or
 *   when the page-side dispatch reports an exception.
 */
export async function uploadToDropTarget(cdp, sessionId, backendNodeId, files) {
    validateFiles(files);
    // Read without blocking the event loop; Promise.all preserves file order.
    const payload = await Promise.all(files.map(async (p) => {
        const contents = await fs.promises.readFile(p);
        return {
            name: path.basename(p),
            type: mimeFromExt(p),
            bytes: contents.toString('base64'),
        };
    }));
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    if (!object.objectId) {
        throw new Error('Drop target node could not be resolved');
    }
    const objectId = object.objectId;
    try {
        const r = await cdp.send('Runtime.callFunctionOn', {
            objectId,
            functionDeclaration: DRAG_DROP_FN,
            arguments: [{ value: payload }],
            returnByValue: true,
            awaitPromise: true,
        }, sessionId);
        if (r.exceptionDetails) {
            throw new Error('Drop dispatch threw: ' + JSON.stringify(r.exceptionDetails));
        }
    }
    finally {
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
|
|
135
|
+
/**
 * Pattern C — click a trigger, intercept the OS file chooser, feed files.
 *
 * `Page.setInterceptFileChooserDialog` must be enabled before the click, and
 * the `Page.fileChooserOpened` listener is registered ahead of time so the
 * event cannot be missed. Interception is switched off again and the listener
 * removed on every exit path.
 *
 * NOTE(review): the listener does not filter by session — the first
 * `Page.fileChooserOpened` event that carries a numeric `backendNodeId` is
 * accepted. Confirm whether `cdp.on` can deliver events from other sessions.
 *
 * @param cdp CDP client (`send`, `on`, `off`).
 * @param sessionId Session the commands are sent on.
 * @param triggerRef Ref to click; `nodeMap` and `node.ref` are read from it.
 * @param files File paths; checked by `validateFiles`.
 * @param timeoutMs How long to wait for the chooser to open (default 5000).
 * @throws {Error} When the chooser does not open in time, or when feeding the
 *   files fails for a reason the fallback does not cover.
 */
export async function uploadViaFileChooser(cdp, sessionId, triggerRef, files, timeoutMs = 5000) {
    validateFiles(files);
    await cdp.send('Page.enable', {}, sessionId);
    await cdp.send('Page.setInterceptFileChooserDialog', { enabled: true }, sessionId);
    let opened = null;
    let resolve;
    let reject;
    const wait = new Promise((res, rej) => {
        resolve = res;
        reject = rej;
    });
    // The timer can reject `wait` while the click is still in flight, i.e.
    // before `await wait` has attached a handler. Without this no-op handler
    // that shows up as an unhandled rejection; `await wait` below still
    // observes the rejection.
    wait.catch(() => { });
    const handler = (params) => {
        if (typeof params.backendNodeId === 'number') {
            opened = { backendNodeId: params.backendNodeId };
            resolve();
        }
    };
    cdp.on('Page.fileChooserOpened', handler);
    const timer = setTimeout(() => {
        reject(new Error(`File chooser did not open within ${timeoutMs}ms — is the trigger ref correct?`));
    }, timeoutMs);
    try {
        const { x, y } = await resolveRefToCoords(cdp, sessionId, triggerRef.nodeMap, triggerRef.node.ref);
        await clickAtCoords(cdp, sessionId, x, y);
        await wait;
        // Try handleFileChooser first; the catch below falls back to
        // setFileInputFiles when this method is unavailable.
        await cdp.send('Page.handleFileChooser', { action: 'accept', files }, sessionId);
    }
    catch (err) {
        // Fall back only once the chooser has opened (so the input's
        // backendNodeId is known) and the failure looks like "method
        // unavailable". Chromium words an unknown method as
        // "'<Domain.method>' wasn't found", so that phrasing is matched too.
        if (opened && err instanceof Error && /not supported|not found|wasn't found|Method/i.test(err.message)) {
            await cdp.send('DOM.setFileInputFiles', { files, backendNodeId: opened.backendNodeId }, sessionId);
        }
        else {
            throw err;
        }
    }
    finally {
        clearTimeout(timer);
        cdp.off('Page.fileChooserOpened', handler);
        // Best-effort: a failure to switch interception off must not mask
        // the upload result.
        await cdp.send('Page.setInterceptFileChooserDialog', { enabled: false }, sessionId).catch(() => { });
    }
}
|
|
190
|
+
/**
 * Guard shared by every upload pattern: requires a non-empty list whose
 * entries are absolute paths that exist on disk. Entries are checked in
 * order and the first offending one is reported.
 *
 * @param files File paths to check.
 * @throws {Error} On an empty/missing list, a non-absolute path, or a path
 *   that does not exist.
 */
function validateFiles(files) {
    const hasEntries = Boolean(files) && files.length !== 0;
    if (!hasEntries) {
        throw new Error('At least one file path is required');
    }
    for (const candidate of files) {
        const absolute = path.isAbsolute(candidate);
        if (!absolute) {
            throw new Error(`Upload path must be absolute: ${candidate}`);
        }
        // Existence is only probed for paths that passed the absolute check.
        const present = fs.existsSync(candidate);
        if (!present) {
            throw new Error(`File not found: ${candidate}`);
        }
    }
}
|
|
203
|
+
/** Lower-cased file extension (with leading dot) → MIME type. */
const MIME_BY_EXT = {
    // Images
    '.png': 'image/png',
    '.jpg': 'image/jpeg',
    '.jpeg': 'image/jpeg',
    '.gif': 'image/gif',
    '.webp': 'image/webp',
    '.svg': 'image/svg+xml',
    // Documents and data
    '.pdf': 'application/pdf',
    '.txt': 'text/plain',
    '.csv': 'text/csv',
    '.json': 'application/json',
    '.zip': 'application/zip',
    // Video
    '.mp4': 'video/mp4',
    '.mov': 'video/quicktime',
    // Audio
    '.mp3': 'audio/mpeg',
    '.wav': 'audio/wav',
};
/**
 * Guess a MIME type from a path's extension, case-insensitively.
 *
 * @param filePath Path or file name.
 * @returns The mapped MIME type, or `application/octet-stream` when the
 *   extension is absent or not in the table.
 */
export function mimeFromExt(filePath) {
    const extension = path.extname(filePath).toLowerCase();
    const known = MIME_BY_EXT[extension];
    return known ?? 'application/octet-stream';
}
|
|
224
|
+
/**
 * Inspect a ref's DOM node to decide which pattern fits when the caller
 * didn't specify.
 *
 * Returns 'input' when the node is an `<input type=file>`, or when the
 * page-side lookup in `RESOLVE_FILE_INPUT_FN` resolves the node to one;
 * otherwise 'drop'. 'drop' is also the answer when the node cannot be
 * resolved to a remote object or when the page-side lookup throws.
 *
 * Chooser interception (Pattern C) is never auto-selected because it requires
 * clicking the ref, which mutates page state — opt-in only.
 *
 * @param cdp CDP client used to send commands.
 * @param sessionId Session the commands are sent on.
 * @param backendNodeId Backend node id of the ref.
 * @returns 'input' or 'drop'.
 */
export async function detectUploadPattern(cdp, sessionId, backendNodeId) {
    const { node } = await cdp.send('DOM.describeNode', { backendNodeId, depth: 0 }, sessionId);
    if (isFileInputNode(node))
        return 'input';
    // The node itself isn't <input type=file>, but it might be a button or
    // shadow-DOM descendant *inside* one — that's what the accessibility tree
    // surfaces for file inputs. Walk up to confirm before falling back to drop.
    const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
    if (!object.objectId)
        return 'drop';
    const objectId = object.objectId;
    try {
        const { result, exceptionDetails } = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: RESOLVE_FILE_INPUT_FN, returnByValue: false }, sessionId);
        const foundObjectId = result.objectId;
        if (foundObjectId) {
            await cdp.send('Runtime.releaseObject', { objectId: foundObjectId }, sessionId);
        }
        // A page-side throw is reported through exceptionDetails, and the
        // accompanying result can carry a handle to the thrown value. That
        // handle is not a file input, so it must not count as a match.
        if (exceptionDetails)
            return 'drop';
        return foundObjectId ? 'input' : 'drop';
    }
    finally {
        await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
    }
}
|
|
253
|
+
/**
 * Tell whether a described DOM node is an `<input type=file>`.
 *
 * `node.attributes` is read as a flat list of alternating name/value
 * entries: names at even indices, their values right after.
 *
 * @param node Object with optional `nodeName` and `attributes`.
 * @returns `true` only for an input whose `type` attribute is `file`
 *   (value compared case-insensitively).
 */
function isFileInputNode(node) {
    const nodeName = node.nodeName ?? '';
    if (nodeName.toLowerCase() !== 'input') {
        return false;
    }
    const flat = node.attributes ?? [];
    let nameIdx = 0;
    while (nameIdx < flat.length) {
        const isTypeAttr = flat[nameIdx] === 'type';
        if (isTypeAttr && flat[nameIdx + 1]?.toLowerCase() === 'file') {
            return true;
        }
        // Step over the value to land on the next attribute name.
        nameIdx += 2;
    }
    return false;
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@phnx-labs/agents-cli",
|
|
3
|
-
"version": "1.17.
|
|
3
|
+
"version": "1.17.5",
|
|
4
4
|
"description": "One CLI for all your AI coding agents - versions, config, cloud dispatch, sessions, and teams",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -87,6 +87,7 @@
|
|
|
87
87
|
"@types/diff": "^6.0.0",
|
|
88
88
|
"@types/marked-terminal": "^6.1.1",
|
|
89
89
|
"@types/node": "^22.0.0",
|
|
90
|
+
"playwright": "^1.44.0",
|
|
90
91
|
"tsx": "^4.19.0",
|
|
91
92
|
"typescript": "^5.5.0",
|
|
92
93
|
"vitest": "^2.0.0"
|