@phnx-labs/agents-cli 1.17.3 → 1.17.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/dist/commands/browser.js +13 -1
- package/dist/lib/browser/editor.d.ts +3 -0
- package/dist/lib/browser/editor.js +50 -0
- package/dist/lib/browser/input.js +42 -5
- package/dist/lib/browser/ipc.js +9 -3
- package/dist/lib/browser/refs.d.ts +1 -0
- package/dist/lib/browser/refs.js +36 -2
- package/dist/lib/browser/service.d.ts +1 -1
- package/dist/lib/browser/service.js +10 -4
- package/dist/lib/browser/types.d.ts +8 -0
- package/package.json +2 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 1.17.4
|
|
4
|
+
|
|
5
|
+
**Browser**
|
|
6
|
+
|
|
7
|
+
- `agents browser type` now detects rich-text editor frameworks (Lexical, ProseMirror, Slate, Draft.js, Quill, CKEditor5, Trix) by walking up to 5 ancestor levels from each textbox and tagging refs with `[editor=<framework>]`. Editor-tagged refs route through the WHATWG `beforeinput` dispatch (`InputEvent('beforeinput', { inputType: 'insertText', ... })`) for Lexical/ProseMirror/Slate/Quill/CKEditor5/Draft and `el.editor.insertString()` for Trix. `agents browser refs --json` surfaces the new `editor` field, and `type --clear` prepends a select-all + `deleteContentBackward` dispatch before inserting.
|
|
8
|
+
- Plain-input reliability also improved: `typeText` now issues a single CDP `Input.insertText` instead of per-character `dispatchKeyEvent`, so framework-controlled inputs (React, Vue, Solid, MUI/Chakra/Mantine `TextField`, masked-number fields, Canva-style pickers) actually receive `beforeinput`/`input`/`textInput` events. `focusNode` falls back to the first focusable descendant when `DOM.focus` throws "Element is not focusable" — fixes wrapper-ref UIs like Slack composer, Linear comments, Notion blocks, and every MUI/Chakra/Mantine `TextField`. ([#12](https://github.com/phnx-labs/agents-cli/pull/12))
|
|
9
|
+
|
|
3
10
|
## 1.17.3
|
|
4
11
|
|
|
5
12
|
**Browser**
|
package/dist/commands/browser.js
CHANGED
|
@@ -754,6 +754,7 @@ function registerTaskCommands(browser) {
|
|
|
754
754
|
.option('-t, --tab <tabId>', 'Tab ID (defaults to current)')
|
|
755
755
|
.option('--all', 'Include non-interactive elements')
|
|
756
756
|
.option('-l, --limit <n>', 'Max elements (default 500)', '500')
|
|
757
|
+
.option('--json', 'Output machine-readable JSON')
|
|
757
758
|
.action(async (task, opts) => {
|
|
758
759
|
const response = await sendIPCRequest({
|
|
759
760
|
action: 'refs',
|
|
@@ -763,9 +764,18 @@ function registerTaskCommands(browser) {
|
|
|
763
764
|
limit: parseInt(opts.limit, 10),
|
|
764
765
|
});
|
|
765
766
|
if (!response.ok) {
|
|
766
|
-
console.error(response.error);
|
|
767
|
+
if (opts.json) {
|
|
768
|
+
console.log(JSON.stringify({ ok: false, error: response.error }));
|
|
769
|
+
}
|
|
770
|
+
else {
|
|
771
|
+
console.error(response.error);
|
|
772
|
+
}
|
|
767
773
|
process.exit(1);
|
|
768
774
|
}
|
|
775
|
+
if (opts.json) {
|
|
776
|
+
console.log(JSON.stringify(response.nodes ?? [], null, 2));
|
|
777
|
+
return;
|
|
778
|
+
}
|
|
769
779
|
console.log(response.refs);
|
|
770
780
|
});
|
|
771
781
|
browser
|
|
@@ -789,6 +799,7 @@ function registerTaskCommands(browser) {
|
|
|
789
799
|
.command('type <task> <ref> <text>')
|
|
790
800
|
.description('Type text into an element by ref')
|
|
791
801
|
.option('-t, --tab <tabId>', 'Tab ID (defaults to current)')
|
|
802
|
+
.option('--clear', 'Clear editor content before typing')
|
|
792
803
|
.action(async (task, ref, text, opts) => {
|
|
793
804
|
const response = await sendIPCRequest({
|
|
794
805
|
action: 'type',
|
|
@@ -796,6 +807,7 @@ function registerTaskCommands(browser) {
|
|
|
796
807
|
tabId: opts.tab,
|
|
797
808
|
ref: parseInt(ref, 10),
|
|
798
809
|
text,
|
|
810
|
+
clear: opts.clear,
|
|
799
811
|
});
|
|
800
812
|
if (!response.ok) {
|
|
801
813
|
console.error(response.error);
|
package/dist/lib/browser/editor.js
CHANGED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
const BEFOREINPUT_INSERT_FN = `(function(text) {
|
|
2
|
+
this.focus();
|
|
3
|
+
var sel = window.getSelection();
|
|
4
|
+
var range = document.createRange();
|
|
5
|
+
range.selectNodeContents(this);
|
|
6
|
+
range.collapse(false);
|
|
7
|
+
sel.removeAllRanges();
|
|
8
|
+
sel.addRange(range);
|
|
9
|
+
this.dispatchEvent(new InputEvent('beforeinput', {
|
|
10
|
+
inputType: 'insertText',
|
|
11
|
+
data: text,
|
|
12
|
+
bubbles: true,
|
|
13
|
+
cancelable: true,
|
|
14
|
+
composed: true,
|
|
15
|
+
}));
|
|
16
|
+
})`;
|
|
17
|
+
const BEFOREINPUT_CLEAR_FN = `(function() {
|
|
18
|
+
this.focus();
|
|
19
|
+
var sel = window.getSelection();
|
|
20
|
+
var range = document.createRange();
|
|
21
|
+
range.selectNodeContents(this);
|
|
22
|
+
sel.removeAllRanges();
|
|
23
|
+
sel.addRange(range);
|
|
24
|
+
this.dispatchEvent(new InputEvent('beforeinput', {
|
|
25
|
+
inputType: 'deleteContentBackward',
|
|
26
|
+
bubbles: true,
|
|
27
|
+
cancelable: true,
|
|
28
|
+
composed: true,
|
|
29
|
+
}));
|
|
30
|
+
})`;
|
|
31
|
+
const TRIX_INSERT_FN = `(function(text) { this.editor.insertString(text); })`;
|
|
32
|
+
export async function typeEditorText(cdp, sessionId, node, text, clear = false) {
|
|
33
|
+
const { object } = await cdp.send('DOM.resolveNode', { backendNodeId: node.backendNodeId }, sessionId);
|
|
34
|
+
if (!object.objectId)
|
|
35
|
+
throw new Error(`Could not resolve DOM node for ref ${node.ref}`);
|
|
36
|
+
const objectId = object.objectId;
|
|
37
|
+
try {
|
|
38
|
+
if (node.editor === 'trix') {
|
|
39
|
+
await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: TRIX_INSERT_FN, arguments: [{ value: text }], returnByValue: true }, sessionId);
|
|
40
|
+
return;
|
|
41
|
+
}
|
|
42
|
+
if (clear) {
|
|
43
|
+
await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: BEFOREINPUT_CLEAR_FN, arguments: [], returnByValue: true }, sessionId);
|
|
44
|
+
}
|
|
45
|
+
await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: BEFOREINPUT_INSERT_FN, arguments: [{ value: text }], returnByValue: true }, sessionId);
|
|
46
|
+
}
|
|
47
|
+
finally {
|
|
48
|
+
await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
|
|
49
|
+
}
|
|
50
|
+
}
|
package/dist/lib/browser/input.js
CHANGED
|
@@ -8,11 +8,13 @@ export async function hoverAtCoords(cdp, sessionId, x, y) {
|
|
|
8
8
|
export async function scrollAtCoords(cdp, sessionId, x, y, deltaX, deltaY) {
|
|
9
9
|
await cdp.send('Input.dispatchMouseEvent', { type: 'mouseWheel', x, y, deltaX, deltaY }, sessionId);
|
|
10
10
|
}
|
|
11
|
+
// `Input.insertText` is the CDP native text-insertion method. It dispatches a
|
|
12
|
+
// real `beforeinput`/`input`/`textInput` sequence on the focused element, which
|
|
13
|
+
// is what framework-controlled inputs (React, Vue, Solid, contenteditable
|
|
14
|
+
// editors) actually listen for. Per-character `dispatchKeyEvent` only fires
|
|
15
|
+
// `keydown`/`keyup` with no input event, so controlled inputs ignore it.
|
|
11
16
|
export async function typeText(cdp, sessionId, text) {
|
|
12
|
-
for (const char of text) {
|
|
13
|
-
await cdp.send('Input.dispatchKeyEvent', { type: 'keyDown', text: char }, sessionId);
|
|
14
|
-
await cdp.send('Input.dispatchKeyEvent', { type: 'keyUp', text: char }, sessionId);
|
|
15
|
-
}
|
|
17
|
+
await cdp.send('Input.insertText', { text }, sessionId);
|
|
16
18
|
}
|
|
17
19
|
const KEY_CODES = {
|
|
18
20
|
Enter: { key: 'Enter', code: 'Enter', keyCode: 13 },
|
|
@@ -50,6 +52,41 @@ export async function pressKey(cdp, sessionId, keyName) {
|
|
|
50
52
|
nativeVirtualKeyCode: keyInfo.keyCode,
|
|
51
53
|
}, sessionId);
|
|
52
54
|
}
|
|
55
|
+
const FOCUS_DESCENDANT_FN = `(function() {
|
|
56
|
+
const selector = 'input:not([disabled]):not([type=hidden]),textarea:not([disabled]),select:not([disabled]),[contenteditable=""],[contenteditable=true],[tabindex]:not([tabindex="-1"])';
|
|
57
|
+
const candidates = this.querySelectorAll(selector);
|
|
58
|
+
for (const el of candidates) {
|
|
59
|
+
el.focus();
|
|
60
|
+
if (document.activeElement === el) return true;
|
|
61
|
+
}
|
|
62
|
+
return false;
|
|
63
|
+
})`;
|
|
64
|
+
// `DOM.focus` only works on natively focusable elements. UIs that wrap real
|
|
65
|
+
// inputs in styled containers (Slack composer, Linear comments, Notion blocks,
|
|
66
|
+
// Canva pickers, MUI/Chakra/Mantine TextField) often expose the wrapper as the
|
|
67
|
+
// accessible "ref" — focusing it throws "Element is not focusable". When that
|
|
68
|
+
// happens, walk the subtree for the first focusable descendant.
|
|
53
69
|
export async function focusNode(cdp, sessionId, backendNodeId) {
|
|
54
|
-
await cdp.send('DOM.focus', { backendNodeId }, sessionId);
|
|
70
|
+
try {
|
|
71
|
+
await cdp.send('DOM.focus', { backendNodeId }, sessionId);
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
catch (err) {
|
|
75
|
+
const focused = await focusFirstFocusableDescendant(cdp, sessionId, backendNodeId);
|
|
76
|
+
if (!focused)
|
|
77
|
+
throw err;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
async function focusFirstFocusableDescendant(cdp, sessionId, backendNodeId) {
|
|
81
|
+
const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
|
|
82
|
+
if (!object.objectId)
|
|
83
|
+
return false;
|
|
84
|
+
const objectId = object.objectId;
|
|
85
|
+
try {
|
|
86
|
+
const { result } = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: FOCUS_DESCENDANT_FN, returnByValue: true }, sessionId);
|
|
87
|
+
return result.value === true;
|
|
88
|
+
}
|
|
89
|
+
finally {
|
|
90
|
+
await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
|
|
91
|
+
}
|
|
55
92
|
}
|
package/dist/lib/browser/ipc.js
CHANGED
|
@@ -164,11 +164,17 @@ export class BrowserIPCServer {
|
|
|
164
164
|
if (!request.task) {
|
|
165
165
|
return { ok: false, error: 'Task required' };
|
|
166
166
|
}
|
|
167
|
-
const { refs } = await this.service.refs(request.task, request.tabId, {
|
|
167
|
+
const { refs, nodeMap } = await this.service.refs(request.task, request.tabId, {
|
|
168
168
|
interactive: request.interactive ?? true,
|
|
169
169
|
limit: request.limit ?? 500,
|
|
170
170
|
});
|
|
171
|
-
return { ok: true, refs };
|
|
171
|
+
const nodes = Array.from(nodeMap.values()).map(n => {
|
|
172
|
+
const entry = { ref: n.ref, role: n.role, name: n.name, attrs: n.attrs };
|
|
173
|
+
if (n.editor !== undefined)
|
|
174
|
+
entry.editor = n.editor;
|
|
175
|
+
return entry;
|
|
176
|
+
});
|
|
177
|
+
return { ok: true, refs, nodes };
|
|
172
178
|
}
|
|
173
179
|
case 'click': {
|
|
174
180
|
if (!request.task || request.ref === undefined) {
|
|
@@ -181,7 +187,7 @@ export class BrowserIPCServer {
|
|
|
181
187
|
if (!request.task || request.ref === undefined || !request.text) {
|
|
182
188
|
return { ok: false, error: 'Task, ref, and text required' };
|
|
183
189
|
}
|
|
184
|
-
await this.service.type(request.task, request.ref, request.text, request.tabId);
|
|
190
|
+
await this.service.type(request.task, request.ref, request.text, request.tabId, request.clear);
|
|
185
191
|
return { ok: true };
|
|
186
192
|
}
|
|
187
193
|
case 'press': {
|
package/dist/lib/browser/refs.js
CHANGED
|
@@ -1,3 +1,31 @@
|
|
|
1
|
+
const EDITOR_DETECT_FN = `(function() {
|
|
2
|
+
let el = this;
|
|
3
|
+
for (let i = 0; i < 5; i++) {
|
|
4
|
+
if (!el || el === document.documentElement) break;
|
|
5
|
+
if (el.hasAttribute && el.hasAttribute('data-lexical-editor')) return 'lexical';
|
|
6
|
+
if (el.classList && el.classList.contains('ProseMirror')) return 'prosemirror';
|
|
7
|
+
if (el.hasAttribute && el.hasAttribute('data-slate-editor')) return 'slate';
|
|
8
|
+
if (el.classList && Array.from(el.classList).some(function(c) { return /^DraftEditor-/.test(c); })) return 'draft';
|
|
9
|
+
if (el.classList && el.classList.contains('ql-editor')) return 'quill';
|
|
10
|
+
if (el.classList && el.classList.contains('ck-editor__editable')) return 'ckeditor5';
|
|
11
|
+
if (el.tagName === 'TRIX-EDITOR') return 'trix';
|
|
12
|
+
el = el.parentElement;
|
|
13
|
+
}
|
|
14
|
+
return null;
|
|
15
|
+
})`;
|
|
16
|
+
async function detectEditorForNode(cdp, sessionId, backendNodeId) {
|
|
17
|
+
const { object } = await cdp.send('DOM.resolveNode', { backendNodeId }, sessionId);
|
|
18
|
+
if (!object.objectId)
|
|
19
|
+
return undefined;
|
|
20
|
+
const objectId = object.objectId;
|
|
21
|
+
try {
|
|
22
|
+
const { result } = await cdp.send('Runtime.callFunctionOn', { objectId, functionDeclaration: EDITOR_DETECT_FN, returnByValue: true }, sessionId);
|
|
23
|
+
return result.value ?? undefined;
|
|
24
|
+
}
|
|
25
|
+
finally {
|
|
26
|
+
await cdp.send('Runtime.releaseObject', { objectId }, sessionId);
|
|
27
|
+
}
|
|
28
|
+
}
|
|
1
29
|
const INTERACTIVE_ROLES = new Set([
|
|
2
30
|
'button',
|
|
3
31
|
'link',
|
|
@@ -59,12 +87,18 @@ export async function getRefs(cdp, sessionId, opts = {}) {
|
|
|
59
87
|
attrs,
|
|
60
88
|
backendNodeId: node.backendDOMNodeId,
|
|
61
89
|
};
|
|
90
|
+
if (role === 'textbox' && node.backendDOMNodeId) {
|
|
91
|
+
const editor = await detectEditorForNode(cdp, sessionId, node.backendDOMNodeId);
|
|
92
|
+
if (editor)
|
|
93
|
+
refNode.editor = editor;
|
|
94
|
+
}
|
|
62
95
|
nodeMap.set(ref, refNode);
|
|
63
96
|
const attrStr = attrs.length > 0 ? ` [${attrs.join('] [')}]` : '';
|
|
97
|
+
const editorStr = refNode.editor ? ` [editor=${refNode.editor}]` : '';
|
|
64
98
|
const nameStr = name ? ` "${truncate(name, 50)}"` : '';
|
|
65
99
|
const line = compact
|
|
66
|
-
? `${role}${nameStr} [ref=${ref}]${attrStr}`
|
|
67
|
-
: `- ${role}${nameStr} [ref=${ref}]${attrStr}`;
|
|
100
|
+
? `${role}${nameStr} [ref=${ref}]${attrStr}${editorStr}`
|
|
101
|
+
: `- ${role}${nameStr} [ref=${ref}]${attrStr}${editorStr}`;
|
|
68
102
|
lines.push(line);
|
|
69
103
|
}
|
|
70
104
|
return { refs: lines.join('\n'), nodeMap };
|
package/dist/lib/browser/service.d.ts
CHANGED
|
@@ -99,7 +99,7 @@ export declare class BrowserService {
|
|
|
99
99
|
nodeMap: Map<number, RefNode>;
|
|
100
100
|
}>;
|
|
101
101
|
click(taskId: string, ref: number, tabHint?: string): Promise<void>;
|
|
102
|
-
type(taskId: string, ref: number, text: string, tabHint?: string): Promise<void>;
|
|
102
|
+
type(taskId: string, ref: number, text: string, tabHint?: string, clear?: boolean): Promise<void>;
|
|
103
103
|
press(taskId: string, key: string, tabHint?: string): Promise<void>;
|
|
104
104
|
hover(taskId: string, ref: number, tabHint?: string): Promise<void>;
|
|
105
105
|
scroll(taskId: string, deltaX: number, deltaY: number, atX?: number, atY?: number, tabHint?: string): Promise<void>;
|
package/dist/lib/browser/service.js
CHANGED
|
@@ -8,6 +8,7 @@ import { connectSSH } from './drivers/ssh.js';
|
|
|
8
8
|
import { generateTaskId, generateShortId, generateFunName, } from './types.js';
|
|
9
9
|
import { getRefs, resolveRefToCoords } from './refs.js';
|
|
10
10
|
import { clickAtCoords, hoverAtCoords, scrollAtCoords, typeText, pressKey, focusNode } from './input.js';
|
|
11
|
+
import { typeEditorText } from './editor.js';
|
|
11
12
|
import { emit } from '../events.js';
|
|
12
13
|
/**
|
|
13
14
|
* Parse a `targetFilter` string into its kind + value, or return `null`
|
|
@@ -502,7 +503,7 @@ export class BrowserService {
|
|
|
502
503
|
const { x, y } = await resolveRefToCoords(conn.cdp, sessionId, nodeMap, ref);
|
|
503
504
|
await clickAtCoords(conn.cdp, sessionId, x, y);
|
|
504
505
|
}
|
|
505
|
-
async type(taskId, ref, text, tabHint) {
|
|
506
|
+
async type(taskId, ref, text, tabHint, clear) {
|
|
506
507
|
const { conn, task } = await this.findTask(taskId);
|
|
507
508
|
const shortId = tabHint ? await this.resolveTabHint(conn, task, tabHint) : this.resolveCurrentTab(task);
|
|
508
509
|
const cdpTargetId = this.getCdpTargetId(task, shortId);
|
|
@@ -514,10 +515,15 @@ export class BrowserService {
|
|
|
514
515
|
const node = nodeMap.get(ref);
|
|
515
516
|
if (!node)
|
|
516
517
|
throw new Error(`Ref ${ref} not found`);
|
|
517
|
-
if (node.backendNodeId) {
|
|
518
|
-
await focusNode(conn.cdp, sessionId, node.backendNodeId);
|
|
518
|
+
if (node.editor) {
|
|
519
|
+
await typeEditorText(conn.cdp, sessionId, node, text, clear);
|
|
520
|
+
}
|
|
521
|
+
else {
|
|
522
|
+
if (node.backendNodeId) {
|
|
523
|
+
await focusNode(conn.cdp, sessionId, node.backendNodeId);
|
|
524
|
+
}
|
|
525
|
+
await typeText(conn.cdp, sessionId, text);
|
|
519
526
|
}
|
|
520
|
-
await typeText(conn.cdp, sessionId, text);
|
|
521
527
|
}
|
|
522
528
|
async press(taskId, key, tabHint) {
|
|
523
529
|
const { conn, task } = await this.findTask(taskId);
|
package/dist/lib/browser/types.d.ts
CHANGED
|
@@ -129,6 +129,7 @@ export interface IPCResponse {
|
|
|
129
129
|
result?: unknown;
|
|
130
130
|
path?: string;
|
|
131
131
|
refs?: string;
|
|
132
|
+
nodes?: RefNodeJson[];
|
|
132
133
|
port?: number;
|
|
133
134
|
pid?: number;
|
|
134
135
|
logs?: ConsoleEntry[];
|
|
@@ -160,6 +161,13 @@ export interface NetworkRequest {
|
|
|
160
161
|
mimeType?: string;
|
|
161
162
|
timestamp: number;
|
|
162
163
|
}
|
|
164
|
+
export interface RefNodeJson {
|
|
165
|
+
ref: number;
|
|
166
|
+
role: string;
|
|
167
|
+
name: string;
|
|
168
|
+
attrs: string[];
|
|
169
|
+
editor?: string;
|
|
170
|
+
}
|
|
163
171
|
export interface DeviceDescriptor {
|
|
164
172
|
width: number;
|
|
165
173
|
height: number;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@phnx-labs/agents-cli",
|
|
3
|
-
"version": "1.17.
|
|
3
|
+
"version": "1.17.4",
|
|
4
4
|
"description": "One CLI for all your AI coding agents - versions, config, cloud dispatch, sessions, and teams",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -87,6 +87,7 @@
|
|
|
87
87
|
"@types/diff": "^6.0.0",
|
|
88
88
|
"@types/marked-terminal": "^6.1.1",
|
|
89
89
|
"@types/node": "^22.0.0",
|
|
90
|
+
"playwright": "^1.44.0",
|
|
90
91
|
"tsx": "^4.19.0",
|
|
91
92
|
"typescript": "^5.5.0",
|
|
92
93
|
"vitest": "^2.0.0"
|