@skillful-agents/agent-computer 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50):
  1. package/bin/ac-core-darwin-arm64 +0 -0
  2. package/bin/ac-core-darwin-x64 +0 -0
  3. package/bin/ac-core-win32-arm64.exe +0 -0
  4. package/bin/ac-core-win32-x64.exe +0 -0
  5. package/dist/src/platform/resolve.d.ts.map +1 -1
  6. package/dist/src/platform/resolve.js +5 -3
  7. package/dist/src/platform/resolve.js.map +1 -1
  8. package/dist-cjs/bin/ac.js +127 -0
  9. package/dist-cjs/package.json +1 -0
  10. package/dist-cjs/src/bridge.js +693 -0
  11. package/dist-cjs/src/cdp/ax-tree.js +162 -0
  12. package/dist-cjs/src/cdp/bounds.js +66 -0
  13. package/dist-cjs/src/cdp/client.js +272 -0
  14. package/dist-cjs/src/cdp/connection.js +285 -0
  15. package/dist-cjs/src/cdp/diff.js +55 -0
  16. package/dist-cjs/src/cdp/discovery.js +91 -0
  17. package/dist-cjs/src/cdp/index.js +27 -0
  18. package/dist-cjs/src/cdp/interactions.js +301 -0
  19. package/dist-cjs/src/cdp/port-manager.js +68 -0
  20. package/dist-cjs/src/cdp/role-map.js +102 -0
  21. package/dist-cjs/src/cdp/types.js +2 -0
  22. package/dist-cjs/src/cli/commands/apps.js +63 -0
  23. package/dist-cjs/src/cli/commands/batch.js +37 -0
  24. package/dist-cjs/src/cli/commands/click.js +61 -0
  25. package/dist-cjs/src/cli/commands/clipboard.js +31 -0
  26. package/dist-cjs/src/cli/commands/dialog.js +45 -0
  27. package/dist-cjs/src/cli/commands/drag.js +26 -0
  28. package/dist-cjs/src/cli/commands/find.js +99 -0
  29. package/dist-cjs/src/cli/commands/menu.js +36 -0
  30. package/dist-cjs/src/cli/commands/screenshot.js +27 -0
  31. package/dist-cjs/src/cli/commands/scroll.js +77 -0
  32. package/dist-cjs/src/cli/commands/session.js +27 -0
  33. package/dist-cjs/src/cli/commands/snapshot.js +24 -0
  34. package/dist-cjs/src/cli/commands/type.js +69 -0
  35. package/dist-cjs/src/cli/commands/windowmgmt.js +62 -0
  36. package/dist-cjs/src/cli/commands/windows.js +10 -0
  37. package/dist-cjs/src/cli/commands.js +215 -0
  38. package/dist-cjs/src/cli/output.js +253 -0
  39. package/dist-cjs/src/cli/parser.js +128 -0
  40. package/dist-cjs/src/config.js +79 -0
  41. package/dist-cjs/src/daemon.js +183 -0
  42. package/dist-cjs/src/errors.js +118 -0
  43. package/dist-cjs/src/index.js +24 -0
  44. package/dist-cjs/src/platform/index.js +16 -0
  45. package/dist-cjs/src/platform/resolve.js +71 -0
  46. package/dist-cjs/src/refs.js +91 -0
  47. package/dist-cjs/src/sdk.js +288 -0
  48. package/dist-cjs/src/types.js +11 -0
  49. package/package.json +4 -2
  50. package/scripts/fix-cjs-resolve.js +27 -0
@@ -0,0 +1,162 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CDPAXTree = void 0;
4
+ const refs_js_1 = require("../refs.js");
5
+ const role_map_js_1 = require("./role-map.js");
6
+ const MAX_ELEMENTS = 500;
7
+ const MAX_DEPTH = 50;
8
/**
 * Builds a simplified element tree from the Chrome DevTools Protocol
 * accessibility tree returned by `Accessibility.getFullAXTree`.
 */
class CDPAXTree {
    connection;
    /**
     * @param connection Object whose `send(method, params)` resolves with the
     *   CDP response for that method.
     */
    constructor(connection) {
        this.connection = connection;
    }
    /**
     * Fetch the accessibility tree and convert it to nested elements.
     *
     * Every emitted element gets a ref of the form `@<prefix><n>`, where the
     * prefix comes from `roleToPrefix(mappedRole)` and `n` counts per prefix.
     * Elements whose CDP node carries a `backendDOMNodeId` are also recorded
     * in `refMap`. `bounds` is always `[0, 0, 0, 0]` here.
     *
     * @param options.depth Forwarded as the `depth` parameter of the CDP call
     *   when defined.
     * @param options.interactive When true, nodes whose CDP role is not in
     *   `INTERACTIVE_ROLES` are not emitted themselves: a single surviving
     *   child is promoted in their place, several surviving children are kept
     *   under a wrapper element built from the node, and a node with no
     *   surviving children is dropped.
     * @returns `{ elements, refMap }`. If the root element's role is
     *   `'webarea'` and it has children, `elements` is that child list;
     *   otherwise it is the root element alone (or empty if the root was
     *   skipped).
     */
    async getTree(options = {}) {
        const { depth, interactive = false } = options;
        const params = {};
        if (depth !== undefined) {
            params.depth = depth;
        }
        const result = (await this.connection.send('Accessibility.getFullAXTree', params));
        const nodes = result.nodes;
        if (!nodes || nodes.length === 0) {
            return { elements: [], refMap: new Map() };
        }
        // Lookup map: nodeId → CDP AX node, used to resolve childIds.
        const nodeMap = new Map();
        for (const node of nodes) {
            nodeMap.set(node.nodeId, node);
        }
        // Ref counter per prefix
        const refCounters = {};
        const refMap = new Map();
        let totalElements = 0;
        // Reserve the next ref for this role, count the element against
        // MAX_ELEMENTS and register it in refMap when it maps to a DOM node.
        const allocateRef = (node, mappedRole) => {
            const prefix = (0, refs_js_1.roleToPrefix)(mappedRole);
            refCounters[prefix] = (refCounters[prefix] ?? 0) + 1;
            const ref = `@${prefix}${refCounters[prefix]}`;
            totalElements++;
            if (node.backendDOMNodeId !== undefined) {
                refMap.set(ref, {
                    nodeId: node.nodeId,
                    backendDOMNodeId: node.backendDOMNodeId,
                });
            }
            return ref;
        };
        // Assemble the output element; `children` is attached only when non-empty.
        const toElement = (node, ref, mappedRole, children) => {
            const element = {
                ref,
                role: mappedRole,
                label: node.name?.value ?? null,
                value: node.value?.value != null ? String(node.value.value) : null,
                enabled: getEnabled(node),
                focused: getFocused(node),
                bounds: [0, 0, 0, 0],
            };
            if (children.length > 0) {
                element.children = children;
            }
            return element;
        };
        const buildElement = (node, currentDepth) => {
            if (totalElements >= MAX_ELEMENTS) {
                return null;
            }
            if (currentDepth > MAX_DEPTH) {
                return null;
            }
            // Ignored nodes never become elements; buildChildren splices their
            // children into the parent's child list instead.
            if (node.ignored) {
                return null;
            }
            const cdpRole = node.role?.value ?? 'generic';
            const mappedRole = (0, role_map_js_1.mapCDPRole)(cdpRole);
            if (interactive && !role_map_js_1.INTERACTIVE_ROLES.has(cdpRole)) {
                // Non-interactive node in interactive mode: keep only what
                // survives below it.
                const promoted = buildChildren(node, currentDepth);
                if (promoted.length === 0) {
                    return null;
                }
                if (promoted.length === 1) {
                    return promoted[0];
                }
                // Several survivors: wrap them so they stay grouped. The ref is
                // allocated after the children, so it is numbered after them.
                const wrapperRef = allocateRef(node, mappedRole);
                return toElement(node, wrapperRef, mappedRole, promoted);
            }
            // Regular node: the ref is allocated before descending (pre-order).
            const ref = allocateRef(node, mappedRole);
            const children = buildChildren(node, currentDepth);
            return toElement(node, ref, mappedRole, children);
        };
        const buildChildren = (parent, currentDepth) => {
            // Children live at currentDepth + 1, which buildElement rejects once
            // it exceeds MAX_DEPTH, so nothing below this point can be emitted.
            if (currentDepth >= MAX_DEPTH) {
                return [];
            }
            if (!parent.childIds || parent.childIds.length === 0) {
                return [];
            }
            const children = [];
            for (const childId of parent.childIds) {
                if (totalElements >= MAX_ELEMENTS) {
                    break;
                }
                const childNode = nodeMap.get(childId);
                if (!childNode) {
                    continue;
                }
                if (childNode.ignored) {
                    // Skip the ignored node itself but keep its descendants.
                    children.push(...buildChildren(childNode, currentDepth + 1));
                    continue;
                }
                // A null result for a non-ignored child means its subtree was
                // already walked and produced nothing. Walking it again here
                // would double the work at every nesting level, so it is not
                // retried.
                const el = buildElement(childNode, currentDepth + 1);
                if (el) {
                    children.push(el);
                }
            }
            return children;
        };
        // Root is the first node
        const root = nodes[0];
        const rootElement = buildElement(root, 0);
        const elements = [];
        if (rootElement) {
            // If root is a webarea, return its children directly (common pattern)
            if (rootElement.role === 'webarea' && rootElement.children) {
                elements.push(...rootElement.children);
            }
            else {
                elements.push(rootElement);
            }
        }
        return { elements, refMap };
    }
}
142
+ exports.CDPAXTree = CDPAXTree;
143
/**
 * Report whether an accessibility node is enabled.
 *
 * Looks for the first property named `disabled` and returns the negation of
 * its value. Nodes without a property list, without a `disabled` property, or
 * whose `disabled` entry carries no value payload are treated as enabled.
 *
 * @param node CDP AX node; `node.properties` is optional.
 * @returns {boolean}
 */
function getEnabled(node) {
    if (!node.properties) {
        return true;
    }
    for (const prop of node.properties) {
        if (prop.name === 'disabled') {
            // `?.` keeps one malformed property from aborting the whole tree build.
            return !prop.value?.value;
        }
    }
    return true;
}
153
/**
 * Report whether an accessibility node is focused.
 *
 * Looks for the first property named `focused` and returns its value coerced
 * to a boolean. Nodes without a property list, without a `focused` property,
 * or whose `focused` entry carries no value payload are treated as unfocused.
 *
 * @param node CDP AX node; `node.properties` is optional.
 * @returns {boolean}
 */
function getFocused(node) {
    if (!node.properties) {
        return false;
    }
    for (const prop of node.properties) {
        if (prop.name === 'focused') {
            // `?.` keeps one malformed property from aborting the whole tree build.
            return Boolean(prop.value?.value);
        }
    }
    return false;
}
@@ -0,0 +1,66 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getBounds = getBounds;
4
+ exports.toScreenCoords = toScreenCoords;
5
+ exports.resolveAllBounds = resolveAllBounds;
6
/**
 * Fetch the CSS bounding box for a DOM node via CDP DOM.getBoxModel.
 *
 * Returns [x, y, width, height] of the axis-aligned box that encloses the
 * content quad. Using the min/max over all four vertices (rather than assuming
 * the first vertex is the top-left corner) keeps width and height non-negative
 * for transformed or rotated elements; for an untransformed element the result
 * equals the quad itself.
 *
 * Returns [0, 0, 0, 0] if the box model cannot be retrieved or the content
 * quad does not contain four points.
 *
 * @param connection Object whose `send(method, params)` resolves with the CDP response.
 * @param backendDOMNodeId Sent as `backendNodeId` to DOM.getBoxModel.
 */
async function getBounds(connection, backendDOMNodeId) {
    try {
        const result = (await connection.send('DOM.getBoxModel', {
            backendNodeId: backendDOMNodeId,
        }));
        // Content quad: [x1,y1, x2,y2, x3,y3, x4,y4]
        const content = result.model.content;
        if (!Array.isArray(content) || content.length < 8) {
            return [0, 0, 0, 0];
        }
        const xs = [content[0], content[2], content[4], content[6]];
        const ys = [content[1], content[3], content[5], content[7]];
        const left = Math.min(...xs);
        const top = Math.min(...ys);
        return [left, top, Math.max(...xs) - left, Math.max(...ys) - top];
    }
    catch {
        // Deliberate best-effort: nodes without a box model report an empty rectangle.
        return [0, 0, 0, 0];
    }
}
28
/**
 * Map a CSS-pixel rectangle to screen coordinates.
 *
 * The origin is shifted by `contentOffset`, scaled by `scaleFactor`, then
 * translated by the first two entries of `windowBounds`; width and height are
 * only scaled.
 *
 * @param cssBounds [x, y, width, height] in CSS pixels.
 * @param windowBounds Array whose first two entries are added to x and y.
 * @param contentOffset Object with numeric `x` and `y`.
 * @param scaleFactor Multiplier applied to every component.
 * @returns [x, y, width, height] after conversion.
 */
function toScreenCoords(cssBounds, windowBounds, contentOffset, scaleFactor) {
    const scaled = (value) => value * scaleFactor;
    return [
        scaled(cssBounds[0] + contentOffset.x) + windowBounds[0],
        scaled(cssBounds[1] + contentOffset.y) + windowBounds[1],
        scaled(cssBounds[2]),
        scaled(cssBounds[3]),
    ];
}
38
// Maximum number of getBounds calls awaited together in one batch.
const CONCURRENCY_LIMIT = 50;
/**
 * Fill in `bounds` for every element (at any nesting level) whose ref has an
 * entry in `refMap`. Elements are updated in place; elements without a
 * `refMap` entry are left untouched. Lookups run in batches of at most
 * CONCURRENCY_LIMIT, each batch awaited before the next one starts.
 *
 * @param connection Passed through to getBounds.
 * @param elements Root list of the element tree.
 * @param refMap Map from element ref to an object with `backendDOMNodeId`.
 * @param windowBounds Passed through to toScreenCoords.
 * @param contentOffset Passed through to toScreenCoords.
 * @param scaleFactor Passed through to toScreenCoords.
 */
async function resolveAllBounds(connection, elements, refMap, windowBounds, contentOffset, scaleFactor) {
    // Flatten the tree in pre-order using an explicit stack.
    const pending = [];
    const stack = [...elements].reverse();
    while (stack.length > 0) {
        const current = stack.pop();
        const nodeRef = refMap.get(current.ref);
        if (nodeRef) {
            pending.push({ element: current, nodeRef });
        }
        if (current.children) {
            // Reversed so the first child is popped (visited) first.
            stack.push(...[...current.children].reverse());
        }
    }
    const resolveOne = async ({ element, nodeRef }) => {
        const cssBounds = await getBounds(connection, nodeRef.backendDOMNodeId);
        element.bounds = toScreenCoords(cssBounds, windowBounds, contentOffset, scaleFactor);
    };
    let offset = 0;
    while (offset < pending.length) {
        const batch = pending.slice(offset, offset + CONCURRENCY_LIMIT);
        await Promise.all(batch.map((item) => resolveOne(item)));
        offset += CONCURRENCY_LIMIT;
    }
}
@@ -0,0 +1,272 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.CDPClient = void 0;
4
+ const connection_js_1 = require("./connection.js");
5
+ const discovery_js_1 = require("./discovery.js");
6
+ const ax_tree_js_1 = require("./ax-tree.js");
7
+ const interactions_js_1 = require("./interactions.js");
8
+ const bounds_js_1 = require("./bounds.js");
9
+ const diff_js_1 = require("./diff.js");
10
/**
 * High-level client for one Chrome DevTools Protocol target.
 *
 * Owns a CDPConnection, takes accessibility snapshots through CDPAXTree,
 * drives input through CDPInteractions, and answers queries (find/read/is/
 * children/box) from the most recent snapshot.
 */
class CDPClient {
    port;
    connection;
    axTree = null;
    interactions = null;
    // State of the most recent snapshot(); refs passed to other methods are
    // resolved against these.
    lastRefMap = new Map();
    lastSnapshotId = null;
    lastElements = [];
    // Tree options used by the most recent snapshot(); changed()/diff() reuse
    // them so both sides of the comparison are built the same way.
    lastSnapshotOptions = {};
    contentOffset = { x: 0, y: 0 };
    // NOTE(review): nothing visible in this class updates scaleFactor, so the
    // Retina default is used for every display — confirm that is intended.
    scaleFactor = 2; // Retina default
    /**
     * @param port Port passed to waitForCDP when (re)connecting.
     */
    constructor(port) {
        this.port = port;
        this.connection = new connection_js_1.CDPConnection();
    }
    /** Connect to CDP target, enable domains */
    async connect() {
        // Second argument is presumably a timeout in milliseconds — see waitForCDP.
        const target = await (0, discovery_js_1.waitForCDP)(this.port, 10000);
        await this.connection.connect(target.webSocketDebuggerUrl);
        await this.enableDomains();
    }
    /** Reconnect after WebSocket drop: close (best effort), rediscover, reopen on a fresh connection */
    async reconnect() {
        try {
            await this.connection.close();
        }
        catch { /* ok: the old socket may already be gone */ }
        const target = await (0, discovery_js_1.waitForCDP)(this.port, 10000);
        this.connection = new connection_js_1.CDPConnection();
        await this.connection.connect(target.webSocketDebuggerUrl);
        await this.enableDomains();
    }
    /** Disconnect from CDP and drop the tree/interaction helpers */
    async disconnect() {
        await this.connection.close();
        this.axTree = null;
        this.interactions = null;
    }
    /** Enable the CDP domains used by this client and (re)create the helpers bound to the current connection */
    async enableDomains() {
        await Promise.all([
            this.connection.send('Accessibility.enable'),
            this.connection.send('DOM.enable'),
            this.connection.send('Page.enable'),
            this.connection.send('Runtime.enable'),
        ]);
        this.axTree = new ax_tree_js_1.CDPAXTree(this.connection);
        this.interactions = new interactions_js_1.CDPInteractions(this.connection);
        // Determine content offset once per connection
        await this.updateContentOffset();
    }
    /** Check if connected */
    isConnected() {
        return this.connection.connected;
    }
    /**
     * Take a snapshot of the accessibility tree.
     *
     * @param options `interactive` and `depth` are forwarded to CDPAXTree.getTree.
     * @param windowInfo Returned as `window`; its `bounds` are used to resolve element bounds.
     * @returns `{ snapshot_id, window, elements, fallback: null }`.
     * @throws Error if the client is not connected.
     */
    async snapshot(options = {}, windowInfo) {
        if (!this.axTree)
            throw new Error('CDPClient not connected');
        const treeOptions = {
            interactive: options.interactive,
            depth: options.depth,
        };
        const { elements, refMap } = await this.axTree.getTree(treeOptions);
        // Resolve bounds
        await (0, bounds_js_1.resolveAllBounds)(this.connection, elements, refMap, windowInfo.bounds, this.contentOffset, this.scaleFactor);
        this.lastRefMap = refMap;
        this.lastSnapshotId = `cdp-${Date.now()}`;
        this.lastElements = elements;
        this.lastSnapshotOptions = treeOptions;
        return {
            snapshot_id: this.lastSnapshotId,
            window: windowInfo,
            elements,
            fallback: null,
        };
    }
    /** Click an element by ref */
    async click(ref, options = {}) {
        const nodeRef = this.resolveRef(ref);
        const interactions = this.requireInteractions();
        const bounds = await this.getCSSBounds(nodeRef.backendDOMNodeId);
        await interactions.click(nodeRef.backendDOMNodeId, bounds, options);
    }
    /** Click at CSS viewport coordinates */
    async clickAt(x, y, options = {}) {
        await this.requireInteractions().clickAt(x, y, options);
    }
    /** Hover over an element */
    async hover(ref) {
        const nodeRef = this.resolveRef(ref);
        const interactions = this.requireInteractions();
        const bounds = await this.getCSSBounds(nodeRef.backendDOMNodeId);
        await interactions.hover(bounds);
    }
    /** Focus an element */
    async focus(ref) {
        const nodeRef = this.resolveRef(ref);
        await this.requireInteractions().focus(nodeRef.backendDOMNodeId);
    }
    /** Type text; a truthy `options.delay` selects typeWithDelay */
    async type(text, options = {}) {
        const interactions = this.requireInteractions();
        if (options.delay) {
            await interactions.typeWithDelay(text, options.delay);
        }
        else {
            await interactions.type(text);
        }
    }
    /** Fill an element with text */
    async fill(ref, text) {
        const nodeRef = this.resolveRef(ref);
        await this.requireInteractions().fill(nodeRef.backendDOMNodeId, text);
    }
    /** Press a key combination */
    async key(combo, repeat) {
        await this.requireInteractions().key(combo, repeat);
    }
    /** Scroll in a direction; with `options.on`, at the centre of that element's CSS bounds */
    async scroll(direction, options = {}) {
        const target = options.on ? this.resolveRef(options.on) : null;
        const interactions = this.requireInteractions();
        let atX;
        let atY;
        if (target) {
            const bounds = await this.getCSSBounds(target.backendDOMNodeId);
            atX = bounds[0] + bounds[2] / 2;
            atY = bounds[1] + bounds[3] / 2;
        }
        await interactions.scroll(direction, options.amount, atX, atY);
    }
    /** Select a value in a dropdown */
    async select(ref, value) {
        const nodeRef = this.resolveRef(ref);
        await this.requireInteractions().select(nodeRef.backendDOMNodeId, value);
    }
    /** Check a checkbox */
    async check(ref) {
        const nodeRef = this.resolveRef(ref);
        const interactions = this.requireInteractions();
        const bounds = await this.getCSSBounds(nodeRef.backendDOMNodeId);
        await interactions.check(nodeRef.backendDOMNodeId, bounds);
    }
    /** Uncheck a checkbox */
    async uncheck(ref) {
        const nodeRef = this.resolveRef(ref);
        const interactions = this.requireInteractions();
        const bounds = await this.getCSSBounds(nodeRef.backendDOMNodeId);
        await interactions.uncheck(nodeRef.backendDOMNodeId, bounds);
    }
    /**
     * Check if UI changed since last snapshot.
     * Returns true without querying when disconnected or when the last
     * snapshot holds no elements.
     */
    async changed() {
        if (!this.axTree || this.lastElements.length === 0)
            return true;
        // Rebuild the tree with the same options as the stored snapshot.
        const { elements } = await this.axTree.getTree({ ...this.lastSnapshotOptions });
        return (0, diff_js_1.computeChanged)(this.lastElements, elements);
    }
    /**
     * Get diff since last snapshot.
     * Returns `{ changed: true }` with empty lists when disconnected or when
     * the last snapshot holds no elements.
     */
    async diff() {
        if (!this.axTree || this.lastElements.length === 0) {
            return { changed: true, added: [], removed: [], modified: [] };
        }
        // Rebuild the tree with the same options as the stored snapshot.
        const { elements } = await this.axTree.getTree({ ...this.lastSnapshotOptions });
        return (0, diff_js_1.computeDiff)(this.lastElements, elements);
    }
    /**
     * Find elements in the last snapshot whose label or value contains `text`
     * (case-insensitive), optionally restricted to `options.role` and/or to
     * the first match (`options.first`).
     */
    find(text, options = {}) {
        const lowerText = text.toLowerCase();
        let results = this.flattenElements(this.lastElements).filter(el => {
            const matchText = (el.label?.toLowerCase().includes(lowerText)) ||
                (el.value?.toLowerCase().includes(lowerText));
            if (!matchText)
                return false;
            if (options.role && el.role !== options.role)
                return false;
            return true;
        });
        if (options.first && results.length > 0) {
            results = [results[0]];
        }
        return { elements: results };
    }
    /**
     * Read an attribute of an element from the last snapshot.
     * `attr` may be 'label', 'role', 'enabled' or 'focused'; anything else
     * (including undefined) reads the element's value.
     * @throws Error if the ref is not in the last snapshot.
     */
    read(ref, attr) {
        const el = this.findElementByRef(ref);
        if (!el)
            throw new Error(`Element not found: ${ref}`);
        if (attr === 'label')
            return { ref, value: el.label };
        if (attr === 'role')
            return { ref, value: el.role };
        if (attr === 'enabled')
            return { ref, value: el.enabled };
        if (attr === 'focused')
            return { ref, value: el.focused };
        return { ref, value: el.value };
    }
    /** Get element bounds as stored in the last snapshot */
    async box(ref) {
        const el = this.findElementByRef(ref);
        if (!el)
            throw new Error(`Element not found: ${ref}`);
        return { ref, bounds: el.bounds };
    }
    /**
     * Check element state against the last snapshot.
     * 'visible' means the stored bounds have positive width and height;
     * unknown states report false.
     */
    is(state, ref) {
        const el = this.findElementByRef(ref);
        if (!el)
            throw new Error(`Element not found: ${ref}`);
        switch (state) {
            case 'enabled': return { state, value: el.enabled };
            case 'focused': return { state, value: el.focused };
            case 'visible': return { state, value: el.bounds[2] > 0 && el.bounds[3] > 0 };
            default: return { state, value: false };
        }
    }
    /** Get children of an element from last snapshot */
    children(ref) {
        const el = this.findElementByRef(ref);
        if (!el)
            throw new Error(`Element not found: ${ref}`);
        return { ref, children: el.children ?? [] };
    }
    /** Get the underlying connection (for advanced usage) */
    getConnection() {
        return this.connection;
    }
    /** Get the last ref map */
    getLastRefMap() {
        return this.lastRefMap;
    }
    // --- Private helpers ---
    /** Return the interaction helper, or throw the same error snapshot() uses when not connected */
    requireInteractions() {
        if (!this.interactions) {
            throw new Error('CDPClient not connected');
        }
        return this.interactions;
    }
    /** Look up a ref in the last snapshot's ref map; throws if absent */
    resolveRef(ref) {
        const nodeRef = this.lastRefMap.get(ref);
        if (!nodeRef) {
            throw new Error(`CDP ref not found: ${ref}. Take a snapshot first.`);
        }
        return nodeRef;
    }
    /** Fetch current CSS bounds for a DOM node over the live connection */
    async getCSSBounds(backendDOMNodeId) {
        return (0, bounds_js_1.getBounds)(this.connection, backendDOMNodeId);
    }
    /** Read window.screenX/screenY from the page into contentOffset; falls back to {0, 0} on any failure */
    async updateContentOffset() {
        try {
            const result = await this.connection.send('Runtime.evaluate', {
                expression: 'JSON.stringify({ x: window.screenX, y: window.screenY })',
                returnByValue: true,
            });
            const parsed = JSON.parse(result.result.value);
            this.contentOffset = { x: parsed.x ?? 0, y: parsed.y ?? 0 };
        }
        catch {
            this.contentOffset = { x: 0, y: 0 };
        }
    }
    /** Flatten an element tree into a pre-order list */
    flattenElements(elements) {
        const result = [];
        const walk = (els) => {
            for (const el of els) {
                result.push(el);
                if (el.children)
                    walk(el.children);
            }
        };
        walk(elements);
        return result;
    }
    /** Find an element of the last snapshot by ref (undefined if absent) */
    findElementByRef(ref) {
        return this.flattenElements(this.lastElements).find(el => el.ref === ref);
    }
}
272
+ exports.CDPClient = CDPClient;