@vox-ai-app/integrations 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +125 -0
- package/package.json +42 -0
- package/src/imessage/def.js +41 -0
- package/src/imessage/index.js +9 -0
- package/src/imessage/mac/data.js +144 -0
- package/src/imessage/mac/reply.js +68 -0
- package/src/imessage/mac/service.js +141 -0
- package/src/imessage/tools.js +44 -0
- package/src/index.js +7 -0
- package/src/mail/def.js +317 -0
- package/src/mail/index.js +165 -0
- package/src/mail/manage/index.js +10 -0
- package/src/mail/manage/mac/index.js +275 -0
- package/src/mail/read/index.js +1 -0
- package/src/mail/read/mac/accounts.js +53 -0
- package/src/mail/read/mac/index.js +170 -0
- package/src/mail/read/mac/permission.js +29 -0
- package/src/mail/read/mac/sync.js +98 -0
- package/src/mail/read/mac/transform.js +55 -0
- package/src/mail/send/index.js +1 -0
- package/src/mail/send/mac/index.js +93 -0
- package/src/mail/shared/index.js +6 -0
- package/src/mail/shared/mac/index.js +48 -0
- package/src/mail/tools.js +41 -0
- package/src/screen/capture/index.js +1 -0
- package/src/screen/capture/mac/index.js +109 -0
- package/src/screen/control/index.js +15 -0
- package/src/screen/control/mac/accessibility.js +25 -0
- package/src/screen/control/mac/apps.js +62 -0
- package/src/screen/control/mac/exec.js +66 -0
- package/src/screen/control/mac/helpers.js +5 -0
- package/src/screen/control/mac/index.js +10 -0
- package/src/screen/control/mac/keyboard.js +34 -0
- package/src/screen/control/mac/keycodes.js +87 -0
- package/src/screen/control/mac/mouse.js +59 -0
- package/src/screen/control/mac/python-keyboard.js +66 -0
- package/src/screen/control/mac/python-mouse.js +66 -0
- package/src/screen/control/mac/python.js +2 -0
- package/src/screen/control/mac/ui-scan.js +45 -0
- package/src/screen/def.js +304 -0
- package/src/screen/index.js +17 -0
- package/src/screen/queue.js +54 -0
- package/src/screen/tools.js +50 -0
- package/src/tools.js +6 -0
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
const MOD_FLAGS_PY = `MOD_FLAGS = {
|
|
2
|
+
'command': Quartz.kCGEventFlagMaskCommand,
|
|
3
|
+
'cmd': Quartz.kCGEventFlagMaskCommand,
|
|
4
|
+
'shift': Quartz.kCGEventFlagMaskShift,
|
|
5
|
+
'option': Quartz.kCGEventFlagMaskAlternate,
|
|
6
|
+
'alt': Quartz.kCGEventFlagMaskAlternate,
|
|
7
|
+
'control': Quartz.kCGEventFlagMaskControl,
|
|
8
|
+
'ctrl': Quartz.kCGEventFlagMaskControl,
|
|
9
|
+
}`
|
|
10
|
+
|
|
11
|
+
/**
 * Build a Python (PyObjC/Quartz) script that types `text` by posting one
 * synthetic key-down / key-up pair per character.
 *
 * The text is embedded base64-encoded, so quotes, backslashes and newlines in
 * the input cannot terminate the Python string literal.
 *
 * @param {string} text - Text to type.
 * @returns {string} Python source for the caller to execute.
 */
export const pyTypeText = (text) => {
  const b64 = Buffer.from(text, 'utf8').toString('base64')
  return `
import Quartz, time, base64
text = base64.b64decode('${b64}').decode('utf-8')
src = Quartz.CGEventSourceCreate(Quartz.kCGEventSourceStateHIDSystemState)
for char in text:
    # The length argument counts UTF-16 code units: characters outside the
    # BMP (emoji etc.) need 2, otherwise only half a surrogate pair is sent.
    units = len(char.encode('utf-16-le')) // 2
    e = Quartz.CGEventCreateKeyboardEvent(src, 0, True)
    Quartz.CGEventKeyboardSetUnicodeString(e, units, char)
    Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
    time.sleep(0.01)
    e = Quartz.CGEventCreateKeyboardEvent(src, 0, False)
    Quartz.CGEventKeyboardSetUnicodeString(e, units, char)
    Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
    time.sleep(0.02)
`
}
|
|
28
|
+
|
|
29
|
+
/**
 * Build a Python (PyObjC/Quartz) script that presses and releases a virtual
 * key code, optionally with modifier flags applied to both events.
 *
 * @param {number} keyCode - Virtual key code, interpolated into the script as-is.
 * @param {string[]} [mods=[]] - Modifier names looked up (case-insensitively)
 *   in MOD_FLAGS; unknown names contribute no flag.
 * @returns {string} Python source for the caller to execute.
 */
export const pyKeyCode = (keyCode, mods = []) => {
  // A JSON array of strings is also a valid Python list literal. Missing or
  // non-array input becomes [], and non-string entries are dropped, so the
  // generated loop never sees undefined/null or calls .lower() on a number.
  const modList = JSON.stringify(
    Array.isArray(mods) ? mods.filter((m) => typeof m === 'string') : []
  )
  return `
import Quartz, time
${MOD_FLAGS_PY}
flags = 0
for m in ${modList}:
    flags |= MOD_FLAGS.get(m.lower(), 0)
src = Quartz.CGEventSourceCreate(Quartz.kCGEventSourceStateHIDSystemState)
e = Quartz.CGEventCreateKeyboardEvent(src, ${keyCode}, True)
if flags:
    Quartz.CGEventSetFlags(e, flags)
Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
time.sleep(0.05)
e = Quartz.CGEventCreateKeyboardEvent(src, ${keyCode}, False)
if flags:
    Quartz.CGEventSetFlags(e, flags)
Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
`
}
|
|
46
|
+
|
|
47
|
+
/**
 * Build a Python (PyObjC/Quartz) script that presses each character of a
 * base64-encoded string, optionally with modifier flags.
 *
 * @param {string} b64 - Base64 of the UTF-8 text to press; it is placed inside
 *   a single-quoted Python literal, so it must be genuine base64 output.
 * @param {string[]} [mods=[]] - Modifier names looked up (case-insensitively)
 *   in MOD_FLAGS; unknown names contribute no flag.
 * @returns {string} Python source for the caller to execute.
 */
export const pyCharKey = (b64, mods = []) => {
  // A JSON array of strings is also a valid Python list literal; missing or
  // malformed modifier input degrades to "no modifiers" instead of emitting
  // undefined/null into the script.
  const modList = JSON.stringify(
    Array.isArray(mods) ? mods.filter((m) => typeof m === 'string') : []
  )
  return `
import Quartz, time, base64
${MOD_FLAGS_PY}
flags = 0
for m in ${modList}:
    flags |= MOD_FLAGS.get(m.lower(), 0)
text = base64.b64decode('${b64}').decode('utf-8')
src = Quartz.CGEventSourceCreate(Quartz.kCGEventSourceStateHIDSystemState)
for char in text:
    # The length argument counts UTF-16 code units (2 for non-BMP characters).
    units = len(char.encode('utf-16-le')) // 2
    e = Quartz.CGEventCreateKeyboardEvent(src, 0, True)
    Quartz.CGEventKeyboardSetUnicodeString(e, units, char)
    if flags:
        Quartz.CGEventSetFlags(e, flags)
    Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
    time.sleep(0.01)
    e = Quartz.CGEventCreateKeyboardEvent(src, 0, False)
    Quartz.CGEventKeyboardSetUnicodeString(e, units, char)
    # Key-up carries the same modifier flags as key-down.
    if flags:
        Quartz.CGEventSetFlags(e, flags)
    Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
    time.sleep(0.02)
`
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
 * Build a Python (PyObjC/Quartz) script that clicks at a screen location.
 *
 * Each click posts a mouse-down and mouse-up event whose click-state field is
 * the 1-based click index, so count = 2 is delivered as a double-click.
 *
 * @param {number} x - Horizontal coordinate, interpolated as-is.
 * @param {number} y - Vertical coordinate, interpolated as-is.
 * @param {string} [button='left'] - 'right' selects the right button; any
 *   other value clicks the left button.
 * @param {number} [count=1] - Number of clicks.
 * @returns {string} Python source for the caller to execute.
 */
export const pyClick = (x, y, button = 'left', count = 1) => `
import sys, time
try:
    import Quartz
except ImportError:
    sys.stderr.write('PyObjC Quartz bindings are not available\\n')
    sys.exit(1)

x, y = ${x}, ${y}
count = ${count}
is_right = ${button === 'right' ? 'True' : 'False'}

btn = Quartz.kCGMouseButtonRight if is_right else Quartz.kCGMouseButtonLeft
down = Quartz.kCGEventRightMouseDown if is_right else Quartz.kCGEventLeftMouseDown
up = Quartz.kCGEventRightMouseUp if is_right else Quartz.kCGEventLeftMouseUp
pt = Quartz.CGPoint(x, y)

for i in range(count):
    n = i + 1
    e = Quartz.CGEventCreateMouseEvent(None, down, pt, btn)
    Quartz.CGEventSetIntegerValueField(e, Quartz.kCGMouseEventClickState, n)
    Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
    time.sleep(0.05)
    e = Quartz.CGEventCreateMouseEvent(None, up, pt, btn)
    Quartz.CGEventSetIntegerValueField(e, Quartz.kCGMouseEventClickState, n)
    Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
    if i < count - 1:
        time.sleep(0.12)
`
|
|
29
|
+
|
|
30
|
+
/**
 * Build a Python (PyObjC/Quartz) script that posts a single mouse-moved event
 * at the given location.
 *
 * @param {number} x - Horizontal coordinate, interpolated as-is.
 * @param {number} y - Vertical coordinate, interpolated as-is.
 * @returns {string} Python source for the caller to execute.
 */
export const pyMove = (x, y) => {
  const lines = [
    '',
    'import Quartz',
    `pt = Quartz.CGPoint(${x}, ${y})`,
    'e = Quartz.CGEventCreateMouseEvent(None, Quartz.kCGEventMouseMoved, pt, Quartz.kCGMouseButtonLeft)',
    'Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)',
    ''
  ]
  return lines.join('\n')
}
|
|
36
|
+
|
|
37
|
+
/**
 * Build a Python (PyObjC/Quartz) script that moves the pointer to a location
 * and then posts a two-axis, line-unit scroll-wheel event.
 *
 * The wheel deltas of CGEventCreateScrollWheelEvent are 32-bit integers, so
 * fractional deltas are rounded to the nearest integer before being embedded.
 *
 * @param {number} x - Horizontal pointer coordinate, interpolated as-is.
 * @param {number} y - Vertical pointer coordinate, interpolated as-is.
 * @param {number} [dx=0] - Horizontal scroll amount (second wheel axis).
 * @param {number} [dy=-3] - Vertical scroll amount (first wheel axis).
 * @returns {string} Python source for the caller to execute.
 * @throws {TypeError} If a delta is not a finite number.
 */
export const pyScroll = (x, y, dx = 0, dy = -3) => {
  const toWheelDelta = (value, label) => {
    const rounded = Math.round(Number(value))
    if (!Number.isFinite(rounded)) {
      throw new TypeError(`${label} must be a finite number`)
    }
    // "|| 0" normalizes a rounded -0 so it is emitted as plain 0.
    return rounded || 0
  }
  const wheelX = toWheelDelta(dx, 'dx')
  const wheelY = toWheelDelta(dy, 'dy')
  return `
import Quartz, time
pt = Quartz.CGPoint(${x}, ${y})
e = Quartz.CGEventCreateMouseEvent(None, Quartz.kCGEventMouseMoved, pt, Quartz.kCGMouseButtonLeft)
Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
time.sleep(0.05)
e = Quartz.CGEventCreateScrollWheelEvent(None, Quartz.kCGScrollEventUnitLine, 2, ${wheelY}, ${wheelX})
Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)
`
}
|
|
46
|
+
|
|
47
|
+
/**
 * Build a Python (PyObjC/Quartz) script that performs a left-button drag:
 * mouse-down at the start point, one dragged event at the end point, then
 * mouse-up at the end point, with short sleeps in between.
 *
 * @param {number} x1 - Start X, interpolated as-is.
 * @param {number} y1 - Start Y, interpolated as-is.
 * @param {number} x2 - End X, interpolated as-is.
 * @param {number} y2 - End Y, interpolated as-is.
 * @returns {string} Python source for the caller to execute.
 */
export const pyDrag = (x1, y1, x2, y2) => {
  const post = 'Quartz.CGEventPost(Quartz.kCGHIDEventTap, e)'
  const mouseEvent = (type, point) =>
    `e = Quartz.CGEventCreateMouseEvent(None, Quartz.${type}, ${point}, Quartz.kCGMouseButtonLeft)`
  return [
    '',
    'import Quartz, time',
    `p1 = Quartz.CGPoint(${x1}, ${y1})`,
    `p2 = Quartz.CGPoint(${x2}, ${y2})`,
    mouseEvent('kCGEventLeftMouseDown', 'p1'),
    post,
    'time.sleep(0.08)',
    mouseEvent('kCGEventLeftMouseDragged', 'p2'),
    post,
    'time.sleep(0.05)',
    mouseEvent('kCGEventLeftMouseUp', 'p2'),
    post,
    ''
  ].join('\n')
}
|
|
60
|
+
|
|
61
|
+
/**
 * Build a Python (PyObjC/Quartz) script that prints the current pointer
 * location to stdout as "x,y", with both values truncated to integers.
 *
 * @returns {string} Python source for the caller to execute.
 */
export const pyGetMousePos = () =>
  [
    '',
    'import Quartz',
    'event = Quartz.CGEventCreate(None)',
    'pos = Quartz.CGEventGetLocation(event)',
    'print(f"{int(pos.x)},{int(pos.y)}")',
    ''
  ].join('\n')
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
// Timeout budget for slow operations; presumably milliseconds (i.e. 60 s).
// NOTE(review): confirm the unit and the consumers at the call sites.
export const LONG_TIMEOUT = 60_000
|
|
2
|
+
|
|
3
|
+
// JXA (JavaScript for Automation) source that lists UI elements of the
// frontmost process via System Events and evaluates to a JSON string of the
// form { app, count, elements: [{ role, label, enabled, x, y, w, h }] }.
//
// Only the first three windows are scanned. entireContents() already returns
// the complete flattened subtree of a window, so each window is walked exactly
// once; recursing into the returned elements would report every nested
// element again and repeat the traversal.
export const UI_ELEMENTS_SCRIPT = `
ObjC.import('Foundation');
var INTERACTIVE = ['AXButton','AXTextField','AXTextArea','AXCheckBox','AXRadioButton',
  'AXPopUpButton','AXComboBox','AXSlider','AXLink','AXMenuItem','AXMenu',
  'AXTab','AXCell','AXRow','AXStaticText'];
function collect(elem, out) {
  var children;
  try { children = elem.entireContents(); } catch(e) { return; }
  for (var i = 0; i < children.length; i++) {
    var el = children[i];
    try {
      var role = '';
      try { role = el.role(); } catch(e) { continue; }
      var label = '';
      try { label = el.title(); } catch(e) {}
      if (!label) try { label = el.description(); } catch(e) {}
      if (!label) try { var v = el.value(); if (typeof v === 'string') label = v; } catch(e) {}
      var enabled = true;
      try { enabled = el.enabled(); } catch(e) {}
      var pos = { x: 0, y: 0 };
      try { var p = el.position(); pos = { x: p.x, y: p.y }; } catch(e) {}
      var size = { w: 0, h: 0 };
      try { var s = el.size(); size = { w: s.width, h: s.height }; } catch(e) {}
      if (label || INTERACTIVE.indexOf(role) !== -1) {
        out.push({ role: role, label: label, enabled: enabled,
          x: Math.round(pos.x), y: Math.round(pos.y),
          w: Math.round(size.w), h: Math.round(size.h) });
      }
    } catch(e) {}
  }
}
var se = Application('System Events');
var proc = se.processes.whose({ frontmost: true })[0];
var appName = proc.name();
var wins = proc.windows();
var out = [];
for (var w = 0; w < Math.min(wins.length, 3); w++) {
  collect(wins[w], out);
}
JSON.stringify({ app: appName, count: out.length, elements: out });
`
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
// Tool schemas (name, description, JSON-schema-style parameters) for the
// screen capture / control tools. Coordinates are described as logical screen
// points throughout, so values can be passed between capture_region, click_at
// and move_mouse without unit conversion.
export const SCREEN_TOOL_DEFINITIONS = [
  {
    name: 'capture_full_screen',
    description:
      "Capture a screenshot of the user's entire screen. Use this whenever the user asks you to look at, read, or help with something on their screen — including emails, documents, web pages, code, chat messages, forms, or any other visible content. The returned image contains the full screen rendered at the time of capture. You must read and analyze ALL visible text and UI elements in the image in detail, including the content of open applications, browser tabs, emails, and documents. Never say you cannot read text from an image — you have full vision capabilities and must extract and reason over all on-screen content.",
    parameters: { type: 'object', properties: {} }
  },
  {
    name: 'capture_region',
    description:
      'Capture a screenshot of a specific rectangular region of the screen. Use this instead of capture_full_screen when you already know the area of interest — it is faster and returns a smaller, more focused image. Coordinates must be in logical screen points, matching the values from click_at.',
    parameters: {
      type: 'object',
      properties: {
        x: { type: 'number', description: 'Left edge of the region in screen points.' },
        y: { type: 'number', description: 'Top edge of the region in screen points.' },
        width: { type: 'number', description: 'Width of the region in screen points.' },
        height: { type: 'number', description: 'Height of the region in screen points.' }
      },
      required: ['x', 'y', 'width', 'height']
    }
  },
  {
    name: 'click_at',
    description:
      "Click the mouse at specific screen coordinates on the user's Mac. Use after capture_full_screen to interact with UI elements. Supports left click, right click, and double click.",
    parameters: {
      type: 'object',
      properties: {
        x: { type: 'number', description: 'Horizontal screen coordinate in logical screen points.' },
        y: { type: 'number', description: 'Vertical screen coordinate in logical screen points.' },
        button: { type: 'string', description: 'Mouse button: "left" (default) or "right".' },
        count: {
          type: 'integer',
          description: 'Number of clicks: 1 (default) or 2 for double-click.'
        }
      },
      required: ['x', 'y']
    }
  },
  {
    name: 'move_mouse',
    description: 'Move the mouse cursor to specific screen coordinates without clicking.',
    parameters: {
      type: 'object',
      properties: {
        x: { type: 'number', description: 'Horizontal screen coordinate in logical screen points.' },
        y: { type: 'number', description: 'Vertical screen coordinate in logical screen points.' }
      },
      required: ['x', 'y']
    }
  },
  {
    name: 'type_text',
    description:
      'Type text at the current cursor position as if typed on a keyboard. Use after clicking the target input field.',
    parameters: {
      type: 'object',
      properties: {
        text: { type: 'string', description: 'The text to type.' }
      },
      required: ['text']
    }
  },
  {
    name: 'key_press',
    description:
      'Press a keyboard key, optionally with modifier keys (command, shift, option, control). Use for shortcuts like Cmd+C, Cmd+V, Enter, Escape, arrow keys, etc.',
    parameters: {
      type: 'object',
      properties: {
        key: {
          type: 'string',
          description:
            'Key to press. Named keys: return, tab, space, delete, escape, left, right, up, down, f1-f12, home, end, pageup, pagedown. Or a single character like "a", "c", "v".'
        },
        modifiers: {
          type: 'array',
          items: { type: 'string' },
          description:
            'Modifier keys to hold: "command" (or "cmd"), "shift", "option" (or "alt"), "control" (or "ctrl").'
        }
      },
      required: ['key']
    }
  },
  {
    name: 'scroll',
    description:
      'Scroll at specific screen coordinates. Positive deltaY scrolls up, negative scrolls down.',
    parameters: {
      type: 'object',
      properties: {
        x: { type: 'number', description: 'Horizontal screen coordinate.' },
        y: { type: 'number', description: 'Vertical screen coordinate.' },
        deltaX: { type: 'number', description: 'Horizontal scroll amount (default 0).' },
        deltaY: {
          type: 'number',
          description:
            'Vertical scroll amount. Negative = scroll down, positive = scroll up. Default -3.'
        }
      },
      required: ['x', 'y']
    }
  },
  {
    name: 'drag',
    description: 'Click and drag from one screen coordinate to another.',
    parameters: {
      type: 'object',
      properties: {
        fromX: { type: 'number', description: 'Start X coordinate.' },
        fromY: { type: 'number', description: 'Start Y coordinate.' },
        toX: { type: 'number', description: 'End X coordinate.' },
        toY: { type: 'number', description: 'End Y coordinate.' }
      },
      required: ['fromX', 'fromY', 'toX', 'toY']
    }
  },
  {
    name: 'get_mouse_position',
    description: 'Get the current mouse cursor position on screen.',
    parameters: { type: 'object', properties: {} }
  },
  {
    name: 'get_ui_elements',
    description:
      'Inspect the macOS Accessibility tree of the frontmost app (or a named app) and return every visible UI element with its role, label, position (x, y), and size (w, h). Use this INSTEAD of screenshot-based coordinate guessing when you need to click a specific button, text field, menu item, or link — find the element by its label, then pass its center coordinates (x + w/2, y + h/2) to click_at. This is faster and more reliable than clicking by pixel.',
    parameters: {
      type: 'object',
      properties: {
        app: {
          type: 'string',
          description:
            'Name of the macOS app process to inspect (e.g. "Safari", "Mail", "Finder"). If omitted, inspects whichever app is currently in the foreground.'
        },
        maxElements: {
          type: 'integer',
          description: 'Maximum number of UI elements to return (default 200, max 1000).'
        }
      }
    }
  },
  {
    name: 'clipboard_read',
    description: 'Read the current text content of the system clipboard.',
    parameters: { type: 'object', properties: {} }
  },
  {
    name: 'clipboard_write',
    description: 'Write text to the system clipboard, replacing its current content.',
    parameters: {
      type: 'object',
      properties: {
        text: { type: 'string', description: 'Text to place on the clipboard.' }
      },
      required: ['text']
    }
  },
  {
    name: 'focus_app',
    description:
      'Bring a macOS application to the foreground. Use before interacting with an app that may be in the background.',
    parameters: {
      type: 'object',
      properties: {
        app: {
          type: 'string',
          description:
            'Application name as it appears in /Applications (e.g. "Safari", "Finder", "Visual Studio Code").'
        }
      },
      required: ['app']
    }
  },
  {
    name: 'launch_app',
    description: 'Launch a macOS application, opening it if not already running.',
    parameters: {
      type: 'object',
      properties: {
        app: { type: 'string', description: 'Application name (e.g. "Terminal", "Xcode").' },
        args: {
          type: 'array',
          items: { type: 'string' },
          description: 'Optional arguments to pass to the application.'
        }
      },
      required: ['app']
    }
  },
  {
    name: 'list_apps',
    description: "List all installed applications in /Applications on the user's Mac.",
    parameters: { type: 'object', properties: {} }
  },
  {
    name: 'acquire_screen',
    description:
      'Claim exclusive screen control for the current agent session. Call this before starting a multi-step screen automation task to prevent other agents from interfering. The lock auto-expires after 30s of screen inactivity. Always pair with release_screen when done. Use force=true to break a stuck lock from a dead session.',
    parameters: {
      type: 'object',
      properties: {
        sessionId: { type: 'string', description: 'Unique identifier for this agent session.' },
        force: {
          type: 'boolean',
          description:
            'Override an existing lock held by another session. Use only when a previous session is known to be dead.'
        }
      },
      required: ['sessionId']
    }
  },
  {
    name: 'release_screen',
    description: 'Release the screen control lock acquired with acquire_screen.',
    parameters: {
      type: 'object',
      properties: {
        sessionId: { type: 'string', description: 'The session ID used in acquire_screen.' }
      },
      required: ['sessionId']
    }
  }
]
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
export { captureFullScreen, captureRegion, waitForScreenPermission } from './capture/index.js'
|
|
2
|
+
export {
|
|
3
|
+
clickAt,
|
|
4
|
+
moveMouse,
|
|
5
|
+
typeText,
|
|
6
|
+
keyPress,
|
|
7
|
+
scroll,
|
|
8
|
+
drag,
|
|
9
|
+
getMousePosition,
|
|
10
|
+
getUiElements,
|
|
11
|
+
clipboardRead,
|
|
12
|
+
clipboardWrite,
|
|
13
|
+
focusApp,
|
|
14
|
+
launchApp,
|
|
15
|
+
listApps
|
|
16
|
+
} from './control/index.js'
|
|
17
|
+
export { enqueueScreen, acquireScreen, releaseScreen, getScreenSession } from './queue.js'
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
// How long an acquired screen lock stays valid without being renewed.
const LOCK_TTL_MS = 30_000
// Tail of the promise chain on which queued screen tasks run one after another.
let _tail = Promise.resolve()
// ID of the session currently holding the lock, or null when unlocked.
let _sessionId = null
// Epoch-millisecond timestamp at which the current lock expires, or null.
let _lockExpiry = null
// Handle of the timeout that clears the lock on expiry, or null.
let _lockTimer = null
|
|
6
|
+
// Drop the current lock: cancel any pending expiry timer and reset the
// holder / expiry state to "unlocked".
const _clearLock = () => {
  const pendingTimer = _lockTimer
  if (pendingTimer) {
    clearTimeout(pendingTimer)
    _lockTimer = null
  }
  _lockExpiry = null
  _sessionId = null
}
|
|
14
|
+
// Push the lock expiry LOCK_TTL_MS into the future and re-arm the expiry
// timer. Does nothing when no session currently holds the lock.
const _renewLock = () => {
  if (!_sessionId) return
  if (_lockTimer) clearTimeout(_lockTimer)
  _lockExpiry = Date.now() + LOCK_TTL_MS
  _lockTimer = setTimeout(_clearLock, LOCK_TTL_MS)
  // A pending lock expiry must not keep the Node process alive. unref() only
  // exists on Node timer objects, hence the optional call.
  _lockTimer.unref?.()
}
|
|
20
|
+
// True only when an expiry timestamp is set and it lies in the past.
const _isLockExpired = () => {
  if (_lockExpiry === null) return false
  return Date.now() > _lockExpiry
}
|
|
21
|
+
/**
 * Run `fn` after every previously queued screen task has settled, and renew
 * the current screen lock (if any) as a sign of activity.
 *
 * @param {() => any} fn - Task to run; may return a promise.
 * @returns {Promise<any>} Settles with the result (or rejection) of `fn`.
 */
export const enqueueScreen = (fn) => {
  _renewLock()
  // Call fn without arguments: chaining it directly would hand it the previous
  // task's result as its first parameter.
  const task = _tail.then(() => fn())
  // The queue tail swallows rejections so one failed task cannot block the
  // tasks queued after it; the caller still sees the rejection through `task`.
  _tail = task.catch(() => {})
  return task
}
|
|
27
|
+
/**
 * Claim the screen lock for a session.
 *
 * Succeeds when the screen is unlocked, already held by the same session,
 * held by a lock that has expired, or when `force` is set.
 *
 * @param {{ sessionId: string, force?: boolean }} options
 * @returns {{ sessionId: string }} The session now holding the lock.
 * @throws {TypeError} If `sessionId` is missing or empty. Such a value could
 *   never hold the lock, so accepting it would report a lock that does not exist.
 * @throws {Error} With code 'SCREEN_LOCKED' and `holder` set when another
 *   session holds an unexpired lock and `force` is not set.
 */
export const acquireScreen = ({ sessionId, force = false }) => {
  if (!sessionId) {
    throw new TypeError('acquireScreen requires a non-empty sessionId.')
  }
  const heldByOther = Boolean(_sessionId) && _sessionId !== sessionId && !_isLockExpired()
  if (heldByOther && !force) {
    throw Object.assign(
      new Error(`Screen is in use by session "${_sessionId}". Release it first or wait.`),
      {
        code: 'SCREEN_LOCKED',
        holder: _sessionId
      }
    )
  }
  // Reset any previous (own, expired or forced-over) lock before taking it.
  _clearLock()
  _sessionId = sessionId
  _renewLock()
  return {
    sessionId
  }
}
|
|
44
|
+
/**
 * Release the screen lock if it is held by the given session. A release by
 * any other session leaves the lock untouched.
 *
 * @param {{ sessionId: string }} options
 * @returns {{ ok: true }} Always reports ok, whether or not a lock was cleared.
 */
export const releaseScreen = ({ sessionId }) => {
  const isHolder = _sessionId === sessionId
  if (isHolder) {
    _clearLock()
  }
  return { ok: true }
}
|
|
50
|
+
/**
 * Snapshot of the current lock state.
 *
 * @returns {{ sessionId: (string|null), expiresAt: (number|null), ttlMs: (number|null) }}
 *   Holder ID, expiry timestamp, and the time left until expiry clamped at 0
 *   (null when no expiry is set).
 */
export const getScreenSession = () => {
  const remaining = _lockExpiry ? Math.max(0, _lockExpiry - Date.now()) : null
  return {
    sessionId: _sessionId,
    expiresAt: _lockExpiry,
    ttlMs: remaining
  }
}
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { SCREEN_TOOL_DEFINITIONS } from './def.js'
|
|
2
|
+
import { captureFullScreen, captureRegion } from './capture/index.js'
|
|
3
|
+
import {
|
|
4
|
+
clickAt,
|
|
5
|
+
moveMouse,
|
|
6
|
+
typeText,
|
|
7
|
+
keyPress,
|
|
8
|
+
scroll,
|
|
9
|
+
drag,
|
|
10
|
+
getMousePosition,
|
|
11
|
+
getUiElements,
|
|
12
|
+
clipboardRead,
|
|
13
|
+
clipboardWrite,
|
|
14
|
+
focusApp,
|
|
15
|
+
launchApp,
|
|
16
|
+
listApps
|
|
17
|
+
} from './control/index.js'
|
|
18
|
+
import { acquireScreen, releaseScreen } from './queue.js'
|
|
19
|
+
|
|
20
|
+
// Stand-in executor used on unsupported platforms: always throws.
const DARWIN_ONLY = () => {
  throw new Error('Screen tools are only available on macOS.')
}

// Evaluated once at module load.
const isDarwin = process.platform === 'darwin'

// Pass `fn` through on macOS; on any other platform substitute the throwing
// stand-in so the tool fails with a clear message when invoked.
const guard = (fn) => {
  if (isDarwin) {
    return fn
  }
  return DARWIN_ONLY
}
|
|
26
|
+
|
|
27
|
+
// Executor factories keyed by tool name. Each factory takes a context (unused
// here) and returns the function that implements the tool. Most are wrapped
// in guard(); the clipboard and screen-lock tools are returned unwrapped.
const executors = {
  capture_full_screen(_ctx) {
    return guard(captureFullScreen)
  },
  capture_region(_ctx) {
    return guard(captureRegion)
  },
  click_at(_ctx) {
    return guard(clickAt)
  },
  move_mouse(_ctx) {
    return guard(moveMouse)
  },
  type_text(_ctx) {
    return guard(typeText)
  },
  key_press(_ctx) {
    return guard(keyPress)
  },
  scroll(_ctx) {
    return guard(scroll)
  },
  drag(_ctx) {
    return guard(drag)
  },
  get_mouse_position(_ctx) {
    return guard(getMousePosition)
  },
  get_ui_elements(_ctx) {
    return guard(getUiElements)
  },
  clipboard_read(_ctx) {
    return clipboardRead
  },
  clipboard_write(_ctx) {
    return clipboardWrite
  },
  focus_app(_ctx) {
    return guard(focusApp)
  },
  launch_app(_ctx) {
    return guard(launchApp)
  },
  list_apps(_ctx) {
    return guard(listApps)
  },
  acquire_screen(_ctx) {
    return acquireScreen
  },
  release_screen(_ctx) {
    return releaseScreen
  }
}
|
|
46
|
+
|
|
47
|
+
// Pair every tool definition with the executor factory registered under the
// same name (undefined when no such entry exists in executors).
export const SCREEN_TOOLS = SCREEN_TOOL_DEFINITIONS.map((definition) => {
  const execute = executors[definition.name]
  return { definition, execute }
})
|
package/src/tools.js
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import { SCREEN_TOOLS } from './screen/tools.js'
|
|
2
|
+
import { MAIL_TOOLS } from './mail/tools.js'
|
|
3
|
+
import { IMESSAGE_TOOLS } from './imessage/tools.js'
|
|
4
|
+
|
|
5
|
+
// Every integration tool in one flat array: screen tools first, then mail,
// then iMessage.
export const ALL_INTEGRATION_TOOLS = [...SCREEN_TOOLS, ...MAIL_TOOLS, ...IMESSAGE_TOOLS]
|
|
6
|
+
export { SCREEN_TOOLS, MAIL_TOOLS, IMESSAGE_TOOLS }
|