gclm-code 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bin/gc.js +53 -25
- package/bin/install-runtime.js +253 -0
- package/package.json +10 -5
- package/vendor/manifest.json +92 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/package.json +9 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/bridgeClient.ts +1126 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/browserTools.ts +546 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/index.ts +15 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/mcpServer.ts +96 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/mcpSocketClient.ts +493 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/mcpSocketPool.ts +327 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/toolCalls.ts +301 -0
- package/vendor/modules/node_modules/@ant/claude-for-chrome-mcp/src/types.ts +134 -0
- package/vendor/modules/node_modules/@ant/computer-use-input/package.json +9 -0
- package/vendor/modules/node_modules/@ant/computer-use-input/src/driver-jxa.js +341 -0
- package/vendor/modules/node_modules/@ant/computer-use-input/src/driver-swift.swift +417 -0
- package/vendor/modules/node_modules/@ant/computer-use-input/src/implementation.js +204 -0
- package/vendor/modules/node_modules/@ant/computer-use-input/src/index.js +5 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/package.json +11 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/deniedApps.ts +553 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/imageResize.ts +108 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/index.ts +69 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/keyBlocklist.ts +153 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/mcpServer.ts +313 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/pixelCompare.ts +171 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/sentinelApps.ts +43 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/subGates.ts +19 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/toolCalls.ts +3872 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/tools.ts +706 -0
- package/vendor/modules/node_modules/@ant/computer-use-mcp/src/types.ts +635 -0
- package/vendor/modules/node_modules/@ant/computer-use-swift/package.json +9 -0
- package/vendor/modules/node_modules/@ant/computer-use-swift/src/driver-jxa.js +108 -0
- package/vendor/modules/node_modules/@ant/computer-use-swift/src/implementation.js +706 -0
- package/vendor/modules/node_modules/@ant/computer-use-swift/src/index.js +7 -0
- package/vendor/modules/node_modules/audio-capture-napi/package.json +8 -0
- package/vendor/modules/node_modules/audio-capture-napi/src/index.ts +226 -0
- package/vendor/modules/node_modules/image-processor-napi/package.json +11 -0
- package/vendor/modules/node_modules/image-processor-napi/src/index.ts +396 -0
- package/vendor/modules/node_modules/modifiers-napi/package.json +8 -0
- package/vendor/modules/node_modules/modifiers-napi/src/index.ts +79 -0
- package/vendor/modules/node_modules/url-handler-napi/package.json +8 -0
- package/vendor/modules/node_modules/url-handler-napi/src/index.ts +62 -0
|
@@ -0,0 +1,706 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP tool schemas for the computer-use server. Mirrors
|
|
3
|
+
* claude-for-chrome-mcp/src/browserTools.ts in shape (plain `Tool`-shaped
|
|
4
|
+
* object literals, no zod).
|
|
5
|
+
*
|
|
6
|
+
* Coordinate descriptions are baked in at tool-list build time from the
|
|
7
|
+
* `chicago_coordinate_mode` gate. The model sees exactly ONE coordinate
|
|
8
|
+
* convention in the param descriptions and never learns the other exists.
|
|
9
|
+
* The host (`serverDef.ts`) reads the same frozen gate value for
|
|
10
|
+
* `scaleCoord` — both must agree or clicks land in the wrong space.
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { Tool } from "@modelcontextprotocol/sdk/types.js";
|
|
14
|
+
|
|
15
|
+
import type { CoordinateMode } from "./types.js";
|
|
16
|
+
|
|
17
|
+
// See packages/desktop/computer-use-mcp/COORDINATES.md before touching any
// model-facing coordinate text. Chrome's browserTools.ts:143 is the reference
// phrasing — "pixels from the left edge", no geometry, no number to do math with.
/**
 * Model-facing wording for the horizontal (`x`) and vertical (`y`) component
 * of a coordinate, keyed by coordinate mode. Exactly one entry is selected
 * when the tool list is built, so the model only ever reads one convention.
 *
 * NOTE(review): the tool builders visible in this file interpolate only the
 * `x` text (even for `(x, y)` tuples); `y` is not read here — confirm whether
 * that is intentional before editing either string.
 */
const COORD_DESC: Record<CoordinateMode, { x: string; y: string }> = {
  pixels: {
    x: "Horizontal pixel position read directly from the most recent screenshot image, measured from the left edge. The server handles all scaling.",
    y: "Vertical pixel position read directly from the most recent screenshot image, measured from the top edge. The server handles all scaling.",
  },
  normalized_0_100: {
    x: "Horizontal position as a percentage of screen width, 0.0–100.0 (0 = left edge, 100 = right edge).",
    y: "Vertical position as a percentage of screen height, 0.0–100.0 (0 = top edge, 100 = bottom edge).",
  },
};
|
|
30
|
+
|
|
31
|
+
/**
 * Sentence spliced into the description of every tool that sends input, so
 * the model is told the call fails (and does nothing) when the frontmost
 * application is not in the session allowlist.
 */
const FRONTMOST_GATE_DESC =
  "The frontmost application must be in the session allowlist at the time of this call, or this tool returns an error and does nothing.";
|
|
33
|
+
|
|
34
|
+
/**
 * JSON-Schema fragment describing one item of the `actions` array accepted by
 * `computer_batch`, `teach_step`, and `teach_batch`.
 *
 * All three dispatch through the same `dispatchAction` path with the same
 * validation — keep the `action` enum in sync with `BATCHABLE_ACTIONS` in
 * toolCalls.ts.
 *
 * Only `action` is required; every other property applies to a subset of
 * actions, as stated in its description.
 */
const BATCH_ACTION_ITEM_SCHEMA = {
  type: "object",
  properties: {
    action: {
      type: "string",
      enum: [
        "key",
        "type",
        "mouse_move",
        "left_click",
        "left_click_drag",
        "right_click",
        "middle_click",
        "double_click",
        "triple_click",
        "scroll",
        "hold_key",
        "screenshot",
        "cursor_position",
        "left_mouse_down",
        "left_mouse_up",
        "wait",
      ],
      description: "The action to perform.",
    },
    coordinate: {
      type: "array",
      items: { type: "number" },
      minItems: 2,
      maxItems: 2,
      description:
        "(x, y) for click/mouse_move/scroll/left_click_drag end point.",
    },
    start_coordinate: {
      type: "array",
      items: { type: "number" },
      minItems: 2,
      maxItems: 2,
      description:
        "(x, y) drag start — left_click_drag only. Omit to drag from current cursor.",
    },
    text: {
      type: "string",
      description:
        "For type: the text. For key/hold_key: the chord string. For click/scroll: modifier keys to hold.",
    },
    scroll_direction: {
      type: "string",
      enum: ["up", "down", "left", "right"],
    },
    scroll_amount: { type: "integer", minimum: 0, maximum: 100 },
    duration: {
      type: "number",
      description: "Seconds (0–100). For hold_key/wait.",
    },
    repeat: {
      type: "integer",
      minimum: 1,
      maximum: 100,
      description: "For key: repeat count.",
    },
  },
  required: ["action"],
};
|
|
104
|
+
|
|
105
|
+
/**
 * Build the tool list. Parameterized by capabilities and coordinate mode so
 * descriptions are honest and unambiguous (plan §1 — "Unfiltered + honest").
 *
 * @param caps - Host capabilities. `screenshotFiltering` selects which
 *   wording the `screenshot` tool gets; a truthy `teachMode` appends the
 *   tools returned by `buildTeachTools`. `platform` is part of the signature
 *   but is not read by this function.
 * @param coordinateMode - Key into `COORD_DESC`. MUST match what the host
 *   passes to `scaleCoord` at tool-call time. Both should read the same
 *   frozen-at-load gate constant.
 * @param installedAppNames - Optional pre-sanitized list of app display names
 *   to enumerate in the `request_access` description. The caller is
 *   responsible for sanitization (length cap, character allowlist, sort,
 *   count cap) — this function just splices the list into the description
 *   verbatim. Omit (or pass an empty array) to fall back to the generic
 *   "display names or bundle IDs" wording.
 * @returns The tool definitions in a fixed order, teach tools (if any) last.
 */
export function buildComputerUseTools(
  caps: {
    screenshotFiltering: "native" | "none";
    platform: "darwin" | "win32";
    /** Include request_teach_access, teach_step and teach_batch. Read once at server construction. */
    teachMode?: boolean;
  },
  coordinateMode: CoordinateMode,
  installedAppNames?: string[],
): Tool[] {
  const coord = COORD_DESC[coordinateMode];

  // Shared hint suffix for BOTH request_access and request_teach_access —
  // they use the same resolveRequestedApps path, so the model should get
  // the same enumeration for both.
  const installedAppsHint =
    installedAppNames && installedAppNames.length > 0
      ? ` Available applications on this machine: ${installedAppNames.join(", ")}.`
      : "";

  // `[x, y]` tuple — param shape for all click/move/scroll tools.
  // NOTE(review): only `coord.x` is interpolated here and below, even though
  // the parameter is an (x, y) pair; `coord.y` is never read. Confirm that is
  // intentional before changing any of this model-facing text.
  const coordinateTuple = {
    type: "array",
    items: { type: "number" },
    minItems: 2,
    maxItems: 2,
    description: `(x, y): ${coord.x}`,
  };

  // Modifier hold during click. Shared across all 5 click variants.
  const clickModifierText = {
    type: "string",
    description:
      'Modifier keys to hold during the click (e.g. "shift", "ctrl+shift"). Supports the same syntax as the key tool.',
  };

  const screenshotDesc =
    caps.screenshotFiltering === "native"
      ? "Take a screenshot of the primary display. Applications not in the session allowlist are excluded at the compositor level — only granted apps and the desktop are visible."
      : "Take a screenshot of the primary display. On this platform, screenshots are NOT filtered — all open windows are visible. Input actions targeting apps not in the session allowlist are rejected.";

  // One click-variant tool. The five variants differ only in name and in the
  // sentence(s) that precede the frontmost-gate sentence.
  const clickTool = (name: string, summary: string): Tool => ({
    name,
    description: `${summary} ${FRONTMOST_GATE_DESC}`,
    inputSchema: {
      type: "object" as const,
      properties: {
        coordinate: coordinateTuple,
        text: clickModifierText,
      },
      required: ["coordinate"],
    },
  });

  // Input schema for tools that take no arguments. A factory rather than a
  // shared constant so each tool owns a distinct schema object.
  const noArgsSchema = (): Tool["inputSchema"] => ({
    type: "object" as const,
    properties: {},
    required: [],
  });

  return [
    {
      name: "request_access",
      description:
        "Request user permission to control a set of applications for this session. Must be called before any other tool in this server. " +
        "The user sees a single dialog listing all requested apps and either allows the whole set or denies it. " +
        "Call this again mid-session to add more apps; previously granted apps remain granted. " +
        "Returns the granted apps, denied apps, and screenshot filtering capability.",
      inputSchema: {
        type: "object" as const,
        properties: {
          apps: {
            type: "array",
            items: { type: "string" },
            description:
              "Application display names (e.g. \"Slack\", \"Calendar\") or bundle identifiers (e.g. \"com.tinyspeck.slackmacgap\"). Display names are resolved case-insensitively against installed apps." +
              installedAppsHint,
          },
          reason: {
            type: "string",
            description:
              "One-sentence explanation shown to the user in the approval dialog. Explain the task, not the mechanism.",
          },
          clipboardRead: {
            type: "boolean",
            description:
              "Also request permission to read the user's clipboard (separate checkbox in the dialog).",
          },
          clipboardWrite: {
            type: "boolean",
            description:
              "Also request permission to write the user's clipboard. When granted, multi-line `type` calls use the clipboard fast path.",
          },
          systemKeyCombos: {
            type: "boolean",
            description:
              "Also request permission to send system-level key combos (quit app, switch app, lock screen). Without this, those specific combos are blocked.",
          },
        },
        required: ["apps", "reason"],
      },
    },

    {
      name: "screenshot",
      description:
        screenshotDesc +
        " Returns an error if the allowlist is empty. The returned image is what subsequent click coordinates are relative to.",
      inputSchema: {
        type: "object" as const,
        properties: {
          save_to_disk: {
            type: "boolean",
            description:
              "Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image — screenshots you're just looking at don't need saving.",
          },
        },
        required: [],
      },
    },

    {
      name: "zoom",
      description:
        "Take a higher-resolution screenshot of a specific region of the last full-screen screenshot. Use this liberally to inspect small text, button labels, or fine UI details that are hard to read in the downsampled full-screen image. " +
        "IMPORTANT: Coordinates in subsequent click calls always refer to the full-screen screenshot, never the zoomed image. This tool is read-only for inspecting detail.",
      inputSchema: {
        type: "object" as const,
        properties: {
          region: {
            type: "array",
            items: { type: "integer" },
            minItems: 4,
            maxItems: 4,
            description:
              "(x0, y0, x1, y1): Rectangle to zoom into, in the coordinate space of the most recent full-screen screenshot. x0,y0 = top-left, x1,y1 = bottom-right.",
          },
          save_to_disk: {
            type: "boolean",
            description:
              "Save the image to disk so it can be attached to a message for the user. Returns the saved path in the tool result. Only set this when you intend to share the image.",
          },
        },
        required: ["region"],
      },
    },

    clickTool("left_click", "Left-click at the given coordinates."),
    clickTool(
      "double_click",
      "Double-click at the given coordinates. Selects a word in most text editors.",
    ),
    clickTool(
      "triple_click",
      "Triple-click at the given coordinates. Selects a line in most text editors.",
    ),
    clickTool(
      "right_click",
      "Right-click at the given coordinates. Opens a context menu in most applications.",
    ),
    clickTool(
      "middle_click",
      "Middle-click (scroll-wheel click) at the given coordinates.",
    ),

    {
      name: "type",
      description: `Type text into whatever currently has keyboard focus. ${FRONTMOST_GATE_DESC} Newlines are supported. For keyboard shortcuts use \`key\` instead.`,
      inputSchema: {
        type: "object" as const,
        properties: {
          text: { type: "string", description: "Text to type." },
        },
        required: ["text"],
      },
    },

    {
      name: "key",
      description:
        `Press a key or key combination (e.g. "return", "escape", "cmd+a", "ctrl+shift+tab"). ${FRONTMOST_GATE_DESC} ` +
        "System-level combos (quit app, switch app, lock screen) require the `systemKeyCombos` grant — without it they return an error. All other combos work.",
      inputSchema: {
        type: "object" as const,
        properties: {
          text: {
            type: "string",
            description: 'Modifiers joined with "+", e.g. "cmd+shift+a".',
          },
          repeat: {
            type: "integer",
            minimum: 1,
            maximum: 100,
            description: "Number of times to repeat the key press. Default is 1.",
          },
        },
        required: ["text"],
      },
    },

    {
      name: "scroll",
      description: `Scroll at the given coordinates. ${FRONTMOST_GATE_DESC}`,
      inputSchema: {
        type: "object" as const,
        properties: {
          coordinate: coordinateTuple,
          scroll_direction: {
            type: "string",
            enum: ["up", "down", "left", "right"],
            description: "Direction to scroll.",
          },
          scroll_amount: {
            type: "integer",
            minimum: 0,
            maximum: 100,
            description: "Number of scroll ticks.",
          },
        },
        required: ["coordinate", "scroll_direction", "scroll_amount"],
      },
    },

    {
      name: "left_click_drag",
      description: `Press, move to target, and release. ${FRONTMOST_GATE_DESC}`,
      inputSchema: {
        type: "object" as const,
        properties: {
          // Same tuple shape as clicks; only the description is overridden.
          coordinate: {
            ...coordinateTuple,
            description: `(x, y) end point: ${coord.x}`,
          },
          start_coordinate: {
            ...coordinateTuple,
            description: `(x, y) start point. If omitted, drags from the current cursor position. ${coord.x}`,
          },
        },
        required: ["coordinate"],
      },
    },

    {
      name: "mouse_move",
      description: `Move the mouse cursor without clicking. Useful for triggering hover states. ${FRONTMOST_GATE_DESC}`,
      inputSchema: {
        type: "object" as const,
        properties: {
          coordinate: coordinateTuple,
        },
        required: ["coordinate"],
      },
    },

    {
      name: "open_application",
      description:
        "Bring an application to the front, launching it if necessary. The target application must already be in the session allowlist — call request_access first.",
      inputSchema: {
        type: "object" as const,
        properties: {
          app: {
            type: "string",
            description:
              "Display name (e.g. \"Slack\") or bundle identifier (e.g. \"com.tinyspeck.slackmacgap\").",
          },
        },
        required: ["app"],
      },
    },

    {
      name: "switch_display",
      description:
        "Switch which monitor subsequent screenshots capture. Use this when the " +
        "application you need is on a different monitor than the one shown. " +
        "The screenshot tool tells you which monitor it captured and lists " +
        "other attached monitors by name — pass one of those names here. " +
        "After switching, call screenshot to see the new monitor. " +
        'Pass "auto" to return to automatic monitor selection.',
      inputSchema: {
        type: "object" as const,
        properties: {
          display: {
            type: "string",
            description:
              'Monitor name from the screenshot note (e.g. "Built-in Retina Display", ' +
              '"LG UltraFine"), or "auto" to re-enable automatic selection.',
          },
        },
        required: ["display"],
      },
    },

    {
      name: "list_granted_applications",
      description:
        "List the applications currently in the session allowlist, plus the active grant flags and coordinate mode. No side effects.",
      inputSchema: noArgsSchema(),
    },

    {
      name: "read_clipboard",
      description:
        "Read the current clipboard contents as text. Requires the `clipboardRead` grant.",
      inputSchema: noArgsSchema(),
    },

    {
      name: "write_clipboard",
      description:
        "Write text to the clipboard. Requires the `clipboardWrite` grant.",
      inputSchema: {
        type: "object" as const,
        properties: {
          text: { type: "string" },
        },
        required: ["text"],
      },
    },

    {
      name: "wait",
      description: "Wait for a specified duration.",
      inputSchema: {
        type: "object" as const,
        properties: {
          duration: {
            type: "number",
            description: "Duration in seconds (0–100).",
          },
        },
        required: ["duration"],
      },
    },

    {
      name: "cursor_position",
      description:
        "Get the current mouse cursor position. Returns image-pixel coordinates relative to the most recent screenshot, or logical points if no screenshot has been taken.",
      inputSchema: noArgsSchema(),
    },

    {
      name: "hold_key",
      description:
        `Press and hold a key or key combination for the specified duration, then release. ${FRONTMOST_GATE_DESC} ` +
        "System-level combos require the `systemKeyCombos` grant.",
      inputSchema: {
        type: "object" as const,
        properties: {
          text: {
            type: "string",
            description: 'Key or chord to hold, e.g. "space", "shift+down".',
          },
          duration: {
            type: "number",
            description: "Duration in seconds (0–100).",
          },
        },
        required: ["text", "duration"],
      },
    },

    {
      name: "left_mouse_down",
      description:
        `Press the left mouse button at the current cursor position and leave it held. ${FRONTMOST_GATE_DESC} ` +
        "Use mouse_move first to position the cursor. Call left_mouse_up to release. Errors if the button is already held.",
      inputSchema: noArgsSchema(),
    },

    {
      name: "left_mouse_up",
      description:
        `Release the left mouse button at the current cursor position. ${FRONTMOST_GATE_DESC} ` +
        "Pairs with left_mouse_down. Safe to call even if the button is not currently held.",
      inputSchema: noArgsSchema(),
    },

    {
      name: "computer_batch",
      description:
        "Execute a sequence of actions in ONE tool call. Each individual tool call requires a model→API round trip (seconds); " +
        "batching a predictable sequence eliminates all but one. Use this whenever you can predict the outcome of several actions ahead — " +
        "e.g. click a field, type into it, press Return. Actions execute sequentially and stop on the first error. " +
        `${FRONTMOST_GATE_DESC} The frontmost check runs before EACH action inside the batch — if an action opens a non-allowed app, the next action's gate fires and the batch stops there. ` +
        "Mid-batch screenshot actions are allowed for inspection but coordinates in subsequent clicks always refer to the PRE-BATCH full-screen screenshot.",
      inputSchema: {
        type: "object" as const,
        properties: {
          actions: {
            type: "array",
            minItems: 1,
            items: BATCH_ACTION_ITEM_SCHEMA,
            description:
              'List of actions. Example: [{"action":"left_click","coordinate":[100,200]},{"action":"type","text":"hello"},{"action":"key","text":"Return"}]',
          },
        },
        required: ["actions"],
      },
    },

    ...(caps.teachMode ? buildTeachTools(coord, installedAppsHint) : []),
  ];
}
|
|
572
|
+
|
|
573
|
+
/**
 * Build the teach-mode tools: `request_teach_access`, `teach_step`, and
 * `teach_batch`. Kept separate so the caller can spread the result into its
 * tool array as a single expression.
 *
 * @param coord - Coordinate wording for the active coordinate mode, so
 *   `teach_step.anchor`'s description uses the same phrasing as click
 *   coordinates. Only `coord.x` is interpolated.
 * @param installedAppsHint - Suffix appended verbatim to
 *   `request_teach_access.apps`' description so it gets the same enumeration
 *   as `request_access.apps` (same resolution path → same hint). May be "".
 * @returns The three teach tools, in the order listed above.
 */
function buildTeachTools(
  coord: { x: string; y: string },
  installedAppsHint: string,
): Tool[] {
  // Shared between teach_step (top-level) and teach_batch (inside steps[]
  // items). Depends on coord, so it lives inside this factory.
  const teachStepProperties = {
    explanation: {
      type: "string",
      description:
        "Tooltip body text. Explain what the user is looking at and why it matters. " +
        "This is the ONLY place the user sees your words — be complete but concise.",
    },
    next_preview: {
      type: "string",
      description:
        "One line describing exactly what will happen when the user clicks Next. " +
        'Example: "Next: I\'ll click Create Bucket and type the name." ' +
        "Shown below the explanation in a smaller font.",
    },
    anchor: {
      type: "array",
      items: { type: "number" },
      minItems: 2,
      maxItems: 2,
      description:
        `(x, y) — where the tooltip arrow points. ${coord.x} ` +
        "Omit to center the tooltip with no arrow (for general-context steps).",
    },
    actions: {
      type: "array",
      // Empty allowed — "read this, click Next" steps. (No minItems, unlike
      // computer_batch.actions.)
      items: BATCH_ACTION_ITEM_SCHEMA,
      description:
        "Actions to execute when the user clicks Next. Same item schema as computer_batch.actions. " +
        "Empty array is valid for purely explanatory steps. Actions run sequentially and stop on first error.",
    },
  } as const;

  return [
    {
      name: "request_teach_access",
      description:
        "Request permission to guide the user through a task step-by-step with on-screen tooltips. " +
        "Use this INSTEAD OF request_access when the user wants to LEARN how to do something " +
        '(phrases like "teach me", "walk me through", "show me how", "help me learn"). ' +
        "On approval the main Gclm Code window hides and a fullscreen tooltip overlay appears. " +
        "You then call teach_step repeatedly; each call shows one tooltip and waits for the user to click Next. " +
        "Same app-allowlist semantics as request_access, but no clipboard/system-key flags. " +
        "Teach mode ends automatically when your turn ends.",
      inputSchema: {
        type: "object" as const,
        properties: {
          apps: {
            type: "array",
            items: { type: "string" },
            description:
              'Application display names (e.g. "Slack", "Calendar") or bundle identifiers. Resolved case-insensitively against installed apps.' +
              installedAppsHint,
          },
          reason: {
            type: "string",
            description:
              'What you will be teaching. Shown in the approval dialog as "Gclm Code wants to guide you through {reason}". Keep it short and task-focused.',
          },
        },
        required: ["apps", "reason"],
      },
    },

    {
      name: "teach_step",
      description:
        "Show one guided-tour tooltip and wait for the user to click Next. On Next, execute the actions, " +
        "take a fresh screenshot, and return both — you do NOT need a separate screenshot call between steps. " +
        "The returned image shows the state after your actions ran; anchor the next teach_step against it. " +
        "IMPORTANT — the user only sees the tooltip during teach mode. Put ALL narration in `explanation`. " +
        "Text you emit outside teach_step calls is NOT visible until teach mode ends. " +
        "Pack as many actions as possible into each step's `actions` array — the user waits through " +
        "the whole round trip between clicks, so one step that fills a form beats five steps that fill one field each. " +
        "Returns {exited:true} if the user clicks Exit — do not call teach_step again after that. " +
        "Take an initial screenshot before your FIRST teach_step to anchor it.",
      inputSchema: {
        type: "object" as const,
        properties: teachStepProperties,
        required: ["explanation", "next_preview", "actions"],
      },
    },

    {
      name: "teach_batch",
      description:
        "Queue multiple teach steps in one tool call. Parallels computer_batch: " +
        "N steps → one model↔API round trip instead of N. Each step still shows a tooltip " +
        "and waits for the user's Next click, but YOU aren't waiting for a round trip between steps. " +
        "You can call teach_batch multiple times in one tour — treat each batch as one predictable " +
        "SEGMENT (typically: all the steps on one page). The returned screenshot shows the state " +
        "after the batch's final actions; anchor the NEXT teach_batch against it. " +
        "WITHIN a batch, all anchors and click coordinates refer to the PRE-BATCH screenshot " +
        "(same invariant as computer_batch) — for steps 2+ in a batch, either omit anchor " +
        "(centered tooltip) or target elements you know won't have moved. " +
        "Good pattern: batch 5 tooltips on page A (last step navigates) → read returned screenshot → " +
        "batch 3 tooltips on page B → done. " +
        "Returns {exited:true, stepsCompleted:N} if the user clicks Exit — do NOT call again after that; " +
        "{stepsCompleted, stepFailed, ...} if an action errors mid-batch; " +
        "otherwise {stepsCompleted, results:[...]} plus a final screenshot. " +
        "Fall back to individual teach_step calls when you need to react to each intermediate screenshot.",
      inputSchema: {
        type: "object" as const,
        properties: {
          steps: {
            type: "array",
            minItems: 1,
            // Each item has the same shape as a teach_step call.
            items: {
              type: "object",
              properties: teachStepProperties,
              required: ["explanation", "next_preview", "actions"],
            },
            description:
              "Ordered steps. Validated upfront — a typo in step 5 errors before any tooltip shows.",
          },
        },
        required: ["steps"],
      },
    },
  ];
}
|