illuma-agents 1.0.43 → 1.0.45
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/DesktopTools.cjs +215 -147
- package/dist/cjs/tools/DesktopTools.cjs.map +1 -1
- package/dist/esm/tools/DesktopTools.mjs +215 -147
- package/dist/esm/tools/DesktopTools.mjs.map +1 -1
- package/dist/types/tools/DesktopTools.d.ts +30 -0
- package/package.json +1 -1
- package/src/tools/DesktopTools.ts +324 -302
|
@@ -1,11 +1,6 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
2
|
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
3
3
|
|
|
4
|
-
/**
|
|
5
|
-
* Type for tool configuration passed by LangChain runtime
|
|
6
|
-
*/
|
|
7
|
-
type ToolCallConfig = { toolCall?: { id?: string } };
|
|
8
|
-
|
|
9
4
|
/**
|
|
10
5
|
* Desktop tool names - keep in sync with Ranger Desktop Electron app
|
|
11
6
|
* These tools execute locally in the Electron app, NOT on the server
|
|
@@ -26,6 +21,12 @@ export const EDesktopTools = {
|
|
|
26
21
|
CLIPBOARD_WRITE: 'clipboard_write',
|
|
27
22
|
CLIPBOARD_PASTE: 'clipboard_paste',
|
|
28
23
|
WAIT: 'computer_wait',
|
|
24
|
+
// Native UI Automation tools (Windows) - faster and more reliable than screenshot-based
|
|
25
|
+
UI_FIND_ELEMENT: 'ui_find_element',
|
|
26
|
+
UI_CLICK_ELEMENT: 'ui_click_element',
|
|
27
|
+
UI_GET_WINDOW_TREE: 'ui_get_window_tree',
|
|
28
|
+
UI_FIND_BUTTONS: 'ui_find_buttons',
|
|
29
|
+
UI_FIND_INPUTS: 'ui_find_inputs',
|
|
29
30
|
} as const;
|
|
30
31
|
|
|
31
32
|
export type DesktopToolName =
|
|
@@ -65,6 +66,23 @@ export interface DesktopActionResult {
|
|
|
65
66
|
};
|
|
66
67
|
mousePosition?: { x: number; y: number };
|
|
67
68
|
clipboard?: string;
|
|
69
|
+
// UI Automation results
|
|
70
|
+
uiElement?: {
|
|
71
|
+
name: string;
|
|
72
|
+
automationId: string;
|
|
73
|
+
controlType: string;
|
|
74
|
+
boundingRectangle?: { x: number; y: number; width: number; height: number };
|
|
75
|
+
isEnabled: boolean;
|
|
76
|
+
};
|
|
77
|
+
uiElements?: Array<{
|
|
78
|
+
name: string;
|
|
79
|
+
automationId: string;
|
|
80
|
+
controlType: string;
|
|
81
|
+
boundingRectangle?: { x: number; y: number; width: number; height: number };
|
|
82
|
+
}>;
|
|
83
|
+
uiTree?: unknown;
|
|
84
|
+
// Generic data for extended results
|
|
85
|
+
data?: unknown;
|
|
68
86
|
}
|
|
69
87
|
|
|
70
88
|
/**
|
|
@@ -154,6 +172,29 @@ const WaitSchema = z.object({
|
|
|
154
172
|
ms: z.number().describe('Milliseconds to wait'),
|
|
155
173
|
});
|
|
156
174
|
|
|
175
|
+
// ============ Native UI Automation Schemas (Windows) ============
|
|
176
|
+
|
|
177
|
+
const UIFindElementSchema = z.object({
|
|
178
|
+
name: z.string().optional().describe('Element name/label to find (e.g., "Submit", "OK", "File")'),
|
|
179
|
+
automationId: z.string().optional().describe('Automation ID of element (unique identifier)'),
|
|
180
|
+
controlType: z.string().optional().describe('Type of control: Button, Edit, Text, ComboBox, List, Menu, MenuItem, Window, etc.'),
|
|
181
|
+
});
|
|
182
|
+
|
|
183
|
+
const UIClickElementSchema = z.object({
|
|
184
|
+
name: z.string().optional().describe('Element name/label to click'),
|
|
185
|
+
automationId: z.string().optional().describe('Automation ID of element to click'),
|
|
186
|
+
controlType: z.string().optional().describe('Type of control: Button, Edit, etc.'),
|
|
187
|
+
clickType: z.enum(['left', 'right', 'double']).optional().describe('Click type (default: left)'),
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
const UIGetWindowTreeSchema = z.object({
|
|
191
|
+
maxDepth: z.number().optional().describe('Maximum depth to traverse (default: 3)'),
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
const UIFindButtonsSchema = z.object({});
|
|
195
|
+
|
|
196
|
+
const UIFindInputsSchema = z.object({});
|
|
197
|
+
|
|
157
198
|
/**
|
|
158
199
|
* Desktop tool response interface
|
|
159
200
|
* This is what the Electron app returns after executing the action
|
|
@@ -218,44 +259,51 @@ function formatResultForLLM(
|
|
|
218
259
|
parts.push(`**Clipboard Content:** ${result.clipboard}`);
|
|
219
260
|
}
|
|
220
261
|
|
|
221
|
-
|
|
222
|
-
|
|
262
|
+
// UI Automation results
|
|
263
|
+
if (result.uiElement) {
|
|
264
|
+
const el = result.uiElement;
|
|
265
|
+
parts.push(`**UI Element Found:**`);
|
|
266
|
+
parts.push(` - Name: "${el.name}"`);
|
|
267
|
+
parts.push(` - AutomationId: "${el.automationId}"`);
|
|
268
|
+
parts.push(` - Type: ${el.controlType}`);
|
|
269
|
+
if (el.boundingRectangle) {
|
|
270
|
+
const b = el.boundingRectangle;
|
|
271
|
+
parts.push(` - Bounds: (${b.x}, ${b.y}) ${b.width}x${b.height}`);
|
|
272
|
+
parts.push(` - Center: (${Math.round(b.x + b.width/2)}, ${Math.round(b.y + b.height/2)})`);
|
|
273
|
+
}
|
|
274
|
+
parts.push(` - Enabled: ${el.isEnabled}`);
|
|
223
275
|
}
|
|
224
276
|
|
|
225
|
-
|
|
226
|
-
}
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
config: ToolCallConfig | undefined,
|
|
235
|
-
waitForResult?: DesktopToolCallback
|
|
236
|
-
): Promise<string> {
|
|
237
|
-
const toolCallId = config?.toolCall?.id || `desktop-${Date.now()}`;
|
|
238
|
-
|
|
239
|
-
if (waitForResult) {
|
|
240
|
-
// Server context: wait for actual result from Electron app
|
|
241
|
-
try {
|
|
242
|
-
const result = await waitForResult(action, args, toolCallId);
|
|
243
|
-
return formatResultForLLM(result, action);
|
|
244
|
-
} catch (error) {
|
|
245
|
-
const errorMessage =
|
|
246
|
-
error instanceof Error ? error.message : String(error);
|
|
247
|
-
return `Desktop action "${action}" failed: ${errorMessage}`;
|
|
277
|
+
if (result.uiElements && result.uiElements.length > 0) {
|
|
278
|
+
parts.push(`**UI Elements Found (${result.uiElements.length}):**`);
|
|
279
|
+
for (const el of result.uiElements.slice(0, 20)) { // Limit to 20
|
|
280
|
+
const bounds = el.boundingRectangle ?
|
|
281
|
+
` at (${el.boundingRectangle.x}, ${el.boundingRectangle.y})` : '';
|
|
282
|
+
parts.push(` - [${el.controlType}] "${el.name}"${el.automationId ? ` (id: ${el.automationId})` : ''}${bounds}`);
|
|
283
|
+
}
|
|
284
|
+
if (result.uiElements.length > 20) {
|
|
285
|
+
parts.push(` ... and ${result.uiElements.length - 20} more`);
|
|
248
286
|
}
|
|
249
287
|
}
|
|
250
288
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
289
|
+
if (result.uiTree) {
|
|
290
|
+
parts.push(`**UI Tree:**`);
|
|
291
|
+
parts.push('```json');
|
|
292
|
+
parts.push(JSON.stringify(result.uiTree, null, 2).slice(0, 3000)); // Limit size
|
|
293
|
+
parts.push('```');
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
if (result.data && !result.uiElement && !result.uiElements && !result.uiTree) {
|
|
297
|
+
// Generic data fallback
|
|
298
|
+
parts.push(`**Result:**`);
|
|
299
|
+
parts.push(JSON.stringify(result.data, null, 2).slice(0, 2000));
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if (parts.length === 0) {
|
|
303
|
+
parts.push(`Desktop action "${action}" completed successfully.`);
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
return parts.join('\\n');
|
|
259
307
|
}
|
|
260
308
|
|
|
261
309
|
/**
|
|
@@ -266,275 +314,249 @@ export function createDesktopTools(
|
|
|
266
314
|
options: CreateDesktopToolsOptions = {}
|
|
267
315
|
): DynamicStructuredTool[] {
|
|
268
316
|
const { waitForResult } = options;
|
|
317
|
+
const tools: DynamicStructuredTool[] = [];
|
|
269
318
|
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
args,
|
|
295
|
-
config as ToolCallConfig,
|
|
296
|
-
waitForResult
|
|
297
|
-
);
|
|
298
|
-
},
|
|
299
|
-
{
|
|
300
|
-
name: EDesktopTools.CLICK,
|
|
301
|
-
description:
|
|
302
|
-
'Click the mouse at the specified screen coordinates. Use screenshot first to identify the target location.',
|
|
303
|
-
schema: ClickSchema,
|
|
304
|
-
}
|
|
305
|
-
),
|
|
306
|
-
|
|
307
|
-
// computer_double_click
|
|
308
|
-
tool(
|
|
309
|
-
async (args, config) => {
|
|
310
|
-
return createToolResult(
|
|
311
|
-
EDesktopTools.DOUBLE_CLICK,
|
|
312
|
-
args,
|
|
313
|
-
config as ToolCallConfig,
|
|
314
|
-
waitForResult
|
|
315
|
-
);
|
|
316
|
-
},
|
|
317
|
-
{
|
|
318
|
-
name: EDesktopTools.DOUBLE_CLICK,
|
|
319
|
-
description:
|
|
320
|
-
'Double-click the mouse at the specified screen coordinates.',
|
|
321
|
-
schema: DoubleClickSchema,
|
|
322
|
-
}
|
|
323
|
-
),
|
|
324
|
-
|
|
325
|
-
// computer_right_click
|
|
326
|
-
tool(
|
|
327
|
-
async (args, config) => {
|
|
328
|
-
return createToolResult(
|
|
329
|
-
EDesktopTools.RIGHT_CLICK,
|
|
330
|
-
args,
|
|
331
|
-
config as ToolCallConfig,
|
|
332
|
-
waitForResult
|
|
333
|
-
);
|
|
334
|
-
},
|
|
335
|
-
{
|
|
336
|
-
name: EDesktopTools.RIGHT_CLICK,
|
|
337
|
-
description:
|
|
338
|
-
'Right-click the mouse at the specified screen coordinates to open context menus.',
|
|
339
|
-
schema: RightClickSchema,
|
|
340
|
-
}
|
|
341
|
-
),
|
|
342
|
-
|
|
343
|
-
// computer_type
|
|
344
|
-
tool(
|
|
345
|
-
async (args, config) => {
|
|
346
|
-
return createToolResult(
|
|
347
|
-
EDesktopTools.TYPE,
|
|
348
|
-
args,
|
|
349
|
-
config as ToolCallConfig,
|
|
350
|
-
waitForResult
|
|
351
|
-
);
|
|
352
|
-
},
|
|
353
|
-
{
|
|
354
|
-
name: EDesktopTools.TYPE,
|
|
355
|
-
description:
|
|
356
|
-
'Type text using the keyboard. Make sure the target input field is focused first (use click).',
|
|
357
|
-
schema: TypeSchema,
|
|
358
|
-
}
|
|
359
|
-
),
|
|
360
|
-
|
|
361
|
-
// computer_key
|
|
362
|
-
tool(
|
|
363
|
-
async (args, config) => {
|
|
364
|
-
return createToolResult(
|
|
365
|
-
EDesktopTools.KEY,
|
|
366
|
-
args,
|
|
367
|
-
config as ToolCallConfig,
|
|
368
|
-
waitForResult
|
|
369
|
-
);
|
|
370
|
-
},
|
|
371
|
-
{
|
|
372
|
-
name: EDesktopTools.KEY,
|
|
373
|
-
description:
|
|
374
|
-
'Press a single key on the keyboard (Enter, Tab, Escape, arrow keys, function keys, etc.).',
|
|
375
|
-
schema: KeySchema,
|
|
376
|
-
}
|
|
377
|
-
),
|
|
378
|
-
|
|
379
|
-
// computer_key_combo
|
|
380
|
-
tool(
|
|
381
|
-
async (args, config) => {
|
|
382
|
-
return createToolResult(
|
|
383
|
-
EDesktopTools.KEY_COMBO,
|
|
384
|
-
args,
|
|
385
|
-
config as ToolCallConfig,
|
|
386
|
-
waitForResult
|
|
387
|
-
);
|
|
388
|
-
},
|
|
389
|
-
{
|
|
390
|
-
name: EDesktopTools.KEY_COMBO,
|
|
391
|
-
description:
|
|
392
|
-
'Press a key combination (e.g., Ctrl+C to copy, Ctrl+V to paste, Alt+Tab to switch windows).',
|
|
393
|
-
schema: KeyComboSchema,
|
|
394
|
-
}
|
|
395
|
-
),
|
|
396
|
-
|
|
397
|
-
// computer_scroll
|
|
398
|
-
tool(
|
|
399
|
-
async (args, config) => {
|
|
400
|
-
return createToolResult(
|
|
401
|
-
EDesktopTools.SCROLL,
|
|
402
|
-
args,
|
|
403
|
-
config as ToolCallConfig,
|
|
404
|
-
waitForResult
|
|
405
|
-
);
|
|
406
|
-
},
|
|
407
|
-
{
|
|
408
|
-
name: EDesktopTools.SCROLL,
|
|
409
|
-
description:
|
|
410
|
-
'Scroll at the specified screen coordinates. Use negative deltaY to scroll up, positive to scroll down.',
|
|
411
|
-
schema: ScrollSchema,
|
|
412
|
-
}
|
|
413
|
-
),
|
|
414
|
-
|
|
415
|
-
// computer_drag
|
|
416
|
-
tool(
|
|
417
|
-
async (args, config) => {
|
|
418
|
-
return createToolResult(
|
|
419
|
-
EDesktopTools.DRAG,
|
|
420
|
-
args,
|
|
421
|
-
config as ToolCallConfig,
|
|
422
|
-
waitForResult
|
|
423
|
-
);
|
|
424
|
-
},
|
|
425
|
-
{
|
|
426
|
-
name: EDesktopTools.DRAG,
|
|
427
|
-
description:
|
|
428
|
-
'Drag the mouse from one position to another (for moving windows, selecting text, etc.).',
|
|
429
|
-
schema: DragSchema,
|
|
430
|
-
}
|
|
431
|
-
),
|
|
432
|
-
|
|
433
|
-
// computer_get_active_window
|
|
434
|
-
tool(
|
|
435
|
-
async (_args, config) => {
|
|
436
|
-
return createToolResult(
|
|
437
|
-
EDesktopTools.GET_ACTIVE_WINDOW,
|
|
438
|
-
{},
|
|
439
|
-
config as ToolCallConfig,
|
|
440
|
-
waitForResult
|
|
441
|
-
);
|
|
442
|
-
},
|
|
443
|
-
{
|
|
444
|
-
name: EDesktopTools.GET_ACTIVE_WINDOW,
|
|
445
|
-
description:
|
|
446
|
-
'Get information about the currently active window (title, application name, position, size).',
|
|
447
|
-
schema: GetActiveWindowSchema,
|
|
448
|
-
}
|
|
449
|
-
),
|
|
450
|
-
|
|
451
|
-
// computer_get_mouse_position
|
|
452
|
-
tool(
|
|
453
|
-
async (_args, config) => {
|
|
454
|
-
return createToolResult(
|
|
455
|
-
EDesktopTools.GET_MOUSE_POSITION,
|
|
456
|
-
{},
|
|
457
|
-
config as ToolCallConfig,
|
|
458
|
-
waitForResult
|
|
459
|
-
);
|
|
460
|
-
},
|
|
461
|
-
{
|
|
462
|
-
name: EDesktopTools.GET_MOUSE_POSITION,
|
|
463
|
-
description: 'Get the current mouse cursor position on screen.',
|
|
464
|
-
schema: GetMousePositionSchema,
|
|
465
|
-
}
|
|
466
|
-
),
|
|
467
|
-
|
|
468
|
-
// clipboard_read
|
|
469
|
-
tool(
|
|
470
|
-
async (_args, config) => {
|
|
471
|
-
return createToolResult(
|
|
472
|
-
EDesktopTools.CLIPBOARD_READ,
|
|
473
|
-
{},
|
|
474
|
-
config as ToolCallConfig,
|
|
475
|
-
waitForResult
|
|
476
|
-
);
|
|
477
|
-
},
|
|
478
|
-
{
|
|
479
|
-
name: EDesktopTools.CLIPBOARD_READ,
|
|
480
|
-
description: 'Read the current contents of the system clipboard.',
|
|
481
|
-
schema: ClipboardReadSchema,
|
|
482
|
-
}
|
|
483
|
-
),
|
|
484
|
-
|
|
485
|
-
// clipboard_write
|
|
486
|
-
tool(
|
|
487
|
-
async (args, config) => {
|
|
488
|
-
return createToolResult(
|
|
489
|
-
EDesktopTools.CLIPBOARD_WRITE,
|
|
490
|
-
args,
|
|
491
|
-
config as ToolCallConfig,
|
|
492
|
-
waitForResult
|
|
493
|
-
);
|
|
494
|
-
},
|
|
495
|
-
{
|
|
496
|
-
name: EDesktopTools.CLIPBOARD_WRITE,
|
|
497
|
-
description: 'Write text to the system clipboard.',
|
|
498
|
-
schema: ClipboardWriteSchema,
|
|
319
|
+
/**
|
|
320
|
+
* Helper to create tool function that optionally waits for results
|
|
321
|
+
* The toolCallId is extracted from the RunnableConfig passed by LangChain
|
|
322
|
+
*/
|
|
323
|
+
const createToolFunction = (action: string) => {
|
|
324
|
+
return async (
|
|
325
|
+
args: Record<string, unknown>,
|
|
326
|
+
config?: { toolCall?: { id?: string } }
|
|
327
|
+
): Promise<string> => {
|
|
328
|
+
const toolCallId =
|
|
329
|
+
config?.toolCall?.id ??
|
|
330
|
+
`desktop_${Date.now()}_${Math.random().toString(36).slice(2)}`;
|
|
331
|
+
|
|
332
|
+
// Create marker for Electron app
|
|
333
|
+
const marker: DesktopToolResponse = {
|
|
334
|
+
requiresDesktopExecution: true,
|
|
335
|
+
action,
|
|
336
|
+
args,
|
|
337
|
+
toolCallId,
|
|
338
|
+
};
|
|
339
|
+
|
|
340
|
+
// If no callback, return marker immediately (Electron handles via SSE interception)
|
|
341
|
+
if (!waitForResult) {
|
|
342
|
+
return JSON.stringify(marker);
|
|
499
343
|
}
|
|
500
|
-
),
|
|
501
344
|
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
return
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
);
|
|
511
|
-
},
|
|
512
|
-
{
|
|
513
|
-
name: EDesktopTools.CLIPBOARD_PASTE,
|
|
514
|
-
description:
|
|
515
|
-
'Paste the clipboard contents (equivalent to Ctrl+V). Use clipboard_write first to set the content.',
|
|
516
|
-
schema: ClipboardPasteSchema,
|
|
345
|
+
// With callback: wait for actual results from Electron app
|
|
346
|
+
try {
|
|
347
|
+
const result = await waitForResult(action, args, toolCallId);
|
|
348
|
+
return formatResultForLLM(result, action);
|
|
349
|
+
} catch (error) {
|
|
350
|
+
const errorMessage =
|
|
351
|
+
error instanceof Error ? error.message : String(error);
|
|
352
|
+
return `Desktop action "${action}" failed: ${errorMessage}`;
|
|
517
353
|
}
|
|
518
|
-
|
|
354
|
+
};
|
|
355
|
+
};
|
|
519
356
|
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
357
|
+
// computer_screenshot
|
|
358
|
+
tools.push(
|
|
359
|
+
tool(createToolFunction(EDesktopTools.SCREENSHOT), {
|
|
360
|
+
name: EDesktopTools.SCREENSHOT,
|
|
361
|
+
description:
|
|
362
|
+
'Take a screenshot of the entire screen. Use this to see what is currently displayed on the desktop.',
|
|
363
|
+
schema: ScreenshotSchema,
|
|
364
|
+
})
|
|
365
|
+
);
|
|
366
|
+
|
|
367
|
+
// computer_click
|
|
368
|
+
tools.push(
|
|
369
|
+
tool(createToolFunction(EDesktopTools.CLICK), {
|
|
370
|
+
name: EDesktopTools.CLICK,
|
|
371
|
+
description:
|
|
372
|
+
'Click the mouse at the specified screen coordinates. Use screenshot first to identify the target location.',
|
|
373
|
+
schema: ClickSchema,
|
|
374
|
+
})
|
|
375
|
+
);
|
|
376
|
+
|
|
377
|
+
// computer_double_click
|
|
378
|
+
tools.push(
|
|
379
|
+
tool(createToolFunction(EDesktopTools.DOUBLE_CLICK), {
|
|
380
|
+
name: EDesktopTools.DOUBLE_CLICK,
|
|
381
|
+
description:
|
|
382
|
+
'Double-click the mouse at the specified screen coordinates.',
|
|
383
|
+
schema: DoubleClickSchema,
|
|
384
|
+
})
|
|
385
|
+
);
|
|
386
|
+
|
|
387
|
+
// computer_right_click
|
|
388
|
+
tools.push(
|
|
389
|
+
tool(createToolFunction(EDesktopTools.RIGHT_CLICK), {
|
|
390
|
+
name: EDesktopTools.RIGHT_CLICK,
|
|
391
|
+
description:
|
|
392
|
+
'Right-click the mouse at the specified screen coordinates to open context menus.',
|
|
393
|
+
schema: RightClickSchema,
|
|
394
|
+
})
|
|
395
|
+
);
|
|
396
|
+
|
|
397
|
+
// computer_type
|
|
398
|
+
tools.push(
|
|
399
|
+
tool(createToolFunction(EDesktopTools.TYPE), {
|
|
400
|
+
name: EDesktopTools.TYPE,
|
|
401
|
+
description:
|
|
402
|
+
'Type text using the keyboard. Make sure the target input field is focused first (use click).',
|
|
403
|
+
schema: TypeSchema,
|
|
404
|
+
})
|
|
405
|
+
);
|
|
406
|
+
|
|
407
|
+
// computer_key
|
|
408
|
+
tools.push(
|
|
409
|
+
tool(createToolFunction(EDesktopTools.KEY), {
|
|
410
|
+
name: EDesktopTools.KEY,
|
|
411
|
+
description:
|
|
412
|
+
'Press a single key on the keyboard (Enter, Tab, Escape, arrow keys, function keys, etc.).',
|
|
413
|
+
schema: KeySchema,
|
|
414
|
+
})
|
|
415
|
+
);
|
|
416
|
+
|
|
417
|
+
// computer_key_combo
|
|
418
|
+
tools.push(
|
|
419
|
+
tool(createToolFunction(EDesktopTools.KEY_COMBO), {
|
|
420
|
+
name: EDesktopTools.KEY_COMBO,
|
|
421
|
+
description:
|
|
422
|
+
'Press a key combination (e.g., Ctrl+C to copy, Ctrl+V to paste, Alt+Tab to switch windows).',
|
|
423
|
+
schema: KeyComboSchema,
|
|
424
|
+
})
|
|
425
|
+
);
|
|
426
|
+
|
|
427
|
+
// computer_scroll
|
|
428
|
+
tools.push(
|
|
429
|
+
tool(createToolFunction(EDesktopTools.SCROLL), {
|
|
430
|
+
name: EDesktopTools.SCROLL,
|
|
431
|
+
description:
|
|
432
|
+
'Scroll at the specified screen coordinates. Use negative deltaY to scroll up, positive to scroll down.',
|
|
433
|
+
schema: ScrollSchema,
|
|
434
|
+
})
|
|
435
|
+
);
|
|
436
|
+
|
|
437
|
+
// computer_drag
|
|
438
|
+
tools.push(
|
|
439
|
+
tool(createToolFunction(EDesktopTools.DRAG), {
|
|
440
|
+
name: EDesktopTools.DRAG,
|
|
441
|
+
description:
|
|
442
|
+
'Drag the mouse from one position to another (for moving windows, selecting text, etc.).',
|
|
443
|
+
schema: DragSchema,
|
|
444
|
+
})
|
|
445
|
+
);
|
|
446
|
+
|
|
447
|
+
// computer_get_active_window
|
|
448
|
+
tools.push(
|
|
449
|
+
tool(createToolFunction(EDesktopTools.GET_ACTIVE_WINDOW), {
|
|
450
|
+
name: EDesktopTools.GET_ACTIVE_WINDOW,
|
|
451
|
+
description:
|
|
452
|
+
'Get information about the currently active window (title, application name, position, size).',
|
|
453
|
+
schema: GetActiveWindowSchema,
|
|
454
|
+
})
|
|
455
|
+
);
|
|
456
|
+
|
|
457
|
+
// computer_get_mouse_position
|
|
458
|
+
tools.push(
|
|
459
|
+
tool(createToolFunction(EDesktopTools.GET_MOUSE_POSITION), {
|
|
460
|
+
name: EDesktopTools.GET_MOUSE_POSITION,
|
|
461
|
+
description: 'Get the current mouse cursor position on screen.',
|
|
462
|
+
schema: GetMousePositionSchema,
|
|
463
|
+
})
|
|
464
|
+
);
|
|
465
|
+
|
|
466
|
+
// clipboard_read
|
|
467
|
+
tools.push(
|
|
468
|
+
tool(createToolFunction(EDesktopTools.CLIPBOARD_READ), {
|
|
469
|
+
name: EDesktopTools.CLIPBOARD_READ,
|
|
470
|
+
description: 'Read the current contents of the system clipboard.',
|
|
471
|
+
schema: ClipboardReadSchema,
|
|
472
|
+
})
|
|
473
|
+
);
|
|
474
|
+
|
|
475
|
+
// clipboard_write
|
|
476
|
+
tools.push(
|
|
477
|
+
tool(createToolFunction(EDesktopTools.CLIPBOARD_WRITE), {
|
|
478
|
+
name: EDesktopTools.CLIPBOARD_WRITE,
|
|
479
|
+
description: 'Write text to the system clipboard.',
|
|
480
|
+
schema: ClipboardWriteSchema,
|
|
481
|
+
})
|
|
482
|
+
);
|
|
483
|
+
|
|
484
|
+
// clipboard_paste
|
|
485
|
+
tools.push(
|
|
486
|
+
tool(createToolFunction(EDesktopTools.CLIPBOARD_PASTE), {
|
|
487
|
+
name: EDesktopTools.CLIPBOARD_PASTE,
|
|
488
|
+
description:
|
|
489
|
+
'Paste the clipboard contents (equivalent to Ctrl+V). Use clipboard_write first to set the content.',
|
|
490
|
+
schema: ClipboardPasteSchema,
|
|
491
|
+
})
|
|
492
|
+
);
|
|
493
|
+
|
|
494
|
+
// computer_wait
|
|
495
|
+
tools.push(
|
|
496
|
+
tool(createToolFunction(EDesktopTools.WAIT), {
|
|
497
|
+
name: EDesktopTools.WAIT,
|
|
498
|
+
description:
|
|
499
|
+
'Wait for the specified number of milliseconds. Use this to wait for UI animations or loading.',
|
|
500
|
+
schema: WaitSchema,
|
|
501
|
+
})
|
|
502
|
+
);
|
|
503
|
+
|
|
504
|
+
// ============ Native UI Automation Tools (Windows) ============
|
|
505
|
+
// These are FASTER and MORE RELIABLE than screenshot-based automation
|
|
506
|
+
// They find elements by semantic properties (name, automationId, type)
|
|
507
|
+
// instead of relying on pixel coordinates from screenshots
|
|
508
|
+
|
|
509
|
+
// ui_find_element
|
|
510
|
+
tools.push(
|
|
511
|
+
tool(createToolFunction(EDesktopTools.UI_FIND_ELEMENT), {
|
|
512
|
+
name: EDesktopTools.UI_FIND_ELEMENT,
|
|
513
|
+
description:
|
|
514
|
+
'🚀 PREFERRED: Find a UI element by semantic properties (name, automationId, controlType). MUCH FASTER than screenshot analysis. Returns element bounds for clicking. Windows only.',
|
|
515
|
+
schema: UIFindElementSchema,
|
|
516
|
+
})
|
|
517
|
+
);
|
|
518
|
+
|
|
519
|
+
// ui_click_element
|
|
520
|
+
tools.push(
|
|
521
|
+
tool(createToolFunction(EDesktopTools.UI_CLICK_ELEMENT), {
|
|
522
|
+
name: EDesktopTools.UI_CLICK_ELEMENT,
|
|
523
|
+
description:
|
|
524
|
+
'🚀 PREFERRED: Find and click a UI element by name/automationId. More reliable than coordinate-based clicking. Example: ui_click_element({name: "OK"}) or ui_click_element({controlType: "Button", name: "Submit"}). Windows only.',
|
|
525
|
+
schema: UIClickElementSchema,
|
|
526
|
+
})
|
|
527
|
+
);
|
|
528
|
+
|
|
529
|
+
// ui_get_window_tree
|
|
530
|
+
tools.push(
|
|
531
|
+
tool(createToolFunction(EDesktopTools.UI_GET_WINDOW_TREE), {
|
|
532
|
+
name: EDesktopTools.UI_GET_WINDOW_TREE,
|
|
533
|
+
description:
|
|
534
|
+
'Get the UI element tree of the active window. Shows all buttons, inputs, menus, etc. with their names and automationIds. Use this to discover elements before clicking. Windows only.',
|
|
535
|
+
schema: UIGetWindowTreeSchema,
|
|
536
|
+
})
|
|
537
|
+
);
|
|
538
|
+
|
|
539
|
+
// ui_find_buttons
|
|
540
|
+
tools.push(
|
|
541
|
+
tool(createToolFunction(EDesktopTools.UI_FIND_BUTTONS), {
|
|
542
|
+
name: EDesktopTools.UI_FIND_BUTTONS,
|
|
543
|
+
description:
|
|
544
|
+
'Find all clickable buttons in the active window. Returns list with names and positions. Useful for discovering available actions. Windows only.',
|
|
545
|
+
schema: UIFindButtonsSchema,
|
|
546
|
+
})
|
|
547
|
+
);
|
|
548
|
+
|
|
549
|
+
// ui_find_inputs
|
|
550
|
+
tools.push(
|
|
551
|
+
tool(createToolFunction(EDesktopTools.UI_FIND_INPUTS), {
|
|
552
|
+
name: EDesktopTools.UI_FIND_INPUTS,
|
|
553
|
+
description:
|
|
554
|
+
'Find all text input fields in the active window. Returns list with names and positions. Useful for discovering form fields. Windows only.',
|
|
555
|
+
schema: UIFindInputsSchema,
|
|
556
|
+
})
|
|
557
|
+
);
|
|
558
|
+
|
|
559
|
+
return tools;
|
|
538
560
|
}
|
|
539
561
|
|
|
540
562
|
/**
|