illuma-agents 1.0.43 → 1.0.44

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,11 +1,6 @@
1
1
  import { z } from 'zod';
2
2
  import { tool, DynamicStructuredTool } from '@langchain/core/tools';
3
3
 
4
- /**
5
- * Type for tool configuration passed by LangChain runtime
6
- */
7
- type ToolCallConfig = { toolCall?: { id?: string } };
8
-
9
4
  /**
10
5
  * Desktop tool names - keep in sync with Ranger Desktop Electron app
11
6
  * These tools execute locally in the Electron app, NOT on the server
@@ -225,39 +220,6 @@ function formatResultForLLM(
225
220
  return parts.join('\n');
226
221
  }
227
222
 
228
- /**
229
- * Create a tool result (either wait for callback or return marker)
230
- */
231
- async function createToolResult(
232
- action: string,
233
- args: Record<string, unknown>,
234
- config: ToolCallConfig | undefined,
235
- waitForResult?: DesktopToolCallback
236
- ): Promise<string> {
237
- const toolCallId = config?.toolCall?.id || `desktop-${Date.now()}`;
238
-
239
- if (waitForResult) {
240
- // Server context: wait for actual result from Electron app
241
- try {
242
- const result = await waitForResult(action, args, toolCallId);
243
- return formatResultForLLM(result, action);
244
- } catch (error) {
245
- const errorMessage =
246
- error instanceof Error ? error.message : String(error);
247
- return `Desktop action "${action}" failed: ${errorMessage}`;
248
- }
249
- }
250
-
251
- // Non-server context: return marker for later processing
252
- const response: DesktopToolResponse = {
253
- requiresDesktopExecution: true,
254
- action,
255
- args,
256
- toolCallId,
257
- };
258
- return JSON.stringify(response);
259
- }
260
-
261
223
  /**
262
224
  * Create desktop automation tools for the agent
263
225
  * These tools allow AI to control the user's desktop when Ranger Desktop is running
@@ -266,275 +228,194 @@ export function createDesktopTools(
266
228
  options: CreateDesktopToolsOptions = {}
267
229
  ): DynamicStructuredTool[] {
268
230
  const { waitForResult } = options;
231
+ const tools: DynamicStructuredTool[] = [];
269
232
 
270
- return [
271
- // computer_screenshot
272
- tool(
273
- async (_args, config) => {
274
- return createToolResult(
275
- EDesktopTools.SCREENSHOT,
276
- {},
277
- config as ToolCallConfig,
278
- waitForResult
279
- );
280
- },
281
- {
282
- name: EDesktopTools.SCREENSHOT,
283
- description:
284
- 'Take a screenshot of the entire screen. Use this to see what is currently displayed on the desktop.',
285
- schema: ScreenshotSchema,
286
- }
287
- ),
288
-
289
- // computer_click
290
- tool(
291
- async (args, config) => {
292
- return createToolResult(
293
- EDesktopTools.CLICK,
294
- args,
295
- config as ToolCallConfig,
296
- waitForResult
297
- );
298
- },
299
- {
300
- name: EDesktopTools.CLICK,
301
- description:
302
- 'Click the mouse at the specified screen coordinates. Use screenshot first to identify the target location.',
303
- schema: ClickSchema,
304
- }
305
- ),
306
-
307
- // computer_double_click
308
- tool(
309
- async (args, config) => {
310
- return createToolResult(
311
- EDesktopTools.DOUBLE_CLICK,
312
- args,
313
- config as ToolCallConfig,
314
- waitForResult
315
- );
316
- },
317
- {
318
- name: EDesktopTools.DOUBLE_CLICK,
319
- description:
320
- 'Double-click the mouse at the specified screen coordinates.',
321
- schema: DoubleClickSchema,
322
- }
323
- ),
324
-
325
- // computer_right_click
326
- tool(
327
- async (args, config) => {
328
- return createToolResult(
329
- EDesktopTools.RIGHT_CLICK,
330
- args,
331
- config as ToolCallConfig,
332
- waitForResult
333
- );
334
- },
335
- {
336
- name: EDesktopTools.RIGHT_CLICK,
337
- description:
338
- 'Right-click the mouse at the specified screen coordinates to open context menus.',
339
- schema: RightClickSchema,
340
- }
341
- ),
342
-
343
- // computer_type
344
- tool(
345
- async (args, config) => {
346
- return createToolResult(
347
- EDesktopTools.TYPE,
348
- args,
349
- config as ToolCallConfig,
350
- waitForResult
351
- );
352
- },
353
- {
354
- name: EDesktopTools.TYPE,
355
- description:
356
- 'Type text using the keyboard. Make sure the target input field is focused first (use click).',
357
- schema: TypeSchema,
358
- }
359
- ),
360
-
361
- // computer_key
362
- tool(
363
- async (args, config) => {
364
- return createToolResult(
365
- EDesktopTools.KEY,
366
- args,
367
- config as ToolCallConfig,
368
- waitForResult
369
- );
370
- },
371
- {
372
- name: EDesktopTools.KEY,
373
- description:
374
- 'Press a single key on the keyboard (Enter, Tab, Escape, arrow keys, function keys, etc.).',
375
- schema: KeySchema,
376
- }
377
- ),
378
-
379
- // computer_key_combo
380
- tool(
381
- async (args, config) => {
382
- return createToolResult(
383
- EDesktopTools.KEY_COMBO,
384
- args,
385
- config as ToolCallConfig,
386
- waitForResult
387
- );
388
- },
389
- {
390
- name: EDesktopTools.KEY_COMBO,
391
- description:
392
- 'Press a key combination (e.g., Ctrl+C to copy, Ctrl+V to paste, Alt+Tab to switch windows).',
393
- schema: KeyComboSchema,
394
- }
395
- ),
396
-
397
- // computer_scroll
398
- tool(
399
- async (args, config) => {
400
- return createToolResult(
401
- EDesktopTools.SCROLL,
402
- args,
403
- config as ToolCallConfig,
404
- waitForResult
405
- );
406
- },
407
- {
408
- name: EDesktopTools.SCROLL,
409
- description:
410
- 'Scroll at the specified screen coordinates. Use negative deltaY to scroll up, positive to scroll down.',
411
- schema: ScrollSchema,
412
- }
413
- ),
414
-
415
- // computer_drag
416
- tool(
417
- async (args, config) => {
418
- return createToolResult(
419
- EDesktopTools.DRAG,
420
- args,
421
- config as ToolCallConfig,
422
- waitForResult
423
- );
424
- },
425
- {
426
- name: EDesktopTools.DRAG,
427
- description:
428
- 'Drag the mouse from one position to another (for moving windows, selecting text, etc.).',
429
- schema: DragSchema,
430
- }
431
- ),
432
-
433
- // computer_get_active_window
434
- tool(
435
- async (_args, config) => {
436
- return createToolResult(
437
- EDesktopTools.GET_ACTIVE_WINDOW,
438
- {},
439
- config as ToolCallConfig,
440
- waitForResult
441
- );
442
- },
443
- {
444
- name: EDesktopTools.GET_ACTIVE_WINDOW,
445
- description:
446
- 'Get information about the currently active window (title, application name, position, size).',
447
- schema: GetActiveWindowSchema,
448
- }
449
- ),
450
-
451
- // computer_get_mouse_position
452
- tool(
453
- async (_args, config) => {
454
- return createToolResult(
455
- EDesktopTools.GET_MOUSE_POSITION,
456
- {},
457
- config as ToolCallConfig,
458
- waitForResult
459
- );
460
- },
461
- {
462
- name: EDesktopTools.GET_MOUSE_POSITION,
463
- description: 'Get the current mouse cursor position on screen.',
464
- schema: GetMousePositionSchema,
465
- }
466
- ),
467
-
468
- // clipboard_read
469
- tool(
470
- async (_args, config) => {
471
- return createToolResult(
472
- EDesktopTools.CLIPBOARD_READ,
473
- {},
474
- config as ToolCallConfig,
475
- waitForResult
476
- );
477
- },
478
- {
479
- name: EDesktopTools.CLIPBOARD_READ,
480
- description: 'Read the current contents of the system clipboard.',
481
- schema: ClipboardReadSchema,
482
- }
483
- ),
484
-
485
- // clipboard_write
486
- tool(
487
- async (args, config) => {
488
- return createToolResult(
489
- EDesktopTools.CLIPBOARD_WRITE,
490
- args,
491
- config as ToolCallConfig,
492
- waitForResult
493
- );
494
- },
495
- {
496
- name: EDesktopTools.CLIPBOARD_WRITE,
497
- description: 'Write text to the system clipboard.',
498
- schema: ClipboardWriteSchema,
233
+ /**
234
+ * Helper to create tool function that optionally waits for results
235
+ * The toolCallId is extracted from the RunnableConfig passed by LangChain
236
+ */
237
+ const createToolFunction = (action: string) => {
238
+ return async (
239
+ args: Record<string, unknown>,
240
+ config?: { toolCall?: { id?: string } }
241
+ ): Promise<string> => {
242
+ const toolCallId =
243
+ config?.toolCall?.id ??
244
+ `desktop_${Date.now()}_${Math.random().toString(36).slice(2)}`;
245
+
246
+ // Create marker for Electron app
247
+ const marker: DesktopToolResponse = {
248
+ requiresDesktopExecution: true,
249
+ action,
250
+ args,
251
+ toolCallId,
252
+ };
253
+
254
+ // If no callback, return marker immediately (Electron handles via SSE interception)
255
+ if (!waitForResult) {
256
+ return JSON.stringify(marker);
499
257
  }
500
- ),
501
258
 
502
- // clipboard_paste
503
- tool(
504
- async (_args, config) => {
505
- return createToolResult(
506
- EDesktopTools.CLIPBOARD_PASTE,
507
- {},
508
- config as ToolCallConfig,
509
- waitForResult
510
- );
511
- },
512
- {
513
- name: EDesktopTools.CLIPBOARD_PASTE,
514
- description:
515
- 'Paste the clipboard contents (equivalent to Ctrl+V). Use clipboard_write first to set the content.',
516
- schema: ClipboardPasteSchema,
259
+ // With callback: wait for actual results from Electron app
260
+ try {
261
+ const result = await waitForResult(action, args, toolCallId);
262
+ return formatResultForLLM(result, action);
263
+ } catch (error) {
264
+ const errorMessage =
265
+ error instanceof Error ? error.message : String(error);
266
+ return `Desktop action "${action}" failed: ${errorMessage}`;
517
267
  }
518
- ),
268
+ };
269
+ };
519
270
 
520
- // computer_wait
521
- tool(
522
- async (args, config) => {
523
- return createToolResult(
524
- EDesktopTools.WAIT,
525
- args,
526
- config as ToolCallConfig,
527
- waitForResult
528
- );
529
- },
530
- {
531
- name: EDesktopTools.WAIT,
532
- description:
533
- 'Wait for the specified number of milliseconds. Use this to wait for UI animations or loading.',
534
- schema: WaitSchema,
535
- }
536
- ),
537
- ];
271
+ // computer_screenshot
272
+ tools.push(
273
+ tool(createToolFunction(EDesktopTools.SCREENSHOT), {
274
+ name: EDesktopTools.SCREENSHOT,
275
+ description:
276
+ 'Take a screenshot of the entire screen. Use this to see what is currently displayed on the desktop.',
277
+ schema: ScreenshotSchema,
278
+ })
279
+ );
280
+
281
+ // computer_click
282
+ tools.push(
283
+ tool(createToolFunction(EDesktopTools.CLICK), {
284
+ name: EDesktopTools.CLICK,
285
+ description:
286
+ 'Click the mouse at the specified screen coordinates. Use screenshot first to identify the target location.',
287
+ schema: ClickSchema,
288
+ })
289
+ );
290
+
291
+ // computer_double_click
292
+ tools.push(
293
+ tool(createToolFunction(EDesktopTools.DOUBLE_CLICK), {
294
+ name: EDesktopTools.DOUBLE_CLICK,
295
+ description:
296
+ 'Double-click the mouse at the specified screen coordinates.',
297
+ schema: DoubleClickSchema,
298
+ })
299
+ );
300
+
301
+ // computer_right_click
302
+ tools.push(
303
+ tool(createToolFunction(EDesktopTools.RIGHT_CLICK), {
304
+ name: EDesktopTools.RIGHT_CLICK,
305
+ description:
306
+ 'Right-click the mouse at the specified screen coordinates to open context menus.',
307
+ schema: RightClickSchema,
308
+ })
309
+ );
310
+
311
+ // computer_type
312
+ tools.push(
313
+ tool(createToolFunction(EDesktopTools.TYPE), {
314
+ name: EDesktopTools.TYPE,
315
+ description:
316
+ 'Type text using the keyboard. Make sure the target input field is focused first (use click).',
317
+ schema: TypeSchema,
318
+ })
319
+ );
320
+
321
+ // computer_key
322
+ tools.push(
323
+ tool(createToolFunction(EDesktopTools.KEY), {
324
+ name: EDesktopTools.KEY,
325
+ description:
326
+ 'Press a single key on the keyboard (Enter, Tab, Escape, arrow keys, function keys, etc.).',
327
+ schema: KeySchema,
328
+ })
329
+ );
330
+
331
+ // computer_key_combo
332
+ tools.push(
333
+ tool(createToolFunction(EDesktopTools.KEY_COMBO), {
334
+ name: EDesktopTools.KEY_COMBO,
335
+ description:
336
+ 'Press a key combination (e.g., Ctrl+C to copy, Ctrl+V to paste, Alt+Tab to switch windows).',
337
+ schema: KeyComboSchema,
338
+ })
339
+ );
340
+
341
+ // computer_scroll
342
+ tools.push(
343
+ tool(createToolFunction(EDesktopTools.SCROLL), {
344
+ name: EDesktopTools.SCROLL,
345
+ description:
346
+ 'Scroll at the specified screen coordinates. Use negative deltaY to scroll up, positive to scroll down.',
347
+ schema: ScrollSchema,
348
+ })
349
+ );
350
+
351
+ // computer_drag
352
+ tools.push(
353
+ tool(createToolFunction(EDesktopTools.DRAG), {
354
+ name: EDesktopTools.DRAG,
355
+ description:
356
+ 'Drag the mouse from one position to another (for moving windows, selecting text, etc.).',
357
+ schema: DragSchema,
358
+ })
359
+ );
360
+
361
+ // computer_get_active_window
362
+ tools.push(
363
+ tool(createToolFunction(EDesktopTools.GET_ACTIVE_WINDOW), {
364
+ name: EDesktopTools.GET_ACTIVE_WINDOW,
365
+ description:
366
+ 'Get information about the currently active window (title, application name, position, size).',
367
+ schema: GetActiveWindowSchema,
368
+ })
369
+ );
370
+
371
+ // computer_get_mouse_position
372
+ tools.push(
373
+ tool(createToolFunction(EDesktopTools.GET_MOUSE_POSITION), {
374
+ name: EDesktopTools.GET_MOUSE_POSITION,
375
+ description: 'Get the current mouse cursor position on screen.',
376
+ schema: GetMousePositionSchema,
377
+ })
378
+ );
379
+
380
+ // clipboard_read
381
+ tools.push(
382
+ tool(createToolFunction(EDesktopTools.CLIPBOARD_READ), {
383
+ name: EDesktopTools.CLIPBOARD_READ,
384
+ description: 'Read the current contents of the system clipboard.',
385
+ schema: ClipboardReadSchema,
386
+ })
387
+ );
388
+
389
+ // clipboard_write
390
+ tools.push(
391
+ tool(createToolFunction(EDesktopTools.CLIPBOARD_WRITE), {
392
+ name: EDesktopTools.CLIPBOARD_WRITE,
393
+ description: 'Write text to the system clipboard.',
394
+ schema: ClipboardWriteSchema,
395
+ })
396
+ );
397
+
398
+ // clipboard_paste
399
+ tools.push(
400
+ tool(createToolFunction(EDesktopTools.CLIPBOARD_PASTE), {
401
+ name: EDesktopTools.CLIPBOARD_PASTE,
402
+ description:
403
+ 'Paste the clipboard contents (equivalent to Ctrl+V). Use clipboard_write first to set the content.',
404
+ schema: ClipboardPasteSchema,
405
+ })
406
+ );
407
+
408
+ // computer_wait
409
+ tools.push(
410
+ tool(createToolFunction(EDesktopTools.WAIT), {
411
+ name: EDesktopTools.WAIT,
412
+ description:
413
+ 'Wait for the specified number of milliseconds. Use this to wait for UI animations or loading.',
414
+ schema: WaitSchema,
415
+ })
416
+ );
417
+
418
+ return tools;
538
419
  }
539
420
 
540
421
  /**