npm - illuma-agents - Versions diffs - 1.0.42 → 1.0.43 - Mend

illuma-agents 1.0.42 → 1.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

package/dist/cjs/main.cjs +6 -0
package/dist/cjs/main.cjs.map +1 -1
package/dist/cjs/stream.cjs +14 -0
package/dist/cjs/stream.cjs.map +1 -1
package/dist/cjs/tools/DesktopTools.cjs +295 -0
package/dist/cjs/tools/DesktopTools.cjs.map +1 -0
package/dist/esm/main.mjs +1 -0
package/dist/esm/main.mjs.map +1 -1
package/dist/esm/stream.mjs +14 -0
package/dist/esm/stream.mjs.map +1 -1
package/dist/esm/tools/DesktopTools.mjs +289 -0
package/dist/esm/tools/DesktopTools.mjs.map +1 -0
package/dist/types/index.d.ts +1 -0
package/dist/types/tools/DesktopTools.d.ts +104 -0
package/package.json +1 -1
package/src/index.ts +1 -0
package/src/stream.ts +17 -0
package/src/tools/DesktopTools.ts +552 -0

package/dist/types/tools/DesktopTools.d.ts ADDED Viewed

@@ -0,0 +1,104 @@
+import { DynamicStructuredTool } from '@langchain/core/tools';
+/**
+ * Desktop tool names - keep in sync with the Ranger Desktop Electron app.
+ * These tools execute locally in the Electron app, NOT on the server.
+ *
+ * Each value is the exact `name` string a desktop tool is registered under,
+ * so changing a value changes the tool name exposed to the agent.
+ */
+export declare const EDesktopTools: {
+    readonly SCREENSHOT: "computer_screenshot";
+    readonly CLICK: "computer_click";
+    readonly DOUBLE_CLICK: "computer_double_click";
+    readonly RIGHT_CLICK: "computer_right_click";
+    readonly TYPE: "computer_type";
+    readonly KEY: "computer_key";
+    readonly KEY_COMBO: "computer_key_combo";
+    readonly SCROLL: "computer_scroll";
+    readonly DRAG: "computer_drag";
+    readonly GET_ACTIVE_WINDOW: "computer_get_active_window";
+    readonly GET_MOUSE_POSITION: "computer_get_mouse_position";
+    readonly CLIPBOARD_READ: "clipboard_read";
+    readonly CLIPBOARD_WRITE: "clipboard_write";
+    readonly CLIPBOARD_PASTE: "clipboard_paste";
+    readonly WAIT: "computer_wait";
+};
+export type DesktopToolName = (typeof EDesktopTools)[keyof typeof EDesktopTools];
+/**
+ * Callback function type for waiting on desktop action results
+ * This allows the server (Ranger) to provide a callback that waits for the Electron app
+ * to POST results back to the server before returning to the LLM.
+ *
+ * If the returned promise rejects, the calling tool reports the rejection
+ * message as a failed action instead of throwing.
+ *
+ * @param action - The desktop action (click, type, screenshot, etc.)
+ * @param args - Arguments for the action
+ * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)
+ * @returns Promise that resolves with the actual desktop result
+ */
+export type DesktopToolCallback = (action: string, args: Record<string, unknown>, toolCallId: string) => Promise<DesktopActionResult>;
+/**
+ * Result returned from desktop action execution.
+ *
+ * `success` is the only required field. The optional fields carry whatever
+ * payload the action produced: a screenshot, the active window, the mouse
+ * position, or clipboard text.
+ */
+export interface DesktopActionResult {
+    success: boolean;
+    error?: string;
+    screenshot?: {
+        base64: string;
+        width: number;
+        height: number;
+    };
+    activeWindow?: {
+        title: string;
+        app: string;
+        bounds?: {
+            x: number;
+            y: number;
+            width: number;
+            height: number;
+        };
+    };
+    mousePosition?: {
+        x: number;
+        y: number;
+    };
+    clipboard?: string;
+}
+/**
+ * Check if desktop capability is available based on request headers or context
+ * The Ranger Desktop Electron app sets these headers when connected:
+ * - X-Ranger-Desktop: true
+ * - X-Ranger-Desktop-Capable: true
+ *
+ * Returns true when either header carries the value `true`, and false when
+ * `req` or `req.headers` is absent.
+ */
+export declare function hasDesktopCapability(req?: {
+    headers?: Record<string, string | string[] | undefined>;
+}): boolean;
+/**
+ * Desktop tool marker payload.
+ * When no `waitForResult` callback is supplied, each desktop tool returns this
+ * object serialized as JSON instead of an actual result. It describes the
+ * action that still has to be executed on the desktop.
+ */
+export interface DesktopToolResponse {
+    requiresDesktopExecution: true;
+    action: string;
+    args: Record<string, unknown>;
+    toolCallId?: string;
+}
+/**
+ * Options for creating desktop tools
+ */
+export interface CreateDesktopToolsOptions {
+    /**
+     * Optional callback that waits for desktop action results.
+     * When provided, tools will await this callback to get actual results from the Electron app.
+     * When not provided, tools immediately return a JSON-serialized
+     * DesktopToolResponse marker (for non-server contexts).
+     */
+    waitForResult?: DesktopToolCallback;
+}
+/**
+ * Create desktop automation tools for the agent
+ * These tools allow AI to control the user's desktop when Ranger Desktop is running
+ *
+ * Returns one tool per name in EDesktopTools.
+ */
+export declare function createDesktopTools(options?: CreateDesktopToolsOptions): DynamicStructuredTool[];
+/**
+ * Get all desktop tool names (the values of EDesktopTools)
+ */
+export declare function getDesktopToolNames(): DesktopToolName[];
+/**
+ * Check if a tool name is a desktop tool.
+ * Type guard: narrows `name` to DesktopToolName when it equals one of the
+ * EDesktopTools values.
+ */
+export declare function isDesktopTool(name: string): name is DesktopToolName;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "illuma-agents",
-  "version": "1.0.42",
+  "version": "1.0.43",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

package/src/index.ts CHANGED Viewed

@@ -12,6 +12,7 @@ export * from './graphs';
 export * from './tools/Calculator';
 export * from './tools/CodeExecutor';
 export * from './tools/BrowserTools';
+export * from './tools/DesktopTools';
 export * from './tools/ProgrammaticToolCalling';
 export * from './tools/ToolSearch';
 export * from './tools/handlers';

package/src/stream.ts CHANGED Viewed

@@ -737,6 +737,23 @@ export function createContentAggregator(): t.ContentAggregatorResult {
       };
       updateContent(runStep.index, contentPart, true);
+    } else if (event === GraphEvents.ON_STRUCTURED_OUTPUT) {
+      // Handle structured output as text content with formatted JSON
+      const structuredData = data as unknown as {
+        structuredResponse: Record<string, unknown>;
+        schema: Record<string, unknown>;
+      };
+      if (structuredData.structuredResponse) {
+        const jsonText = JSON.stringify(structuredData.structuredResponse, null, 2);
+        const contentPart: t.MessageContentComplex = {
+          type: ContentTypes.TEXT,
+          text: jsonText,
+        };
+        // Add at index 0 or next available
+        const nextIndex = contentParts.length;
+        updateContent(nextIndex, contentPart);
+      }
     }
   };

package/src/tools/DesktopTools.ts ADDED Viewed

@@ -0,0 +1,552 @@
+import { z } from 'zod';
+import { tool, DynamicStructuredTool } from '@langchain/core/tools';
+/**
+ * Minimal view of the tool configuration passed by the LangChain runtime.
+ * Only `toolCall.id` is read in this module (it becomes the id handed to the
+ * result callback or written into the marker); both levels are optional.
+ */
+type ToolCallConfig = { toolCall?: { id?: string } };
+/**
+ * Desktop tool names - keep in sync with the Ranger Desktop Electron app.
+ * These tools execute locally in the Electron app, NOT on the server.
+ *
+ * Each value is used verbatim as the `name` of the corresponding tool and as
+ * the `action` string passed to the result callback / marker.
+ * DesktopToolName (below) is the union of these string values.
+ */
+export const EDesktopTools = {
+  SCREENSHOT: 'computer_screenshot',
+  CLICK: 'computer_click',
+  DOUBLE_CLICK: 'computer_double_click',
+  RIGHT_CLICK: 'computer_right_click',
+  TYPE: 'computer_type',
+  KEY: 'computer_key',
+  KEY_COMBO: 'computer_key_combo',
+  SCROLL: 'computer_scroll',
+  DRAG: 'computer_drag',
+  GET_ACTIVE_WINDOW: 'computer_get_active_window',
+  GET_MOUSE_POSITION: 'computer_get_mouse_position',
+  CLIPBOARD_READ: 'clipboard_read',
+  CLIPBOARD_WRITE: 'clipboard_write',
+  CLIPBOARD_PASTE: 'clipboard_paste',
+  WAIT: 'computer_wait',
+} as const;
+export type DesktopToolName =
+  (typeof EDesktopTools)[keyof typeof EDesktopTools];
+/**
+ * Callback function type for waiting on desktop action results
+ * This allows the server (Ranger) to provide a callback that waits for the Electron app
+ * to POST results back to the server before returning to the LLM.
+ *
+ * If the returned promise rejects, the calling tool catches the rejection and
+ * reports its message as a failed action instead of throwing.
+ *
+ * @param action - The desktop action (click, type, screenshot, etc.)
+ * @param args - Arguments for the action
+ * @param toolCallId - Unique ID for this tool call (from config.toolCall.id,
+ *   or a generated `desktop-...` id when the runtime supplies none)
+ * @returns Promise that resolves with the actual desktop result
+ */
+export type DesktopToolCallback = (
+  action: string,
+  args: Record<string, unknown>,
+  toolCallId: string
+) => Promise<DesktopActionResult>;
+/**
+ * Result returned from desktop action execution.
+ *
+ * `success` is the only required field. The optional fields carry whatever
+ * payload the action produced; the result formatter emits one text section
+ * for each payload that is present.
+ */
+export interface DesktopActionResult {
+  success: boolean;
+  error?: string;
+  screenshot?: {
+    base64: string;
+    width: number;
+    height: number;
+  };
+  activeWindow?: {
+    title: string;
+    app: string;
+    bounds?: { x: number; y: number; width: number; height: number };
+  };
+  mousePosition?: { x: number; y: number };
+  clipboard?: string;
+}
+/**
+ * Check if desktop capability is available based on request headers.
+ * The Ranger Desktop Electron app sets these headers when connected:
+ * - X-Ranger-Desktop: true
+ * - X-Ranger-Desktop-Capable: true
+ *
+ * Header names are matched case-insensitively. A header value may be a single
+ * string, a comma-separated string, or an array of strings; the capability is
+ * reported when any entry equals `true` (ignoring case and surrounding
+ * whitespace).
+ *
+ * @param req - Request-like object; only `headers` is read
+ * @returns true when either header signals `true`, false otherwise
+ *   (including when `req` or `req.headers` is missing)
+ */
+export function hasDesktopCapability(req?: {
+  headers?: Record<string, string | string[] | undefined>;
+}): boolean {
+  const headers = req?.headers;
+  if (!headers) {
+    return false;
+  }
+  const capabilityHeaders = new Set([
+    'x-ranger-desktop',
+    'x-ranger-desktop-capable',
+  ]);
+  // The declared type allows string[] values, which a bare `=== 'true'`
+  // comparison never matches. Some servers also join repeated headers into
+  // one comma-separated string, so each entry is split before comparing.
+  const signalsTrue = (value: string | string[] | undefined): boolean => {
+    if (value === undefined) {
+      return false;
+    }
+    const entries = Array.isArray(value) ? value : [value];
+    return entries.some(
+      (entry) =>
+        typeof entry === 'string' &&
+        entry.split(',').some((part) => part.trim().toLowerCase() === 'true')
+    );
+  };
+  return Object.entries(headers).some(
+    ([name, value]) =>
+      capabilityHeaders.has(name.toLowerCase()) && signalsTrue(value)
+  );
+}
+// Tool schemas - one zod object per desktop tool; tools that take no arguments use an empty object
+const ScreenshotSchema = z.object({});
+const ClickSchema = z.object({
+  x: z.number().describe('X coordinate to click'),
+  y: z.number().describe('Y coordinate to click'),
+});
+const DoubleClickSchema = z.object({
+  x: z.number().describe('X coordinate to double-click'),
+  y: z.number().describe('Y coordinate to double-click'),
+});
+const RightClickSchema = z.object({
+  x: z.number().describe('X coordinate to right-click'),
+  y: z.number().describe('Y coordinate to right-click'),
+});
+const TypeSchema = z.object({
+  text: z.string().describe('Text to type'),
+});
+const KeySchema = z.object({
+  key: z
+    .string()
+    .describe(
+      'Key to press (e.g., "Enter", "Tab", "Escape", "Backspace", "Delete", "ArrowUp", "ArrowDown", "ArrowLeft", "ArrowRight", "Home", "End", "PageUp", "PageDown", "F1"-"F12")'
+    ),
+});
+const KeyComboSchema = z.object({
+  keys: z
+    .array(z.string())
+    .describe(
+      'Array of keys to press together (e.g., ["Control", "c"] for copy, ["Alt", "Tab"] for window switch)'
+    ),
+});
+const ScrollSchema = z.object({
+  x: z.number().describe('X coordinate to scroll at'),
+  y: z.number().describe('Y coordinate to scroll at'),
+  deltaX: z.number().optional().describe('Horizontal scroll amount (pixels)'),
+  deltaY: z.number().describe('Vertical scroll amount (pixels, negative = up, positive = down)'),
+});
+const DragSchema = z.object({
+  startX: z.number().describe('Starting X coordinate'),
+  startY: z.number().describe('Starting Y coordinate'),
+  endX: z.number().describe('Ending X coordinate'),
+  endY: z.number().describe('Ending Y coordinate'),
+});
+const GetActiveWindowSchema = z.object({});
+const GetMousePositionSchema = z.object({});
+const ClipboardReadSchema = z.object({});
+const ClipboardWriteSchema = z.object({
+  text: z.string().describe('Text to write to clipboard'),
+});
+const ClipboardPasteSchema = z.object({});
+const WaitSchema = z.object({
+  ms: z.number().describe('Milliseconds to wait'),
+});
+/**
+ * Desktop tool marker payload.
+ * When no `waitForResult` callback is supplied, each desktop tool returns this
+ * object serialized as JSON instead of an actual result. It describes the
+ * action (name, args, tool call id) that still has to be executed on the
+ * desktop.
+ */
+export interface DesktopToolResponse {
+  requiresDesktopExecution: true;
+  action: string;
+  args: Record<string, unknown>;
+  toolCallId?: string;
+}
+/**
+ * Options for creating desktop tools
+ */
+export interface CreateDesktopToolsOptions {
+  /**
+   * Optional callback that waits for desktop action results.
+   * When provided, tools will await this callback to get actual results from the Electron app.
+   * When not provided, tools immediately return a JSON-serialized
+   * DesktopToolResponse marker (for non-server contexts).
+   */
+  waitForResult?: DesktopToolCallback;
+}
+/**
+ * Format a desktop action result as plain text for LLM consumption.
+ *
+ * A result with `success: false` is always reported as a failure, using
+ * `result.error` when it is non-empty and "Unknown error" otherwise, so a
+ * failed action that carries no error message is never described as
+ * successful. On success, one section is emitted per payload present
+ * (screenshot, active window, mouse position, clipboard); when there is no
+ * payload a generic completion message is returned.
+ *
+ * @param result - Result reported for the desktop action
+ * @param action - Action name used in the failure and completion messages
+ * @returns Newline-joined, human-readable summary of the result
+ */
+function formatResultForLLM(
+  result: DesktopActionResult,
+  action: string
+): string {
+  if (!result.success) {
+    return `Desktop action "${action}" failed: ${result.error || 'Unknown error'}`;
+  }
+  const { screenshot, activeWindow, mousePosition, clipboard } = result;
+  const parts: string[] = [];
+  if (screenshot) {
+    // Only the dimensions are reported; the base64 payload is deliberately
+    // left out of the text. NOTE(review): presumably the message formatter
+    // attaches the image separately - confirm against the caller.
+    parts.push(`Screenshot captured (${screenshot.width}x${screenshot.height})`);
+  }
+  if (activeWindow) {
+    parts.push(`**Active Window:**`);
+    parts.push(`  - Title: ${activeWindow.title}`);
+    parts.push(`  - App: ${activeWindow.app}`);
+    if (activeWindow.bounds) {
+      const b = activeWindow.bounds;
+      parts.push(`  - Position: (${b.x}, ${b.y})`);
+      parts.push(`  - Size: ${b.width}x${b.height}`);
+    }
+  }
+  if (mousePosition) {
+    parts.push(`**Mouse Position:** (${mousePosition.x}, ${mousePosition.y})`);
+  }
+  // Compare against undefined so an empty clipboard is still reported.
+  if (clipboard !== undefined) {
+    parts.push(`**Clipboard Content:** ${clipboard}`);
+  }
+  if (parts.length === 0) {
+    parts.push(`Desktop action "${action}" completed successfully.`);
+  }
+  return parts.join('\n');
+}
+/**
+ * Create a tool result: either wait for the callback or return a marker.
+ *
+ * With `waitForResult`, the callback is awaited and its result is formatted as
+ * text; a rejection is caught and returned as a failure message, so this
+ * function does not throw for callback errors. Without it, a JSON-serialized
+ * DesktopToolResponse marker is returned immediately.
+ *
+ * @param action - Desktop action name
+ * @param args - Arguments forwarded to the callback / written into the marker
+ * @param config - Tool configuration from the runtime; only `toolCall.id` is read
+ * @param waitForResult - Optional callback that resolves with the actual result
+ * @returns Text for the LLM, or the serialized marker
+ */
+async function createToolResult(
+  action: string,
+  args: Record<string, unknown>,
+  config: ToolCallConfig | undefined,
+  waitForResult?: DesktopToolCallback
+): Promise<string> {
+  // `||` (not `??`) is deliberate: an empty-string id also gets a generated one.
+  // The random suffix keeps generated ids distinct when several tool calls
+  // without ids are created within the same millisecond; a timestamp alone
+  // would collide.
+  const toolCallId =
+    config?.toolCall?.id ||
+    `desktop-${Date.now()}-${Math.random().toString(36).slice(2, 10)}`;
+  if (waitForResult) {
+    // Server context: wait for actual result from Electron app
+    try {
+      const result = await waitForResult(action, args, toolCallId);
+      return formatResultForLLM(result, action);
+    } catch (error) {
+      const errorMessage =
+        error instanceof Error ? error.message : String(error);
+      return `Desktop action "${action}" failed: ${errorMessage}`;
+    }
+  }
+  // Non-server context: return marker for later processing
+  const response: DesktopToolResponse = {
+    requiresDesktopExecution: true,
+    action,
+    args,
+    toolCallId,
+  };
+  return JSON.stringify(response);
+}
+/**
+ * Create desktop automation tools for the agent
+ * These tools allow AI to control the user's desktop when Ranger Desktop is running
+ *
+ * Returns one tool per EDesktopTools name. Every tool delegates to
+ * createToolResult with its own action name: when `options.waitForResult` is
+ * given the tool resolves to formatted result text, otherwise it resolves to
+ * a JSON-serialized DesktopToolResponse marker.
+ * Tools whose schema is empty always forward `{}` instead of the received args.
+ */
+export function createDesktopTools(
+  options: CreateDesktopToolsOptions = {}
+): DynamicStructuredTool[] {
+  const { waitForResult } = options;
+  return [
+    // computer_screenshot
+    tool(
+      async (_args, config) => {
+        return createToolResult(
+          EDesktopTools.SCREENSHOT,
+          {},
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.SCREENSHOT,
+        description:
+          'Take a screenshot of the entire screen. Use this to see what is currently displayed on the desktop.',
+        schema: ScreenshotSchema,
+      }
+    ),
+    // computer_click
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.CLICK,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.CLICK,
+        description:
+          'Click the mouse at the specified screen coordinates. Use screenshot first to identify the target location.',
+        schema: ClickSchema,
+      }
+    ),
+    // computer_double_click
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.DOUBLE_CLICK,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.DOUBLE_CLICK,
+        description:
+          'Double-click the mouse at the specified screen coordinates.',
+        schema: DoubleClickSchema,
+      }
+    ),
+    // computer_right_click
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.RIGHT_CLICK,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.RIGHT_CLICK,
+        description:
+          'Right-click the mouse at the specified screen coordinates to open context menus.',
+        schema: RightClickSchema,
+      }
+    ),
+    // computer_type
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.TYPE,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.TYPE,
+        description:
+          'Type text using the keyboard. Make sure the target input field is focused first (use click).',
+        schema: TypeSchema,
+      }
+    ),
+    // computer_key
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.KEY,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.KEY,
+        description:
+          'Press a single key on the keyboard (Enter, Tab, Escape, arrow keys, function keys, etc.).',
+        schema: KeySchema,
+      }
+    ),
+    // computer_key_combo
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.KEY_COMBO,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.KEY_COMBO,
+        description:
+          'Press a key combination (e.g., Ctrl+C to copy, Ctrl+V to paste, Alt+Tab to switch windows).',
+        schema: KeyComboSchema,
+      }
+    ),
+    // computer_scroll
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.SCROLL,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.SCROLL,
+        description:
+          'Scroll at the specified screen coordinates. Use negative deltaY to scroll up, positive to scroll down.',
+        schema: ScrollSchema,
+      }
+    ),
+    // computer_drag
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.DRAG,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.DRAG,
+        description:
+          'Drag the mouse from one position to another (for moving windows, selecting text, etc.).',
+        schema: DragSchema,
+      }
+    ),
+    // computer_get_active_window
+    tool(
+      async (_args, config) => {
+        return createToolResult(
+          EDesktopTools.GET_ACTIVE_WINDOW,
+          {},
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.GET_ACTIVE_WINDOW,
+        description:
+          'Get information about the currently active window (title, application name, position, size).',
+        schema: GetActiveWindowSchema,
+      }
+    ),
+    // computer_get_mouse_position
+    tool(
+      async (_args, config) => {
+        return createToolResult(
+          EDesktopTools.GET_MOUSE_POSITION,
+          {},
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.GET_MOUSE_POSITION,
+        description: 'Get the current mouse cursor position on screen.',
+        schema: GetMousePositionSchema,
+      }
+    ),
+    // clipboard_read
+    tool(
+      async (_args, config) => {
+        return createToolResult(
+          EDesktopTools.CLIPBOARD_READ,
+          {},
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.CLIPBOARD_READ,
+        description: 'Read the current contents of the system clipboard.',
+        schema: ClipboardReadSchema,
+      }
+    ),
+    // clipboard_write
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.CLIPBOARD_WRITE,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.CLIPBOARD_WRITE,
+        description: 'Write text to the system clipboard.',
+        schema: ClipboardWriteSchema,
+      }
+    ),
+    // clipboard_paste
+    tool(
+      async (_args, config) => {
+        return createToolResult(
+          EDesktopTools.CLIPBOARD_PASTE,
+          {},
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.CLIPBOARD_PASTE,
+        description:
+          'Paste the clipboard contents (equivalent to Ctrl+V). Use clipboard_write first to set the content.',
+        schema: ClipboardPasteSchema,
+      }
+    ),
+    // computer_wait
+    tool(
+      async (args, config) => {
+        return createToolResult(
+          EDesktopTools.WAIT,
+          args,
+          config as ToolCallConfig,
+          waitForResult
+        );
+      },
+      {
+        name: EDesktopTools.WAIT,
+        description:
+          'Wait for the specified number of milliseconds. Use this to wait for UI animations or loading.',
+        schema: WaitSchema,
+      }
+    ),
+  ];
+}
+/**
+ * List every desktop tool name, i.e. the values of EDesktopTools.
+ *
+ * @returns A new array holding each desktop tool name
+ */
+export function getDesktopToolNames(): DesktopToolName[] {
+  const names: DesktopToolName[] = Object.values(EDesktopTools);
+  return names;
+}
+/**
+ * Type guard telling whether `name` is one of the EDesktopTools values.
+ *
+ * @param name - Tool name to test
+ * @returns true (narrowing `name` to DesktopToolName) when it is a desktop tool name
+ */
+export function isDesktopTool(name: string): name is DesktopToolName {
+  const knownNames: readonly string[] = Object.values(EDesktopTools);
+  return knownNames.includes(name);
+}