illuma-agents 1.0.29 → 1.0.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/BrowserTools.cjs +77 -50
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/tools/BrowserTools.mjs +77 -50
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/tools/BrowserTools.d.ts +23 -0
- package/package.json +1 -1
- package/src/tools/BrowserTools.ts +102 -52
|
@@ -111,26 +111,29 @@ const BrowserKeypressSchema = zod.z.object({
|
|
|
111
111
|
/**
|
|
112
112
|
* Format browser action result for LLM consumption
|
|
113
113
|
*/
|
|
114
|
-
function formatResultForLLM(result, action) {
|
|
114
|
+
function formatResultForLLM(result, action, actionArgs) {
|
|
115
115
|
if (!result.success && result.error) {
|
|
116
|
-
|
|
116
|
+
const errorType = result.errorType ? `[${result.errorType}] ` : '';
|
|
117
|
+
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
117
118
|
}
|
|
118
119
|
const parts = [];
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if (result.screenshot != null && result.screenshot !== '') {
|
|
129
|
-
parts.push('\n[Screenshot captured and displayed to user]');
|
|
130
|
-
}
|
|
131
|
-
if (parts.length === 0) {
|
|
132
|
-
return `Browser action "${action}" completed successfully.`;
|
|
120
|
+
// Verification result (Skyvern pattern)
|
|
121
|
+
if (result.verification) {
|
|
122
|
+
const v = result.verification;
|
|
123
|
+
if (v.dialogClosed || v.formGone || v.urlChanged) {
|
|
124
|
+
parts.push(`✓ ${v.changeDescription}`);
|
|
125
|
+
}
|
|
126
|
+
else if (!v.significantChange) {
|
|
127
|
+
parts.push(`⚠ No change detected`);
|
|
128
|
+
}
|
|
133
129
|
}
|
|
130
|
+
// Page state
|
|
131
|
+
if (result.url)
|
|
132
|
+
parts.push(`URL: ${result.url}`);
|
|
133
|
+
if (result.title)
|
|
134
|
+
parts.push(`Title: ${result.title}`);
|
|
135
|
+
if (result.elementList)
|
|
136
|
+
parts.push(`\nElements:\n${result.elementList}`);
|
|
134
137
|
return parts.join('\n');
|
|
135
138
|
}
|
|
136
139
|
/**
|
|
@@ -173,45 +176,65 @@ function createBrowserTools(options) {
|
|
|
173
176
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
174
177
|
try {
|
|
175
178
|
const result = await waitForResult(action, args, toolCallId);
|
|
176
|
-
return formatResultForLLM(result, action);
|
|
179
|
+
return formatResultForLLM(result, action, args);
|
|
177
180
|
}
|
|
178
181
|
catch (error) {
|
|
179
182
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
180
|
-
return
|
|
183
|
+
return `❌ **ACTION FAILED**: Browser action "${action}" failed: ${errorMessage}
|
|
184
|
+
|
|
185
|
+
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
181
186
|
}
|
|
182
187
|
};
|
|
183
188
|
};
|
|
184
|
-
// browser_click
|
|
189
|
+
// browser_click - PRIMARY action for buttons and links
|
|
185
190
|
tools$1.push(tools.tool(createToolFunction('click'), {
|
|
186
191
|
name: EBrowserTools.CLICK,
|
|
187
|
-
description: `Click an element
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
192
|
+
description: `Click an element by its [index] from the element list.
|
|
193
|
+
|
|
194
|
+
**WHEN TO USE (prioritize this over keypress):**
|
|
195
|
+
- Buttons labeled "Send", "Submit", "Save", "OK", "Confirm" → CLICK them
|
|
196
|
+
- Links (a tags) to navigate
|
|
197
|
+
- Checkboxes, radio buttons to select
|
|
198
|
+
- Any clickable element that performs an action
|
|
199
|
+
|
|
200
|
+
**SEMANTIC ELEMENT IDENTIFICATION:**
|
|
201
|
+
- Look for aria-label="Send", aria-label="Submit" for action buttons
|
|
202
|
+
- Look for type="submit" for form submission
|
|
203
|
+
- Look for button text like "Send", "Submit", "Save"
|
|
204
|
+
- For email compose: Find the Send button, DON'T use keyboard shortcuts
|
|
205
|
+
|
|
206
|
+
Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,
|
|
191
207
|
schema: BrowserClickSchema,
|
|
192
208
|
}));
|
|
193
209
|
// browser_type
|
|
194
210
|
tools$1.push(tools.tool(createToolFunction('type'), {
|
|
195
211
|
name: EBrowserTools.TYPE,
|
|
196
|
-
description: `Type text into an input element
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
212
|
+
description: `Type text into an input/textarea element by [index].
|
|
213
|
+
|
|
214
|
+
**WHEN TO USE:**
|
|
215
|
+
- Input fields (fieldType="email", "search", "phone", etc.)
|
|
216
|
+
- Textareas for longer content
|
|
217
|
+
- Contenteditable divs (like Gmail compose body)
|
|
218
|
+
|
|
219
|
+
**OPTIONS:**
|
|
220
|
+
- pressEnter: true → Press Enter after typing (for search forms)
|
|
221
|
+
- pressEnter: false → Just type without submitting
|
|
222
|
+
|
|
223
|
+
**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.
|
|
224
|
+
|
|
225
|
+
Returns verification showing if text was entered or form was submitted.`,
|
|
200
226
|
schema: BrowserTypeSchema,
|
|
201
227
|
}));
|
|
202
228
|
// browser_navigate
|
|
203
229
|
tools$1.push(tools.tool(createToolFunction('navigate'), {
|
|
204
230
|
name: EBrowserTools.NAVIGATE,
|
|
205
|
-
description: `Navigate to a URL
|
|
206
|
-
After navigation, you receive the new page's element list.`,
|
|
231
|
+
description: `Navigate to a URL (include https://). Returns new page's element list.`,
|
|
207
232
|
schema: BrowserNavigateSchema,
|
|
208
233
|
}));
|
|
209
234
|
// browser_scroll
|
|
210
235
|
tools$1.push(tools.tool(createToolFunction('scroll'), {
|
|
211
236
|
name: EBrowserTools.SCROLL,
|
|
212
|
-
description: `Scroll the page
|
|
213
|
-
Use 'down' to scroll down, 'up' to scroll up.
|
|
214
|
-
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
237
|
+
description: `Scroll the page. Returns updated element list with newly visible elements.`,
|
|
215
238
|
schema: BrowserScrollSchema,
|
|
216
239
|
}));
|
|
217
240
|
// browser_extract
|
|
@@ -246,27 +269,19 @@ After going back, you receive the previous page's element list.`,
|
|
|
246
269
|
// browser_screenshot
|
|
247
270
|
tools$1.push(tools.tool(createToolFunction('screenshot'), {
|
|
248
271
|
name: EBrowserTools.SCREENSHOT,
|
|
249
|
-
description: `Capture a screenshot
|
|
250
|
-
Returns the page state with a note that screenshot was displayed to the user.
|
|
251
|
-
Use browser_get_page_state to get the element list for automation.`,
|
|
272
|
+
description: `Capture a screenshot. Use browser_get_page_state for element list.`,
|
|
252
273
|
schema: BrowserScreenshotSchema,
|
|
253
274
|
}));
|
|
254
275
|
// browser_get_page_state
|
|
255
276
|
tools$1.push(tools.tool(createToolFunction('get_page_state'), {
|
|
256
277
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
257
|
-
description: `Get
|
|
258
|
-
Use this at the start of a task to see what elements are available.
|
|
259
|
-
Returns a text list of elements with their index numbers for interaction.`,
|
|
278
|
+
description: `Get current page URL, title, and interactive elements list with index numbers.`,
|
|
260
279
|
schema: BrowserGetPageStateSchema,
|
|
261
280
|
}));
|
|
262
|
-
// browser_select_option
|
|
281
|
+
// browser_select_option
|
|
263
282
|
tools$1.push(tools.tool(createToolFunction('select_option'), {
|
|
264
283
|
name: EBrowserTools.SELECT_OPTION,
|
|
265
|
-
description: `Select an option from a dropdown
|
|
266
|
-
For native <select> elements: finds and selects the option by value/label.
|
|
267
|
-
For custom dropdowns: clicks to open, then clicks the matching option.
|
|
268
|
-
Use this instead of click for dropdowns - it handles both native and custom selects.
|
|
269
|
-
After selection, you receive an updated element list.`,
|
|
284
|
+
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
270
285
|
schema: BrowserSelectOptionSchema,
|
|
271
286
|
}));
|
|
272
287
|
// browser_upload_file - Skyvern-inspired for file input handling
|
|
@@ -278,14 +293,26 @@ The system will download the file and attach it to the input.
|
|
|
278
293
|
After upload, you receive an updated element list.`,
|
|
279
294
|
schema: BrowserUploadFileSchema,
|
|
280
295
|
}));
|
|
281
|
-
// browser_keypress -
|
|
296
|
+
// browser_keypress - LAST RESORT for keyboard shortcuts
|
|
282
297
|
tools$1.push(tools.tool(createToolFunction('keypress'), {
|
|
283
298
|
name: EBrowserTools.KEYPRESS,
|
|
284
|
-
description: `Press keyboard key(s)
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
299
|
+
description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.
|
|
300
|
+
|
|
301
|
+
**PREFER browser_click OVER keypress:**
|
|
302
|
+
- To send email → Find and CLICK the Send button
|
|
303
|
+
- To submit form → CLICK the Submit button
|
|
304
|
+
- To close modal → CLICK the X or Cancel button
|
|
305
|
+
|
|
306
|
+
**WHEN TO USE keypress:**
|
|
307
|
+
- Navigation: "Tab", "ArrowDown", "ArrowUp" in dropdowns
|
|
308
|
+
- Escape to close popups if no X button exists
|
|
309
|
+
- Control+A, Control+C for select/copy operations
|
|
310
|
+
- ONLY use Enter to submit if there's NO visible submit button
|
|
311
|
+
|
|
312
|
+
Single keys: "Enter", "Escape", "Tab", "ArrowDown"
|
|
313
|
+
Key combos: "Control+A", "Shift+Enter"
|
|
314
|
+
|
|
315
|
+
Returns verification showing if page state changed.`,
|
|
289
316
|
schema: BrowserKeypressSchema,
|
|
290
317
|
}));
|
|
291
318
|
return tools$1;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n url?: string;\n title?: string;\n elementList?: string; // Text-based element list\n error?: string;\n screenshot?: string; // Base64 screenshot (if requested)\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string\n): string {\n if (!result.success && result.error) {\n return `Browser action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n if (result.url != null && result.url !== '') {\n parts.push(`**Current URL:** ${result.url}`);\n }\n if (result.title != null && result.title !== '') {\n parts.push(`**Page Title:** ${result.title}`);\n }\n if (result.elementList != null && result.elementList !== '') {\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\n }\n if (result.screenshot != null && result.screenshot !== '') {\n parts.push('\\n[Screenshot captured and displayed to user]');\n }\n\n if (parts.length === 0) {\n return `Browser action \"${action}\" completed successfully.`;\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `Browser action \"${action}\" failed: ${errorMessage}`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element on the current web page by its index number.\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\nUse the index number in brackets to click that element.\nAfter clicking, you receive an updated element list showing the new page state.`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element on the page.\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\nSet pressEnter: true to submit forms after typing.\nAfter typing, you receive an updated element list.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL. Always include the full URL with https://.\nAfter navigation, you receive the new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page to reveal more content.\nUse 'down' to scroll down, 'up' to scroll up.\nAfter scrolling, you receive an updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot of the current page.\nReturns the page state with a note that screenshot was displayed to the user.\nUse browser_get_page_state to get the element list for automation.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get the current page state including URL, title, and all interactive elements.\nUse this at the start of a task to see what elements are available.\nReturns a text list of elements with their index numbers for interaction.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option - Skyvern-inspired for robust dropdown handling\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown or select element.\nFor native <select> elements: finds and selects the option by value/label.\nFor custom dropdowns: clicks to open, then clicks the matching option.\nUse this instead of click for dropdowns - it handles both native and custom selects.\nAfter selection, you receive an updated element list.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - For keyboard shortcuts and special keys\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) on the page.\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\", \"ArrowUp\", \"Backspace\", \"Delete\"\nKey combos: \"Control+A\" (select all), \"Control+C\" (copy), \"Shift+Enter\" (newline)\nUse this for form submission, closing modals, navigating dropdowns.\nAfter keypress, you receive an updated element list.`,\n schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AAkC9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAEA;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAEA;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAEA;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAEA;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EAAA;IAEd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,IAAI,IAAI,IAAI,MAAM,CAAC,GAAG,KAAK,EAAE,EAAE;QAC3C,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE;QAC/C,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;QAC3D,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,EAAE,EAAE;AACzD,QAAA,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AACxD,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG6D,+EAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACwC,0DAAA,CAAA;AACrD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE+D,iFAAA,CAAA;AAC5E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEgD,kEAAA,CAAA;AAC7D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEuD,yEAAA,CAAA;AACpE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAA;;;;AAImC,qDAAA,CAAA;AAChD,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;AAIkC,oDAAA,CAAA;AAC/C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Action status - matches Skyvern's ActionStatus\n */\nexport type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';\n\n/**\n * Verification status - matches Skyvern's VerificationStatus\n * - complete: Goal achieved\n * - terminate: Goal cannot be achieved, stop\n * - continue: Goal not yet achieved, keep going\n */\nexport type VerificationStatus = 'complete' | 'terminate' | 'continue';\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n status?: ActionStatus;\n url?: string;\n title?: string;\n elementList?: string;\n error?: string;\n errorType?: string; // Typed error classification\n screenshot?: string;\n elementDescription?: string;\n // State verification (before/after comparison)\n verification?: {\n urlChanged: boolean;\n titleChanged: boolean;\n elementCountDelta: number;\n significantChange: boolean;\n dialogClosed: boolean;\n formGone: boolean;\n changeDescription: string;\n };\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string,\n actionArgs?: Record<string, unknown>\n): string {\n if (!result.success && result.error) {\n const errorType = result.errorType ? `[${result.errorType}] ` : '';\n return `${errorType}Action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n // Verification result (Skyvern pattern)\n if (result.verification) {\n const v = result.verification;\n if (v.dialogClosed || v.formGone || v.urlChanged) {\n parts.push(`✓ ${v.changeDescription}`);\n } else if (!v.significantChange) {\n parts.push(`⚠ No change detected`);\n }\n }\n\n // Page state\n if (result.url) parts.push(`URL: ${result.url}`);\n if (result.title) parts.push(`Title: ${result.title}`);\n if (result.elementList) parts.push(`\\nElements:\\n${result.elementList}`);\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action, args);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `❌ **ACTION FAILED**: Browser action \"${action}\" failed: ${errorMessage}\n\n**REQUIRED**: Analyze why the action failed and try an alternative approach.`;\n }\n };\n };\n\n // browser_click - PRIMARY action for buttons and links\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element by its [index] from the element list.\n\n**WHEN TO USE (prioritize this over keypress):**\n- Buttons labeled \"Send\", \"Submit\", \"Save\", \"OK\", \"Confirm\" → CLICK them\n- Links (a tags) to navigate\n- Checkboxes, radio buttons to select\n- Any clickable element that performs an action\n\n**SEMANTIC ELEMENT IDENTIFICATION:**\n- Look for aria-label=\"Send\", aria-label=\"Submit\" for action buttons\n- Look for type=\"submit\" for form submission\n- Look for button text like \"Send\", \"Submit\", \"Save\"\n- For email compose: Find the Send button, DON'T use keyboard shortcuts\n\nReturns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input/textarea element by [index].\n\n**WHEN TO USE:**\n- Input fields (fieldType=\"email\", \"search\", \"phone\", etc.)\n- Textareas for longer content\n- Contenteditable divs (like Gmail compose body)\n\n**OPTIONS:**\n- pressEnter: true → Press Enter after typing (for search forms)\n- pressEnter: false → Just type without submitting\n\n**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.\n\nReturns verification showing if text was entered or form was submitted.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL (include https://). Returns new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page. Returns updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot. Use browser_get_page_state for element list.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get current page URL, title, and interactive elements list with index numbers.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - LAST RESORT for keyboard shortcuts\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.\n\n**PREFER browser_click OVER keypress:**\n- To send email → Find and CLICK the Send button\n- To submit form → CLICK the Submit button\n- To close modal → CLICK the X or Cancel button\n\n**WHEN TO USE keypress:**\n- Navigation: \"Tab\", \"ArrowDown\", \"ArrowUp\" in dropdowns\n- Escape to close popups if no X button exists\n- Control+A, Control+C for select/copy operations\n- ONLY use Enter to submit if there's NO visible submit button\n\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\"\nKey combos: \"Control+A\", \"Shift+Enter\"\n\nReturns verification showing if page state changed.`,\n schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AA4D9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAEA;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAEA;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAEA;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAEA;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EACd,UAAoC,EAAA;IAEpC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,GAAG,CAAI,CAAA,EAAA,MAAM,CAAC,SAAS,CAAA,EAAA,CAAI,GAAG,EAAE;QAClE,OAAO,CAAA,EAAG,SAAS,CAAW,QAAA,EAAA,MAAM,aAAa,MAAM,CAAC,KAAK,CAAA,CAAE;;IAGjE,MAAM,KAAK,GAAa,EAAE;;AAG1B,IAAA,IAAI,MAAM,CAAC,YAAY,EAAE;AACvB,QAAA,MAAM,CAAC,GAAG,MAAM,CAAC,YAAY;AAC7B,QAAA,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,EAAE;YAChD,KAAK,CAAC,IAAI,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,iBAAiB,CAAE,CAAA,CAAC;;AACjC,aAAA,IAAI,CAAC,CAAC,CAAC,iBAAiB,EAAE;AAC/B,YAAA,KAAK,CAAC,IAAI,CAAC,CAAA,oBAAA,CAAsB,CAAC;;;;IAKtC,IAAI,MAAM,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;IAChD,IAAI,MAAM,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,OAAA,EAAU,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;IACtD,IAAI,MAAM,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,aAAA,EAAgB,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;AAExE,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;gBAC5D,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;;YAC/C,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;gBACxD,OAAO,CAAA,qCAAA,EAAwC,MAAM,CAAA,UAAA,EAAa,YAAY;;6EAET;;AAEzE,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;AAcsF,wGAAA,CAAA;AACnG,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;AAaqD,uEAAA,CAAA;AAClE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAwE,sEAAA,CAAA;AACrF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAA4E,0EAAA,CAAA;AACzF,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAoE,kEAAA,CAAA;AACjF,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAgF,8EAAA,CAAA;AAC7F,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAsF,oFAAA,CAAA;AACnG,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;AAgBiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
@@ -109,26 +109,29 @@ const BrowserKeypressSchema = z.object({
|
|
|
109
109
|
/**
|
|
110
110
|
* Format browser action result for LLM consumption
|
|
111
111
|
*/
|
|
112
|
-
function formatResultForLLM(result, action) {
|
|
112
|
+
function formatResultForLLM(result, action, actionArgs) {
|
|
113
113
|
if (!result.success && result.error) {
|
|
114
|
-
|
|
114
|
+
const errorType = result.errorType ? `[${result.errorType}] ` : '';
|
|
115
|
+
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
115
116
|
}
|
|
116
117
|
const parts = [];
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
if (result.screenshot != null && result.screenshot !== '') {
|
|
127
|
-
parts.push('\n[Screenshot captured and displayed to user]');
|
|
128
|
-
}
|
|
129
|
-
if (parts.length === 0) {
|
|
130
|
-
return `Browser action "${action}" completed successfully.`;
|
|
118
|
+
// Verification result (Skyvern pattern)
|
|
119
|
+
if (result.verification) {
|
|
120
|
+
const v = result.verification;
|
|
121
|
+
if (v.dialogClosed || v.formGone || v.urlChanged) {
|
|
122
|
+
parts.push(`✓ ${v.changeDescription}`);
|
|
123
|
+
}
|
|
124
|
+
else if (!v.significantChange) {
|
|
125
|
+
parts.push(`⚠ No change detected`);
|
|
126
|
+
}
|
|
131
127
|
}
|
|
128
|
+
// Page state
|
|
129
|
+
if (result.url)
|
|
130
|
+
parts.push(`URL: ${result.url}`);
|
|
131
|
+
if (result.title)
|
|
132
|
+
parts.push(`Title: ${result.title}`);
|
|
133
|
+
if (result.elementList)
|
|
134
|
+
parts.push(`\nElements:\n${result.elementList}`);
|
|
132
135
|
return parts.join('\n');
|
|
133
136
|
}
|
|
134
137
|
/**
|
|
@@ -171,45 +174,65 @@ function createBrowserTools(options) {
|
|
|
171
174
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
172
175
|
try {
|
|
173
176
|
const result = await waitForResult(action, args, toolCallId);
|
|
174
|
-
return formatResultForLLM(result, action);
|
|
177
|
+
return formatResultForLLM(result, action, args);
|
|
175
178
|
}
|
|
176
179
|
catch (error) {
|
|
177
180
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
178
|
-
return
|
|
181
|
+
return `❌ **ACTION FAILED**: Browser action "${action}" failed: ${errorMessage}
|
|
182
|
+
|
|
183
|
+
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
179
184
|
}
|
|
180
185
|
};
|
|
181
186
|
};
|
|
182
|
-
// browser_click
|
|
187
|
+
// browser_click - PRIMARY action for buttons and links
|
|
183
188
|
tools.push(tool(createToolFunction('click'), {
|
|
184
189
|
name: EBrowserTools.CLICK,
|
|
185
|
-
description: `Click an element
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
190
|
+
description: `Click an element by its [index] from the element list.
|
|
191
|
+
|
|
192
|
+
**WHEN TO USE (prioritize this over keypress):**
|
|
193
|
+
- Buttons labeled "Send", "Submit", "Save", "OK", "Confirm" → CLICK them
|
|
194
|
+
- Links (a tags) to navigate
|
|
195
|
+
- Checkboxes, radio buttons to select
|
|
196
|
+
- Any clickable element that performs an action
|
|
197
|
+
|
|
198
|
+
**SEMANTIC ELEMENT IDENTIFICATION:**
|
|
199
|
+
- Look for aria-label="Send", aria-label="Submit" for action buttons
|
|
200
|
+
- Look for type="submit" for form submission
|
|
201
|
+
- Look for button text like "Send", "Submit", "Save"
|
|
202
|
+
- For email compose: Find the Send button, DON'T use keyboard shortcuts
|
|
203
|
+
|
|
204
|
+
Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,
|
|
189
205
|
schema: BrowserClickSchema,
|
|
190
206
|
}));
|
|
191
207
|
// browser_type
|
|
192
208
|
tools.push(tool(createToolFunction('type'), {
|
|
193
209
|
name: EBrowserTools.TYPE,
|
|
194
|
-
description: `Type text into an input element
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
210
|
+
description: `Type text into an input/textarea element by [index].
|
|
211
|
+
|
|
212
|
+
**WHEN TO USE:**
|
|
213
|
+
- Input fields (fieldType="email", "search", "phone", etc.)
|
|
214
|
+
- Textareas for longer content
|
|
215
|
+
- Contenteditable divs (like Gmail compose body)
|
|
216
|
+
|
|
217
|
+
**OPTIONS:**
|
|
218
|
+
- pressEnter: true → Press Enter after typing (for search forms)
|
|
219
|
+
- pressEnter: false → Just type without submitting
|
|
220
|
+
|
|
221
|
+
**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.
|
|
222
|
+
|
|
223
|
+
Returns verification showing if text was entered or form was submitted.`,
|
|
198
224
|
schema: BrowserTypeSchema,
|
|
199
225
|
}));
|
|
200
226
|
// browser_navigate
|
|
201
227
|
tools.push(tool(createToolFunction('navigate'), {
|
|
202
228
|
name: EBrowserTools.NAVIGATE,
|
|
203
|
-
description: `Navigate to a URL
|
|
204
|
-
After navigation, you receive the new page's element list.`,
|
|
229
|
+
description: `Navigate to a URL (include https://). Returns new page's element list.`,
|
|
205
230
|
schema: BrowserNavigateSchema,
|
|
206
231
|
}));
|
|
207
232
|
// browser_scroll
|
|
208
233
|
tools.push(tool(createToolFunction('scroll'), {
|
|
209
234
|
name: EBrowserTools.SCROLL,
|
|
210
|
-
description: `Scroll the page
|
|
211
|
-
Use 'down' to scroll down, 'up' to scroll up.
|
|
212
|
-
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
235
|
+
description: `Scroll the page. Returns updated element list with newly visible elements.`,
|
|
213
236
|
schema: BrowserScrollSchema,
|
|
214
237
|
}));
|
|
215
238
|
// browser_extract
|
|
@@ -244,27 +267,19 @@ After going back, you receive the previous page's element list.`,
|
|
|
244
267
|
// browser_screenshot
|
|
245
268
|
tools.push(tool(createToolFunction('screenshot'), {
|
|
246
269
|
name: EBrowserTools.SCREENSHOT,
|
|
247
|
-
description: `Capture a screenshot
|
|
248
|
-
Returns the page state with a note that screenshot was displayed to the user.
|
|
249
|
-
Use browser_get_page_state to get the element list for automation.`,
|
|
270
|
+
description: `Capture a screenshot. Use browser_get_page_state for element list.`,
|
|
250
271
|
schema: BrowserScreenshotSchema,
|
|
251
272
|
}));
|
|
252
273
|
// browser_get_page_state
|
|
253
274
|
tools.push(tool(createToolFunction('get_page_state'), {
|
|
254
275
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
255
|
-
description: `Get
|
|
256
|
-
Use this at the start of a task to see what elements are available.
|
|
257
|
-
Returns a text list of elements with their index numbers for interaction.`,
|
|
276
|
+
description: `Get current page URL, title, and interactive elements list with index numbers.`,
|
|
258
277
|
schema: BrowserGetPageStateSchema,
|
|
259
278
|
}));
|
|
260
|
-
// browser_select_option
|
|
279
|
+
// browser_select_option
|
|
261
280
|
tools.push(tool(createToolFunction('select_option'), {
|
|
262
281
|
name: EBrowserTools.SELECT_OPTION,
|
|
263
|
-
description: `Select an option from a dropdown
|
|
264
|
-
For native <select> elements: finds and selects the option by value/label.
|
|
265
|
-
For custom dropdowns: clicks to open, then clicks the matching option.
|
|
266
|
-
Use this instead of click for dropdowns - it handles both native and custom selects.
|
|
267
|
-
After selection, you receive an updated element list.`,
|
|
282
|
+
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
268
283
|
schema: BrowserSelectOptionSchema,
|
|
269
284
|
}));
|
|
270
285
|
// browser_upload_file - Skyvern-inspired for file input handling
|
|
@@ -276,14 +291,26 @@ The system will download the file and attach it to the input.
|
|
|
276
291
|
After upload, you receive an updated element list.`,
|
|
277
292
|
schema: BrowserUploadFileSchema,
|
|
278
293
|
}));
|
|
279
|
-
// browser_keypress -
|
|
294
|
+
// browser_keypress - LAST RESORT for keyboard shortcuts
|
|
280
295
|
tools.push(tool(createToolFunction('keypress'), {
|
|
281
296
|
name: EBrowserTools.KEYPRESS,
|
|
282
|
-
description: `Press keyboard key(s)
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
297
|
+
description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.
|
|
298
|
+
|
|
299
|
+
**PREFER browser_click OVER keypress:**
|
|
300
|
+
- To send email → Find and CLICK the Send button
|
|
301
|
+
- To submit form → CLICK the Submit button
|
|
302
|
+
- To close modal → CLICK the X or Cancel button
|
|
303
|
+
|
|
304
|
+
**WHEN TO USE keypress:**
|
|
305
|
+
- Navigation: "Tab", "ArrowDown", "ArrowUp" in dropdowns
|
|
306
|
+
- Escape to close popups if no X button exists
|
|
307
|
+
- Control+A, Control+C for select/copy operations
|
|
308
|
+
- ONLY use Enter to submit if there's NO visible submit button
|
|
309
|
+
|
|
310
|
+
Single keys: "Enter", "Escape", "Tab", "ArrowDown"
|
|
311
|
+
Key combos: "Control+A", "Shift+Enter"
|
|
312
|
+
|
|
313
|
+
Returns verification showing if page state changed.`,
|
|
287
314
|
schema: BrowserKeypressSchema,
|
|
288
315
|
}));
|
|
289
316
|
return tools;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n url?: string;\n title?: string;\n elementList?: string; // Text-based element list\n error?: string;\n screenshot?: string; // Base64 screenshot (if requested)\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string\n): string {\n if (!result.success && result.error) {\n return `Browser action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n if (result.url != null && result.url !== '') {\n parts.push(`**Current URL:** ${result.url}`);\n }\n if (result.title != null && result.title !== '') {\n parts.push(`**Page Title:** ${result.title}`);\n }\n if (result.elementList != null && result.elementList !== '') {\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\n }\n if (result.screenshot != null && result.screenshot !== '') {\n parts.push('\\n[Screenshot captured and displayed to user]');\n }\n\n if (parts.length === 0) {\n return `Browser action \"${action}\" completed successfully.`;\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `Browser action \"${action}\" failed: ${errorMessage}`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element on the current web page by its index number.\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\nUse the index number in brackets to click that element.\nAfter clicking, you receive an updated element list showing the new page state.`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element on the page.\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\nSet pressEnter: true to submit forms after typing.\nAfter typing, you receive an updated element list.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL. Always include the full URL with https://.\nAfter navigation, you receive the new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page to reveal more content.\nUse 'down' to scroll down, 'up' to scroll up.\nAfter scrolling, you receive an updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot of the current page.\nReturns the page state with a note that screenshot was displayed to the user.\nUse browser_get_page_state to get the element list for automation.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get the current page state including URL, title, and all interactive elements.\nUse this at the start of a task to see what elements are available.\nReturns a text list of elements with their index numbers for interaction.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option - Skyvern-inspired for robust dropdown handling\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown or select element.\nFor native <select> elements: finds and selects the option by value/label.\nFor custom dropdowns: clicks to open, then clicks the matching option.\nUse this instead of click for dropdowns - it handles both native and custom selects.\nAfter selection, you receive an updated element list.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - For keyboard shortcuts and special keys\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) on the page.\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\", \"ArrowUp\", \"Backspace\", \"Delete\"\nKey combos: \"Control+A\" (select all), \"Control+C\" (copy), \"Shift+Enter\" (newline)\nUse this for form submission, closing modals, navigating dropdowns.\nAfter keypress, you receive an updated element list.`,\n schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AAkC9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAE;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAE;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAE;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAE;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EAAA;IAEd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,IAAI,IAAI,IAAI,MAAM,CAAC,GAAG,KAAK,EAAE,EAAE;QAC3C,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE;QAC/C,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;QAC3D,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,EAAE,EAAE;AACzD,QAAA,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AACxD,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG6D,+EAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACwC,0DAAA,CAAA;AACrD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE+D,iFAAA,CAAA;AAC5E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEgD,kEAAA,CAAA;AAC7D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEuD,yEAAA,CAAA;AACpE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAA;;;;AAImC,qDAAA,CAAA;AAChD,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;AAIkC,oDAAA,CAAA;AAC/C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Action status - matches Skyvern's ActionStatus\n */\nexport type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';\n\n/**\n * Verification status - matches Skyvern's VerificationStatus\n * - complete: Goal achieved\n * - terminate: Goal cannot be achieved, stop\n * - continue: Goal not yet achieved, keep going\n */\nexport type VerificationStatus = 'complete' | 'terminate' | 'continue';\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n status?: ActionStatus;\n url?: string;\n title?: string;\n elementList?: string;\n error?: string;\n errorType?: string; // Typed error classification\n screenshot?: string;\n elementDescription?: string;\n // State verification (before/after comparison)\n verification?: {\n urlChanged: boolean;\n titleChanged: boolean;\n elementCountDelta: number;\n significantChange: boolean;\n dialogClosed: boolean;\n formGone: boolean;\n changeDescription: string;\n };\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string,\n actionArgs?: Record<string, unknown>\n): string {\n if (!result.success && result.error) {\n const errorType = result.errorType ? `[${result.errorType}] ` : '';\n return `${errorType}Action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n // Verification result (Skyvern pattern)\n if (result.verification) {\n const v = result.verification;\n if (v.dialogClosed || v.formGone || v.urlChanged) {\n parts.push(`✓ ${v.changeDescription}`);\n } else if (!v.significantChange) {\n parts.push(`⚠ No change detected`);\n }\n }\n\n // Page state\n if (result.url) parts.push(`URL: ${result.url}`);\n if (result.title) parts.push(`Title: ${result.title}`);\n if (result.elementList) parts.push(`\\nElements:\\n${result.elementList}`);\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action, args);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `❌ **ACTION FAILED**: Browser action \"${action}\" failed: ${errorMessage}\n\n**REQUIRED**: Analyze why the action failed and try an alternative approach.`;\n }\n };\n };\n\n // browser_click - PRIMARY action for buttons and links\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element by its [index] from the element list.\n\n**WHEN TO USE (prioritize this over keypress):**\n- Buttons labeled \"Send\", \"Submit\", \"Save\", \"OK\", \"Confirm\" → CLICK them\n- Links (a tags) to navigate\n- Checkboxes, radio buttons to select\n- Any clickable element that performs an action\n\n**SEMANTIC ELEMENT IDENTIFICATION:**\n- Look for aria-label=\"Send\", aria-label=\"Submit\" for action buttons\n- Look for type=\"submit\" for form submission\n- Look for button text like \"Send\", \"Submit\", \"Save\"\n- For email compose: Find the Send button, DON'T use keyboard shortcuts\n\nReturns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input/textarea element by [index].\n\n**WHEN TO USE:**\n- Input fields (fieldType=\"email\", \"search\", \"phone\", etc.)\n- Textareas for longer content\n- Contenteditable divs (like Gmail compose body)\n\n**OPTIONS:**\n- pressEnter: true → Press Enter after typing (for search forms)\n- pressEnter: false → Just type without submitting\n\n**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.\n\nReturns verification showing if text was entered or form was submitted.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL (include https://). Returns new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page. Returns updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot. Use browser_get_page_state for element list.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get current page URL, title, and interactive elements list with index numbers.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - LAST RESORT for keyboard shortcuts\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.\n\n**PREFER browser_click OVER keypress:**\n- To send email → Find and CLICK the Send button\n- To submit form → CLICK the Submit button\n- To close modal → CLICK the X or Cancel button\n\n**WHEN TO USE keypress:**\n- Navigation: \"Tab\", \"ArrowDown\", \"ArrowUp\" in dropdowns\n- Escape to close popups if no X button exists\n- Control+A, Control+C for select/copy operations\n- ONLY use Enter to submit if there's NO visible submit button\n\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\"\nKey combos: \"Control+A\", \"Shift+Enter\"\n\nReturns verification showing if page state changed.`,\n schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AA4D9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAE;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAE;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAE;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAE;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EACd,UAAoC,EAAA;IAEpC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,GAAG,CAAI,CAAA,EAAA,MAAM,CAAC,SAAS,CAAA,EAAA,CAAI,GAAG,EAAE;QAClE,OAAO,CAAA,EAAG,SAAS,CAAW,QAAA,EAAA,MAAM,aAAa,MAAM,CAAC,KAAK,CAAA,CAAE;;IAGjE,MAAM,KAAK,GAAa,EAAE;;AAG1B,IAAA,IAAI,MAAM,CAAC,YAAY,EAAE;AACvB,QAAA,MAAM,CAAC,GAAG,MAAM,CAAC,YAAY;AAC7B,QAAA,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,EAAE;YAChD,KAAK,CAAC,IAAI,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,iBAAiB,CAAE,CAAA,CAAC;;AACjC,aAAA,IAAI,CAAC,CAAC,CAAC,iBAAiB,EAAE;AAC/B,YAAA,KAAK,CAAC,IAAI,CAAC,CAAA,oBAAA,CAAsB,CAAC;;;;IAKtC,IAAI,MAAM,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;IAChD,IAAI,MAAM,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,OAAA,EAAU,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;IACtD,IAAI,MAAM,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,aAAA,EAAgB,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;AAExE,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;gBAC5D,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;;YAC/C,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;gBACxD,OAAO,CAAA,qCAAA,EAAwC,MAAM,CAAA,UAAA,EAAa,YAAY;;6EAET;;AAEzE,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;AAcsF,wGAAA,CAAA;AACnG,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;AAaqD,uEAAA,CAAA;AAClE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAwE,sEAAA,CAAA;AACrF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAA4E,0EAAA,CAAA;AACzF,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAoE,kEAAA,CAAA;AACjF,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAgF,8EAAA,CAAA;AAC7F,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAsF,oFAAA,CAAA;AACnG,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;AAgBiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
@@ -30,16 +30,39 @@ export type BrowserToolName = (typeof EBrowserTools)[keyof typeof EBrowserTools]
|
|
|
30
30
|
* @returns Promise that resolves with the actual browser result (page state, etc.)
|
|
31
31
|
*/
|
|
32
32
|
export type BrowserToolCallback = (action: string, args: Record<string, unknown>, toolCallId: string) => Promise<BrowserActionResult>;
|
|
33
|
+
/**
|
|
34
|
+
* Action status - matches Skyvern's ActionStatus
|
|
35
|
+
*/
|
|
36
|
+
export type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';
|
|
37
|
+
/**
|
|
38
|
+
* Verification status - matches Skyvern's VerificationStatus
|
|
39
|
+
* - complete: Goal achieved
|
|
40
|
+
* - terminate: Goal cannot be achieved, stop
|
|
41
|
+
* - continue: Goal not yet achieved, keep going
|
|
42
|
+
*/
|
|
43
|
+
export type VerificationStatus = 'complete' | 'terminate' | 'continue';
|
|
33
44
|
/**
|
|
34
45
|
* Result returned from browser action execution
|
|
35
46
|
*/
|
|
36
47
|
export interface BrowserActionResult {
|
|
37
48
|
success: boolean;
|
|
49
|
+
status?: ActionStatus;
|
|
38
50
|
url?: string;
|
|
39
51
|
title?: string;
|
|
40
52
|
elementList?: string;
|
|
41
53
|
error?: string;
|
|
54
|
+
errorType?: string;
|
|
42
55
|
screenshot?: string;
|
|
56
|
+
elementDescription?: string;
|
|
57
|
+
verification?: {
|
|
58
|
+
urlChanged: boolean;
|
|
59
|
+
titleChanged: boolean;
|
|
60
|
+
elementCountDelta: number;
|
|
61
|
+
significantChange: boolean;
|
|
62
|
+
dialogClosed: boolean;
|
|
63
|
+
formGone: boolean;
|
|
64
|
+
changeDescription: string;
|
|
65
|
+
};
|
|
43
66
|
}
|
|
44
67
|
/**
|
|
45
68
|
* Check if browser capability is available based on request headers or context
|
package/package.json
CHANGED
|
@@ -42,16 +42,42 @@ export type BrowserToolCallback = (
|
|
|
42
42
|
toolCallId: string
|
|
43
43
|
) => Promise<BrowserActionResult>;
|
|
44
44
|
|
|
45
|
+
/**
|
|
46
|
+
* Action status - matches Skyvern's ActionStatus
|
|
47
|
+
*/
|
|
48
|
+
export type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Verification status - matches Skyvern's VerificationStatus
|
|
52
|
+
* - complete: Goal achieved
|
|
53
|
+
* - terminate: Goal cannot be achieved, stop
|
|
54
|
+
* - continue: Goal not yet achieved, keep going
|
|
55
|
+
*/
|
|
56
|
+
export type VerificationStatus = 'complete' | 'terminate' | 'continue';
|
|
57
|
+
|
|
45
58
|
/**
|
|
46
59
|
* Result returned from browser action execution
|
|
47
60
|
*/
|
|
48
61
|
export interface BrowserActionResult {
|
|
49
62
|
success: boolean;
|
|
63
|
+
status?: ActionStatus;
|
|
50
64
|
url?: string;
|
|
51
65
|
title?: string;
|
|
52
|
-
elementList?: string;
|
|
66
|
+
elementList?: string;
|
|
53
67
|
error?: string;
|
|
54
|
-
|
|
68
|
+
errorType?: string; // Typed error classification
|
|
69
|
+
screenshot?: string;
|
|
70
|
+
elementDescription?: string;
|
|
71
|
+
// State verification (before/after comparison)
|
|
72
|
+
verification?: {
|
|
73
|
+
urlChanged: boolean;
|
|
74
|
+
titleChanged: boolean;
|
|
75
|
+
elementCountDelta: number;
|
|
76
|
+
significantChange: boolean;
|
|
77
|
+
dialogClosed: boolean;
|
|
78
|
+
formGone: boolean;
|
|
79
|
+
changeDescription: string;
|
|
80
|
+
};
|
|
55
81
|
}
|
|
56
82
|
|
|
57
83
|
/**
|
|
@@ -187,30 +213,30 @@ export interface CreateBrowserToolsOptions {
|
|
|
187
213
|
*/
|
|
188
214
|
function formatResultForLLM(
|
|
189
215
|
result: BrowserActionResult,
|
|
190
|
-
action: string
|
|
216
|
+
action: string,
|
|
217
|
+
actionArgs?: Record<string, unknown>
|
|
191
218
|
): string {
|
|
192
219
|
if (!result.success && result.error) {
|
|
193
|
-
|
|
220
|
+
const errorType = result.errorType ? `[${result.errorType}] ` : '';
|
|
221
|
+
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
194
222
|
}
|
|
195
223
|
|
|
196
224
|
const parts: string[] = [];
|
|
197
225
|
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
}
|
|
207
|
-
if (result.screenshot != null && result.screenshot !== '') {
|
|
208
|
-
parts.push('\n[Screenshot captured and displayed to user]');
|
|
226
|
+
// Verification result (Skyvern pattern)
|
|
227
|
+
if (result.verification) {
|
|
228
|
+
const v = result.verification;
|
|
229
|
+
if (v.dialogClosed || v.formGone || v.urlChanged) {
|
|
230
|
+
parts.push(`✓ ${v.changeDescription}`);
|
|
231
|
+
} else if (!v.significantChange) {
|
|
232
|
+
parts.push(`⚠ No change detected`);
|
|
233
|
+
}
|
|
209
234
|
}
|
|
210
235
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
}
|
|
236
|
+
// Page state
|
|
237
|
+
if (result.url) parts.push(`URL: ${result.url}`);
|
|
238
|
+
if (result.title) parts.push(`Title: ${result.title}`);
|
|
239
|
+
if (result.elementList) parts.push(`\nElements:\n${result.elementList}`);
|
|
214
240
|
|
|
215
241
|
return parts.join('\n');
|
|
216
242
|
}
|
|
@@ -265,23 +291,36 @@ export function createBrowserTools(
|
|
|
265
291
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
266
292
|
try {
|
|
267
293
|
const result = await waitForResult(action, args, toolCallId);
|
|
268
|
-
return formatResultForLLM(result, action);
|
|
294
|
+
return formatResultForLLM(result, action, args);
|
|
269
295
|
} catch (error) {
|
|
270
296
|
const errorMessage =
|
|
271
297
|
error instanceof Error ? error.message : String(error);
|
|
272
|
-
return
|
|
298
|
+
return `❌ **ACTION FAILED**: Browser action "${action}" failed: ${errorMessage}
|
|
299
|
+
|
|
300
|
+
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
273
301
|
}
|
|
274
302
|
};
|
|
275
303
|
};
|
|
276
304
|
|
|
277
|
-
// browser_click
|
|
305
|
+
// browser_click - PRIMARY action for buttons and links
|
|
278
306
|
tools.push(
|
|
279
307
|
tool(createToolFunction('click'), {
|
|
280
308
|
name: EBrowserTools.CLICK,
|
|
281
|
-
description: `Click an element
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
309
|
+
description: `Click an element by its [index] from the element list.
|
|
310
|
+
|
|
311
|
+
**WHEN TO USE (prioritize this over keypress):**
|
|
312
|
+
- Buttons labeled "Send", "Submit", "Save", "OK", "Confirm" → CLICK them
|
|
313
|
+
- Links (a tags) to navigate
|
|
314
|
+
- Checkboxes, radio buttons to select
|
|
315
|
+
- Any clickable element that performs an action
|
|
316
|
+
|
|
317
|
+
**SEMANTIC ELEMENT IDENTIFICATION:**
|
|
318
|
+
- Look for aria-label="Send", aria-label="Submit" for action buttons
|
|
319
|
+
- Look for type="submit" for form submission
|
|
320
|
+
- Look for button text like "Send", "Submit", "Save"
|
|
321
|
+
- For email compose: Find the Send button, DON'T use keyboard shortcuts
|
|
322
|
+
|
|
323
|
+
Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,
|
|
285
324
|
schema: BrowserClickSchema,
|
|
286
325
|
})
|
|
287
326
|
);
|
|
@@ -290,10 +329,20 @@ After clicking, you receive an updated element list showing the new page state.`
|
|
|
290
329
|
tools.push(
|
|
291
330
|
tool(createToolFunction('type'), {
|
|
292
331
|
name: EBrowserTools.TYPE,
|
|
293
|
-
description: `Type text into an input element
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
332
|
+
description: `Type text into an input/textarea element by [index].
|
|
333
|
+
|
|
334
|
+
**WHEN TO USE:**
|
|
335
|
+
- Input fields (fieldType="email", "search", "phone", etc.)
|
|
336
|
+
- Textareas for longer content
|
|
337
|
+
- Contenteditable divs (like Gmail compose body)
|
|
338
|
+
|
|
339
|
+
**OPTIONS:**
|
|
340
|
+
- pressEnter: true → Press Enter after typing (for search forms)
|
|
341
|
+
- pressEnter: false → Just type without submitting
|
|
342
|
+
|
|
343
|
+
**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.
|
|
344
|
+
|
|
345
|
+
Returns verification showing if text was entered or form was submitted.`,
|
|
297
346
|
schema: BrowserTypeSchema,
|
|
298
347
|
})
|
|
299
348
|
);
|
|
@@ -302,8 +351,7 @@ After typing, you receive an updated element list.`,
|
|
|
302
351
|
tools.push(
|
|
303
352
|
tool(createToolFunction('navigate'), {
|
|
304
353
|
name: EBrowserTools.NAVIGATE,
|
|
305
|
-
description: `Navigate to a URL
|
|
306
|
-
After navigation, you receive the new page's element list.`,
|
|
354
|
+
description: `Navigate to a URL (include https://). Returns new page's element list.`,
|
|
307
355
|
schema: BrowserNavigateSchema,
|
|
308
356
|
})
|
|
309
357
|
);
|
|
@@ -312,9 +360,7 @@ After navigation, you receive the new page's element list.`,
|
|
|
312
360
|
tools.push(
|
|
313
361
|
tool(createToolFunction('scroll'), {
|
|
314
362
|
name: EBrowserTools.SCROLL,
|
|
315
|
-
description: `Scroll the page
|
|
316
|
-
Use 'down' to scroll down, 'up' to scroll up.
|
|
317
|
-
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
363
|
+
description: `Scroll the page. Returns updated element list with newly visible elements.`,
|
|
318
364
|
schema: BrowserScrollSchema,
|
|
319
365
|
})
|
|
320
366
|
);
|
|
@@ -364,9 +410,7 @@ After going back, you receive the previous page's element list.`,
|
|
|
364
410
|
tools.push(
|
|
365
411
|
tool(createToolFunction('screenshot'), {
|
|
366
412
|
name: EBrowserTools.SCREENSHOT,
|
|
367
|
-
description: `Capture a screenshot
|
|
368
|
-
Returns the page state with a note that screenshot was displayed to the user.
|
|
369
|
-
Use browser_get_page_state to get the element list for automation.`,
|
|
413
|
+
description: `Capture a screenshot. Use browser_get_page_state for element list.`,
|
|
370
414
|
schema: BrowserScreenshotSchema,
|
|
371
415
|
})
|
|
372
416
|
);
|
|
@@ -375,22 +419,16 @@ Use browser_get_page_state to get the element list for automation.`,
|
|
|
375
419
|
tools.push(
|
|
376
420
|
tool(createToolFunction('get_page_state'), {
|
|
377
421
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
378
|
-
description: `Get
|
|
379
|
-
Use this at the start of a task to see what elements are available.
|
|
380
|
-
Returns a text list of elements with their index numbers for interaction.`,
|
|
422
|
+
description: `Get current page URL, title, and interactive elements list with index numbers.`,
|
|
381
423
|
schema: BrowserGetPageStateSchema,
|
|
382
424
|
})
|
|
383
425
|
);
|
|
384
426
|
|
|
385
|
-
// browser_select_option
|
|
427
|
+
// browser_select_option
|
|
386
428
|
tools.push(
|
|
387
429
|
tool(createToolFunction('select_option'), {
|
|
388
430
|
name: EBrowserTools.SELECT_OPTION,
|
|
389
|
-
description: `Select an option from a dropdown
|
|
390
|
-
For native <select> elements: finds and selects the option by value/label.
|
|
391
|
-
For custom dropdowns: clicks to open, then clicks the matching option.
|
|
392
|
-
Use this instead of click for dropdowns - it handles both native and custom selects.
|
|
393
|
-
After selection, you receive an updated element list.`,
|
|
431
|
+
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
394
432
|
schema: BrowserSelectOptionSchema,
|
|
395
433
|
})
|
|
396
434
|
);
|
|
@@ -407,15 +445,27 @@ After upload, you receive an updated element list.`,
|
|
|
407
445
|
})
|
|
408
446
|
);
|
|
409
447
|
|
|
410
|
-
// browser_keypress -
|
|
448
|
+
// browser_keypress - LAST RESORT for keyboard shortcuts
|
|
411
449
|
tools.push(
|
|
412
450
|
tool(createToolFunction('keypress'), {
|
|
413
451
|
name: EBrowserTools.KEYPRESS,
|
|
414
|
-
description: `Press keyboard key(s)
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
452
|
+
description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.
|
|
453
|
+
|
|
454
|
+
**PREFER browser_click OVER keypress:**
|
|
455
|
+
- To send email → Find and CLICK the Send button
|
|
456
|
+
- To submit form → CLICK the Submit button
|
|
457
|
+
- To close modal → CLICK the X or Cancel button
|
|
458
|
+
|
|
459
|
+
**WHEN TO USE keypress:**
|
|
460
|
+
- Navigation: "Tab", "ArrowDown", "ArrowUp" in dropdowns
|
|
461
|
+
- Escape to close popups if no X button exists
|
|
462
|
+
- Control+A, Control+C for select/copy operations
|
|
463
|
+
- ONLY use Enter to submit if there's NO visible submit button
|
|
464
|
+
|
|
465
|
+
Single keys: "Enter", "Escape", "Tab", "ArrowDown"
|
|
466
|
+
Key combos: "Control+A", "Shift+Enter"
|
|
467
|
+
|
|
468
|
+
Returns verification showing if page state changed.`,
|
|
419
469
|
schema: BrowserKeypressSchema,
|
|
420
470
|
})
|
|
421
471
|
);
|