illuma-agents 1.0.28 → 1.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/BrowserTools.cjs +83 -37
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/tools/BrowserTools.mjs +83 -37
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/tools/BrowserTools.d.ts +26 -0
- package/package.json +1 -1
- package/src/tools/BrowserTools.ts +120 -39
|
@@ -18,6 +18,10 @@ const EBrowserTools = {
|
|
|
18
18
|
BACK: 'browser_back',
|
|
19
19
|
SCREENSHOT: 'browser_screenshot',
|
|
20
20
|
GET_PAGE_STATE: 'browser_get_page_state',
|
|
21
|
+
// Skyvern-inspired additions for robust form handling
|
|
22
|
+
SELECT_OPTION: 'browser_select_option',
|
|
23
|
+
UPLOAD_FILE: 'browser_upload_file',
|
|
24
|
+
KEYPRESS: 'browser_keypress',
|
|
21
25
|
};
|
|
22
26
|
/**
|
|
23
27
|
* Check if browser capability is available based on request headers or context
|
|
@@ -81,29 +85,55 @@ const BrowserWaitSchema = zod.z.object({
|
|
|
81
85
|
const BrowserBackSchema = zod.z.object({});
|
|
82
86
|
const BrowserScreenshotSchema = zod.z.object({});
|
|
83
87
|
const BrowserGetPageStateSchema = zod.z.object({});
|
|
88
|
+
// Skyvern-inspired schemas for robust form handling
|
|
89
|
+
const BrowserSelectOptionSchema = zod.z.object({
|
|
90
|
+
index: zod.z
|
|
91
|
+
.number()
|
|
92
|
+
.describe('The index number of the select/dropdown element'),
|
|
93
|
+
value: zod.z
|
|
94
|
+
.string()
|
|
95
|
+
.optional()
|
|
96
|
+
.describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),
|
|
97
|
+
});
|
|
98
|
+
const BrowserUploadFileSchema = zod.z.object({
|
|
99
|
+
index: zod.z
|
|
100
|
+
.number()
|
|
101
|
+
.describe('The index number of the file input element'),
|
|
102
|
+
fileUrl: zod.z
|
|
103
|
+
.string()
|
|
104
|
+
.describe('URL of the file to upload (the system will download and upload it)'),
|
|
105
|
+
});
|
|
106
|
+
const BrowserKeypressSchema = zod.z.object({
|
|
107
|
+
keys: zod.z
|
|
108
|
+
.string()
|
|
109
|
+
.describe('Key(s) to press. Single key: "Enter", "Escape", "Tab", "ArrowDown". Combo: "Control+A", "Shift+Enter"'),
|
|
110
|
+
});
|
|
84
111
|
/**
|
|
85
112
|
* Format browser action result for LLM consumption
|
|
86
113
|
*/
|
|
87
|
-
function formatResultForLLM(result, action) {
|
|
114
|
+
function formatResultForLLM(result, action, actionArgs) {
|
|
88
115
|
if (!result.success && result.error) {
|
|
89
|
-
|
|
116
|
+
const errorType = result.errorType ? `[${result.errorType}] ` : '';
|
|
117
|
+
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
90
118
|
}
|
|
91
119
|
const parts = [];
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if (result.screenshot != null && result.screenshot !== '') {
|
|
102
|
-
parts.push('\n[Screenshot captured and displayed to user]');
|
|
103
|
-
}
|
|
104
|
-
if (parts.length === 0) {
|
|
105
|
-
return `Browser action "${action}" completed successfully.`;
|
|
120
|
+
// Verification result (Skyvern pattern)
|
|
121
|
+
if (result.verification) {
|
|
122
|
+
const v = result.verification;
|
|
123
|
+
if (v.dialogClosed || v.formGone || v.urlChanged) {
|
|
124
|
+
parts.push(`✓ ${v.changeDescription}`);
|
|
125
|
+
}
|
|
126
|
+
else if (!v.significantChange) {
|
|
127
|
+
parts.push(`⚠ No change detected`);
|
|
128
|
+
}
|
|
106
129
|
}
|
|
130
|
+
// Page state
|
|
131
|
+
if (result.url)
|
|
132
|
+
parts.push(`URL: ${result.url}`);
|
|
133
|
+
if (result.title)
|
|
134
|
+
parts.push(`Title: ${result.title}`);
|
|
135
|
+
if (result.elementList)
|
|
136
|
+
parts.push(`\nElements:\n${result.elementList}`);
|
|
107
137
|
return parts.join('\n');
|
|
108
138
|
}
|
|
109
139
|
/**
|
|
@@ -146,45 +176,40 @@ function createBrowserTools(options) {
|
|
|
146
176
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
147
177
|
try {
|
|
148
178
|
const result = await waitForResult(action, args, toolCallId);
|
|
149
|
-
return formatResultForLLM(result, action);
|
|
179
|
+
return formatResultForLLM(result, action, args);
|
|
150
180
|
}
|
|
151
181
|
catch (error) {
|
|
152
182
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
153
|
-
return
|
|
183
|
+
return `❌ **ACTION FAILED**: Browser action "${action}" failed: ${errorMessage}
|
|
184
|
+
|
|
185
|
+
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
154
186
|
}
|
|
155
187
|
};
|
|
156
188
|
};
|
|
157
189
|
// browser_click
|
|
158
190
|
tools$1.push(tools.tool(createToolFunction('click'), {
|
|
159
191
|
name: EBrowserTools.CLICK,
|
|
160
|
-
description: `Click an element
|
|
161
|
-
|
|
162
|
-
Use the index number in brackets to click that element.
|
|
163
|
-
After clicking, you receive an updated element list showing the new page state.`,
|
|
192
|
+
description: `Click an element by its index number from the element list (e.g., [0], [1]).
|
|
193
|
+
Returns state verification showing if UI changed (dialog closed, form submitted, etc.).`,
|
|
164
194
|
schema: BrowserClickSchema,
|
|
165
195
|
}));
|
|
166
196
|
// browser_type
|
|
167
197
|
tools$1.push(tools.tool(createToolFunction('type'), {
|
|
168
198
|
name: EBrowserTools.TYPE,
|
|
169
|
-
description: `Type text into an input element
|
|
170
|
-
|
|
171
|
-
Set pressEnter: true to submit forms after typing.
|
|
172
|
-
After typing, you receive an updated element list.`,
|
|
199
|
+
description: `Type text into an input element by index. Set pressEnter: true to submit.
|
|
200
|
+
Returns state verification showing if form submitted or errors appeared.`,
|
|
173
201
|
schema: BrowserTypeSchema,
|
|
174
202
|
}));
|
|
175
203
|
// browser_navigate
|
|
176
204
|
tools$1.push(tools.tool(createToolFunction('navigate'), {
|
|
177
205
|
name: EBrowserTools.NAVIGATE,
|
|
178
|
-
description: `Navigate to a URL
|
|
179
|
-
After navigation, you receive the new page's element list.`,
|
|
206
|
+
description: `Navigate to a URL (include https://). Returns new page's element list.`,
|
|
180
207
|
schema: BrowserNavigateSchema,
|
|
181
208
|
}));
|
|
182
209
|
// browser_scroll
|
|
183
210
|
tools$1.push(tools.tool(createToolFunction('scroll'), {
|
|
184
211
|
name: EBrowserTools.SCROLL,
|
|
185
|
-
description: `Scroll the page
|
|
186
|
-
Use 'down' to scroll down, 'up' to scroll up.
|
|
187
|
-
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
212
|
+
description: `Scroll the page. Returns updated element list with newly visible elements.`,
|
|
188
213
|
schema: BrowserScrollSchema,
|
|
189
214
|
}));
|
|
190
215
|
// browser_extract
|
|
@@ -219,19 +244,40 @@ After going back, you receive the previous page's element list.`,
|
|
|
219
244
|
// browser_screenshot
|
|
220
245
|
tools$1.push(tools.tool(createToolFunction('screenshot'), {
|
|
221
246
|
name: EBrowserTools.SCREENSHOT,
|
|
222
|
-
description: `Capture a screenshot
|
|
223
|
-
Returns the page state with a note that screenshot was displayed to the user.
|
|
224
|
-
Use browser_get_page_state to get the element list for automation.`,
|
|
247
|
+
description: `Capture a screenshot. Use browser_get_page_state for element list.`,
|
|
225
248
|
schema: BrowserScreenshotSchema,
|
|
226
249
|
}));
|
|
227
250
|
// browser_get_page_state
|
|
228
251
|
tools$1.push(tools.tool(createToolFunction('get_page_state'), {
|
|
229
252
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
230
|
-
description: `Get
|
|
231
|
-
Use this at the start of a task to see what elements are available.
|
|
232
|
-
Returns a text list of elements with their index numbers for interaction.`,
|
|
253
|
+
description: `Get current page URL, title, and interactive elements list with index numbers.`,
|
|
233
254
|
schema: BrowserGetPageStateSchema,
|
|
234
255
|
}));
|
|
256
|
+
// browser_select_option
|
|
257
|
+
tools$1.push(tools.tool(createToolFunction('select_option'), {
|
|
258
|
+
name: EBrowserTools.SELECT_OPTION,
|
|
259
|
+
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
260
|
+
schema: BrowserSelectOptionSchema,
|
|
261
|
+
}));
|
|
262
|
+
// browser_upload_file - Skyvern-inspired for file input handling
|
|
263
|
+
tools$1.push(tools.tool(createToolFunction('upload_file'), {
|
|
264
|
+
name: EBrowserTools.UPLOAD_FILE,
|
|
265
|
+
description: `Upload a file to a file input element.
|
|
266
|
+
Provide the index of the file input and the URL of the file to upload.
|
|
267
|
+
The system will download the file and attach it to the input.
|
|
268
|
+
After upload, you receive an updated element list.`,
|
|
269
|
+
schema: BrowserUploadFileSchema,
|
|
270
|
+
}));
|
|
271
|
+
// browser_keypress - For keyboard shortcuts and special keys
|
|
272
|
+
tools$1.push(tools.tool(createToolFunction('keypress'), {
|
|
273
|
+
name: EBrowserTools.KEYPRESS,
|
|
274
|
+
description: `Press keyboard key(s) on the page.
|
|
275
|
+
Single keys: "Enter", "Escape", "Tab", "ArrowDown", "ArrowUp", "Backspace", "Delete"
|
|
276
|
+
Key combos: "Control+A" (select all), "Control+C" (copy), "Shift+Enter" (newline)
|
|
277
|
+
Use this for form submission, closing modals, navigating dropdowns.
|
|
278
|
+
After keypress, you receive an updated element list.`,
|
|
279
|
+
schema: BrowserKeypressSchema,
|
|
280
|
+
}));
|
|
235
281
|
return tools$1;
|
|
236
282
|
}
|
|
237
283
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n url?: string;\n title?: string;\n elementList?: string; // Text-based element list\n error?: string;\n screenshot?: string; // Base64 screenshot (if requested)\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string\n): string {\n if (!result.success && result.error) {\n return `Browser action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n if (result.url != null && result.url !== '') {\n parts.push(`**Current URL:** ${result.url}`);\n }\n if (result.title != null && result.title !== '') {\n parts.push(`**Page Title:** ${result.title}`);\n }\n if (result.elementList != null && result.elementList !== '') {\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\n }\n if (result.screenshot != null && result.screenshot !== '') {\n parts.push('\\n[Screenshot captured and displayed to user]');\n }\n\n if (parts.length === 0) {\n return `Browser action \"${action}\" completed successfully.`;\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `Browser action \"${action}\" failed: ${errorMessage}`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element on the current web page by its index number.\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\nUse the index number in brackets to click that element.\nAfter clicking, you receive an updated element list showing the new page state.`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element on the page.\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\nSet pressEnter: true to submit forms after typing.\nAfter typing, you receive an updated element list.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL. Always include the full URL with https://.\nAfter navigation, you receive the new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page to reveal more content.\nUse 'down' to scroll down, 'up' to scroll up.\nAfter scrolling, you receive an updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot of the current page.\nReturns the page state with a note that screenshot was displayed to the user.\nUse browser_get_page_state to get the element list for automation.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get the current page state including URL, title, and all interactive elements.\nUse this at the start of a task to see what elements are available.\nReturns a text list of elements with their index numbers for interaction.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n return tools;\n}\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAkC1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAEA;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAEA;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAyB9C;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EAAA;IAEd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,IAAI,IAAI,IAAI,MAAM,CAAC,GAAG,KAAK,EAAE,EAAE;QAC3C,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE;QAC/C,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;QAC3D,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,EAAE,EAAE;AACzD,QAAA,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AACxD,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG6D,+EAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACwC,0DAAA,CAAA;AACrD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE+D,iFAAA,CAAA;AAC5E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEgD,kEAAA,CAAA;AAC7D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEuD,yEAAA,CAAA;AACpE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Action status - matches Skyvern's ActionStatus\n */\nexport type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';\n\n/**\n * Verification status - matches Skyvern's VerificationStatus\n * - complete: Goal achieved\n * - terminate: Goal cannot be achieved, stop\n * - continue: Goal not yet achieved, keep going\n */\nexport type VerificationStatus = 'complete' | 'terminate' | 'continue';\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n status?: ActionStatus;\n url?: string;\n title?: string;\n elementList?: string;\n error?: string;\n errorType?: string; // Typed error classification\n screenshot?: string;\n elementDescription?: string;\n // State verification (before/after comparison)\n verification?: {\n urlChanged: boolean;\n titleChanged: boolean;\n elementCountDelta: number;\n significantChange: boolean;\n dialogClosed: boolean;\n formGone: boolean;\n changeDescription: string;\n };\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string,\n actionArgs?: Record<string, unknown>\n): string {\n if (!result.success && result.error) {\n const errorType = result.errorType ? `[${result.errorType}] ` : '';\n return `${errorType}Action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n // Verification result (Skyvern pattern)\n if (result.verification) {\n const v = result.verification;\n if (v.dialogClosed || v.formGone || v.urlChanged) {\n parts.push(`✓ ${v.changeDescription}`);\n } else if (!v.significantChange) {\n parts.push(`⚠ No change detected`);\n }\n }\n\n // Page state\n if (result.url) parts.push(`URL: ${result.url}`);\n if (result.title) parts.push(`Title: ${result.title}`);\n if (result.elementList) parts.push(`\\nElements:\\n${result.elementList}`);\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action, args);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `❌ **ACTION FAILED**: Browser action \"${action}\" failed: ${errorMessage}\n\n**REQUIRED**: Analyze why the action failed and try an alternative approach.`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element by its index number from the element list (e.g., [0], [1]).\nReturns state verification showing if UI changed (dialog closed, form submitted, etc.).`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element by index. Set pressEnter: true to submit.\nReturns state verification showing if form submitted or errors appeared.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL (include https://). Returns new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page. Returns updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot. Use browser_get_page_state for element list.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get current page URL, title, and interactive elements list with index numbers.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - For keyboard shortcuts and special keys\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) on the page.\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\", \"ArrowUp\", \"Backspace\", \"Delete\"\nKey combos: \"Control+A\" (select all), \"Control+C\" (copy), \"Shift+Enter\" (newline)\nUse this for form submission, closing modals, navigating dropdowns.\nAfter keypress, you receive an updated element list.`,\n schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AA4D9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAEA;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAEA;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAEA;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAEA;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EACd,UAAoC,EAAA;IAEpC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,GAAG,CAAI,CAAA,EAAA,MAAM,CAAC,SAAS,CAAA,EAAA,CAAI,GAAG,EAAE;QAClE,OAAO,CAAA,EAAG,SAAS,CAAW,QAAA,EAAA,MAAM,aAAa,MAAM,CAAC,KAAK,CAAA,CAAE;;IAGjE,MAAM,KAAK,GAAa,EAAE;;AAG1B,IAAA,IAAI,MAAM,CAAC,YAAY,EAAE;AACvB,QAAA,MAAM,CAAC,GAAG,MAAM,CAAC,YAAY;AAC7B,QAAA,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,EAAE;YAChD,KAAK,CAAC,IAAI,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,iBAAiB,CAAE,CAAA,CAAC;;AACjC,aAAA,IAAI,CAAC,CAAC,CAAC,iBAAiB,EAAE;AAC/B,YAAA,KAAK,CAAC,IAAI,CAAC,CAAA,oBAAA,CAAsB,CAAC;;;;IAKtC,IAAI,MAAM,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;IAChD,IAAI,MAAM,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,OAAA,EAAU,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;IACtD,IAAI,MAAM,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,aAAA,EAAgB,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;AAExE,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;gBAC5D,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;;YAC/C,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;gBACxD,OAAO,CAAA,qCAAA,EAAwC,MAAM,CAAA,UAAA,EAAa,YAAY;;6EAET;;AAEzE,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACqE,uFAAA,CAAA;AAClF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AACsD,wEAAA,CAAA;AACnE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAwE,sEAAA,CAAA;AACrF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAA4E,0EAAA,CAAA;AACzF,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAoE,kEAAA,CAAA;AACjF,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAgF,8EAAA,CAAA;AAC7F,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAsF,oFAAA,CAAA;AACnG,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;AAIkC,oDAAA,CAAA;AAC/C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
@@ -16,6 +16,10 @@ const EBrowserTools = {
|
|
|
16
16
|
BACK: 'browser_back',
|
|
17
17
|
SCREENSHOT: 'browser_screenshot',
|
|
18
18
|
GET_PAGE_STATE: 'browser_get_page_state',
|
|
19
|
+
// Skyvern-inspired additions for robust form handling
|
|
20
|
+
SELECT_OPTION: 'browser_select_option',
|
|
21
|
+
UPLOAD_FILE: 'browser_upload_file',
|
|
22
|
+
KEYPRESS: 'browser_keypress',
|
|
19
23
|
};
|
|
20
24
|
/**
|
|
21
25
|
* Check if browser capability is available based on request headers or context
|
|
@@ -79,29 +83,55 @@ const BrowserWaitSchema = z.object({
|
|
|
79
83
|
const BrowserBackSchema = z.object({});
|
|
80
84
|
const BrowserScreenshotSchema = z.object({});
|
|
81
85
|
const BrowserGetPageStateSchema = z.object({});
|
|
86
|
+
// Skyvern-inspired schemas for robust form handling
|
|
87
|
+
const BrowserSelectOptionSchema = z.object({
|
|
88
|
+
index: z
|
|
89
|
+
.number()
|
|
90
|
+
.describe('The index number of the select/dropdown element'),
|
|
91
|
+
value: z
|
|
92
|
+
.string()
|
|
93
|
+
.optional()
|
|
94
|
+
.describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),
|
|
95
|
+
});
|
|
96
|
+
const BrowserUploadFileSchema = z.object({
|
|
97
|
+
index: z
|
|
98
|
+
.number()
|
|
99
|
+
.describe('The index number of the file input element'),
|
|
100
|
+
fileUrl: z
|
|
101
|
+
.string()
|
|
102
|
+
.describe('URL of the file to upload (the system will download and upload it)'),
|
|
103
|
+
});
|
|
104
|
+
const BrowserKeypressSchema = z.object({
|
|
105
|
+
keys: z
|
|
106
|
+
.string()
|
|
107
|
+
.describe('Key(s) to press. Single key: "Enter", "Escape", "Tab", "ArrowDown". Combo: "Control+A", "Shift+Enter"'),
|
|
108
|
+
});
|
|
82
109
|
/**
|
|
83
110
|
* Format browser action result for LLM consumption
|
|
84
111
|
*/
|
|
85
|
-
function formatResultForLLM(result, action) {
|
|
112
|
+
function formatResultForLLM(result, action, actionArgs) {
|
|
86
113
|
if (!result.success && result.error) {
|
|
87
|
-
|
|
114
|
+
const errorType = result.errorType ? `[${result.errorType}] ` : '';
|
|
115
|
+
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
88
116
|
}
|
|
89
117
|
const parts = [];
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
if (result.screenshot != null && result.screenshot !== '') {
|
|
100
|
-
parts.push('\n[Screenshot captured and displayed to user]');
|
|
101
|
-
}
|
|
102
|
-
if (parts.length === 0) {
|
|
103
|
-
return `Browser action "${action}" completed successfully.`;
|
|
118
|
+
// Verification result (Skyvern pattern)
|
|
119
|
+
if (result.verification) {
|
|
120
|
+
const v = result.verification;
|
|
121
|
+
if (v.dialogClosed || v.formGone || v.urlChanged) {
|
|
122
|
+
parts.push(`✓ ${v.changeDescription}`);
|
|
123
|
+
}
|
|
124
|
+
else if (!v.significantChange) {
|
|
125
|
+
parts.push(`⚠ No change detected`);
|
|
126
|
+
}
|
|
104
127
|
}
|
|
128
|
+
// Page state
|
|
129
|
+
if (result.url)
|
|
130
|
+
parts.push(`URL: ${result.url}`);
|
|
131
|
+
if (result.title)
|
|
132
|
+
parts.push(`Title: ${result.title}`);
|
|
133
|
+
if (result.elementList)
|
|
134
|
+
parts.push(`\nElements:\n${result.elementList}`);
|
|
105
135
|
return parts.join('\n');
|
|
106
136
|
}
|
|
107
137
|
/**
|
|
@@ -144,45 +174,40 @@ function createBrowserTools(options) {
|
|
|
144
174
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
145
175
|
try {
|
|
146
176
|
const result = await waitForResult(action, args, toolCallId);
|
|
147
|
-
return formatResultForLLM(result, action);
|
|
177
|
+
return formatResultForLLM(result, action, args);
|
|
148
178
|
}
|
|
149
179
|
catch (error) {
|
|
150
180
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
151
|
-
return
|
|
181
|
+
return `❌ **ACTION FAILED**: Browser action "${action}" failed: ${errorMessage}
|
|
182
|
+
|
|
183
|
+
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
152
184
|
}
|
|
153
185
|
};
|
|
154
186
|
};
|
|
155
187
|
// browser_click
|
|
156
188
|
tools.push(tool(createToolFunction('click'), {
|
|
157
189
|
name: EBrowserTools.CLICK,
|
|
158
|
-
description: `Click an element
|
|
159
|
-
|
|
160
|
-
Use the index number in brackets to click that element.
|
|
161
|
-
After clicking, you receive an updated element list showing the new page state.`,
|
|
190
|
+
description: `Click an element by its index number from the element list (e.g., [0], [1]).
|
|
191
|
+
Returns state verification showing if UI changed (dialog closed, form submitted, etc.).`,
|
|
162
192
|
schema: BrowserClickSchema,
|
|
163
193
|
}));
|
|
164
194
|
// browser_type
|
|
165
195
|
tools.push(tool(createToolFunction('type'), {
|
|
166
196
|
name: EBrowserTools.TYPE,
|
|
167
|
-
description: `Type text into an input element
|
|
168
|
-
|
|
169
|
-
Set pressEnter: true to submit forms after typing.
|
|
170
|
-
After typing, you receive an updated element list.`,
|
|
197
|
+
description: `Type text into an input element by index. Set pressEnter: true to submit.
|
|
198
|
+
Returns state verification showing if form submitted or errors appeared.`,
|
|
171
199
|
schema: BrowserTypeSchema,
|
|
172
200
|
}));
|
|
173
201
|
// browser_navigate
|
|
174
202
|
tools.push(tool(createToolFunction('navigate'), {
|
|
175
203
|
name: EBrowserTools.NAVIGATE,
|
|
176
|
-
description: `Navigate to a URL
|
|
177
|
-
After navigation, you receive the new page's element list.`,
|
|
204
|
+
description: `Navigate to a URL (include https://). Returns new page's element list.`,
|
|
178
205
|
schema: BrowserNavigateSchema,
|
|
179
206
|
}));
|
|
180
207
|
// browser_scroll
|
|
181
208
|
tools.push(tool(createToolFunction('scroll'), {
|
|
182
209
|
name: EBrowserTools.SCROLL,
|
|
183
|
-
description: `Scroll the page
|
|
184
|
-
Use 'down' to scroll down, 'up' to scroll up.
|
|
185
|
-
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
210
|
+
description: `Scroll the page. Returns updated element list with newly visible elements.`,
|
|
186
211
|
schema: BrowserScrollSchema,
|
|
187
212
|
}));
|
|
188
213
|
// browser_extract
|
|
@@ -217,19 +242,40 @@ After going back, you receive the previous page's element list.`,
|
|
|
217
242
|
// browser_screenshot
|
|
218
243
|
tools.push(tool(createToolFunction('screenshot'), {
|
|
219
244
|
name: EBrowserTools.SCREENSHOT,
|
|
220
|
-
description: `Capture a screenshot
|
|
221
|
-
Returns the page state with a note that screenshot was displayed to the user.
|
|
222
|
-
Use browser_get_page_state to get the element list for automation.`,
|
|
245
|
+
description: `Capture a screenshot. Use browser_get_page_state for element list.`,
|
|
223
246
|
schema: BrowserScreenshotSchema,
|
|
224
247
|
}));
|
|
225
248
|
// browser_get_page_state
|
|
226
249
|
tools.push(tool(createToolFunction('get_page_state'), {
|
|
227
250
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
228
|
-
description: `Get
|
|
229
|
-
Use this at the start of a task to see what elements are available.
|
|
230
|
-
Returns a text list of elements with their index numbers for interaction.`,
|
|
251
|
+
description: `Get current page URL, title, and interactive elements list with index numbers.`,
|
|
231
252
|
schema: BrowserGetPageStateSchema,
|
|
232
253
|
}));
|
|
254
|
+
// browser_select_option
|
|
255
|
+
tools.push(tool(createToolFunction('select_option'), {
|
|
256
|
+
name: EBrowserTools.SELECT_OPTION,
|
|
257
|
+
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
258
|
+
schema: BrowserSelectOptionSchema,
|
|
259
|
+
}));
|
|
260
|
+
// browser_upload_file - Skyvern-inspired for file input handling
|
|
261
|
+
tools.push(tool(createToolFunction('upload_file'), {
|
|
262
|
+
name: EBrowserTools.UPLOAD_FILE,
|
|
263
|
+
description: `Upload a file to a file input element.
|
|
264
|
+
Provide the index of the file input and the URL of the file to upload.
|
|
265
|
+
The system will download the file and attach it to the input.
|
|
266
|
+
After upload, you receive an updated element list.`,
|
|
267
|
+
schema: BrowserUploadFileSchema,
|
|
268
|
+
}));
|
|
269
|
+
// browser_keypress - For keyboard shortcuts and special keys
|
|
270
|
+
tools.push(tool(createToolFunction('keypress'), {
|
|
271
|
+
name: EBrowserTools.KEYPRESS,
|
|
272
|
+
description: `Press keyboard key(s) on the page.
|
|
273
|
+
Single keys: "Enter", "Escape", "Tab", "ArrowDown", "ArrowUp", "Backspace", "Delete"
|
|
274
|
+
Key combos: "Control+A" (select all), "Control+C" (copy), "Shift+Enter" (newline)
|
|
275
|
+
Use this for form submission, closing modals, navigating dropdowns.
|
|
276
|
+
After keypress, you receive an updated element list.`,
|
|
277
|
+
schema: BrowserKeypressSchema,
|
|
278
|
+
}));
|
|
233
279
|
return tools;
|
|
234
280
|
}
|
|
235
281
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n url?: string;\n title?: string;\n elementList?: string; // Text-based element list\n error?: string;\n screenshot?: string; // Base64 screenshot (if requested)\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string\n): string {\n if (!result.success && result.error) {\n return `Browser action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n if (result.url != null && result.url !== '') {\n parts.push(`**Current URL:** ${result.url}`);\n }\n if (result.title != null && result.title !== '') {\n parts.push(`**Page Title:** ${result.title}`);\n }\n if (result.elementList != null && result.elementList !== '') {\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\n }\n if (result.screenshot != null && result.screenshot !== '') {\n parts.push('\\n[Screenshot captured and displayed to user]');\n }\n\n if (parts.length === 0) {\n return `Browser action \"${action}\" completed successfully.`;\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `Browser action \"${action}\" failed: ${errorMessage}`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element on the current web page by its index number.\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\nUse the index number in brackets to click that element.\nAfter clicking, you receive an updated element list showing the new page state.`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element on the page.\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\nSet pressEnter: true to submit forms after typing.\nAfter typing, you receive an updated element list.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL. Always include the full URL with https://.\nAfter navigation, you receive the new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page to reveal more content.\nUse 'down' to scroll down, 'up' to scroll up.\nAfter scrolling, you receive an updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot of the current page.\nReturns the page state with a note that screenshot was displayed to the user.\nUse browser_get_page_state to get the element list for automation.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get the current page state including URL, title, and all interactive elements.\nUse this at the start of a task to see what elements are available.\nReturns a text list of elements with their index numbers for interaction.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n return tools;\n}\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAkC1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAE;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAE;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAyB9C;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EAAA;IAEd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,IAAI,IAAI,IAAI,MAAM,CAAC,GAAG,KAAK,EAAE,EAAE;QAC3C,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE;QAC/C,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;QAC3D,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,EAAE,EAAE;AACzD,QAAA,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AACxD,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG6D,+EAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACwC,0DAAA,CAAA;AACrD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE+D,iFAAA,CAAA;AAC5E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEgD,kEAAA,CAAA;AAC7D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEuD,yEAAA,CAAA;AACpE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Action status - matches Skyvern's ActionStatus\n */\nexport type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';\n\n/**\n * Verification status - matches Skyvern's VerificationStatus\n * - complete: Goal achieved\n * - terminate: Goal cannot be achieved, stop\n * - continue: Goal not yet achieved, keep going\n */\nexport type VerificationStatus = 'complete' | 'terminate' | 'continue';\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n status?: ActionStatus;\n url?: string;\n title?: string;\n elementList?: string;\n error?: string;\n errorType?: string; // Typed error classification\n screenshot?: string;\n elementDescription?: string;\n // State verification (before/after comparison)\n verification?: {\n urlChanged: boolean;\n titleChanged: boolean;\n elementCountDelta: number;\n significantChange: boolean;\n dialogClosed: boolean;\n formGone: boolean;\n changeDescription: string;\n };\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string,\n actionArgs?: Record<string, unknown>\n): string {\n if (!result.success && result.error) {\n const errorType = result.errorType ? `[${result.errorType}] ` : '';\n return `${errorType}Action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n // Verification result (Skyvern pattern)\n if (result.verification) {\n const v = result.verification;\n if (v.dialogClosed || v.formGone || v.urlChanged) {\n parts.push(`✓ ${v.changeDescription}`);\n } else if (!v.significantChange) {\n parts.push(`⚠ No change detected`);\n }\n }\n\n // Page state\n if (result.url) parts.push(`URL: ${result.url}`);\n if (result.title) parts.push(`Title: ${result.title}`);\n if (result.elementList) parts.push(`\\nElements:\\n${result.elementList}`);\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action, args);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `❌ **ACTION FAILED**: Browser action \"${action}\" failed: ${errorMessage}\n\n**REQUIRED**: Analyze why the action failed and try an alternative approach.`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element by its index number from the element list (e.g., [0], [1]).\nReturns state verification showing if UI changed (dialog closed, form submitted, etc.).`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element by index. Set pressEnter: true to submit.\nReturns state verification showing if form submitted or errors appeared.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL (include https://). Returns new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page. Returns updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot. Use browser_get_page_state for element list.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get current page URL, title, and interactive elements list with index numbers.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - For keyboard shortcuts and special keys\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) on the page.\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\", \"ArrowUp\", \"Backspace\", \"Delete\"\nKey combos: \"Control+A\" (select all), \"Control+C\" (copy), \"Shift+Enter\" (newline)\nUse this for form submission, closing modals, navigating dropdowns.\nAfter keypress, you receive an updated element list.`,\n schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AA4D9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAE;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAE;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAE;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAE;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EACd,UAAoC,EAAA;IAEpC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,GAAG,CAAI,CAAA,EAAA,MAAM,CAAC,SAAS,CAAA,EAAA,CAAI,GAAG,EAAE;QAClE,OAAO,CAAA,EAAG,SAAS,CAAW,QAAA,EAAA,MAAM,aAAa,MAAM,CAAC,KAAK,CAAA,CAAE;;IAGjE,MAAM,KAAK,GAAa,EAAE;;AAG1B,IAAA,IAAI,MAAM,CAAC,YAAY,EAAE;AACvB,QAAA,MAAM,CAAC,GAAG,MAAM,CAAC,YAAY;AAC7B,QAAA,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,EAAE;YAChD,KAAK,CAAC,IAAI,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,iBAAiB,CAAE,CAAA,CAAC;;AACjC,aAAA,IAAI,CAAC,CAAC,CAAC,iBAAiB,EAAE;AAC/B,YAAA,KAAK,CAAC,IAAI,CAAC,CAAA,oBAAA,CAAsB,CAAC;;;;IAKtC,IAAI,MAAM,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;IAChD,IAAI,MAAM,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,OAAA,EAAU,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;IACtD,IAAI,MAAM,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,aAAA,EAAgB,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;AAExE,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;gBAC5D,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;;YAC/C,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;gBACxD,OAAO,CAAA,qCAAA,EAAwC,MAAM,CAAA,UAAA,EAAa,YAAY;;6EAET;;AAEzE,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACqE,uFAAA,CAAA;AAClF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AACsD,wEAAA,CAAA;AACnE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAwE,sEAAA,CAAA;AACrF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAA4E,0EAAA,CAAA;AACzF,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAoE,kEAAA,CAAA;AACjF,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAgF,8EAAA,CAAA;AAC7F,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAsF,oFAAA,CAAA;AACnG,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;AAIkC,oDAAA,CAAA;AAC/C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
@@ -14,6 +14,9 @@ export declare const EBrowserTools: {
|
|
|
14
14
|
readonly BACK: "browser_back";
|
|
15
15
|
readonly SCREENSHOT: "browser_screenshot";
|
|
16
16
|
readonly GET_PAGE_STATE: "browser_get_page_state";
|
|
17
|
+
readonly SELECT_OPTION: "browser_select_option";
|
|
18
|
+
readonly UPLOAD_FILE: "browser_upload_file";
|
|
19
|
+
readonly KEYPRESS: "browser_keypress";
|
|
17
20
|
};
|
|
18
21
|
export type BrowserToolName = (typeof EBrowserTools)[keyof typeof EBrowserTools];
|
|
19
22
|
/**
|
|
@@ -27,16 +30,39 @@ export type BrowserToolName = (typeof EBrowserTools)[keyof typeof EBrowserTools]
|
|
|
27
30
|
* @returns Promise that resolves with the actual browser result (page state, etc.)
|
|
28
31
|
*/
|
|
29
32
|
export type BrowserToolCallback = (action: string, args: Record<string, unknown>, toolCallId: string) => Promise<BrowserActionResult>;
|
|
33
|
+
/**
|
|
34
|
+
* Action status - matches Skyvern's ActionStatus
|
|
35
|
+
*/
|
|
36
|
+
export type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';
|
|
37
|
+
/**
|
|
38
|
+
* Verification status - matches Skyvern's VerificationStatus
|
|
39
|
+
* - complete: Goal achieved
|
|
40
|
+
* - terminate: Goal cannot be achieved, stop
|
|
41
|
+
* - continue: Goal not yet achieved, keep going
|
|
42
|
+
*/
|
|
43
|
+
export type VerificationStatus = 'complete' | 'terminate' | 'continue';
|
|
30
44
|
/**
|
|
31
45
|
* Result returned from browser action execution
|
|
32
46
|
*/
|
|
33
47
|
export interface BrowserActionResult {
|
|
34
48
|
success: boolean;
|
|
49
|
+
status?: ActionStatus;
|
|
35
50
|
url?: string;
|
|
36
51
|
title?: string;
|
|
37
52
|
elementList?: string;
|
|
38
53
|
error?: string;
|
|
54
|
+
errorType?: string;
|
|
39
55
|
screenshot?: string;
|
|
56
|
+
elementDescription?: string;
|
|
57
|
+
verification?: {
|
|
58
|
+
urlChanged: boolean;
|
|
59
|
+
titleChanged: boolean;
|
|
60
|
+
elementCountDelta: number;
|
|
61
|
+
significantChange: boolean;
|
|
62
|
+
dialogClosed: boolean;
|
|
63
|
+
formGone: boolean;
|
|
64
|
+
changeDescription: string;
|
|
65
|
+
};
|
|
40
66
|
}
|
|
41
67
|
/**
|
|
42
68
|
* Check if browser capability is available based on request headers or context
|
package/package.json
CHANGED
|
@@ -17,6 +17,10 @@ export const EBrowserTools = {
|
|
|
17
17
|
BACK: 'browser_back',
|
|
18
18
|
SCREENSHOT: 'browser_screenshot',
|
|
19
19
|
GET_PAGE_STATE: 'browser_get_page_state',
|
|
20
|
+
// Skyvern-inspired additions for robust form handling
|
|
21
|
+
SELECT_OPTION: 'browser_select_option',
|
|
22
|
+
UPLOAD_FILE: 'browser_upload_file',
|
|
23
|
+
KEYPRESS: 'browser_keypress',
|
|
20
24
|
} as const;
|
|
21
25
|
|
|
22
26
|
export type BrowserToolName =
|
|
@@ -38,16 +42,42 @@ export type BrowserToolCallback = (
|
|
|
38
42
|
toolCallId: string
|
|
39
43
|
) => Promise<BrowserActionResult>;
|
|
40
44
|
|
|
45
|
+
/**
|
|
46
|
+
* Action status - matches Skyvern's ActionStatus
|
|
47
|
+
*/
|
|
48
|
+
export type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';
|
|
49
|
+
|
|
50
|
+
/**
|
|
51
|
+
* Verification status - matches Skyvern's VerificationStatus
|
|
52
|
+
* - complete: Goal achieved
|
|
53
|
+
* - terminate: Goal cannot be achieved, stop
|
|
54
|
+
* - continue: Goal not yet achieved, keep going
|
|
55
|
+
*/
|
|
56
|
+
export type VerificationStatus = 'complete' | 'terminate' | 'continue';
|
|
57
|
+
|
|
41
58
|
/**
|
|
42
59
|
* Result returned from browser action execution
|
|
43
60
|
*/
|
|
44
61
|
export interface BrowserActionResult {
|
|
45
62
|
success: boolean;
|
|
63
|
+
status?: ActionStatus;
|
|
46
64
|
url?: string;
|
|
47
65
|
title?: string;
|
|
48
|
-
elementList?: string;
|
|
66
|
+
elementList?: string;
|
|
49
67
|
error?: string;
|
|
50
|
-
|
|
68
|
+
errorType?: string; // Typed error classification
|
|
69
|
+
screenshot?: string;
|
|
70
|
+
elementDescription?: string;
|
|
71
|
+
// State verification (before/after comparison)
|
|
72
|
+
verification?: {
|
|
73
|
+
urlChanged: boolean;
|
|
74
|
+
titleChanged: boolean;
|
|
75
|
+
elementCountDelta: number;
|
|
76
|
+
significantChange: boolean;
|
|
77
|
+
dialogClosed: boolean;
|
|
78
|
+
formGone: boolean;
|
|
79
|
+
changeDescription: string;
|
|
80
|
+
};
|
|
51
81
|
}
|
|
52
82
|
|
|
53
83
|
/**
|
|
@@ -129,6 +159,32 @@ const BrowserScreenshotSchema = z.object({});
|
|
|
129
159
|
|
|
130
160
|
const BrowserGetPageStateSchema = z.object({});
|
|
131
161
|
|
|
162
|
+
// Skyvern-inspired schemas for robust form handling
|
|
163
|
+
const BrowserSelectOptionSchema = z.object({
|
|
164
|
+
index: z
|
|
165
|
+
.number()
|
|
166
|
+
.describe('The index number of the select/dropdown element'),
|
|
167
|
+
value: z
|
|
168
|
+
.string()
|
|
169
|
+
.optional()
|
|
170
|
+
.describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),
|
|
171
|
+
});
|
|
172
|
+
|
|
173
|
+
const BrowserUploadFileSchema = z.object({
|
|
174
|
+
index: z
|
|
175
|
+
.number()
|
|
176
|
+
.describe('The index number of the file input element'),
|
|
177
|
+
fileUrl: z
|
|
178
|
+
.string()
|
|
179
|
+
.describe('URL of the file to upload (the system will download and upload it)'),
|
|
180
|
+
});
|
|
181
|
+
|
|
182
|
+
const BrowserKeypressSchema = z.object({
|
|
183
|
+
keys: z
|
|
184
|
+
.string()
|
|
185
|
+
.describe('Key(s) to press. Single key: "Enter", "Escape", "Tab", "ArrowDown". Combo: "Control+A", "Shift+Enter"'),
|
|
186
|
+
});
|
|
187
|
+
|
|
132
188
|
/**
|
|
133
189
|
* Browser tool response interface
|
|
134
190
|
* This is what the extension returns after executing the action
|
|
@@ -157,30 +213,30 @@ export interface CreateBrowserToolsOptions {
|
|
|
157
213
|
*/
|
|
158
214
|
function formatResultForLLM(
|
|
159
215
|
result: BrowserActionResult,
|
|
160
|
-
action: string
|
|
216
|
+
action: string,
|
|
217
|
+
actionArgs?: Record<string, unknown>
|
|
161
218
|
): string {
|
|
162
219
|
if (!result.success && result.error) {
|
|
163
|
-
|
|
220
|
+
const errorType = result.errorType ? `[${result.errorType}] ` : '';
|
|
221
|
+
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
164
222
|
}
|
|
165
223
|
|
|
166
224
|
const parts: string[] = [];
|
|
167
225
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
}
|
|
177
|
-
if (result.screenshot != null && result.screenshot !== '') {
|
|
178
|
-
parts.push('\n[Screenshot captured and displayed to user]');
|
|
226
|
+
// Verification result (Skyvern pattern)
|
|
227
|
+
if (result.verification) {
|
|
228
|
+
const v = result.verification;
|
|
229
|
+
if (v.dialogClosed || v.formGone || v.urlChanged) {
|
|
230
|
+
parts.push(`✓ ${v.changeDescription}`);
|
|
231
|
+
} else if (!v.significantChange) {
|
|
232
|
+
parts.push(`⚠ No change detected`);
|
|
233
|
+
}
|
|
179
234
|
}
|
|
180
235
|
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
}
|
|
236
|
+
// Page state
|
|
237
|
+
if (result.url) parts.push(`URL: ${result.url}`);
|
|
238
|
+
if (result.title) parts.push(`Title: ${result.title}`);
|
|
239
|
+
if (result.elementList) parts.push(`\nElements:\n${result.elementList}`);
|
|
184
240
|
|
|
185
241
|
return parts.join('\n');
|
|
186
242
|
}
|
|
@@ -235,11 +291,13 @@ export function createBrowserTools(
|
|
|
235
291
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
236
292
|
try {
|
|
237
293
|
const result = await waitForResult(action, args, toolCallId);
|
|
238
|
-
return formatResultForLLM(result, action);
|
|
294
|
+
return formatResultForLLM(result, action, args);
|
|
239
295
|
} catch (error) {
|
|
240
296
|
const errorMessage =
|
|
241
297
|
error instanceof Error ? error.message : String(error);
|
|
242
|
-
return
|
|
298
|
+
return `❌ **ACTION FAILED**: Browser action "${action}" failed: ${errorMessage}
|
|
299
|
+
|
|
300
|
+
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
243
301
|
}
|
|
244
302
|
};
|
|
245
303
|
};
|
|
@@ -248,10 +306,8 @@ export function createBrowserTools(
|
|
|
248
306
|
tools.push(
|
|
249
307
|
tool(createToolFunction('click'), {
|
|
250
308
|
name: EBrowserTools.CLICK,
|
|
251
|
-
description: `Click an element
|
|
252
|
-
|
|
253
|
-
Use the index number in brackets to click that element.
|
|
254
|
-
After clicking, you receive an updated element list showing the new page state.`,
|
|
309
|
+
description: `Click an element by its index number from the element list (e.g., [0], [1]).
|
|
310
|
+
Returns state verification showing if UI changed (dialog closed, form submitted, etc.).`,
|
|
255
311
|
schema: BrowserClickSchema,
|
|
256
312
|
})
|
|
257
313
|
);
|
|
@@ -260,10 +316,8 @@ After clicking, you receive an updated element list showing the new page state.`
|
|
|
260
316
|
tools.push(
|
|
261
317
|
tool(createToolFunction('type'), {
|
|
262
318
|
name: EBrowserTools.TYPE,
|
|
263
|
-
description: `Type text into an input element
|
|
264
|
-
|
|
265
|
-
Set pressEnter: true to submit forms after typing.
|
|
266
|
-
After typing, you receive an updated element list.`,
|
|
319
|
+
description: `Type text into an input element by index. Set pressEnter: true to submit.
|
|
320
|
+
Returns state verification showing if form submitted or errors appeared.`,
|
|
267
321
|
schema: BrowserTypeSchema,
|
|
268
322
|
})
|
|
269
323
|
);
|
|
@@ -272,8 +326,7 @@ After typing, you receive an updated element list.`,
|
|
|
272
326
|
tools.push(
|
|
273
327
|
tool(createToolFunction('navigate'), {
|
|
274
328
|
name: EBrowserTools.NAVIGATE,
|
|
275
|
-
description: `Navigate to a URL
|
|
276
|
-
After navigation, you receive the new page's element list.`,
|
|
329
|
+
description: `Navigate to a URL (include https://). Returns new page's element list.`,
|
|
277
330
|
schema: BrowserNavigateSchema,
|
|
278
331
|
})
|
|
279
332
|
);
|
|
@@ -282,9 +335,7 @@ After navigation, you receive the new page's element list.`,
|
|
|
282
335
|
tools.push(
|
|
283
336
|
tool(createToolFunction('scroll'), {
|
|
284
337
|
name: EBrowserTools.SCROLL,
|
|
285
|
-
description: `Scroll the page
|
|
286
|
-
Use 'down' to scroll down, 'up' to scroll up.
|
|
287
|
-
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
338
|
+
description: `Scroll the page. Returns updated element list with newly visible elements.`,
|
|
288
339
|
schema: BrowserScrollSchema,
|
|
289
340
|
})
|
|
290
341
|
);
|
|
@@ -334,9 +385,7 @@ After going back, you receive the previous page's element list.`,
|
|
|
334
385
|
tools.push(
|
|
335
386
|
tool(createToolFunction('screenshot'), {
|
|
336
387
|
name: EBrowserTools.SCREENSHOT,
|
|
337
|
-
description: `Capture a screenshot
|
|
338
|
-
Returns the page state with a note that screenshot was displayed to the user.
|
|
339
|
-
Use browser_get_page_state to get the element list for automation.`,
|
|
388
|
+
description: `Capture a screenshot. Use browser_get_page_state for element list.`,
|
|
340
389
|
schema: BrowserScreenshotSchema,
|
|
341
390
|
})
|
|
342
391
|
);
|
|
@@ -345,12 +394,44 @@ Use browser_get_page_state to get the element list for automation.`,
|
|
|
345
394
|
tools.push(
|
|
346
395
|
tool(createToolFunction('get_page_state'), {
|
|
347
396
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
348
|
-
description: `Get
|
|
349
|
-
Use this at the start of a task to see what elements are available.
|
|
350
|
-
Returns a text list of elements with their index numbers for interaction.`,
|
|
397
|
+
description: `Get current page URL, title, and interactive elements list with index numbers.`,
|
|
351
398
|
schema: BrowserGetPageStateSchema,
|
|
352
399
|
})
|
|
353
400
|
);
|
|
354
401
|
|
|
402
|
+
// browser_select_option
|
|
403
|
+
tools.push(
|
|
404
|
+
tool(createToolFunction('select_option'), {
|
|
405
|
+
name: EBrowserTools.SELECT_OPTION,
|
|
406
|
+
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
407
|
+
schema: BrowserSelectOptionSchema,
|
|
408
|
+
})
|
|
409
|
+
);
|
|
410
|
+
|
|
411
|
+
// browser_upload_file - Skyvern-inspired for file input handling
|
|
412
|
+
tools.push(
|
|
413
|
+
tool(createToolFunction('upload_file'), {
|
|
414
|
+
name: EBrowserTools.UPLOAD_FILE,
|
|
415
|
+
description: `Upload a file to a file input element.
|
|
416
|
+
Provide the index of the file input and the URL of the file to upload.
|
|
417
|
+
The system will download the file and attach it to the input.
|
|
418
|
+
After upload, you receive an updated element list.`,
|
|
419
|
+
schema: BrowserUploadFileSchema,
|
|
420
|
+
})
|
|
421
|
+
);
|
|
422
|
+
|
|
423
|
+
// browser_keypress - For keyboard shortcuts and special keys
|
|
424
|
+
tools.push(
|
|
425
|
+
tool(createToolFunction('keypress'), {
|
|
426
|
+
name: EBrowserTools.KEYPRESS,
|
|
427
|
+
description: `Press keyboard key(s) on the page.
|
|
428
|
+
Single keys: "Enter", "Escape", "Tab", "ArrowDown", "ArrowUp", "Backspace", "Delete"
|
|
429
|
+
Key combos: "Control+A" (select all), "Control+C" (copy), "Shift+Enter" (newline)
|
|
430
|
+
Use this for form submission, closing modals, navigating dropdowns.
|
|
431
|
+
After keypress, you receive an updated element list.`,
|
|
432
|
+
schema: BrowserKeypressSchema,
|
|
433
|
+
})
|
|
434
|
+
);
|
|
435
|
+
|
|
355
436
|
return tools;
|
|
356
437
|
}
|