illuma-agents 1.0.31 → 1.0.33
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- package/dist/cjs/tools/BrowserTools.cjs +38 -121
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/tools/BrowserTools.mjs +38 -121
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/tools/BrowserTools.d.ts +0 -26
- package/package.json +1 -1
- package/src/tools/BrowserTools.ts +40 -158
|
@@ -18,10 +18,6 @@ const EBrowserTools = {
|
|
|
18
18
|
BACK: 'browser_back',
|
|
19
19
|
SCREENSHOT: 'browser_screenshot',
|
|
20
20
|
GET_PAGE_STATE: 'browser_get_page_state',
|
|
21
|
-
// Skyvern-inspired additions for robust form handling
|
|
22
|
-
SELECT_OPTION: 'browser_select_option',
|
|
23
|
-
UPLOAD_FILE: 'browser_upload_file',
|
|
24
|
-
KEYPRESS: 'browser_keypress',
|
|
25
21
|
};
|
|
26
22
|
/**
|
|
27
23
|
* Check if browser capability is available based on request headers or context
|
|
@@ -85,55 +81,29 @@ const BrowserWaitSchema = zod.z.object({
|
|
|
85
81
|
const BrowserBackSchema = zod.z.object({});
|
|
86
82
|
const BrowserScreenshotSchema = zod.z.object({});
|
|
87
83
|
const BrowserGetPageStateSchema = zod.z.object({});
|
|
88
|
-
// Skyvern-inspired schemas for robust form handling
|
|
89
|
-
const BrowserSelectOptionSchema = zod.z.object({
|
|
90
|
-
index: zod.z
|
|
91
|
-
.number()
|
|
92
|
-
.describe('The index number of the select/dropdown element'),
|
|
93
|
-
value: zod.z
|
|
94
|
-
.string()
|
|
95
|
-
.optional()
|
|
96
|
-
.describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),
|
|
97
|
-
});
|
|
98
|
-
const BrowserUploadFileSchema = zod.z.object({
|
|
99
|
-
index: zod.z
|
|
100
|
-
.number()
|
|
101
|
-
.describe('The index number of the file input element'),
|
|
102
|
-
fileUrl: zod.z
|
|
103
|
-
.string()
|
|
104
|
-
.describe('URL of the file to upload (the system will download and upload it)'),
|
|
105
|
-
});
|
|
106
|
-
const BrowserKeypressSchema = zod.z.object({
|
|
107
|
-
keys: zod.z
|
|
108
|
-
.string()
|
|
109
|
-
.describe('Key(s) to press. Single key: "Enter", "Escape", "Tab", "ArrowDown". Combo: "Control+A", "Shift+Enter"'),
|
|
110
|
-
});
|
|
111
84
|
/**
|
|
112
85
|
* Format browser action result for LLM consumption
|
|
113
86
|
*/
|
|
114
|
-
function formatResultForLLM(result, action
|
|
87
|
+
function formatResultForLLM(result, action) {
|
|
115
88
|
if (!result.success && result.error) {
|
|
116
|
-
|
|
117
|
-
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
89
|
+
return `Browser action "${action}" failed: ${result.error}`;
|
|
118
90
|
}
|
|
119
91
|
const parts = [];
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
92
|
+
if (result.url != null && result.url !== '') {
|
|
93
|
+
parts.push(`**Current URL:** ${result.url}`);
|
|
94
|
+
}
|
|
95
|
+
if (result.title != null && result.title !== '') {
|
|
96
|
+
parts.push(`**Page Title:** ${result.title}`);
|
|
97
|
+
}
|
|
98
|
+
if (result.elementList != null && result.elementList !== '') {
|
|
99
|
+
parts.push(`\n**Interactive Elements:**\n${result.elementList}`);
|
|
100
|
+
}
|
|
101
|
+
if (result.screenshot != null && result.screenshot !== '') {
|
|
102
|
+
parts.push('\n[Screenshot captured and displayed to user]');
|
|
103
|
+
}
|
|
104
|
+
if (parts.length === 0) {
|
|
105
|
+
return `Browser action "${action}" completed successfully.`;
|
|
129
106
|
}
|
|
130
|
-
// Page state
|
|
131
|
-
if (result.url)
|
|
132
|
-
parts.push(`URL: ${result.url}`);
|
|
133
|
-
if (result.title)
|
|
134
|
-
parts.push(`Title: ${result.title}`);
|
|
135
|
-
if (result.elementList)
|
|
136
|
-
parts.push(`\nElements:\n${result.elementList}`);
|
|
137
107
|
return parts.join('\n');
|
|
138
108
|
}
|
|
139
109
|
/**
|
|
@@ -176,65 +146,45 @@ function createBrowserTools(options) {
|
|
|
176
146
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
177
147
|
try {
|
|
178
148
|
const result = await waitForResult(action, args, toolCallId);
|
|
179
|
-
return formatResultForLLM(result, action
|
|
149
|
+
return formatResultForLLM(result, action);
|
|
180
150
|
}
|
|
181
151
|
catch (error) {
|
|
182
152
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
183
|
-
return
|
|
184
|
-
|
|
185
|
-
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
153
|
+
return `Browser action "${action}" failed: ${errorMessage}`;
|
|
186
154
|
}
|
|
187
155
|
};
|
|
188
156
|
};
|
|
189
|
-
// browser_click
|
|
157
|
+
// browser_click
|
|
190
158
|
tools$1.push(tools.tool(createToolFunction('click'), {
|
|
191
159
|
name: EBrowserTools.CLICK,
|
|
192
|
-
description: `Click an element by its
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
- Links (a tags) to navigate
|
|
197
|
-
- Checkboxes, radio buttons to select
|
|
198
|
-
- Any clickable element that performs an action
|
|
199
|
-
|
|
200
|
-
**SEMANTIC ELEMENT IDENTIFICATION:**
|
|
201
|
-
- Look for aria-label="Send", aria-label="Submit" for action buttons
|
|
202
|
-
- Look for type="submit" for form submission
|
|
203
|
-
- Look for button text like "Send", "Submit", "Save"
|
|
204
|
-
- For email compose: Find the Send button, DON'T use keyboard shortcuts
|
|
205
|
-
|
|
206
|
-
Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,
|
|
160
|
+
description: `Click an element on the current web page by its index number.
|
|
161
|
+
The element list shows clickable items like: [0]<button>Submit</button> [1]<a href="/home">Home</a>
|
|
162
|
+
Use the index number in brackets to click that element.
|
|
163
|
+
After clicking, you receive an updated element list showing the new page state.`,
|
|
207
164
|
schema: BrowserClickSchema,
|
|
208
165
|
}));
|
|
209
166
|
// browser_type
|
|
210
167
|
tools$1.push(tools.tool(createToolFunction('type'), {
|
|
211
168
|
name: EBrowserTools.TYPE,
|
|
212
|
-
description: `Type text into an input
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
- Textareas for longer content
|
|
217
|
-
- Contenteditable divs (like Gmail compose body)
|
|
218
|
-
|
|
219
|
-
**OPTIONS:**
|
|
220
|
-
- pressEnter: true → Press Enter after typing (for search forms)
|
|
221
|
-
- pressEnter: false → Just type without submitting
|
|
222
|
-
|
|
223
|
-
**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.
|
|
224
|
-
|
|
225
|
-
Returns verification showing if text was entered or form was submitted.`,
|
|
169
|
+
description: `Type text into an input element on the page.
|
|
170
|
+
Find the input element in the list by its index (e.g., [5]<input placeholder="Search">).
|
|
171
|
+
Set pressEnter: true to submit forms after typing.
|
|
172
|
+
After typing, you receive an updated element list.`,
|
|
226
173
|
schema: BrowserTypeSchema,
|
|
227
174
|
}));
|
|
228
175
|
// browser_navigate
|
|
229
176
|
tools$1.push(tools.tool(createToolFunction('navigate'), {
|
|
230
177
|
name: EBrowserTools.NAVIGATE,
|
|
231
|
-
description: `Navigate to a URL
|
|
178
|
+
description: `Navigate to a URL. Always include the full URL with https://.
|
|
179
|
+
After navigation, you receive the new page's element list.`,
|
|
232
180
|
schema: BrowserNavigateSchema,
|
|
233
181
|
}));
|
|
234
182
|
// browser_scroll
|
|
235
183
|
tools$1.push(tools.tool(createToolFunction('scroll'), {
|
|
236
184
|
name: EBrowserTools.SCROLL,
|
|
237
|
-
description: `Scroll the page
|
|
185
|
+
description: `Scroll the page to reveal more content.
|
|
186
|
+
Use 'down' to scroll down, 'up' to scroll up.
|
|
187
|
+
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
238
188
|
schema: BrowserScrollSchema,
|
|
239
189
|
}));
|
|
240
190
|
// browser_extract
|
|
@@ -269,52 +219,19 @@ After going back, you receive the previous page's element list.`,
|
|
|
269
219
|
// browser_screenshot
|
|
270
220
|
tools$1.push(tools.tool(createToolFunction('screenshot'), {
|
|
271
221
|
name: EBrowserTools.SCREENSHOT,
|
|
272
|
-
description: `Capture a screenshot
|
|
222
|
+
description: `Capture a screenshot of the current page.
|
|
223
|
+
Returns the page state with a note that screenshot was displayed to the user.
|
|
224
|
+
Use browser_get_page_state to get the element list for automation.`,
|
|
273
225
|
schema: BrowserScreenshotSchema,
|
|
274
226
|
}));
|
|
275
227
|
// browser_get_page_state
|
|
276
228
|
tools$1.push(tools.tool(createToolFunction('get_page_state'), {
|
|
277
229
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
278
|
-
description: `Get current page URL, title, and interactive elements
|
|
230
|
+
description: `Get the current page state including URL, title, and all interactive elements.
|
|
231
|
+
Use this at the start of a task to see what elements are available.
|
|
232
|
+
Returns a text list of elements with their index numbers for interaction.`,
|
|
279
233
|
schema: BrowserGetPageStateSchema,
|
|
280
234
|
}));
|
|
281
|
-
// browser_select_option
|
|
282
|
-
tools$1.push(tools.tool(createToolFunction('select_option'), {
|
|
283
|
-
name: EBrowserTools.SELECT_OPTION,
|
|
284
|
-
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
285
|
-
schema: BrowserSelectOptionSchema,
|
|
286
|
-
}));
|
|
287
|
-
// browser_upload_file - Skyvern-inspired for file input handling
|
|
288
|
-
tools$1.push(tools.tool(createToolFunction('upload_file'), {
|
|
289
|
-
name: EBrowserTools.UPLOAD_FILE,
|
|
290
|
-
description: `Upload a file to a file input element.
|
|
291
|
-
Provide the index of the file input and the URL of the file to upload.
|
|
292
|
-
The system will download the file and attach it to the input.
|
|
293
|
-
After upload, you receive an updated element list.`,
|
|
294
|
-
schema: BrowserUploadFileSchema,
|
|
295
|
-
}));
|
|
296
|
-
// browser_keypress - LAST RESORT for keyboard shortcuts
|
|
297
|
-
tools$1.push(tools.tool(createToolFunction('keypress'), {
|
|
298
|
-
name: EBrowserTools.KEYPRESS,
|
|
299
|
-
description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.
|
|
300
|
-
|
|
301
|
-
**PREFER browser_click OVER keypress:**
|
|
302
|
-
- To send email → Find and CLICK the Send button
|
|
303
|
-
- To submit form → CLICK the Submit button
|
|
304
|
-
- To close modal → CLICK the X or Cancel button
|
|
305
|
-
|
|
306
|
-
**WHEN TO USE keypress:**
|
|
307
|
-
- Navigation: "Tab", "ArrowDown", "ArrowUp" in dropdowns
|
|
308
|
-
- Escape to close popups if no X button exists
|
|
309
|
-
- Control+A, Control+C for select/copy operations
|
|
310
|
-
- ONLY use Enter to submit if there's NO visible submit button
|
|
311
|
-
|
|
312
|
-
Single keys: "Enter", "Escape", "Tab", "ArrowDown"
|
|
313
|
-
Key combos: "Control+A", "Shift+Enter"
|
|
314
|
-
|
|
315
|
-
Returns verification showing if page state changed.`,
|
|
316
|
-
schema: BrowserKeypressSchema,
|
|
317
|
-
}));
|
|
318
235
|
return tools$1;
|
|
319
236
|
}
|
|
320
237
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Action status - matches Skyvern's ActionStatus\n */\nexport type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';\n\n/**\n * Verification status - matches Skyvern's VerificationStatus\n * - complete: Goal achieved\n * - terminate: Goal cannot be achieved, stop\n * - continue: Goal not yet achieved, keep going\n */\nexport 
type VerificationStatus = 'complete' | 'terminate' | 'continue';\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n status?: ActionStatus;\n url?: string;\n title?: string;\n elementList?: string;\n error?: string;\n errorType?: string; // Typed error classification\n screenshot?: string;\n elementDescription?: string;\n // State verification (before/after comparison)\n verification?: {\n urlChanged: boolean;\n titleChanged: boolean;\n elementCountDelta: number;\n significantChange: boolean;\n dialogClosed: boolean;\n formGone: boolean;\n changeDescription: string;\n };\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. 
of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. 
For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string,\n actionArgs?: Record<string, unknown>\n): string {\n if (!result.success && result.error) {\n const errorType = result.errorType ? 
`[${result.errorType}] ` : '';\n return `${errorType}Action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n // Verification result (Skyvern pattern)\n if (result.verification) {\n const v = result.verification;\n if (v.dialogClosed || v.formGone || v.urlChanged) {\n parts.push(`✓ ${v.changeDescription}`);\n } else if (!v.significantChange) {\n parts.push(`⚠ No change detected`);\n }\n }\n\n // Page state\n if (result.url) parts.push(`URL: ${result.url}`);\n if (result.title) parts.push(`Title: ${result.title}`);\n if (result.elementList) parts.push(`\\nElements:\\n${result.elementList}`);\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately 
(extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action, args);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `❌ **ACTION FAILED**: Browser action \"${action}\" failed: ${errorMessage}\n\n**REQUIRED**: Analyze why the action failed and try an alternative approach.`;\n }\n };\n };\n\n // browser_click - PRIMARY action for buttons and links\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element by its [index] from the element list.\n\n**WHEN TO USE (prioritize this over keypress):**\n- Buttons labeled \"Send\", \"Submit\", \"Save\", \"OK\", \"Confirm\" → CLICK them\n- Links (a tags) to navigate\n- Checkboxes, radio buttons to select\n- Any clickable element that performs an action\n\n**SEMANTIC ELEMENT IDENTIFICATION:**\n- Look for aria-label=\"Send\", aria-label=\"Submit\" for action buttons\n- Look for type=\"submit\" for form submission\n- Look for button text like \"Send\", \"Submit\", \"Save\"\n- For email compose: Find the Send button, DON'T use keyboard shortcuts\n\nReturns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input/textarea element by [index].\n\n**WHEN TO USE:**\n- Input fields (fieldType=\"email\", \"search\", \"phone\", etc.)\n- Textareas for longer content\n- Contenteditable divs (like Gmail compose body)\n\n**OPTIONS:**\n- pressEnter: true → Press Enter after typing (for search forms)\n- pressEnter: false 
→ Just type without submitting\n\n**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.\n\nReturns verification showing if text was entered or form was submitted.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL (include https://). Returns new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page. Returns updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: 
EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot. Use browser_get_page_state for element list.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get current page URL, title, and interactive elements list with index numbers.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - LAST RESORT for keyboard shortcuts\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.\n\n**PREFER browser_click OVER keypress:**\n- To send email → Find and CLICK the Send button\n- To submit form → CLICK the Submit button\n- To close modal → CLICK the X or Cancel button\n\n**WHEN TO USE keypress:**\n- Navigation: \"Tab\", \"ArrowDown\", \"ArrowUp\" in dropdowns\n- Escape to close popups if no X button exists\n- Control+A, Control+C for select/copy operations\n- ONLY use Enter to submit if there's NO visible submit button\n\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\"\nKey combos: \"Control+A\", \"Shift+Enter\"\n\nReturns verification showing if page state changed.`,\n 
schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AA4D9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAEA;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAEA;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,
MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAEA;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAEA;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EACd,UAAoC,EAAA;IAEpC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,GAAG,CAAI,CAAA,EAAA,MAAM,CAAC,SAAS,CAAA,EAAA,CAAI,GAAG,EAAE;QAClE,OAAO,CAAA,EAAG,SAAS,CAAW,QAAA,EAAA,MAAM,aAAa,MAAM,CAAC,KAAK,CAAA,CAAE;;IAGjE,MAAM,KAAK,GAAa,EAAE;;AAG1B,IAAA,IAAI,MAAM,CAAC,YAAY,EAAE;AACvB,QAAA,MAAM,CAAC,GAAG,MAAM,CAAC,YAAY;AAC7B,QAAA,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,EAAE;YAChD,KAAK,CAAC,IAAI,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,iBAAiB,CAAE,CAAA,CAAC;;AACjC,aAAA,IAAI,CAAC,CAAC,CAAC,iBAAiB,EAAE;AAC/B,YAAA,KAAK,CAAC,IAAI,CAAC,CAAA,oBAAA,CAAsB,CAAC;;;;IAKtC,IAAI,MAAM,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;IAChD,IAAI,MAAM,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,OAAA,EAAU,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;IACtD,IAAI,MAAM,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,aAAA,EAAgB,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;AAExE,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,K
AAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;gBAC5D,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;;YAC/C,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;gBACxD,OAAO,CAAA,qCAAA,EAAwC,MAAM,CAAA,UAAA,EAAa,YAAY;;6EAET;;AAEzE,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;AAcsF,wGAAA,CAAA;AACnG,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;AAaqD,uEAAA,CAAA;AAClE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAwE,sEAAA,CAAA;AACrF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAA4E,0EAAA,CAAA;AACzF,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D
,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAoE,kEAAA,CAAA;AACjF,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAgF,8EAAA,CAAA;AAC7F,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAsF,oFAAA,CAAA;AACnG,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;AAgBiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n url?: string;\n title?: string;\n elementList?: string; // Text-based element list\n error?: string;\n screenshot?: string; // Base64 screenshot (if requested)\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n 
*/\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n 
requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string\n): string {\n if (!result.success && result.error) {\n return `Browser action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n if (result.url != null && result.url !== '') {\n parts.push(`**Current URL:** ${result.url}`);\n }\n if (result.title != null && result.title !== '') {\n parts.push(`**Page Title:** ${result.title}`);\n }\n if (result.elementList != null && result.elementList !== '') {\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\n }\n if (result.screenshot != null && result.screenshot !== '') {\n parts.push('\\n[Screenshot captured and displayed to user]');\n }\n\n if (parts.length === 0) {\n return `Browser action \"${action}\" completed successfully.`;\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. 
Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return `Browser action \"${action}\" failed: ${errorMessage}`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element on the current web page by its index number.\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\nUse the index number in brackets to click that element.\nAfter clicking, you receive an updated element list showing the new page state.`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element on the page.\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\nSet pressEnter: true to submit forms after typing.\nAfter typing, you receive an updated element list.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL. 
Always include the full URL with https://.\nAfter navigation, you receive the new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page to reveal more content.\nUse 'down' to scroll down, 'up' to scroll up.\nAfter scrolling, you receive an updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot of the current page.\nReturns the page state with a note that screenshot was displayed to the user.\nUse browser_get_page_state to get the element list for automation.`,\n schema: 
BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get the current page state including URL, title, and all interactive elements.\nUse this at the start of a task to see what elements are available.\nReturns a text list of elements with their index numbers for interaction.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n return tools;\n}\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAkC1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAEA;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAEA;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA
,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAyB9C;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EAAA;IAEd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,IAAI,IAAI,IAAI,MAAM,CAAC,GAAG,KAAK,EAAE,EAAE;QAC3C,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE;QAC/C,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;QAC3D,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,EAAE,EAAE;AACzD,QAAA,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAA
E,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AACxD,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG6D,+EAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACwC,0DAAA,CAAA;AACrD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE+D,iFAAA,CAAA;AAC5E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEgD,kEAAA,CAAA;AAC7D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CAAC,kBAAkB,
CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEuD,yEAAA,CAAA;AACpE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
@@ -16,10 +16,6 @@ const EBrowserTools = {
|
|
|
16
16
|
BACK: 'browser_back',
|
|
17
17
|
SCREENSHOT: 'browser_screenshot',
|
|
18
18
|
GET_PAGE_STATE: 'browser_get_page_state',
|
|
19
|
-
// Skyvern-inspired additions for robust form handling
|
|
20
|
-
SELECT_OPTION: 'browser_select_option',
|
|
21
|
-
UPLOAD_FILE: 'browser_upload_file',
|
|
22
|
-
KEYPRESS: 'browser_keypress',
|
|
23
19
|
};
|
|
24
20
|
/**
|
|
25
21
|
* Check if browser capability is available based on request headers or context
|
|
@@ -83,55 +79,29 @@ const BrowserWaitSchema = z.object({
|
|
|
83
79
|
const BrowserBackSchema = z.object({});
|
|
84
80
|
const BrowserScreenshotSchema = z.object({});
|
|
85
81
|
const BrowserGetPageStateSchema = z.object({});
|
|
86
|
-
// Skyvern-inspired schemas for robust form handling
|
|
87
|
-
const BrowserSelectOptionSchema = z.object({
|
|
88
|
-
index: z
|
|
89
|
-
.number()
|
|
90
|
-
.describe('The index number of the select/dropdown element'),
|
|
91
|
-
value: z
|
|
92
|
-
.string()
|
|
93
|
-
.optional()
|
|
94
|
-
.describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),
|
|
95
|
-
});
|
|
96
|
-
const BrowserUploadFileSchema = z.object({
|
|
97
|
-
index: z
|
|
98
|
-
.number()
|
|
99
|
-
.describe('The index number of the file input element'),
|
|
100
|
-
fileUrl: z
|
|
101
|
-
.string()
|
|
102
|
-
.describe('URL of the file to upload (the system will download and upload it)'),
|
|
103
|
-
});
|
|
104
|
-
const BrowserKeypressSchema = z.object({
|
|
105
|
-
keys: z
|
|
106
|
-
.string()
|
|
107
|
-
.describe('Key(s) to press. Single key: "Enter", "Escape", "Tab", "ArrowDown". Combo: "Control+A", "Shift+Enter"'),
|
|
108
|
-
});
|
|
109
82
|
/**
|
|
110
83
|
* Format browser action result for LLM consumption
|
|
111
84
|
*/
|
|
112
|
-
function formatResultForLLM(result, action
|
|
85
|
+
function formatResultForLLM(result, action) {
|
|
113
86
|
if (!result.success && result.error) {
|
|
114
|
-
|
|
115
|
-
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
87
|
+
return `Browser action "${action}" failed: ${result.error}`;
|
|
116
88
|
}
|
|
117
89
|
const parts = [];
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
90
|
+
if (result.url != null && result.url !== '') {
|
|
91
|
+
parts.push(`**Current URL:** ${result.url}`);
|
|
92
|
+
}
|
|
93
|
+
if (result.title != null && result.title !== '') {
|
|
94
|
+
parts.push(`**Page Title:** ${result.title}`);
|
|
95
|
+
}
|
|
96
|
+
if (result.elementList != null && result.elementList !== '') {
|
|
97
|
+
parts.push(`\n**Interactive Elements:**\n${result.elementList}`);
|
|
98
|
+
}
|
|
99
|
+
if (result.screenshot != null && result.screenshot !== '') {
|
|
100
|
+
parts.push('\n[Screenshot captured and displayed to user]');
|
|
101
|
+
}
|
|
102
|
+
if (parts.length === 0) {
|
|
103
|
+
return `Browser action "${action}" completed successfully.`;
|
|
127
104
|
}
|
|
128
|
-
// Page state
|
|
129
|
-
if (result.url)
|
|
130
|
-
parts.push(`URL: ${result.url}`);
|
|
131
|
-
if (result.title)
|
|
132
|
-
parts.push(`Title: ${result.title}`);
|
|
133
|
-
if (result.elementList)
|
|
134
|
-
parts.push(`\nElements:\n${result.elementList}`);
|
|
135
105
|
return parts.join('\n');
|
|
136
106
|
}
|
|
137
107
|
/**
|
|
@@ -174,65 +144,45 @@ function createBrowserTools(options) {
|
|
|
174
144
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
175
145
|
try {
|
|
176
146
|
const result = await waitForResult(action, args, toolCallId);
|
|
177
|
-
return formatResultForLLM(result, action
|
|
147
|
+
return formatResultForLLM(result, action);
|
|
178
148
|
}
|
|
179
149
|
catch (error) {
|
|
180
150
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
181
|
-
return
|
|
182
|
-
|
|
183
|
-
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
151
|
+
return `Browser action "${action}" failed: ${errorMessage}`;
|
|
184
152
|
}
|
|
185
153
|
};
|
|
186
154
|
};
|
|
187
|
-
// browser_click
|
|
155
|
+
// browser_click
|
|
188
156
|
tools.push(tool(createToolFunction('click'), {
|
|
189
157
|
name: EBrowserTools.CLICK,
|
|
190
|
-
description: `Click an element by its
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
- Links (a tags) to navigate
|
|
195
|
-
- Checkboxes, radio buttons to select
|
|
196
|
-
- Any clickable element that performs an action
|
|
197
|
-
|
|
198
|
-
**SEMANTIC ELEMENT IDENTIFICATION:**
|
|
199
|
-
- Look for aria-label="Send", aria-label="Submit" for action buttons
|
|
200
|
-
- Look for type="submit" for form submission
|
|
201
|
-
- Look for button text like "Send", "Submit", "Save"
|
|
202
|
-
- For email compose: Find the Send button, DON'T use keyboard shortcuts
|
|
203
|
-
|
|
204
|
-
Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,
|
|
158
|
+
description: `Click an element on the current web page by its index number.
|
|
159
|
+
The element list shows clickable items like: [0]<button>Submit</button> [1]<a href="/home">Home</a>
|
|
160
|
+
Use the index number in brackets to click that element.
|
|
161
|
+
After clicking, you receive an updated element list showing the new page state.`,
|
|
205
162
|
schema: BrowserClickSchema,
|
|
206
163
|
}));
|
|
207
164
|
// browser_type
|
|
208
165
|
tools.push(tool(createToolFunction('type'), {
|
|
209
166
|
name: EBrowserTools.TYPE,
|
|
210
|
-
description: `Type text into an input
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
- Textareas for longer content
|
|
215
|
-
- Contenteditable divs (like Gmail compose body)
|
|
216
|
-
|
|
217
|
-
**OPTIONS:**
|
|
218
|
-
- pressEnter: true → Press Enter after typing (for search forms)
|
|
219
|
-
- pressEnter: false → Just type without submitting
|
|
220
|
-
|
|
221
|
-
**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.
|
|
222
|
-
|
|
223
|
-
Returns verification showing if text was entered or form was submitted.`,
|
|
167
|
+
description: `Type text into an input element on the page.
|
|
168
|
+
Find the input element in the list by its index (e.g., [5]<input placeholder="Search">).
|
|
169
|
+
Set pressEnter: true to submit forms after typing.
|
|
170
|
+
After typing, you receive an updated element list.`,
|
|
224
171
|
schema: BrowserTypeSchema,
|
|
225
172
|
}));
|
|
226
173
|
// browser_navigate
|
|
227
174
|
tools.push(tool(createToolFunction('navigate'), {
|
|
228
175
|
name: EBrowserTools.NAVIGATE,
|
|
229
|
-
description: `Navigate to a URL
|
|
176
|
+
description: `Navigate to a URL. Always include the full URL with https://.
|
|
177
|
+
After navigation, you receive the new page's element list.`,
|
|
230
178
|
schema: BrowserNavigateSchema,
|
|
231
179
|
}));
|
|
232
180
|
// browser_scroll
|
|
233
181
|
tools.push(tool(createToolFunction('scroll'), {
|
|
234
182
|
name: EBrowserTools.SCROLL,
|
|
235
|
-
description: `Scroll the page
|
|
183
|
+
description: `Scroll the page to reveal more content.
|
|
184
|
+
Use 'down' to scroll down, 'up' to scroll up.
|
|
185
|
+
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
236
186
|
schema: BrowserScrollSchema,
|
|
237
187
|
}));
|
|
238
188
|
// browser_extract
|
|
@@ -267,52 +217,19 @@ After going back, you receive the previous page's element list.`,
|
|
|
267
217
|
// browser_screenshot
|
|
268
218
|
tools.push(tool(createToolFunction('screenshot'), {
|
|
269
219
|
name: EBrowserTools.SCREENSHOT,
|
|
270
|
-
description: `Capture a screenshot
|
|
220
|
+
description: `Capture a screenshot of the current page.
|
|
221
|
+
Returns the page state with a note that screenshot was displayed to the user.
|
|
222
|
+
Use browser_get_page_state to get the element list for automation.`,
|
|
271
223
|
schema: BrowserScreenshotSchema,
|
|
272
224
|
}));
|
|
273
225
|
// browser_get_page_state
|
|
274
226
|
tools.push(tool(createToolFunction('get_page_state'), {
|
|
275
227
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
276
|
-
description: `Get current page URL, title, and interactive elements
|
|
228
|
+
description: `Get the current page state including URL, title, and all interactive elements.
|
|
229
|
+
Use this at the start of a task to see what elements are available.
|
|
230
|
+
Returns a text list of elements with their index numbers for interaction.`,
|
|
277
231
|
schema: BrowserGetPageStateSchema,
|
|
278
232
|
}));
|
|
279
|
-
// browser_select_option
|
|
280
|
-
tools.push(tool(createToolFunction('select_option'), {
|
|
281
|
-
name: EBrowserTools.SELECT_OPTION,
|
|
282
|
-
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
283
|
-
schema: BrowserSelectOptionSchema,
|
|
284
|
-
}));
|
|
285
|
-
// browser_upload_file - Skyvern-inspired for file input handling
|
|
286
|
-
tools.push(tool(createToolFunction('upload_file'), {
|
|
287
|
-
name: EBrowserTools.UPLOAD_FILE,
|
|
288
|
-
description: `Upload a file to a file input element.
|
|
289
|
-
Provide the index of the file input and the URL of the file to upload.
|
|
290
|
-
The system will download the file and attach it to the input.
|
|
291
|
-
After upload, you receive an updated element list.`,
|
|
292
|
-
schema: BrowserUploadFileSchema,
|
|
293
|
-
}));
|
|
294
|
-
// browser_keypress - LAST RESORT for keyboard shortcuts
|
|
295
|
-
tools.push(tool(createToolFunction('keypress'), {
|
|
296
|
-
name: EBrowserTools.KEYPRESS,
|
|
297
|
-
description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.
|
|
298
|
-
|
|
299
|
-
**PREFER browser_click OVER keypress:**
|
|
300
|
-
- To send email → Find and CLICK the Send button
|
|
301
|
-
- To submit form → CLICK the Submit button
|
|
302
|
-
- To close modal → CLICK the X or Cancel button
|
|
303
|
-
|
|
304
|
-
**WHEN TO USE keypress:**
|
|
305
|
-
- Navigation: "Tab", "ArrowDown", "ArrowUp" in dropdowns
|
|
306
|
-
- Escape to close popups if no X button exists
|
|
307
|
-
- Control+A, Control+C for select/copy operations
|
|
308
|
-
- ONLY use Enter to submit if there's NO visible submit button
|
|
309
|
-
|
|
310
|
-
Single keys: "Enter", "Escape", "Tab", "ArrowDown"
|
|
311
|
-
Key combos: "Control+A", "Shift+Enter"
|
|
312
|
-
|
|
313
|
-
Returns verification showing if page state changed.`,
|
|
314
|
-
schema: BrowserKeypressSchema,
|
|
315
|
-
}));
|
|
316
233
|
return tools;
|
|
317
234
|
}
|
|
318
235
|
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n // Skyvern-inspired additions for robust form handling\n SELECT_OPTION: 'browser_select_option',\n UPLOAD_FILE: 'browser_upload_file',\n KEYPRESS: 'browser_keypress',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Action status - matches Skyvern's ActionStatus\n */\nexport type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';\n\n/**\n * Verification status - matches Skyvern's VerificationStatus\n * - complete: Goal achieved\n * - terminate: Goal cannot be achieved, stop\n * - continue: Goal not yet achieved, keep going\n */\nexport 
type VerificationStatus = 'complete' | 'terminate' | 'continue';\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n status?: ActionStatus;\n url?: string;\n title?: string;\n elementList?: string;\n error?: string;\n errorType?: string; // Typed error classification\n screenshot?: string;\n elementDescription?: string;\n // State verification (before/after comparison)\n verification?: {\n urlChanged: boolean;\n titleChanged: boolean;\n elementCountDelta: number;\n significantChange: boolean;\n dialogClosed: boolean;\n formGone: boolean;\n changeDescription: string;\n };\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n */\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. 
of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n// Skyvern-inspired schemas for robust form handling\nconst BrowserSelectOptionSchema = z.object({\n index: z\n .number()\n .describe('The index number of the select/dropdown element'),\n value: z\n .string()\n .optional()\n .describe('The value or label of the option to select. For native <select>, use the option text. 
For custom dropdowns, this is the option label to click.'),\n});\n\nconst BrowserUploadFileSchema = z.object({\n index: z\n .number()\n .describe('The index number of the file input element'),\n fileUrl: z\n .string()\n .describe('URL of the file to upload (the system will download and upload it)'),\n});\n\nconst BrowserKeypressSchema = z.object({\n keys: z\n .string()\n .describe('Key(s) to press. Single key: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\". Combo: \"Control+A\", \"Shift+Enter\"'),\n});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string,\n actionArgs?: Record<string, unknown>\n): string {\n if (!result.success && result.error) {\n const errorType = result.errorType ? 
`[${result.errorType}] ` : '';\n return `${errorType}Action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n // Verification result (Skyvern pattern)\n if (result.verification) {\n const v = result.verification;\n if (v.dialogClosed || v.formGone || v.urlChanged) {\n parts.push(`✓ ${v.changeDescription}`);\n } else if (!v.significantChange) {\n parts.push(`⚠ No change detected`);\n }\n }\n\n // Page state\n if (result.url) parts.push(`URL: ${result.url}`);\n if (result.title) parts.push(`Title: ${result.title}`);\n if (result.elementList) parts.push(`\\nElements:\\n${result.elementList}`);\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately 
(extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action, args);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return `❌ **ACTION FAILED**: Browser action \"${action}\" failed: ${errorMessage}\n\n**REQUIRED**: Analyze why the action failed and try an alternative approach.`;\n }\n };\n };\n\n // browser_click - PRIMARY action for buttons and links\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element by its [index] from the element list.\n\n**WHEN TO USE (prioritize this over keypress):**\n- Buttons labeled \"Send\", \"Submit\", \"Save\", \"OK\", \"Confirm\" → CLICK them\n- Links (a tags) to navigate\n- Checkboxes, radio buttons to select\n- Any clickable element that performs an action\n\n**SEMANTIC ELEMENT IDENTIFICATION:**\n- Look for aria-label=\"Send\", aria-label=\"Submit\" for action buttons\n- Look for type=\"submit\" for form submission\n- Look for button text like \"Send\", \"Submit\", \"Save\"\n- For email compose: Find the Send button, DON'T use keyboard shortcuts\n\nReturns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input/textarea element by [index].\n\n**WHEN TO USE:**\n- Input fields (fieldType=\"email\", \"search\", \"phone\", etc.)\n- Textareas for longer content\n- Contenteditable divs (like Gmail compose body)\n\n**OPTIONS:**\n- pressEnter: true → Press Enter after typing (for search forms)\n- pressEnter: false 
→ Just type without submitting\n\n**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.\n\nReturns verification showing if text was entered or form was submitted.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL (include https://). Returns new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page. Returns updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: 
EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot. Use browser_get_page_state for element list.`,\n schema: BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get current page URL, title, and interactive elements list with index numbers.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n // browser_select_option\n tools.push(\n tool(createToolFunction('select_option'), {\n name: EBrowserTools.SELECT_OPTION,\n description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,\n schema: BrowserSelectOptionSchema,\n })\n );\n\n // browser_upload_file - Skyvern-inspired for file input handling\n tools.push(\n tool(createToolFunction('upload_file'), {\n name: EBrowserTools.UPLOAD_FILE,\n description: `Upload a file to a file input element.\nProvide the index of the file input and the URL of the file to upload.\nThe system will download the file and attach it to the input.\nAfter upload, you receive an updated element list.`,\n schema: BrowserUploadFileSchema,\n })\n );\n\n // browser_keypress - LAST RESORT for keyboard shortcuts\n tools.push(\n tool(createToolFunction('keypress'), {\n name: EBrowserTools.KEYPRESS,\n description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.\n\n**PREFER browser_click OVER keypress:**\n- To send email → Find and CLICK the Send button\n- To submit form → CLICK the Submit button\n- To close modal → CLICK the X or Cancel button\n\n**WHEN TO USE keypress:**\n- Navigation: \"Tab\", \"ArrowDown\", \"ArrowUp\" in dropdowns\n- Escape to close popups if no X button exists\n- Control+A, Control+C for select/copy operations\n- ONLY use Enter to submit if there's NO visible submit button\n\nSingle keys: \"Enter\", \"Escape\", \"Tab\", \"ArrowDown\"\nKey combos: \"Control+A\", \"Shift+Enter\"\n\nReturns verification showing if page state changed.`,\n 
schema: BrowserKeypressSchema,\n })\n );\n\n return tools;\n}\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAExC,IAAA,aAAa,EAAE,uBAAuB;AACtC,IAAA,WAAW,EAAE,qBAAqB;AAClC,IAAA,QAAQ,EAAE,kBAAkB;;AA4D9B;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAE;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAE;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,E
AAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE9C;AACA,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;AACzC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,iDAAiD,CAAC;AAC9D,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,gJAAgJ,CAAC;AAC9J,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;AACvC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,4CAA4C,CAAC;AACzD,IAAA,OAAO,EAAE;AACN,SAAA,MAAM;SACN,QAAQ,CAAC,oEAAoE,CAAC;AAClF,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,IAAI,EAAE;AACH,SAAA,MAAM;SACN,QAAQ,CAAC,uGAAuG,CAAC;AACrH,CAAA,CAAC;AAyBF;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EACd,UAAoC,EAAA;IAEpC,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,GAAG,CAAI,CAAA,EAAA,MAAM,CAAC,SAAS,CAAA,EAAA,CAAI,GAAG,EAAE;QAClE,OAAO,CAAA,EAAG,SAAS,CAAW,QAAA,EAAA,MAAM,aAAa,MAAM,CAAC,KAAK,CAAA,CAAE;;IAGjE,MAAM,KAAK,GAAa,EAAE;;AAG1B,IAAA,IAAI,MAAM,CAAC,YAAY,EAAE;AACvB,QAAA,MAAM,CAAC,GAAG,MAAM,CAAC,YAAY;AAC7B,QAAA,IAAI,CAAC,CAAC,YAAY,IAAI,CAAC,CAAC,QAAQ,IAAI,CAAC,CAAC,UAAU,EAAE;YAChD,KAAK,CAAC,IAAI,CAAC,CAAA,EAAA,EAAK,CAAC,CAAC,iBAAiB,CAAE,CAAA,CAAC;;AACjC,aAAA,IAAI,CAAC,CAAC,CAAC,iBAAiB,EAAE;AAC/B,YAAA,KAAK,CAAC,IAAI,CAAC,CAAA,oBAAA,CAAsB,CAAC;;;;IAKtC,IAAI,MAAM,CAAC,GAAG;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,KAAA,EAAQ,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;IAChD,IAAI,MAAM,CAAC,KAAK;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,OAAA,EAAU,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;IACtD,IAAI,MAAM,CAAC,WAAW;QAAE,KAAK,CAAC,IAAI,CAAC,CAAA,aAAA,EAAgB,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;AAExE,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAA
wB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;gBAC5D,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,CAAC;;YAC/C,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;gBACxD,OAAO,CAAA,qCAAA,EAAwC,MAAM,CAAA,UAAA,EAAa,YAAY;;6EAET;;AAEzE,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;AAcsF,wGAAA,CAAA;AACnG,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;AAaqD,uEAAA,CAAA;AAClE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAwE,sEAAA,CAAA;AACrF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAA4E,0EAAA,CAAA;AACzF,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,
CACR,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAoE,kEAAA,CAAA;AACjF,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAgF,8EAAA,CAAA;AAC7F,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,eAAe,CAAC,EAAE;QACxC,IAAI,EAAE,aAAa,CAAC,aAAa;AACjC,QAAA,WAAW,EAAE,CAAsF,oFAAA,CAAA;AACnG,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,aAAa,CAAC,EAAE;QACtC,IAAI,EAAE,aAAa,CAAC,WAAW;AAC/B,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;;;;;;;;;;;;;;;;AAgBiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\nimport type * as _t from '@/types';\n\n/**\n * Browser tool names - keep in sync with ranger-browser extension\n * These tools execute locally in the browser extension, NOT on the server\n */\nexport const EBrowserTools = {\n CLICK: 'browser_click',\n TYPE: 'browser_type',\n NAVIGATE: 'browser_navigate',\n SCROLL: 'browser_scroll',\n EXTRACT: 'browser_extract',\n HOVER: 'browser_hover',\n WAIT: 'browser_wait',\n BACK: 'browser_back',\n SCREENSHOT: 'browser_screenshot',\n GET_PAGE_STATE: 'browser_get_page_state',\n} as const;\n\nexport type BrowserToolName =\n (typeof EBrowserTools)[keyof typeof EBrowserTools];\n\n/**\n * Callback function type for waiting on browser action results\n * This allows the server (Ranger) to provide a callback that waits for the extension\n * to POST results back to the server before returning to the LLM.\n *\n * @param action - The browser action (click, type, navigate, etc.)\n * @param args - Arguments for the action\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\n * @returns Promise that resolves with the actual browser result (page state, etc.)\n */\nexport type BrowserToolCallback = (\n action: string,\n args: Record<string, unknown>,\n toolCallId: string\n) => Promise<BrowserActionResult>;\n\n/**\n * Result returned from browser action execution\n */\nexport interface BrowserActionResult {\n success: boolean;\n url?: string;\n title?: string;\n elementList?: string; // Text-based element list\n error?: string;\n screenshot?: string; // Base64 screenshot (if requested)\n}\n\n/**\n * Check if browser capability is available based on request headers or context\n * The browser extension sets these headers when connected:\n * - X-Ranger-Browser-Extension: true\n * - X-Ranger-Browser-Capable: true\n 
*/\nexport function hasBrowserCapability(req?: {\n headers?: Record<string, string | string[] | undefined>;\n}): boolean {\n if (!req?.headers) {\n return false;\n }\n\n const browserExtension = req.headers['x-ranger-browser-extension'];\n const browserCapable = req.headers['x-ranger-browser-capable'];\n\n return browserExtension === 'true' || browserCapable === 'true';\n}\n\n// Tool schemas\nconst BrowserClickSchema = z.object({\n index: z\n .number()\n .describe(\n 'The index number [0], [1], etc. of the element to click from the page state element list'\n ),\n});\n\nconst BrowserTypeSchema = z.object({\n index: z\n .number()\n .describe('The index number of the input element to type into'),\n text: z.string().describe('The text to type into the element'),\n pressEnter: z\n .boolean()\n .optional()\n .describe('Whether to press Enter after typing (useful for search forms)'),\n});\n\nconst BrowserNavigateSchema = z.object({\n url: z\n .string()\n .describe('The full URL to navigate to (must include https://)'),\n});\n\nconst BrowserScrollSchema = z.object({\n direction: z\n .enum(['up', 'down', 'left', 'right'])\n .describe('Direction to scroll'),\n amount: z\n .number()\n .optional()\n .describe('Pixels to scroll (default: one viewport height)'),\n});\n\nconst BrowserExtractSchema = z.object({\n query: z\n .string()\n .optional()\n .describe('Optional: specific content to extract from the page'),\n});\n\nconst BrowserHoverSchema = z.object({\n index: z.number().describe('The index number of the element to hover over'),\n});\n\nconst BrowserWaitSchema = z.object({\n duration: z\n .number()\n .optional()\n .describe('Milliseconds to wait (default: 1000)'),\n});\n\nconst BrowserBackSchema = z.object({});\n\nconst BrowserScreenshotSchema = z.object({});\n\nconst BrowserGetPageStateSchema = z.object({});\n\n/**\n * Browser tool response interface\n * This is what the extension returns after executing the action\n */\nexport interface BrowserToolResponse {\n 
requiresBrowserExecution: true;\n action: string;\n args: Record<string, unknown>;\n toolCallId?: string; // Added to help extension correlate with callback\n}\n\n/**\n * Options for creating browser tools\n */\nexport interface CreateBrowserToolsOptions {\n /**\n * Optional callback that waits for browser action results.\n * When provided, tools will await this callback to get actual results from the extension.\n * When not provided, tools return markers immediately (for non-server contexts).\n */\n waitForResult?: BrowserToolCallback;\n}\n\n/**\n * Format browser action result for LLM consumption\n */\nfunction formatResultForLLM(\n result: BrowserActionResult,\n action: string\n): string {\n if (!result.success && result.error) {\n return `Browser action \"${action}\" failed: ${result.error}`;\n }\n\n const parts: string[] = [];\n\n if (result.url != null && result.url !== '') {\n parts.push(`**Current URL:** ${result.url}`);\n }\n if (result.title != null && result.title !== '') {\n parts.push(`**Page Title:** ${result.title}`);\n }\n if (result.elementList != null && result.elementList !== '') {\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\n }\n if (result.screenshot != null && result.screenshot !== '') {\n parts.push('\\n[Screenshot captured and displayed to user]');\n }\n\n if (parts.length === 0) {\n return `Browser action \"${action}\" completed successfully.`;\n }\n\n return parts.join('\\n');\n}\n\n/**\n * Create browser tools with optional callback for waiting on results\n *\n * When waitForResult callback is provided:\n * 1. Tool returns marker that triggers extension\n * 2. Tool then awaits callback to get actual results\n * 3. Returns real page state to LLM\n *\n * When no callback:\n * 1. 
Tool returns marker only (for non-server contexts)\n *\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\n */\nexport function createBrowserTools(\n options?: CreateBrowserToolsOptions\n): DynamicStructuredTool[] {\n const { waitForResult } = options || {};\n const tools: DynamicStructuredTool[] = [];\n\n /**\n * Helper to create tool function that optionally waits for results\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\n */\n const createToolFunction = (action: string) => {\n return async (\n args: Record<string, unknown>,\n config?: { toolCall?: { id?: string } }\n ): Promise<string> => {\n const toolCallId =\n config?.toolCall?.id ??\n `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\n\n // Create marker for extension\n const marker: BrowserToolResponse = {\n requiresBrowserExecution: true,\n action,\n args,\n toolCallId,\n };\n\n // If no callback, return marker immediately (extension handles via SSE interception)\n if (!waitForResult) {\n return JSON.stringify(marker);\n }\n\n // With callback: wait for actual results from extension\n // The marker is still returned initially via SSE, but we wait for the callback\n try {\n const result = await waitForResult(action, args, toolCallId);\n return formatResultForLLM(result, action);\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return `Browser action \"${action}\" failed: ${errorMessage}`;\n }\n };\n };\n\n // browser_click\n tools.push(\n tool(createToolFunction('click'), {\n name: EBrowserTools.CLICK,\n description: `Click an element on the current web page by its index number.\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\nUse the index number in brackets to click that element.\nAfter clicking, you receive an updated element list showing the new page state.`,\n schema: BrowserClickSchema,\n })\n );\n\n // browser_type\n tools.push(\n tool(createToolFunction('type'), {\n name: EBrowserTools.TYPE,\n description: `Type text into an input element on the page.\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\nSet pressEnter: true to submit forms after typing.\nAfter typing, you receive an updated element list.`,\n schema: BrowserTypeSchema,\n })\n );\n\n // browser_navigate\n tools.push(\n tool(createToolFunction('navigate'), {\n name: EBrowserTools.NAVIGATE,\n description: `Navigate to a URL. 
Always include the full URL with https://.\nAfter navigation, you receive the new page's element list.`,\n schema: BrowserNavigateSchema,\n })\n );\n\n // browser_scroll\n tools.push(\n tool(createToolFunction('scroll'), {\n name: EBrowserTools.SCROLL,\n description: `Scroll the page to reveal more content.\nUse 'down' to scroll down, 'up' to scroll up.\nAfter scrolling, you receive an updated element list with newly visible elements.`,\n schema: BrowserScrollSchema,\n })\n );\n\n // browser_extract\n tools.push(\n tool(createToolFunction('extract'), {\n name: EBrowserTools.EXTRACT,\n description: `Extract content from the current page.\nReturns page URL, title, and element list.`,\n schema: BrowserExtractSchema,\n })\n );\n\n // browser_hover\n tools.push(\n tool(createToolFunction('hover'), {\n name: EBrowserTools.HOVER,\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\n schema: BrowserHoverSchema,\n })\n );\n\n // browser_wait\n tools.push(\n tool(createToolFunction('wait'), {\n name: EBrowserTools.WAIT,\n description: `Wait for a specified duration for page content to load.\nUse this after actions that trigger async content loading.\nAfter waiting, you receive an updated element list.`,\n schema: BrowserWaitSchema,\n })\n );\n\n // browser_back\n tools.push(\n tool(createToolFunction('back'), {\n name: EBrowserTools.BACK,\n description: `Go back to the previous page in browser history.\nAfter going back, you receive the previous page's element list.`,\n schema: BrowserBackSchema,\n })\n );\n\n // browser_screenshot\n tools.push(\n tool(createToolFunction('screenshot'), {\n name: EBrowserTools.SCREENSHOT,\n description: `Capture a screenshot of the current page.\nReturns the page state with a note that screenshot was displayed to the user.\nUse browser_get_page_state to get the element list for automation.`,\n schema: 
BrowserScreenshotSchema,\n })\n );\n\n // browser_get_page_state\n tools.push(\n tool(createToolFunction('get_page_state'), {\n name: EBrowserTools.GET_PAGE_STATE,\n description: `Get the current page state including URL, title, and all interactive elements.\nUse this at the start of a task to see what elements are available.\nReturns a text list of elements with their index numbers for interaction.`,\n schema: BrowserGetPageStateSchema,\n })\n );\n\n return tools;\n}\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAkC1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAEpC,EAAA;AACC,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CACP,0FAA0F,CAC3F;AACJ,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;SACN,QAAQ,CAAC,oDAAoD,CAAC;IACjE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE;AACT,SAAA,OAAO;AACP,SAAA,QAAQ;SACR,QAAQ,CAAC,+DAA+D,CAAC;AAC7E,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;AACrC,IAAA,GAAG,EAAE;AACF,SAAA,MAAM;SACN,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE;SACR,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC;SACpC,QAAQ,CAAC,qBAAqB,CAAC;AAClC,IAAA,MAAM,EAAE;AACL,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,iDAAiD,CAAC;AAC/D,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE;AACJ,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,qDAAqD,CAAC;AACnE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,
CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE;AACP,SAAA,MAAM;AACN,SAAA,QAAQ;SACR,QAAQ,CAAC,sCAAsC,CAAC;AACpD,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAyB9C;;AAEG;AACH,SAAS,kBAAkB,CACzB,MAA2B,EAC3B,MAAc,EAAA;IAEd,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,IAAI,IAAI,IAAI,MAAM,CAAC,GAAG,KAAK,EAAE,EAAE;QAC3C,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,IAAI,IAAI,IAAI,MAAM,CAAC,KAAK,KAAK,EAAE,EAAE;QAC/C,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,IAAI,IAAI,IAAI,MAAM,CAAC,WAAW,KAAK,EAAE,EAAE;QAC3D,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,IAAI,IAAI,IAAI,MAAM,CAAC,UAAU,KAAK,EAAE,EAAE;AACzD,QAAA,KAAK,CAAC,IAAI,CAAC,+CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAChC,OAAmC,EAAA;AAEnC,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OACL,IAA6B,EAC7B,MAAuC,KACpB;AACnB,YAAA,MAAM,UAAU,GACd,MAAM,EAAE,QAAQ,EAAE,EAAE;gBACpB,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAG7D,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,
MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AACxD,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG6D,+EAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGgC,kDAAA,CAAA;AAC7C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,UAAU,CAAC,EAAE;QACnC,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACwC,0DAAA,CAAA;AACrD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,EAAE;QACjC,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE+D,iFAAA,CAAA;AAC5E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,SAAS,CAAC,EAAE;QAClC,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACwB,0CAAA,CAAA;AACrC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,EAAE;QAChC,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACmE,qFAAA,CAAA;AAChF,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAEiC,mDAAA,CAAA;AAC9C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,MAAM,CAAC,EAAE;QAC/B,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC6C,+DAAA,CAAA;AAC1D,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,EAAE;QACrC,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEgD,kEAAA,CAAA;AAC7D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CAAC,CACH;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CAAC,kBAAkB,CAAC,gBAAgB,CAAC,EAAE;QACzC,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QA
AA,WAAW,EAAE,CAAA;;AAEuD,yEAAA,CAAA;AACpE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CAAC,CACH;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
@@ -14,9 +14,6 @@ export declare const EBrowserTools: {
|
|
|
14
14
|
readonly BACK: "browser_back";
|
|
15
15
|
readonly SCREENSHOT: "browser_screenshot";
|
|
16
16
|
readonly GET_PAGE_STATE: "browser_get_page_state";
|
|
17
|
-
readonly SELECT_OPTION: "browser_select_option";
|
|
18
|
-
readonly UPLOAD_FILE: "browser_upload_file";
|
|
19
|
-
readonly KEYPRESS: "browser_keypress";
|
|
20
17
|
};
|
|
21
18
|
export type BrowserToolName = (typeof EBrowserTools)[keyof typeof EBrowserTools];
|
|
22
19
|
/**
|
|
@@ -30,39 +27,16 @@ export type BrowserToolName = (typeof EBrowserTools)[keyof typeof EBrowserTools]
|
|
|
30
27
|
* @returns Promise that resolves with the actual browser result (page state, etc.)
|
|
31
28
|
*/
|
|
32
29
|
export type BrowserToolCallback = (action: string, args: Record<string, unknown>, toolCallId: string) => Promise<BrowserActionResult>;
|
|
33
|
-
/**
|
|
34
|
-
* Action status - matches Skyvern's ActionStatus
|
|
35
|
-
*/
|
|
36
|
-
export type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';
|
|
37
|
-
/**
|
|
38
|
-
* Verification status - matches Skyvern's VerificationStatus
|
|
39
|
-
* - complete: Goal achieved
|
|
40
|
-
* - terminate: Goal cannot be achieved, stop
|
|
41
|
-
* - continue: Goal not yet achieved, keep going
|
|
42
|
-
*/
|
|
43
|
-
export type VerificationStatus = 'complete' | 'terminate' | 'continue';
|
|
44
30
|
/**
|
|
45
31
|
* Result returned from browser action execution
|
|
46
32
|
*/
|
|
47
33
|
export interface BrowserActionResult {
|
|
48
34
|
success: boolean;
|
|
49
|
-
status?: ActionStatus;
|
|
50
35
|
url?: string;
|
|
51
36
|
title?: string;
|
|
52
37
|
elementList?: string;
|
|
53
38
|
error?: string;
|
|
54
|
-
errorType?: string;
|
|
55
39
|
screenshot?: string;
|
|
56
|
-
elementDescription?: string;
|
|
57
|
-
verification?: {
|
|
58
|
-
urlChanged: boolean;
|
|
59
|
-
titleChanged: boolean;
|
|
60
|
-
elementCountDelta: number;
|
|
61
|
-
significantChange: boolean;
|
|
62
|
-
dialogClosed: boolean;
|
|
63
|
-
formGone: boolean;
|
|
64
|
-
changeDescription: string;
|
|
65
|
-
};
|
|
66
40
|
}
|
|
67
41
|
/**
|
|
68
42
|
* Check if browser capability is available based on request headers or context
|
package/package.json
CHANGED
|
@@ -17,10 +17,6 @@ export const EBrowserTools = {
|
|
|
17
17
|
BACK: 'browser_back',
|
|
18
18
|
SCREENSHOT: 'browser_screenshot',
|
|
19
19
|
GET_PAGE_STATE: 'browser_get_page_state',
|
|
20
|
-
// Skyvern-inspired additions for robust form handling
|
|
21
|
-
SELECT_OPTION: 'browser_select_option',
|
|
22
|
-
UPLOAD_FILE: 'browser_upload_file',
|
|
23
|
-
KEYPRESS: 'browser_keypress',
|
|
24
20
|
} as const;
|
|
25
21
|
|
|
26
22
|
export type BrowserToolName =
|
|
@@ -42,42 +38,16 @@ export type BrowserToolCallback = (
|
|
|
42
38
|
toolCallId: string
|
|
43
39
|
) => Promise<BrowserActionResult>;
|
|
44
40
|
|
|
45
|
-
/**
|
|
46
|
-
* Action status - matches Skyvern's ActionStatus
|
|
47
|
-
*/
|
|
48
|
-
export type ActionStatus = 'pending' | 'completed' | 'failed' | 'skipped';
|
|
49
|
-
|
|
50
|
-
/**
|
|
51
|
-
* Verification status - matches Skyvern's VerificationStatus
|
|
52
|
-
* - complete: Goal achieved
|
|
53
|
-
* - terminate: Goal cannot be achieved, stop
|
|
54
|
-
* - continue: Goal not yet achieved, keep going
|
|
55
|
-
*/
|
|
56
|
-
export type VerificationStatus = 'complete' | 'terminate' | 'continue';
|
|
57
|
-
|
|
58
41
|
/**
|
|
59
42
|
* Result returned from browser action execution
|
|
60
43
|
*/
|
|
61
44
|
export interface BrowserActionResult {
|
|
62
45
|
success: boolean;
|
|
63
|
-
status?: ActionStatus;
|
|
64
46
|
url?: string;
|
|
65
47
|
title?: string;
|
|
66
|
-
elementList?: string;
|
|
48
|
+
elementList?: string; // Text-based element list
|
|
67
49
|
error?: string;
|
|
68
|
-
|
|
69
|
-
screenshot?: string;
|
|
70
|
-
elementDescription?: string;
|
|
71
|
-
// State verification (before/after comparison)
|
|
72
|
-
verification?: {
|
|
73
|
-
urlChanged: boolean;
|
|
74
|
-
titleChanged: boolean;
|
|
75
|
-
elementCountDelta: number;
|
|
76
|
-
significantChange: boolean;
|
|
77
|
-
dialogClosed: boolean;
|
|
78
|
-
formGone: boolean;
|
|
79
|
-
changeDescription: string;
|
|
80
|
-
};
|
|
50
|
+
screenshot?: string; // Base64 screenshot (if requested)
|
|
81
51
|
}
|
|
82
52
|
|
|
83
53
|
/**
|
|
@@ -159,32 +129,6 @@ const BrowserScreenshotSchema = z.object({});
|
|
|
159
129
|
|
|
160
130
|
const BrowserGetPageStateSchema = z.object({});
|
|
161
131
|
|
|
162
|
-
// Skyvern-inspired schemas for robust form handling
|
|
163
|
-
const BrowserSelectOptionSchema = z.object({
|
|
164
|
-
index: z
|
|
165
|
-
.number()
|
|
166
|
-
.describe('The index number of the select/dropdown element'),
|
|
167
|
-
value: z
|
|
168
|
-
.string()
|
|
169
|
-
.optional()
|
|
170
|
-
.describe('The value or label of the option to select. For native <select>, use the option text. For custom dropdowns, this is the option label to click.'),
|
|
171
|
-
});
|
|
172
|
-
|
|
173
|
-
const BrowserUploadFileSchema = z.object({
|
|
174
|
-
index: z
|
|
175
|
-
.number()
|
|
176
|
-
.describe('The index number of the file input element'),
|
|
177
|
-
fileUrl: z
|
|
178
|
-
.string()
|
|
179
|
-
.describe('URL of the file to upload (the system will download and upload it)'),
|
|
180
|
-
});
|
|
181
|
-
|
|
182
|
-
const BrowserKeypressSchema = z.object({
|
|
183
|
-
keys: z
|
|
184
|
-
.string()
|
|
185
|
-
.describe('Key(s) to press. Single key: "Enter", "Escape", "Tab", "ArrowDown". Combo: "Control+A", "Shift+Enter"'),
|
|
186
|
-
});
|
|
187
|
-
|
|
188
132
|
/**
|
|
189
133
|
* Browser tool response interface
|
|
190
134
|
* This is what the extension returns after executing the action
|
|
@@ -213,30 +157,30 @@ export interface CreateBrowserToolsOptions {
|
|
|
213
157
|
*/
|
|
214
158
|
function formatResultForLLM(
|
|
215
159
|
result: BrowserActionResult,
|
|
216
|
-
action: string
|
|
217
|
-
actionArgs?: Record<string, unknown>
|
|
160
|
+
action: string
|
|
218
161
|
): string {
|
|
219
162
|
if (!result.success && result.error) {
|
|
220
|
-
|
|
221
|
-
return `${errorType}Action "${action}" failed: ${result.error}`;
|
|
163
|
+
return `Browser action "${action}" failed: ${result.error}`;
|
|
222
164
|
}
|
|
223
165
|
|
|
224
166
|
const parts: string[] = [];
|
|
225
167
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
}
|
|
168
|
+
if (result.url != null && result.url !== '') {
|
|
169
|
+
parts.push(`**Current URL:** ${result.url}`);
|
|
170
|
+
}
|
|
171
|
+
if (result.title != null && result.title !== '') {
|
|
172
|
+
parts.push(`**Page Title:** ${result.title}`);
|
|
173
|
+
}
|
|
174
|
+
if (result.elementList != null && result.elementList !== '') {
|
|
175
|
+
parts.push(`\n**Interactive Elements:**\n${result.elementList}`);
|
|
176
|
+
}
|
|
177
|
+
if (result.screenshot != null && result.screenshot !== '') {
|
|
178
|
+
parts.push('\n[Screenshot captured and displayed to user]');
|
|
234
179
|
}
|
|
235
180
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
if (result.elementList) parts.push(`\nElements:\n${result.elementList}`);
|
|
181
|
+
if (parts.length === 0) {
|
|
182
|
+
return `Browser action "${action}" completed successfully.`;
|
|
183
|
+
}
|
|
240
184
|
|
|
241
185
|
return parts.join('\n');
|
|
242
186
|
}
|
|
@@ -291,36 +235,23 @@ export function createBrowserTools(
|
|
|
291
235
|
// The marker is still returned initially via SSE, but we wait for the callback
|
|
292
236
|
try {
|
|
293
237
|
const result = await waitForResult(action, args, toolCallId);
|
|
294
|
-
return formatResultForLLM(result, action
|
|
238
|
+
return formatResultForLLM(result, action);
|
|
295
239
|
} catch (error) {
|
|
296
240
|
const errorMessage =
|
|
297
241
|
error instanceof Error ? error.message : String(error);
|
|
298
|
-
return
|
|
299
|
-
|
|
300
|
-
**REQUIRED**: Analyze why the action failed and try an alternative approach.`;
|
|
242
|
+
return `Browser action "${action}" failed: ${errorMessage}`;
|
|
301
243
|
}
|
|
302
244
|
};
|
|
303
245
|
};
|
|
304
246
|
|
|
305
|
-
// browser_click
|
|
247
|
+
// browser_click
|
|
306
248
|
tools.push(
|
|
307
249
|
tool(createToolFunction('click'), {
|
|
308
250
|
name: EBrowserTools.CLICK,
|
|
309
|
-
description: `Click an element by its
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
- Links (a tags) to navigate
|
|
314
|
-
- Checkboxes, radio buttons to select
|
|
315
|
-
- Any clickable element that performs an action
|
|
316
|
-
|
|
317
|
-
**SEMANTIC ELEMENT IDENTIFICATION:**
|
|
318
|
-
- Look for aria-label="Send", aria-label="Submit" for action buttons
|
|
319
|
-
- Look for type="submit" for form submission
|
|
320
|
-
- Look for button text like "Send", "Submit", "Save"
|
|
321
|
-
- For email compose: Find the Send button, DON'T use keyboard shortcuts
|
|
322
|
-
|
|
323
|
-
Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (action may have failed).`,
|
|
251
|
+
description: `Click an element on the current web page by its index number.
|
|
252
|
+
The element list shows clickable items like: [0]<button>Submit</button> [1]<a href="/home">Home</a>
|
|
253
|
+
Use the index number in brackets to click that element.
|
|
254
|
+
After clicking, you receive an updated element list showing the new page state.`,
|
|
324
255
|
schema: BrowserClickSchema,
|
|
325
256
|
})
|
|
326
257
|
);
|
|
@@ -329,20 +260,10 @@ Returns verification: dialog closed, form submitted, URL changed, or NO CHANGE (
|
|
|
329
260
|
tools.push(
|
|
330
261
|
tool(createToolFunction('type'), {
|
|
331
262
|
name: EBrowserTools.TYPE,
|
|
332
|
-
description: `Type text into an input
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
- Textareas for longer content
|
|
337
|
-
- Contenteditable divs (like Gmail compose body)
|
|
338
|
-
|
|
339
|
-
**OPTIONS:**
|
|
340
|
-
- pressEnter: true → Press Enter after typing (for search forms)
|
|
341
|
-
- pressEnter: false → Just type without submitting
|
|
342
|
-
|
|
343
|
-
**DO NOT use pressEnter to send emails** - find and CLICK the Send button instead.
|
|
344
|
-
|
|
345
|
-
Returns verification showing if text was entered or form was submitted.`,
|
|
263
|
+
description: `Type text into an input element on the page.
|
|
264
|
+
Find the input element in the list by its index (e.g., [5]<input placeholder="Search">).
|
|
265
|
+
Set pressEnter: true to submit forms after typing.
|
|
266
|
+
After typing, you receive an updated element list.`,
|
|
346
267
|
schema: BrowserTypeSchema,
|
|
347
268
|
})
|
|
348
269
|
);
|
|
@@ -351,7 +272,8 @@ Returns verification showing if text was entered or form was submitted.`,
|
|
|
351
272
|
tools.push(
|
|
352
273
|
tool(createToolFunction('navigate'), {
|
|
353
274
|
name: EBrowserTools.NAVIGATE,
|
|
354
|
-
description: `Navigate to a URL
|
|
275
|
+
description: `Navigate to a URL. Always include the full URL with https://.
|
|
276
|
+
After navigation, you receive the new page's element list.`,
|
|
355
277
|
schema: BrowserNavigateSchema,
|
|
356
278
|
})
|
|
357
279
|
);
|
|
@@ -360,7 +282,9 @@ Returns verification showing if text was entered or form was submitted.`,
|
|
|
360
282
|
tools.push(
|
|
361
283
|
tool(createToolFunction('scroll'), {
|
|
362
284
|
name: EBrowserTools.SCROLL,
|
|
363
|
-
description: `Scroll the page
|
|
285
|
+
description: `Scroll the page to reveal more content.
|
|
286
|
+
Use 'down' to scroll down, 'up' to scroll up.
|
|
287
|
+
After scrolling, you receive an updated element list with newly visible elements.`,
|
|
364
288
|
schema: BrowserScrollSchema,
|
|
365
289
|
})
|
|
366
290
|
);
|
|
@@ -410,7 +334,9 @@ After going back, you receive the previous page's element list.`,
|
|
|
410
334
|
tools.push(
|
|
411
335
|
tool(createToolFunction('screenshot'), {
|
|
412
336
|
name: EBrowserTools.SCREENSHOT,
|
|
413
|
-
description: `Capture a screenshot
|
|
337
|
+
description: `Capture a screenshot of the current page.
|
|
338
|
+
Returns the page state with a note that screenshot was displayed to the user.
|
|
339
|
+
Use browser_get_page_state to get the element list for automation.`,
|
|
414
340
|
schema: BrowserScreenshotSchema,
|
|
415
341
|
})
|
|
416
342
|
);
|
|
@@ -419,56 +345,12 @@ After going back, you receive the previous page's element list.`,
|
|
|
419
345
|
tools.push(
|
|
420
346
|
tool(createToolFunction('get_page_state'), {
|
|
421
347
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
422
|
-
description: `Get current page URL, title, and interactive elements
|
|
348
|
+
description: `Get the current page state including URL, title, and all interactive elements.
|
|
349
|
+
Use this at the start of a task to see what elements are available.
|
|
350
|
+
Returns a text list of elements with their index numbers for interaction.`,
|
|
423
351
|
schema: BrowserGetPageStateSchema,
|
|
424
352
|
})
|
|
425
353
|
);
|
|
426
354
|
|
|
427
|
-
// browser_select_option
|
|
428
|
-
tools.push(
|
|
429
|
-
tool(createToolFunction('select_option'), {
|
|
430
|
-
name: EBrowserTools.SELECT_OPTION,
|
|
431
|
-
description: `Select an option from a dropdown. Handles both native <select> and custom dropdowns.`,
|
|
432
|
-
schema: BrowserSelectOptionSchema,
|
|
433
|
-
})
|
|
434
|
-
);
|
|
435
|
-
|
|
436
|
-
// browser_upload_file - Skyvern-inspired for file input handling
|
|
437
|
-
tools.push(
|
|
438
|
-
tool(createToolFunction('upload_file'), {
|
|
439
|
-
name: EBrowserTools.UPLOAD_FILE,
|
|
440
|
-
description: `Upload a file to a file input element.
|
|
441
|
-
Provide the index of the file input and the URL of the file to upload.
|
|
442
|
-
The system will download the file and attach it to the input.
|
|
443
|
-
After upload, you receive an updated element list.`,
|
|
444
|
-
schema: BrowserUploadFileSchema,
|
|
445
|
-
})
|
|
446
|
-
);
|
|
447
|
-
|
|
448
|
-
// browser_keypress - LAST RESORT for keyboard shortcuts
|
|
449
|
-
tools.push(
|
|
450
|
-
tool(createToolFunction('keypress'), {
|
|
451
|
-
name: EBrowserTools.KEYPRESS,
|
|
452
|
-
description: `Press keyboard key(s) - USE ONLY AS LAST RESORT after clicking fails.
|
|
453
|
-
|
|
454
|
-
**PREFER browser_click OVER keypress:**
|
|
455
|
-
- To send email → Find and CLICK the Send button
|
|
456
|
-
- To submit form → CLICK the Submit button
|
|
457
|
-
- To close modal → CLICK the X or Cancel button
|
|
458
|
-
|
|
459
|
-
**WHEN TO USE keypress:**
|
|
460
|
-
- Navigation: "Tab", "ArrowDown", "ArrowUp" in dropdowns
|
|
461
|
-
- Escape to close popups if no X button exists
|
|
462
|
-
- Control+A, Control+C for select/copy operations
|
|
463
|
-
- ONLY use Enter to submit if there's NO visible submit button
|
|
464
|
-
|
|
465
|
-
Single keys: "Enter", "Escape", "Tab", "ArrowDown"
|
|
466
|
-
Key combos: "Control+A", "Shift+Enter"
|
|
467
|
-
|
|
468
|
-
Returns verification showing if page state changed.`,
|
|
469
|
-
schema: BrowserKeypressSchema,
|
|
470
|
-
})
|
|
471
|
-
);
|
|
472
|
-
|
|
473
355
|
return tools;
|
|
474
356
|
}
|