illuma-agents 1.0.26 → 1.0.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/tools/BrowserTools.cjs +79 -88
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/tools/BrowserTools.mjs +79 -88
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/tools/BrowserTools.d.ts +44 -4
- package/package.json +1 -1
- package/src/tools/BrowserTools.ts +128 -88
|
@@ -62,25 +62,79 @@ const BrowserBackSchema = zod.z.object({});
|
|
|
62
62
|
const BrowserScreenshotSchema = zod.z.object({});
|
|
63
63
|
const BrowserGetPageStateSchema = zod.z.object({});
|
|
64
64
|
/**
|
|
65
|
-
*
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
* Format browser action result for LLM consumption
|
|
66
|
+
*/
|
|
67
|
+
function formatResultForLLM(result, action) {
|
|
68
|
+
if (!result.success && result.error) {
|
|
69
|
+
return `Browser action "${action}" failed: ${result.error}`;
|
|
70
|
+
}
|
|
71
|
+
const parts = [];
|
|
72
|
+
if (result.url) {
|
|
73
|
+
parts.push(`**Current URL:** ${result.url}`);
|
|
74
|
+
}
|
|
75
|
+
if (result.title) {
|
|
76
|
+
parts.push(`**Page Title:** ${result.title}`);
|
|
77
|
+
}
|
|
78
|
+
if (result.elementList) {
|
|
79
|
+
parts.push(`\n**Interactive Elements:**\n${result.elementList}`);
|
|
80
|
+
}
|
|
81
|
+
if (result.screenshot) {
|
|
82
|
+
parts.push(`\n[Screenshot captured and displayed to user]`);
|
|
83
|
+
}
|
|
84
|
+
if (parts.length === 0) {
|
|
85
|
+
return `Browser action "${action}" completed successfully.`;
|
|
86
|
+
}
|
|
87
|
+
return parts.join('\n');
|
|
88
|
+
}
|
|
89
|
+
/**
|
|
90
|
+
* Create browser tools with optional callback for waiting on results
|
|
91
|
+
*
|
|
92
|
+
* When waitForResult callback is provided:
|
|
93
|
+
* 1. Tool returns marker that triggers extension
|
|
94
|
+
* 2. Tool then awaits callback to get actual results
|
|
95
|
+
* 3. Returns real page state to LLM
|
|
96
|
+
*
|
|
97
|
+
* When no callback:
|
|
98
|
+
* 1. Tool returns marker only (for non-server contexts)
|
|
68
99
|
*
|
|
69
100
|
* NOTE: These tools use TEXT-BASED element lists, NOT screenshots
|
|
70
101
|
* Screenshots would be 100K+ tokens each - element lists are ~100 tokens
|
|
71
102
|
*/
|
|
72
|
-
function createBrowserTools() {
|
|
103
|
+
function createBrowserTools(options) {
|
|
104
|
+
const { waitForResult } = options || {};
|
|
73
105
|
const tools$1 = [];
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
106
|
+
/**
|
|
107
|
+
* Helper to create tool function that optionally waits for results
|
|
108
|
+
* The toolCallId is extracted from the RunnableConfig passed by LangChain
|
|
109
|
+
*/
|
|
110
|
+
const createToolFunction = (action) => {
|
|
111
|
+
return async (args, config) => {
|
|
112
|
+
const toolCallId = config?.toolCall?.id || `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;
|
|
113
|
+
// Create marker for extension
|
|
114
|
+
const marker = {
|
|
115
|
+
requiresBrowserExecution: true,
|
|
116
|
+
action,
|
|
117
|
+
args,
|
|
118
|
+
toolCallId,
|
|
119
|
+
};
|
|
120
|
+
// If no callback, return marker immediately (extension handles via SSE interception)
|
|
121
|
+
if (!waitForResult) {
|
|
122
|
+
return JSON.stringify(marker);
|
|
123
|
+
}
|
|
124
|
+
// With callback: wait for actual results from extension
|
|
125
|
+
// The marker is still returned initially via SSE, but we wait for the callback
|
|
126
|
+
try {
|
|
127
|
+
const result = await waitForResult(action, args, toolCallId);
|
|
128
|
+
return formatResultForLLM(result, action);
|
|
129
|
+
}
|
|
130
|
+
catch (error) {
|
|
131
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
132
|
+
return `Browser action "${action}" failed: ${errorMessage}`;
|
|
133
|
+
}
|
|
81
134
|
};
|
|
82
|
-
|
|
83
|
-
|
|
135
|
+
};
|
|
136
|
+
// browser_click
|
|
137
|
+
tools$1.push(tools.tool(createToolFunction('click'), {
|
|
84
138
|
name: EBrowserTools.CLICK,
|
|
85
139
|
description: `Click an element on the current web page by its index number.
|
|
86
140
|
The element list shows clickable items like: [0]<button>Submit</button> [1]<a href="/home">Home</a>
|
|
@@ -89,14 +143,7 @@ After clicking, you receive an updated element list showing the new page state.`
|
|
|
89
143
|
schema: BrowserClickSchema,
|
|
90
144
|
}));
|
|
91
145
|
// browser_type
|
|
92
|
-
tools$1.push(tools.tool(
|
|
93
|
-
const response = {
|
|
94
|
-
requiresBrowserExecution: true,
|
|
95
|
-
action: 'type',
|
|
96
|
-
args,
|
|
97
|
-
};
|
|
98
|
-
return JSON.stringify(response);
|
|
99
|
-
}, {
|
|
146
|
+
tools$1.push(tools.tool(createToolFunction('type'), {
|
|
100
147
|
name: EBrowserTools.TYPE,
|
|
101
148
|
description: `Type text into an input element on the page.
|
|
102
149
|
Find the input element in the list by its index (e.g., [5]<input placeholder="Search">).
|
|
@@ -105,28 +152,14 @@ After typing, you receive an updated element list.`,
|
|
|
105
152
|
schema: BrowserTypeSchema,
|
|
106
153
|
}));
|
|
107
154
|
// browser_navigate
|
|
108
|
-
tools$1.push(tools.tool(
|
|
109
|
-
const response = {
|
|
110
|
-
requiresBrowserExecution: true,
|
|
111
|
-
action: 'navigate',
|
|
112
|
-
args,
|
|
113
|
-
};
|
|
114
|
-
return JSON.stringify(response);
|
|
115
|
-
}, {
|
|
155
|
+
tools$1.push(tools.tool(createToolFunction('navigate'), {
|
|
116
156
|
name: EBrowserTools.NAVIGATE,
|
|
117
157
|
description: `Navigate to a URL. Always include the full URL with https://.
|
|
118
158
|
After navigation, you receive the new page's element list.`,
|
|
119
159
|
schema: BrowserNavigateSchema,
|
|
120
160
|
}));
|
|
121
161
|
// browser_scroll
|
|
122
|
-
tools$1.push(tools.tool(
|
|
123
|
-
const response = {
|
|
124
|
-
requiresBrowserExecution: true,
|
|
125
|
-
action: 'scroll',
|
|
126
|
-
args,
|
|
127
|
-
};
|
|
128
|
-
return JSON.stringify(response);
|
|
129
|
-
}, {
|
|
162
|
+
tools$1.push(tools.tool(createToolFunction('scroll'), {
|
|
130
163
|
name: EBrowserTools.SCROLL,
|
|
131
164
|
description: `Scroll the page to reveal more content.
|
|
132
165
|
Use 'down' to scroll down, 'up' to scroll up.
|
|
@@ -134,42 +167,21 @@ After scrolling, you receive an updated element list with newly visible elements
|
|
|
134
167
|
schema: BrowserScrollSchema,
|
|
135
168
|
}));
|
|
136
169
|
// browser_extract
|
|
137
|
-
tools$1.push(tools.tool(
|
|
138
|
-
const response = {
|
|
139
|
-
requiresBrowserExecution: true,
|
|
140
|
-
action: 'extract',
|
|
141
|
-
args,
|
|
142
|
-
};
|
|
143
|
-
return JSON.stringify(response);
|
|
144
|
-
}, {
|
|
170
|
+
tools$1.push(tools.tool(createToolFunction('extract'), {
|
|
145
171
|
name: EBrowserTools.EXTRACT,
|
|
146
172
|
description: `Extract content from the current page.
|
|
147
173
|
Returns page URL, title, and element list.`,
|
|
148
174
|
schema: BrowserExtractSchema,
|
|
149
175
|
}));
|
|
150
176
|
// browser_hover
|
|
151
|
-
tools$1.push(tools.tool(
|
|
152
|
-
const response = {
|
|
153
|
-
requiresBrowserExecution: true,
|
|
154
|
-
action: 'hover',
|
|
155
|
-
args,
|
|
156
|
-
};
|
|
157
|
-
return JSON.stringify(response);
|
|
158
|
-
}, {
|
|
177
|
+
tools$1.push(tools.tool(createToolFunction('hover'), {
|
|
159
178
|
name: EBrowserTools.HOVER,
|
|
160
179
|
description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.
|
|
161
180
|
After hovering, you receive an updated element list with any newly revealed elements.`,
|
|
162
181
|
schema: BrowserHoverSchema,
|
|
163
182
|
}));
|
|
164
183
|
// browser_wait
|
|
165
|
-
tools$1.push(tools.tool(
|
|
166
|
-
const response = {
|
|
167
|
-
requiresBrowserExecution: true,
|
|
168
|
-
action: 'wait',
|
|
169
|
-
args,
|
|
170
|
-
};
|
|
171
|
-
return JSON.stringify(response);
|
|
172
|
-
}, {
|
|
184
|
+
tools$1.push(tools.tool(createToolFunction('wait'), {
|
|
173
185
|
name: EBrowserTools.WAIT,
|
|
174
186
|
description: `Wait for a specified duration for page content to load.
|
|
175
187
|
Use this after actions that trigger async content loading.
|
|
@@ -177,47 +189,26 @@ After waiting, you receive an updated element list.`,
|
|
|
177
189
|
schema: BrowserWaitSchema,
|
|
178
190
|
}));
|
|
179
191
|
// browser_back
|
|
180
|
-
tools$1.push(tools.tool(
|
|
181
|
-
const response = {
|
|
182
|
-
requiresBrowserExecution: true,
|
|
183
|
-
action: 'back',
|
|
184
|
-
args,
|
|
185
|
-
};
|
|
186
|
-
return JSON.stringify(response);
|
|
187
|
-
}, {
|
|
192
|
+
tools$1.push(tools.tool(createToolFunction('back'), {
|
|
188
193
|
name: EBrowserTools.BACK,
|
|
189
194
|
description: `Go back to the previous page in browser history.
|
|
190
195
|
After going back, you receive the previous page's element list.`,
|
|
191
196
|
schema: BrowserBackSchema,
|
|
192
197
|
}));
|
|
193
198
|
// browser_screenshot
|
|
194
|
-
tools$1.push(tools.tool(
|
|
195
|
-
const response = {
|
|
196
|
-
requiresBrowserExecution: true,
|
|
197
|
-
action: 'screenshot',
|
|
198
|
-
args,
|
|
199
|
-
};
|
|
200
|
-
return JSON.stringify(response);
|
|
201
|
-
}, {
|
|
199
|
+
tools$1.push(tools.tool(createToolFunction('screenshot'), {
|
|
202
200
|
name: EBrowserTools.SCREENSHOT,
|
|
203
201
|
description: `Capture a screenshot of the current page.
|
|
204
|
-
|
|
205
|
-
Use browser_get_page_state
|
|
202
|
+
Returns the page state with a note that screenshot was displayed to the user.
|
|
203
|
+
Use browser_get_page_state to get the element list for automation.`,
|
|
206
204
|
schema: BrowserScreenshotSchema,
|
|
207
205
|
}));
|
|
208
206
|
// browser_get_page_state
|
|
209
|
-
tools$1.push(tools.tool(
|
|
210
|
-
const response = {
|
|
211
|
-
requiresBrowserExecution: true,
|
|
212
|
-
action: 'get_page_state',
|
|
213
|
-
args,
|
|
214
|
-
};
|
|
215
|
-
return JSON.stringify(response);
|
|
216
|
-
}, {
|
|
207
|
+
tools$1.push(tools.tool(createToolFunction('get_page_state'), {
|
|
217
208
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
218
209
|
description: `Get the current page state including URL, title, and all interactive elements.
|
|
219
210
|
Use this at the start of a task to see what elements are available.
|
|
220
|
-
Returns a text list of elements with their index numbers.`,
|
|
211
|
+
Returns a text list of elements with their index numbers for interaction.`,
|
|
221
212
|
schema: BrowserGetPageStateSchema,
|
|
222
213
|
}));
|
|
223
214
|
return tools$1;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\r\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\r\nimport type * as t from '@/types';\r\n\r\n/**\r\n * Browser tool names - keep in sync with ranger-browser extension\r\n * These tools execute locally in the browser extension, NOT on the server\r\n */\r\nexport const EBrowserTools = {\r\n CLICK: 'browser_click',\r\n TYPE: 'browser_type',\r\n NAVIGATE: 'browser_navigate',\r\n SCROLL: 'browser_scroll',\r\n EXTRACT: 'browser_extract',\r\n HOVER: 'browser_hover',\r\n WAIT: 'browser_wait',\r\n BACK: 'browser_back',\r\n SCREENSHOT: 'browser_screenshot',\r\n GET_PAGE_STATE: 'browser_get_page_state',\r\n} as const;\r\n\r\nexport type BrowserToolName = typeof EBrowserTools[keyof typeof EBrowserTools];\r\n\r\n/**\r\n * Check if browser capability is available based on request headers or context\r\n * The browser extension sets these headers when connected:\r\n * - X-Ranger-Browser-Extension: true\r\n * - X-Ranger-Browser-Capable: true\r\n */\r\nexport function hasBrowserCapability(req?: { headers?: Record<string, string | string[] | undefined> }): boolean {\r\n if (!req?.headers) {\r\n return false;\r\n }\r\n \r\n const browserExtension = req.headers['x-ranger-browser-extension'];\r\n const browserCapable = req.headers['x-ranger-browser-capable'];\r\n \r\n return browserExtension === 'true' || browserCapable === 'true';\r\n}\r\n\r\n// Tool schemas\r\nconst BrowserClickSchema = z.object({\r\n index: z.number().describe('The index number [0], [1], etc. of the element to click from the page state element list'),\r\n});\r\n\r\nconst BrowserTypeSchema = z.object({\r\n index: z.number().describe('The index number of the input element to type into'),\r\n text: z.string().describe('The text to type into the element'),\r\n pressEnter: z.boolean().optional().describe('Whether to press Enter after typing (useful for search forms)'),\r\n});\r\n\r\nconst BrowserNavigateSchema = z.object({\r\n url: z.string().describe('The full URL to navigate to (must include https://)'),\r\n});\r\n\r\nconst BrowserScrollSchema = z.object({\r\n direction: z.enum(['up', 'down', 'left', 'right']).describe('Direction to scroll'),\r\n amount: z.number().optional().describe('Pixels to scroll (default: one viewport height)'),\r\n});\r\n\r\nconst BrowserExtractSchema = z.object({\r\n query: z.string().optional().describe('Optional: specific content to extract from the page'),\r\n});\r\n\r\nconst BrowserHoverSchema = z.object({\r\n index: z.number().describe('The index number of the element to hover over'),\r\n});\r\n\r\nconst BrowserWaitSchema = z.object({\r\n duration: z.number().optional().describe('Milliseconds to wait (default: 1000)'),\r\n});\r\n\r\nconst BrowserBackSchema = z.object({});\r\n\r\nconst BrowserScreenshotSchema = z.object({});\r\n\r\nconst BrowserGetPageStateSchema = z.object({});\r\n\r\n/**\r\n * Browser tool response interface\r\n * This is what the extension returns after executing the action\r\n */\r\nexport interface BrowserToolResponse {\r\n requiresBrowserExecution: true;\r\n action: string;\r\n args: Record<string, unknown>;\r\n}\r\n\r\n/**\r\n * Create placeholder browser tools that signal to the extension to execute locally\r\n * The server creates these tools to give the LLM the schema, but actual execution\r\n * happens in the browser extension which intercepts tool calls\r\n * \r\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\r\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\r\n */\r\nexport function createBrowserTools(): DynamicStructuredTool[] {\r\n const tools: DynamicStructuredTool[] = [];\r\n\r\n // browser_click\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n // This returns a marker that tells the extension to execute locally\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'click',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.CLICK,\r\n description: `Click an element on the current web page by its index number.\r\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\r\nUse the index number in brackets to click that element.\r\nAfter clicking, you receive an updated element list showing the new page state.`,\r\n schema: BrowserClickSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_type\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'type',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.TYPE,\r\n description: `Type text into an input element on the page.\r\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\r\nSet pressEnter: true to submit forms after typing.\r\nAfter typing, you receive an updated element list.`,\r\n schema: BrowserTypeSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_navigate\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'navigate',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.NAVIGATE,\r\n description: `Navigate to a URL. Always include the full URL with https://.\r\nAfter navigation, you receive the new page's element list.`,\r\n schema: BrowserNavigateSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_scroll\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'scroll',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.SCROLL,\r\n description: `Scroll the page to reveal more content.\r\nUse 'down' to scroll down, 'up' to scroll up.\r\nAfter scrolling, you receive an updated element list with newly visible elements.`,\r\n schema: BrowserScrollSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_extract\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'extract',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.EXTRACT,\r\n description: `Extract content from the current page.\r\nReturns page URL, title, and element list.`,\r\n schema: BrowserExtractSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_hover\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'hover',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.HOVER,\r\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\r\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\r\n schema: BrowserHoverSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_wait\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'wait',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.WAIT,\r\n description: `Wait for a specified duration for page content to load.\r\nUse this after actions that trigger async content loading.\r\nAfter waiting, you receive an updated element list.`,\r\n schema: BrowserWaitSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_back\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'back',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.BACK,\r\n description: `Go back to the previous page in browser history.\r\nAfter going back, you receive the previous page's element list.`,\r\n schema: BrowserBackSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_screenshot\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'screenshot',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.SCREENSHOT,\r\n description: `Capture a screenshot of the current page.\r\nNOTE: Screenshot is displayed to the USER only, not returned to you.\r\nUse browser_get_page_state instead to get the element list.`,\r\n schema: BrowserScreenshotSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_get_page_state\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'get_page_state',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.GET_PAGE_STATE,\r\n description: `Get the current page state including URL, title, and all interactive elements.\r\nUse this at the start of a task to see what elements are available.\r\nReturns a text list of elements with their index numbers.`,\r\n schema: BrowserGetPageStateSchema,\r\n }\r\n )\r\n );\r\n\r\n return tools;\r\n}\r\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAK1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAAiE,EAAA;AACpG,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0FAA0F,CAAC;AACvH,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;IACjC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oDAAoD,CAAC;IAChF,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA,KAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+DAA+D,CAAC;AAC7G,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;IACrC,GAAG,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAChF,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA,KAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,CAAC;AAClF,IAAA,MAAM,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iDAAiD,CAAC;AAC1F,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAC7F,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;AACjF,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAY9C;;;;;;;AAOG;SACa,kBAAkB,GAAA;IAChC,MAAMC,OAAK,GAA4B,EAAE;;IAGzCA,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;;AAE9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,OAAO;YACf,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG2D,+EAAA,CAAA;AACxE,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,MAAM;YACd,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAG8B,kDAAA,CAAA;AAC3C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,UAAU;YAClB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACsC,0DAAA,CAAA;AACnD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,QAAQ;YAChB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE6D,iFAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,SAAS;YACjB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACsB,0CAAA,CAAA;AACnC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,OAAO;YACf,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACiE,qFAAA,CAAA;AAC9E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,MAAM;YACd,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAE+B,mDAAA,CAAA;AAC5C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,MAAM;YACd,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC2C,+DAAA,CAAA;AACxD,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,YAAY;YACpB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEuC,2DAAA,CAAA;AACpD,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,gBAAgB;YACxB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEqC,yDAAA,CAAA;AAClD,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CACF,CACF;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.cjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\r\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\r\nimport type * as t from '@/types';\r\n\r\n/**\r\n * Browser tool names - keep in sync with ranger-browser extension\r\n * These tools execute locally in the browser extension, NOT on the server\r\n */\r\nexport const EBrowserTools = {\r\n CLICK: 'browser_click',\r\n TYPE: 'browser_type',\r\n NAVIGATE: 'browser_navigate',\r\n SCROLL: 'browser_scroll',\r\n EXTRACT: 'browser_extract',\r\n HOVER: 'browser_hover',\r\n WAIT: 'browser_wait',\r\n BACK: 'browser_back',\r\n SCREENSHOT: 'browser_screenshot',\r\n GET_PAGE_STATE: 'browser_get_page_state',\r\n} as const;\r\n\r\nexport type BrowserToolName = typeof EBrowserTools[keyof typeof EBrowserTools];\r\n\r\n/**\r\n * Callback function type for waiting on browser action results\r\n * This allows the server (Ranger) to provide a callback that waits for the extension\r\n * to POST results back to the server before returning to the LLM.\r\n * \r\n * @param action - The browser action (click, type, navigate, etc.)\r\n * @param args - Arguments for the action\r\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\r\n * @returns Promise that resolves with the actual browser result (page state, etc.)\r\n */\r\nexport type BrowserToolCallback = (\r\n action: string,\r\n args: Record<string, unknown>,\r\n toolCallId: string\r\n) => Promise<BrowserActionResult>;\r\n\r\n/**\r\n * Result returned from browser action execution\r\n */\r\nexport interface BrowserActionResult {\r\n success: boolean;\r\n url?: string;\r\n title?: string;\r\n elementList?: string; // Text-based element list\r\n error?: string;\r\n screenshot?: string; // Base64 screenshot (if requested)\r\n}\r\n\r\n/**\r\n * Check if browser capability is available based on request headers or context\r\n * The browser extension sets these headers when connected:\r\n * - X-Ranger-Browser-Extension: true\r\n * - X-Ranger-Browser-Capable: true\r\n */\r\nexport function hasBrowserCapability(req?: { headers?: Record<string, string | string[] | undefined> }): boolean {\r\n if (!req?.headers) {\r\n return false;\r\n }\r\n \r\n const browserExtension = req.headers['x-ranger-browser-extension'];\r\n const browserCapable = req.headers['x-ranger-browser-capable'];\r\n \r\n return browserExtension === 'true' || browserCapable === 'true';\r\n}\r\n\r\n// Tool schemas\r\nconst BrowserClickSchema = z.object({\r\n index: z.number().describe('The index number [0], [1], etc. of the element to click from the page state element list'),\r\n});\r\n\r\nconst BrowserTypeSchema = z.object({\r\n index: z.number().describe('The index number of the input element to type into'),\r\n text: z.string().describe('The text to type into the element'),\r\n pressEnter: z.boolean().optional().describe('Whether to press Enter after typing (useful for search forms)'),\r\n});\r\n\r\nconst BrowserNavigateSchema = z.object({\r\n url: z.string().describe('The full URL to navigate to (must include https://)'),\r\n});\r\n\r\nconst BrowserScrollSchema = z.object({\r\n direction: z.enum(['up', 'down', 'left', 'right']).describe('Direction to scroll'),\r\n amount: z.number().optional().describe('Pixels to scroll (default: one viewport height)'),\r\n});\r\n\r\nconst BrowserExtractSchema = z.object({\r\n query: z.string().optional().describe('Optional: specific content to extract from the page'),\r\n});\r\n\r\nconst BrowserHoverSchema = z.object({\r\n index: z.number().describe('The index number of the element to hover over'),\r\n});\r\n\r\nconst BrowserWaitSchema = z.object({\r\n duration: z.number().optional().describe('Milliseconds to wait (default: 1000)'),\r\n});\r\n\r\nconst BrowserBackSchema = z.object({});\r\n\r\nconst BrowserScreenshotSchema = z.object({});\r\n\r\nconst BrowserGetPageStateSchema = z.object({});\r\n\r\n/**\r\n * Browser tool response interface\r\n * This is what the extension returns after executing the action\r\n */\r\nexport interface BrowserToolResponse {\r\n requiresBrowserExecution: true;\r\n action: string;\r\n args: Record<string, unknown>;\r\n toolCallId?: string; // Added to help extension correlate with callback\r\n}\r\n\r\n/**\r\n * Options for creating browser tools\r\n */\r\nexport interface CreateBrowserToolsOptions {\r\n /**\r\n * Optional callback that waits for browser action results.\r\n * When provided, tools will await this callback to get actual results from the extension.\r\n * When not provided, tools return markers immediately (for non-server contexts).\r\n */\r\n waitForResult?: BrowserToolCallback;\r\n}\r\n\r\n/**\r\n * Format browser action result for LLM consumption\r\n */\r\nfunction formatResultForLLM(result: BrowserActionResult, action: string): string {\r\n if (!result.success && result.error) {\r\n return `Browser action \"${action}\" failed: ${result.error}`;\r\n }\r\n\r\n const parts: string[] = [];\r\n \r\n if (result.url) {\r\n parts.push(`**Current URL:** ${result.url}`);\r\n }\r\n if (result.title) {\r\n parts.push(`**Page Title:** ${result.title}`);\r\n }\r\n if (result.elementList) {\r\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\r\n }\r\n if (result.screenshot) {\r\n parts.push(`\\n[Screenshot captured and displayed to user]`);\r\n }\r\n \r\n if (parts.length === 0) {\r\n return `Browser action \"${action}\" completed successfully.`;\r\n }\r\n \r\n return parts.join('\\n');\r\n}\r\n\r\n/**\r\n * Create browser tools with optional callback for waiting on results\r\n * \r\n * When waitForResult callback is provided:\r\n * 1. Tool returns marker that triggers extension\r\n * 2. Tool then awaits callback to get actual results\r\n * 3. Returns real page state to LLM\r\n * \r\n * When no callback:\r\n * 1. Tool returns marker only (for non-server contexts)\r\n * \r\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\r\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\r\n */\r\nexport function createBrowserTools(options?: CreateBrowserToolsOptions): DynamicStructuredTool[] {\r\n const { waitForResult } = options || {};\r\n const tools: DynamicStructuredTool[] = [];\r\n\r\n /**\r\n * Helper to create tool function that optionally waits for results\r\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\r\n */\r\n const createToolFunction = (action: string) => {\r\n return async (args: Record<string, unknown>, config?: { toolCall?: { id?: string } }): Promise<string> => {\r\n const toolCallId = config?.toolCall?.id || `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\r\n \r\n // Create marker for extension\r\n const marker: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action,\r\n args,\r\n toolCallId,\r\n };\r\n \r\n // If no callback, return marker immediately (extension handles via SSE interception)\r\n if (!waitForResult) {\r\n return JSON.stringify(marker);\r\n }\r\n \r\n // With callback: wait for actual results from extension\r\n // The marker is still returned initially via SSE, but we wait for the callback\r\n try {\r\n const result = await waitForResult(action, args, toolCallId);\r\n return formatResultForLLM(result, action);\r\n } catch (error) {\r\n const errorMessage = error instanceof Error ? error.message : String(error);\r\n return `Browser action \"${action}\" failed: ${errorMessage}`;\r\n }\r\n };\r\n };\r\n\r\n // browser_click\r\n tools.push(\r\n tool(\r\n createToolFunction('click'),\r\n {\r\n name: EBrowserTools.CLICK,\r\n description: `Click an element on the current web page by its index number.\r\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\r\nUse the index number in brackets to click that element.\r\nAfter clicking, you receive an updated element list showing the new page state.`,\r\n schema: BrowserClickSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_type\r\n tools.push(\r\n tool(\r\n createToolFunction('type'),\r\n {\r\n name: EBrowserTools.TYPE,\r\n description: `Type text into an input element on the page.\r\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\r\nSet pressEnter: true to submit forms after typing.\r\nAfter typing, you receive an updated element list.`,\r\n schema: BrowserTypeSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_navigate\r\n tools.push(\r\n tool(\r\n createToolFunction('navigate'),\r\n {\r\n name: EBrowserTools.NAVIGATE,\r\n description: `Navigate to a URL. Always include the full URL with https://.\r\nAfter navigation, you receive the new page's element list.`,\r\n schema: BrowserNavigateSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_scroll\r\n tools.push(\r\n tool(\r\n createToolFunction('scroll'),\r\n {\r\n name: EBrowserTools.SCROLL,\r\n description: `Scroll the page to reveal more content.\r\nUse 'down' to scroll down, 'up' to scroll up.\r\nAfter scrolling, you receive an updated element list with newly visible elements.`,\r\n schema: BrowserScrollSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_extract\r\n tools.push(\r\n tool(\r\n createToolFunction('extract'),\r\n {\r\n name: EBrowserTools.EXTRACT,\r\n description: `Extract content from the current page.\r\nReturns page URL, title, and element list.`,\r\n schema: BrowserExtractSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_hover\r\n tools.push(\r\n tool(\r\n createToolFunction('hover'),\r\n {\r\n name: EBrowserTools.HOVER,\r\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\r\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\r\n schema: BrowserHoverSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_wait\r\n tools.push(\r\n tool(\r\n createToolFunction('wait'),\r\n {\r\n name: EBrowserTools.WAIT,\r\n description: `Wait for a specified duration for page content to load.\r\nUse this after actions that trigger async content loading.\r\nAfter waiting, you receive an updated element list.`,\r\n schema: BrowserWaitSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_back\r\n tools.push(\r\n tool(\r\n createToolFunction('back'),\r\n {\r\n name: EBrowserTools.BACK,\r\n description: `Go back to the previous page in browser history.\r\nAfter going back, you receive the previous page's element list.`,\r\n schema: BrowserBackSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_screenshot\r\n tools.push(\r\n tool(\r\n createToolFunction('screenshot'),\r\n {\r\n name: EBrowserTools.SCREENSHOT,\r\n description: `Capture a screenshot of the current page.\r\nReturns the page state with a note that screenshot was displayed to the user.\r\nUse browser_get_page_state to get the element list for automation.`,\r\n schema: BrowserScreenshotSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_get_page_state\r\n tools.push(\r\n tool(\r\n createToolFunction('get_page_state'),\r\n {\r\n name: EBrowserTools.GET_PAGE_STATE,\r\n description: `Get the current page state including URL, title, and all interactive elements.\r\nUse this at the start of a task to see what elements are available.\r\nReturns a text list of elements with their index numbers for interaction.`,\r\n schema: BrowserGetPageStateSchema,\r\n }\r\n )\r\n );\r\n\r\n return tools;\r\n}\r\n"],"names":["z","tools","tool"],"mappings":";;;;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAiC1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAAiE,EAAA;AACpG,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0FAA0F,CAAC;AACvH,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;IACjC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oDAAoD,CAAC;IAChF,IAAI,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAEA,KAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+DAA+D,CAAC;AAC7G,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAGA,KAAC,CAAC,MAAM,CAAC;IACrC,GAAG,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAChF,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAEA,KAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,CAAC;AAClF,IAAA,MAAM,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iDAAiD,CAAC;AAC1F,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAC7F,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAGA,KAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAEA,KAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;AACjF,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAGA,KAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAyB9C;;AAEG;AACH,SAAS,kBAAkB,CAAC,MAA2B,EAAE,MAAc,EAAA;IACrE,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,EAAE;QACd,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,EAAE;QAChB,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,EAAE;QACtB,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,EAAE;AACrB,QAAA,KAAK,CAAC,IAAI,CAAC,CAAA,6CAAA,CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAAC,OAAmC,EAAA;AACpE,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAMC,OAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OAAO,IAA6B,EAAE,MAAuC,KAAqB;AACvG,YAAA,MAAM,UAAU,GAAG,MAAM,EAAE,QAAQ,EAAE,EAAE,IAAI,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,CAAA,CAAA,EAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAGtG,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AAC3E,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGDA,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,OAAO,CAAC,EAC3B;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG2D,+EAAA,CAAA;AACxE,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,MAAM,CAAC,EAC1B;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAG8B,kDAAA,CAAA;AAC3C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,UAAU,CAAC,EAC9B;QACE,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACsC,0DAAA,CAAA;AACnD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,QAAQ,CAAC,EAC5B;QACE,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE6D,iFAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,SAAS,CAAC,EAC7B;QACE,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACsB,0CAAA,CAAA;AACnC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,OAAO,CAAC,EAC3B;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACiE,qFAAA,CAAA;AAC9E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,MAAM,CAAC,EAC1B;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAE+B,mDAAA,CAAA;AAC5C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,MAAM,CAAC,EAC1B;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC2C,+DAAA,CAAA;AACxD,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,YAAY,CAAC,EAChC;QACE,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAE8C,kEAAA,CAAA;AAC3D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CACF,CACF;;IAGDD,OAAK,CAAC,IAAI,CACRC,UAAI,CACF,kBAAkB,CAAC,gBAAgB,CAAC,EACpC;QACE,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEqD,yEAAA,CAAA;AAClE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CACF,CACF;AAED,IAAA,OAAOD,OAAK;AACd;;;;;;"}
|
|
@@ -60,25 +60,79 @@ const BrowserBackSchema = z.object({});
|
|
|
60
60
|
const BrowserScreenshotSchema = z.object({});
|
|
61
61
|
const BrowserGetPageStateSchema = z.object({});
|
|
62
62
|
/**
|
|
63
|
-
*
|
|
64
|
-
|
|
65
|
-
|
|
63
|
+
* Format browser action result for LLM consumption
|
|
64
|
+
*/
|
|
65
|
+
function formatResultForLLM(result, action) {
|
|
66
|
+
if (!result.success && result.error) {
|
|
67
|
+
return `Browser action "${action}" failed: ${result.error}`;
|
|
68
|
+
}
|
|
69
|
+
const parts = [];
|
|
70
|
+
if (result.url) {
|
|
71
|
+
parts.push(`**Current URL:** ${result.url}`);
|
|
72
|
+
}
|
|
73
|
+
if (result.title) {
|
|
74
|
+
parts.push(`**Page Title:** ${result.title}`);
|
|
75
|
+
}
|
|
76
|
+
if (result.elementList) {
|
|
77
|
+
parts.push(`\n**Interactive Elements:**\n${result.elementList}`);
|
|
78
|
+
}
|
|
79
|
+
if (result.screenshot) {
|
|
80
|
+
parts.push(`\n[Screenshot captured and displayed to user]`);
|
|
81
|
+
}
|
|
82
|
+
if (parts.length === 0) {
|
|
83
|
+
return `Browser action "${action}" completed successfully.`;
|
|
84
|
+
}
|
|
85
|
+
return parts.join('\n');
|
|
86
|
+
}
|
|
87
|
+
/**
|
|
88
|
+
* Create browser tools with optional callback for waiting on results
|
|
89
|
+
*
|
|
90
|
+
* When waitForResult callback is provided:
|
|
91
|
+
* 1. Tool returns marker that triggers extension
|
|
92
|
+
* 2. Tool then awaits callback to get actual results
|
|
93
|
+
* 3. Returns real page state to LLM
|
|
94
|
+
*
|
|
95
|
+
* When no callback:
|
|
96
|
+
* 1. Tool returns marker only (for non-server contexts)
|
|
66
97
|
*
|
|
67
98
|
* NOTE: These tools use TEXT-BASED element lists, NOT screenshots
|
|
68
99
|
* Screenshots would be 100K+ tokens each - element lists are ~100 tokens
|
|
69
100
|
*/
|
|
70
|
-
function createBrowserTools() {
|
|
101
|
+
function createBrowserTools(options) {
|
|
102
|
+
const { waitForResult } = options || {};
|
|
71
103
|
const tools = [];
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
104
|
+
/**
|
|
105
|
+
* Helper to create tool function that optionally waits for results
|
|
106
|
+
* The toolCallId is extracted from the RunnableConfig passed by LangChain
|
|
107
|
+
*/
|
|
108
|
+
const createToolFunction = (action) => {
|
|
109
|
+
return async (args, config) => {
|
|
110
|
+
const toolCallId = config?.toolCall?.id || `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;
|
|
111
|
+
// Create marker for extension
|
|
112
|
+
const marker = {
|
|
113
|
+
requiresBrowserExecution: true,
|
|
114
|
+
action,
|
|
115
|
+
args,
|
|
116
|
+
toolCallId,
|
|
117
|
+
};
|
|
118
|
+
// If no callback, return marker immediately (extension handles via SSE interception)
|
|
119
|
+
if (!waitForResult) {
|
|
120
|
+
return JSON.stringify(marker);
|
|
121
|
+
}
|
|
122
|
+
// With callback: wait for actual results from extension
|
|
123
|
+
// The marker is still returned initially via SSE, but we wait for the callback
|
|
124
|
+
try {
|
|
125
|
+
const result = await waitForResult(action, args, toolCallId);
|
|
126
|
+
return formatResultForLLM(result, action);
|
|
127
|
+
}
|
|
128
|
+
catch (error) {
|
|
129
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
130
|
+
return `Browser action "${action}" failed: ${errorMessage}`;
|
|
131
|
+
}
|
|
79
132
|
};
|
|
80
|
-
|
|
81
|
-
|
|
133
|
+
};
|
|
134
|
+
// browser_click
|
|
135
|
+
tools.push(tool(createToolFunction('click'), {
|
|
82
136
|
name: EBrowserTools.CLICK,
|
|
83
137
|
description: `Click an element on the current web page by its index number.
|
|
84
138
|
The element list shows clickable items like: [0]<button>Submit</button> [1]<a href="/home">Home</a>
|
|
@@ -87,14 +141,7 @@ After clicking, you receive an updated element list showing the new page state.`
|
|
|
87
141
|
schema: BrowserClickSchema,
|
|
88
142
|
}));
|
|
89
143
|
// browser_type
|
|
90
|
-
tools.push(tool(
|
|
91
|
-
const response = {
|
|
92
|
-
requiresBrowserExecution: true,
|
|
93
|
-
action: 'type',
|
|
94
|
-
args,
|
|
95
|
-
};
|
|
96
|
-
return JSON.stringify(response);
|
|
97
|
-
}, {
|
|
144
|
+
tools.push(tool(createToolFunction('type'), {
|
|
98
145
|
name: EBrowserTools.TYPE,
|
|
99
146
|
description: `Type text into an input element on the page.
|
|
100
147
|
Find the input element in the list by its index (e.g., [5]<input placeholder="Search">).
|
|
@@ -103,28 +150,14 @@ After typing, you receive an updated element list.`,
|
|
|
103
150
|
schema: BrowserTypeSchema,
|
|
104
151
|
}));
|
|
105
152
|
// browser_navigate
|
|
106
|
-
tools.push(tool(
|
|
107
|
-
const response = {
|
|
108
|
-
requiresBrowserExecution: true,
|
|
109
|
-
action: 'navigate',
|
|
110
|
-
args,
|
|
111
|
-
};
|
|
112
|
-
return JSON.stringify(response);
|
|
113
|
-
}, {
|
|
153
|
+
tools.push(tool(createToolFunction('navigate'), {
|
|
114
154
|
name: EBrowserTools.NAVIGATE,
|
|
115
155
|
description: `Navigate to a URL. Always include the full URL with https://.
|
|
116
156
|
After navigation, you receive the new page's element list.`,
|
|
117
157
|
schema: BrowserNavigateSchema,
|
|
118
158
|
}));
|
|
119
159
|
// browser_scroll
|
|
120
|
-
tools.push(tool(
|
|
121
|
-
const response = {
|
|
122
|
-
requiresBrowserExecution: true,
|
|
123
|
-
action: 'scroll',
|
|
124
|
-
args,
|
|
125
|
-
};
|
|
126
|
-
return JSON.stringify(response);
|
|
127
|
-
}, {
|
|
160
|
+
tools.push(tool(createToolFunction('scroll'), {
|
|
128
161
|
name: EBrowserTools.SCROLL,
|
|
129
162
|
description: `Scroll the page to reveal more content.
|
|
130
163
|
Use 'down' to scroll down, 'up' to scroll up.
|
|
@@ -132,42 +165,21 @@ After scrolling, you receive an updated element list with newly visible elements
|
|
|
132
165
|
schema: BrowserScrollSchema,
|
|
133
166
|
}));
|
|
134
167
|
// browser_extract
|
|
135
|
-
tools.push(tool(
|
|
136
|
-
const response = {
|
|
137
|
-
requiresBrowserExecution: true,
|
|
138
|
-
action: 'extract',
|
|
139
|
-
args,
|
|
140
|
-
};
|
|
141
|
-
return JSON.stringify(response);
|
|
142
|
-
}, {
|
|
168
|
+
tools.push(tool(createToolFunction('extract'), {
|
|
143
169
|
name: EBrowserTools.EXTRACT,
|
|
144
170
|
description: `Extract content from the current page.
|
|
145
171
|
Returns page URL, title, and element list.`,
|
|
146
172
|
schema: BrowserExtractSchema,
|
|
147
173
|
}));
|
|
148
174
|
// browser_hover
|
|
149
|
-
tools.push(tool(
|
|
150
|
-
const response = {
|
|
151
|
-
requiresBrowserExecution: true,
|
|
152
|
-
action: 'hover',
|
|
153
|
-
args,
|
|
154
|
-
};
|
|
155
|
-
return JSON.stringify(response);
|
|
156
|
-
}, {
|
|
175
|
+
tools.push(tool(createToolFunction('hover'), {
|
|
157
176
|
name: EBrowserTools.HOVER,
|
|
158
177
|
description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.
|
|
159
178
|
After hovering, you receive an updated element list with any newly revealed elements.`,
|
|
160
179
|
schema: BrowserHoverSchema,
|
|
161
180
|
}));
|
|
162
181
|
// browser_wait
|
|
163
|
-
tools.push(tool(
|
|
164
|
-
const response = {
|
|
165
|
-
requiresBrowserExecution: true,
|
|
166
|
-
action: 'wait',
|
|
167
|
-
args,
|
|
168
|
-
};
|
|
169
|
-
return JSON.stringify(response);
|
|
170
|
-
}, {
|
|
182
|
+
tools.push(tool(createToolFunction('wait'), {
|
|
171
183
|
name: EBrowserTools.WAIT,
|
|
172
184
|
description: `Wait for a specified duration for page content to load.
|
|
173
185
|
Use this after actions that trigger async content loading.
|
|
@@ -175,47 +187,26 @@ After waiting, you receive an updated element list.`,
|
|
|
175
187
|
schema: BrowserWaitSchema,
|
|
176
188
|
}));
|
|
177
189
|
// browser_back
|
|
178
|
-
tools.push(tool(
|
|
179
|
-
const response = {
|
|
180
|
-
requiresBrowserExecution: true,
|
|
181
|
-
action: 'back',
|
|
182
|
-
args,
|
|
183
|
-
};
|
|
184
|
-
return JSON.stringify(response);
|
|
185
|
-
}, {
|
|
190
|
+
tools.push(tool(createToolFunction('back'), {
|
|
186
191
|
name: EBrowserTools.BACK,
|
|
187
192
|
description: `Go back to the previous page in browser history.
|
|
188
193
|
After going back, you receive the previous page's element list.`,
|
|
189
194
|
schema: BrowserBackSchema,
|
|
190
195
|
}));
|
|
191
196
|
// browser_screenshot
|
|
192
|
-
tools.push(tool(
|
|
193
|
-
const response = {
|
|
194
|
-
requiresBrowserExecution: true,
|
|
195
|
-
action: 'screenshot',
|
|
196
|
-
args,
|
|
197
|
-
};
|
|
198
|
-
return JSON.stringify(response);
|
|
199
|
-
}, {
|
|
197
|
+
tools.push(tool(createToolFunction('screenshot'), {
|
|
200
198
|
name: EBrowserTools.SCREENSHOT,
|
|
201
199
|
description: `Capture a screenshot of the current page.
|
|
202
|
-
|
|
203
|
-
Use browser_get_page_state
|
|
200
|
+
Returns the page state with a note that screenshot was displayed to the user.
|
|
201
|
+
Use browser_get_page_state to get the element list for automation.`,
|
|
204
202
|
schema: BrowserScreenshotSchema,
|
|
205
203
|
}));
|
|
206
204
|
// browser_get_page_state
|
|
207
|
-
tools.push(tool(
|
|
208
|
-
const response = {
|
|
209
|
-
requiresBrowserExecution: true,
|
|
210
|
-
action: 'get_page_state',
|
|
211
|
-
args,
|
|
212
|
-
};
|
|
213
|
-
return JSON.stringify(response);
|
|
214
|
-
}, {
|
|
205
|
+
tools.push(tool(createToolFunction('get_page_state'), {
|
|
215
206
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
216
207
|
description: `Get the current page state including URL, title, and all interactive elements.
|
|
217
208
|
Use this at the start of a task to see what elements are available.
|
|
218
|
-
Returns a text list of elements with their index numbers.`,
|
|
209
|
+
Returns a text list of elements with their index numbers for interaction.`,
|
|
219
210
|
schema: BrowserGetPageStateSchema,
|
|
220
211
|
}));
|
|
221
212
|
return tools;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\r\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\r\nimport type * as t from '@/types';\r\n\r\n/**\r\n * Browser tool names - keep in sync with ranger-browser extension\r\n * These tools execute locally in the browser extension, NOT on the server\r\n */\r\nexport const EBrowserTools = {\r\n CLICK: 'browser_click',\r\n TYPE: 'browser_type',\r\n NAVIGATE: 'browser_navigate',\r\n SCROLL: 'browser_scroll',\r\n EXTRACT: 'browser_extract',\r\n HOVER: 'browser_hover',\r\n WAIT: 'browser_wait',\r\n BACK: 'browser_back',\r\n SCREENSHOT: 'browser_screenshot',\r\n GET_PAGE_STATE: 'browser_get_page_state',\r\n} as const;\r\n\r\nexport type BrowserToolName = typeof EBrowserTools[keyof typeof EBrowserTools];\r\n\r\n/**\r\n * Check if browser capability is available based on request headers or context\r\n * The browser extension sets these headers when connected:\r\n * - X-Ranger-Browser-Extension: true\r\n * - X-Ranger-Browser-Capable: true\r\n */\r\nexport function hasBrowserCapability(req?: { headers?: Record<string, string | string[] | undefined> }): boolean {\r\n if (!req?.headers) {\r\n return false;\r\n }\r\n \r\n const browserExtension = req.headers['x-ranger-browser-extension'];\r\n const browserCapable = req.headers['x-ranger-browser-capable'];\r\n \r\n return browserExtension === 'true' || browserCapable === 'true';\r\n}\r\n\r\n// Tool schemas\r\nconst BrowserClickSchema = z.object({\r\n index: z.number().describe('The index number [0], [1], etc. of the element to click from the page state element list'),\r\n});\r\n\r\nconst BrowserTypeSchema = z.object({\r\n index: z.number().describe('The index number of the input element to type into'),\r\n text: z.string().describe('The text to type into the element'),\r\n pressEnter: z.boolean().optional().describe('Whether to press Enter after typing (useful for search forms)'),\r\n});\r\n\r\nconst BrowserNavigateSchema = z.object({\r\n url: z.string().describe('The full URL to navigate to (must include https://)'),\r\n});\r\n\r\nconst BrowserScrollSchema = z.object({\r\n direction: z.enum(['up', 'down', 'left', 'right']).describe('Direction to scroll'),\r\n amount: z.number().optional().describe('Pixels to scroll (default: one viewport height)'),\r\n});\r\n\r\nconst BrowserExtractSchema = z.object({\r\n query: z.string().optional().describe('Optional: specific content to extract from the page'),\r\n});\r\n\r\nconst BrowserHoverSchema = z.object({\r\n index: z.number().describe('The index number of the element to hover over'),\r\n});\r\n\r\nconst BrowserWaitSchema = z.object({\r\n duration: z.number().optional().describe('Milliseconds to wait (default: 1000)'),\r\n});\r\n\r\nconst BrowserBackSchema = z.object({});\r\n\r\nconst BrowserScreenshotSchema = z.object({});\r\n\r\nconst BrowserGetPageStateSchema = z.object({});\r\n\r\n/**\r\n * Browser tool response interface\r\n * This is what the extension returns after executing the action\r\n */\r\nexport interface BrowserToolResponse {\r\n requiresBrowserExecution: true;\r\n action: string;\r\n args: Record<string, unknown>;\r\n}\r\n\r\n/**\r\n * Create placeholder browser tools that signal to the extension to execute locally\r\n * The server creates these tools to give the LLM the schema, but actual execution\r\n * happens in the browser extension which intercepts tool calls\r\n * \r\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\r\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\r\n */\r\nexport function createBrowserTools(): DynamicStructuredTool[] {\r\n const tools: DynamicStructuredTool[] = [];\r\n\r\n // browser_click\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n // This returns a marker that tells the extension to execute locally\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'click',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.CLICK,\r\n description: `Click an element on the current web page by its index number.\r\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\r\nUse the index number in brackets to click that element.\r\nAfter clicking, you receive an updated element list showing the new page state.`,\r\n schema: BrowserClickSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_type\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'type',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.TYPE,\r\n description: `Type text into an input element on the page.\r\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\r\nSet pressEnter: true to submit forms after typing.\r\nAfter typing, you receive an updated element list.`,\r\n schema: BrowserTypeSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_navigate\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'navigate',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.NAVIGATE,\r\n description: `Navigate to a URL. Always include the full URL with https://.\r\nAfter navigation, you receive the new page's element list.`,\r\n schema: BrowserNavigateSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_scroll\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'scroll',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.SCROLL,\r\n description: `Scroll the page to reveal more content.\r\nUse 'down' to scroll down, 'up' to scroll up.\r\nAfter scrolling, you receive an updated element list with newly visible elements.`,\r\n schema: BrowserScrollSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_extract\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'extract',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.EXTRACT,\r\n description: `Extract content from the current page.\r\nReturns page URL, title, and element list.`,\r\n schema: BrowserExtractSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_hover\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'hover',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.HOVER,\r\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\r\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\r\n schema: BrowserHoverSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_wait\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'wait',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.WAIT,\r\n description: `Wait for a specified duration for page content to load.\r\nUse this after actions that trigger async content loading.\r\nAfter waiting, you receive an updated element list.`,\r\n schema: BrowserWaitSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_back\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'back',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.BACK,\r\n description: `Go back to the previous page in browser history.\r\nAfter going back, you receive the previous page's element list.`,\r\n schema: BrowserBackSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_screenshot\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'screenshot',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.SCREENSHOT,\r\n description: `Capture a screenshot of the current page.\r\nNOTE: Screenshot is displayed to the USER only, not returned to you.\r\nUse browser_get_page_state instead to get the element list.`,\r\n schema: BrowserScreenshotSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_get_page_state\r\n tools.push(\r\n tool(\r\n async (args): Promise<string> => {\r\n const response: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action: 'get_page_state',\r\n args,\r\n };\r\n return JSON.stringify(response);\r\n },\r\n {\r\n name: EBrowserTools.GET_PAGE_STATE,\r\n description: `Get the current page state including URL, title, and all interactive elements.\r\nUse this at the start of a task to see what elements are available.\r\nReturns a text list of elements with their index numbers.`,\r\n schema: BrowserGetPageStateSchema,\r\n }\r\n )\r\n );\r\n\r\n return tools;\r\n}\r\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAK1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAAiE,EAAA;AACpG,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0FAA0F,CAAC;AACvH,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oDAAoD,CAAC;IAChF,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+DAA+D,CAAC;AAC7G,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAChF,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,CAAC;AAClF,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iDAAiD,CAAC;AAC1F,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAC7F,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;AACjF,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAY9C;;;;;;;AAOG;SACa,kBAAkB,GAAA;IAChC,MAAM,KAAK,GAA4B,EAAE;;IAGzC,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;;AAE9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,OAAO;YACf,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG2D,+EAAA,CAAA;AACxE,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,MAAM;YACd,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAG8B,kDAAA,CAAA;AAC3C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,UAAU;YAClB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACsC,0DAAA,CAAA;AACnD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,QAAQ;YAChB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE6D,iFAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,SAAS;YACjB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACsB,0CAAA,CAAA;AACnC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,OAAO;YACf,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACiE,qFAAA,CAAA;AAC9E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,MAAM;YACd,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAE+B,mDAAA,CAAA;AAC5C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,MAAM;YACd,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC2C,+DAAA,CAAA;AACxD,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,YAAY;YACpB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAEuC,2DAAA,CAAA;AACpD,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,OAAO,IAAI,KAAqB;AAC9B,QAAA,MAAM,QAAQ,GAAwB;AACpC,YAAA,wBAAwB,EAAE,IAAI;AAC9B,YAAA,MAAM,EAAE,gBAAgB;YACxB,IAAI;SACL;AACD,QAAA,OAAO,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC;AACjC,KAAC,EACD;QACE,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEqC,yDAAA,CAAA;AAClD,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CACF,CACF;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
1
|
+
{"version":3,"file":"BrowserTools.mjs","sources":["../../../src/tools/BrowserTools.ts"],"sourcesContent":["import { z } from 'zod';\r\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\r\nimport type * as t from '@/types';\r\n\r\n/**\r\n * Browser tool names - keep in sync with ranger-browser extension\r\n * These tools execute locally in the browser extension, NOT on the server\r\n */\r\nexport const EBrowserTools = {\r\n CLICK: 'browser_click',\r\n TYPE: 'browser_type',\r\n NAVIGATE: 'browser_navigate',\r\n SCROLL: 'browser_scroll',\r\n EXTRACT: 'browser_extract',\r\n HOVER: 'browser_hover',\r\n WAIT: 'browser_wait',\r\n BACK: 'browser_back',\r\n SCREENSHOT: 'browser_screenshot',\r\n GET_PAGE_STATE: 'browser_get_page_state',\r\n} as const;\r\n\r\nexport type BrowserToolName = typeof EBrowserTools[keyof typeof EBrowserTools];\r\n\r\n/**\r\n * Callback function type for waiting on browser action results\r\n * This allows the server (Ranger) to provide a callback that waits for the extension\r\n * to POST results back to the server before returning to the LLM.\r\n * \r\n * @param action - The browser action (click, type, navigate, etc.)\r\n * @param args - Arguments for the action\r\n * @param toolCallId - Unique ID for this tool call (from config.toolCall.id)\r\n * @returns Promise that resolves with the actual browser result (page state, etc.)\r\n */\r\nexport type BrowserToolCallback = (\r\n action: string,\r\n args: Record<string, unknown>,\r\n toolCallId: string\r\n) => Promise<BrowserActionResult>;\r\n\r\n/**\r\n * Result returned from browser action execution\r\n */\r\nexport interface BrowserActionResult {\r\n success: boolean;\r\n url?: string;\r\n title?: string;\r\n elementList?: string; // Text-based element list\r\n error?: string;\r\n screenshot?: string; // Base64 screenshot (if requested)\r\n}\r\n\r\n/**\r\n * Check if browser capability is available based on request headers or context\r\n * The browser extension sets these headers when connected:\r\n * - X-Ranger-Browser-Extension: true\r\n * - X-Ranger-Browser-Capable: true\r\n */\r\nexport function hasBrowserCapability(req?: { headers?: Record<string, string | string[] | undefined> }): boolean {\r\n if (!req?.headers) {\r\n return false;\r\n }\r\n \r\n const browserExtension = req.headers['x-ranger-browser-extension'];\r\n const browserCapable = req.headers['x-ranger-browser-capable'];\r\n \r\n return browserExtension === 'true' || browserCapable === 'true';\r\n}\r\n\r\n// Tool schemas\r\nconst BrowserClickSchema = z.object({\r\n index: z.number().describe('The index number [0], [1], etc. of the element to click from the page state element list'),\r\n});\r\n\r\nconst BrowserTypeSchema = z.object({\r\n index: z.number().describe('The index number of the input element to type into'),\r\n text: z.string().describe('The text to type into the element'),\r\n pressEnter: z.boolean().optional().describe('Whether to press Enter after typing (useful for search forms)'),\r\n});\r\n\r\nconst BrowserNavigateSchema = z.object({\r\n url: z.string().describe('The full URL to navigate to (must include https://)'),\r\n});\r\n\r\nconst BrowserScrollSchema = z.object({\r\n direction: z.enum(['up', 'down', 'left', 'right']).describe('Direction to scroll'),\r\n amount: z.number().optional().describe('Pixels to scroll (default: one viewport height)'),\r\n});\r\n\r\nconst BrowserExtractSchema = z.object({\r\n query: z.string().optional().describe('Optional: specific content to extract from the page'),\r\n});\r\n\r\nconst BrowserHoverSchema = z.object({\r\n index: z.number().describe('The index number of the element to hover over'),\r\n});\r\n\r\nconst BrowserWaitSchema = z.object({\r\n duration: z.number().optional().describe('Milliseconds to wait (default: 1000)'),\r\n});\r\n\r\nconst BrowserBackSchema = z.object({});\r\n\r\nconst BrowserScreenshotSchema = z.object({});\r\n\r\nconst BrowserGetPageStateSchema = z.object({});\r\n\r\n/**\r\n * Browser tool response interface\r\n * This is what the extension returns after executing the action\r\n */\r\nexport interface BrowserToolResponse {\r\n requiresBrowserExecution: true;\r\n action: string;\r\n args: Record<string, unknown>;\r\n toolCallId?: string; // Added to help extension correlate with callback\r\n}\r\n\r\n/**\r\n * Options for creating browser tools\r\n */\r\nexport interface CreateBrowserToolsOptions {\r\n /**\r\n * Optional callback that waits for browser action results.\r\n * When provided, tools will await this callback to get actual results from the extension.\r\n * When not provided, tools return markers immediately (for non-server contexts).\r\n */\r\n waitForResult?: BrowserToolCallback;\r\n}\r\n\r\n/**\r\n * Format browser action result for LLM consumption\r\n */\r\nfunction formatResultForLLM(result: BrowserActionResult, action: string): string {\r\n if (!result.success && result.error) {\r\n return `Browser action \"${action}\" failed: ${result.error}`;\r\n }\r\n\r\n const parts: string[] = [];\r\n \r\n if (result.url) {\r\n parts.push(`**Current URL:** ${result.url}`);\r\n }\r\n if (result.title) {\r\n parts.push(`**Page Title:** ${result.title}`);\r\n }\r\n if (result.elementList) {\r\n parts.push(`\\n**Interactive Elements:**\\n${result.elementList}`);\r\n }\r\n if (result.screenshot) {\r\n parts.push(`\\n[Screenshot captured and displayed to user]`);\r\n }\r\n \r\n if (parts.length === 0) {\r\n return `Browser action \"${action}\" completed successfully.`;\r\n }\r\n \r\n return parts.join('\\n');\r\n}\r\n\r\n/**\r\n * Create browser tools with optional callback for waiting on results\r\n * \r\n * When waitForResult callback is provided:\r\n * 1. Tool returns marker that triggers extension\r\n * 2. Tool then awaits callback to get actual results\r\n * 3. Returns real page state to LLM\r\n * \r\n * When no callback:\r\n * 1. Tool returns marker only (for non-server contexts)\r\n * \r\n * NOTE: These tools use TEXT-BASED element lists, NOT screenshots\r\n * Screenshots would be 100K+ tokens each - element lists are ~100 tokens\r\n */\r\nexport function createBrowserTools(options?: CreateBrowserToolsOptions): DynamicStructuredTool[] {\r\n const { waitForResult } = options || {};\r\n const tools: DynamicStructuredTool[] = [];\r\n\r\n /**\r\n * Helper to create tool function that optionally waits for results\r\n * The toolCallId is extracted from the RunnableConfig passed by LangChain\r\n */\r\n const createToolFunction = (action: string) => {\r\n return async (args: Record<string, unknown>, config?: { toolCall?: { id?: string } }): Promise<string> => {\r\n const toolCallId = config?.toolCall?.id || `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;\r\n \r\n // Create marker for extension\r\n const marker: BrowserToolResponse = {\r\n requiresBrowserExecution: true,\r\n action,\r\n args,\r\n toolCallId,\r\n };\r\n \r\n // If no callback, return marker immediately (extension handles via SSE interception)\r\n if (!waitForResult) {\r\n return JSON.stringify(marker);\r\n }\r\n \r\n // With callback: wait for actual results from extension\r\n // The marker is still returned initially via SSE, but we wait for the callback\r\n try {\r\n const result = await waitForResult(action, args, toolCallId);\r\n return formatResultForLLM(result, action);\r\n } catch (error) {\r\n const errorMessage = error instanceof Error ? error.message : String(error);\r\n return `Browser action \"${action}\" failed: ${errorMessage}`;\r\n }\r\n };\r\n };\r\n\r\n // browser_click\r\n tools.push(\r\n tool(\r\n createToolFunction('click'),\r\n {\r\n name: EBrowserTools.CLICK,\r\n description: `Click an element on the current web page by its index number.\r\nThe element list shows clickable items like: [0]<button>Submit</button> [1]<a href=\"/home\">Home</a>\r\nUse the index number in brackets to click that element.\r\nAfter clicking, you receive an updated element list showing the new page state.`,\r\n schema: BrowserClickSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_type\r\n tools.push(\r\n tool(\r\n createToolFunction('type'),\r\n {\r\n name: EBrowserTools.TYPE,\r\n description: `Type text into an input element on the page.\r\nFind the input element in the list by its index (e.g., [5]<input placeholder=\"Search\">).\r\nSet pressEnter: true to submit forms after typing.\r\nAfter typing, you receive an updated element list.`,\r\n schema: BrowserTypeSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_navigate\r\n tools.push(\r\n tool(\r\n createToolFunction('navigate'),\r\n {\r\n name: EBrowserTools.NAVIGATE,\r\n description: `Navigate to a URL. Always include the full URL with https://.\r\nAfter navigation, you receive the new page's element list.`,\r\n schema: BrowserNavigateSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_scroll\r\n tools.push(\r\n tool(\r\n createToolFunction('scroll'),\r\n {\r\n name: EBrowserTools.SCROLL,\r\n description: `Scroll the page to reveal more content.\r\nUse 'down' to scroll down, 'up' to scroll up.\r\nAfter scrolling, you receive an updated element list with newly visible elements.`,\r\n schema: BrowserScrollSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_extract\r\n tools.push(\r\n tool(\r\n createToolFunction('extract'),\r\n {\r\n name: EBrowserTools.EXTRACT,\r\n description: `Extract content from the current page.\r\nReturns page URL, title, and element list.`,\r\n schema: BrowserExtractSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_hover\r\n tools.push(\r\n tool(\r\n createToolFunction('hover'),\r\n {\r\n name: EBrowserTools.HOVER,\r\n description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.\r\nAfter hovering, you receive an updated element list with any newly revealed elements.`,\r\n schema: BrowserHoverSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_wait\r\n tools.push(\r\n tool(\r\n createToolFunction('wait'),\r\n {\r\n name: EBrowserTools.WAIT,\r\n description: `Wait for a specified duration for page content to load.\r\nUse this after actions that trigger async content loading.\r\nAfter waiting, you receive an updated element list.`,\r\n schema: BrowserWaitSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_back\r\n tools.push(\r\n tool(\r\n createToolFunction('back'),\r\n {\r\n name: EBrowserTools.BACK,\r\n description: `Go back to the previous page in browser history.\r\nAfter going back, you receive the previous page's element list.`,\r\n schema: BrowserBackSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_screenshot\r\n tools.push(\r\n tool(\r\n createToolFunction('screenshot'),\r\n {\r\n name: EBrowserTools.SCREENSHOT,\r\n description: `Capture a screenshot of the current page.\r\nReturns the page state with a note that screenshot was displayed to the user.\r\nUse browser_get_page_state to get the element list for automation.`,\r\n schema: BrowserScreenshotSchema,\r\n }\r\n )\r\n );\r\n\r\n // browser_get_page_state\r\n tools.push(\r\n tool(\r\n createToolFunction('get_page_state'),\r\n {\r\n name: EBrowserTools.GET_PAGE_STATE,\r\n description: `Get the current page state including URL, title, and all interactive elements.\r\nUse this at the start of a task to see what elements are available.\r\nReturns a text list of elements with their index numbers for interaction.`,\r\n schema: BrowserGetPageStateSchema,\r\n }\r\n )\r\n );\r\n\r\n return tools;\r\n}\r\n"],"names":[],"mappings":";;;AAIA;;;AAGG;AACU,MAAA,aAAa,GAAG;AAC3B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAiC1C;;;;;AAKG;AACG,SAAU,oBAAoB,CAAC,GAAiE,EAAA;AACpG,IAAA,IAAI,CAAC,GAAG,EAAE,OAAO,EAAE;AACjB,QAAA,OAAO,KAAK;;IAGd,MAAM,gBAAgB,GAAG,GAAG,CAAC,OAAO,CAAC,4BAA4B,CAAC;IAClE,MAAM,cAAc,GAAG,GAAG,CAAC,OAAO,CAAC,0BAA0B,CAAC;AAE9D,IAAA,OAAO,gBAAgB,KAAK,MAAM,IAAI,cAAc,KAAK,MAAM;AACjE;AAEA;AACA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,0FAA0F,CAAC;AACvH,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,oDAAoD,CAAC;IAChF,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC9D,IAAA,UAAU,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+DAA+D,CAAC;AAC7G,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAChF,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,qBAAqB,CAAC;AAClF,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iDAAiD,CAAC;AAC1F,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,qDAAqD,CAAC;AAC7F,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,+CAA+C,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;AACjF,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAEtC,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAE5C,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC,EAAE,CAAC;AAyB9C;;AAEG;AACH,SAAS,kBAAkB,CAAC,MAA2B,EAAE,MAAc,EAAA;IACrE,IAAI,CAAC,MAAM,CAAC,OAAO,IAAI,MAAM,CAAC,KAAK,EAAE;AACnC,QAAA,OAAO,mBAAmB,MAAM,CAAA,UAAA,EAAa,MAAM,CAAC,KAAK,EAAE;;IAG7D,MAAM,KAAK,GAAa,EAAE;AAE1B,IAAA,IAAI,MAAM,CAAC,GAAG,EAAE;QACd,KAAK,CAAC,IAAI,CAAC,CAAA,iBAAA,EAAoB,MAAM,CAAC,GAAG,CAAE,CAAA,CAAC;;AAE9C,IAAA,IAAI,MAAM,CAAC,KAAK,EAAE;QAChB,KAAK,CAAC,IAAI,CAAC,CAAA,gBAAA,EAAmB,MAAM,CAAC,KAAK,CAAE,CAAA,CAAC;;AAE/C,IAAA,IAAI,MAAM,CAAC,WAAW,EAAE;QACtB,KAAK,CAAC,IAAI,CAAC,CAAA,6BAAA,EAAgC,MAAM,CAAC,WAAW,CAAE,CAAA,CAAC;;AAElE,IAAA,IAAI,MAAM,CAAC,UAAU,EAAE;AACrB,QAAA,KAAK,CAAC,IAAI,CAAC,CAAA,6CAAA,CAA+C,CAAC;;AAG7D,IAAA,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE;QACtB,OAAO,CAAA,gBAAA,EAAmB,MAAM,CAAA,yBAAA,CAA2B;;AAG7D,IAAA,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC;AACzB;AAEA;;;;;;;;;;;;;AAaG;AACG,SAAU,kBAAkB,CAAC,OAAmC,EAAA;AACpE,IAAA,MAAM,EAAE,aAAa,EAAE,GAAG,OAAO,IAAI,EAAE;IACvC,MAAM,KAAK,GAA4B,EAAE;AAEzC;;;AAGG;AACH,IAAA,MAAM,kBAAkB,GAAG,CAAC,MAAc,KAAI;AAC5C,QAAA,OAAO,OAAO,IAA6B,EAAE,MAAuC,KAAqB;AACvG,YAAA,MAAM,UAAU,GAAG,MAAM,EAAE,QAAQ,EAAE,EAAE,IAAI,CAAQ,KAAA,EAAA,IAAI,CAAC,GAAG,EAAE,CAAA,CAAA,EAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE;;AAGtG,YAAA,MAAM,MAAM,GAAwB;AAClC,gBAAA,wBAAwB,EAAE,IAAI;gBAC9B,MAAM;gBACN,IAAI;gBACJ,UAAU;aACX;;YAGD,IAAI,CAAC,aAAa,EAAE;AAClB,gBAAA,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC;;;;AAK/B,YAAA,IAAI;gBACF,MAAM,MAAM,GAAG,MAAM,aAAa,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,CAAC;AAC5D,gBAAA,OAAO,kBAAkB,CAAC,MAAM,EAAE,MAAM,CAAC;;YACzC,OAAO,KAAK,EAAE;AACd,gBAAA,MAAM,YAAY,GAAG,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;AAC3E,gBAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAa,UAAA,EAAA,YAAY,EAAE;;AAE/D,SAAC;AACH,KAAC;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,OAAO,CAAC,EAC3B;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;;;AAG2D,+EAAA,CAAA;AACxE,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,MAAM,CAAC,EAC1B;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAG8B,kDAAA,CAAA;AAC3C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,UAAU,CAAC,EAC9B;QACE,IAAI,EAAE,aAAa,CAAC,QAAQ;AAC5B,QAAA,WAAW,EAAE,CAAA;AACsC,0DAAA,CAAA;AACnD,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,QAAQ,CAAC,EAC5B;QACE,IAAI,EAAE,aAAa,CAAC,MAAM;AAC1B,QAAA,WAAW,EAAE,CAAA;;AAE6D,iFAAA,CAAA;AAC1E,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,SAAS,CAAC,EAC7B;QACE,IAAI,EAAE,aAAa,CAAC,OAAO;AAC3B,QAAA,WAAW,EAAE,CAAA;AACsB,0CAAA,CAAA;AACnC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,OAAO,CAAC,EAC3B;QACE,IAAI,EAAE,aAAa,CAAC,KAAK;AACzB,QAAA,WAAW,EAAE,CAAA;AACiE,qFAAA,CAAA;AAC9E,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,MAAM,CAAC,EAC1B;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;;AAE+B,mDAAA,CAAA;AAC5C,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,MAAM,CAAC,EAC1B;QACE,IAAI,EAAE,aAAa,CAAC,IAAI;AACxB,QAAA,WAAW,EAAE,CAAA;AAC2C,+DAAA,CAAA;AACxD,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,YAAY,CAAC,EAChC;QACE,IAAI,EAAE,aAAa,CAAC,UAAU;AAC9B,QAAA,WAAW,EAAE,CAAA;;AAE8C,kEAAA,CAAA;AAC3D,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CACF,CACF;;IAGD,KAAK,CAAC,IAAI,CACR,IAAI,CACF,kBAAkB,CAAC,gBAAgB,CAAC,EACpC;QACE,IAAI,EAAE,aAAa,CAAC,cAAc;AAClC,QAAA,WAAW,EAAE,CAAA;;AAEqD,yEAAA,CAAA;AAClE,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CACF,CACF;AAED,IAAA,OAAO,KAAK;AACd;;;;"}
|
|
@@ -16,6 +16,28 @@ export declare const EBrowserTools: {
|
|
|
16
16
|
readonly GET_PAGE_STATE: "browser_get_page_state";
|
|
17
17
|
};
|
|
18
18
|
export type BrowserToolName = typeof EBrowserTools[keyof typeof EBrowserTools];
|
|
19
|
+
/**
|
|
20
|
+
* Callback function type for waiting on browser action results
|
|
21
|
+
* This allows the server (Ranger) to provide a callback that waits for the extension
|
|
22
|
+
* to POST results back to the server before returning to the LLM.
|
|
23
|
+
*
|
|
24
|
+
* @param action - The browser action (click, type, navigate, etc.)
|
|
25
|
+
* @param args - Arguments for the action
|
|
26
|
+
* @param toolCallId - Unique ID for this tool call (from config.toolCall.id)
|
|
27
|
+
* @returns Promise that resolves with the actual browser result (page state, etc.)
|
|
28
|
+
*/
|
|
29
|
+
export type BrowserToolCallback = (action: string, args: Record<string, unknown>, toolCallId: string) => Promise<BrowserActionResult>;
|
|
30
|
+
/**
|
|
31
|
+
* Result returned from browser action execution
|
|
32
|
+
*/
|
|
33
|
+
export interface BrowserActionResult {
|
|
34
|
+
success: boolean;
|
|
35
|
+
url?: string;
|
|
36
|
+
title?: string;
|
|
37
|
+
elementList?: string;
|
|
38
|
+
error?: string;
|
|
39
|
+
screenshot?: string;
|
|
40
|
+
}
|
|
19
41
|
/**
|
|
20
42
|
* Check if browser capability is available based on request headers or context
|
|
21
43
|
* The browser extension sets these headers when connected:
|
|
@@ -33,13 +55,31 @@ export interface BrowserToolResponse {
|
|
|
33
55
|
requiresBrowserExecution: true;
|
|
34
56
|
action: string;
|
|
35
57
|
args: Record<string, unknown>;
|
|
58
|
+
toolCallId?: string;
|
|
36
59
|
}
|
|
37
60
|
/**
|
|
38
|
-
*
|
|
39
|
-
|
|
40
|
-
|
|
61
|
+
* Options for creating browser tools
|
|
62
|
+
*/
|
|
63
|
+
export interface CreateBrowserToolsOptions {
|
|
64
|
+
/**
|
|
65
|
+
* Optional callback that waits for browser action results.
|
|
66
|
+
* When provided, tools will await this callback to get actual results from the extension.
|
|
67
|
+
* When not provided, tools return markers immediately (for non-server contexts).
|
|
68
|
+
*/
|
|
69
|
+
waitForResult?: BrowserToolCallback;
|
|
70
|
+
}
|
|
71
|
+
/**
|
|
72
|
+
* Create browser tools with optional callback for waiting on results
|
|
73
|
+
*
|
|
74
|
+
* When waitForResult callback is provided:
|
|
75
|
+
* 1. Tool returns marker that triggers extension
|
|
76
|
+
* 2. Tool then awaits callback to get actual results
|
|
77
|
+
* 3. Returns real page state to LLM
|
|
78
|
+
*
|
|
79
|
+
* When no callback:
|
|
80
|
+
* 1. Tool returns marker only (for non-server contexts)
|
|
41
81
|
*
|
|
42
82
|
* NOTE: These tools use TEXT-BASED element lists, NOT screenshots
|
|
43
83
|
* Screenshots would be 100K+ tokens each - element lists are ~100 tokens
|
|
44
84
|
*/
|
|
45
|
-
export declare function createBrowserTools(): DynamicStructuredTool[];
|
|
85
|
+
export declare function createBrowserTools(options?: CreateBrowserToolsOptions): DynamicStructuredTool[];
|
package/package.json
CHANGED
|
@@ -21,6 +21,34 @@ export const EBrowserTools = {
|
|
|
21
21
|
|
|
22
22
|
export type BrowserToolName = typeof EBrowserTools[keyof typeof EBrowserTools];
|
|
23
23
|
|
|
24
|
+
/**
|
|
25
|
+
* Callback function type for waiting on browser action results
|
|
26
|
+
* This allows the server (Ranger) to provide a callback that waits for the extension
|
|
27
|
+
* to POST results back to the server before returning to the LLM.
|
|
28
|
+
*
|
|
29
|
+
* @param action - The browser action (click, type, navigate, etc.)
|
|
30
|
+
* @param args - Arguments for the action
|
|
31
|
+
* @param toolCallId - Unique ID for this tool call (from config.toolCall.id)
|
|
32
|
+
* @returns Promise that resolves with the actual browser result (page state, etc.)
|
|
33
|
+
*/
|
|
34
|
+
export type BrowserToolCallback = (
|
|
35
|
+
action: string,
|
|
36
|
+
args: Record<string, unknown>,
|
|
37
|
+
toolCallId: string
|
|
38
|
+
) => Promise<BrowserActionResult>;
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Result returned from browser action execution
|
|
42
|
+
*/
|
|
43
|
+
export interface BrowserActionResult {
|
|
44
|
+
success: boolean;
|
|
45
|
+
url?: string;
|
|
46
|
+
title?: string;
|
|
47
|
+
elementList?: string; // Text-based element list
|
|
48
|
+
error?: string;
|
|
49
|
+
screenshot?: string; // Base64 screenshot (if requested)
|
|
50
|
+
}
|
|
51
|
+
|
|
24
52
|
/**
|
|
25
53
|
* Check if browser capability is available based on request headers or context
|
|
26
54
|
* The browser extension sets these headers when connected:
|
|
@@ -84,31 +112,106 @@ export interface BrowserToolResponse {
|
|
|
84
112
|
requiresBrowserExecution: true;
|
|
85
113
|
action: string;
|
|
86
114
|
args: Record<string, unknown>;
|
|
115
|
+
toolCallId?: string; // Added to help extension correlate with callback
|
|
87
116
|
}
|
|
88
117
|
|
|
89
118
|
/**
|
|
90
|
-
*
|
|
91
|
-
|
|
92
|
-
|
|
119
|
+
* Options for creating browser tools
|
|
120
|
+
*/
|
|
121
|
+
export interface CreateBrowserToolsOptions {
|
|
122
|
+
/**
|
|
123
|
+
* Optional callback that waits for browser action results.
|
|
124
|
+
* When provided, tools will await this callback to get actual results from the extension.
|
|
125
|
+
* When not provided, tools return markers immediately (for non-server contexts).
|
|
126
|
+
*/
|
|
127
|
+
waitForResult?: BrowserToolCallback;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
/**
|
|
131
|
+
* Format browser action result for LLM consumption
|
|
132
|
+
*/
|
|
133
|
+
function formatResultForLLM(result: BrowserActionResult, action: string): string {
|
|
134
|
+
if (!result.success && result.error) {
|
|
135
|
+
return `Browser action "${action}" failed: ${result.error}`;
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
const parts: string[] = [];
|
|
139
|
+
|
|
140
|
+
if (result.url) {
|
|
141
|
+
parts.push(`**Current URL:** ${result.url}`);
|
|
142
|
+
}
|
|
143
|
+
if (result.title) {
|
|
144
|
+
parts.push(`**Page Title:** ${result.title}`);
|
|
145
|
+
}
|
|
146
|
+
if (result.elementList) {
|
|
147
|
+
parts.push(`\n**Interactive Elements:**\n${result.elementList}`);
|
|
148
|
+
}
|
|
149
|
+
if (result.screenshot) {
|
|
150
|
+
parts.push(`\n[Screenshot captured and displayed to user]`);
|
|
151
|
+
}
|
|
152
|
+
|
|
153
|
+
if (parts.length === 0) {
|
|
154
|
+
return `Browser action "${action}" completed successfully.`;
|
|
155
|
+
}
|
|
156
|
+
|
|
157
|
+
return parts.join('\n');
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
/**
|
|
161
|
+
* Create browser tools with optional callback for waiting on results
|
|
162
|
+
*
|
|
163
|
+
* When waitForResult callback is provided:
|
|
164
|
+
* 1. Tool returns marker that triggers extension
|
|
165
|
+
* 2. Tool then awaits callback to get actual results
|
|
166
|
+
* 3. Returns real page state to LLM
|
|
167
|
+
*
|
|
168
|
+
* When no callback:
|
|
169
|
+
* 1. Tool returns marker only (for non-server contexts)
|
|
93
170
|
*
|
|
94
171
|
* NOTE: These tools use TEXT-BASED element lists, NOT screenshots
|
|
95
172
|
* Screenshots would be 100K+ tokens each - element lists are ~100 tokens
|
|
96
173
|
*/
|
|
97
|
-
export function createBrowserTools(): DynamicStructuredTool[] {
|
|
174
|
+
export function createBrowserTools(options?: CreateBrowserToolsOptions): DynamicStructuredTool[] {
|
|
175
|
+
const { waitForResult } = options || {};
|
|
98
176
|
const tools: DynamicStructuredTool[] = [];
|
|
99
177
|
|
|
178
|
+
/**
|
|
179
|
+
* Helper to create tool function that optionally waits for results
|
|
180
|
+
* The toolCallId is extracted from the RunnableConfig passed by LangChain
|
|
181
|
+
*/
|
|
182
|
+
const createToolFunction = (action: string) => {
|
|
183
|
+
return async (args: Record<string, unknown>, config?: { toolCall?: { id?: string } }): Promise<string> => {
|
|
184
|
+
const toolCallId = config?.toolCall?.id || `tool_${Date.now()}_${Math.random().toString(36).slice(2)}`;
|
|
185
|
+
|
|
186
|
+
// Create marker for extension
|
|
187
|
+
const marker: BrowserToolResponse = {
|
|
188
|
+
requiresBrowserExecution: true,
|
|
189
|
+
action,
|
|
190
|
+
args,
|
|
191
|
+
toolCallId,
|
|
192
|
+
};
|
|
193
|
+
|
|
194
|
+
// If no callback, return marker immediately (extension handles via SSE interception)
|
|
195
|
+
if (!waitForResult) {
|
|
196
|
+
return JSON.stringify(marker);
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
// With callback: wait for actual results from extension
|
|
200
|
+
// The marker is still returned initially via SSE, but we wait for the callback
|
|
201
|
+
try {
|
|
202
|
+
const result = await waitForResult(action, args, toolCallId);
|
|
203
|
+
return formatResultForLLM(result, action);
|
|
204
|
+
} catch (error) {
|
|
205
|
+
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
206
|
+
return `Browser action "${action}" failed: ${errorMessage}`;
|
|
207
|
+
}
|
|
208
|
+
};
|
|
209
|
+
};
|
|
210
|
+
|
|
100
211
|
// browser_click
|
|
101
212
|
tools.push(
|
|
102
213
|
tool(
|
|
103
|
-
|
|
104
|
-
// This returns a marker that tells the extension to execute locally
|
|
105
|
-
const response: BrowserToolResponse = {
|
|
106
|
-
requiresBrowserExecution: true,
|
|
107
|
-
action: 'click',
|
|
108
|
-
args,
|
|
109
|
-
};
|
|
110
|
-
return JSON.stringify(response);
|
|
111
|
-
},
|
|
214
|
+
createToolFunction('click'),
|
|
112
215
|
{
|
|
113
216
|
name: EBrowserTools.CLICK,
|
|
114
217
|
description: `Click an element on the current web page by its index number.
|
|
@@ -123,14 +226,7 @@ After clicking, you receive an updated element list showing the new page state.`
|
|
|
123
226
|
// browser_type
|
|
124
227
|
tools.push(
|
|
125
228
|
tool(
|
|
126
|
-
|
|
127
|
-
const response: BrowserToolResponse = {
|
|
128
|
-
requiresBrowserExecution: true,
|
|
129
|
-
action: 'type',
|
|
130
|
-
args,
|
|
131
|
-
};
|
|
132
|
-
return JSON.stringify(response);
|
|
133
|
-
},
|
|
229
|
+
createToolFunction('type'),
|
|
134
230
|
{
|
|
135
231
|
name: EBrowserTools.TYPE,
|
|
136
232
|
description: `Type text into an input element on the page.
|
|
@@ -145,14 +241,7 @@ After typing, you receive an updated element list.`,
|
|
|
145
241
|
// browser_navigate
|
|
146
242
|
tools.push(
|
|
147
243
|
tool(
|
|
148
|
-
|
|
149
|
-
const response: BrowserToolResponse = {
|
|
150
|
-
requiresBrowserExecution: true,
|
|
151
|
-
action: 'navigate',
|
|
152
|
-
args,
|
|
153
|
-
};
|
|
154
|
-
return JSON.stringify(response);
|
|
155
|
-
},
|
|
244
|
+
createToolFunction('navigate'),
|
|
156
245
|
{
|
|
157
246
|
name: EBrowserTools.NAVIGATE,
|
|
158
247
|
description: `Navigate to a URL. Always include the full URL with https://.
|
|
@@ -165,14 +254,7 @@ After navigation, you receive the new page's element list.`,
|
|
|
165
254
|
// browser_scroll
|
|
166
255
|
tools.push(
|
|
167
256
|
tool(
|
|
168
|
-
|
|
169
|
-
const response: BrowserToolResponse = {
|
|
170
|
-
requiresBrowserExecution: true,
|
|
171
|
-
action: 'scroll',
|
|
172
|
-
args,
|
|
173
|
-
};
|
|
174
|
-
return JSON.stringify(response);
|
|
175
|
-
},
|
|
257
|
+
createToolFunction('scroll'),
|
|
176
258
|
{
|
|
177
259
|
name: EBrowserTools.SCROLL,
|
|
178
260
|
description: `Scroll the page to reveal more content.
|
|
@@ -186,14 +268,7 @@ After scrolling, you receive an updated element list with newly visible elements
|
|
|
186
268
|
// browser_extract
|
|
187
269
|
tools.push(
|
|
188
270
|
tool(
|
|
189
|
-
|
|
190
|
-
const response: BrowserToolResponse = {
|
|
191
|
-
requiresBrowserExecution: true,
|
|
192
|
-
action: 'extract',
|
|
193
|
-
args,
|
|
194
|
-
};
|
|
195
|
-
return JSON.stringify(response);
|
|
196
|
-
},
|
|
271
|
+
createToolFunction('extract'),
|
|
197
272
|
{
|
|
198
273
|
name: EBrowserTools.EXTRACT,
|
|
199
274
|
description: `Extract content from the current page.
|
|
@@ -206,14 +281,7 @@ Returns page URL, title, and element list.`,
|
|
|
206
281
|
// browser_hover
|
|
207
282
|
tools.push(
|
|
208
283
|
tool(
|
|
209
|
-
|
|
210
|
-
const response: BrowserToolResponse = {
|
|
211
|
-
requiresBrowserExecution: true,
|
|
212
|
-
action: 'hover',
|
|
213
|
-
args,
|
|
214
|
-
};
|
|
215
|
-
return JSON.stringify(response);
|
|
216
|
-
},
|
|
284
|
+
createToolFunction('hover'),
|
|
217
285
|
{
|
|
218
286
|
name: EBrowserTools.HOVER,
|
|
219
287
|
description: `Hover over an element to reveal tooltips, dropdowns, or other hover-triggered content.
|
|
@@ -226,14 +294,7 @@ After hovering, you receive an updated element list with any newly revealed elem
|
|
|
226
294
|
// browser_wait
|
|
227
295
|
tools.push(
|
|
228
296
|
tool(
|
|
229
|
-
|
|
230
|
-
const response: BrowserToolResponse = {
|
|
231
|
-
requiresBrowserExecution: true,
|
|
232
|
-
action: 'wait',
|
|
233
|
-
args,
|
|
234
|
-
};
|
|
235
|
-
return JSON.stringify(response);
|
|
236
|
-
},
|
|
297
|
+
createToolFunction('wait'),
|
|
237
298
|
{
|
|
238
299
|
name: EBrowserTools.WAIT,
|
|
239
300
|
description: `Wait for a specified duration for page content to load.
|
|
@@ -247,14 +308,7 @@ After waiting, you receive an updated element list.`,
|
|
|
247
308
|
// browser_back
|
|
248
309
|
tools.push(
|
|
249
310
|
tool(
|
|
250
|
-
|
|
251
|
-
const response: BrowserToolResponse = {
|
|
252
|
-
requiresBrowserExecution: true,
|
|
253
|
-
action: 'back',
|
|
254
|
-
args,
|
|
255
|
-
};
|
|
256
|
-
return JSON.stringify(response);
|
|
257
|
-
},
|
|
311
|
+
createToolFunction('back'),
|
|
258
312
|
{
|
|
259
313
|
name: EBrowserTools.BACK,
|
|
260
314
|
description: `Go back to the previous page in browser history.
|
|
@@ -267,19 +321,12 @@ After going back, you receive the previous page's element list.`,
|
|
|
267
321
|
// browser_screenshot
|
|
268
322
|
tools.push(
|
|
269
323
|
tool(
|
|
270
|
-
|
|
271
|
-
const response: BrowserToolResponse = {
|
|
272
|
-
requiresBrowserExecution: true,
|
|
273
|
-
action: 'screenshot',
|
|
274
|
-
args,
|
|
275
|
-
};
|
|
276
|
-
return JSON.stringify(response);
|
|
277
|
-
},
|
|
324
|
+
createToolFunction('screenshot'),
|
|
278
325
|
{
|
|
279
326
|
name: EBrowserTools.SCREENSHOT,
|
|
280
327
|
description: `Capture a screenshot of the current page.
|
|
281
|
-
|
|
282
|
-
Use browser_get_page_state
|
|
328
|
+
Returns the page state with a note that screenshot was displayed to the user.
|
|
329
|
+
Use browser_get_page_state to get the element list for automation.`,
|
|
283
330
|
schema: BrowserScreenshotSchema,
|
|
284
331
|
}
|
|
285
332
|
)
|
|
@@ -288,19 +335,12 @@ Use browser_get_page_state instead to get the element list.`,
|
|
|
288
335
|
// browser_get_page_state
|
|
289
336
|
tools.push(
|
|
290
337
|
tool(
|
|
291
|
-
|
|
292
|
-
const response: BrowserToolResponse = {
|
|
293
|
-
requiresBrowserExecution: true,
|
|
294
|
-
action: 'get_page_state',
|
|
295
|
-
args,
|
|
296
|
-
};
|
|
297
|
-
return JSON.stringify(response);
|
|
298
|
-
},
|
|
338
|
+
createToolFunction('get_page_state'),
|
|
299
339
|
{
|
|
300
340
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
301
341
|
description: `Get the current page state including URL, title, and all interactive elements.
|
|
302
342
|
Use this at the start of a task to see what elements are available.
|
|
303
|
-
Returns a text list of elements with their index numbers.`,
|
|
343
|
+
Returns a text list of elements with their index numbers for interaction.`,
|
|
304
344
|
schema: BrowserGetPageStateSchema,
|
|
305
345
|
}
|
|
306
346
|
)
|