illuma-agents 1.0.20 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +3 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +18 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/run.cjs +137 -3
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/BrowserInterruptTools.cjs +431 -0
- package/dist/cjs/tools/BrowserInterruptTools.cjs.map +1 -0
- package/dist/cjs/tools/BrowserTools.cjs +15 -10
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +3 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/run.mjs +136 -4
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/BrowserInterruptTools.mjs +415 -0
- package/dist/esm/tools/BrowserInterruptTools.mjs.map +1 -0
- package/dist/esm/tools/BrowserTools.mjs +15 -10
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/run.d.ts +47 -0
- package/dist/types/tools/BrowserInterruptTools.d.ts +282 -0
- package/dist/types/tools/BrowserTools.d.ts +2 -2
- package/dist/types/types/run.d.ts +8 -0
- package/package.json +1 -1
- package/src/graphs/Graph.ts +3 -3
- package/src/index.ts +1 -0
- package/src/run.ts +176 -3
- package/src/specs/browser-interrupt-tools.test.ts +235 -0
- package/src/tools/BrowserInterruptTools.ts +571 -0
- package/src/tools/BrowserTools.test.ts +41 -6
- package/src/tools/BrowserTools.ts +15 -10
- package/src/types/run.ts +8 -0
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
import { interrupt } from '@langchain/langgraph';
|
|
3
|
+
import { tool } from '@langchain/core/tools';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Browser Automation Tools with LangGraph Interrupt Support
|
|
7
|
+
*
|
|
8
|
+
* These tools use LangGraph's interrupt() mechanism to pause execution
|
|
9
|
+
* and wait for the browser extension to execute actions and return results.
|
|
10
|
+
*
|
|
11
|
+
* Flow:
|
|
12
|
+
* 1. Agent calls browser tool (e.g., browser_navigate)
|
|
13
|
+
* 2. Tool calls interrupt() with action details
|
|
14
|
+
* 3. Graph pauses and returns interrupt to client
|
|
15
|
+
* 4. Extension executes action in browser
|
|
16
|
+
* 5. Extension sends resume Command with actual result
|
|
17
|
+
* 6. Graph continues with real browser data
|
|
18
|
+
*
|
|
19
|
+
* This enables proper chain-of-thought with browser context because
|
|
20
|
+
* the agent receives ACTUAL results (page elements, screenshots, etc.)
|
|
21
|
+
* instead of placeholder acknowledgments.
|
|
22
|
+
*/
|
|
23
|
+
// ============================================
|
|
24
|
+
// Tool Schemas (same as BrowserTools.ts)
|
|
25
|
+
// ============================================
|
|
26
|
+
// Input schema for browser_click: the target is given either as an element
// index or as viewport coordinates. Both are optional at the schema level, so
// nothing here enforces that at least one of them is supplied.
const BrowserClickSchema = z.object({
    index: z.number().optional().describe('The index of the element to click, as shown in the page context'),
    coordinates: z.object({
        x: z.number().describe('X coordinate in viewport pixels'),
        y: z.number().describe('Y coordinate in viewport pixels'),
    }).optional().describe('Coordinates for clicking by position'),
    reason: z.string().optional().describe('Why you are clicking this element'),
});
// Input schema for browser_type: `index` and `text` are required, the two
// boolean switches are optional.
const BrowserTypeSchema = z.object({
    index: z.number().describe('The index of the input element to type into'),
    text: z.string().describe('The text to type'),
    clear: z.boolean().optional().describe('Clear existing content first'),
    pressEnter: z.boolean().optional().describe('Press Enter after typing'),
});
// Input schema for browser_navigate. `url` is only validated as a string;
// no URL-format check is applied here.
const BrowserNavigateSchema = z.object({
    url: z.string().describe('The URL to navigate to'),
    reason: z.string().optional().describe('Why navigating to this URL'),
});
// Input schema for browser_scroll: one of four fixed directions plus an
// optional pixel amount. The "default: 500" in the description is not applied
// by this schema (no .default() call).
const BrowserScrollSchema = z.object({
    direction: z.enum(['up', 'down', 'left', 'right']).describe('Scroll direction'),
    amount: z.number().optional().describe('Pixels to scroll (default: 500)'),
});
// Input schema for browser_extract: both filters are optional.
const BrowserExtractSchema = z.object({
    query: z.string().optional().describe('Query to filter extracted content'),
    selector: z.string().optional().describe('CSS selector to extract from'),
});
// Input schema for browser_hover: a single required element index.
const BrowserHoverSchema = z.object({
    index: z.number().describe('Element index to hover over'),
});
// Input schema for browser_wait. The "default: 1000" in the description is
// not applied by this schema (no .default() call).
const BrowserWaitSchema = z.object({
    duration: z.number().optional().describe('Milliseconds to wait (default: 1000)'),
    reason: z.string().optional().describe('Why waiting'),
});
// Input schema for browser_back: only an optional free-text reason.
const BrowserGoBackSchema = z.object({
    reason: z.string().optional().describe('Why going back'),
});
// Input schema for browser_screenshot.
const BrowserScreenshotSchema = z.object({
    fullPage: z.boolean().optional().describe('Capture full page vs viewport'),
});
// Input schema for browser_get_page_state: only an optional free-text reason.
const BrowserGetPageStateSchema = z.object({
    reason: z.string().optional().describe('Why getting page state'),
});
|
|
68
|
+
// ============================================
|
|
69
|
+
// Helper to generate interrupt IDs
|
|
70
|
+
// ============================================
|
|
71
|
+
// Module-wide sequence number; it only ever grows while the module is loaded.
let interruptCounter = 0;

/**
 * Build an identifier for a browser interrupt.
 *
 * The id has the form `browser_<epoch millis>_<sequence>`, where the sequence
 * is the module-level counter after being incremented for this call.
 *
 * @returns {string} The newly generated interrupt id.
 */
function generateInterruptId() {
    const timestamp = Date.now();
    interruptCounter += 1;
    return ['browser', timestamp, interruptCounter].join('_');
}
|
|
75
|
+
// ============================================
|
|
76
|
+
// Interrupt-based Tool Implementations
|
|
77
|
+
// ============================================
|
|
78
|
+
/**
 * Navigate tool - asks the client to load a URL and reports the resulting page.
 *
 * The handler calls interrupt() with a `navigate` action and formats whatever
 * value the graph is resumed with into a text summary for the agent.
 *
 * @returns The `browser_navigate` tool built by `tool()`, using BrowserNavigateSchema.
 */
function createBrowserNavigateInterruptTool() {
    return tool(async ({ url, reason }) => {
        const interruptId = generateInterruptId();
        // This will pause the graph and wait for extension to provide result
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'navigate', url, reason },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Navigation failed: ${detail}`;
        }
        // When resumed with a page state, surface it so the agent sees real page data
        const { pageState } = result;
        if (pageState) {
            return `Successfully navigated to ${pageState.url} (${pageState.title})

## Interactive Elements (${pageState.elementCount} total)
${pageState.elementList}

Viewport: ${pageState.viewportHeight}px, Scroll: ${pageState.scrollPosition}/${pageState.scrollHeight}px`;
        }
        return `Successfully navigated to ${url}`;
    }, {
        name: 'browser_navigate',
        description: `Navigate to a URL. Returns the page state with interactive elements after navigation completes.

Example: browser_navigate({ url: "https://www.amazon.com" })
Returns: Page title, URL, and list of interactive elements with their [index] numbers.`,
        schema: BrowserNavigateSchema,
    });
}
|
|
112
|
+
/**
 * Click tool - asks the client to click an element and reports the outcome.
 *
 * The handler calls interrupt() with a `click` action (element index and/or
 * viewport coordinates) and formats the value the graph is resumed with.
 *
 * @returns The `browser_click` tool built by `tool()`, using BrowserClickSchema.
 */
function createBrowserClickInterruptTool() {
    return tool(async ({ index, coordinates, reason }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'click', index, coordinates, reason },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Click failed: ${detail}`;
        }
        // If the client sent back a page state (e.g. the click navigated), return it
        const { pageState } = result;
        if (pageState) {
            return `Clicked element. Page updated:

URL: ${pageState.url}
Title: ${pageState.title}

## Interactive Elements (${pageState.elementCount} total)
${pageState.elementList}`;
        }
        // Only mention the index when the click was index-based
        const target = index !== undefined ? ` [${index}]` : '';
        return `Successfully clicked element${target}`;
    }, {
        name: 'browser_click',
        description: `Click an element by index or coordinates.

Use the [index] number from the interactive elements list.
Example: browser_click({ index: 5 }) to click element [5]`,
        schema: BrowserClickSchema,
    });
}
|
|
146
|
+
/**
 * Type tool - asks the client to type text into an input element.
 *
 * The handler calls interrupt() with a `type` action and formats the value
 * the graph is resumed with.
 *
 * @returns The `browser_type` tool built by `tool()`, using BrowserTypeSchema.
 */
function createBrowserTypeInterruptTool() {
    return tool(async ({ index, text, clear, pressEnter }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'type', index, text, clear, pressEnter },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Type failed: ${detail}`;
        }
        const enterSuffix = pressEnter ? ' and pressed Enter' : '';
        // If typing + enter caused navigation (e.g., search), return new state
        const { pageState } = result;
        if (pageState) {
            return `Typed "${text}"${enterSuffix}. Page updated:

URL: ${pageState.url}
Title: ${pageState.title}

## Interactive Elements (${pageState.elementCount} total)
${pageState.elementList}`;
        }
        return `Successfully typed "${text}" into element [${index}]${enterSuffix}`;
    }, {
        name: 'browser_type',
        description: `Type text into an input field.

Use the [index] from interactive elements list. Look for <input> elements.
Set pressEnter: true to submit after typing (for search fields).

Example: browser_type({ index: 3, text: "laptop 16gb ram", pressEnter: true })`,
        schema: BrowserTypeSchema,
    });
}
|
|
182
|
+
/**
 * Get page state tool - asks the client for the current page's elements.
 *
 * The handler calls interrupt() with a `get_page_state` action and formats
 * the value the graph is resumed with.
 *
 * @returns The `browser_get_page_state` tool built by `tool()`, using BrowserGetPageStateSchema.
 */
function createBrowserGetPageStateInterruptTool() {
    return tool(async ({ reason }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'get_page_state', reason },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Failed to get page state: ${detail}`;
        }
        const { pageState } = result;
        if (pageState) {
            return `## Current Page
URL: ${pageState.url}
Title: ${pageState.title}

## Interactive Elements (${pageState.elementCount} total)
${pageState.elementList}

Viewport: ${pageState.viewportHeight}px
Scroll: ${pageState.scrollPosition}/${pageState.scrollHeight}px`;
        }
        // Success was reported but no pageState object came back
        return 'Page state captured but no elements found.';
    }, {
        name: 'browser_get_page_state',
        description: `Get the current page state with all interactive elements.

Returns the list of clickable/typeable elements with their [index] numbers.
Use this to see what elements are available on the current page.`,
        schema: BrowserGetPageStateSchema,
    });
}
|
|
217
|
+
/**
 * Scroll tool - asks the client to scroll the page in one direction.
 *
 * The handler calls interrupt() with a `scroll` action and formats the value
 * the graph is resumed with.
 *
 * @returns The `browser_scroll` tool built by `tool()`, using BrowserScrollSchema.
 */
function createBrowserScrollInterruptTool() {
    return tool(async ({ direction, amount }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'scroll', direction, amount },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Scroll failed: ${detail}`;
        }
        const { pageState } = result;
        if (pageState) {
            return `Scrolled ${direction}. New elements visible:

## Interactive Elements (${pageState.elementCount} total)
${pageState.elementList}

Scroll: ${pageState.scrollPosition}/${pageState.scrollHeight}px`;
        }
        // The pixel amount is only echoed when a non-zero value was requested
        const distance = amount ? ` ${amount}px` : '';
        return `Successfully scrolled ${direction}${distance}`;
    }, {
        name: 'browser_scroll',
        description: `Scroll the page in a direction.

Example: browser_scroll({ direction: "down", amount: 500 })`,
        schema: BrowserScrollSchema,
    });
}
|
|
248
|
+
/**
 * Extract tool - asks the client to extract text content from the page.
 *
 * The handler calls interrupt() with an `extract` action and returns the
 * `extractedContent` of the value the graph is resumed with.
 *
 * @returns The `browser_extract` tool built by `tool()`, using BrowserExtractSchema.
 */
function createBrowserExtractInterruptTool() {
    return tool(async ({ query, selector }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'extract', query, selector },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Extract failed: ${detail}`;
        }
        // An empty or missing extraction is reported explicitly to the agent
        return result.extractedContent || 'No content extracted.';
    }, {
        name: 'browser_extract',
        description: `Extract text content from the page.

Example: browser_extract({ query: "price" })`,
        schema: BrowserExtractSchema,
    });
}
|
|
271
|
+
/**
 * Hover tool - asks the client to hover over an element by index.
 *
 * The handler calls interrupt() with a `hover` action and reports success or
 * failure based on the value the graph is resumed with.
 *
 * @returns The `browser_hover` tool built by `tool()`, using BrowserHoverSchema.
 */
function createBrowserHoverInterruptTool() {
    return tool(async ({ index }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'hover', index },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Hover failed: ${detail}`;
        }
        return `Successfully hovered over element [${index}]`;
    }, {
        name: 'browser_hover',
        description: 'Hover over an element to reveal tooltips or menus.',
        schema: BrowserHoverSchema,
    });
}
|
|
292
|
+
/**
 * Wait tool - asks the client to pause before the next action.
 *
 * The handler calls interrupt() with a `wait` action and reports the waited
 * duration once the graph is resumed.
 *
 * @returns The `browser_wait` tool built by `tool()`, using BrowserWaitSchema.
 */
function createBrowserWaitInterruptTool() {
    return tool(async ({ duration, reason }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'wait', duration, reason },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Wait failed: ${detail}`;
        }
        // Fall back to the documented 1000ms default only when no duration was
        // given; an explicit 0 must be reported as 0, not as 1000.
        // NOTE(review): assumes the client also honours an explicit 0 - confirm.
        const waitedMs = duration == null ? 1000 : duration;
        return `Waited ${waitedMs}ms`;
    }, {
        name: 'browser_wait',
        description: 'Wait for a duration before next action.',
        schema: BrowserWaitSchema,
    });
}
|
|
313
|
+
/**
 * Go back tool - asks the client to return to the previous history entry.
 *
 * The handler calls interrupt() with a `back` action and formats the value
 * the graph is resumed with.
 *
 * @returns The `browser_back` tool built by `tool()`, using BrowserGoBackSchema.
 */
function createBrowserGoBackInterruptTool() {
    return tool(async ({ reason }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'back', reason },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Go back failed: ${detail}`;
        }
        const { pageState } = result;
        if (pageState) {
            return `Went back to: ${pageState.url}

## Interactive Elements (${pageState.elementCount} total)
${pageState.elementList}`;
        }
        return 'Successfully went back';
    }, {
        name: 'browser_back',
        description: 'Go back to the previous page in history.',
        schema: BrowserGoBackSchema,
    });
}
|
|
340
|
+
/**
 * Screenshot tool - asks the client to capture the current page.
 *
 * The handler calls interrupt() with a `screenshot` action. Only a text
 * acknowledgement is returned to the agent; the screenshot payload itself is
 * not included in the tool output.
 *
 * @returns The `browser_screenshot` tool built by `tool()`, using BrowserScreenshotSchema.
 */
function createBrowserScreenshotInterruptTool() {
    return tool(async ({ fullPage }) => {
        const interruptId = generateInterruptId();
        const result = interrupt({
            type: 'browser_interrupt',
            action: { type: 'screenshot', fullPage },
            interruptId,
        });
        // The resume value is supplied by the client, so it can be missing or
        // malformed; report that as a failure instead of throwing a TypeError.
        if (!result || !result.success) {
            const detail = result && result.error
                ? result.error
                : 'no error details were returned by the browser extension';
            return `Screenshot failed: ${detail}`;
        }
        // Distinguish "image came back" from a bare success acknowledgement
        if (result.screenshot) {
            return 'Screenshot captured. [Image data available]';
        }
        return 'Screenshot captured';
    }, {
        name: 'browser_screenshot',
        description: 'Capture a screenshot of the current page.',
        schema: BrowserScreenshotSchema,
    });
}
|
|
364
|
+
// ============================================
|
|
365
|
+
// Tool Collection
|
|
366
|
+
// ============================================
|
|
367
|
+
// Lookup of symbolic keys to the tool names used by the interrupt-based browser tools.
const EBrowserInterruptTools = {
    CLICK: 'browser_click',
    TYPE: 'browser_type',
    NAVIGATE: 'browser_navigate',
    SCROLL: 'browser_scroll',
    EXTRACT: 'browser_extract',
    HOVER: 'browser_hover',
    WAIT: 'browser_wait',
    BACK: 'browser_back',
    SCREENSHOT: 'browser_screenshot',
    GET_PAGE_STATE: 'browser_get_page_state',
};

// Flat list of every tool name above, in declaration order.
const BROWSER_INTERRUPT_TOOL_NAMES = Object.keys(EBrowserInterruptTools).map((key) => EBrowserInterruptTools[key]);

/**
 * Tell whether a tool name is one of the interrupt-based browser tools.
 *
 * @param toolName - The tool name to look up.
 * @returns {boolean} True when the name appears in BROWSER_INTERRUPT_TOOL_NAMES.
 */
function isBrowserInterruptToolCall(toolName) {
    const matches = BROWSER_INTERRUPT_TOOL_NAMES.some((knownName) => knownName === toolName);
    return matches;
}
|
|
383
|
+
/**
 * Build the full set of interrupt-based browser tools.
 *
 * Intended for clients (a browser extension) that can:
 * 1. Detect browser_interrupt events in the stream
 * 2. Execute browser actions locally
 * 3. Send Command({ resume: result }) to continue the graph
 *
 * @returns {Array} One freshly created tool per factory, in the order:
 *   navigate, click, type, get page state, scroll, extract, hover, wait,
 *   go back, screenshot.
 */
function createBrowserInterruptTools() {
    const factories = [
        createBrowserNavigateInterruptTool,
        createBrowserClickInterruptTool,
        createBrowserTypeInterruptTool,
        createBrowserGetPageStateInterruptTool,
        createBrowserScrollInterruptTool,
        createBrowserExtractInterruptTool,
        createBrowserHoverInterruptTool,
        createBrowserWaitInterruptTool,
        createBrowserGoBackInterruptTool,
        createBrowserScreenshotInterruptTool,
    ];
    return factories.map((createTool) => createTool());
}
|
|
405
|
+
/**
 * Tell whether an interrupt payload is a browser interrupt.
 *
 * @param value - Any value taken from an interrupt.
 * @returns {boolean} True only for a non-null object whose `type` is 'browser_interrupt'.
 */
function isBrowserInterrupt(value) {
    // Rule out primitives and null before touching any property
    if (value === null || typeof value !== 'object') {
        return false;
    }
    return value.type === 'browser_interrupt';
}
|
|
413
|
+
|
|
414
|
+
export { BROWSER_INTERRUPT_TOOL_NAMES, EBrowserInterruptTools, createBrowserClickInterruptTool, createBrowserExtractInterruptTool, createBrowserGetPageStateInterruptTool, createBrowserGoBackInterruptTool, createBrowserHoverInterruptTool, createBrowserInterruptTools, createBrowserNavigateInterruptTool, createBrowserScreenshotInterruptTool, createBrowserScrollInterruptTool, createBrowserTypeInterruptTool, createBrowserWaitInterruptTool, isBrowserInterrupt, isBrowserInterruptToolCall };
|
|
415
|
+
//# sourceMappingURL=BrowserInterruptTools.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"BrowserInterruptTools.mjs","sources":["../../../src/tools/BrowserInterruptTools.ts"],"sourcesContent":["/**\r\n * Browser Automation Tools with LangGraph Interrupt Support\r\n * \r\n * These tools use LangGraph's interrupt() mechanism to pause execution\r\n * and wait for the browser extension to execute actions and return results.\r\n * \r\n * Flow:\r\n * 1. Agent calls browser tool (e.g., browser_navigate)\r\n * 2. Tool calls interrupt() with action details\r\n * 3. Graph pauses and returns interrupt to client\r\n * 4. Extension executes action in browser\r\n * 5. Extension sends resume Command with actual result\r\n * 6. Graph continues with real browser data\r\n * \r\n * This enables proper chain-of-thought with browser context because\r\n * the agent receives ACTUAL results (page elements, screenshots, etc.)\r\n * instead of placeholder acknowledgments.\r\n */\r\n\r\nimport { z } from 'zod';\r\nimport { interrupt } from '@langchain/langgraph';\r\nimport { tool, DynamicStructuredTool } from '@langchain/core/tools';\r\n\r\n// ============================================\r\n// Browser Interrupt Types\r\n// ============================================\r\n\r\n/**\r\n * Interrupt payload sent to the extension\r\n */\r\nexport interface BrowserInterrupt {\r\n /** Type of browser action to execute */\r\n type: 'browser_interrupt';\r\n /** The specific action to perform */\r\n action: BrowserAction;\r\n /** Unique ID for this interrupt (for matching resume) */\r\n interruptId: string;\r\n}\r\n\r\n/**\r\n * Browser action types\r\n */\r\nexport type BrowserAction = \r\n | { type: 'navigate'; url: string; reason?: string }\r\n | { type: 'click'; index?: number; coordinates?: { x: number; y: number }; reason?: string }\r\n | { type: 'type'; index: number; text: string; clear?: boolean; pressEnter?: boolean }\r\n | { type: 'scroll'; direction: 'up' | 'down' | 'left' | 'right'; amount?: number }\r\n | { type: 'extract'; query?: string; selector?: string 
}\r\n | { type: 'hover'; index: number }\r\n | { type: 'wait'; duration?: number; reason?: string }\r\n | { type: 'back'; reason?: string }\r\n | { type: 'screenshot'; fullPage?: boolean }\r\n | { type: 'get_page_state'; reason?: string };\r\n\r\n/**\r\n * Result returned from extension after executing action\r\n */\r\nexport interface BrowserActionResult {\r\n success: boolean;\r\n error?: string;\r\n /** Page state after action (for navigate, click, get_page_state) */\r\n pageState?: {\r\n url: string;\r\n title: string;\r\n /** Formatted element list for LLM */\r\n elementList: string;\r\n elementCount: number;\r\n scrollPosition: number;\r\n scrollHeight: number;\r\n viewportHeight: number;\r\n };\r\n /** Screenshot data URL */\r\n screenshot?: string;\r\n /** Extracted content */\r\n extractedContent?: string;\r\n /** Any additional data */\r\n data?: unknown;\r\n}\r\n\r\n// ============================================\r\n// Tool Schemas (same as BrowserTools.ts)\r\n// ============================================\r\n\r\nconst BrowserClickSchema = z.object({\r\n index: z.number().optional().describe(\r\n 'The index of the element to click, as shown in the page context'\r\n ),\r\n coordinates: z.object({\r\n x: z.number().describe('X coordinate in viewport pixels'),\r\n y: z.number().describe('Y coordinate in viewport pixels'),\r\n }).optional().describe('Coordinates for clicking by position'),\r\n reason: z.string().optional().describe('Why you are clicking this element'),\r\n});\r\n\r\nconst BrowserTypeSchema = z.object({\r\n index: z.number().describe('The index of the input element to type into'),\r\n text: z.string().describe('The text to type'),\r\n clear: z.boolean().optional().describe('Clear existing content first'),\r\n pressEnter: z.boolean().optional().describe('Press Enter after typing'),\r\n});\r\n\r\nconst BrowserNavigateSchema = z.object({\r\n url: z.string().describe('The URL to navigate to'),\r\n reason: z.string().optional().describe('Why 
navigating to this URL'),\r\n});\r\n\r\nconst BrowserScrollSchema = z.object({\r\n direction: z.enum(['up', 'down', 'left', 'right']).describe('Scroll direction'),\r\n amount: z.number().optional().describe('Pixels to scroll (default: 500)'),\r\n});\r\n\r\nconst BrowserExtractSchema = z.object({\r\n query: z.string().optional().describe('Query to filter extracted content'),\r\n selector: z.string().optional().describe('CSS selector to extract from'),\r\n});\r\n\r\nconst BrowserHoverSchema = z.object({\r\n index: z.number().describe('Element index to hover over'),\r\n});\r\n\r\nconst BrowserWaitSchema = z.object({\r\n duration: z.number().optional().describe('Milliseconds to wait (default: 1000)'),\r\n reason: z.string().optional().describe('Why waiting'),\r\n});\r\n\r\nconst BrowserGoBackSchema = z.object({\r\n reason: z.string().optional().describe('Why going back'),\r\n});\r\n\r\nconst BrowserScreenshotSchema = z.object({\r\n fullPage: z.boolean().optional().describe('Capture full page vs viewport'),\r\n});\r\n\r\nconst BrowserGetPageStateSchema = z.object({\r\n reason: z.string().optional().describe('Why getting page state'),\r\n});\r\n\r\n// ============================================\r\n// Helper to generate interrupt IDs\r\n// ============================================\r\n\r\nlet interruptCounter = 0;\r\nfunction generateInterruptId(): string {\r\n return `browser_${Date.now()}_${++interruptCounter}`;\r\n}\r\n\r\n// ============================================\r\n// Interrupt-based Tool Implementations\r\n// ============================================\r\n\r\n/**\r\n * Navigate tool - navigates to URL and returns page state\r\n */\r\nexport function createBrowserNavigateInterruptTool(): DynamicStructuredTool<typeof BrowserNavigateSchema> {\r\n return tool<typeof BrowserNavigateSchema>(\r\n async ({ url, reason }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n // This will pause the graph and wait for extension to provide result\r\n const 
result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'navigate', url, reason },\r\n interruptId,\r\n });\r\n \r\n // When resumed, result contains actual page state\r\n if (!result.success) {\r\n return `Navigation failed: ${result.error}`;\r\n }\r\n \r\n if (result.pageState) {\r\n return `Successfully navigated to ${result.pageState.url} (${result.pageState.title})\r\n\r\n## Interactive Elements (${result.pageState.elementCount} total)\r\n${result.pageState.elementList}\r\n\r\nViewport: ${result.pageState.viewportHeight}px, Scroll: ${result.pageState.scrollPosition}/${result.pageState.scrollHeight}px`;\r\n }\r\n \r\n return `Successfully navigated to ${url}`;\r\n },\r\n {\r\n name: 'browser_navigate',\r\n description: `Navigate to a URL. Returns the page state with interactive elements after navigation completes.\r\n\r\nExample: browser_navigate({ url: \"https://www.amazon.com\" })\r\nReturns: Page title, URL, and list of interactive elements with their [index] numbers.`,\r\n schema: BrowserNavigateSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Click tool - clicks element and returns updated state\r\n */\r\nexport function createBrowserClickInterruptTool(): DynamicStructuredTool<typeof BrowserClickSchema> {\r\n return tool<typeof BrowserClickSchema>(\r\n async ({ index, coordinates, reason }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'click', index, coordinates, reason },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Click failed: ${result.error}`;\r\n }\r\n \r\n // If click caused navigation, return new page state\r\n if (result.pageState) {\r\n return `Clicked element. 
Page updated:\r\n\r\nURL: ${result.pageState.url}\r\nTitle: ${result.pageState.title}\r\n\r\n## Interactive Elements (${result.pageState.elementCount} total)\r\n${result.pageState.elementList}`;\r\n }\r\n \r\n return `Successfully clicked element${index !== undefined ? ` [${index}]` : ''}`;\r\n },\r\n {\r\n name: 'browser_click',\r\n description: `Click an element by index or coordinates.\r\n\r\nUse the [index] number from the interactive elements list.\r\nExample: browser_click({ index: 5 }) to click element [5]`,\r\n schema: BrowserClickSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Type tool - types text into input field\r\n */\r\nexport function createBrowserTypeInterruptTool(): DynamicStructuredTool<typeof BrowserTypeSchema> {\r\n return tool<typeof BrowserTypeSchema>(\r\n async ({ index, text, clear, pressEnter }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'type', index, text, clear, pressEnter },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Type failed: ${result.error}`;\r\n }\r\n \r\n // If typing + enter caused navigation (e.g., search), return new state\r\n if (result.pageState) {\r\n return `Typed \"${text}\"${pressEnter ? ' and pressed Enter' : ''}. Page updated:\r\n\r\nURL: ${result.pageState.url}\r\nTitle: ${result.pageState.title}\r\n\r\n## Interactive Elements (${result.pageState.elementCount} total)\r\n${result.pageState.elementList}`;\r\n }\r\n \r\n return `Successfully typed \"${text}\" into element [${index}]${pressEnter ? ' and pressed Enter' : ''}`;\r\n },\r\n {\r\n name: 'browser_type',\r\n description: `Type text into an input field.\r\n\r\nUse the [index] from interactive elements list. 
Look for <input> elements.\r\nSet pressEnter: true to submit after typing (for search fields).\r\n\r\nExample: browser_type({ index: 3, text: \"laptop 16gb ram\", pressEnter: true })`,\r\n schema: BrowserTypeSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Get page state tool - captures current page elements\r\n */\r\nexport function createBrowserGetPageStateInterruptTool(): DynamicStructuredTool<typeof BrowserGetPageStateSchema> {\r\n return tool<typeof BrowserGetPageStateSchema>(\r\n async ({ reason }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'get_page_state', reason },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Failed to get page state: ${result.error}`;\r\n }\r\n \r\n if (result.pageState) {\r\n return `## Current Page\r\nURL: ${result.pageState.url}\r\nTitle: ${result.pageState.title}\r\n\r\n## Interactive Elements (${result.pageState.elementCount} total)\r\n${result.pageState.elementList}\r\n\r\nViewport: ${result.pageState.viewportHeight}px\r\nScroll: ${result.pageState.scrollPosition}/${result.pageState.scrollHeight}px`;\r\n }\r\n \r\n return 'Page state captured but no elements found.';\r\n },\r\n {\r\n name: 'browser_get_page_state',\r\n description: `Get the current page state with all interactive elements.\r\n\r\nReturns the list of clickable/typeable elements with their [index] numbers.\r\nUse this to see what elements are available on the current page.`,\r\n schema: BrowserGetPageStateSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Scroll tool\r\n */\r\nexport function createBrowserScrollInterruptTool(): DynamicStructuredTool<typeof BrowserScrollSchema> {\r\n return tool<typeof BrowserScrollSchema>(\r\n async ({ direction, amount }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { 
type: 'scroll', direction, amount },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Scroll failed: ${result.error}`;\r\n }\r\n \r\n if (result.pageState) {\r\n return `Scrolled ${direction}. New elements visible:\r\n\r\n## Interactive Elements (${result.pageState.elementCount} total)\r\n${result.pageState.elementList}\r\n\r\nScroll: ${result.pageState.scrollPosition}/${result.pageState.scrollHeight}px`;\r\n }\r\n \r\n return `Successfully scrolled ${direction}${amount ? ` ${amount}px` : ''}`;\r\n },\r\n {\r\n name: 'browser_scroll',\r\n description: `Scroll the page in a direction.\r\n\r\nExample: browser_scroll({ direction: \"down\", amount: 500 })`,\r\n schema: BrowserScrollSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Extract tool\r\n */\r\nexport function createBrowserExtractInterruptTool(): DynamicStructuredTool<typeof BrowserExtractSchema> {\r\n return tool<typeof BrowserExtractSchema>(\r\n async ({ query, selector }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'extract', query, selector },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Extract failed: ${result.error}`;\r\n }\r\n \r\n return result.extractedContent || 'No content extracted.';\r\n },\r\n {\r\n name: 'browser_extract',\r\n description: `Extract text content from the page.\r\n\r\nExample: browser_extract({ query: \"price\" })`,\r\n schema: BrowserExtractSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Hover tool\r\n */\r\nexport function createBrowserHoverInterruptTool(): DynamicStructuredTool<typeof BrowserHoverSchema> {\r\n return tool<typeof BrowserHoverSchema>(\r\n async ({ index }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'hover', index },\r\n interruptId,\r\n });\r\n \r\n if 
(!result.success) {\r\n return `Hover failed: ${result.error}`;\r\n }\r\n \r\n return `Successfully hovered over element [${index}]`;\r\n },\r\n {\r\n name: 'browser_hover',\r\n description: `Hover over an element to reveal tooltips or menus.`,\r\n schema: BrowserHoverSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Wait tool\r\n */\r\nexport function createBrowserWaitInterruptTool(): DynamicStructuredTool<typeof BrowserWaitSchema> {\r\n return tool<typeof BrowserWaitSchema>(\r\n async ({ duration, reason }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'wait', duration, reason },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Wait failed: ${result.error}`;\r\n }\r\n \r\n return `Waited ${duration || 1000}ms`;\r\n },\r\n {\r\n name: 'browser_wait',\r\n description: `Wait for a duration before next action.`,\r\n schema: BrowserWaitSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Go back tool\r\n */\r\nexport function createBrowserGoBackInterruptTool(): DynamicStructuredTool<typeof BrowserGoBackSchema> {\r\n return tool<typeof BrowserGoBackSchema>(\r\n async ({ reason }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'back', reason },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Go back failed: ${result.error}`;\r\n }\r\n \r\n if (result.pageState) {\r\n return `Went back to: ${result.pageState.url}\r\n\r\n## Interactive Elements (${result.pageState.elementCount} total)\r\n${result.pageState.elementList}`;\r\n }\r\n \r\n return 'Successfully went back';\r\n },\r\n {\r\n name: 'browser_back',\r\n description: `Go back to the previous page in history.`,\r\n schema: BrowserGoBackSchema,\r\n }\r\n );\r\n}\r\n\r\n/**\r\n * Screenshot tool\r\n */\r\nexport function 
createBrowserScreenshotInterruptTool(): DynamicStructuredTool<typeof BrowserScreenshotSchema> {\r\n return tool<typeof BrowserScreenshotSchema>(\r\n async ({ fullPage }) => {\r\n const interruptId = generateInterruptId();\r\n \r\n const result = interrupt<BrowserInterrupt, BrowserActionResult>({\r\n type: 'browser_interrupt',\r\n action: { type: 'screenshot', fullPage },\r\n interruptId,\r\n });\r\n \r\n if (!result.success) {\r\n return `Screenshot failed: ${result.error}`;\r\n }\r\n \r\n if (result.screenshot) {\r\n return `Screenshot captured. [Image data available]`;\r\n }\r\n \r\n return 'Screenshot captured';\r\n },\r\n {\r\n name: 'browser_screenshot',\r\n description: `Capture a screenshot of the current page.`,\r\n schema: BrowserScreenshotSchema,\r\n }\r\n );\r\n}\r\n\r\n// ============================================\r\n// Tool Collection\r\n// ============================================\r\n\r\nexport const EBrowserInterruptTools = {\r\n CLICK: 'browser_click',\r\n TYPE: 'browser_type',\r\n NAVIGATE: 'browser_navigate',\r\n SCROLL: 'browser_scroll',\r\n EXTRACT: 'browser_extract',\r\n HOVER: 'browser_hover',\r\n WAIT: 'browser_wait',\r\n BACK: 'browser_back',\r\n SCREENSHOT: 'browser_screenshot',\r\n GET_PAGE_STATE: 'browser_get_page_state',\r\n} as const;\r\n\r\nexport const BROWSER_INTERRUPT_TOOL_NAMES = Object.values(EBrowserInterruptTools);\r\n\r\nexport type BrowserInterruptToolName = typeof BROWSER_INTERRUPT_TOOL_NAMES[number];\r\n\r\nexport function isBrowserInterruptToolCall(toolName: string): toolName is BrowserInterruptToolName {\r\n return BROWSER_INTERRUPT_TOOL_NAMES.includes(toolName as BrowserInterruptToolName);\r\n}\r\n\r\n/**\r\n * Create all interrupt-based browser tools\r\n * \r\n * Use these when the client is a browser extension that can:\r\n * 1. Detect browser_interrupt events in the stream\r\n * 2. Execute browser actions locally\r\n * 3. 
Send Command({ resume: result }) to continue the graph\r\n */\r\nexport function createBrowserInterruptTools(): DynamicStructuredTool[] {\r\n return [\r\n createBrowserNavigateInterruptTool(),\r\n createBrowserClickInterruptTool(),\r\n createBrowserTypeInterruptTool(),\r\n createBrowserGetPageStateInterruptTool(),\r\n createBrowserScrollInterruptTool(),\r\n createBrowserExtractInterruptTool(),\r\n createBrowserHoverInterruptTool(),\r\n createBrowserWaitInterruptTool(),\r\n createBrowserGoBackInterruptTool(),\r\n createBrowserScreenshotInterruptTool(),\r\n ];\r\n}\r\n\r\n/**\r\n * Check if an interrupt is a browser interrupt\r\n */\r\nexport function isBrowserInterrupt(value: unknown): value is BrowserInterrupt {\r\n return (\r\n typeof value === 'object' &&\r\n value !== null &&\r\n (value as BrowserInterrupt).type === 'browser_interrupt'\r\n );\r\n}\r\n"],"names":[],"mappings":";;;;AAAA;;;;;;;;;;;;;;;;;AAiBG;AA8DH;AACA;AACA;AAEA,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;AAClC,IAAA,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CACnC,iEAAiE,CAClE;AACD,IAAA,WAAW,EAAE,CAAC,CAAC,MAAM,CAAC;QACpB,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;QACzD,CAAC,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;AAC1D,KAAA,CAAC,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;AAC9D,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC5E,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;IACjC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,6CAA6C,CAAC;IACzE,IAAI,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,kBAAkB,CAAC;AAC7C,IAAA,KAAK,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;AACtE,IAAA,UAAU,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,0BAA0B,CAAC;AACxE,CAAA,CAAC;AAEF,MAAM,qBAAqB,GAAG,CAAC,CAAC,MAAM,CAAC;IACrC,GAAG,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC;AAClD,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,4BAA4B,CAAC;AACrE,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,SAAS
,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC,CAAC,QAAQ,CAAC,kBAAkB,CAAC;AAC/E,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,iCAAiC,CAAC;AAC1E,CAAA,CAAC;AAEF,MAAM,oBAAoB,GAAG,CAAC,CAAC,MAAM,CAAC;AACpC,IAAA,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,mCAAmC,CAAC;AAC1E,IAAA,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,8BAA8B,CAAC;AACzE,CAAA,CAAC;AAEF,MAAM,kBAAkB,GAAG,CAAC,CAAC,MAAM,CAAC;IAClC,KAAK,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,6BAA6B,CAAC;AAC1D,CAAA,CAAC;AAEF,MAAM,iBAAiB,GAAG,CAAC,CAAC,MAAM,CAAC;AACjC,IAAA,QAAQ,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,sCAAsC,CAAC;AAChF,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,aAAa,CAAC;AACtD,CAAA,CAAC;AAEF,MAAM,mBAAmB,GAAG,CAAC,CAAC,MAAM,CAAC;AACnC,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,gBAAgB,CAAC;AACzD,CAAA,CAAC;AAEF,MAAM,uBAAuB,GAAG,CAAC,CAAC,MAAM,CAAC;AACvC,IAAA,QAAQ,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,+BAA+B,CAAC;AAC3E,CAAA,CAAC;AAEF,MAAM,yBAAyB,GAAG,CAAC,CAAC,MAAM,CAAC;AACzC,IAAA,MAAM,EAAE,CAAC,CAAC,MAAM,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,wBAAwB,CAAC;AACjE,CAAA,CAAC;AAEF;AACA;AACA;AAEA,IAAI,gBAAgB,GAAG,CAAC;AACxB,SAAS,mBAAmB,GAAA;IAC1B,OAAO,CAAA,QAAA,EAAW,IAAI,CAAC,GAAG,EAAE,CAAI,CAAA,EAAA,EAAE,gBAAgB,CAAA,CAAE;AACtD;AAEA;AACA;AACA;AAEA;;AAEG;SACa,kCAAkC,GAAA;IAChD,OAAO,IAAI,CACT,OAAO,EAAE,GAAG,EAAE,MAAM,EAAE,KAAI;AACxB,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;;QAGzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;YACzB,MAAM,EAAE,EAAE,IAAI,EAAE,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE;YACzC,WAAW;AACZ,SAAA,CAAC;;AAGF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAsB,mBAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAG7C,QAAA,IAAI,MAAM,CAAC,SAAS,EAAE;YACpB,OAAO,CAAA,0BAAA,EAA6B,MAAM,CAAC,SAAS,CAAC,GAAG,CAAA,EAAA,EAAK,MAAM,CAAC,SAAS,CAAC,KAAK,CAAA;;2BAEhE,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA;EACtD,MAAM,CAAC,SAAS,CAAC,WAAW,CAAA;;AAElB,UAAA,EAAA,MAAM,CAAC,SAAS,CAAC,cAAc,CAAA,YAAA,EAAe,MAAM,CAAC
,SAAS,CAAC,cAAc,IAAI,MAAM,CAAC,SAAS,CAAC,YAAY,IAAI;;QAGxH,OAAO,CAAA,0BAAA,EAA6B,GAAG,CAAA,CAAE;AAC3C,KAAC,EACD;AACE,QAAA,IAAI,EAAE,kBAAkB;AACxB,QAAA,WAAW,EAAE,CAAA;;;AAGoE,sFAAA,CAAA;AACjF,QAAA,MAAM,EAAE,qBAAqB;AAC9B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,+BAA+B,GAAA;AAC7C,IAAA,OAAO,IAAI,CACT,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE,KAAI;AACvC,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;YACzB,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,EAAE;YACrD,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAiB,cAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;;AAIxC,QAAA,IAAI,MAAM,CAAC,SAAS,EAAE;YACpB,OAAO,CAAA;;OAER,MAAM,CAAC,SAAS,CAAC,GAAG,CAAA;SAClB,MAAM,CAAC,SAAS,CAAC,KAAK,CAAA;;2BAEJ,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA;AACtD,EAAA,MAAM,CAAC,SAAS,CAAC,WAAW,EAAE;;AAG1B,QAAA,OAAO,CAA+B,4BAAA,EAAA,KAAK,KAAK,SAAS,GAAG,CAAK,EAAA,EAAA,KAAK,GAAG,GAAG,EAAE,EAAE;AAClF,KAAC,EACD;AACE,QAAA,IAAI,EAAE,eAAe;AACrB,QAAA,WAAW,EAAE,CAAA;;;AAGuC,yDAAA,CAAA;AACpD,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,8BAA8B,GAAA;AAC5C,IAAA,OAAO,IAAI,CACT,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,KAAI;AAC3C,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;AACzB,YAAA,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE;YACxD,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAgB,aAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;;AAIvC,QAAA,IAAI,MAAM,CAAC,SAAS,EAAE;YACpB,OAAO,CAAA,OAAA,EAAU,IAAI,CAAA,CAAA,EAAI,UAAU,GAAG,oBAAoB,GAAG,EAAE,CAAA;;OAEhE,MAAM,CAAC,SAAS,CAAC,GAAG,CAAA;SAClB,MAAM,CAAC,SAAS,CAAC,KAAK,CAAA;;2BAEJ,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA;AACtD,EAAA,MAAM,CAAC,SAAS,CAAC,WAAW,EAAE;;AAG1B,QAAA,OAAO,CAAuB,oBAAA,EAAA,IAAI,CAAmB,gBAAA,EAAA,KAAK,IAAI,UAAU,GAAG,oBAAoB,GAAG,EAAE,EAAE;AACxG,KAAC,EACD;AACE,QAAA,IAAI,EAAE,cAAc;AACpB,QAAA,WAAW,EAAE,CAAA;;;;;AAK4D,8EAAA,CAAA;AACzE,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,sCAAsC,GAAA;IACpD,OA
AO,IAAI,CACT,OAAO,EAAE,MAAM,EAAE,KAAI;AACnB,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;AACzB,YAAA,MAAM,EAAE,EAAE,IAAI,EAAE,gBAAgB,EAAE,MAAM,EAAE;YAC1C,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAA6B,0BAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAGpD,QAAA,IAAI,MAAM,CAAC,SAAS,EAAE;YACpB,OAAO,CAAA;OACR,MAAM,CAAC,SAAS,CAAC,GAAG,CAAA;SAClB,MAAM,CAAC,SAAS,CAAC,KAAK,CAAA;;2BAEJ,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA;EACtD,MAAM,CAAC,SAAS,CAAC,WAAW,CAAA;;YAElB,MAAM,CAAC,SAAS,CAAC,cAAc,CAAA;UACjC,MAAM,CAAC,SAAS,CAAC,cAAc,CAAA,CAAA,EAAI,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA,EAAA,CAAI;;AAGxE,QAAA,OAAO,4CAA4C;AACrD,KAAC,EACD;AACE,QAAA,IAAI,EAAE,wBAAwB;AAC9B,QAAA,WAAW,EAAE,CAAA;;;AAG8C,gEAAA,CAAA;AAC3D,QAAA,MAAM,EAAE,yBAAyB;AAClC,KAAA,CACF;AACH;AAEA;;AAEG;SACa,gCAAgC,GAAA;IAC9C,OAAO,IAAI,CACT,OAAO,EAAE,SAAS,EAAE,MAAM,EAAE,KAAI;AAC9B,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;YACzB,MAAM,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,EAAE;YAC7C,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAkB,eAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAGzC,QAAA,IAAI,MAAM,CAAC,SAAS,EAAE;AACpB,YAAA,OAAO,YAAY,SAAS,CAAA;;2BAET,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA;EACtD,MAAM,CAAC,SAAS,CAAC,WAAW,CAAA;;UAEpB,MAAM,CAAC,SAAS,CAAC,cAAc,CAAA,CAAA,EAAI,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA,EAAA,CAAI;;AAGxE,QAAA,OAAO,CAAyB,sBAAA,EAAA,SAAS,CAAG,EAAA,MAAM,GAAG,CAAI,CAAA,EAAA,MAAM,IAAI,GAAG,EAAE,EAAE;AAC5E,KAAC,EACD;AACE,QAAA,IAAI,EAAE,gBAAgB;AACtB,QAAA,WAAW,EAAE,CAAA;;AAEyC,2DAAA,CAAA;AACtD,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,iCAAiC,GAAA;IAC/C,OAAO,IAAI,CACT,OAAO,EAAE,KAAK,EAAE,QAAQ,EAAE,KAAI;AAC5B,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;YACzB,MAAM,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,KAAK,EAAE,QAAQ,EAAE;YAC5C,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAG1C,QAAA,OAAO,MAA
M,CAAC,gBAAgB,IAAI,uBAAuB;AAC3D,KAAC,EACD;AACE,QAAA,IAAI,EAAE,iBAAiB;AACvB,QAAA,WAAW,EAAE,CAAA;;AAE0B,4CAAA,CAAA;AACvC,QAAA,MAAM,EAAE,oBAAoB;AAC7B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,+BAA+B,GAAA;IAC7C,OAAO,IAAI,CACT,OAAO,EAAE,KAAK,EAAE,KAAI;AAClB,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;AACzB,YAAA,MAAM,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE;YAChC,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAiB,cAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;QAGxC,OAAO,CAAA,mCAAA,EAAsC,KAAK,CAAA,CAAA,CAAG;AACvD,KAAC,EACD;AACE,QAAA,IAAI,EAAE,eAAe;AACrB,QAAA,WAAW,EAAE,CAAoD,kDAAA,CAAA;AACjE,QAAA,MAAM,EAAE,kBAAkB;AAC3B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,8BAA8B,GAAA;IAC5C,OAAO,IAAI,CACT,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,KAAI;AAC7B,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;YACzB,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE;YAC1C,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAgB,aAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAGvC,QAAA,OAAO,CAAU,OAAA,EAAA,QAAQ,IAAI,IAAI,IAAI;AACvC,KAAC,EACD;AACE,QAAA,IAAI,EAAE,cAAc;AACpB,QAAA,WAAW,EAAE,CAAyC,uCAAA,CAAA;AACtD,QAAA,MAAM,EAAE,iBAAiB;AAC1B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,gCAAgC,GAAA;IAC9C,OAAO,IAAI,CACT,OAAO,EAAE,MAAM,EAAE,KAAI;AACnB,QAAA,MAAM,WAAW,GAAG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;AACzB,YAAA,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE;YAChC,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAmB,gBAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAG1C,QAAA,IAAI,MAAM,CAAC,SAAS,EAAE;AACpB,YAAA,OAAO,CAAiB,cAAA,EAAA,MAAM,CAAC,SAAS,CAAC,GAAG,CAAA;;2BAEzB,MAAM,CAAC,SAAS,CAAC,YAAY,CAAA;AACtD,EAAA,MAAM,CAAC,SAAS,CAAC,WAAW,EAAE;;AAG1B,QAAA,OAAO,wBAAwB;AACjC,KAAC,EACD;AACE,QAAA,IAAI,EAAE,cAAc;AACpB,QAAA,WAAW,EAAE,CAA0C,wCAAA,CAAA;AACvD,QAAA,MAAM,EAAE,mBAAmB;AAC5B,KAAA,CACF;AACH;AAEA;;AAEG;SACa,oCAAoC,GAAA;IAClD,OAAO,IAAI,CACT,OAAO,EAAE,QAAQ,EAAE,KAAI;AACrB,QAAA,MAAM,WAAW,GA
AG,mBAAmB,EAAE;QAEzC,MAAM,MAAM,GAAG,SAAS,CAAwC;AAC9D,YAAA,IAAI,EAAE,mBAAmB;AACzB,YAAA,MAAM,EAAE,EAAE,IAAI,EAAE,YAAY,EAAE,QAAQ,EAAE;YACxC,WAAW;AACZ,SAAA,CAAC;AAEF,QAAA,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE;AACnB,YAAA,OAAO,CAAsB,mBAAA,EAAA,MAAM,CAAC,KAAK,EAAE;;AAG7C,QAAA,IAAI,MAAM,CAAC,UAAU,EAAE;AACrB,YAAA,OAAO,6CAA6C;;AAGtD,QAAA,OAAO,qBAAqB;AAC9B,KAAC,EACD;AACE,QAAA,IAAI,EAAE,oBAAoB;AAC1B,QAAA,WAAW,EAAE,CAA2C,yCAAA,CAAA;AACxD,QAAA,MAAM,EAAE,uBAAuB;AAChC,KAAA,CACF;AACH;AAEA;AACA;AACA;AAEa,MAAA,sBAAsB,GAAG;AACpC,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,QAAQ,EAAE,kBAAkB;AAC5B,IAAA,MAAM,EAAE,gBAAgB;AACxB,IAAA,OAAO,EAAE,iBAAiB;AAC1B,IAAA,KAAK,EAAE,eAAe;AACtB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,IAAI,EAAE,cAAc;AACpB,IAAA,UAAU,EAAE,oBAAoB;AAChC,IAAA,cAAc,EAAE,wBAAwB;;AAG7B,MAAA,4BAA4B,GAAG,MAAM,CAAC,MAAM,CAAC,sBAAsB;AAI1E,SAAU,0BAA0B,CAAC,QAAgB,EAAA;AACzD,IAAA,OAAO,4BAA4B,CAAC,QAAQ,CAAC,QAAoC,CAAC;AACpF;AAEA;;;;;;;AAOG;SACa,2BAA2B,GAAA;IACzC,OAAO;AACL,QAAA,kCAAkC,EAAE;AACpC,QAAA,+BAA+B,EAAE;AACjC,QAAA,8BAA8B,EAAE;AAChC,QAAA,sCAAsC,EAAE;AACxC,QAAA,gCAAgC,EAAE;AAClC,QAAA,iCAAiC,EAAE;AACnC,QAAA,+BAA+B,EAAE;AACjC,QAAA,8BAA8B,EAAE;AAChC,QAAA,gCAAgC,EAAE;AAClC,QAAA,oCAAoC,EAAE;KACvC;AACH;AAEA;;AAEG;AACG,SAAU,kBAAkB,CAAC,KAAc,EAAA;AAC/C,IAAA,QACE,OAAO,KAAK,KAAK,QAAQ;AACzB,QAAA,KAAK,KAAK,IAAI;AACb,QAAA,KAA0B,CAAC,IAAI,KAAK,mBAAmB;AAE5D;;;;"}
|
|
@@ -383,23 +383,28 @@ function createBrowserGetPageStateTool() {
|
|
|
383
383
|
requiresBrowserExecution: true,
|
|
384
384
|
// Special flag: extension should inject fresh context into the conversation
|
|
385
385
|
requiresContextRefresh: true,
|
|
386
|
+
// IMPORTANT: Tell the agent to wait
|
|
387
|
+
message: 'Page state is being captured by the browser extension. The element list will be provided in the next message. DO NOT proceed with click or type actions until you receive the actual element list.',
|
|
386
388
|
});
|
|
387
389
|
}, {
|
|
388
390
|
name: EBrowserTools.GET_PAGE_STATE,
|
|
389
391
|
description: `Get fresh page state showing current interactive elements.
|
|
390
392
|
|
|
391
|
-
**CRITICAL**:
|
|
392
|
-
- browser_navigate (to see elements on the new page)
|
|
393
|
-
- browser_click (if it caused navigation or page changes)
|
|
394
|
-
- Any action that might have changed the visible elements
|
|
393
|
+
**CRITICAL WORKFLOW**: After calling this tool, you MUST STOP and WAIT. The browser extension will capture the page state and return the element list. DO NOT plan any browser_click or browser_type actions in the same response - you don't have the element indices yet!
|
|
395
394
|
|
|
396
|
-
|
|
397
|
-
|
|
395
|
+
**When to use**:
|
|
396
|
+
- After browser_navigate (to see elements on the new page)
|
|
397
|
+
- After browser_click (if it caused navigation or page changes)
|
|
398
|
+
- Any time you need to see what elements are currently on the page
|
|
398
399
|
|
|
399
|
-
**
|
|
400
|
-
1.
|
|
401
|
-
2.
|
|
402
|
-
3.
|
|
400
|
+
**IMPORTANT**: This tool captures the page state asynchronously. The actual element list will be provided AFTER this tool completes. You should:
|
|
401
|
+
1. Call this tool
|
|
402
|
+
2. STOP and wait for the response with the element list
|
|
403
|
+
3. In your NEXT response, use the element indices for click/type actions
|
|
404
|
+
|
|
405
|
+
Example workflow:
|
|
406
|
+
- Turn 1: browser_navigate to amazon.com, then browser_get_page_state
|
|
407
|
+
- Turn 2: (After receiving element list) browser_type with the correct search input index
|
|
403
408
|
|
|
404
409
|
Example: browser_get_page_state({ reason: "to see elements after navigation" })`,
|
|
405
410
|
schema: BrowserGetPageStateSchema,
|