illuma-agents 1.0.20 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +3 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +18 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/run.cjs +137 -3
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/BrowserInterruptTools.cjs +431 -0
- package/dist/cjs/tools/BrowserInterruptTools.cjs.map +1 -0
- package/dist/cjs/tools/BrowserTools.cjs +15 -10
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +3 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +2 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/run.mjs +136 -4
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/BrowserInterruptTools.mjs +415 -0
- package/dist/esm/tools/BrowserInterruptTools.mjs.map +1 -0
- package/dist/esm/tools/BrowserTools.mjs +15 -10
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/run.d.ts +47 -0
- package/dist/types/tools/BrowserInterruptTools.d.ts +282 -0
- package/dist/types/tools/BrowserTools.d.ts +2 -2
- package/dist/types/types/run.d.ts +8 -0
- package/package.json +1 -1
- package/src/graphs/Graph.ts +3 -3
- package/src/index.ts +1 -0
- package/src/run.ts +176 -3
- package/src/specs/browser-interrupt-tools.test.ts +235 -0
- package/src/tools/BrowserInterruptTools.ts +571 -0
- package/src/tools/BrowserTools.test.ts +41 -6
- package/src/tools/BrowserTools.ts +15 -10
- package/src/types/run.ts +8 -0
|
@@ -0,0 +1,571 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Automation Tools with LangGraph Interrupt Support
|
|
3
|
+
*
|
|
4
|
+
* These tools use LangGraph's interrupt() mechanism to pause execution
|
|
5
|
+
* and wait for the browser extension to execute actions and return results.
|
|
6
|
+
*
|
|
7
|
+
* Flow:
|
|
8
|
+
* 1. Agent calls browser tool (e.g., browser_navigate)
|
|
9
|
+
* 2. Tool calls interrupt() with action details
|
|
10
|
+
* 3. Graph pauses and returns interrupt to client
|
|
11
|
+
* 4. Extension executes action in browser
|
|
12
|
+
* 5. Extension sends resume Command with actual result
|
|
13
|
+
* 6. Graph continues with real browser data
|
|
14
|
+
*
|
|
15
|
+
* This enables proper chain-of-thought with browser context because
|
|
16
|
+
* the agent receives ACTUAL results (page elements, screenshots, etc.)
|
|
17
|
+
* instead of placeholder acknowledgments.
|
|
18
|
+
*/
|
|
19
|
+
|
|
20
|
+
import { z } from 'zod';
|
|
21
|
+
import { interrupt } from '@langchain/langgraph';
|
|
22
|
+
import { tool, DynamicStructuredTool } from '@langchain/core/tools';
|
|
23
|
+
|
|
24
|
+
// ============================================
|
|
25
|
+
// Browser Interrupt Types
|
|
26
|
+
// ============================================
|
|
27
|
+
|
|
28
|
+
/**
 * Payload that every tool in this module passes to `interrupt()`.
 * The file header describes it as the request the browser extension receives.
 */
export interface BrowserInterrupt {
  /** Constant tag for this payload; `isBrowserInterrupt` checks exactly this field. */
  type: 'browser_interrupt';
  /** The browser operation being requested. */
  action: BrowserAction;
  /** Identifier for this interrupt, intended for matching the resume to it. */
  interruptId: string;
}
|
|
39
|
+
|
|
40
|
+
/**
 * All operations a tool in this module can request, discriminated by `type`.
 * Each member mirrors the fields of the matching tool's input schema.
 */
export type BrowserAction =
  // Page-level navigation
  | { type: 'navigate'; url: string; reason?: string }
  | { type: 'back'; reason?: string }
  // Element interaction
  | {
      type: 'click';
      index?: number;
      coordinates?: { x: number; y: number };
      reason?: string;
    }
  | {
      type: 'type';
      index: number;
      text: string;
      clear?: boolean;
      pressEnter?: boolean;
    }
  | { type: 'hover'; index: number }
  | {
      type: 'scroll';
      direction: 'up' | 'down' | 'left' | 'right';
      amount?: number;
    }
  // Reading from the page
  | { type: 'extract'; query?: string; selector?: string }
  | { type: 'screenshot'; fullPage?: boolean }
  | { type: 'get_page_state'; reason?: string }
  // Timing
  | { type: 'wait'; duration?: number; reason?: string };
|
|
54
|
+
|
|
55
|
+
/**
 * Value the tools expect `interrupt()` to yield once the requested action
 * has been carried out. Only `success` is required; each tool reads the
 * optional fields relevant to it.
 */
export interface BrowserActionResult {
  /** Whether the action completed; tools return a failure message when false. */
  success: boolean;
  /** Failure detail, interpolated into the tool's failure message. */
  error?: string;
  /** Snapshot of the page after the action (for navigate, click, get_page_state). */
  pageState?: {
    url: string;
    title: string;
    /** Element list already formatted as text for the LLM. */
    elementList: string;
    elementCount: number;
    scrollPosition: number;
    scrollHeight: number;
    viewportHeight: number;
  };
  /** Screenshot as a data URL. */
  screenshot?: string;
  /** Text pulled from the page by an extract action. */
  extractedContent?: string;
  /** Free-form extra data; not read by any tool in this file. */
  data?: unknown;
}
|
|
79
|
+
|
|
80
|
+
// ============================================
|
|
81
|
+
// Tool Schemas (same as BrowserTools.ts)
|
|
82
|
+
// ============================================
|
|
83
|
+
|
|
84
|
+
/** Input for `browser_click`: an element index and/or viewport coordinates (both optional). */
const BrowserClickSchema = z.object({
  index: z
    .number()
    .optional()
    .describe('The index of the element to click, as shown in the page context'),
  coordinates: z
    .object({
      x: z.number().describe('X coordinate in viewport pixels'),
      y: z.number().describe('Y coordinate in viewport pixels'),
    })
    .optional()
    .describe('Coordinates for clicking by position'),
  reason: z
    .string()
    .optional()
    .describe('Why you are clicking this element'),
});

/** Input for `browser_type`: target element, text, and optional clear / Enter flags. */
const BrowserTypeSchema = z.object({
  index: z
    .number()
    .describe('The index of the input element to type into'),
  text: z
    .string()
    .describe('The text to type'),
  clear: z
    .boolean()
    .optional()
    .describe('Clear existing content first'),
  pressEnter: z
    .boolean()
    .optional()
    .describe('Press Enter after typing'),
});

/** Input for `browser_navigate`. */
const BrowserNavigateSchema = z.object({
  url: z
    .string()
    .describe('The URL to navigate to'),
  reason: z
    .string()
    .optional()
    .describe('Why navigating to this URL'),
});

/** Input for `browser_scroll`. */
const BrowserScrollSchema = z.object({
  direction: z
    .enum(['up', 'down', 'left', 'right'])
    .describe('Scroll direction'),
  amount: z
    .number()
    .optional()
    .describe('Pixels to scroll (default: 500)'),
});

/** Input for `browser_extract`; both filters are optional. */
const BrowserExtractSchema = z.object({
  query: z
    .string()
    .optional()
    .describe('Query to filter extracted content'),
  selector: z
    .string()
    .optional()
    .describe('CSS selector to extract from'),
});

/** Input for `browser_hover`. */
const BrowserHoverSchema = z.object({
  index: z
    .number()
    .describe('Element index to hover over'),
});

/** Input for `browser_wait`. */
const BrowserWaitSchema = z.object({
  duration: z
    .number()
    .optional()
    .describe('Milliseconds to wait (default: 1000)'),
  reason: z
    .string()
    .optional()
    .describe('Why waiting'),
});

/** Input for `browser_back`. */
const BrowserGoBackSchema = z.object({
  reason: z
    .string()
    .optional()
    .describe('Why going back'),
});

/** Input for `browser_screenshot`. */
const BrowserScreenshotSchema = z.object({
  fullPage: z
    .boolean()
    .optional()
    .describe('Capture full page vs viewport'),
});

/** Input for `browser_get_page_state`. */
const BrowserGetPageStateSchema = z.object({
  reason: z
    .string()
    .optional()
    .describe('Why getting page state'),
});
|
|
137
|
+
|
|
138
|
+
// ============================================
|
|
139
|
+
// Helper to generate interrupt IDs
|
|
140
|
+
// ============================================
|
|
141
|
+
|
|
142
|
+
/** How many interrupt IDs this module has issued so far. */
let interruptCounter = 0;

/**
 * Builds an identifier of the form `browser_<epoch-ms>_<sequence>`.
 *
 * The sequence number is incremented before it is used, so the first ID
 * issued ends in `_1`.
 *
 * @returns The newly issued interrupt ID.
 */
function generateInterruptId(): string {
  const issuedAt = Date.now();
  interruptCounter += 1;
  return ['browser', issuedAt, interruptCounter].join('_');
}
|
|
146
|
+
|
|
147
|
+
// ============================================
|
|
148
|
+
// Interrupt-based Tool Implementations
|
|
149
|
+
// ============================================
|
|
150
|
+
|
|
151
|
+
/**
 * Builds the `browser_navigate` tool.
 *
 * The handler passes a `navigate` action to `interrupt()` and turns the
 * `BrowserActionResult` it yields into a text summary: a failure message,
 * a page summary when `pageState` is present, or a bare confirmation.
 *
 * @returns A structured tool whose input is validated by `BrowserNavigateSchema`.
 */
export function createBrowserNavigateInterruptTool(): DynamicStructuredTool<typeof BrowserNavigateSchema> {
  return tool<typeof BrowserNavigateSchema>(
    async (input) => {
      const { url, reason } = input;

      // Per the file header, the graph pauses here until a result is supplied.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'navigate', url, reason },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Navigation failed: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        // No snapshot came back; confirm using the URL that was requested.
        return `Successfully navigated to ${url}`;
      }

      return [
        `Successfully navigated to ${page.url} (${page.title})`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
        '',
        `Viewport: ${page.viewportHeight}px, Scroll: ${page.scrollPosition}/${page.scrollHeight}px`,
      ].join('\n');
    },
    {
      name: 'browser_navigate',
      description: [
        'Navigate to a URL. Returns the page state with interactive elements after navigation completes.',
        '',
        'Example: browser_navigate({ url: "https://www.amazon.com" })',
        'Returns: Page title, URL, and list of interactive elements with their [index] numbers.',
      ].join('\n'),
      schema: BrowserNavigateSchema,
    }
  );
}
|
|
192
|
+
|
|
193
|
+
/**
 * Builds the `browser_click` tool.
 *
 * The handler passes a `click` action (by element index and/or coordinates)
 * to `interrupt()`. If the result carries a `pageState`, the updated page is
 * summarised; otherwise a short confirmation is returned.
 *
 * @returns A structured tool whose input is validated by `BrowserClickSchema`.
 */
export function createBrowserClickInterruptTool(): DynamicStructuredTool<typeof BrowserClickSchema> {
  return tool<typeof BrowserClickSchema>(
    async (input) => {
      const { index, coordinates, reason } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'click', index, coordinates, reason },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Click failed: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (page) {
        // A snapshot was returned with the click result; show the new page.
        return [
          'Clicked element. Page updated:',
          '',
          `URL: ${page.url}`,
          `Title: ${page.title}`,
          '',
          `## Interactive Elements (${page.elementCount} total)`,
          `${page.elementList}`,
        ].join('\n');
      }

      // Only mention the index when the click was made by index.
      const target = index !== undefined ? ` [${index}]` : '';
      return `Successfully clicked element${target}`;
    },
    {
      name: 'browser_click',
      description: [
        'Click an element by index or coordinates.',
        '',
        'Use the [index] number from the interactive elements list.',
        'Example: browser_click({ index: 5 }) to click element [5]',
      ].join('\n'),
      schema: BrowserClickSchema,
    }
  );
}
|
|
234
|
+
|
|
235
|
+
/**
 * Builds the `browser_type` tool.
 *
 * The handler passes a `type` action to `interrupt()`. If the result carries
 * a `pageState` (e.g. Enter submitted a search), the updated page is
 * summarised; otherwise a confirmation naming the element is returned.
 *
 * @returns A structured tool whose input is validated by `BrowserTypeSchema`.
 */
export function createBrowserTypeInterruptTool(): DynamicStructuredTool<typeof BrowserTypeSchema> {
  return tool<typeof BrowserTypeSchema>(
    async (input) => {
      const { index, text, clear, pressEnter } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'type', index, text, clear, pressEnter },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Type failed: ${outcome.error}`;
      }

      // The same suffix is used by both success messages.
      const enterNote = pressEnter ? ' and pressed Enter' : '';

      const page = outcome.pageState;
      if (page) {
        return [
          `Typed "${text}"${enterNote}. Page updated:`,
          '',
          `URL: ${page.url}`,
          `Title: ${page.title}`,
          '',
          `## Interactive Elements (${page.elementCount} total)`,
          `${page.elementList}`,
        ].join('\n');
      }

      return `Successfully typed "${text}" into element [${index}]${enterNote}`;
    },
    {
      name: 'browser_type',
      description: [
        'Type text into an input field.',
        '',
        'Use the [index] from interactive elements list. Look for <input> elements.',
        'Set pressEnter: true to submit after typing (for search fields).',
        '',
        'Example: browser_type({ index: 3, text: "laptop 16gb ram", pressEnter: true })',
      ].join('\n'),
      schema: BrowserTypeSchema,
    }
  );
}
|
|
278
|
+
|
|
279
|
+
/**
 * Builds the `browser_get_page_state` tool.
 *
 * The handler passes a `get_page_state` action to `interrupt()` and formats
 * the returned `pageState` as a text report. A successful result without a
 * `pageState` produces a fixed fallback message.
 *
 * @returns A structured tool whose input is validated by `BrowserGetPageStateSchema`.
 */
export function createBrowserGetPageStateInterruptTool(): DynamicStructuredTool<typeof BrowserGetPageStateSchema> {
  return tool<typeof BrowserGetPageStateSchema>(
    async (input) => {
      const { reason } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'get_page_state', reason },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Failed to get page state: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        return 'Page state captured but no elements found.';
      }

      return [
        '## Current Page',
        `URL: ${page.url}`,
        `Title: ${page.title}`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
        '',
        `Viewport: ${page.viewportHeight}px`,
        `Scroll: ${page.scrollPosition}/${page.scrollHeight}px`,
      ].join('\n');
    },
    {
      name: 'browser_get_page_state',
      description: [
        'Get the current page state with all interactive elements.',
        '',
        'Returns the list of clickable/typeable elements with their [index] numbers.',
        'Use this to see what elements are available on the current page.',
      ].join('\n'),
      schema: BrowserGetPageStateSchema,
    }
  );
}
|
|
321
|
+
|
|
322
|
+
/**
 * Builds the `browser_scroll` tool.
 *
 * The handler passes a `scroll` action to `interrupt()`. If the result
 * carries a `pageState`, the element list and scroll position are reported;
 * otherwise a confirmation is returned that echoes the requested amount.
 *
 * @returns A structured tool whose input is validated by `BrowserScrollSchema`.
 */
export function createBrowserScrollInterruptTool(): DynamicStructuredTool<typeof BrowserScrollSchema> {
  return tool<typeof BrowserScrollSchema>(
    async ({ direction, amount }) => {
      const interruptId = generateInterruptId();

      const result = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'scroll', direction, amount },
        interruptId,
      });

      if (!result.success) {
        return `Scroll failed: ${result.error}`;
      }

      if (result.pageState) {
        return `Scrolled ${direction}. New elements visible:

## Interactive Elements (${result.pageState.elementCount} total)
${result.pageState.elementList}

Scroll: ${result.pageState.scrollPosition}/${result.pageState.scrollHeight}px`;
      }

      // Test for presence, not truthiness, so an explicit amount of 0 is
      // still echoed (matches the click tool's `index !== undefined` check).
      const amountNote = amount !== undefined ? ` ${amount}px` : '';
      return `Successfully scrolled ${direction}${amountNote}`;
    },
    {
      name: 'browser_scroll',
      description: `Scroll the page in a direction.

Example: browser_scroll({ direction: "down", amount: 500 })`,
      schema: BrowserScrollSchema,
    }
  );
}
|
|
360
|
+
|
|
361
|
+
/**
 * Builds the `browser_extract` tool.
 *
 * The handler passes an `extract` action to `interrupt()` and returns the
 * result's `extractedContent`, or a fixed message when that is missing or
 * empty.
 *
 * @returns A structured tool whose input is validated by `BrowserExtractSchema`.
 */
export function createBrowserExtractInterruptTool(): DynamicStructuredTool<typeof BrowserExtractSchema> {
  return tool<typeof BrowserExtractSchema>(
    async (input) => {
      const { query, selector } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'extract', query, selector },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Extract failed: ${outcome.error}`;
      }

      // An empty string counts as "nothing extracted", same as undefined.
      const content = outcome.extractedContent;
      return content ? content : 'No content extracted.';
    },
    {
      name: 'browser_extract',
      description: [
        'Extract text content from the page.',
        '',
        'Example: browser_extract({ query: "price" })',
      ].join('\n'),
      schema: BrowserExtractSchema,
    }
  );
}
|
|
390
|
+
|
|
391
|
+
/**
 * Builds the `browser_hover` tool.
 *
 * The handler passes a `hover` action to `interrupt()` and reports success
 * or failure as text; no page snapshot is read from the result.
 *
 * @returns A structured tool whose input is validated by `BrowserHoverSchema`.
 */
export function createBrowserHoverInterruptTool(): DynamicStructuredTool<typeof BrowserHoverSchema> {
  return tool<typeof BrowserHoverSchema>(
    async (input) => {
      const { index } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'hover', index },
        interruptId: generateInterruptId(),
      });

      return outcome.success
        ? `Successfully hovered over element [${index}]`
        : `Hover failed: ${outcome.error}`;
    },
    {
      name: 'browser_hover',
      description: 'Hover over an element to reveal tooltips or menus.',
      schema: BrowserHoverSchema,
    }
  );
}
|
|
418
|
+
|
|
419
|
+
/**
 * Builds the `browser_wait` tool.
 *
 * The handler passes a `wait` action to `interrupt()` and reports how long
 * the wait was. When no duration was given, the message shows 1000ms, the
 * default stated in `BrowserWaitSchema`.
 *
 * @returns A structured tool whose input is validated by `BrowserWaitSchema`.
 */
export function createBrowserWaitInterruptTool(): DynamicStructuredTool<typeof BrowserWaitSchema> {
  return tool<typeof BrowserWaitSchema>(
    async ({ duration, reason }) => {
      const interruptId = generateInterruptId();

      const result = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'wait', duration, reason },
        interruptId,
      });

      if (!result.success) {
        return `Wait failed: ${result.error}`;
      }

      // Default only when the duration is absent: `||` would turn an
      // explicit 0 into a misleading "Waited 1000ms".
      return `Waited ${duration ?? 1000}ms`;
    },
    {
      name: 'browser_wait',
      description: `Wait for a duration before next action.`,
      schema: BrowserWaitSchema,
    }
  );
}
|
|
446
|
+
|
|
447
|
+
/**
 * Builds the `browser_back` tool.
 *
 * The handler passes a `back` action to `interrupt()`. If the result carries
 * a `pageState`, the URL and element list of the page now showing are
 * reported; otherwise a fixed confirmation is returned.
 *
 * @returns A structured tool whose input is validated by `BrowserGoBackSchema`.
 */
export function createBrowserGoBackInterruptTool(): DynamicStructuredTool<typeof BrowserGoBackSchema> {
  return tool<typeof BrowserGoBackSchema>(
    async (input) => {
      const { reason } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'back', reason },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Go back failed: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        return 'Successfully went back';
      }

      return [
        `Went back to: ${page.url}`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
      ].join('\n');
    },
    {
      name: 'browser_back',
      description: 'Go back to the previous page in history.',
      schema: BrowserGoBackSchema,
    }
  );
}
|
|
481
|
+
|
|
482
|
+
/**
 * Builds the `browser_screenshot` tool.
 *
 * The handler passes a `screenshot` action to `interrupt()`. The returned
 * text only states whether image data came back; the screenshot data itself
 * is not included in the tool's output.
 *
 * @returns A structured tool whose input is validated by `BrowserScreenshotSchema`.
 */
export function createBrowserScreenshotInterruptTool(): DynamicStructuredTool<typeof BrowserScreenshotSchema> {
  return tool<typeof BrowserScreenshotSchema>(
    async (input) => {
      const { fullPage } = input;

      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'screenshot', fullPage },
        interruptId: generateInterruptId(),
      });

      if (!outcome.success) {
        return `Screenshot failed: ${outcome.error}`;
      }

      return outcome.screenshot
        ? 'Screenshot captured. [Image data available]'
        : 'Screenshot captured';
    },
    {
      name: 'browser_screenshot',
      description: 'Capture a screenshot of the current page.',
      schema: BrowserScreenshotSchema,
    }
  );
}
|
|
513
|
+
|
|
514
|
+
// ============================================
|
|
515
|
+
// Tool Collection
|
|
516
|
+
// ============================================
|
|
517
|
+
|
|
518
|
+
/**
 * Names of the tools defined in this module, keyed by a short constant.
 * Key order matters: `BROWSER_INTERRUPT_TOOL_NAMES` is derived from it.
 */
export const EBrowserInterruptTools = {
  CLICK: 'browser_click',
  TYPE: 'browser_type',
  NAVIGATE: 'browser_navigate',
  SCROLL: 'browser_scroll',
  EXTRACT: 'browser_extract',
  HOVER: 'browser_hover',
  WAIT: 'browser_wait',
  BACK: 'browser_back',
  SCREENSHOT: 'browser_screenshot',
  GET_PAGE_STATE: 'browser_get_page_state',
} as const;

/** Every tool-name value of `EBrowserInterruptTools`, in declaration order. */
export const BROWSER_INTERRUPT_TOOL_NAMES = Object.values(EBrowserInterruptTools);

/** Union of the string literals contained in `BROWSER_INTERRUPT_TOOL_NAMES`. */
export type BrowserInterruptToolName = (typeof BROWSER_INTERRUPT_TOOL_NAMES)[number];

/**
 * Tells whether a tool name is one of this module's browser tools.
 *
 * @param toolName - Name to look up; matched exactly.
 * @returns `true` when `toolName` is listed in `BROWSER_INTERRUPT_TOOL_NAMES`.
 */
export function isBrowserInterruptToolCall(toolName: string): toolName is BrowserInterruptToolName {
  // Widen the list to plain strings instead of asserting the argument's type.
  const knownNames: readonly string[] = BROWSER_INTERRUPT_TOOL_NAMES;
  return knownNames.includes(toolName);
}
|
|
538
|
+
|
|
539
|
+
/**
 * Creates one instance of every interrupt-based browser tool in this module.
 *
 * Intended for clients (a browser extension, per the file header) that can:
 *   1. detect `browser_interrupt` events in the stream,
 *   2. execute the requested browser action locally, and
 *   3. send `Command({ resume: result })` to continue the graph.
 *
 * @returns The ten tools, in a fixed order starting with navigate.
 */
export function createBrowserInterruptTools(): DynamicStructuredTool[] {
  // Factories are listed in the order the tools should appear in the result.
  const factories = [
    createBrowserNavigateInterruptTool,
    createBrowserClickInterruptTool,
    createBrowserTypeInterruptTool,
    createBrowserGetPageStateInterruptTool,
    createBrowserScrollInterruptTool,
    createBrowserExtractInterruptTool,
    createBrowserHoverInterruptTool,
    createBrowserWaitInterruptTool,
    createBrowserGoBackInterruptTool,
    createBrowserScreenshotInterruptTool,
  ];

  return factories.map((createTool) => createTool());
}
|
|
561
|
+
|
|
562
|
+
/**
 * Type guard for interrupt payloads produced by this module.
 *
 * Only the `type` tag is inspected: any non-null object whose `type` equals
 * `'browser_interrupt'` is accepted. `action` and `interruptId` are not
 * validated.
 *
 * @param value - Arbitrary value taken from an interrupt.
 * @returns `true` when `value` carries the `browser_interrupt` tag.
 */
export function isBrowserInterrupt(value: unknown): value is BrowserInterrupt {
  if (typeof value !== 'object' || value === null) {
    return false;
  }

  return (value as BrowserInterrupt).type === 'browser_interrupt';
}
|
|
@@ -8,6 +8,7 @@ import {
|
|
|
8
8
|
createBrowserWaitTool,
|
|
9
9
|
createBrowserGoBackTool,
|
|
10
10
|
createBrowserScreenshotTool,
|
|
11
|
+
createBrowserGetPageStateTool,
|
|
11
12
|
createBrowserTools,
|
|
12
13
|
EBrowserTools,
|
|
13
14
|
BROWSER_TOOL_NAMES,
|
|
@@ -31,10 +32,11 @@ describe('BrowserTools', () => {
|
|
|
31
32
|
expect(EBrowserTools.WAIT).toBe('browser_wait');
|
|
32
33
|
expect(EBrowserTools.BACK).toBe('browser_back');
|
|
33
34
|
expect(EBrowserTools.SCREENSHOT).toBe('browser_screenshot');
|
|
35
|
+
expect(EBrowserTools.GET_PAGE_STATE).toBe('browser_get_page_state');
|
|
34
36
|
});
|
|
35
37
|
|
|
36
|
-
it('should have exactly
|
|
37
|
-
expect(Object.keys(EBrowserTools)).toHaveLength(
|
|
38
|
+
it('should have exactly 10 tool names', () => {
|
|
39
|
+
expect(Object.keys(EBrowserTools)).toHaveLength(10);
|
|
38
40
|
});
|
|
39
41
|
});
|
|
40
42
|
|
|
@@ -52,10 +54,11 @@ describe('BrowserTools', () => {
|
|
|
52
54
|
expect(BROWSER_TOOL_NAMES).toContain(EBrowserTools.WAIT);
|
|
53
55
|
expect(BROWSER_TOOL_NAMES).toContain(EBrowserTools.BACK);
|
|
54
56
|
expect(BROWSER_TOOL_NAMES).toContain(EBrowserTools.SCREENSHOT);
|
|
57
|
+
expect(BROWSER_TOOL_NAMES).toContain(EBrowserTools.GET_PAGE_STATE);
|
|
55
58
|
});
|
|
56
59
|
|
|
57
|
-
it('should have exactly
|
|
58
|
-
expect(BROWSER_TOOL_NAMES).toHaveLength(
|
|
60
|
+
it('should have exactly 10 entries', () => {
|
|
61
|
+
expect(BROWSER_TOOL_NAMES).toHaveLength(10);
|
|
59
62
|
});
|
|
60
63
|
});
|
|
61
64
|
|
|
@@ -73,6 +76,7 @@ describe('BrowserTools', () => {
|
|
|
73
76
|
expect(isBrowserToolCall('browser_wait')).toBe(true);
|
|
74
77
|
expect(isBrowserToolCall('browser_back')).toBe(true);
|
|
75
78
|
expect(isBrowserToolCall('browser_screenshot')).toBe(true);
|
|
79
|
+
expect(isBrowserToolCall('browser_get_page_state')).toBe(true);
|
|
76
80
|
});
|
|
77
81
|
|
|
78
82
|
it('should return false for non-browser tool names', () => {
|
|
@@ -406,13 +410,40 @@ describe('BrowserTools', () => {
|
|
|
406
410
|
});
|
|
407
411
|
});
|
|
408
412
|
|
|
413
|
+
describe('createBrowserGetPageStateTool', () => {
|
|
414
|
+
it('should create a tool with correct name', () => {
|
|
415
|
+
const tool = createBrowserGetPageStateTool();
|
|
416
|
+
expect(tool.name).toBe(EBrowserTools.GET_PAGE_STATE);
|
|
417
|
+
});
|
|
418
|
+
|
|
419
|
+
it('should return browser_action for get page state', async () => {
|
|
420
|
+
const tool = createBrowserGetPageStateTool();
|
|
421
|
+
const result = await tool.invoke({ reason: 'checking elements' });
|
|
422
|
+
const parsed = JSON.parse(result);
|
|
423
|
+
|
|
424
|
+
expect(parsed.type).toBe('browser_action');
|
|
425
|
+
expect(parsed.action.type).toBe('get_page_state');
|
|
426
|
+
expect(parsed.action.reason).toBe('checking elements');
|
|
427
|
+
expect(parsed.requiresBrowserExecution).toBe(true);
|
|
428
|
+
});
|
|
429
|
+
|
|
430
|
+
it('should work with empty input', async () => {
|
|
431
|
+
const tool = createBrowserGetPageStateTool();
|
|
432
|
+
const result = await tool.invoke({});
|
|
433
|
+
const parsed = JSON.parse(result);
|
|
434
|
+
|
|
435
|
+
expect(parsed.type).toBe('browser_action');
|
|
436
|
+
expect(parsed.action.type).toBe('get_page_state');
|
|
437
|
+
});
|
|
438
|
+
});
|
|
439
|
+
|
|
409
440
|
// ============================================
|
|
410
441
|
// createBrowserTools Tests
|
|
411
442
|
// ============================================
|
|
412
443
|
describe('createBrowserTools', () => {
|
|
413
|
-
it('should create all
|
|
444
|
+
it('should create all 10 tools by default', () => {
|
|
414
445
|
const tools = createBrowserTools();
|
|
415
|
-
expect(tools).toHaveLength(
|
|
446
|
+
expect(tools).toHaveLength(10);
|
|
416
447
|
});
|
|
417
448
|
|
|
418
449
|
it('should create tools with correct names', () => {
|
|
@@ -428,6 +459,7 @@ describe('BrowserTools', () => {
|
|
|
428
459
|
expect(toolNames).toContain(EBrowserTools.WAIT);
|
|
429
460
|
expect(toolNames).toContain(EBrowserTools.BACK);
|
|
430
461
|
expect(toolNames).toContain(EBrowserTools.SCREENSHOT);
|
|
462
|
+
expect(toolNames).toContain(EBrowserTools.GET_PAGE_STATE);
|
|
431
463
|
});
|
|
432
464
|
|
|
433
465
|
it('should allow disabling specific tools', () => {
|
|
@@ -455,6 +487,7 @@ describe('BrowserTools', () => {
|
|
|
455
487
|
enableWait: false,
|
|
456
488
|
enableBack: false,
|
|
457
489
|
enableScreenshot: false,
|
|
490
|
+
enableGetPageState: false,
|
|
458
491
|
};
|
|
459
492
|
const tools = createBrowserTools(config);
|
|
460
493
|
expect(tools).toHaveLength(0);
|
|
@@ -471,6 +504,7 @@ describe('BrowserTools', () => {
|
|
|
471
504
|
enableWait: false,
|
|
472
505
|
enableBack: false,
|
|
473
506
|
enableScreenshot: false,
|
|
507
|
+
enableGetPageState: false,
|
|
474
508
|
};
|
|
475
509
|
const tools = createBrowserTools(config);
|
|
476
510
|
expect(tools).toHaveLength(2);
|
|
@@ -499,6 +533,7 @@ describe('BrowserTools', () => {
|
|
|
499
533
|
{ tool: EBrowserTools.WAIT, input: {} },
|
|
500
534
|
{ tool: EBrowserTools.BACK, input: {} },
|
|
501
535
|
{ tool: EBrowserTools.SCREENSHOT, input: {} },
|
|
536
|
+
{ tool: EBrowserTools.GET_PAGE_STATE, input: {} },
|
|
502
537
|
];
|
|
503
538
|
|
|
504
539
|
for (const { tool: toolName, input } of testInputs) {
|