illuma-agents 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,571 +0,0 @@
1
- /**
2
- * Browser Automation Tools with LangGraph Interrupt Support
3
- *
4
- * These tools use LangGraph's interrupt() mechanism to pause execution
5
- * and wait for the browser extension to execute actions and return results.
6
- *
7
- * Flow:
8
- * 1. Agent calls browser tool (e.g., browser_navigate)
9
- * 2. Tool calls interrupt() with action details
10
- * 3. Graph pauses and returns interrupt to client
11
- * 4. Extension executes action in browser
12
- * 5. Extension sends resume Command with actual result
13
- * 6. Graph continues with real browser data
14
- *
15
- * This enables proper chain-of-thought with browser context because
16
- * the agent receives ACTUAL results (page elements, screenshots, etc.)
17
- * instead of placeholder acknowledgments.
18
- */
19
-
20
- import { z } from 'zod';
21
- import { interrupt } from '@langchain/langgraph';
22
- import { tool, DynamicStructuredTool } from '@langchain/core/tools';
23
-
24
- // ============================================
25
- // Browser Interrupt Types
26
- // ============================================
27
-
28
/**
 * Payload passed to `interrupt()` describing the browser action the
 * extension should carry out.
 */
export interface BrowserInterrupt {
  /** Discriminator marking this payload as a browser interrupt. */
  type: 'browser_interrupt';
  /** The concrete action to run in the browser. */
  action: BrowserAction;
  /** Identifier of this interrupt, used to match the resume to it. */
  interruptId: string;
}

/**
 * All actions a browser tool can request, discriminated by `type`.
 */
export type BrowserAction =
  | {
      type: 'navigate';
      url: string;
      reason?: string;
    }
  | {
      type: 'click';
      index?: number;
      coordinates?: { x: number; y: number };
      reason?: string;
    }
  | {
      type: 'type';
      index: number;
      text: string;
      clear?: boolean;
      pressEnter?: boolean;
    }
  | {
      type: 'scroll';
      direction: 'up' | 'down' | 'left' | 'right';
      amount?: number;
    }
  | {
      type: 'extract';
      query?: string;
      selector?: string;
    }
  | {
      type: 'hover';
      index: number;
    }
  | {
      type: 'wait';
      duration?: number;
      reason?: string;
    }
  | {
      type: 'back';
      reason?: string;
    }
  | {
      type: 'screenshot';
      fullPage?: boolean;
    }
  | {
      type: 'get_page_state';
      reason?: string;
    };

/**
 * What the extension sends back once it has executed an action.
 */
export interface BrowserActionResult {
  /** Whether the action completed successfully. */
  success: boolean;
  /** Failure description supplied by the extension, if any. */
  error?: string;
  /** Snapshot of the page after the action (for navigate, click, get_page_state). */
  pageState?: {
    url: string;
    title: string;
    /** Element list already formatted for consumption by the LLM. */
    elementList: string;
    elementCount: number;
    scrollPosition: number;
    scrollHeight: number;
    viewportHeight: number;
  };
  /** Screenshot encoded as a data URL. */
  screenshot?: string;
  /** Content extracted from the page. */
  extractedContent?: string;
  /** Any further data attached to the result. */
  data?: unknown;
}
79
-
80
- // ============================================
81
- // Tool Schemas (same as BrowserTools.ts)
82
- // ============================================
83
-
84
/** Builds the optional free-text `reason` field that several schemas share. */
const optionalReason = (description: string) =>
  z.string().optional().describe(description);

/** Arguments for `browser_click`: an element index and/or viewport coordinates. */
const BrowserClickSchema = z.object({
  index: z
    .number()
    .optional()
    .describe('The index of the element to click, as shown in the page context'),
  coordinates: z
    .object({
      x: z.number().describe('X coordinate in viewport pixels'),
      y: z.number().describe('Y coordinate in viewport pixels'),
    })
    .optional()
    .describe('Coordinates for clicking by position'),
  reason: optionalReason('Why you are clicking this element'),
});

/** Arguments for `browser_type`. */
const BrowserTypeSchema = z.object({
  index: z.number().describe('The index of the input element to type into'),
  text: z.string().describe('The text to type'),
  clear: z.boolean().optional().describe('Clear existing content first'),
  pressEnter: z.boolean().optional().describe('Press Enter after typing'),
});

/** Arguments for `browser_navigate`. */
const BrowserNavigateSchema = z.object({
  url: z.string().describe('The URL to navigate to'),
  reason: optionalReason('Why navigating to this URL'),
});

/** Arguments for `browser_scroll`. */
const BrowserScrollSchema = z.object({
  direction: z
    .enum(['up', 'down', 'left', 'right'])
    .describe('Scroll direction'),
  amount: z.number().optional().describe('Pixels to scroll (default: 500)'),
});

/** Arguments for `browser_extract`. */
const BrowserExtractSchema = z.object({
  query: z.string().optional().describe('Query to filter extracted content'),
  selector: z.string().optional().describe('CSS selector to extract from'),
});

/** Arguments for `browser_hover`. */
const BrowserHoverSchema = z.object({
  index: z.number().describe('Element index to hover over'),
});

/** Arguments for `browser_wait`. */
const BrowserWaitSchema = z.object({
  duration: z
    .number()
    .optional()
    .describe('Milliseconds to wait (default: 1000)'),
  reason: optionalReason('Why waiting'),
});

/** Arguments for `browser_back`. */
const BrowserGoBackSchema = z.object({
  reason: optionalReason('Why going back'),
});

/** Arguments for `browser_screenshot`. */
const BrowserScreenshotSchema = z.object({
  fullPage: z.boolean().optional().describe('Capture full page vs viewport'),
});

/** Arguments for `browser_get_page_state`. */
const BrowserGetPageStateSchema = z.object({
  reason: optionalReason('Why getting page state'),
});
137
-
138
- // ============================================
139
- // Helper to generate interrupt IDs
140
- // ============================================
141
-
142
/** Count of interrupt IDs issued so far by this module. */
let interruptCounter = 0;

/**
 * Produces the next interrupt ID in the form `browser_<epoch ms>_<sequence>`.
 *
 * @returns The current timestamp combined with an incrementing sequence number.
 */
function generateInterruptId(): string {
  const timestamp = Date.now();
  interruptCounter += 1;
  return ['browser', timestamp, interruptCounter].join('_');
}
146
-
147
- // ============================================
148
- // Interrupt-based Tool Implementations
149
- // ============================================
150
-
151
/**
 * Builds the `browser_navigate` tool.
 *
 * The tool raises a browser interrupt describing the navigation and, once the
 * graph is resumed, renders the returned result as text for the model.
 *
 * @returns A structured tool validated by `BrowserNavigateSchema`.
 */
export function createBrowserNavigateInterruptTool(): DynamicStructuredTool<typeof BrowserNavigateSchema> {
  return tool<typeof BrowserNavigateSchema>(
    async ({ url, reason }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'navigate', url, reason },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Navigation failed: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        // No page snapshot came back, so only confirm the requested URL.
        return `Successfully navigated to ${url}`;
      }

      return [
        `Successfully navigated to ${page.url} (${page.title})`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
        '',
        `Viewport: ${page.viewportHeight}px, Scroll: ${page.scrollPosition}/${page.scrollHeight}px`,
      ].join('\n');
    },
    {
      name: 'browser_navigate',
      description: [
        'Navigate to a URL. Returns the page state with interactive elements after navigation completes.',
        '',
        'Example: browser_navigate({ url: "https://www.amazon.com" })',
        'Returns: Page title, URL, and list of interactive elements with their [index] numbers.',
      ].join('\n'),
      schema: BrowserNavigateSchema,
    }
  );
}
192
-
193
/**
 * Builds the `browser_click` tool.
 *
 * The tool raises a browser interrupt for a click on an element index or at
 * viewport coordinates and, once resumed, renders the result as text for the
 * model. If the result carries a page snapshot, the updated page is included.
 *
 * @returns A structured tool validated by `BrowserClickSchema`.
 */
export function createBrowserClickInterruptTool(): DynamicStructuredTool<typeof BrowserClickSchema> {
  return tool<typeof BrowserClickSchema>(
    async ({ index, coordinates, reason }) => {
      // Both targets are optional in the schema. With neither there is nothing
      // to click, so tell the model directly instead of pausing the graph for
      // an action that has no target.
      if (index === undefined && coordinates === undefined) {
        return 'Click failed: either index or coordinates must be provided';
      }

      const interruptId = generateInterruptId();

      // The graph pauses here; the value supplied on resume becomes `result`.
      const result = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'click', index, coordinates, reason },
        interruptId,
      });

      if (!result.success) {
        return `Click failed: ${result.error}`;
      }

      const page = result.pageState;
      if (page) {
        // A page snapshot came back (e.g. the click caused navigation).
        return [
          'Clicked element. Page updated:',
          '',
          `URL: ${page.url}`,
          `Title: ${page.title}`,
          '',
          `## Interactive Elements (${page.elementCount} total)`,
          `${page.elementList}`,
        ].join('\n');
      }

      // Only mention the index when the click targeted one.
      return `Successfully clicked element${index !== undefined ? ` [${index}]` : ''}`;
    },
    {
      name: 'browser_click',
      description: [
        'Click an element by index or coordinates.',
        '',
        'Use the [index] number from the interactive elements list.',
        'Example: browser_click({ index: 5 }) to click element [5]',
      ].join('\n'),
      schema: BrowserClickSchema,
    }
  );
}
234
-
235
/**
 * Builds the `browser_type` tool.
 *
 * The tool raises a browser interrupt asking the extension to type text into
 * an input element and, once resumed, renders the result as text for the model.
 *
 * @returns A structured tool validated by `BrowserTypeSchema`.
 */
export function createBrowserTypeInterruptTool(): DynamicStructuredTool<typeof BrowserTypeSchema> {
  return tool<typeof BrowserTypeSchema>(
    async ({ index, text, clear, pressEnter }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'type', index, text, clear, pressEnter },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Type failed: ${outcome.error}`;
      }

      const enterSuffix = pressEnter ? ' and pressed Enter' : '';
      const page = outcome.pageState;

      if (!page) {
        return `Successfully typed "${text}" into element [${index}]${enterSuffix}`;
      }

      // A page snapshot came back (e.g. typing + Enter submitted a search).
      return [
        `Typed "${text}"${enterSuffix}. Page updated:`,
        '',
        `URL: ${page.url}`,
        `Title: ${page.title}`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
      ].join('\n');
    },
    {
      name: 'browser_type',
      description: [
        'Type text into an input field.',
        '',
        'Use the [index] from interactive elements list. Look for <input> elements.',
        'Set pressEnter: true to submit after typing (for search fields).',
        '',
        'Example: browser_type({ index: 3, text: "laptop 16gb ram", pressEnter: true })',
      ].join('\n'),
      schema: BrowserTypeSchema,
    }
  );
}
278
-
279
/**
 * Builds the `browser_get_page_state` tool.
 *
 * The tool raises a browser interrupt requesting the current page state and,
 * once resumed, renders the returned snapshot as text for the model.
 *
 * @returns A structured tool validated by `BrowserGetPageStateSchema`.
 */
export function createBrowserGetPageStateInterruptTool(): DynamicStructuredTool<typeof BrowserGetPageStateSchema> {
  return tool<typeof BrowserGetPageStateSchema>(
    async ({ reason }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'get_page_state', reason },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Failed to get page state: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        return 'Page state captured but no elements found.';
      }

      return [
        '## Current Page',
        `URL: ${page.url}`,
        `Title: ${page.title}`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
        '',
        `Viewport: ${page.viewportHeight}px`,
        `Scroll: ${page.scrollPosition}/${page.scrollHeight}px`,
      ].join('\n');
    },
    {
      name: 'browser_get_page_state',
      description: [
        'Get the current page state with all interactive elements.',
        '',
        'Returns the list of clickable/typeable elements with their [index] numbers.',
        'Use this to see what elements are available on the current page.',
      ].join('\n'),
      schema: BrowserGetPageStateSchema,
    }
  );
}
321
-
322
/**
 * Builds the `browser_scroll` tool.
 *
 * The tool raises a browser interrupt asking the extension to scroll and,
 * once resumed, renders the result as text for the model.
 *
 * @returns A structured tool validated by `BrowserScrollSchema`.
 */
export function createBrowserScrollInterruptTool(): DynamicStructuredTool<typeof BrowserScrollSchema> {
  return tool<typeof BrowserScrollSchema>(
    async ({ direction, amount }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'scroll', direction, amount },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Scroll failed: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        // Mention the distance only when a truthy amount was requested.
        const distance = amount ? ` ${amount}px` : '';
        return `Successfully scrolled ${direction}${distance}`;
      }

      return [
        `Scrolled ${direction}. New elements visible:`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
        '',
        `Scroll: ${page.scrollPosition}/${page.scrollHeight}px`,
      ].join('\n');
    },
    {
      name: 'browser_scroll',
      description: [
        'Scroll the page in a direction.',
        '',
        'Example: browser_scroll({ direction: "down", amount: 500 })',
      ].join('\n'),
      schema: BrowserScrollSchema,
    }
  );
}
360
-
361
/**
 * Builds the `browser_extract` tool.
 *
 * The tool raises a browser interrupt asking the extension to extract page
 * content and, once resumed, returns the extracted text to the model.
 *
 * @returns A structured tool validated by `BrowserExtractSchema`.
 */
export function createBrowserExtractInterruptTool(): DynamicStructuredTool<typeof BrowserExtractSchema> {
  return tool<typeof BrowserExtractSchema>(
    async ({ query, selector }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'extract', query, selector },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Extract failed: ${outcome.error}`;
      }

      // Missing or empty content falls back to a fixed message.
      if (outcome.extractedContent) {
        return outcome.extractedContent;
      }
      return 'No content extracted.';
    },
    {
      name: 'browser_extract',
      description: [
        'Extract text content from the page.',
        '',
        'Example: browser_extract({ query: "price" })',
      ].join('\n'),
      schema: BrowserExtractSchema,
    }
  );
}
390
-
391
/**
 * Builds the `browser_hover` tool.
 *
 * The tool raises a browser interrupt asking the extension to hover over an
 * element and, once resumed, reports success or failure to the model.
 *
 * @returns A structured tool validated by `BrowserHoverSchema`.
 */
export function createBrowserHoverInterruptTool(): DynamicStructuredTool<typeof BrowserHoverSchema> {
  return tool<typeof BrowserHoverSchema>(
    async ({ index }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'hover', index },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      return outcome.success
        ? `Successfully hovered over element [${index}]`
        : `Hover failed: ${outcome.error}`;
    },
    {
      name: 'browser_hover',
      description: 'Hover over an element to reveal tooltips or menus.',
      schema: BrowserHoverSchema,
    }
  );
}
418
-
419
/**
 * Builds the `browser_wait` tool.
 *
 * The tool raises a browser interrupt asking the extension to wait and, once
 * resumed, reports how long was waited or why the wait failed.
 *
 * @returns A structured tool validated by `BrowserWaitSchema`.
 */
export function createBrowserWaitInterruptTool(): DynamicStructuredTool<typeof BrowserWaitSchema> {
  return tool<typeof BrowserWaitSchema>(
    async ({ duration, reason }) => {
      const interruptId = generateInterruptId();

      // The graph pauses here; the value supplied on resume becomes `result`.
      const result = interrupt<BrowserInterrupt, BrowserActionResult>({
        type: 'browser_interrupt',
        action: { type: 'wait', duration, reason },
        interruptId,
      });

      if (!result.success) {
        return `Wait failed: ${result.error}`;
      }

      // Use `??` rather than `||` so an explicit duration of 0 is reported as
      // 0ms; only a missing duration falls back to the schema's documented
      // default of 1000.
      return `Waited ${duration ?? 1000}ms`;
    },
    {
      name: 'browser_wait',
      description: `Wait for a duration before next action.`,
      schema: BrowserWaitSchema,
    }
  );
}
446
-
447
/**
 * Builds the `browser_back` tool.
 *
 * The tool raises a browser interrupt asking the extension to go back in
 * history and, once resumed, renders the result as text for the model.
 *
 * @returns A structured tool validated by `BrowserGoBackSchema`.
 */
export function createBrowserGoBackInterruptTool(): DynamicStructuredTool<typeof BrowserGoBackSchema> {
  return tool<typeof BrowserGoBackSchema>(
    async ({ reason }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'back', reason },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Go back failed: ${outcome.error}`;
      }

      const page = outcome.pageState;
      if (!page) {
        return 'Successfully went back';
      }

      return [
        `Went back to: ${page.url}`,
        '',
        `## Interactive Elements (${page.elementCount} total)`,
        `${page.elementList}`,
      ].join('\n');
    },
    {
      name: 'browser_back',
      description: 'Go back to the previous page in history.',
      schema: BrowserGoBackSchema,
    }
  );
}
481
-
482
/**
 * Builds the `browser_screenshot` tool.
 *
 * The tool raises a browser interrupt asking the extension for a screenshot
 * and, once resumed, reports whether one was captured. The image data itself
 * is not included in the returned text.
 *
 * @returns A structured tool validated by `BrowserScreenshotSchema`.
 */
export function createBrowserScreenshotInterruptTool(): DynamicStructuredTool<typeof BrowserScreenshotSchema> {
  return tool<typeof BrowserScreenshotSchema>(
    async ({ fullPage }) => {
      const request: BrowserInterrupt = {
        type: 'browser_interrupt',
        action: { type: 'screenshot', fullPage },
        interruptId: generateInterruptId(),
      };

      // The graph pauses here; the value supplied on resume becomes `outcome`.
      const outcome = interrupt<BrowserInterrupt, BrowserActionResult>(request);

      if (!outcome.success) {
        return `Screenshot failed: ${outcome.error}`;
      }

      return outcome.screenshot
        ? 'Screenshot captured. [Image data available]'
        : 'Screenshot captured';
    },
    {
      name: 'browser_screenshot',
      description: 'Capture a screenshot of the current page.',
      schema: BrowserScreenshotSchema,
    }
  );
}
513
-
514
- // ============================================
515
- // Tool Collection
516
- // ============================================
517
-
518
/** Names of the interrupt-based browser tools, keyed by action. */
export const EBrowserInterruptTools = {
  CLICK: 'browser_click',
  TYPE: 'browser_type',
  NAVIGATE: 'browser_navigate',
  SCROLL: 'browser_scroll',
  EXTRACT: 'browser_extract',
  HOVER: 'browser_hover',
  WAIT: 'browser_wait',
  BACK: 'browser_back',
  SCREENSHOT: 'browser_screenshot',
  GET_PAGE_STATE: 'browser_get_page_state',
} as const;

/** Every interrupt tool name, in the declaration order of `EBrowserInterruptTools`. */
export const BROWSER_INTERRUPT_TOOL_NAMES = Object.values(EBrowserInterruptTools);

/** Union of the literal tool names listed in `BROWSER_INTERRUPT_TOOL_NAMES`. */
export type BrowserInterruptToolName = typeof BROWSER_INTERRUPT_TOOL_NAMES[number];

/**
 * Type guard telling whether a tool name is one of the interrupt-based browser tools.
 *
 * @param toolName - Name of the tool being called.
 * @returns `true` when `toolName` appears in `BROWSER_INTERRUPT_TOOL_NAMES`.
 */
export function isBrowserInterruptToolCall(toolName: string): toolName is BrowserInterruptToolName {
  return BROWSER_INTERRUPT_TOOL_NAMES.some((known) => known === toolName);
}
538
-
539
/**
 * Instantiates every interrupt-based browser tool.
 *
 * Intended for clients (a browser extension) that can:
 * 1. Detect browser_interrupt events in the stream
 * 2. Execute browser actions locally
 * 3. Send Command({ resume: result }) to continue the graph
 *
 * @returns One freshly created instance of each browser tool.
 */
export function createBrowserInterruptTools(): DynamicStructuredTool[] {
  const factories: ReadonlyArray<() => DynamicStructuredTool> = [
    createBrowserNavigateInterruptTool,
    createBrowserClickInterruptTool,
    createBrowserTypeInterruptTool,
    createBrowserGetPageStateInterruptTool,
    createBrowserScrollInterruptTool,
    createBrowserExtractInterruptTool,
    createBrowserHoverInterruptTool,
    createBrowserWaitInterruptTool,
    createBrowserGoBackInterruptTool,
    createBrowserScreenshotInterruptTool,
  ];

  // Invoke each factory in order so the resulting list keeps this ordering.
  return factories.map((create) => create());
}
561
-
562
/**
 * Type guard for browser interrupt payloads.
 *
 * Only the `type` discriminator is inspected; `action` and `interruptId`
 * are not validated.
 *
 * @param value - Arbitrary interrupt value.
 * @returns `true` when `value` is a non-null object whose `type` is `'browser_interrupt'`.
 */
export function isBrowserInterrupt(value: unknown): value is BrowserInterrupt {
  if (typeof value !== 'object' || value === null) {
    return false;
  }
  return (value as { type?: unknown }).type === 'browser_interrupt';
}