illuma-agents 1.0.19 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -117,6 +117,12 @@ const BrowserScreenshotSchema = z.object({
117
117
  ),
118
118
  });
119
119
 
120
+ const BrowserGetPageStateSchema = z.object({ // input schema for the get-page-state tool: a single optional free-text `reason`
121
+ reason: z.string().optional().describe(
122
+ 'Why you need fresh page state (e.g., "after navigation", "to see updated elements")'
123
+ ),
124
+ });
125
+
120
126
  // ============================================
121
127
  // Tool Implementations
122
128
  // ============================================
@@ -241,8 +247,18 @@ Use this tool when you need to:
241
247
  - Navigate to a different page
242
248
  - Open a new URL
243
249
 
250
+ **IMPORTANT**: After calling browser_navigate, you MUST call browser_get_page_state
251
+ before using browser_click or browser_type. This is because navigation changes the page,
252
+ and you need to see the new page's elements before you can interact with them.
253
+
244
254
  Provide the full URL including the protocol (https://).
245
255
 
256
+ **Correct workflow**:
257
+ 1. browser_navigate({ url: "https://www.amazon.com" })
258
+ 2. browser_get_page_state({ reason: "see elements on Amazon" })
259
+ 3. Now find the search input's [index] in the returned state
260
+ 4. browser_type({ index: <search_input_index>, text: "query", pressEnter: true })
261
+
246
262
  Example: browser_navigate({ url: "https://www.google.com" })`,
247
263
  schema: BrowserNavigateSchema,
248
264
  }
@@ -444,6 +460,52 @@ Example: browser_screenshot({ fullPage: false })`,
444
460
  );
445
461
  }
446
462
 
463
+ /**
464
+ * Creates the browser_get_page_state tool. The handler only returns a JSON 'browser_action' request (action type 'get_page_state', optional reason); per its message, the browser extension captures the page state.
465
+ * CRITICAL: Use this after browser_navigate or any action that changes the page, and wait for the element list before issuing browser_click or browser_type.
466
+ */
467
+ export function createBrowserGetPageStateTool(): DynamicStructuredTool<typeof BrowserGetPageStateSchema> {
468
+ return tool<typeof BrowserGetPageStateSchema>(
469
+ async ({ reason }) => {
470
+ return JSON.stringify({
471
+ type: 'browser_action',
472
+ action: {
473
+ type: 'get_page_state',
474
+ reason,
475
+ },
476
+ requiresBrowserExecution: true,
477
+ // Special flag: signals that the extension should inject fresh page context into the conversation
478
+ requiresContextRefresh: true,
479
+ // IMPORTANT: tells the agent to wait for the element list before any click or type action
480
+ message: 'Page state is being captured by the browser extension. The element list will be provided in the next message. DO NOT proceed with click or type actions until you receive the actual element list.',
481
+ });
482
+ },
483
+ {
484
+ name: EBrowserTools.GET_PAGE_STATE,
485
+ description: `Get fresh page state showing current interactive elements.
486
+
487
+ **CRITICAL WORKFLOW**: After calling this tool, you MUST STOP and WAIT. The browser extension will capture the page state and return the element list. DO NOT plan any browser_click or browser_type actions in the same response - you don't have the element indices yet!
488
+
489
+ **When to use**:
490
+ - After browser_navigate (to see elements on the new page)
491
+ - After browser_click (if it caused navigation or page changes)
492
+ - Any time you need to see what elements are currently on the page
493
+
494
+ **IMPORTANT**: This tool captures the page state asynchronously. The actual element list will be provided AFTER this tool completes. You should:
495
+ 1. Call this tool
496
+ 2. STOP and wait for the response with the element list
497
+ 3. In your NEXT response, use the element indices for click/type actions
498
+
499
+ Example workflow:
500
+ - Turn 1: browser_navigate to amazon.com, then browser_get_page_state
501
+ - Turn 2: (After receiving element list) browser_type with the correct search input index
502
+
503
+ Example: browser_get_page_state({ reason: "to see elements after navigation" })`,
504
+ schema: BrowserGetPageStateSchema,
505
+ }
506
+ );
507
+ }
508
+
447
509
  // ============================================
448
510
  // Tool Collection
449
511
  // ============================================
@@ -467,6 +529,8 @@ export type BrowserToolsConfig = {
467
529
  enableBack?: boolean;
468
530
  /** Enable screenshot tool */
469
531
  enableScreenshot?: boolean;
532
+ /** Enable get page state tool */
533
+ enableGetPageState?: boolean;
470
534
  };
471
535
 
472
536
  /**
@@ -504,6 +568,7 @@ export function createBrowserTools(config: BrowserToolsConfig = {}): DynamicStru
504
568
  enableWait = true,
505
569
  enableBack = true,
506
570
  enableScreenshot = true,
571
+ enableGetPageState = true,
507
572
  } = config;
508
573
 
509
574
  if (enableClick) tools.push(createBrowserClickTool());
@@ -515,6 +580,7 @@ export function createBrowserTools(config: BrowserToolsConfig = {}): DynamicStru
515
580
  if (enableWait) tools.push(createBrowserWaitTool());
516
581
  if (enableBack) tools.push(createBrowserGoBackTool());
517
582
  if (enableScreenshot) tools.push(createBrowserScreenshotTool());
583
+ if (enableGetPageState) tools.push(createBrowserGetPageStateTool());
518
584
 
519
585
  return tools;
520
586
  }
@@ -533,6 +599,7 @@ export const EBrowserTools = {
533
599
  WAIT: 'browser_wait',
534
600
  BACK: 'browser_back',
535
601
  SCREENSHOT: 'browser_screenshot',
602
+ GET_PAGE_STATE: 'browser_get_page_state',
536
603
  } as const;
537
604
 
538
605
  /**
@@ -548,6 +615,7 @@ export const BROWSER_TOOL_NAMES = [
548
615
  EBrowserTools.WAIT,
549
616
  EBrowserTools.BACK,
550
617
  EBrowserTools.SCREENSHOT,
618
+ EBrowserTools.GET_PAGE_STATE,
551
619
  ] as const;
552
620
 
553
621
  export type BrowserToolName = typeof BROWSER_TOOL_NAMES[number];
package/src/types/run.ts CHANGED
@@ -115,6 +115,14 @@ export type RunConfig = {
115
115
  returnContent?: boolean;
116
116
  tokenCounter?: TokenCounter;
117
117
  indexTokenCountMap?: Record<string, number>;
118
+ /**
119
+ * Enable browser extension mode with interrupt-based tool execution.
120
+ * When true:
121
+ * - Uses MemorySaver checkpointer for pause/resume
122
+ * - Browser tools will interrupt execution and wait for extension results
123
+ * - The extension must call the resume endpoint with a Command to continue execution
124
+ */
125
+ browserMode?: boolean;
118
126
  };
119
127
 
120
128
  export type ProvidedCallbacks =