illuma-agents 1.0.19 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/graphs/Graph.cjs +3 -3
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +19 -0
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/run.cjs +137 -3
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/tools/BrowserInterruptTools.cjs +431 -0
- package/dist/cjs/tools/BrowserInterruptTools.cjs.map +1 -0
- package/dist/cjs/tools/BrowserTools.cjs +61 -1
- package/dist/cjs/tools/BrowserTools.cjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +3 -3
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -2
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/run.mjs +136 -4
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/tools/BrowserInterruptTools.mjs +415 -0
- package/dist/esm/tools/BrowserInterruptTools.mjs.map +1 -0
- package/dist/esm/tools/BrowserTools.mjs +61 -2
- package/dist/esm/tools/BrowserTools.mjs.map +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/run.d.ts +47 -0
- package/dist/types/tools/BrowserInterruptTools.d.ts +282 -0
- package/dist/types/tools/BrowserTools.d.ts +18 -3
- package/dist/types/types/run.d.ts +8 -0
- package/package.json +1 -1
- package/src/graphs/Graph.ts +3 -3
- package/src/index.ts +1 -0
- package/src/run.ts +176 -3
- package/src/specs/browser-interrupt-tools.test.ts +235 -0
- package/src/tools/BrowserInterruptTools.ts +571 -0
- package/src/tools/BrowserTools.test.ts +41 -6
- package/src/tools/BrowserTools.ts +68 -0
- package/src/types/run.ts +8 -0
|
@@ -117,6 +117,12 @@ const BrowserScreenshotSchema = z.object({
|
|
|
117
117
|
),
|
|
118
118
|
});
|
|
119
119
|
|
|
120
|
+
const BrowserGetPageStateSchema = z.object({
|
|
121
|
+
reason: z.string().optional().describe(
|
|
122
|
+
'Why you need fresh page state (e.g., "after navigation", "to see updated elements")'
|
|
123
|
+
),
|
|
124
|
+
});
|
|
125
|
+
|
|
120
126
|
// ============================================
|
|
121
127
|
// Tool Implementations
|
|
122
128
|
// ============================================
|
|
@@ -241,8 +247,18 @@ Use this tool when you need to:
|
|
|
241
247
|
- Navigate to a different page
|
|
242
248
|
- Open a new URL
|
|
243
249
|
|
|
250
|
+
**IMPORTANT**: After calling browser_navigate, you MUST call browser_get_page_state
|
|
251
|
+
before using browser_click or browser_type. This is because navigation changes the page,
|
|
252
|
+
and you need to see the new page's elements before you can interact with them.
|
|
253
|
+
|
|
244
254
|
Provide the full URL including the protocol (https://).
|
|
245
255
|
|
|
256
|
+
**Correct workflow**:
|
|
257
|
+
1. browser_navigate({ url: "https://www.amazon.com" })
|
|
258
|
+
2. browser_get_page_state({ reason: "see elements on Amazon" })
|
|
259
|
+
3. Now find the search input's [index] in the returned state
|
|
260
|
+
4. browser_type({ index: <search_input_index>, text: "query", pressEnter: true })
|
|
261
|
+
|
|
246
262
|
Example: browser_navigate({ url: "https://www.google.com" })`,
|
|
247
263
|
schema: BrowserNavigateSchema,
|
|
248
264
|
}
|
|
@@ -444,6 +460,52 @@ Example: browser_screenshot({ fullPage: false })`,
|
|
|
444
460
|
);
|
|
445
461
|
}
|
|
446
462
|
|
|
463
|
+
/**
|
|
464
|
+
* Browser get page state tool - gets fresh page context after navigation or actions
|
|
465
|
+
* CRITICAL: Use this after browser_navigate or any action that changes the page
|
|
466
|
+
*/
|
|
467
|
+
export function createBrowserGetPageStateTool(): DynamicStructuredTool<typeof BrowserGetPageStateSchema> {
|
|
468
|
+
return tool<typeof BrowserGetPageStateSchema>(
|
|
469
|
+
async ({ reason }) => {
|
|
470
|
+
return JSON.stringify({
|
|
471
|
+
type: 'browser_action',
|
|
472
|
+
action: {
|
|
473
|
+
type: 'get_page_state',
|
|
474
|
+
reason,
|
|
475
|
+
},
|
|
476
|
+
requiresBrowserExecution: true,
|
|
477
|
+
// Special flag: extension should inject fresh context into the conversation
|
|
478
|
+
requiresContextRefresh: true,
|
|
479
|
+
// IMPORTANT: Tell the agent to wait
|
|
480
|
+
message: 'Page state is being captured by the browser extension. The element list will be provided in the next message. DO NOT proceed with click or type actions until you receive the actual element list.',
|
|
481
|
+
});
|
|
482
|
+
},
|
|
483
|
+
{
|
|
484
|
+
name: EBrowserTools.GET_PAGE_STATE,
|
|
485
|
+
description: `Get fresh page state showing current interactive elements.
|
|
486
|
+
|
|
487
|
+
**CRITICAL WORKFLOW**: After calling this tool, you MUST STOP and WAIT. The browser extension will capture the page state and return the element list. DO NOT plan any browser_click or browser_type actions in the same response - you don't have the element indices yet!
|
|
488
|
+
|
|
489
|
+
**When to use**:
|
|
490
|
+
- After browser_navigate (to see elements on the new page)
|
|
491
|
+
- After browser_click (if it caused navigation or page changes)
|
|
492
|
+
- Any time you need to see what elements are currently on the page
|
|
493
|
+
|
|
494
|
+
**IMPORTANT**: This tool captures the page state asynchronously. The actual element list will be provided AFTER this tool completes. You should:
|
|
495
|
+
1. Call this tool
|
|
496
|
+
2. STOP and wait for the response with the element list
|
|
497
|
+
3. In your NEXT response, use the element indices for click/type actions
|
|
498
|
+
|
|
499
|
+
Example workflow:
|
|
500
|
+
- Turn 1: browser_navigate to amazon.com, then browser_get_page_state
|
|
501
|
+
- Turn 2: (After receiving element list) browser_type with the correct search input index
|
|
502
|
+
|
|
503
|
+
Example: browser_get_page_state({ reason: "to see elements after navigation" })`,
|
|
504
|
+
schema: BrowserGetPageStateSchema,
|
|
505
|
+
}
|
|
506
|
+
);
|
|
507
|
+
}
|
|
508
|
+
|
|
447
509
|
// ============================================
|
|
448
510
|
// Tool Collection
|
|
449
511
|
// ============================================
|
|
@@ -467,6 +529,8 @@ export type BrowserToolsConfig = {
|
|
|
467
529
|
enableBack?: boolean;
|
|
468
530
|
/** Enable screenshot tool */
|
|
469
531
|
enableScreenshot?: boolean;
|
|
532
|
+
/** Enable get page state tool */
|
|
533
|
+
enableGetPageState?: boolean;
|
|
470
534
|
};
|
|
471
535
|
|
|
472
536
|
/**
|
|
@@ -504,6 +568,7 @@ export function createBrowserTools(config: BrowserToolsConfig = {}): DynamicStru
|
|
|
504
568
|
enableWait = true,
|
|
505
569
|
enableBack = true,
|
|
506
570
|
enableScreenshot = true,
|
|
571
|
+
enableGetPageState = true,
|
|
507
572
|
} = config;
|
|
508
573
|
|
|
509
574
|
if (enableClick) tools.push(createBrowserClickTool());
|
|
@@ -515,6 +580,7 @@ export function createBrowserTools(config: BrowserToolsConfig = {}): DynamicStru
|
|
|
515
580
|
if (enableWait) tools.push(createBrowserWaitTool());
|
|
516
581
|
if (enableBack) tools.push(createBrowserGoBackTool());
|
|
517
582
|
if (enableScreenshot) tools.push(createBrowserScreenshotTool());
|
|
583
|
+
if (enableGetPageState) tools.push(createBrowserGetPageStateTool());
|
|
518
584
|
|
|
519
585
|
return tools;
|
|
520
586
|
}
|
|
@@ -533,6 +599,7 @@ export const EBrowserTools = {
|
|
|
533
599
|
WAIT: 'browser_wait',
|
|
534
600
|
BACK: 'browser_back',
|
|
535
601
|
SCREENSHOT: 'browser_screenshot',
|
|
602
|
+
GET_PAGE_STATE: 'browser_get_page_state',
|
|
536
603
|
} as const;
|
|
537
604
|
|
|
538
605
|
/**
|
|
@@ -548,6 +615,7 @@ export const BROWSER_TOOL_NAMES = [
|
|
|
548
615
|
EBrowserTools.WAIT,
|
|
549
616
|
EBrowserTools.BACK,
|
|
550
617
|
EBrowserTools.SCREENSHOT,
|
|
618
|
+
EBrowserTools.GET_PAGE_STATE,
|
|
551
619
|
] as const;
|
|
552
620
|
|
|
553
621
|
export type BrowserToolName = typeof BROWSER_TOOL_NAMES[number];
|
package/src/types/run.ts
CHANGED
|
@@ -115,6 +115,14 @@ export type RunConfig = {
|
|
|
115
115
|
returnContent?: boolean;
|
|
116
116
|
tokenCounter?: TokenCounter;
|
|
117
117
|
indexTokenCountMap?: Record<string, number>;
|
|
118
|
+
/**
|
|
119
|
+
* Enable browser extension mode with interrupt-based tool execution.
|
|
120
|
+
* When true:
|
|
121
|
+
* - Uses a MemorySaver checkpointer for pause/resume
|
|
122
|
+
* - Browser tools will interrupt execution and wait for extension results
|
|
123
|
+
* - The extension must call the resume endpoint with a Command to continue
|
|
124
|
+
*/
|
|
125
|
+
browserMode?: boolean;
|
|
118
126
|
};
|
|
119
127
|
|
|
120
128
|
export type ProvidedCallbacks =
|