illuma-agents 1.0.18 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,244 @@
1
+ /**
2
+ * Browser Automation Tools for Ranger Browser Extension
3
+ *
4
+ * These tools allow the LLM to interact with the browser through the
5
+ * ranger-browser extension. They generate structured actions that are
6
+ * sent to the extension via SSE streaming for execution.
7
+ *
8
+ * The extension handles:
9
+ * - DOM extraction with element indexing
10
+ * - Click, type, hover, scroll actions
11
+ * - Navigation and page context
12
+ * - Visual element highlighting
13
+ */
14
+ import { z } from 'zod';
15
+ import { DynamicStructuredTool } from '@langchain/core/tools';
16
+ /**
17
+ * Enhanced click schema that supports both index-based and coordinate-based clicking
18
+ */
19
+ declare const BrowserClickSchema: z.ZodObject<{
20
+ index: z.ZodOptional<z.ZodNumber>;
21
+ coordinates: z.ZodOptional<z.ZodObject<{
22
+ x: z.ZodNumber;
23
+ y: z.ZodNumber;
24
+ }, "strip", z.ZodTypeAny, {
25
+ y: number;
26
+ x: number;
27
+ }, {
28
+ y: number;
29
+ x: number;
30
+ }>>;
31
+ visualDescription: z.ZodOptional<z.ZodString>;
32
+ reason: z.ZodOptional<z.ZodString>;
33
+ }, "strip", z.ZodTypeAny, {
34
+ index?: number | undefined;
35
+ coordinates?: {
36
+ y: number;
37
+ x: number;
38
+ } | undefined;
39
+ visualDescription?: string | undefined;
40
+ reason?: string | undefined;
41
+ }, {
42
+ index?: number | undefined;
43
+ coordinates?: {
44
+ y: number;
45
+ x: number;
46
+ } | undefined;
47
+ visualDescription?: string | undefined;
48
+ reason?: string | undefined;
49
+ }>;
50
+ declare const BrowserTypeSchema: z.ZodObject<{
51
+ index: z.ZodNumber;
52
+ text: z.ZodString;
53
+ clear: z.ZodOptional<z.ZodBoolean>;
54
+ pressEnter: z.ZodOptional<z.ZodBoolean>;
55
+ }, "strip", z.ZodTypeAny, {
56
+ text: string;
57
+ index: number;
58
+ clear?: boolean | undefined;
59
+ pressEnter?: boolean | undefined;
60
+ }, {
61
+ text: string;
62
+ index: number;
63
+ clear?: boolean | undefined;
64
+ pressEnter?: boolean | undefined;
65
+ }>;
66
+ declare const BrowserNavigateSchema: z.ZodObject<{
67
+ url: z.ZodString;
68
+ reason: z.ZodOptional<z.ZodString>;
69
+ }, "strip", z.ZodTypeAny, {
70
+ url: string;
71
+ reason?: string | undefined;
72
+ }, {
73
+ url: string;
74
+ reason?: string | undefined;
75
+ }>;
76
+ declare const BrowserScrollSchema: z.ZodObject<{
77
+ direction: z.ZodEnum<["up", "down", "left", "right"]>;
78
+ amount: z.ZodOptional<z.ZodNumber>;
79
+ }, "strip", z.ZodTypeAny, {
80
+ direction: "up" | "down" | "left" | "right";
81
+ amount?: number | undefined;
82
+ }, {
83
+ direction: "up" | "down" | "left" | "right";
84
+ amount?: number | undefined;
85
+ }>;
86
+ declare const BrowserExtractSchema: z.ZodObject<{
87
+ query: z.ZodOptional<z.ZodString>;
88
+ selector: z.ZodOptional<z.ZodString>;
89
+ }, "strip", z.ZodTypeAny, {
90
+ query?: string | undefined;
91
+ selector?: string | undefined;
92
+ }, {
93
+ query?: string | undefined;
94
+ selector?: string | undefined;
95
+ }>;
96
+ declare const BrowserHoverSchema: z.ZodObject<{
97
+ index: z.ZodNumber;
98
+ }, "strip", z.ZodTypeAny, {
99
+ index: number;
100
+ }, {
101
+ index: number;
102
+ }>;
103
+ declare const BrowserWaitSchema: z.ZodObject<{
104
+ duration: z.ZodOptional<z.ZodNumber>;
105
+ reason: z.ZodOptional<z.ZodString>;
106
+ }, "strip", z.ZodTypeAny, {
107
+ duration?: number | undefined;
108
+ reason?: string | undefined;
109
+ }, {
110
+ duration?: number | undefined;
111
+ reason?: string | undefined;
112
+ }>;
113
+ declare const BrowserGoBackSchema: z.ZodObject<{
114
+ reason: z.ZodOptional<z.ZodString>;
115
+ }, "strip", z.ZodTypeAny, {
116
+ reason?: string | undefined;
117
+ }, {
118
+ reason?: string | undefined;
119
+ }>;
120
+ declare const BrowserScreenshotSchema: z.ZodObject<{
121
+ fullPage: z.ZodOptional<z.ZodBoolean>;
122
+ reason: z.ZodOptional<z.ZodString>;
123
+ }, "strip", z.ZodTypeAny, {
124
+ reason?: string | undefined;
125
+ fullPage?: boolean | undefined;
126
+ }, {
127
+ reason?: string | undefined;
128
+ fullPage?: boolean | undefined;
129
+ }>;
130
+ /**
131
+ * Browser click tool - clicks an element by index or coordinates
132
+ * Supports both semantic (index-based) and vision (coordinate-based) clicking
133
+ */
134
+ export declare function createBrowserClickTool(): DynamicStructuredTool<typeof BrowserClickSchema>;
135
+ /**
136
+ * Browser type tool - types text into an input field
137
+ */
138
+ export declare function createBrowserTypeTool(): DynamicStructuredTool<typeof BrowserTypeSchema>;
139
+ /**
140
+ * Browser navigate tool - navigates to a URL
141
+ */
142
+ export declare function createBrowserNavigateTool(): DynamicStructuredTool<typeof BrowserNavigateSchema>;
143
+ /**
144
+ * Browser scroll tool - scrolls the page
145
+ */
146
+ export declare function createBrowserScrollTool(): DynamicStructuredTool<typeof BrowserScrollSchema>;
147
+ /**
148
+ * Browser extract tool - extracts content from the page
149
+ */
150
+ export declare function createBrowserExtractTool(): DynamicStructuredTool<typeof BrowserExtractSchema>;
151
+ /**
152
+ * Browser hover tool - hovers over an element
153
+ */
154
+ export declare function createBrowserHoverTool(): DynamicStructuredTool<typeof BrowserHoverSchema>;
155
+ /**
156
+ * Browser wait tool - waits for a specified duration
157
+ */
158
+ export declare function createBrowserWaitTool(): DynamicStructuredTool<typeof BrowserWaitSchema>;
159
+ /**
160
+ * Browser go back tool - navigates back in history
161
+ */
162
+ export declare function createBrowserGoBackTool(): DynamicStructuredTool<typeof BrowserGoBackSchema>;
163
+ /**
164
+ * Browser screenshot tool - captures a screenshot
165
+ */
166
+ export declare function createBrowserScreenshotTool(): DynamicStructuredTool<typeof BrowserScreenshotSchema>;
167
+ export type BrowserToolsConfig = {
168
+ /** Enable click tool */
169
+ enableClick?: boolean;
170
+ /** Enable type tool */
171
+ enableType?: boolean;
172
+ /** Enable navigate tool */
173
+ enableNavigate?: boolean;
174
+ /** Enable scroll tool */
175
+ enableScroll?: boolean;
176
+ /** Enable extract tool */
177
+ enableExtract?: boolean;
178
+ /** Enable hover tool */
179
+ enableHover?: boolean;
180
+ /** Enable wait tool */
181
+ enableWait?: boolean;
182
+ /** Enable go back tool */
183
+ enableBack?: boolean;
184
+ /** Enable screenshot tool */
185
+ enableScreenshot?: boolean;
186
+ };
187
+ /**
188
+ * Create all browser automation tools
189
+ *
190
+ * IMPORTANT: These tools should ONLY be registered when:
191
+ * 1. The request comes from a browser extension that can execute them
192
+ * 2. The client has indicated browser capability (e.g., via header or parameter)
193
+ *
194
+ * DO NOT register these for normal web UI users - they cannot execute browser actions.
195
+ *
196
+ * Detection in Ranger API:
197
+ * - Check for `X-Ranger-Browser-Extension: true` header
198
+ * - Or check for `browserCapable: true` in request body
199
+ * - Or check user agent for extension identifier
200
+ *
201
+ * @example
202
+ * // In Ranger API endpoint:
203
+ * const hasBrowserExtension = req.headers['x-ranger-browser-extension'] === 'true';
204
+ * const tools = hasBrowserExtension
205
+ * ? [...normalTools, ...createBrowserTools()]
206
+ * : normalTools;
207
+ */
208
+ export declare function createBrowserTools(config?: BrowserToolsConfig): DynamicStructuredTool[];
209
+ /**
210
+ * Browser tool name constants
211
+ * Use these instead of magic strings
212
+ */
213
+ export declare const EBrowserTools: {
214
+ readonly CLICK: "browser_click";
215
+ readonly TYPE: "browser_type";
216
+ readonly NAVIGATE: "browser_navigate";
217
+ readonly SCROLL: "browser_scroll";
218
+ readonly EXTRACT: "browser_extract";
219
+ readonly HOVER: "browser_hover";
220
+ readonly WAIT: "browser_wait";
221
+ readonly BACK: "browser_back";
222
+ readonly SCREENSHOT: "browser_screenshot";
223
+ };
224
+ /**
225
+ * Browser tool names, for filtering/identification
226
+ */
227
+ export declare const BROWSER_TOOL_NAMES: readonly ["browser_click", "browser_type", "browser_navigate", "browser_scroll", "browser_extract", "browser_hover", "browser_wait", "browser_back", "browser_screenshot"];
228
+ export type BrowserToolName = typeof BROWSER_TOOL_NAMES[number];
229
+ /**
230
+ * Check if a tool name is one of the browser tool names (narrows it to BrowserToolName)
231
+ */
232
+ export declare function isBrowserToolCall(toolName: string): toolName is BrowserToolName;
233
+ /**
234
+ * Check if the request headers indicate browser extension capability
235
+ * Use this to conditionally register browser tools
236
+ *
237
+ * @example
238
+ * // In Express middleware or endpoint:
239
+ * if (hasBrowserCapability(req.headers)) {
240
+ * tools.push(...createBrowserTools());
241
+ * }
242
+ */
243
+ export declare function hasBrowserCapability(headers: Record<string, string | string[] | undefined>): boolean;
244
+ export {};
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "illuma-agents",
3
- "version": "1.0.18",
3
+ "version": "1.0.19",
4
4
  "main": "./dist/cjs/main.cjs",
5
5
  "module": "./dist/esm/main.mjs",
6
6
  "types": "./dist/types/index.d.ts",
@@ -13,6 +13,10 @@
13
13
  },
14
14
  "type": "module",
15
15
  "description": "Illuma AI Agents Library",
16
+ "repository": {
17
+ "type": "git",
18
+ "url": "https://github.com/codevakure/agents"
19
+ },
16
20
  "author": "Illuma Team",
17
21
  "license": "MIT",
18
22
  "packageManager": "npm@10.5.2",
package/src/index.ts CHANGED
@@ -13,6 +13,7 @@ export * from './tools/Calculator';
13
13
  export * from './tools/CodeExecutor';
14
14
  export * from './tools/ProgrammaticToolCalling';
15
15
  export * from './tools/ToolSearchRegex';
16
+ export * from './tools/BrowserTools';
16
17
  export * from './tools/handlers';
17
18
  export * from './tools/search';
18
19