@soederpop/luca 0.0.28 → 0.0.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/commands/try-all-challenges.ts +1 -1
- package/docs/TABLE-OF-CONTENTS.md +0 -3
- package/docs/examples/structured-output-with-assistants.md +144 -0
- package/docs/tutorials/20-browser-esm.md +234 -0
- package/package.json +1 -1
- package/src/agi/container.server.ts +4 -0
- package/src/agi/features/assistant.ts +132 -2
- package/src/agi/features/browser-use.ts +623 -0
- package/src/agi/features/conversation.ts +135 -45
- package/src/agi/lib/interceptor-chain.ts +79 -0
- package/src/bootstrap/generated.ts +381 -308
- package/src/cli/build-info.ts +2 -2
- package/src/clients/rest.ts +7 -7
- package/src/commands/chat.ts +22 -0
- package/src/commands/describe.ts +67 -2
- package/src/commands/prompt.ts +23 -3
- package/src/container.ts +411 -113
- package/src/helper.ts +189 -5
- package/src/introspection/generated.agi.ts +17664 -11568
- package/src/introspection/generated.node.ts +4891 -1860
- package/src/introspection/generated.web.ts +379 -291
- package/src/introspection/index.ts +7 -0
- package/src/introspection/scan.ts +224 -7
- package/src/node/container.ts +31 -10
- package/src/node/features/content-db.ts +7 -7
- package/src/node/features/disk-cache.ts +11 -11
- package/src/node/features/esbuild.ts +3 -3
- package/src/node/features/file-manager.ts +37 -16
- package/src/node/features/fs.ts +64 -25
- package/src/node/features/git.ts +10 -10
- package/src/node/features/helpers.ts +25 -18
- package/src/node/features/ink.ts +13 -13
- package/src/node/features/ipc-socket.ts +8 -8
- package/src/node/features/networking.ts +3 -3
- package/src/node/features/os.ts +7 -7
- package/src/node/features/package-finder.ts +15 -15
- package/src/node/features/proc.ts +1 -1
- package/src/node/features/ui.ts +13 -13
- package/src/node/features/vm.ts +4 -4
- package/src/scaffolds/generated.ts +1 -1
- package/src/servers/express.ts +6 -6
- package/src/servers/mcp.ts +4 -4
- package/src/servers/socket.ts +6 -6
- package/test/interceptor-chain.test.ts +61 -0
- package/docs/apis/features/node/window-manager.md +0 -445
- package/docs/examples/window-manager-layouts.md +0 -180
- package/docs/examples/window-manager.md +0 -125
- package/docs/window-manager-fix.md +0 -249
- package/scripts/test-window-manager-lifecycle.ts +0 -86
- package/scripts/test-window-manager.ts +0 -43
- package/src/node/features/window-manager.ts +0 -1603
|
@@ -0,0 +1,623 @@
|
|
|
1
|
+
import { z } from 'zod'
|
|
2
|
+
import { FeatureStateSchema, FeatureOptionsSchema, FeatureEventsSchema } from '../../schemas/base.js'
|
|
3
|
+
import { Feature } from '@soederpop/luca/feature'
|
|
4
|
+
|
|
5
|
+
// Module augmentation: adds a `browserUse` entry (typed as the BrowserUse class) to the AvailableFeatures interface of '@soederpop/luca/feature'.
declare module '@soederpop/luca/feature' {
|
|
6
|
+
interface AvailableFeatures {
|
|
7
|
+
browserUse: typeof BrowserUse
|
|
8
|
+
}
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
/** Fields the browser-use feature adds on top of the base feature state. */
const browserUseStateFields = {
  session: z.string().default('default').describe('Active browser session name'),
  headed: z.boolean().default(false).describe('Whether the browser window is visible'),
  currentUrl: z.string().optional().describe('The current page URL'),
}

/** State schema for the feature: the base feature state extended with session, headed and currentUrl. */
export const BrowserUseStateSchema = FeatureStateSchema.extend(browserUseStateFields)

/** State type inferred from {@link BrowserUseStateSchema}. */
export type BrowserUseState = z.infer<typeof BrowserUseStateSchema>
|
|
17
|
+
|
|
18
|
+
/** Option fields accepted by the browser-use feature; every one of them is optional. */
const browserUseOptionFields = {
  session: z.string().optional().describe('Default session name'),
  headed: z.boolean().optional().describe('Show browser window by default'),
  profile: z.string().optional().describe('Chrome profile name to use'),
  connect: z.boolean().optional().describe('Auto-discover and connect to a running Chrome via CDP'),
  cdpUrl: z.string().optional().describe('Connect to an existing browser via CDP URL (http:// or ws://)'),
}

/** Options schema for the feature: the base feature options extended with the fields above. */
export const BrowserUseOptionsSchema = FeatureOptionsSchema.extend(browserUseOptionFields)

/** Options type inferred from {@link BrowserUseOptionsSchema}. */
export type BrowserUseOptions = z.infer<typeof BrowserUseOptionsSchema>
|
|
26
|
+
|
|
27
|
+
/** Event name -> argument tuple for every event the browser-use feature declares. */
const browserUseEventTuples = {
  navigated: z.tuple([z.string().describe('URL navigated to')]).describe('Emitted after navigating to a URL'),
  clicked: z.tuple([z.string().describe('Target description')]).describe('Emitted after clicking an element'),
  typed: z.tuple([z.string().describe('Text typed')]).describe('Emitted after typing text'),
  screenshot: z.tuple([z.string().describe('Base64 or file path')]).describe('Emitted after taking a screenshot'),
  closed: z.tuple([]).describe('Emitted when the browser session is closed'),
}

/** Events schema for the feature: the base feature events extended with the tuples above. */
export const BrowserUseEventsSchema = FeatureEventsSchema.extend(browserUseEventTuples)
|
|
34
|
+
|
|
35
|
+
/**
 * Result shape from `browser-use --json`.
 * Every public method of the feature resolves to a value of this shape.
 */
interface BrowserUseResult {
  /** Optional identifier, present only when the CLI output includes one. */
  id?: string
  /** Whether the command is considered to have succeeded. */
  success: boolean
  /** Command payload; its keys vary by command. */
  data: { [key: string]: any }
}
|
|
41
|
+
|
|
42
|
+
/**
 * Browser automation feature wrapping the browser-use CLI.
 * Provides programmatic browser control — navigation, clicking, typing,
 * screenshots, JavaScript evaluation, data extraction, and more.
 *
 * Every command is executed by spawning the `browser-use` executable through
 * the container's `proc` feature with `--json`, and resolves to a
 * `BrowserUseResult`. Methods named `browser*` are tool handlers that unpack
 * an options object and delegate to the correspondingly named core method.
 *
 * @example
 * ```typescript
 * const browser = container.feature('browserUse')
 * await browser.open('https://example.com')
 * const state = await browser.getState()
 * await browser.click('21')
 * await browser.close()
 * ```
 *
 * @extends Feature
 */
export class BrowserUse extends Feature<BrowserUseState, BrowserUseOptions> {
  static override shortcut = 'features.browserUse' as const
  static override stateSchema = BrowserUseStateSchema
  static override optionsSchema = BrowserUseOptionsSchema
  static override eventsSchema = BrowserUseEventsSchema

  /**
   * Tool definitions (description + zod argument schema). Each key has an
   * instance method of the same name further down that handles the call.
   */
  static tools = {
    browserOpen: {
      description: 'Navigate the browser to a URL. Call this first to open a page before any interaction.',
      schema: z.object({
        url: z.string().describe('The URL to navigate to'),
      }).describe('Navigate the browser to a URL. Call this first to open a page before any interaction.'),
    },
    browserGetState: {
      description: 'Get the current page URL, title, and all interactive elements with their index numbers. Always call this after navigating or when you need to discover clickable elements — the indices returned are required for click, input, select, and other element interactions.',
      schema: z.object({}).describe('Get the current page URL, title, and all interactive elements with their index numbers. Always call this after navigating or when you need to discover clickable elements — the indices returned are required for click, input, select, and other element interactions.'),
    },
    browserClick: {
      description: 'Click an element by its index (from browserGetState) or by x,y pixel coordinates.',
      schema: z.object({
        target: z.string().describe('Element index number from browserGetState, or "x y" pixel coordinates separated by a space'),
      }).describe('Click an element by its index (from browserGetState) or by x,y pixel coordinates. You must call browserGetState first to obtain element indices.'),
    },
    browserType: {
      description: 'Type text at the current cursor/focus position. Use browserInput instead if you need to target a specific element.',
      schema: z.object({
        text: z.string().describe('Text to type'),
      }).describe('Type text at the current cursor/focus position. Use browserInput instead if you need to target a specific element by index.'),
    },
    browserInput: {
      description: 'Click a specific element by index and type text into it. Combines click + type in one step.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
        text: z.string().describe('Text to type into the element'),
      }).describe('Click a specific element by index and type text into it. Combines click + type in one step. Use this for filling form fields — get element indices from browserGetState first.'),
    },
    browserScreenshot: {
      description: 'Take a screenshot of the current page. Returns base64 PNG if no path given.',
      schema: z.object({
        path: z.string().optional().describe('File path to save the screenshot to. If omitted, returns base64-encoded PNG.'),
        full: z.boolean().optional().describe('If true, capture the full scrollable page instead of just the viewport'),
      }).describe('Take a screenshot of the current browser viewport. Use for visual verification, debugging, or capturing page state. Returns base64 PNG data unless a file path is provided.'),
    },
    browserEval: {
      description: 'Execute JavaScript code in the browser page context and return the result.',
      schema: z.object({
        js: z.string().describe('JavaScript code to execute in the page context. Has access to document, window, etc.'),
      }).describe('Execute arbitrary JavaScript in the browser page context. Use for DOM manipulation, extracting data via selectors, or running page-level logic. The return value is sent back as the result.'),
    },
    browserExtract: {
      description: 'Extract structured data from the current page using a natural-language query processed by an LLM.',
      schema: z.object({
        query: z.string().describe('Natural language description of what data to extract, e.g. "all product names and prices in the table"'),
      }).describe('Extract structured data from the current page using a natural-language query processed by an LLM. Use when you need to pull specific information from complex pages without writing selectors manually.'),
    },
    browserScroll: {
      description: 'Scroll the page up or down. Use when elements are not visible in the current viewport.',
      schema: z.object({
        direction: z.enum(['up', 'down']).default('down').describe('Scroll direction'),
        amount: z.number().optional().describe('Scroll amount in pixels. Omit for a default scroll step.'),
      }).describe('Scroll the page up or down. Use when target elements are outside the current viewport — after scrolling, call browserGetState to see newly visible elements.'),
    },
    browserKeys: {
      description: 'Send keyboard keys or key combinations to the page.',
      schema: z.object({
        keys: z.string().describe('Key or combination to send, e.g. "Enter", "Tab", "Control+a", "Escape", "ArrowDown"'),
      }).describe('Send keyboard keys or key combinations to the browser. Use for pressing Enter to submit, Tab to move focus, Escape to close dialogs, or keyboard shortcuts like Control+a.'),
    },
    browserBack: {
      description: 'Go back to the previous page in browser history.',
      schema: z.object({}).describe('Navigate back to the previous page in browser history, like clicking the back button.'),
    },
    browserSelect: {
      description: 'Select an option from a dropdown/select element by index.',
      schema: z.object({
        index: z.string().describe('Element index of the dropdown/select from browserGetState'),
        value: z.string().describe('The option text or value to select'),
      }).describe('Select an option from a <select> dropdown element. Get the dropdown element index from browserGetState first, then specify the option value to choose.'),
    },
    browserHover: {
      description: 'Hover over an element by index. Use for revealing tooltips, dropdown menus, or hover-triggered content.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
      }).describe('Hover the mouse over an element by index. Use to reveal tooltips, trigger hover menus, or expose hidden UI. Get element indices from browserGetState first.'),
    },
    browserDblclick: {
      description: 'Double-click an element by index.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
      }).describe('Double-click an element by index. Use for actions that require double-click, such as editing text in-place or selecting words.'),
    },
    browserRightclick: {
      description: 'Right-click (context menu) an element by index.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
      }).describe('Right-click an element by index to open a context menu. Get element indices from browserGetState first.'),
    },
    browserUpload: {
      description: 'Upload a file to a file input element.',
      schema: z.object({
        index: z.string().describe('Element index of the file input from browserGetState'),
        path: z.string().describe('Local file path to upload'),
      }).describe('Upload a local file to a file input (<input type="file">) element. Get the file input element index from browserGetState first.'),
    },
    browserGetTitle: {
      description: 'Get the current page title.',
      schema: z.object({}).describe('Get the current page title. Useful for verifying you are on the expected page.'),
    },
    browserGetHtml: {
      description: 'Get the full HTML source of the current page.',
      schema: z.object({}).describe('Get the full HTML source of the current page. Use sparingly — prefer browserGetState or browserGetText for targeted extraction.'),
    },
    browserGetText: {
      description: 'Get the text content of a specific element by index.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
      }).describe('Get the visible text content of a specific element by its index. Get element indices from browserGetState first.'),
    },
    browserGetValue: {
      description: 'Get the current value of an input or textarea element.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
      }).describe('Get the current value of an input, textarea, or select element by index. Use to verify what has been typed or selected.'),
    },
    browserGetAttributes: {
      description: 'Get all HTML attributes of an element by index.',
      schema: z.object({
        index: z.string().describe('Element index number from browserGetState'),
      }).describe('Get all HTML attributes (id, class, href, src, etc.) of an element by index. Useful for inspecting element properties.'),
    },
    browserWaitForSelector: {
      description: 'Wait for a CSS selector to appear on the page before continuing.',
      schema: z.object({
        selector: z.string().describe('CSS selector to wait for, e.g. "#results", ".loaded", "[data-ready]"'),
      }).describe('Wait for an element matching a CSS selector to appear in the DOM. Use after actions that trigger async page updates (form submissions, AJAX loads, navigation).'),
    },
    browserWaitForText: {
      description: 'Wait for specific text to appear on the page before continuing.',
      schema: z.object({
        text: z.string().describe('Text string to wait for on the page'),
      }).describe('Wait for specific text content to appear anywhere on the page. Use after actions that trigger async content changes, such as form submissions or loading states.'),
    },
    browserSwitchTab: {
      description: 'Switch to a different browser tab by its index.',
      schema: z.object({
        tab: z.string().describe('Tab index to switch to (0-based)'),
      }).describe('Switch focus to a different browser tab by its index. Use when links open in new tabs or when working across multiple pages.'),
    },
    browserCloseTab: {
      description: 'Close a browser tab.',
      schema: z.object({
        tab: z.string().optional().describe('Tab index to close. Omit to close the current tab.'),
      }).describe('Close a browser tab by index, or close the current tab if no index is given.'),
    },
    browserClose: {
      description: 'Close the browser session and stop the daemon.',
      schema: z.object({
        all: z.boolean().optional().describe('If true, close all browser sessions'),
      }).describe('Close the browser session. Call this when you are done with browser automation to free resources.'),
    },
    browserSessions: {
      description: 'List all active browser sessions.',
      schema: z.object({}).describe('List all currently active browser sessions with their names and status.'),
    },
  }

  static { Feature.register(this, 'browserUse') }

  /** Copy the `session` and `headed` options into state when they are set (truthy). */
  override async afterInitialize() {
    if (this.options.session) this.state.set('session', this.options.session)
    if (this.options.headed) this.state.set('headed', true)
  }

  /**
   * Build the base args array with global flags.
   * Always starts with `--json`; `--headed` and `--session` come from state
   * (the session flag is omitted for the name 'default'), while `--profile`,
   * `--connect` and `--cdp-url` come from options.
   */
  private baseArgs(): string[] {
    const args: string[] = ['--json']
    if (this.state.get('headed')) args.push('--headed')
    const session = this.state.get('session')
    if (session && session !== 'default') args.push('--session', session)
    if (this.options.profile) args.push('--profile', this.options.profile)
    if (this.options.connect) args.push('--connect')
    if (this.options.cdpUrl) args.push('--cdp-url', this.options.cdpUrl)
    return args
  }

  /**
   * Execute a browser-use command and parse the JSON result.
   *
   * @param subcommand - CLI subcommand, placed after the global flags
   * @param cmdArgs - Arguments appended after the subcommand
   * @returns The parsed JSON object from stdout. Empty stdout yields
   *   `{ success: false, data: { error } }` (stderr, or a fixed message).
   *   Output that is not JSON, or JSON that is not an object, yields
   *   `{ success: true, data: { _raw_text } }` holding the trimmed stdout.
   */
  private async exec(subcommand: string, ...cmdArgs: string[]): Promise<BrowserUseResult> {
    const args = [...this.baseArgs(), subcommand, ...cmdArgs]
    const proc = this.container.feature('proc')
    const result = await proc.spawnAndCapture('browser-use', args)

    const stdout = (result.stdout || '').trim()
    if (!stdout) {
      return { success: false, data: { error: result.stderr || 'No output from browser-use' } }
    }

    let parsed: unknown
    try {
      parsed = JSON.parse(stdout)
    } catch {
      return { success: true, data: { _raw_text: stdout } }
    }

    // `null`, bare scalars and arrays are valid JSON but cannot satisfy
    // BrowserUseResult (and `null` would make `result.success` throw in
    // callers), so hand them back the same way as non-JSON output.
    if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
      return { success: true, data: { _raw_text: stdout } }
    }

    return parsed as BrowserUseResult
  }

  // --- Core methods ---

  /**
   * Navigate to a URL. On success the requested URL is stored in state as
   * `currentUrl` and a `navigated` event is emitted with it.
   * @param url - The URL to open
   * @returns The browser-use result
   *
   * @example
   * ```typescript
   * await browserUse.open('https://example.com')
   * ```
   */
  async open(url: string): Promise<BrowserUseResult> {
    const result = await this.exec('open', url)
    if (result.success) {
      this.state.set('currentUrl', url)
      this.emit('navigated', url)
    }
    return result
  }

  /**
   * Click an element by index or coordinates. Emits `clicked` with the
   * original `target` string on success.
   * @param target - Element index or "x y" coordinates
   * @returns The browser-use result; a blank target returns a failure result
   *   without invoking the CLI
   *
   * @example
   * ```typescript
   * await browserUse.click('21')       // click element 21
   * await browserUse.click('100 200')  // click at coordinates
   * ```
   */
  async click(target: string): Promise<BrowserUseResult> {
    // Trim and drop empty tokens: a bare split(/\s+/) turns leading or
    // trailing whitespace into empty-string CLI arguments.
    const args = target.trim().split(/\s+/).filter(Boolean)
    if (args.length === 0) {
      return { success: false, data: { error: 'click target is empty' } }
    }
    const result = await this.exec('click', ...args)
    if (result.success) this.emit('clicked', target)
    return result
  }

  /**
   * Type text at the current cursor position. Emits `typed` with the text on success.
   * @param text - Text to type
   */
  async type(text: string): Promise<BrowserUseResult> {
    const result = await this.exec('type', text)
    if (result.success) this.emit('typed', text)
    return result
  }

  /**
   * Type text into a specific element
   * @param index - Element index
   * @param text - Text to enter
   */
  async input(index: string, text: string): Promise<BrowserUseResult> {
    return this.exec('input', index, text)
  }

  /**
   * Get the current browser state (URL, title, interactive elements)
   *
   * @example
   * ```typescript
   * const state = await browserUse.getState()
   * console.log(state.data._raw_text)
   * ```
   */
  async getState(): Promise<BrowserUseResult> {
    return this.exec('state')
  }

  /**
   * Take a screenshot. On success emits `screenshot` with the given path, or
   * the literal string 'base64' when no path was supplied.
   * @param options - Optional path and full-page flag
   * @returns The browser-use result (base64 PNG data or file path, per the CLI)
   */
  async screenshot(options: { path?: string; full?: boolean } = {}): Promise<BrowserUseResult> {
    const args: string[] = []
    if (options.full) args.push('--full')
    if (options.path) args.push(options.path)
    const result = await this.exec('screenshot', ...args)
    if (result.success) {
      this.emit('screenshot', options.path || 'base64')
    }
    return result
  }

  /**
   * Execute JavaScript in the page context
   * @param js - JavaScript code to evaluate
   */
  async evaluate(js: string): Promise<BrowserUseResult> {
    return this.exec('eval', js)
  }

  /**
   * Extract structured data from the page using an LLM
   * @param query - Natural language description of what to extract
   */
  async extract(query: string): Promise<BrowserUseResult> {
    return this.exec('extract', query)
  }

  /**
   * Scroll the page
   * @param direction - 'up' or 'down'
   * @param amount - Pixels to scroll; a falsy value (omitted or 0) leaves
   *   `--amount` off the command line
   */
  async scroll(direction: 'up' | 'down' = 'down', amount?: number): Promise<BrowserUseResult> {
    const args: string[] = [direction]
    if (amount) args.push('--amount', String(amount))
    return this.exec('scroll', ...args)
  }

  /**
   * Send keyboard keys
   * @param keys - Key combination (e.g. "Enter", "Control+a")
   */
  async keys(keys: string): Promise<BrowserUseResult> {
    return this.exec('keys', keys)
  }

  /** Go back in browser history */
  async back(): Promise<BrowserUseResult> {
    return this.exec('back')
  }

  /** Get the current page title */
  async getTitle(): Promise<BrowserUseResult> {
    return this.exec('get', 'title')
  }

  /** Get the full page HTML */
  async getHtml(): Promise<BrowserUseResult> {
    return this.exec('get', 'html')
  }

  /**
   * Get text content of an element
   * @param index - Element index
   */
  async getText(index: string): Promise<BrowserUseResult> {
    return this.exec('get', 'text', index)
  }

  /**
   * Select a dropdown option
   * @param index - Element index of the dropdown
   * @param value - Value to select
   */
  async select(index: string, value: string): Promise<BrowserUseResult> {
    return this.exec('select', index, value)
  }

  /**
   * Wait for a CSS selector to appear
   * @param selector - CSS selector
   */
  async waitForSelector(selector: string): Promise<BrowserUseResult> {
    return this.exec('wait', 'selector', selector)
  }

  /**
   * Wait for text to appear on the page
   * @param text - Text to wait for
   */
  async waitForText(text: string): Promise<BrowserUseResult> {
    return this.exec('wait', 'text', text)
  }

  /**
   * Switch to a tab by index
   * @param tab - Tab index
   */
  async switchTab(tab: string): Promise<BrowserUseResult> {
    return this.exec('switch', tab)
  }

  /**
   * Close a tab
   * @param tab - Tab index (closes current if omitted)
   */
  async closeTab(tab?: string): Promise<BrowserUseResult> {
    const args = tab ? [tab] : []
    return this.exec('close-tab', ...args)
  }

  /**
   * Close the browser session. Emits `closed` on success.
   * @param all - If true, close all sessions
   */
  async close(all?: boolean): Promise<BrowserUseResult> {
    const args = all ? ['--all'] : []
    const result = await this.exec('close', ...args)
    if (result.success) this.emit('closed')
    return result
  }

  /** List active browser sessions */
  async sessions(): Promise<BrowserUseResult> {
    return this.exec('sessions')
  }

  /**
   * Hover over an element
   * @param index - Element index
   */
  async hover(index: string): Promise<BrowserUseResult> {
    return this.exec('hover', index)
  }

  /**
   * Double-click an element
   * @param index - Element index
   */
  async dblclick(index: string): Promise<BrowserUseResult> {
    return this.exec('dblclick', index)
  }

  /**
   * Right-click an element
   * @param index - Element index
   */
  async rightclick(index: string): Promise<BrowserUseResult> {
    return this.exec('rightclick', index)
  }

  /**
   * Upload a file to a file input element
   * @param index - Element index of the file input
   * @param path - Local file path to upload
   */
  async upload(index: string, path: string): Promise<BrowserUseResult> {
    return this.exec('upload', index, path)
  }

  /**
   * Get the value of an input or textarea element
   * @param index - Element index
   */
  async getValue(index: string): Promise<BrowserUseResult> {
    return this.exec('get', 'value', index)
  }

  /**
   * Get all attributes of an element
   * @param index - Element index
   */
  async getAttributes(index: string): Promise<BrowserUseResult> {
    return this.exec('get', 'attributes', index)
  }

  // --- Tool handlers (matched by name to static tools) ---
  // Each handler unpacks its options object and delegates to the core method.

  async browserOpen(options: { url: string }) {
    return this.open(options.url)
  }

  async browserClick(options: { target: string }) {
    return this.click(options.target)
  }

  async browserType(options: { text: string }) {
    return this.type(options.text)
  }

  async browserInput(options: { index: string; text: string }) {
    return this.input(options.index, options.text)
  }

  async browserGetState() {
    return this.getState()
  }

  async browserScreenshot(options: { path?: string; full?: boolean }) {
    return this.screenshot(options)
  }

  async browserEval(options: { js: string }) {
    return this.evaluate(options.js)
  }

  async browserExtract(options: { query: string }) {
    return this.extract(options.query)
  }

  async browserScroll(options: { direction: 'up' | 'down'; amount?: number }) {
    return this.scroll(options.direction, options.amount)
  }

  async browserKeys(options: { keys: string }) {
    return this.keys(options.keys)
  }

  async browserBack() {
    return this.back()
  }

  async browserGetTitle() {
    return this.getTitle()
  }

  async browserGetHtml() {
    return this.getHtml()
  }

  async browserGetText(options: { index: string }) {
    return this.getText(options.index)
  }

  async browserSelect(options: { index: string; value: string }) {
    return this.select(options.index, options.value)
  }

  async browserWaitForSelector(options: { selector: string }) {
    return this.waitForSelector(options.selector)
  }

  async browserWaitForText(options: { text: string }) {
    return this.waitForText(options.text)
  }

  async browserSwitchTab(options: { tab: string }) {
    return this.switchTab(options.tab)
  }

  async browserCloseTab(options: { tab?: string }) {
    return this.closeTab(options.tab)
  }

  async browserClose(options: { all?: boolean }) {
    return this.close(options.all)
  }

  async browserSessions() {
    return this.sessions()
  }

  async browserHover(options: { index: string }) {
    return this.hover(options.index)
  }

  async browserDblclick(options: { index: string }) {
    return this.dblclick(options.index)
  }

  async browserRightclick(options: { index: string }) {
    return this.rightclick(options.index)
  }

  async browserUpload(options: { index: string; path: string }) {
    return this.upload(options.index, options.path)
  }

  async browserGetValue(options: { index: string }) {
    return this.getValue(options.index)
  }

  async browserGetAttributes(options: { index: string }) {
    return this.getAttributes(options.index)
  }
}
|
|
622
|
+
|
|
623
|
+
export default BrowserUse
|