browser-use 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +761 -0
- package/dist/agent/cloud-events.d.ts +264 -0
- package/dist/agent/cloud-events.js +318 -0
- package/dist/agent/gif.d.ts +15 -0
- package/dist/agent/gif.js +215 -0
- package/dist/agent/index.d.ts +8 -0
- package/dist/agent/index.js +8 -0
- package/dist/agent/message-manager/service.d.ts +30 -0
- package/dist/agent/message-manager/service.js +208 -0
- package/dist/agent/message-manager/utils.d.ts +2 -0
- package/dist/agent/message-manager/utils.js +41 -0
- package/dist/agent/message-manager/views.d.ts +26 -0
- package/dist/agent/message-manager/views.js +73 -0
- package/dist/agent/prompts.d.ts +52 -0
- package/dist/agent/prompts.js +259 -0
- package/dist/agent/service.d.ts +290 -0
- package/dist/agent/service.js +2200 -0
- package/dist/agent/views.d.ts +741 -0
- package/dist/agent/views.js +537 -0
- package/dist/browser/browser.d.ts +7 -0
- package/dist/browser/browser.js +5 -0
- package/dist/browser/context.d.ts +8 -0
- package/dist/browser/context.js +4 -0
- package/dist/browser/dvd-screensaver.d.ts +101 -0
- package/dist/browser/dvd-screensaver.js +270 -0
- package/dist/browser/extensions.d.ts +63 -0
- package/dist/browser/extensions.js +359 -0
- package/dist/browser/index.d.ts +10 -0
- package/dist/browser/index.js +9 -0
- package/dist/browser/playwright-manager.d.ts +47 -0
- package/dist/browser/playwright-manager.js +146 -0
- package/dist/browser/profile.d.ts +196 -0
- package/dist/browser/profile.js +815 -0
- package/dist/browser/session.d.ts +505 -0
- package/dist/browser/session.js +3409 -0
- package/dist/browser/types.d.ts +1184 -0
- package/dist/browser/types.js +1 -0
- package/dist/browser/utils.d.ts +1 -0
- package/dist/browser/utils.js +19 -0
- package/dist/browser/views.d.ts +78 -0
- package/dist/browser/views.js +72 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +44 -0
- package/dist/config.d.ts +108 -0
- package/dist/config.js +430 -0
- package/dist/controller/index.d.ts +3 -0
- package/dist/controller/index.js +3 -0
- package/dist/controller/registry/index.d.ts +2 -0
- package/dist/controller/registry/index.js +2 -0
- package/dist/controller/registry/service.d.ts +45 -0
- package/dist/controller/registry/service.js +184 -0
- package/dist/controller/registry/views.d.ts +55 -0
- package/dist/controller/registry/views.js +174 -0
- package/dist/controller/service.d.ts +49 -0
- package/dist/controller/service.js +1176 -0
- package/dist/controller/views.d.ts +241 -0
- package/dist/controller/views.js +88 -0
- package/dist/dom/clickable-element-processor/service.d.ts +11 -0
- package/dist/dom/clickable-element-processor/service.js +60 -0
- package/dist/dom/dom_tree/index.js +1400 -0
- package/dist/dom/history-tree-processor/service.d.ts +14 -0
- package/dist/dom/history-tree-processor/service.js +75 -0
- package/dist/dom/history-tree-processor/view.d.ts +54 -0
- package/dist/dom/history-tree-processor/view.js +56 -0
- package/dist/dom/playground/extraction.d.ts +19 -0
- package/dist/dom/playground/extraction.js +187 -0
- package/dist/dom/playground/process-dom.d.ts +1 -0
- package/dist/dom/playground/process-dom.js +5 -0
- package/dist/dom/playground/test-accessibility.d.ts +44 -0
- package/dist/dom/playground/test-accessibility.js +111 -0
- package/dist/dom/service.d.ts +19 -0
- package/dist/dom/service.js +227 -0
- package/dist/dom/utils.d.ts +1 -0
- package/dist/dom/utils.js +6 -0
- package/dist/dom/views.d.ts +61 -0
- package/dist/dom/views.js +247 -0
- package/dist/event-bus.d.ts +11 -0
- package/dist/event-bus.js +19 -0
- package/dist/exceptions.d.ts +10 -0
- package/dist/exceptions.js +22 -0
- package/dist/filesystem/file-system.d.ts +68 -0
- package/dist/filesystem/file-system.js +412 -0
- package/dist/filesystem/index.d.ts +1 -0
- package/dist/filesystem/index.js +1 -0
- package/dist/index.d.ts +31 -0
- package/dist/index.js +33 -0
- package/dist/integrations/gmail/actions.d.ts +12 -0
- package/dist/integrations/gmail/actions.js +113 -0
- package/dist/integrations/gmail/index.d.ts +2 -0
- package/dist/integrations/gmail/index.js +2 -0
- package/dist/integrations/gmail/service.d.ts +61 -0
- package/dist/integrations/gmail/service.js +260 -0
- package/dist/llm/anthropic/chat.d.ts +28 -0
- package/dist/llm/anthropic/chat.js +126 -0
- package/dist/llm/anthropic/index.d.ts +2 -0
- package/dist/llm/anthropic/index.js +2 -0
- package/dist/llm/anthropic/serializer.d.ts +68 -0
- package/dist/llm/anthropic/serializer.js +285 -0
- package/dist/llm/aws/chat-anthropic.d.ts +61 -0
- package/dist/llm/aws/chat-anthropic.js +176 -0
- package/dist/llm/aws/chat-bedrock.d.ts +15 -0
- package/dist/llm/aws/chat-bedrock.js +80 -0
- package/dist/llm/aws/index.d.ts +3 -0
- package/dist/llm/aws/index.js +3 -0
- package/dist/llm/aws/serializer.d.ts +5 -0
- package/dist/llm/aws/serializer.js +68 -0
- package/dist/llm/azure/chat.d.ts +15 -0
- package/dist/llm/azure/chat.js +83 -0
- package/dist/llm/azure/index.d.ts +1 -0
- package/dist/llm/azure/index.js +1 -0
- package/dist/llm/base.d.ts +16 -0
- package/dist/llm/base.js +1 -0
- package/dist/llm/deepseek/chat.d.ts +15 -0
- package/dist/llm/deepseek/chat.js +51 -0
- package/dist/llm/deepseek/index.d.ts +2 -0
- package/dist/llm/deepseek/index.js +2 -0
- package/dist/llm/deepseek/serializer.d.ts +6 -0
- package/dist/llm/deepseek/serializer.js +57 -0
- package/dist/llm/exceptions.d.ts +10 -0
- package/dist/llm/exceptions.js +18 -0
- package/dist/llm/google/chat.d.ts +20 -0
- package/dist/llm/google/chat.js +144 -0
- package/dist/llm/google/index.d.ts +2 -0
- package/dist/llm/google/index.js +2 -0
- package/dist/llm/google/serializer.d.ts +6 -0
- package/dist/llm/google/serializer.js +64 -0
- package/dist/llm/groq/chat.d.ts +15 -0
- package/dist/llm/groq/chat.js +52 -0
- package/dist/llm/groq/index.d.ts +3 -0
- package/dist/llm/groq/index.js +3 -0
- package/dist/llm/groq/parser.d.ts +32 -0
- package/dist/llm/groq/parser.js +189 -0
- package/dist/llm/groq/serializer.d.ts +6 -0
- package/dist/llm/groq/serializer.js +56 -0
- package/dist/llm/messages.d.ts +77 -0
- package/dist/llm/messages.js +157 -0
- package/dist/llm/ollama/chat.d.ts +15 -0
- package/dist/llm/ollama/chat.js +77 -0
- package/dist/llm/ollama/index.d.ts +2 -0
- package/dist/llm/ollama/index.js +2 -0
- package/dist/llm/ollama/serializer.d.ts +6 -0
- package/dist/llm/ollama/serializer.js +53 -0
- package/dist/llm/openai/chat.d.ts +38 -0
- package/dist/llm/openai/chat.js +174 -0
- package/dist/llm/openai/index.d.ts +3 -0
- package/dist/llm/openai/index.js +3 -0
- package/dist/llm/openai/like.d.ts +17 -0
- package/dist/llm/openai/like.js +19 -0
- package/dist/llm/openai/serializer.d.ts +6 -0
- package/dist/llm/openai/serializer.js +57 -0
- package/dist/llm/openrouter/chat.d.ts +15 -0
- package/dist/llm/openrouter/chat.js +74 -0
- package/dist/llm/openrouter/index.d.ts +2 -0
- package/dist/llm/openrouter/index.js +2 -0
- package/dist/llm/openrouter/serializer.d.ts +3 -0
- package/dist/llm/openrouter/serializer.js +3 -0
- package/dist/llm/schema.d.ts +6 -0
- package/dist/llm/schema.js +77 -0
- package/dist/llm/views.d.ts +15 -0
- package/dist/llm/views.js +12 -0
- package/dist/logging-config.d.ts +25 -0
- package/dist/logging-config.js +89 -0
- package/dist/mcp/client.d.ts +142 -0
- package/dist/mcp/client.js +638 -0
- package/dist/mcp/controller.d.ts +6 -0
- package/dist/mcp/controller.js +38 -0
- package/dist/mcp/index.d.ts +3 -0
- package/dist/mcp/index.js +3 -0
- package/dist/mcp/server.d.ts +134 -0
- package/dist/mcp/server.js +759 -0
- package/dist/observability-decorators.d.ts +158 -0
- package/dist/observability-decorators.js +286 -0
- package/dist/observability.d.ts +23 -0
- package/dist/observability.js +58 -0
- package/dist/screenshots/index.d.ts +1 -0
- package/dist/screenshots/index.js +1 -0
- package/dist/screenshots/service.d.ts +6 -0
- package/dist/screenshots/service.js +28 -0
- package/dist/sync/auth.d.ts +27 -0
- package/dist/sync/auth.js +205 -0
- package/dist/sync/index.d.ts +2 -0
- package/dist/sync/index.js +2 -0
- package/dist/sync/service.d.ts +21 -0
- package/dist/sync/service.js +146 -0
- package/dist/telemetry/index.d.ts +2 -0
- package/dist/telemetry/index.js +2 -0
- package/dist/telemetry/service.d.ts +12 -0
- package/dist/telemetry/service.js +85 -0
- package/dist/telemetry/views.d.ts +112 -0
- package/dist/telemetry/views.js +112 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/dist/tokens/service.d.ts +35 -0
- package/dist/tokens/service.js +423 -0
- package/dist/tokens/views.d.ts +58 -0
- package/dist/tokens/views.js +1 -0
- package/dist/utils.d.ts +128 -0
- package/dist/utils.js +529 -0
- package/package.json +94 -5
|
@@ -0,0 +1,1176 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import { ActionResult } from '../agent/views.js';
|
|
3
|
+
import { BrowserError } from '../browser/views.js';
|
|
4
|
+
import { FileSystem } from '../filesystem/file-system.js';
|
|
5
|
+
import { ClickElementActionSchema, CloseTabActionSchema, DoneActionSchema, ExtractStructuredDataActionSchema, DropdownOptionsActionSchema, SelectDropdownActionSchema, GoToUrlActionSchema, InputTextActionSchema, NoParamsActionSchema, ReadFileActionSchema, ReplaceFileStrActionSchema, ScrollActionSchema, ScrollToTextActionSchema, SearchGoogleActionSchema, StructuredOutputActionSchema, SwitchTabActionSchema, UploadFileActionSchema, WaitActionSchema, WriteFileActionSchema, SendKeysActionSchema, SheetsRangeActionSchema, SheetsUpdateActionSchema, SheetsInputActionSchema, } from './views.js';
|
|
6
|
+
import { Registry } from './registry/service.js';
|
|
7
|
+
import TurndownService from 'turndown';
|
|
8
|
+
import { UserMessage } from '../llm/messages.js';
|
|
9
|
+
import { createLogger } from '../logging-config.js';
|
|
10
|
+
// Seconds subtracted from the requested duration by the `wait` action before it
// actually sleeps; the same offset is added back in the message the action reports.
const DEFAULT_WAIT_OFFSET = 3;
|
|
11
|
+
// Upper bound, in seconds, on how long the `wait` action actually sleeps
// (applied after DEFAULT_WAIT_OFFSET has been subtracted from the request).
const MAX_WAIT_SECONDS = 10;
|
|
12
|
+
/**
 * Lists the `[name, params]` pairs of an action object, leaving out every
 * entry whose params are `null` or `undefined`.
 *
 * @param {object | null | undefined} action - Action object keyed by action name.
 * @returns {Array<[string, unknown]>} The remaining entries; empty when `action` is falsy.
 */
const toActionEntries = (action) => {
    const entries = action ? Object.entries(action) : [];
    return entries.filter((entry) => entry[1] !== null && entry[1] !== undefined);
};
|
|
18
|
+
/**
 * Turns an abort reason into an Error that can be thrown.
 *
 * An Error reason is returned unchanged (whatever its `name` is). Any other
 * reason produces a generic Error named `AbortError`; a non-nullish reason is
 * kept on `cause` so the caller's explanation is not lost.
 *
 * @param {unknown} reason - Usually `signal.reason`.
 * @returns {Error} The error to throw or reject with.
 */
const createAbortError = (reason) => {
    if (reason instanceof Error) {
        return reason;
    }
    const hasReason = reason !== undefined && reason !== null;
    const error = hasReason
        ? new Error('Operation aborted', { cause: reason })
        : new Error('Operation aborted');
    error.name = 'AbortError';
    return error;
};
|
|
26
|
+
/**
 * Tells whether a value is an Error whose `name` is exactly `'AbortError'`.
 *
 * @param {unknown} error - Value caught from a failed operation.
 * @returns {boolean} `true` only for Error instances named `AbortError`.
 */
const isAbortError = (error) => {
    if (!(error instanceof Error)) {
        return false;
    }
    return error.name === 'AbortError';
};
|
|
29
|
+
/**
 * Throws when the given signal has already been aborted; otherwise does nothing.
 *
 * @param {AbortSignal | null | undefined} signal - Optional cancellation signal.
 * @throws {Error} The error built by `createAbortError` from `signal.reason`.
 */
const throwIfAborted = (signal) => {
    if (!signal || !signal.aborted) {
        return;
    }
    throw createAbortError(signal.reason);
};
|
|
34
|
+
/**
 * Sleeps for `timeoutMs` milliseconds, ending early with a rejection if the
 * signal aborts.
 *
 * A non-positive `timeoutMs` does not sleep; it only checks the signal.
 *
 * @param {number} timeoutMs - Delay in milliseconds.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @returns {Promise<void>} Resolves once the delay has elapsed.
 * @throws {Error} The abort error when the signal is, or becomes, aborted.
 */
const waitWithSignal = async (timeoutMs, signal) => {
    if (timeoutMs <= 0) {
        throwIfAborted(signal);
        return;
    }
    await new Promise((resolve, reject) => {
        let timer;
        const detach = () => {
            signal?.removeEventListener('abort', handleAbort);
        };
        const handleAbort = () => {
            clearTimeout(timer);
            detach();
            reject(createAbortError(signal?.reason));
        };
        timer = setTimeout(() => {
            detach();
            resolve();
        }, timeoutMs);
        if (!signal) {
            return;
        }
        if (signal.aborted) {
            // Already cancelled: cancel the timer that was just armed and reject.
            handleAbort();
            return;
        }
        signal.addEventListener('abort', handleAbort, { once: true });
    });
};
|
|
61
|
+
/**
 * Runs `operation` and settles with whichever happens first: the operation's
 * own outcome, the timeout, or an abort of `signal`.
 *
 * The operation itself is not cancelled when the timeout or abort wins; its
 * eventual outcome is simply ignored.
 *
 * @param {() => unknown} operation - Work to run; may return a promise or a plain value.
 * @param {number} timeoutMs - Time limit in milliseconds; a value <= 0 disables the limit
 *   (and the abort race) and just runs the operation.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @param {string} [timeoutMessage] - Message of the Error used when the limit is hit.
 * @returns {Promise<unknown>} The operation's result.
 * @throws {Error} The abort error, a timeout Error, or whatever the operation throws.
 */
const runWithTimeoutAndSignal = async (operation, timeoutMs, signal, timeoutMessage = 'Operation timed out') => {
    throwIfAborted(signal);
    if (timeoutMs <= 0) {
        return operation();
    }
    return await new Promise((resolve, reject) => {
        let settled = false;
        let timeout;
        const cleanup = () => {
            clearTimeout(timeout);
            signal?.removeEventListener('abort', onAbort);
        };
        // Single exit point: the first caller wins and releases timer and listener.
        const settle = (finish, payload) => {
            if (settled) {
                return;
            }
            settled = true;
            cleanup();
            finish(payload);
        };
        const onAbort = () => {
            if (!settled) {
                settle(reject, createAbortError(signal?.reason));
            }
        };
        const onTimeout = () => {
            if (!settled) {
                settle(reject, new Error(timeoutMessage));
            }
        };
        timeout = setTimeout(onTimeout, timeoutMs);
        if (signal) {
            if (signal.aborted) {
                onAbort();
                return;
            }
            signal.addEventListener('abort', onAbort, { once: true });
        }
        // Start the operation synchronously, but inside a promise executor so that a
        // synchronous throw or a non-promise return value still flows through `settle`
        // (otherwise the timer and the abort listener would be left behind).
        new Promise((start) => start(operation())).then((value) => settle(resolve, value), (error) => settle(reject, error));
    });
};
|
|
117
|
+
export class Controller {
|
|
118
|
+
registry;
|
|
119
|
+
displayFilesInDoneText;
|
|
120
|
+
logger;
|
|
121
|
+
constructor(options = {}) {
|
|
122
|
+
const { exclude_actions = [], output_model = null, display_files_in_done_text = true, } = options;
|
|
123
|
+
this.registry = new Registry(exclude_actions);
|
|
124
|
+
this.displayFilesInDoneText = display_files_in_done_text;
|
|
125
|
+
this.logger = createLogger('browser_use.controller');
|
|
126
|
+
this.registerDefaultActions(output_model);
|
|
127
|
+
}
|
|
128
|
+
registerDefaultActions(outputModel) {
|
|
129
|
+
this.registerDoneAction(outputModel);
|
|
130
|
+
this.registerNavigationActions();
|
|
131
|
+
this.registerElementActions();
|
|
132
|
+
this.registerTabActions();
|
|
133
|
+
this.registerContentActions();
|
|
134
|
+
this.registerScrollActions();
|
|
135
|
+
this.registerFileSystemActions();
|
|
136
|
+
this.registerKeyboardActions();
|
|
137
|
+
this.registerDropdownActions();
|
|
138
|
+
this.registerSheetsActions();
|
|
139
|
+
}
|
|
140
|
+
registerNavigationActions() {
|
|
141
|
+
this.registry.action('Search the query in Google...', {
|
|
142
|
+
param_model: SearchGoogleActionSchema,
|
|
143
|
+
})(async function search_google(params, { browser_session, signal }) {
|
|
144
|
+
if (!browser_session)
|
|
145
|
+
throw new Error('Browser session missing');
|
|
146
|
+
throwIfAborted(signal);
|
|
147
|
+
const searchUrl = `https://www.google.com/search?q=${encodeURIComponent(params.query)}&udm=14`;
|
|
148
|
+
const page = await browser_session.get_current_page();
|
|
149
|
+
const currentUrl = page?.url().replace(/\/+$/, '');
|
|
150
|
+
if (currentUrl === 'https://www.google.com') {
|
|
151
|
+
await browser_session.navigate_to(searchUrl, { signal });
|
|
152
|
+
}
|
|
153
|
+
else {
|
|
154
|
+
await browser_session.create_new_tab(searchUrl, { signal });
|
|
155
|
+
}
|
|
156
|
+
const msg = `🔍 Searched for "${params.query}" in Google`;
|
|
157
|
+
return new ActionResult({
|
|
158
|
+
extracted_content: msg,
|
|
159
|
+
include_in_memory: true,
|
|
160
|
+
long_term_memory: `Searched Google for '${params.query}'`,
|
|
161
|
+
});
|
|
162
|
+
});
|
|
163
|
+
this.registry.action('Navigate to URL...', {
|
|
164
|
+
param_model: GoToUrlActionSchema,
|
|
165
|
+
})(async function go_to_url(params, { browser_session, signal }) {
|
|
166
|
+
if (!browser_session)
|
|
167
|
+
throw new Error('Browser session missing');
|
|
168
|
+
throwIfAborted(signal);
|
|
169
|
+
try {
|
|
170
|
+
if (params.new_tab) {
|
|
171
|
+
await browser_session.create_new_tab(params.url, { signal });
|
|
172
|
+
const tabIdx = browser_session.active_tab_index;
|
|
173
|
+
const msg = `🔗 Opened new tab #${tabIdx} with url ${params.url}`;
|
|
174
|
+
return new ActionResult({
|
|
175
|
+
extracted_content: msg,
|
|
176
|
+
include_in_memory: true,
|
|
177
|
+
long_term_memory: `Opened new tab with URL ${params.url}`,
|
|
178
|
+
});
|
|
179
|
+
}
|
|
180
|
+
await browser_session.navigate_to(params.url, { signal });
|
|
181
|
+
const msg = `🔗 Navigated to ${params.url}`;
|
|
182
|
+
return new ActionResult({
|
|
183
|
+
extracted_content: msg,
|
|
184
|
+
include_in_memory: true,
|
|
185
|
+
long_term_memory: `Navigated to ${params.url}`,
|
|
186
|
+
});
|
|
187
|
+
}
|
|
188
|
+
catch (error) {
|
|
189
|
+
const errorMsg = String(error?.message ?? error ?? '');
|
|
190
|
+
const networkFailures = [
|
|
191
|
+
'ERR_NAME_NOT_RESOLVED',
|
|
192
|
+
'ERR_INTERNET_DISCONNECTED',
|
|
193
|
+
'ERR_CONNECTION_REFUSED',
|
|
194
|
+
'ERR_TIMED_OUT',
|
|
195
|
+
'net::',
|
|
196
|
+
];
|
|
197
|
+
if (networkFailures.some((needle) => errorMsg.includes(needle))) {
|
|
198
|
+
const message = `Site unavailable: ${params.url} - ${errorMsg}`;
|
|
199
|
+
throw new BrowserError(message);
|
|
200
|
+
}
|
|
201
|
+
throw error;
|
|
202
|
+
}
|
|
203
|
+
});
|
|
204
|
+
this.registry.action('Go back', { param_model: NoParamsActionSchema })(async function go_back(_params, { browser_session, signal }) {
|
|
205
|
+
if (!browser_session)
|
|
206
|
+
throw new Error('Browser session missing');
|
|
207
|
+
throwIfAborted(signal);
|
|
208
|
+
await browser_session.go_back({ signal });
|
|
209
|
+
const msg = '🔙 Navigated back';
|
|
210
|
+
return new ActionResult({ extracted_content: msg });
|
|
211
|
+
});
|
|
212
|
+
this.registry.action('Wait for x seconds default 3 (max 10 seconds). This can be used to wait until the page is fully loaded.', { param_model: WaitActionSchema })(async function wait(params, { signal }) {
|
|
213
|
+
const seconds = params.seconds ?? 3;
|
|
214
|
+
const actualSeconds = Math.min(Math.max(seconds - DEFAULT_WAIT_OFFSET, 0), MAX_WAIT_SECONDS);
|
|
215
|
+
const msg = `🕒 Waiting for ${actualSeconds + DEFAULT_WAIT_OFFSET} seconds`;
|
|
216
|
+
if (actualSeconds > 0) {
|
|
217
|
+
await waitWithSignal(actualSeconds * 1000, signal);
|
|
218
|
+
}
|
|
219
|
+
return new ActionResult({ extracted_content: msg });
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
registerElementActions() {
|
|
223
|
+
this.registry.action('Click element by index', {
|
|
224
|
+
param_model: ClickElementActionSchema,
|
|
225
|
+
})(async function click_element_by_index(params, { browser_session, signal }) {
|
|
226
|
+
if (!browser_session)
|
|
227
|
+
throw new Error('Browser session missing');
|
|
228
|
+
throwIfAborted(signal);
|
|
229
|
+
const element = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
230
|
+
if (!element) {
|
|
231
|
+
throw new BrowserError(`Element index ${params.index} does not exist - retry or use alternative actions`);
|
|
232
|
+
}
|
|
233
|
+
const initialTabs = Array.isArray(browser_session.tabs)
|
|
234
|
+
? browser_session.tabs.length
|
|
235
|
+
: 0;
|
|
236
|
+
if (browser_session.is_file_input?.(element)) {
|
|
237
|
+
const msg = `Index ${params.index} - has an element which opens file upload dialog.`;
|
|
238
|
+
return new ActionResult({
|
|
239
|
+
extracted_content: msg,
|
|
240
|
+
include_in_memory: true,
|
|
241
|
+
success: false,
|
|
242
|
+
long_term_memory: msg,
|
|
243
|
+
});
|
|
244
|
+
}
|
|
245
|
+
const downloadPath = await browser_session._click_element_node(element, {
|
|
246
|
+
signal,
|
|
247
|
+
});
|
|
248
|
+
let msg = '';
|
|
249
|
+
if (downloadPath) {
|
|
250
|
+
msg = `💾 Downloaded file to ${downloadPath}`;
|
|
251
|
+
}
|
|
252
|
+
else {
|
|
253
|
+
const snippet = element.get_all_text_till_next_clickable_element?.(2) ?? '';
|
|
254
|
+
msg = `🖱️ Clicked button with index ${params.index}: ${snippet}`;
|
|
255
|
+
}
|
|
256
|
+
if (Array.isArray(browser_session.tabs) &&
|
|
257
|
+
browser_session.tabs.length > initialTabs) {
|
|
258
|
+
msg += ' - New tab opened - switching to it';
|
|
259
|
+
await browser_session.switch_to_tab(-1, { signal });
|
|
260
|
+
}
|
|
261
|
+
return new ActionResult({
|
|
262
|
+
extracted_content: msg,
|
|
263
|
+
include_in_memory: true,
|
|
264
|
+
long_term_memory: msg,
|
|
265
|
+
});
|
|
266
|
+
});
|
|
267
|
+
this.registry.action('Click and input text into an input interactive element', { param_model: InputTextActionSchema })(async function input_text(params, { browser_session, has_sensitive_data, signal }) {
|
|
268
|
+
if (!browser_session)
|
|
269
|
+
throw new Error('Browser session missing');
|
|
270
|
+
throwIfAborted(signal);
|
|
271
|
+
const element = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
272
|
+
if (!element) {
|
|
273
|
+
throw new BrowserError(`Element index ${params.index} does not exist - retry or use alternative actions`);
|
|
274
|
+
}
|
|
275
|
+
await browser_session._input_text_element_node(element, params.text, {
|
|
276
|
+
signal,
|
|
277
|
+
});
|
|
278
|
+
const msg = has_sensitive_data
|
|
279
|
+
? `⌨️ Input sensitive data into index ${params.index}`
|
|
280
|
+
: `⌨️ Input ${params.text} into index ${params.index}`;
|
|
281
|
+
return new ActionResult({
|
|
282
|
+
extracted_content: msg,
|
|
283
|
+
include_in_memory: true,
|
|
284
|
+
long_term_memory: `Input '${params.text}' into element ${params.index}.`,
|
|
285
|
+
});
|
|
286
|
+
});
|
|
287
|
+
this.registry.action('Upload file to interactive element with file path', {
|
|
288
|
+
param_model: UploadFileActionSchema,
|
|
289
|
+
})(async function upload_file(params, { browser_session, available_file_paths, signal }) {
|
|
290
|
+
if (!browser_session)
|
|
291
|
+
throw new Error('Browser session missing');
|
|
292
|
+
throwIfAborted(signal);
|
|
293
|
+
if (!available_file_paths?.includes(params.path)) {
|
|
294
|
+
throw new BrowserError(`File path ${params.path} is not available`);
|
|
295
|
+
}
|
|
296
|
+
if (!fs.existsSync(params.path)) {
|
|
297
|
+
throw new BrowserError(`File ${params.path} does not exist`);
|
|
298
|
+
}
|
|
299
|
+
const node = await browser_session.find_file_upload_element_by_index(params.index, 3, 3, { signal });
|
|
300
|
+
if (!node) {
|
|
301
|
+
throw new BrowserError(`No file upload element found at index ${params.index}`);
|
|
302
|
+
}
|
|
303
|
+
const locator = await browser_session.get_locate_element(node);
|
|
304
|
+
if (!locator) {
|
|
305
|
+
throw new BrowserError(`No file upload element found at index ${params.index}`);
|
|
306
|
+
}
|
|
307
|
+
await locator.setInputFiles(params.path);
|
|
308
|
+
const msg = `📁 Successfully uploaded file to index ${params.index}`;
|
|
309
|
+
return new ActionResult({
|
|
310
|
+
extracted_content: msg,
|
|
311
|
+
include_in_memory: true,
|
|
312
|
+
long_term_memory: `Uploaded file ${params.path} to element ${params.index}`,
|
|
313
|
+
});
|
|
314
|
+
});
|
|
315
|
+
}
|
|
316
|
+
registerTabActions() {
|
|
317
|
+
this.registry.action('Switch tab', { param_model: SwitchTabActionSchema })(async function switch_tab(params, ctx) {
|
|
318
|
+
const { browser_session, signal } = ctx;
|
|
319
|
+
if (!browser_session)
|
|
320
|
+
throw new Error('Browser session missing');
|
|
321
|
+
throwIfAborted(signal);
|
|
322
|
+
await browser_session.switch_to_tab(params.page_id, { signal });
|
|
323
|
+
const page = await browser_session.get_current_page();
|
|
324
|
+
try {
|
|
325
|
+
await page?.wait_for_load_state?.('domcontentloaded', {
|
|
326
|
+
timeout: 5000,
|
|
327
|
+
});
|
|
328
|
+
}
|
|
329
|
+
catch {
|
|
330
|
+
/* ignore */
|
|
331
|
+
}
|
|
332
|
+
const msg = `🔄 Switched to tab #${params.page_id} with url ${page?.url ?? ''}`;
|
|
333
|
+
return new ActionResult({
|
|
334
|
+
extracted_content: msg,
|
|
335
|
+
include_in_memory: true,
|
|
336
|
+
long_term_memory: `Switched to tab ${params.page_id}`,
|
|
337
|
+
});
|
|
338
|
+
});
|
|
339
|
+
this.registry.action('Close an existing tab', {
|
|
340
|
+
param_model: CloseTabActionSchema,
|
|
341
|
+
})(async function close_tab(params, { browser_session, signal }) {
|
|
342
|
+
if (!browser_session)
|
|
343
|
+
throw new Error('Browser session missing');
|
|
344
|
+
throwIfAborted(signal);
|
|
345
|
+
await browser_session.switch_to_tab(params.page_id, { signal });
|
|
346
|
+
const page = await browser_session.get_current_page();
|
|
347
|
+
const url = page?.url ?? '';
|
|
348
|
+
await page?.close?.();
|
|
349
|
+
const newPage = await browser_session.get_current_page();
|
|
350
|
+
const newIndex = browser_session.active_tab_index;
|
|
351
|
+
const msg = `❌ Closed tab #${params.page_id} with ${url}, now focused on tab #${newIndex} with url ${newPage?.url ?? ''}`;
|
|
352
|
+
return new ActionResult({
|
|
353
|
+
extracted_content: msg,
|
|
354
|
+
include_in_memory: true,
|
|
355
|
+
long_term_memory: `Closed tab ${params.page_id} with url ${url}, now focused on tab ${newIndex} with url ${newPage?.url ?? ''}.`,
|
|
356
|
+
});
|
|
357
|
+
});
|
|
358
|
+
}
|
|
359
|
+
registerContentActions() {
|
|
360
|
+
this.registry.action('Extract structured, semantic data from the current webpage based on a textual query.', {
|
|
361
|
+
param_model: ExtractStructuredDataActionSchema,
|
|
362
|
+
})(async function extract_structured_data(params, { page, page_extraction_llm, file_system, signal }) {
|
|
363
|
+
throwIfAborted(signal);
|
|
364
|
+
if (!page) {
|
|
365
|
+
throw new BrowserError('No active page available for extraction.');
|
|
366
|
+
}
|
|
367
|
+
if (!page_extraction_llm) {
|
|
368
|
+
throw new BrowserError('page_extraction_llm is not configured.');
|
|
369
|
+
}
|
|
370
|
+
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
371
|
+
const html = await page.content?.();
|
|
372
|
+
if (!html) {
|
|
373
|
+
throw new BrowserError('Unable to extract page content.');
|
|
374
|
+
}
|
|
375
|
+
const turndown = new TurndownService({
|
|
376
|
+
headingStyle: 'atx',
|
|
377
|
+
codeBlockStyle: 'fenced',
|
|
378
|
+
});
|
|
379
|
+
let rawHtml = html;
|
|
380
|
+
if (!params.extract_links) {
|
|
381
|
+
rawHtml = rawHtml.replace(/<a\b[^>]*>/gi, '').replace(/<\/a>/gi, '');
|
|
382
|
+
}
|
|
383
|
+
let content = turndown.turndown(rawHtml);
|
|
384
|
+
content = content.replace(/\n+/g, '\n');
|
|
385
|
+
// Manually append iframe text into the content so it's readable by the LLM (includes cross-origin iframes)
|
|
386
|
+
const frames = page.frames?.() || [];
|
|
387
|
+
for (const iframe of frames) {
|
|
388
|
+
throwIfAborted(signal);
|
|
389
|
+
try {
|
|
390
|
+
// Wait for iframe to load with aggressive timeout
|
|
391
|
+
await runWithTimeoutAndSignal(async () => {
|
|
392
|
+
await iframe.waitForLoadState?.('load');
|
|
393
|
+
}, 2000, signal, 'Iframe load timeout');
|
|
394
|
+
}
|
|
395
|
+
catch (error) {
|
|
396
|
+
if (isAbortError(error)) {
|
|
397
|
+
throw error;
|
|
398
|
+
}
|
|
399
|
+
// Ignore iframe load errors
|
|
400
|
+
}
|
|
401
|
+
const iframeUrl = iframe.url?.();
|
|
402
|
+
const pageUrl = page.url?.();
|
|
403
|
+
if (iframeUrl &&
|
|
404
|
+
pageUrl &&
|
|
405
|
+
iframeUrl !== pageUrl &&
|
|
406
|
+
!iframeUrl.startsWith('data:') &&
|
|
407
|
+
!iframeUrl.startsWith('about:')) {
|
|
408
|
+
content += `\n\nIFRAME ${iframeUrl}:\n`;
|
|
409
|
+
try {
|
|
410
|
+
const iframeHtml = await runWithTimeoutAndSignal(async () => (await iframe.content?.()) ?? '', 2000, signal, 'Iframe content extraction timeout');
|
|
411
|
+
const iframeMarkdown = turndown.turndown(iframeHtml || '');
|
|
412
|
+
content += iframeMarkdown;
|
|
413
|
+
}
|
|
414
|
+
catch (error) {
|
|
415
|
+
if (isAbortError(error)) {
|
|
416
|
+
throw error;
|
|
417
|
+
}
|
|
418
|
+
// Skip failed iframes
|
|
419
|
+
}
|
|
420
|
+
}
|
|
421
|
+
}
|
|
422
|
+
// Replace multiple sequential \n with a single \n
|
|
423
|
+
content = content.replace(/\n+/g, '\n');
|
|
424
|
+
const maxChars = 30000;
|
|
425
|
+
if (content.length > maxChars) {
|
|
426
|
+
const head = content.slice(0, maxChars / 2);
|
|
427
|
+
const tail = content.slice(-maxChars / 2);
|
|
428
|
+
content = `${head}\n... left out the middle because it was too long ...\n${tail}`;
|
|
429
|
+
}
|
|
430
|
+
const prompt = `You convert websites into structured information. Extract information from this webpage based on the query. Focus only on content relevant to the query. If
|
|
431
|
+
1. The query is vague
|
|
432
|
+
2. Does not make sense for the page
|
|
433
|
+
3. Some/all of the information is not available
|
|
434
|
+
|
|
435
|
+
Explain the content of the page and that the requested information is not available in the page. Respond in JSON format.
|
|
436
|
+
Query: ${params.query}
|
|
437
|
+
Website:
|
|
438
|
+
${content}`;
|
|
439
|
+
const extraction = await page_extraction_llm.ainvoke([new UserMessage(prompt)], undefined, { signal: signal ?? undefined });
|
|
440
|
+
throwIfAborted(signal);
|
|
441
|
+
const completion = extraction?.completion ?? '';
|
|
442
|
+
const extracted_content = `Page Link: ${page.url}\nQuery: ${params.query}\nExtracted Content:\n${completion}`;
|
|
443
|
+
let includeOnce = false;
|
|
444
|
+
let memory = extracted_content;
|
|
445
|
+
const MAX_MEMORY_SIZE = 600;
|
|
446
|
+
if (extracted_content.length > MAX_MEMORY_SIZE) {
|
|
447
|
+
const lines = extracted_content.split('\n');
|
|
448
|
+
let display = '';
|
|
449
|
+
let count = 0;
|
|
450
|
+
for (const line of lines) {
|
|
451
|
+
if (display.length + line.length > MAX_MEMORY_SIZE)
|
|
452
|
+
break;
|
|
453
|
+
display += `${line}\n`;
|
|
454
|
+
count += 1;
|
|
455
|
+
}
|
|
456
|
+
const saveResult = await fsInstance.save_extracted_content(extracted_content);
|
|
457
|
+
// NOTE: Do NOT mention file_system tag here as it misleads LLM to use read_file action
|
|
458
|
+
// The extracted content preview above is sufficient for most tasks
|
|
459
|
+
memory = `Extracted content from ${page.url}\n<query>${params.query}</query>\n<extracted_content>\n${display}${lines.length - count} more lines (auto-saved, no need to read)...\n</extracted_content>`;
|
|
460
|
+
includeOnce = true;
|
|
461
|
+
}
|
|
462
|
+
return new ActionResult({
|
|
463
|
+
extracted_content,
|
|
464
|
+
include_extracted_content_only_once: includeOnce,
|
|
465
|
+
long_term_memory: memory,
|
|
466
|
+
});
|
|
467
|
+
});
|
|
468
|
+
}
|
|
469
|
+
registerScrollActions() {
|
|
470
|
+
const scrollLogger = this.logger; // Capture logger reference for use in named function
|
|
471
|
+
// Define the scroll handler implementation (shared by multiple action names for LLM compatibility)
|
|
472
|
+
const scrollImpl = async (params, { browser_session, signal }) => {
|
|
473
|
+
if (!browser_session)
|
|
474
|
+
throw new Error('Browser session missing');
|
|
475
|
+
throwIfAborted(signal);
|
|
476
|
+
const page = await browser_session.get_current_page();
|
|
477
|
+
if (!page || !page.evaluate) {
|
|
478
|
+
throw new BrowserError('Unable to access current page for scrolling.');
|
|
479
|
+
}
|
|
480
|
+
// Helper function to get window height with retries
|
|
481
|
+
const getWindowHeight = async (retries = 3) => {
|
|
482
|
+
for (let i = 0; i < retries; i++) {
|
|
483
|
+
throwIfAborted(signal);
|
|
484
|
+
try {
|
|
485
|
+
const height = await page.evaluate(() => window.innerHeight);
|
|
486
|
+
return height || 0;
|
|
487
|
+
}
|
|
488
|
+
catch (error) {
|
|
489
|
+
if (i === retries - 1) {
|
|
490
|
+
throw new Error(`Scroll failed due to an error: ${error}`);
|
|
491
|
+
}
|
|
492
|
+
await waitWithSignal(1000, signal);
|
|
493
|
+
}
|
|
494
|
+
}
|
|
495
|
+
return 0;
|
|
496
|
+
};
|
|
497
|
+
const windowHeight = await getWindowHeight();
|
|
498
|
+
const scrollAmount = Math.floor(windowHeight * params.num_pages);
|
|
499
|
+
const pagesScrolled = params.num_pages;
|
|
500
|
+
const dy = params.down ? scrollAmount : -scrollAmount;
|
|
501
|
+
const direction = params.down ? 'down' : 'up';
|
|
502
|
+
let scrollTarget = 'the page';
|
|
503
|
+
// Element-specific scrolling if index is provided
|
|
504
|
+
if (params.index !== undefined && params.index !== null) {
|
|
505
|
+
try {
|
|
506
|
+
const elementNode = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
507
|
+
if (!elementNode) {
|
|
508
|
+
throw new Error(`Element index ${params.index} does not exist - retry or use alternative actions`);
|
|
509
|
+
}
|
|
510
|
+
// Try direct container scrolling (no events that might close dropdowns)
|
|
511
|
+
const containerScrollJs = `
|
|
512
|
+
(params) => {
|
|
513
|
+
const { dy, elementXPath } = params;
|
|
514
|
+
|
|
515
|
+
// Get the target element by XPath
|
|
516
|
+
const targetElement = document.evaluate(elementXPath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
517
|
+
if (!targetElement) {
|
|
518
|
+
return { success: false, reason: 'Element not found by XPath' };
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
console.log('[SCROLL DEBUG] Starting direct container scroll for element:', targetElement.tagName);
|
|
522
|
+
|
|
523
|
+
// Try to find scrollable containers in the hierarchy (starting from element itself)
|
|
524
|
+
let currentElement = targetElement;
|
|
525
|
+
let scrollSuccess = false;
|
|
526
|
+
let scrolledElement = null;
|
|
527
|
+
let scrollDelta = 0;
|
|
528
|
+
let attempts = 0;
|
|
529
|
+
|
|
530
|
+
// Check up to 10 elements in hierarchy (including the target element itself)
|
|
531
|
+
while (currentElement && attempts < 10) {
|
|
532
|
+
const computedStyle = window.getComputedStyle(currentElement);
|
|
533
|
+
const hasScrollableY = /(auto|scroll|overlay)/.test(computedStyle.overflowY);
|
|
534
|
+
const canScrollVertically = currentElement.scrollHeight > currentElement.clientHeight;
|
|
535
|
+
|
|
536
|
+
console.log('[SCROLL DEBUG] Checking element:', currentElement.tagName,
|
|
537
|
+
'hasScrollableY:', hasScrollableY,
|
|
538
|
+
'canScrollVertically:', canScrollVertically,
|
|
539
|
+
'scrollHeight:', currentElement.scrollHeight,
|
|
540
|
+
'clientHeight:', currentElement.clientHeight);
|
|
541
|
+
|
|
542
|
+
if (hasScrollableY && canScrollVertically) {
|
|
543
|
+
const beforeScroll = currentElement.scrollTop;
|
|
544
|
+
const maxScroll = currentElement.scrollHeight - currentElement.clientHeight;
|
|
545
|
+
|
|
546
|
+
// Calculate scroll amount (1/3 of provided dy for gentler scrolling)
|
|
547
|
+
let scrollAmount = dy / 3;
|
|
548
|
+
|
|
549
|
+
// Ensure we don't scroll beyond bounds
|
|
550
|
+
if (scrollAmount > 0) {
|
|
551
|
+
scrollAmount = Math.min(scrollAmount, maxScroll - beforeScroll);
|
|
552
|
+
} else {
|
|
553
|
+
scrollAmount = Math.max(scrollAmount, -beforeScroll);
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
// Try direct scrollTop manipulation (most reliable)
|
|
557
|
+
currentElement.scrollTop = beforeScroll + scrollAmount;
|
|
558
|
+
|
|
559
|
+
const afterScroll = currentElement.scrollTop;
|
|
560
|
+
const actualScrollDelta = afterScroll - beforeScroll;
|
|
561
|
+
|
|
562
|
+
console.log('[SCROLL DEBUG] Scroll attempt:', currentElement.tagName,
|
|
563
|
+
'before:', beforeScroll, 'after:', afterScroll, 'delta:', actualScrollDelta);
|
|
564
|
+
|
|
565
|
+
if (Math.abs(actualScrollDelta) > 0.5) {
|
|
566
|
+
scrollSuccess = true;
|
|
567
|
+
scrolledElement = currentElement;
|
|
568
|
+
scrollDelta = actualScrollDelta;
|
|
569
|
+
console.log('[SCROLL DEBUG] Successfully scrolled container:', currentElement.tagName, 'delta:', actualScrollDelta);
|
|
570
|
+
break;
|
|
571
|
+
}
|
|
572
|
+
}
|
|
573
|
+
|
|
574
|
+
// Move to parent (but don't go beyond body for dropdown case)
|
|
575
|
+
if (currentElement === document.body || currentElement === document.documentElement) {
|
|
576
|
+
break;
|
|
577
|
+
}
|
|
578
|
+
currentElement = currentElement.parentElement;
|
|
579
|
+
attempts++;
|
|
580
|
+
}
|
|
581
|
+
|
|
582
|
+
if (scrollSuccess) {
|
|
583
|
+
// Successfully scrolled a container
|
|
584
|
+
return {
|
|
585
|
+
success: true,
|
|
586
|
+
method: 'direct_container_scroll',
|
|
587
|
+
containerType: 'element',
|
|
588
|
+
containerTag: scrolledElement.tagName.toLowerCase(),
|
|
589
|
+
containerClass: scrolledElement.className || '',
|
|
590
|
+
containerId: scrolledElement.id || '',
|
|
591
|
+
scrollDelta: scrollDelta
|
|
592
|
+
};
|
|
593
|
+
} else {
|
|
594
|
+
// No container found or could scroll
|
|
595
|
+
console.log('[SCROLL DEBUG] No scrollable container found for element');
|
|
596
|
+
return {
|
|
597
|
+
success: false,
|
|
598
|
+
reason: 'No scrollable container found',
|
|
599
|
+
needsPageScroll: true
|
|
600
|
+
};
|
|
601
|
+
}
|
|
602
|
+
}
|
|
603
|
+
`;
|
|
604
|
+
const scrollParams = { dy, elementXPath: elementNode.xpath };
|
|
605
|
+
const result = (await page.evaluate(containerScrollJs, scrollParams));
|
|
606
|
+
if (result.success) {
|
|
607
|
+
if (result.containerType === 'element') {
|
|
608
|
+
let containerInfo = result.containerTag;
|
|
609
|
+
if (result.containerId) {
|
|
610
|
+
containerInfo += `#${result.containerId}`;
|
|
611
|
+
}
|
|
612
|
+
else if (result.containerClass) {
|
|
613
|
+
containerInfo += `.${result.containerClass.split(' ')[0]}`;
|
|
614
|
+
}
|
|
615
|
+
scrollTarget = `element ${params.index}'s scroll container (${containerInfo})`;
|
|
616
|
+
// Don't do additional page scrolling since we successfully scrolled the container
|
|
617
|
+
}
|
|
618
|
+
else {
|
|
619
|
+
scrollTarget = `the page (fallback from element ${params.index})`;
|
|
620
|
+
}
|
|
621
|
+
}
|
|
622
|
+
else {
|
|
623
|
+
// Container scroll failed, need page-level scrolling
|
|
624
|
+
scrollLogger.debug(`Container scroll failed for element ${params.index}: ${result.reason || 'Unknown'}`);
|
|
625
|
+
scrollTarget = `the page (no container found for element ${params.index})`;
|
|
626
|
+
// This will trigger page-level scrolling below
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
catch (error) {
|
|
630
|
+
scrollLogger.debug(`Element-specific scrolling failed for index ${params.index}: ${error}`);
|
|
631
|
+
scrollTarget = `the page (fallback from element ${params.index})`;
|
|
632
|
+
// Fall through to page-level scrolling
|
|
633
|
+
}
|
|
634
|
+
}
|
|
635
|
+
// Page-level scrolling (default or fallback)
|
|
636
|
+
if (scrollTarget === 'the page' ||
|
|
637
|
+
scrollTarget.includes('fallback') ||
|
|
638
|
+
scrollTarget.includes('no container found') ||
|
|
639
|
+
scrollTarget.includes('mouse wheel failed')) {
|
|
640
|
+
scrollLogger.debug(`🔄 Performing page-level scrolling. Reason: ${scrollTarget}`);
|
|
641
|
+
try {
|
|
642
|
+
await browser_session._scrollContainer(dy);
|
|
643
|
+
}
|
|
644
|
+
catch (error) {
|
|
645
|
+
// Hard fallback: always works on root scroller
|
|
646
|
+
await page.evaluate((y) => window.scrollBy(0, y), dy);
|
|
647
|
+
scrollLogger.debug('Smart scroll failed; used window.scrollBy fallback', error);
|
|
648
|
+
}
|
|
649
|
+
}
|
|
650
|
+
// Create descriptive message
|
|
651
|
+
let longTermMemory;
|
|
652
|
+
if (pagesScrolled === 1.0) {
|
|
653
|
+
longTermMemory = `Scrolled ${direction} ${scrollTarget} by one page`;
|
|
654
|
+
}
|
|
655
|
+
else {
|
|
656
|
+
longTermMemory = `Scrolled ${direction} ${scrollTarget} by ${pagesScrolled} pages`;
|
|
657
|
+
}
|
|
658
|
+
const msg = `🔍 ${longTermMemory}`;
|
|
659
|
+
scrollLogger.info(msg);
|
|
660
|
+
return new ActionResult({
|
|
661
|
+
extracted_content: msg,
|
|
662
|
+
include_in_memory: true,
|
|
663
|
+
long_term_memory: longTermMemory,
|
|
664
|
+
});
|
|
665
|
+
};
|
|
666
|
+
// Register scroll action with multiple names for LLM compatibility
|
|
667
|
+
// Different LLMs may use different names: scroll, scroll_page, scroll_down
|
|
668
|
+
const scrollDescription = 'Scroll the page by specified number of pages (set down=True to scroll down, down=False to scroll up, num_pages=number of pages to scroll like 0.5 for half page, 1.0 for one page, etc.). Optional index parameter to scroll within a specific element or its scroll container (works well for dropdowns and custom UI components).';
|
|
669
|
+
// Create named functions that wrap the implementation
|
|
670
|
+
// Different LLMs may use different names: scroll, scroll_page, scroll_down, scroll_by, scroll_page_by, scroll_up
|
|
671
|
+
const scrollAction = async function scroll(p, ctx) { return scrollImpl(p, ctx); };
|
|
672
|
+
const scrollPageAction = async function scroll_page(p, ctx) { return scrollImpl(p, ctx); };
|
|
673
|
+
const scrollDownAction = async function scroll_down(p, ctx) { return scrollImpl(p, ctx); };
|
|
674
|
+
const scrollByAction = async function scroll_by(p, ctx) { return scrollImpl(p, ctx); };
|
|
675
|
+
const scrollPageByAction = async function scroll_page_by(p, ctx) { return scrollImpl(p, ctx); };
|
|
676
|
+
const scrollUpAction = async function scroll_up(p, ctx) { return scrollImpl(p, ctx); };
|
|
677
|
+
this.registry.action(scrollDescription, { param_model: ScrollActionSchema })(scrollAction);
|
|
678
|
+
this.registry.action(scrollDescription, { param_model: ScrollActionSchema })(scrollPageAction);
|
|
679
|
+
this.registry.action(scrollDescription, { param_model: ScrollActionSchema })(scrollDownAction);
|
|
680
|
+
this.registry.action(scrollDescription, { param_model: ScrollActionSchema })(scrollByAction);
|
|
681
|
+
this.registry.action(scrollDescription, { param_model: ScrollActionSchema })(scrollPageByAction);
|
|
682
|
+
this.registry.action(scrollDescription, { param_model: ScrollActionSchema })(scrollUpAction);
|
|
683
|
+
this.registry.action('Scroll to a text in the current page', {
|
|
684
|
+
param_model: ScrollToTextActionSchema,
|
|
685
|
+
})(async function scroll_to_text(params, { browser_session }) {
|
|
686
|
+
if (!browser_session)
|
|
687
|
+
throw new Error('Browser session missing');
|
|
688
|
+
const page = await browser_session.get_current_page();
|
|
689
|
+
if (!page?.evaluate) {
|
|
690
|
+
throw new BrowserError('Unable to access page for scrolling.');
|
|
691
|
+
}
|
|
692
|
+
const success = await page.evaluate(({ text }) => {
|
|
693
|
+
const iterator = document.createNodeIterator(document.body, NodeFilter.SHOW_ELEMENT);
|
|
694
|
+
let node;
|
|
695
|
+
while ((node = iterator.nextNode())) {
|
|
696
|
+
const el = node;
|
|
697
|
+
if (!el || !el.textContent)
|
|
698
|
+
continue;
|
|
699
|
+
if (el.textContent.toLowerCase().includes(text.toLowerCase())) {
|
|
700
|
+
el.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
|
701
|
+
return true;
|
|
702
|
+
}
|
|
703
|
+
}
|
|
704
|
+
return false;
|
|
705
|
+
}, { text: params.text });
|
|
706
|
+
if (!success) {
|
|
707
|
+
throw new BrowserError(`Text '${params.text}' not found on page`);
|
|
708
|
+
}
|
|
709
|
+
const msg = `🔍 Scrolled to text: ${params.text}`;
|
|
710
|
+
return new ActionResult({
|
|
711
|
+
extracted_content: msg,
|
|
712
|
+
include_in_memory: true,
|
|
713
|
+
long_term_memory: msg,
|
|
714
|
+
});
|
|
715
|
+
});
|
|
716
|
+
}
|
|
717
|
+
registerFileSystemActions() {
|
|
718
|
+
this.registry.action('Read file_name from file system', {
|
|
719
|
+
param_model: ReadFileActionSchema,
|
|
720
|
+
})(async function read_file(params, { file_system, available_file_paths }) {
|
|
721
|
+
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
722
|
+
const allowed = Array.isArray(available_file_paths) &&
|
|
723
|
+
available_file_paths.includes(params.file_name);
|
|
724
|
+
const result = await fsInstance.read_file(params.file_name, allowed);
|
|
725
|
+
const MAX_MEMORY_SIZE = 1000;
|
|
726
|
+
let memory = result;
|
|
727
|
+
if (result.length > MAX_MEMORY_SIZE) {
|
|
728
|
+
const lines = result.split('\n');
|
|
729
|
+
let preview = '';
|
|
730
|
+
let used = 0;
|
|
731
|
+
for (const line of lines) {
|
|
732
|
+
if (preview.length + line.length > MAX_MEMORY_SIZE)
|
|
733
|
+
break;
|
|
734
|
+
preview += `${line}\n`;
|
|
735
|
+
used += 1;
|
|
736
|
+
}
|
|
737
|
+
const remaining = lines.length - used;
|
|
738
|
+
memory =
|
|
739
|
+
remaining > 0 ? `${preview}${remaining} more lines...` : preview;
|
|
740
|
+
}
|
|
741
|
+
return new ActionResult({
|
|
742
|
+
extracted_content: result,
|
|
743
|
+
include_in_memory: true,
|
|
744
|
+
long_term_memory: memory,
|
|
745
|
+
include_extracted_content_only_once: true,
|
|
746
|
+
});
|
|
747
|
+
});
|
|
748
|
+
this.registry.action('Write content to file', {
|
|
749
|
+
param_model: WriteFileActionSchema,
|
|
750
|
+
})(async function write_file(params, { file_system }) {
|
|
751
|
+
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
752
|
+
let content = params.content;
|
|
753
|
+
const trailing = params.trailing_newline ?? true;
|
|
754
|
+
const leading = params.leading_newline ?? false;
|
|
755
|
+
if (trailing) {
|
|
756
|
+
content = `${content}\n`;
|
|
757
|
+
}
|
|
758
|
+
if (leading) {
|
|
759
|
+
content = `\n${content}`;
|
|
760
|
+
}
|
|
761
|
+
const append = params.append ?? false;
|
|
762
|
+
const result = append
|
|
763
|
+
? await fsInstance.append_file(params.file_name, content)
|
|
764
|
+
: await fsInstance.write_file(params.file_name, content);
|
|
765
|
+
const msg = `📝 ${result}`;
|
|
766
|
+
return new ActionResult({
|
|
767
|
+
extracted_content: result,
|
|
768
|
+
include_in_memory: true,
|
|
769
|
+
long_term_memory: result,
|
|
770
|
+
});
|
|
771
|
+
});
|
|
772
|
+
this.registry.action('Replace text within an existing file', {
|
|
773
|
+
param_model: ReplaceFileStrActionSchema,
|
|
774
|
+
})(async function replace_file_str(params, { file_system }) {
|
|
775
|
+
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
776
|
+
const result = await fsInstance.replace_file_str(params.file_name, params.old_str, params.new_str);
|
|
777
|
+
return new ActionResult({
|
|
778
|
+
extracted_content: result,
|
|
779
|
+
include_in_memory: true,
|
|
780
|
+
long_term_memory: result,
|
|
781
|
+
});
|
|
782
|
+
});
|
|
783
|
+
}
|
|
784
|
+
registerKeyboardActions() {
|
|
785
|
+
this.registry.action('Send keys to the active page', {
|
|
786
|
+
param_model: SendKeysActionSchema,
|
|
787
|
+
})(async function send_keys(params, { browser_session }) {
|
|
788
|
+
if (!browser_session)
|
|
789
|
+
throw new Error('Browser session missing');
|
|
790
|
+
const page = await browser_session.get_current_page();
|
|
791
|
+
const keyboard = page?.keyboard;
|
|
792
|
+
if (!keyboard) {
|
|
793
|
+
throw new BrowserError('Keyboard input is not available on the current page.');
|
|
794
|
+
}
|
|
795
|
+
try {
|
|
796
|
+
await keyboard.press(params.keys);
|
|
797
|
+
}
|
|
798
|
+
catch (error) {
|
|
799
|
+
if (error instanceof Error && error.message.includes('Unknown key')) {
|
|
800
|
+
for (const char of params.keys) {
|
|
801
|
+
await keyboard.press(char);
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
else {
|
|
805
|
+
throw error;
|
|
806
|
+
}
|
|
807
|
+
}
|
|
808
|
+
const msg = `⌨️ Sent keys: ${params.keys}`;
|
|
809
|
+
return new ActionResult({
|
|
810
|
+
extracted_content: msg,
|
|
811
|
+
include_in_memory: true,
|
|
812
|
+
long_term_memory: msg,
|
|
813
|
+
});
|
|
814
|
+
});
|
|
815
|
+
}
|
|
816
|
+
registerDropdownActions() {
|
|
817
|
+
this.registry.action('Get all options from a native dropdown or ARIA menu', { param_model: DropdownOptionsActionSchema })(async function get_dropdown_options(params, { browser_session, signal }) {
|
|
818
|
+
if (!browser_session)
|
|
819
|
+
throw new Error('Browser session missing');
|
|
820
|
+
throwIfAborted(signal);
|
|
821
|
+
const page = await browser_session.get_current_page();
|
|
822
|
+
const domElement = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
823
|
+
if (!domElement) {
|
|
824
|
+
throw new BrowserError(`Element index ${params.index} does not exist.`);
|
|
825
|
+
}
|
|
826
|
+
if (!page?.evaluate) {
|
|
827
|
+
throw new BrowserError('Unable to evaluate dropdown options on current page.');
|
|
828
|
+
}
|
|
829
|
+
if (!domElement.xpath) {
|
|
830
|
+
throw new BrowserError('DOM element does not include an XPath selector.');
|
|
831
|
+
}
|
|
832
|
+
const payload = await page.evaluate(({ xpath }) => {
|
|
833
|
+
const element = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
834
|
+
if (!element)
|
|
835
|
+
return null;
|
|
836
|
+
if (element.tagName?.toLowerCase() === 'select') {
|
|
837
|
+
const options = Array.from(element.options).map((opt, index) => ({
|
|
838
|
+
text: opt.text,
|
|
839
|
+
value: opt.value,
|
|
840
|
+
index,
|
|
841
|
+
}));
|
|
842
|
+
return { type: 'select', options };
|
|
843
|
+
}
|
|
844
|
+
const ariaRoles = new Set(['menu', 'listbox', 'combobox']);
|
|
845
|
+
const role = element.getAttribute('role');
|
|
846
|
+
if (role && ariaRoles.has(role)) {
|
|
847
|
+
const nodes = element.querySelectorAll('[role="menuitem"],[role="option"]');
|
|
848
|
+
const options = Array.from(nodes).map((node, index) => ({
|
|
849
|
+
text: node.textContent?.trim() ?? '',
|
|
850
|
+
value: node.textContent?.trim() ?? '',
|
|
851
|
+
index,
|
|
852
|
+
}));
|
|
853
|
+
return { type: 'aria', options };
|
|
854
|
+
}
|
|
855
|
+
return null;
|
|
856
|
+
}, { xpath: domElement.xpath });
|
|
857
|
+
if (!payload || !payload.options?.length) {
|
|
858
|
+
throw new BrowserError('No options found for the specified dropdown.');
|
|
859
|
+
}
|
|
860
|
+
const formatted = payload.options.map((opt) => `${opt.index}: text=${JSON.stringify(opt.text ?? '')}`);
|
|
861
|
+
formatted.push('Use the exact text string in select_dropdown_option');
|
|
862
|
+
const message = formatted.join('\n');
|
|
863
|
+
return new ActionResult({
|
|
864
|
+
extracted_content: message,
|
|
865
|
+
include_in_memory: true,
|
|
866
|
+
include_extracted_content_only_once: true,
|
|
867
|
+
long_term_memory: `Found dropdown options for index ${params.index}.`,
|
|
868
|
+
});
|
|
869
|
+
});
|
|
870
|
+
this.registry.action('Select dropdown option or ARIA menu item by text', {
|
|
871
|
+
param_model: SelectDropdownActionSchema,
|
|
872
|
+
})(async function select_dropdown_option(params, { browser_session, signal }) {
|
|
873
|
+
if (!browser_session)
|
|
874
|
+
throw new Error('Browser session missing');
|
|
875
|
+
throwIfAborted(signal);
|
|
876
|
+
const page = await browser_session.get_current_page();
|
|
877
|
+
const domElement = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
878
|
+
if (!domElement?.xpath) {
|
|
879
|
+
throw new BrowserError('DOM element does not include an XPath selector.');
|
|
880
|
+
}
|
|
881
|
+
if (!page) {
|
|
882
|
+
throw new BrowserError('No active page for selection.');
|
|
883
|
+
}
|
|
884
|
+
for (const frame of page.frames ?? []) {
|
|
885
|
+
try {
|
|
886
|
+
const typeInfo = await frame.evaluate((xpath) => {
|
|
887
|
+
const element = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
888
|
+
if (!element)
|
|
889
|
+
return { found: false };
|
|
890
|
+
const tagName = element.tagName?.toLowerCase();
|
|
891
|
+
const role = element.getAttribute?.('role');
|
|
892
|
+
if (tagName === 'select')
|
|
893
|
+
return { found: true, type: 'select' };
|
|
894
|
+
if (role && ['menu', 'listbox', 'combobox'].includes(role))
|
|
895
|
+
return { found: true, type: 'aria' };
|
|
896
|
+
return { found: false };
|
|
897
|
+
}, domElement.xpath);
|
|
898
|
+
if (!typeInfo?.found)
|
|
899
|
+
continue;
|
|
900
|
+
if (typeInfo.type === 'select') {
|
|
901
|
+
await frame
|
|
902
|
+
.locator(domElement.xpath)
|
|
903
|
+
.first()
|
|
904
|
+
.select_option({ label: params.text });
|
|
905
|
+
const msg = `Selected option ${params.text}`;
|
|
906
|
+
return new ActionResult({
|
|
907
|
+
extracted_content: msg,
|
|
908
|
+
include_in_memory: true,
|
|
909
|
+
long_term_memory: msg,
|
|
910
|
+
});
|
|
911
|
+
}
|
|
912
|
+
const clicked = await frame.evaluate(({ xpath, text }) => {
|
|
913
|
+
const root = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
914
|
+
if (!root)
|
|
915
|
+
return false;
|
|
916
|
+
const nodes = root.querySelectorAll('[role="menuitem"],[role="option"]');
|
|
917
|
+
for (const node of Array.from(nodes)) {
|
|
918
|
+
if (node.textContent?.trim() === text) {
|
|
919
|
+
node.click();
|
|
920
|
+
return true;
|
|
921
|
+
}
|
|
922
|
+
}
|
|
923
|
+
return false;
|
|
924
|
+
}, { xpath: domElement.xpath, text: params.text });
|
|
925
|
+
if (clicked) {
|
|
926
|
+
const msg = `Selected menu item ${params.text}`;
|
|
927
|
+
return new ActionResult({
|
|
928
|
+
extracted_content: msg,
|
|
929
|
+
include_in_memory: true,
|
|
930
|
+
long_term_memory: msg,
|
|
931
|
+
});
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
catch (error) {
|
|
935
|
+
continue;
|
|
936
|
+
}
|
|
937
|
+
}
|
|
938
|
+
throw new BrowserError(`Could not select option '${params.text}' for index ${params.index}`);
|
|
939
|
+
});
|
|
940
|
+
}
|
|
941
|
+
registerSheetsActions() {
|
|
942
|
+
// Capture 'this' for use in callbacks
|
|
943
|
+
const self = this;
|
|
944
|
+
this.registry.action('Google Sheets: Get the contents of the entire sheet', {
|
|
945
|
+
domains: ['https://docs.google.com'],
|
|
946
|
+
})(async function sheets_get_contents(_params, { browser_session, signal }) {
|
|
947
|
+
if (!browser_session)
|
|
948
|
+
throw new Error('Browser session missing');
|
|
949
|
+
throwIfAborted(signal);
|
|
950
|
+
const page = await browser_session.get_current_page();
|
|
951
|
+
await page?.keyboard?.press('Enter');
|
|
952
|
+
await page?.keyboard?.press('Escape');
|
|
953
|
+
await page?.keyboard?.press('ControlOrMeta+A');
|
|
954
|
+
await page?.keyboard?.press('ControlOrMeta+C');
|
|
955
|
+
const content = await page?.evaluate?.(() => navigator.clipboard.readText());
|
|
956
|
+
return new ActionResult({
|
|
957
|
+
extracted_content: content ?? '',
|
|
958
|
+
include_in_memory: true,
|
|
959
|
+
long_term_memory: 'Retrieved sheet contents',
|
|
960
|
+
include_extracted_content_only_once: true,
|
|
961
|
+
});
|
|
962
|
+
});
|
|
963
|
+
this.registry.action('Google Sheets: Get the contents of a cell or range of cells', {
|
|
964
|
+
domains: ['https://docs.google.com'],
|
|
965
|
+
param_model: SheetsRangeActionSchema,
|
|
966
|
+
})(async function sheets_get_range(params, { browser_session, signal }) {
|
|
967
|
+
if (!browser_session)
|
|
968
|
+
throw new Error('Browser session missing');
|
|
969
|
+
throwIfAborted(signal);
|
|
970
|
+
const page = await browser_session.get_current_page();
|
|
971
|
+
await self.gotoSheetsRange(page, params.cell_or_range, signal);
|
|
972
|
+
await page?.keyboard?.press('ControlOrMeta+C');
|
|
973
|
+
await waitWithSignal(100, signal);
|
|
974
|
+
const content = await page?.evaluate?.(() => navigator.clipboard.readText());
|
|
975
|
+
return new ActionResult({
|
|
976
|
+
extracted_content: content ?? '',
|
|
977
|
+
include_in_memory: true,
|
|
978
|
+
long_term_memory: `Retrieved contents from ${params.cell_or_range}`,
|
|
979
|
+
include_extracted_content_only_once: true,
|
|
980
|
+
});
|
|
981
|
+
});
|
|
982
|
+
this.registry.action('Google Sheets: Update the content of a cell or range of cells', {
|
|
983
|
+
domains: ['https://docs.google.com'],
|
|
984
|
+
param_model: SheetsUpdateActionSchema,
|
|
985
|
+
})(async function sheets_update(params, { browser_session, signal }) {
|
|
986
|
+
if (!browser_session)
|
|
987
|
+
throw new Error('Browser session missing');
|
|
988
|
+
throwIfAborted(signal);
|
|
989
|
+
const page = await browser_session.get_current_page();
|
|
990
|
+
await self.gotoSheetsRange(page, params.cell_or_range, signal);
|
|
991
|
+
await page?.evaluate?.((value) => {
|
|
992
|
+
const clipboardData = new DataTransfer();
|
|
993
|
+
clipboardData.setData('text/plain', value);
|
|
994
|
+
document.activeElement?.dispatchEvent(new ClipboardEvent('paste', { clipboardData }));
|
|
995
|
+
}, params.value);
|
|
996
|
+
return new ActionResult({
|
|
997
|
+
extracted_content: `Updated cells: ${params.cell_or_range} = ${params.value}`,
|
|
998
|
+
long_term_memory: `Updated cells ${params.cell_or_range} with ${params.value}`,
|
|
999
|
+
});
|
|
1000
|
+
});
|
|
1001
|
+
this.registry.action('Google Sheets: Clear whatever cells are currently selected', {
|
|
1002
|
+
domains: ['https://docs.google.com'],
|
|
1003
|
+
param_model: SheetsRangeActionSchema,
|
|
1004
|
+
})(async function sheets_clear(params, { browser_session, signal }) {
|
|
1005
|
+
if (!browser_session)
|
|
1006
|
+
throw new Error('Browser session missing');
|
|
1007
|
+
throwIfAborted(signal);
|
|
1008
|
+
const page = await browser_session.get_current_page();
|
|
1009
|
+
await self.gotoSheetsRange(page, params.cell_or_range, signal);
|
|
1010
|
+
await page?.keyboard?.press('Backspace');
|
|
1011
|
+
return new ActionResult({
|
|
1012
|
+
extracted_content: `Cleared cells: ${params.cell_or_range}`,
|
|
1013
|
+
long_term_memory: `Cleared cells ${params.cell_or_range}`,
|
|
1014
|
+
});
|
|
1015
|
+
});
|
|
1016
|
+
this.registry.action('Google Sheets: Select a specific cell or range of cells', {
|
|
1017
|
+
domains: ['https://docs.google.com'],
|
|
1018
|
+
param_model: SheetsRangeActionSchema,
|
|
1019
|
+
})(async function sheets_select(params, { browser_session, signal }) {
|
|
1020
|
+
if (!browser_session)
|
|
1021
|
+
throw new Error('Browser session missing');
|
|
1022
|
+
throwIfAborted(signal);
|
|
1023
|
+
const page = await browser_session.get_current_page();
|
|
1024
|
+
await self.gotoSheetsRange(page, params.cell_or_range, signal);
|
|
1025
|
+
return new ActionResult({
|
|
1026
|
+
extracted_content: `Selected cells: ${params.cell_or_range}`,
|
|
1027
|
+
long_term_memory: `Selected cells ${params.cell_or_range}`,
|
|
1028
|
+
});
|
|
1029
|
+
});
|
|
1030
|
+
this.registry.action('Google Sheets: Fallback method to type text into the currently selected cell', {
|
|
1031
|
+
domains: ['https://docs.google.com'],
|
|
1032
|
+
param_model: SheetsInputActionSchema,
|
|
1033
|
+
})(async function sheets_input(params, { browser_session, signal }) {
|
|
1034
|
+
if (!browser_session)
|
|
1035
|
+
throw new Error('Browser session missing');
|
|
1036
|
+
throwIfAborted(signal);
|
|
1037
|
+
const page = await browser_session.get_current_page();
|
|
1038
|
+
await page?.keyboard?.type(params.text, { delay: 100 });
|
|
1039
|
+
await page?.keyboard?.press('Enter');
|
|
1040
|
+
await page?.keyboard?.press('ArrowUp');
|
|
1041
|
+
return new ActionResult({
|
|
1042
|
+
extracted_content: `Inputted text ${params.text}`,
|
|
1043
|
+
long_term_memory: `Inputted text '${params.text}' into cell`,
|
|
1044
|
+
});
|
|
1045
|
+
});
|
|
1046
|
+
}
|
|
1047
|
+
async gotoSheetsRange(page, cell_or_range, signal = null) {
|
|
1048
|
+
if (!page?.keyboard) {
|
|
1049
|
+
throw new BrowserError('No keyboard available for Google Sheets actions.');
|
|
1050
|
+
}
|
|
1051
|
+
throwIfAborted(signal);
|
|
1052
|
+
await page.keyboard.press('Enter');
|
|
1053
|
+
await page.keyboard.press('Escape');
|
|
1054
|
+
await waitWithSignal(100, signal);
|
|
1055
|
+
await page.keyboard.press('Home');
|
|
1056
|
+
await page.keyboard.press('ArrowUp');
|
|
1057
|
+
await waitWithSignal(100, signal);
|
|
1058
|
+
await page.keyboard.press('Control+G');
|
|
1059
|
+
await waitWithSignal(200, signal);
|
|
1060
|
+
await page.keyboard.type(cell_or_range, { delay: 50 });
|
|
1061
|
+
await page.keyboard.press('Enter');
|
|
1062
|
+
await waitWithSignal(200, signal);
|
|
1063
|
+
await page.keyboard.press('Escape');
|
|
1064
|
+
}
|
|
1065
|
+
registerDoneAction(outputModel) {
|
|
1066
|
+
// Capture 'this' for use in callbacks
|
|
1067
|
+
const self = this;
|
|
1068
|
+
if (outputModel) {
|
|
1069
|
+
const structuredSchema = StructuredOutputActionSchema(outputModel);
|
|
1070
|
+
this.registry.action('Complete task - with return text and success flag.', { param_model: structuredSchema })(async function done(params) {
|
|
1071
|
+
const payload = { ...params.data };
|
|
1072
|
+
for (const key of Object.keys(payload)) {
|
|
1073
|
+
const value = payload[key];
|
|
1074
|
+
if (value && typeof value === 'object' && 'value' in value) {
|
|
1075
|
+
payload[key] = value.value;
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
1078
|
+
return new ActionResult({
|
|
1079
|
+
is_done: true,
|
|
1080
|
+
success: params.success,
|
|
1081
|
+
extracted_content: JSON.stringify(payload),
|
|
1082
|
+
long_term_memory: `Task completed. Success Status: ${params.success}`,
|
|
1083
|
+
});
|
|
1084
|
+
});
|
|
1085
|
+
return;
|
|
1086
|
+
}
|
|
1087
|
+
this.registry.action('Complete task - provide a summary to the user.', {
|
|
1088
|
+
param_model: DoneActionSchema,
|
|
1089
|
+
})(async function done(params, { file_system }) {
|
|
1090
|
+
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
1091
|
+
let userMessage = params.text;
|
|
1092
|
+
const lenMaxMemory = 100;
|
|
1093
|
+
let memory = `Task completed: ${params.success} - ${params.text.slice(0, lenMaxMemory)}`;
|
|
1094
|
+
if (params.text.length > lenMaxMemory) {
|
|
1095
|
+
memory += ` - ${params.text.length - lenMaxMemory} more characters`;
|
|
1096
|
+
}
|
|
1097
|
+
const attachments = [];
|
|
1098
|
+
if (params.files_to_display) {
|
|
1099
|
+
if (self.displayFilesInDoneText) {
|
|
1100
|
+
let attachmentText = '';
|
|
1101
|
+
for (const fileName of params.files_to_display) {
|
|
1102
|
+
if (fileName === 'todo.md') {
|
|
1103
|
+
continue;
|
|
1104
|
+
}
|
|
1105
|
+
const content = fsInstance.display_file(fileName);
|
|
1106
|
+
if (content) {
|
|
1107
|
+
attachmentText += `\n\n${fileName}:\n${content}`;
|
|
1108
|
+
attachments.push(fileName);
|
|
1109
|
+
}
|
|
1110
|
+
}
|
|
1111
|
+
if (attachmentText) {
|
|
1112
|
+
userMessage += '\n\nAttachments:';
|
|
1113
|
+
userMessage += attachmentText;
|
|
1114
|
+
}
|
|
1115
|
+
}
|
|
1116
|
+
else {
|
|
1117
|
+
for (const fileName of params.files_to_display) {
|
|
1118
|
+
if (fileName === 'todo.md') {
|
|
1119
|
+
continue;
|
|
1120
|
+
}
|
|
1121
|
+
const content = fsInstance.display_file(fileName);
|
|
1122
|
+
if (content) {
|
|
1123
|
+
attachments.push(fileName);
|
|
1124
|
+
}
|
|
1125
|
+
}
|
|
1126
|
+
}
|
|
1127
|
+
}
|
|
1128
|
+
const attachmentPaths = attachments.map((name) => `${fsInstance.get_dir()}/${name}`);
|
|
1129
|
+
return new ActionResult({
|
|
1130
|
+
is_done: true,
|
|
1131
|
+
success: params.success,
|
|
1132
|
+
extracted_content: userMessage,
|
|
1133
|
+
long_term_memory: memory,
|
|
1134
|
+
attachments: attachmentPaths,
|
|
1135
|
+
});
|
|
1136
|
+
});
|
|
1137
|
+
}
|
|
1138
|
+
use_structured_output_action(outputModel) {
|
|
1139
|
+
this.registerDoneAction(outputModel);
|
|
1140
|
+
}
|
|
1141
|
+
action(description, options = {}) {
|
|
1142
|
+
return this.registry.action(description, options);
|
|
1143
|
+
}
|
|
1144
|
+
async act(action, { browser_session, page_extraction_llm = null, sensitive_data = null, available_file_paths = null, file_system = null, context = null, signal = null, }) {
|
|
1145
|
+
const entries = toActionEntries(action);
|
|
1146
|
+
for (const [actionName, params] of entries) {
|
|
1147
|
+
try {
|
|
1148
|
+
const result = await this.registry.execute_action(actionName, params, {
|
|
1149
|
+
browser_session,
|
|
1150
|
+
page_extraction_llm,
|
|
1151
|
+
sensitive_data,
|
|
1152
|
+
available_file_paths,
|
|
1153
|
+
file_system,
|
|
1154
|
+
context,
|
|
1155
|
+
signal,
|
|
1156
|
+
});
|
|
1157
|
+
if (typeof result === 'string') {
|
|
1158
|
+
return new ActionResult({ extracted_content: result });
|
|
1159
|
+
}
|
|
1160
|
+
if (result instanceof ActionResult) {
|
|
1161
|
+
return result;
|
|
1162
|
+
}
|
|
1163
|
+
if (result == null) {
|
|
1164
|
+
return new ActionResult();
|
|
1165
|
+
}
|
|
1166
|
+
return new ActionResult({ extracted_content: JSON.stringify(result) });
|
|
1167
|
+
}
|
|
1168
|
+
catch (error) {
|
|
1169
|
+
return new ActionResult({
|
|
1170
|
+
error: String(error?.message ?? error ?? ''),
|
|
1171
|
+
});
|
|
1172
|
+
}
|
|
1173
|
+
}
|
|
1174
|
+
return new ActionResult();
|
|
1175
|
+
}
|
|
1176
|
+
}
|