browser-use 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +295 -686
- package/dist/actor/element.d.ts +19 -0
- package/dist/actor/element.js +46 -0
- package/dist/actor/index.d.ts +4 -0
- package/dist/actor/index.js +4 -0
- package/dist/actor/mouse.d.ts +19 -0
- package/dist/actor/mouse.js +39 -0
- package/dist/actor/page.d.ts +29 -0
- package/dist/actor/page.js +88 -0
- package/dist/actor/utils.d.ts +4 -0
- package/dist/actor/utils.js +35 -0
- package/dist/agent/cloud-events.d.ts +18 -0
- package/dist/agent/cloud-events.js +65 -2
- package/dist/agent/gif.d.ts +1 -0
- package/dist/agent/gif.js +24 -2
- package/dist/agent/judge.d.ts +17 -0
- package/dist/agent/judge.js +197 -0
- package/dist/agent/message-manager/service.d.ts +12 -4
- package/dist/agent/message-manager/service.js +205 -39
- package/dist/agent/message-manager/utils.js +0 -1
- package/dist/agent/message-manager/views.d.ts +4 -0
- package/dist/agent/message-manager/views.js +11 -7
- package/dist/agent/prompts.d.ts +24 -3
- package/dist/agent/prompts.js +274 -59
- package/dist/agent/service.d.ts +99 -41
- package/dist/agent/service.js +2266 -472
- package/dist/agent/variable-detector.d.ts +12 -0
- package/dist/agent/variable-detector.js +211 -0
- package/dist/agent/views.d.ts +237 -18
- package/dist/agent/views.js +446 -33
- package/dist/browser/cloud/cloud.d.ts +20 -0
- package/dist/browser/cloud/cloud.js +129 -0
- package/dist/browser/cloud/index.d.ts +2 -0
- package/dist/browser/cloud/index.js +2 -0
- package/dist/browser/cloud/views.d.ts +41 -0
- package/dist/browser/cloud/views.js +35 -0
- package/dist/browser/events.d.ts +345 -0
- package/dist/browser/events.js +566 -0
- package/dist/browser/extensions.js +17 -17
- package/dist/browser/index.d.ts +4 -0
- package/dist/browser/index.js +4 -0
- package/dist/browser/profile.d.ts +8 -2
- package/dist/browser/profile.js +79 -12
- package/dist/browser/session-manager.d.ts +85 -0
- package/dist/browser/session-manager.js +208 -0
- package/dist/browser/session.d.ts +100 -8
- package/dist/browser/session.js +1097 -58
- package/dist/browser/types.d.ts +0 -2
- package/dist/browser/views.d.ts +39 -0
- package/dist/browser/views.js +32 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
- package/dist/browser/watchdogs/base.d.ts +21 -0
- package/dist/browser/watchdogs/base.js +81 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
- package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
- package/dist/browser/watchdogs/crash-watchdog.js +296 -0
- package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
- package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
- package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/dom-watchdog.js +31 -0
- package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
- package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
- package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
- package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
- package/dist/browser/watchdogs/index.d.ts +15 -0
- package/dist/browser/watchdogs/index.js +15 -0
- package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
- package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
- package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
- package/dist/browser/watchdogs/popups-watchdog.js +77 -0
- package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
- package/dist/browser/watchdogs/recording-watchdog.js +249 -0
- package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
- package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
- package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/security-watchdog.js +84 -0
- package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
- package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
- package/dist/cli.d.ts +7 -2
- package/dist/cli.js +182 -25
- package/dist/code-use/formatting.d.ts +3 -0
- package/dist/code-use/formatting.js +18 -0
- package/dist/code-use/index.d.ts +6 -0
- package/dist/code-use/index.js +6 -0
- package/dist/code-use/namespace.d.ts +5 -0
- package/dist/code-use/namespace.js +81 -0
- package/dist/code-use/notebook-export.d.ts +3 -0
- package/dist/code-use/notebook-export.js +56 -0
- package/dist/code-use/service.d.ts +24 -0
- package/dist/code-use/service.js +104 -0
- package/dist/code-use/utils.d.ts +4 -0
- package/dist/code-use/utils.js +98 -0
- package/dist/code-use/views.d.ts +108 -0
- package/dist/code-use/views.js +165 -0
- package/dist/config.d.ts +13 -0
- package/dist/config.js +69 -3
- package/dist/controller/registry/service.d.ts +10 -1
- package/dist/controller/registry/service.js +266 -10
- package/dist/controller/registry/views.d.ts +4 -1
- package/dist/controller/registry/views.js +25 -2
- package/dist/controller/service.d.ts +10 -1
- package/dist/controller/service.js +1807 -268
- package/dist/controller/views.d.ts +78 -155
- package/dist/controller/views.js +61 -12
- package/dist/dom/history-tree-processor/service.d.ts +5 -0
- package/dist/dom/history-tree-processor/service.js +169 -14
- package/dist/dom/history-tree-processor/view.d.ts +7 -1
- package/dist/dom/history-tree-processor/view.js +10 -1
- package/dist/dom/markdown-extractor.d.ts +37 -0
- package/dist/dom/markdown-extractor.js +345 -0
- package/dist/dom/service.d.ts +3 -1
- package/dist/dom/service.js +76 -0
- package/dist/dom/views.d.ts +1 -0
- package/dist/dom/views.js +45 -0
- package/dist/event-bus.d.ts +107 -7
- package/dist/event-bus.js +313 -10
- package/dist/exceptions.d.ts +0 -3
- package/dist/exceptions.js +0 -7
- package/dist/filesystem/file-system.d.ts +18 -0
- package/dist/filesystem/file-system.js +503 -42
- package/dist/index.d.ts +7 -0
- package/dist/index.js +6 -0
- package/dist/integrations/gmail/actions.d.ts +3 -3
- package/dist/integrations/gmail/actions.js +4 -4
- package/dist/llm/anthropic/chat.d.ts +18 -1
- package/dist/llm/anthropic/chat.js +123 -55
- package/dist/llm/anthropic/serializer.d.ts +2 -0
- package/dist/llm/anthropic/serializer.js +81 -9
- package/dist/llm/aws/chat-anthropic.d.ts +17 -0
- package/dist/llm/aws/chat-anthropic.js +126 -26
- package/dist/llm/aws/chat-bedrock.d.ts +28 -1
- package/dist/llm/aws/chat-bedrock.js +161 -34
- package/dist/llm/aws/serializer.d.ts +13 -1
- package/dist/llm/aws/serializer.js +56 -17
- package/dist/llm/azure/chat.d.ts +53 -2
- package/dist/llm/azure/chat.js +366 -54
- package/dist/llm/base.d.ts +2 -0
- package/dist/llm/browser-use/chat.d.ts +40 -0
- package/dist/llm/browser-use/chat.js +305 -0
- package/dist/llm/browser-use/index.d.ts +1 -0
- package/dist/llm/browser-use/index.js +1 -0
- package/dist/llm/cerebras/chat.d.ts +39 -0
- package/dist/llm/cerebras/chat.js +178 -0
- package/dist/llm/cerebras/index.d.ts +2 -0
- package/dist/llm/cerebras/index.js +2 -0
- package/dist/llm/cerebras/serializer.d.ts +7 -0
- package/dist/llm/cerebras/serializer.js +82 -0
- package/dist/llm/deepseek/chat.d.ts +19 -2
- package/dist/llm/deepseek/chat.js +138 -25
- package/dist/llm/google/chat.d.ts +46 -2
- package/dist/llm/google/chat.js +267 -64
- package/dist/llm/google/serializer.d.ts +9 -1
- package/dist/llm/google/serializer.js +141 -34
- package/dist/llm/groq/chat.d.ts +21 -2
- package/dist/llm/groq/chat.js +125 -26
- package/dist/llm/groq/parser.js +3 -1
- package/dist/llm/mistral/chat.d.ts +43 -0
- package/dist/llm/mistral/chat.js +154 -0
- package/dist/llm/mistral/index.d.ts +2 -0
- package/dist/llm/mistral/index.js +2 -0
- package/dist/llm/mistral/schema.d.ts +8 -0
- package/dist/llm/mistral/schema.js +27 -0
- package/dist/llm/models.d.ts +2 -0
- package/dist/llm/models.js +317 -0
- package/dist/llm/ollama/chat.d.ts +13 -1
- package/dist/llm/ollama/chat.js +110 -19
- package/dist/llm/ollama/serializer.d.ts +1 -0
- package/dist/llm/ollama/serializer.js +34 -12
- package/dist/llm/openai/chat.d.ts +16 -0
- package/dist/llm/openai/chat.js +94 -44
- package/dist/llm/openai/like.d.ts +5 -3
- package/dist/llm/openai/like.js +7 -3
- package/dist/llm/openai/responses-serializer.d.ts +18 -0
- package/dist/llm/openai/responses-serializer.js +72 -0
- package/dist/llm/openrouter/chat.d.ts +28 -2
- package/dist/llm/openrouter/chat.js +115 -29
- package/dist/llm/schema.d.ts +11 -1
- package/dist/llm/schema.js +81 -1
- package/dist/llm/vercel/chat.d.ts +50 -0
- package/dist/llm/vercel/chat.js +276 -0
- package/dist/llm/vercel/index.d.ts +1 -0
- package/dist/llm/vercel/index.js +1 -0
- package/dist/llm/vercel/serializer.d.ts +5 -0
- package/dist/llm/vercel/serializer.js +7 -0
- package/dist/llm/views.d.ts +2 -1
- package/dist/llm/views.js +3 -1
- package/dist/logging-config.d.ts +2 -0
- package/dist/logging-config.js +82 -29
- package/dist/mcp/client.d.ts +10 -5
- package/dist/mcp/client.js +14 -9
- package/dist/mcp/controller.d.ts +42 -3
- package/dist/mcp/controller.js +56 -31
- package/dist/mcp/server.d.ts +14 -0
- package/dist/mcp/server.js +255 -52
- package/dist/observability.js +10 -4
- package/dist/sandbox/index.d.ts +2 -0
- package/dist/sandbox/index.js +2 -0
- package/dist/sandbox/sandbox.d.ts +19 -0
- package/dist/sandbox/sandbox.js +140 -0
- package/dist/sandbox/views.d.ts +67 -0
- package/dist/sandbox/views.js +121 -0
- package/dist/skill-cli/index.d.ts +3 -0
- package/dist/skill-cli/index.js +3 -0
- package/dist/skill-cli/protocol.d.ts +30 -0
- package/dist/skill-cli/protocol.js +48 -0
- package/dist/skill-cli/server.d.ts +11 -0
- package/dist/skill-cli/server.js +85 -0
- package/dist/skill-cli/sessions.d.ts +24 -0
- package/dist/skill-cli/sessions.js +47 -0
- package/dist/skills/index.d.ts +3 -0
- package/dist/skills/index.js +3 -0
- package/dist/skills/service.d.ts +27 -0
- package/dist/skills/service.js +266 -0
- package/dist/skills/utils.d.ts +6 -0
- package/dist/skills/utils.js +53 -0
- package/dist/skills/views.d.ts +40 -0
- package/dist/skills/views.js +10 -0
- package/dist/sync/auth.js +8 -3
- package/dist/sync/service.d.ts +6 -6
- package/dist/sync/service.js +54 -89
- package/dist/telemetry/views.d.ts +20 -6
- package/dist/telemetry/views.js +23 -5
- package/dist/tokens/custom-pricing.d.ts +2 -0
- package/dist/tokens/custom-pricing.js +22 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/dist/tokens/mappings.d.ts +1 -0
- package/dist/tokens/mappings.js +3 -0
- package/dist/tokens/service.js +27 -8
- package/dist/tools/extraction/index.d.ts +2 -0
- package/dist/tools/extraction/index.js +2 -0
- package/dist/tools/extraction/schema-utils.d.ts +6 -0
- package/dist/tools/extraction/schema-utils.js +237 -0
- package/dist/tools/extraction/views.d.ts +7 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.js +5 -0
- package/dist/tools/registry/index.d.ts +2 -0
- package/dist/tools/registry/index.js +2 -0
- package/dist/tools/registry/service.d.ts +1 -0
- package/dist/tools/registry/service.js +1 -0
- package/dist/tools/registry/views.d.ts +1 -0
- package/dist/tools/registry/views.js +1 -0
- package/dist/tools/service.d.ts +2 -0
- package/dist/tools/service.js +1 -0
- package/dist/tools/utils.d.ts +2 -0
- package/dist/tools/utils.js +57 -0
- package/dist/tools/views.d.ts +1 -0
- package/dist/tools/views.js +1 -0
- package/dist/utils.d.ts +10 -1
- package/dist/utils.js +70 -3
- package/package.json +87 -26
- package/dist/dom/playground/process-dom.js +0 -5
- package/dist/dom/playground/test-accessibility.d.ts +0 -44
- package/dist/dom/playground/test-accessibility.js +0 -111
- /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
|
@@ -1,14 +1,20 @@
|
|
|
1
|
-
import fs from 'node:fs';
|
|
1
|
+
import fs, { promises as fsp } from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { validate as validateJsonSchema } from '@cfworker/json-schema';
|
|
2
4
|
import { ActionResult } from '../agent/views.js';
|
|
5
|
+
import { ClickCoordinateEvent, ClickElementEvent, CloseTabEvent, GetDropdownOptionsEvent, GoBackEvent, NavigateToUrlEvent, ScrollEvent, ScrollToTextEvent, ScreenshotEvent, SelectDropdownOptionEvent, SendKeysEvent, SwitchTabEvent, TypeTextEvent, UploadFileEvent, WaitEvent, } from '../browser/events.js';
|
|
3
6
|
import { BrowserError } from '../browser/views.js';
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
7
|
+
import { chunkMarkdownByStructure, extractCleanMarkdownFromHtml, } from '../dom/markdown-extractor.js';
|
|
8
|
+
import { extractPdfText, FileSystem } from '../filesystem/file-system.js';
|
|
9
|
+
import { ClickElementActionIndexOnlySchema, ClickElementActionSchema, CloseTabActionSchema, DoneActionSchema, EvaluateActionSchema, ExtractStructuredDataActionSchema, FindElementsActionSchema, DropdownOptionsActionSchema, SelectDropdownActionSchema, GoToUrlActionSchema, InputTextActionSchema, NoParamsActionSchema, ReadLongContentActionSchema, ReadFileActionSchema, ReplaceFileStrActionSchema, ScrollActionSchema, ScrollToTextActionSchema, SearchActionSchema, SearchPageActionSchema, SearchGoogleActionSchema, ScreenshotActionSchema, StructuredOutputActionSchema, SwitchTabActionSchema, UploadFileActionSchema, WaitActionSchema, WriteFileActionSchema, SendKeysActionSchema, SheetsRangeActionSchema, SheetsUpdateActionSchema, SheetsInputActionSchema, } from './views.js';
|
|
6
10
|
import { Registry } from './registry/service.js';
|
|
7
|
-
import
|
|
8
|
-
import { UserMessage } from '../llm/messages.js';
|
|
11
|
+
import { SystemMessage, UserMessage } from '../llm/messages.js';
|
|
9
12
|
import { createLogger } from '../logging-config.js';
|
|
10
|
-
|
|
11
|
-
|
|
13
|
+
import { sanitize_surrogates } from '../utils.js';
|
|
14
|
+
import { findUnsupportedJsonSchemaKeyword, normalizeStructuredDataBySchema, } from '../tools/extraction/schema-utils.js';
|
|
15
|
+
import { getClickDescription } from '../tools/utils.js';
|
|
16
|
+
const DEFAULT_WAIT_OFFSET = 1;
|
|
17
|
+
const MAX_WAIT_SECONDS = 30;
|
|
12
18
|
const toActionEntries = (action) => {
|
|
13
19
|
if (!action) {
|
|
14
20
|
return [];
|
|
@@ -58,6 +64,13 @@ const waitWithSignal = async (timeoutMs, signal) => {
|
|
|
58
64
|
}
|
|
59
65
|
});
|
|
60
66
|
};
|
|
67
|
+
const dispatchBrowserEventIfAvailable = async (browser_session, event, fallback) => {
|
|
68
|
+
if (typeof browser_session?.dispatch_browser_event === 'function') {
|
|
69
|
+
const dispatchResult = await browser_session.dispatch_browser_event(event);
|
|
70
|
+
return dispatchResult?.event?.event_result ?? null;
|
|
71
|
+
}
|
|
72
|
+
return fallback();
|
|
73
|
+
};
|
|
61
74
|
const runWithTimeoutAndSignal = async (operation, timeoutMs, signal, timeoutMessage = 'Operation timed out') => {
|
|
62
75
|
throwIfAborted(signal);
|
|
63
76
|
if (timeoutMs <= 0) {
|
|
@@ -114,14 +127,33 @@ const runWithTimeoutAndSignal = async (operation, timeoutMs, signal, timeoutMess
|
|
|
114
127
|
});
|
|
115
128
|
});
|
|
116
129
|
};
|
|
130
|
+
const validateAndFixJavaScript = (code) => {
|
|
131
|
+
let fixedCode = code;
|
|
132
|
+
// Fix double-escaped quotes often produced in tool-argument JSON.
|
|
133
|
+
fixedCode = fixedCode.replace(/\\"/g, '"');
|
|
134
|
+
// Fix over-escaped regex tokens (e.g. \\d -> \d).
|
|
135
|
+
fixedCode = fixedCode.replace(/\\\\([dDsSwWbBnrtfv])/g, '\\$1');
|
|
136
|
+
fixedCode = fixedCode.replace(/\\\\([.*+?^${}()|[\]])/g, '\\$1');
|
|
137
|
+
// Convert brittle mixed-quote selectors/XPaths into template literals.
|
|
138
|
+
fixedCode = fixedCode.replace(/document\.evaluate\s*\(\s*"([^"]*)"\s*,/g, (_match, xpath) => `document.evaluate(\`${xpath}\`,`);
|
|
139
|
+
fixedCode = fixedCode.replace(/(querySelector(?:All)?)\s*\(\s*"([^"]*)"\s*\)/g, (_match, methodName, selector) => `${methodName}(\`${selector}\`)`);
|
|
140
|
+
fixedCode = fixedCode.replace(/\.closest\s*\(\s*"([^"]*)"\s*\)/g, (_match, selector) => `.closest(\`${selector}\`)`);
|
|
141
|
+
fixedCode = fixedCode.replace(/\.matches\s*\(\s*"([^"]*)"\s*\)/g, (_match, selector) => `.matches(\`${selector}\`)`);
|
|
142
|
+
return fixedCode;
|
|
143
|
+
};
|
|
117
144
|
export class Controller {
|
|
118
145
|
registry;
|
|
119
146
|
displayFilesInDoneText;
|
|
147
|
+
outputModel;
|
|
148
|
+
coordinateClickingEnabled;
|
|
149
|
+
clickActionHandler = null;
|
|
120
150
|
logger;
|
|
121
151
|
constructor(options = {}) {
|
|
122
152
|
const { exclude_actions = [], output_model = null, display_files_in_done_text = true, } = options;
|
|
123
153
|
this.registry = new Registry(exclude_actions);
|
|
124
154
|
this.displayFilesInDoneText = display_files_in_done_text;
|
|
155
|
+
this.outputModel = output_model;
|
|
156
|
+
this.coordinateClickingEnabled = false;
|
|
125
157
|
this.logger = createLogger('browser_use.controller');
|
|
126
158
|
this.registerDefaultActions(output_model);
|
|
127
159
|
}
|
|
@@ -131,15 +163,50 @@ export class Controller {
|
|
|
131
163
|
this.registerElementActions();
|
|
132
164
|
this.registerTabActions();
|
|
133
165
|
this.registerContentActions();
|
|
166
|
+
this.registerExplorationActions();
|
|
134
167
|
this.registerScrollActions();
|
|
135
168
|
this.registerFileSystemActions();
|
|
169
|
+
this.registerUtilityActions();
|
|
136
170
|
this.registerKeyboardActions();
|
|
137
171
|
this.registerDropdownActions();
|
|
138
172
|
this.registerSheetsActions();
|
|
139
173
|
}
|
|
140
174
|
registerNavigationActions() {
|
|
175
|
+
this.registry.action('Search the query on a web search engine (duckduckgo, google, or bing).', { param_model: SearchActionSchema, terminates_sequence: true })(async function search(params, { browser_session, signal }) {
|
|
176
|
+
if (!browser_session)
|
|
177
|
+
throw new Error('Browser session missing');
|
|
178
|
+
throwIfAborted(signal);
|
|
179
|
+
const requestedEngine = String(params.engine ?? 'duckduckgo');
|
|
180
|
+
const engine = requestedEngine.toLowerCase();
|
|
181
|
+
const encodedQuery = encodeURIComponent(params.query).replace(/%20/g, '+');
|
|
182
|
+
const searchUrlByEngine = {
|
|
183
|
+
duckduckgo: `https://duckduckgo.com/?q=${encodedQuery}`,
|
|
184
|
+
google: `https://www.google.com/search?q=${encodedQuery}&udm=14`,
|
|
185
|
+
bing: `https://www.bing.com/search?q=${encodedQuery}`,
|
|
186
|
+
};
|
|
187
|
+
const searchUrl = searchUrlByEngine[engine];
|
|
188
|
+
if (!searchUrl) {
|
|
189
|
+
return new ActionResult({
|
|
190
|
+
error: `Unsupported search engine: ${requestedEngine}. Options: duckduckgo, google, bing`,
|
|
191
|
+
});
|
|
192
|
+
}
|
|
193
|
+
try {
|
|
194
|
+
await browser_session.navigate_to(searchUrl, { signal });
|
|
195
|
+
const memory = `Searched ${requestedEngine} for '${params.query}'`;
|
|
196
|
+
return new ActionResult({
|
|
197
|
+
extracted_content: memory,
|
|
198
|
+
long_term_memory: memory,
|
|
199
|
+
});
|
|
200
|
+
}
|
|
201
|
+
catch (error) {
|
|
202
|
+
return new ActionResult({
|
|
203
|
+
error: `Failed to search ${requestedEngine} for "${params.query}": ${String(error?.message ?? error)}`,
|
|
204
|
+
});
|
|
205
|
+
}
|
|
206
|
+
});
|
|
141
207
|
this.registry.action('Search the query in Google...', {
|
|
142
208
|
param_model: SearchGoogleActionSchema,
|
|
209
|
+
terminates_sequence: true,
|
|
143
210
|
})(async function search_google(params, { browser_session, signal }) {
|
|
144
211
|
if (!browser_session)
|
|
145
212
|
throw new Error('Browser session missing');
|
|
@@ -160,9 +227,7 @@ export class Controller {
|
|
|
160
227
|
long_term_memory: `Searched Google for '${params.query}'`,
|
|
161
228
|
});
|
|
162
229
|
});
|
|
163
|
-
|
|
164
|
-
param_model: GoToUrlActionSchema,
|
|
165
|
-
})(async function go_to_url(params, { browser_session, signal }) {
|
|
230
|
+
const navigateImpl = async function (params, { browser_session, signal, }) {
|
|
166
231
|
if (!browser_session)
|
|
167
232
|
throw new Error('Browser session missing');
|
|
168
233
|
throwIfAborted(signal);
|
|
@@ -177,7 +242,7 @@ export class Controller {
|
|
|
177
242
|
long_term_memory: `Opened new tab with URL ${params.url}`,
|
|
178
243
|
});
|
|
179
244
|
}
|
|
180
|
-
await browser_session.navigate_to(params.url, { signal });
|
|
245
|
+
await dispatchBrowserEventIfAvailable(browser_session, new NavigateToUrlEvent({ url: params.url, new_tab: false }), () => browser_session.navigate_to(params.url, { signal }));
|
|
181
246
|
const msg = `🔗 Navigated to ${params.url}`;
|
|
182
247
|
return new ActionResult({
|
|
183
248
|
extracted_content: msg,
|
|
@@ -195,44 +260,181 @@ export class Controller {
|
|
|
195
260
|
'net::',
|
|
196
261
|
];
|
|
197
262
|
if (networkFailures.some((needle) => errorMsg.includes(needle))) {
|
|
198
|
-
|
|
199
|
-
|
|
263
|
+
return new ActionResult({
|
|
264
|
+
error: `Navigation failed - site unavailable: ${params.url}`,
|
|
265
|
+
});
|
|
200
266
|
}
|
|
201
|
-
|
|
267
|
+
if (error instanceof Error &&
|
|
268
|
+
error.name === 'RuntimeError' &&
|
|
269
|
+
errorMsg.includes('CDP client not initialized')) {
|
|
270
|
+
return new ActionResult({
|
|
271
|
+
error: `Browser connection error: ${errorMsg}`,
|
|
272
|
+
});
|
|
273
|
+
}
|
|
274
|
+
return new ActionResult({
|
|
275
|
+
error: `Navigation failed: ${errorMsg}`,
|
|
276
|
+
});
|
|
202
277
|
}
|
|
278
|
+
};
|
|
279
|
+
this.registry.action('Navigate to URL...', {
|
|
280
|
+
param_model: GoToUrlActionSchema,
|
|
281
|
+
terminates_sequence: true,
|
|
282
|
+
})(async function go_to_url(params, { browser_session, signal }) {
|
|
283
|
+
return navigateImpl(params, { browser_session, signal });
|
|
203
284
|
});
|
|
204
|
-
this.registry.action('
|
|
285
|
+
this.registry.action('Navigate to URL...', {
|
|
286
|
+
param_model: GoToUrlActionSchema,
|
|
287
|
+
terminates_sequence: true,
|
|
288
|
+
})(async function navigate(params, { browser_session, signal }) {
|
|
289
|
+
return navigateImpl(params, { browser_session, signal });
|
|
290
|
+
});
|
|
291
|
+
this.registry.action('Go back', {
|
|
292
|
+
param_model: NoParamsActionSchema,
|
|
293
|
+
terminates_sequence: true,
|
|
294
|
+
})(async function go_back(_params, { browser_session, signal }) {
|
|
205
295
|
if (!browser_session)
|
|
206
296
|
throw new Error('Browser session missing');
|
|
207
297
|
throwIfAborted(signal);
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
298
|
+
try {
|
|
299
|
+
await dispatchBrowserEventIfAvailable(browser_session, new GoBackEvent(), () => browser_session.go_back({ signal }));
|
|
300
|
+
const memory = 'Navigated back';
|
|
301
|
+
return new ActionResult({ extracted_content: memory });
|
|
302
|
+
}
|
|
303
|
+
catch (error) {
|
|
304
|
+
return new ActionResult({
|
|
305
|
+
error: `Failed to go back: ${String(error?.message ?? error)}`,
|
|
306
|
+
});
|
|
307
|
+
}
|
|
211
308
|
});
|
|
212
|
-
this.registry.action('Wait for x seconds
|
|
309
|
+
this.registry.action('Wait for x seconds.', {
|
|
310
|
+
param_model: WaitActionSchema,
|
|
311
|
+
})(async function wait(params, { signal, browser_session }) {
|
|
213
312
|
const seconds = params.seconds ?? 3;
|
|
214
313
|
const actualSeconds = Math.min(Math.max(seconds - DEFAULT_WAIT_OFFSET, 0), MAX_WAIT_SECONDS);
|
|
215
|
-
const msg = `🕒
|
|
314
|
+
const msg = `🕒 Waited for ${seconds} second${seconds === 1 ? '' : 's'}`;
|
|
216
315
|
if (actualSeconds > 0) {
|
|
217
|
-
|
|
316
|
+
if (browser_session) {
|
|
317
|
+
await dispatchBrowserEventIfAvailable(browser_session, new WaitEvent({
|
|
318
|
+
seconds: actualSeconds,
|
|
319
|
+
max_seconds: MAX_WAIT_SECONDS,
|
|
320
|
+
}), () => waitWithSignal(actualSeconds * 1000, signal));
|
|
321
|
+
}
|
|
322
|
+
else {
|
|
323
|
+
await waitWithSignal(actualSeconds * 1000, signal);
|
|
324
|
+
}
|
|
218
325
|
}
|
|
219
|
-
return new ActionResult({
|
|
326
|
+
return new ActionResult({
|
|
327
|
+
extracted_content: msg,
|
|
328
|
+
long_term_memory: `Waited for ${seconds} second${seconds === 1 ? '' : 's'}`,
|
|
329
|
+
});
|
|
220
330
|
});
|
|
221
331
|
}
|
|
222
332
|
registerElementActions() {
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
333
|
+
const logger = this.logger;
|
|
334
|
+
const convertLlmCoordinatesToViewport = (llmX, llmY, browserSession) => {
|
|
335
|
+
const llmSize = browserSession?.llm_screenshot_size;
|
|
336
|
+
const viewportSize = browserSession?._original_viewport_size;
|
|
337
|
+
if (!Array.isArray(llmSize) ||
|
|
338
|
+
llmSize.length !== 2 ||
|
|
339
|
+
!Array.isArray(viewportSize) ||
|
|
340
|
+
viewportSize.length !== 2) {
|
|
341
|
+
return [llmX, llmY];
|
|
342
|
+
}
|
|
343
|
+
const [llmWidth, llmHeight] = llmSize.map((value) => Number(value));
|
|
344
|
+
const [viewportWidth, viewportHeight] = viewportSize.map((value) => Number(value));
|
|
345
|
+
if (!Number.isFinite(llmWidth) ||
|
|
346
|
+
!Number.isFinite(llmHeight) ||
|
|
347
|
+
!Number.isFinite(viewportWidth) ||
|
|
348
|
+
!Number.isFinite(viewportHeight) ||
|
|
349
|
+
llmWidth <= 0 ||
|
|
350
|
+
llmHeight <= 0 ||
|
|
351
|
+
viewportWidth <= 0 ||
|
|
352
|
+
viewportHeight <= 0) {
|
|
353
|
+
return [llmX, llmY];
|
|
354
|
+
}
|
|
355
|
+
const actualX = Math.floor((llmX / llmWidth) * viewportWidth);
|
|
356
|
+
const actualY = Math.floor((llmY / llmHeight) * viewportHeight);
|
|
357
|
+
logger.info(`🔄 Converting coordinates: LLM (${llmX}, ${llmY}) @ ${llmWidth}x${llmHeight} -> Viewport (${actualX}, ${actualY}) @ ${viewportWidth}x${viewportHeight}`);
|
|
358
|
+
return [actualX, actualY];
|
|
359
|
+
};
|
|
360
|
+
const clickImpl = async (params, { browser_session, signal }) => {
|
|
226
361
|
if (!browser_session)
|
|
227
362
|
throw new Error('Browser session missing');
|
|
228
363
|
throwIfAborted(signal);
|
|
229
|
-
const
|
|
364
|
+
const collectTabIds = () => {
|
|
365
|
+
if (!Array.isArray(browser_session.tabs)) {
|
|
366
|
+
return new Set();
|
|
367
|
+
}
|
|
368
|
+
return new Set(browser_session.tabs
|
|
369
|
+
.map((tab) => tab?.page_id)
|
|
370
|
+
.filter((pageId) => typeof pageId === 'number' && Number.isFinite(pageId)));
|
|
371
|
+
};
|
|
372
|
+
const detectNewTabNote = async (tabsBefore) => {
|
|
373
|
+
try {
|
|
374
|
+
await waitWithSignal(50, signal);
|
|
375
|
+
const tabsAfter = Array.isArray(browser_session.tabs)
|
|
376
|
+
? browser_session.tabs
|
|
377
|
+
: [];
|
|
378
|
+
const newTab = tabsAfter.find((tab) => {
|
|
379
|
+
const pageId = tab?.page_id;
|
|
380
|
+
return typeof pageId === 'number' && !tabsBefore.has(pageId);
|
|
381
|
+
});
|
|
382
|
+
if (!newTab) {
|
|
383
|
+
return '';
|
|
384
|
+
}
|
|
385
|
+
const tabId = typeof newTab?.tab_id === 'string' && newTab.tab_id.trim()
|
|
386
|
+
? newTab.tab_id.trim()
|
|
387
|
+
: String(newTab.page_id).padStart(4, '0').slice(-4);
|
|
388
|
+
return `. Note: This opened a new tab (tab_id: ${tabId}) - switch to it if you need to interact with the new page.`;
|
|
389
|
+
}
|
|
390
|
+
catch {
|
|
391
|
+
return '';
|
|
392
|
+
}
|
|
393
|
+
};
|
|
394
|
+
if (params.coordinate_x != null &&
|
|
395
|
+
params.coordinate_y != null &&
|
|
396
|
+
params.index == null) {
|
|
397
|
+
if (!this.coordinateClickingEnabled) {
|
|
398
|
+
throw new BrowserError('Coordinate clicking is disabled for the current model. Provide an element index.');
|
|
399
|
+
}
|
|
400
|
+
const tabsBefore = collectTabIds();
|
|
401
|
+
const page = await browser_session.get_current_page();
|
|
402
|
+
if (!page?.mouse?.click) {
|
|
403
|
+
throw new BrowserError('Unable to perform coordinate click on the current page.');
|
|
404
|
+
}
|
|
405
|
+
const [actualX, actualY] = convertLlmCoordinatesToViewport(params.coordinate_x, params.coordinate_y, browser_session);
|
|
406
|
+
await dispatchBrowserEventIfAvailable(browser_session, new ClickCoordinateEvent({
|
|
407
|
+
coordinate_x: actualX,
|
|
408
|
+
coordinate_y: actualY,
|
|
409
|
+
}), () => page.mouse.click(actualX, actualY));
|
|
410
|
+
const coordinateMessage = `🖱️ Clicked at coordinates (${params.coordinate_x}, ${params.coordinate_y})` +
|
|
411
|
+
(await detectNewTabNote(tabsBefore));
|
|
412
|
+
return new ActionResult({
|
|
413
|
+
extracted_content: coordinateMessage,
|
|
414
|
+
include_in_memory: true,
|
|
415
|
+
long_term_memory: coordinateMessage,
|
|
416
|
+
metadata: {
|
|
417
|
+
click_x: actualX,
|
|
418
|
+
click_y: actualY,
|
|
419
|
+
},
|
|
420
|
+
});
|
|
421
|
+
}
|
|
422
|
+
if (params.index == null) {
|
|
423
|
+
return new ActionResult({
|
|
424
|
+
error: 'Must provide either index or both coordinate_x and coordinate_y',
|
|
425
|
+
});
|
|
426
|
+
}
|
|
427
|
+
const element = await browser_session.get_dom_element_by_index(params.index, {
|
|
428
|
+
signal,
|
|
429
|
+
});
|
|
230
430
|
if (!element) {
|
|
231
|
-
|
|
431
|
+
const msg = `Element index ${params.index} not available - page may have changed. Try refreshing browser state.`;
|
|
432
|
+
logger.warning(`⚠️ ${msg}`);
|
|
433
|
+
return new ActionResult({
|
|
434
|
+
extracted_content: msg,
|
|
435
|
+
});
|
|
232
436
|
}
|
|
233
|
-
const
|
|
234
|
-
? browser_session.tabs.length
|
|
235
|
-
: 0;
|
|
437
|
+
const tabsBefore = collectTabIds();
|
|
236
438
|
if (browser_session.is_file_input?.(element)) {
|
|
237
439
|
const msg = `Index ${params.index} - has an element which opens file upload dialog.`;
|
|
238
440
|
return new ActionResult({
|
|
@@ -242,124 +444,430 @@ export class Controller {
|
|
|
242
444
|
long_term_memory: msg,
|
|
243
445
|
});
|
|
244
446
|
}
|
|
245
|
-
const downloadPath = await browser_session
|
|
447
|
+
const downloadPath = await dispatchBrowserEventIfAvailable(browser_session, new ClickElementEvent({
|
|
448
|
+
node: element,
|
|
449
|
+
button: 'left',
|
|
450
|
+
}), () => browser_session._click_element_node(element, {
|
|
246
451
|
signal,
|
|
247
|
-
});
|
|
248
|
-
let msg
|
|
452
|
+
}));
|
|
453
|
+
let msg;
|
|
249
454
|
if (downloadPath) {
|
|
250
455
|
msg = `💾 Downloaded file to ${downloadPath}`;
|
|
251
456
|
}
|
|
252
457
|
else {
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
458
|
+
let elementDescription = '';
|
|
459
|
+
if (typeof element?.tag_name === 'string' &&
|
|
460
|
+
typeof element?.get_all_text_till_next_clickable_element ===
|
|
461
|
+
'function') {
|
|
462
|
+
try {
|
|
463
|
+
elementDescription = getClickDescription(element);
|
|
464
|
+
}
|
|
465
|
+
catch {
|
|
466
|
+
elementDescription = '';
|
|
467
|
+
}
|
|
468
|
+
}
|
|
469
|
+
if (elementDescription) {
|
|
470
|
+
msg = `🖱️ Clicked ${elementDescription}`;
|
|
471
|
+
}
|
|
472
|
+
else {
|
|
473
|
+
const snippet = element.get_all_text_till_next_clickable_element?.(2) ?? '';
|
|
474
|
+
msg = `🖱️ Clicked button with index ${params.index}: ${snippet}`;
|
|
475
|
+
}
|
|
260
476
|
}
|
|
477
|
+
msg += await detectNewTabNote(tabsBefore);
|
|
261
478
|
return new ActionResult({
|
|
262
479
|
extracted_content: msg,
|
|
263
480
|
include_in_memory: true,
|
|
264
481
|
long_term_memory: msg,
|
|
265
482
|
});
|
|
266
|
-
}
|
|
267
|
-
this.
|
|
483
|
+
};
|
|
484
|
+
this.clickActionHandler = clickImpl;
|
|
485
|
+
this.registerClickActions();
|
|
486
|
+
const detectSensitiveKeyName = (value, sensitiveData) => {
|
|
487
|
+
if (!value || !sensitiveData) {
|
|
488
|
+
return null;
|
|
489
|
+
}
|
|
490
|
+
for (const [domainOrKey, content] of Object.entries(sensitiveData)) {
|
|
491
|
+
if (typeof content === 'string') {
|
|
492
|
+
if (content === value) {
|
|
493
|
+
return domainOrKey;
|
|
494
|
+
}
|
|
495
|
+
continue;
|
|
496
|
+
}
|
|
497
|
+
if (!content || typeof content !== 'object') {
|
|
498
|
+
continue;
|
|
499
|
+
}
|
|
500
|
+
for (const [key, nestedValue] of Object.entries(content)) {
|
|
501
|
+
if (nestedValue === value) {
|
|
502
|
+
return key;
|
|
503
|
+
}
|
|
504
|
+
}
|
|
505
|
+
}
|
|
506
|
+
return null;
|
|
507
|
+
};
|
|
508
|
+
/**
 * Shared implementation behind the text-input actions.
 *
 * Resolves the element at `params.index`, types `params.text` into it (via a
 * `TypeTextEvent` when event dispatch is available, otherwise through
 * `browser_session._input_text_element_node`), then builds a result message.
 *
 * @param {{ index: number, text: string, clear?: boolean }} params
 * @param {object} context
 * @param {object} context.browser_session - Required; throws when missing.
 * @param {boolean} [context.has_sensitive_data] - When truthy the typed text is
 *   never echoed; only the matching secret name (if any) is reported.
 * @param {object} [context.sensitive_data] - Map passed to `detectSensitiveKeyName`.
 * @param {AbortSignal} [context.signal]
 * @returns {Promise<ActionResult>}
 * @throws {Error} When `browser_session` is missing.
 */
const inputImpl = async function (params, { browser_session, has_sensitive_data, sensitive_data, signal, }) {
    if (!browser_session)
        throw new Error('Browser session missing');
    throwIfAborted(signal);
    const element = await browser_session.get_dom_element_by_index(params.index, { signal });
    if (!element) {
        const msg = `Element index ${params.index} not available - page may have changed. Try refreshing browser state.`;
        logger.warning(`⚠️ ${msg}`);
        return new ActionResult({
            extracted_content: msg,
        });
    }
    // True for ARIA comboboxes and for any aria-autocomplete mode other than "none".
    const needsAutocompleteDelay = (node) => {
        const attrs = node?.attributes ?? {};
        const role = String(attrs.role ?? '').toLowerCase();
        const ariaAutocomplete = String(attrs['aria-autocomplete'] ?? '').toLowerCase();
        return (role === 'combobox' ||
            (ariaAutocomplete !== '' && ariaAutocomplete !== 'none'));
    };
    // Same as above, plus inputs that reference a <datalist> through `list`.
    const isAutocompleteField = (node) => {
        const hasDatalist = String(node?.attributes?.list ?? '').trim().length > 0;
        return needsAutocompleteDelay(node) || hasDatalist;
    };
    // Resolve the default once so the event path and the direct fallback
    // always agree on whether the field is cleared first.
    const shouldClear = params.clear ?? true;
    await dispatchBrowserEventIfAvailable(browser_session, new TypeTextEvent({
        node: element,
        text: params.text,
        clear: shouldClear,
    }), () => browser_session._input_text_element_node(element, params.text, {
        clear: shouldClear,
        signal,
    }));
    // Best-effort read-back of the field; any failure just disables the
    // mismatch note below.
    let actualValue = null;
    try {
        const locator = await browser_session.get_locate_element?.(element);
        if (locator && typeof locator.inputValue === 'function') {
            const value = await locator.inputValue();
            actualValue = typeof value === 'string' ? value : null;
        }
    }
    catch {
        actualValue = null;
    }
    let msg = `⌨️ Input ${params.text} into index ${params.index}`;
    if (has_sensitive_data) {
        // Replace the whole message so the secret never reaches memory/logs.
        const sensitiveKeyName = detectSensitiveKeyName(params.text, sensitive_data ?? null);
        msg = sensitiveKeyName
            ? `Typed ${sensitiveKeyName}`
            : 'Typed sensitive data';
    }
    else if (actualValue != null && actualValue !== params.text) {
        msg +=
            `\n⚠️ Note: the field's actual value '${actualValue}' differs from typed text '${params.text}'. ` +
                'The page may have reformatted or autocompleted your input.';
    }
    if (isAutocompleteField(element)) {
        msg +=
            '\n💡 This is an autocomplete field. Wait for suggestions to appear, then click the correct suggestion instead of pressing Enter.';
        if (needsAutocompleteDelay(element)) {
            // Short pause before returning; datalist-only fields skip it.
            await waitWithSignal(400, signal);
        }
    }
    return new ActionResult({
        extracted_content: msg,
        include_in_memory: true,
        long_term_memory: msg,
    });
};
|
|
582
|
+
// Two registrations share one implementation; the handler functions are
// named `input_text` and `input` respectively.
this.registry.action('Click and input text into an input interactive element', { param_model: InputTextActionSchema })(async function input_text(params, { browser_session, has_sensitive_data, sensitive_data, signal }) {
    const forwarded = { browser_session, has_sensitive_data, sensitive_data, signal };
    return inputImpl(params, forwarded);
});
this.registry.action('Click and input text into an input interactive element', { param_model: InputTextActionSchema })(async function input(params, { browser_session, has_sensitive_data, sensitive_data, signal }) {
    const forwarded = { browser_session, has_sensitive_data, sensitive_data, signal };
    return inputImpl(params, forwarded);
});
|
|
287
598
|
// Register `upload_file`: validate the requested path, locate a file input at
// or near `params.index`, and attach the file to it.
this.registry.action('Upload file to interactive element with file path', {
    param_model: UploadFileActionSchema,
})(async function upload_file(params, { browser_session, available_file_paths, file_system, signal }) {
    if (!browser_session)
        throw new Error('Browser session missing');
    throwIfAborted(signal);
    let uploadPath = params.path;
    // Anything other than an explicit `is_local === false` counts as local.
    const isLocalBrowser = browser_session?.is_local !== false;
    // Allowed = caller-supplied paths plus files downloaded in this session.
    const allowedPaths = new Set(available_file_paths ?? []);
    const downloadedFiles = Array.isArray(browser_session?.downloaded_files)
        ? browser_session.downloaded_files
        : [];
    for (const downloadedPath of downloadedFiles) {
        allowedPaths.add(downloadedPath);
    }
    if (!allowedPaths.has(uploadPath)) {
        const fsInstance = file_system ?? null;
        const managedFile = fsInstance && typeof fsInstance.get_file === 'function'
            ? fsInstance.get_file(uploadPath)
            : null;
        if (managedFile && fsInstance?.get_dir) {
            // File known to the agent file system: resolve inside its directory.
            uploadPath = path.join(fsInstance.get_dir(), uploadPath);
        }
        else if (isLocalBrowser) {
            return new ActionResult({
                error: `File path ${params.path} is not available. To fix: add this file path to available_file_paths when creating the Agent.`,
            });
        }
        // Otherwise: remote browser paths may only exist on the remote
        // runtime, so they are passed through unchecked.
    }
    if (isLocalBrowser) {
        // Single stat call instead of existsSync + statSync: a file removed
        // between the two calls would otherwise throw instead of returning
        // an error result.
        let stats = null;
        try {
            stats = fs.statSync(uploadPath);
        }
        catch {
            stats = null;
        }
        if (!stats) {
            return new ActionResult({
                error: `File ${uploadPath} does not exist`,
            });
        }
        if (stats.size === 0) {
            return new ActionResult({
                error: `File ${uploadPath} is empty (0 bytes). The file may not have been saved correctly.`,
            });
        }
    }
    let selectorMap = null;
    if (typeof browser_session.get_selector_map === 'function') {
        selectorMap = await browser_session.get_selector_map({ signal });
        if (!(params.index in (selectorMap ?? {}))) {
            return new ActionResult({
                error: `Element with index ${params.index} does not exist.`,
            });
        }
    }
    let node = await browser_session.find_file_upload_element_by_index(params.index, 3, 3, { signal });
    if (!node &&
        selectorMap &&
        typeof browser_session.is_file_input === 'function') {
        // Fallback: pick the file input whose vertical position is nearest
        // to the current scroll offset.
        let currentScrollY = 0;
        try {
            const page = await browser_session.get_current_page?.();
            if (page?.evaluate) {
                const evaluated = await page.evaluate(() => window.scrollY || window.pageYOffset || 0);
                const numeric = typeof evaluated === 'number' ? evaluated : Number(evaluated);
                if (Number.isFinite(numeric)) {
                    currentScrollY = numeric;
                }
            }
        }
        catch {
            currentScrollY = 0;
        }
        let closest = null;
        let minDistance = Number.POSITIVE_INFINITY;
        for (const element of Object.values(selectorMap)) {
            if (!browser_session.is_file_input(element)) {
                continue;
            }
            // An element without a usable position must never outrank one
            // with a real position, so its distance is infinite rather than
            // being treated as y=0 / distance 0.
            const rawY = element?.absolute_position?.y;
            const y = rawY == null ? Number.NaN : Number(rawY);
            const distance = Number.isFinite(y)
                ? Math.abs(y - currentScrollY)
                : Number.POSITIVE_INFINITY;
            // `!closest` still lets a position-less input win when it is the
            // only candidate.
            if (!closest || distance < minDistance) {
                closest = element;
                minDistance = distance;
            }
        }
        if (closest) {
            node = closest;
        }
    }
    if (!node) {
        throw new BrowserError('No file upload element found on the page');
    }
    await dispatchBrowserEventIfAvailable(browser_session, new UploadFileEvent({
        node,
        file_path: uploadPath,
    }), async () => {
        const locator = await browser_session.get_locate_element(node);
        if (!locator) {
            throw new BrowserError('No file upload element found on the page');
        }
        await locator.setInputFiles(uploadPath);
        return null;
    });
    const msg = `📁 Successfully uploaded file to index ${params.index}`;
    return new ActionResult({
        extracted_content: msg,
        include_in_memory: true,
        long_term_memory: `Uploaded file ${uploadPath} to element ${params.index}`,
    });
});
|
|
315
709
|
}
|
|
710
|
+
registerClickActions() {
|
|
711
|
+
const clickActionHandler = this.clickActionHandler;
|
|
712
|
+
if (!clickActionHandler) {
|
|
713
|
+
return;
|
|
714
|
+
}
|
|
715
|
+
const removeAction = this.registry?.remove_action;
|
|
716
|
+
if (typeof removeAction === 'function') {
|
|
717
|
+
removeAction.call(this.registry, 'click');
|
|
718
|
+
removeAction.call(this.registry, 'click_element_by_index');
|
|
719
|
+
}
|
|
720
|
+
const registerIndexAlias = () => {
|
|
721
|
+
this.registry.action('Click element by index.', {
|
|
722
|
+
param_model: ClickElementActionIndexOnlySchema,
|
|
723
|
+
action_name: 'click_element_by_index',
|
|
724
|
+
})(async (params, ctx) => {
|
|
725
|
+
return await clickActionHandler(params, ctx);
|
|
726
|
+
});
|
|
727
|
+
};
|
|
728
|
+
if (this.coordinateClickingEnabled) {
|
|
729
|
+
this.registry.action('Click element by index or coordinates. Use coordinates only if the index is not available. Either provide coordinates or index.', {
|
|
730
|
+
param_model: ClickElementActionSchema,
|
|
731
|
+
action_name: 'click',
|
|
732
|
+
})(async (params, ctx) => {
|
|
733
|
+
return await clickActionHandler(params, ctx);
|
|
734
|
+
});
|
|
735
|
+
registerIndexAlias();
|
|
736
|
+
return;
|
|
737
|
+
}
|
|
738
|
+
this.registry.action('Click element by index.', {
|
|
739
|
+
param_model: ClickElementActionIndexOnlySchema,
|
|
740
|
+
action_name: 'click',
|
|
741
|
+
})(async (params, ctx) => {
|
|
742
|
+
return await clickActionHandler(params, ctx);
|
|
743
|
+
});
|
|
744
|
+
registerIndexAlias();
|
|
745
|
+
}
|
|
316
746
|
registerTabActions() {
|
|
317
|
-
|
|
318
|
-
|
|
747
|
+
const tabLogger = this.logger;
|
|
748
|
+
const resolveTabIdentifier = (params) => {
|
|
749
|
+
if (typeof params.tab_id === 'string' && params.tab_id.trim()) {
|
|
750
|
+
return params.tab_id.trim();
|
|
751
|
+
}
|
|
752
|
+
if (typeof params.page_id === 'number' &&
|
|
753
|
+
Number.isFinite(params.page_id)) {
|
|
754
|
+
return params.page_id;
|
|
755
|
+
}
|
|
756
|
+
return -1;
|
|
757
|
+
};
|
|
758
|
+
const formatTabId = (identifier, browser_session) => {
|
|
759
|
+
if (typeof identifier === 'string' && identifier.trim()) {
|
|
760
|
+
return identifier.trim();
|
|
761
|
+
}
|
|
762
|
+
const numericIdentifier = typeof identifier === 'number' && Number.isFinite(identifier)
|
|
763
|
+
? Math.floor(identifier)
|
|
764
|
+
: -1;
|
|
765
|
+
if (numericIdentifier >= 0) {
|
|
766
|
+
const matchedTab = Array.isArray(browser_session?.tabs)
|
|
767
|
+
? browser_session.tabs.find((tab) => tab?.page_id === numericIdentifier)
|
|
768
|
+
: null;
|
|
769
|
+
const matchedTabId = typeof matchedTab?.tab_id === 'string' && matchedTab.tab_id.trim()
|
|
770
|
+
? matchedTab.tab_id.trim()
|
|
771
|
+
: null;
|
|
772
|
+
return (matchedTabId ?? String(numericIdentifier).padStart(4, '0').slice(-4));
|
|
773
|
+
}
|
|
774
|
+
return 'unknown';
|
|
775
|
+
};
|
|
776
|
+
const switchImpl = async function (params, { browser_session, signal, }) {
|
|
319
777
|
if (!browser_session)
|
|
320
778
|
throw new Error('Browser session missing');
|
|
321
779
|
throwIfAborted(signal);
|
|
322
|
-
|
|
323
|
-
const
|
|
780
|
+
const identifier = resolveTabIdentifier(params);
|
|
781
|
+
const tabId = formatTabId(identifier, browser_session);
|
|
324
782
|
try {
|
|
325
|
-
|
|
326
|
-
|
|
783
|
+
const switchTargetId = identifier === -1 ? null : String(identifier).trim();
|
|
784
|
+
await dispatchBrowserEventIfAvailable(browser_session, new SwitchTabEvent({ target_id: switchTargetId }), () => browser_session.switch_to_tab(identifier, { signal }));
|
|
785
|
+
const page = await browser_session.get_current_page();
|
|
786
|
+
try {
|
|
787
|
+
await page?.wait_for_load_state?.('domcontentloaded', {
|
|
788
|
+
timeout: 5000,
|
|
789
|
+
});
|
|
790
|
+
}
|
|
791
|
+
catch {
|
|
792
|
+
/* ignore */
|
|
793
|
+
}
|
|
794
|
+
const memory = `Switched to tab #${tabId}`;
|
|
795
|
+
return new ActionResult({
|
|
796
|
+
extracted_content: memory,
|
|
797
|
+
long_term_memory: memory,
|
|
327
798
|
});
|
|
328
799
|
}
|
|
329
|
-
catch {
|
|
330
|
-
|
|
800
|
+
catch (error) {
|
|
801
|
+
tabLogger.warning(`Tab switch may have failed: ${error.message}`);
|
|
802
|
+
const memory = `Attempted to switch to tab #${tabId}`;
|
|
803
|
+
return new ActionResult({
|
|
804
|
+
extracted_content: memory,
|
|
805
|
+
long_term_memory: memory,
|
|
806
|
+
});
|
|
331
807
|
}
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
});
|
|
808
|
+
};
|
|
809
|
+
this.registry.action('Switch tab', {
|
|
810
|
+
param_model: SwitchTabActionSchema,
|
|
811
|
+
terminates_sequence: true,
|
|
812
|
+
})(async function switch_tab(params, { browser_session, signal }) {
|
|
813
|
+
return switchImpl(params, { browser_session, signal });
|
|
338
814
|
});
|
|
339
|
-
this.registry.action('
|
|
340
|
-
param_model:
|
|
341
|
-
|
|
815
|
+
this.registry.action('Switch tab', {
|
|
816
|
+
param_model: SwitchTabActionSchema,
|
|
817
|
+
terminates_sequence: true,
|
|
818
|
+
action_name: 'switch',
|
|
819
|
+
})(async function switch_alias(params, { browser_session, signal }) {
|
|
820
|
+
return switchImpl(params, { browser_session, signal });
|
|
821
|
+
});
|
|
822
|
+
const closeImpl = async function (params, { browser_session, signal, }) {
|
|
342
823
|
if (!browser_session)
|
|
343
824
|
throw new Error('Browser session missing');
|
|
344
825
|
throwIfAborted(signal);
|
|
345
|
-
|
|
346
|
-
const
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
826
|
+
const identifier = resolveTabIdentifier(params);
|
|
827
|
+
const closedTabId = formatTabId(identifier, browser_session);
|
|
828
|
+
try {
|
|
829
|
+
const resolvedCloseTargetId = identifier === -1
|
|
830
|
+
? (browser_session?.active_tab?.target_id ??
|
|
831
|
+
browser_session?.active_tab?.tab_id ??
|
|
832
|
+
null)
|
|
833
|
+
: String(identifier).trim();
|
|
834
|
+
if (!resolvedCloseTargetId) {
|
|
835
|
+
throw new Error('Could not resolve target tab to close');
|
|
836
|
+
}
|
|
837
|
+
await dispatchBrowserEventIfAvailable(browser_session, new CloseTabEvent({ target_id: resolvedCloseTargetId }), () => browser_session.close_tab(identifier));
|
|
838
|
+
const memory = `Closed tab #${closedTabId}`;
|
|
839
|
+
return new ActionResult({
|
|
840
|
+
extracted_content: memory,
|
|
841
|
+
long_term_memory: memory,
|
|
842
|
+
});
|
|
843
|
+
}
|
|
844
|
+
catch (error) {
|
|
845
|
+
tabLogger.warning(`Tab ${closedTabId} may already be closed: ${error.message}`);
|
|
846
|
+
const memory = `Tab #${closedTabId} closed (was already closed or invalid)`;
|
|
847
|
+
return new ActionResult({
|
|
848
|
+
extracted_content: memory,
|
|
849
|
+
long_term_memory: memory,
|
|
850
|
+
});
|
|
851
|
+
}
|
|
852
|
+
};
|
|
853
|
+
this.registry.action('Close an existing tab', {
|
|
854
|
+
param_model: CloseTabActionSchema,
|
|
855
|
+
})(async function close_tab(params, { browser_session, signal }) {
|
|
856
|
+
return closeImpl(params, { browser_session, signal });
|
|
857
|
+
});
|
|
858
|
+
this.registry.action('Close an existing tab', {
|
|
859
|
+
param_model: CloseTabActionSchema,
|
|
860
|
+
})(async function close(params, { browser_session, signal }) {
|
|
861
|
+
return closeImpl(params, { browser_session, signal });
|
|
357
862
|
});
|
|
358
863
|
}
|
|
359
864
|
registerContentActions() {
|
|
360
|
-
|
|
865
|
+
const registry = this.registry;
|
|
866
|
+
const contentLogger = this.logger;
|
|
867
|
+
const extractStructuredDescription = "LLM extracts structured data from page markdown. Use when: on right page, know what to extract, haven't called before on same page+query. Can't get interactive elements. Set extract_links=True for URLs. Use start_from_char if previous extraction was truncated to extract data further down the page.";
|
|
868
|
+
this.registry.action(extractStructuredDescription, {
|
|
361
869
|
param_model: ExtractStructuredDataActionSchema,
|
|
362
|
-
})(async function extract_structured_data(params, { page, page_extraction_llm, file_system, signal }) {
|
|
870
|
+
})(async function extract_structured_data(params, { page, page_extraction_llm, extraction_schema, file_system, signal }) {
|
|
363
871
|
throwIfAborted(signal);
|
|
364
872
|
if (!page) {
|
|
365
873
|
throw new BrowserError('No active page available for extraction.');
|
|
@@ -368,105 +876,457 @@ export class Controller {
|
|
|
368
876
|
throw new BrowserError('page_extraction_llm is not configured.');
|
|
369
877
|
}
|
|
370
878
|
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
371
|
-
const
|
|
372
|
-
|
|
879
|
+
const pageHtml = await runWithTimeoutAndSignal(async () => {
|
|
880
|
+
const value = await page.content?.();
|
|
881
|
+
return typeof value === 'string' ? value : '';
|
|
882
|
+
}, 10000, signal, 'Page content extraction timed out');
|
|
883
|
+
if (!pageHtml) {
|
|
373
884
|
throw new BrowserError('Unable to extract page content.');
|
|
374
885
|
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
886
|
+
let combinedHtml = pageHtml;
|
|
887
|
+
const frames = typeof page.frames === 'function'
|
|
888
|
+
? page.frames()
|
|
889
|
+
: Array.isArray(page.frames)
|
|
890
|
+
? page.frames
|
|
891
|
+
: [];
|
|
892
|
+
const currentUrl = (() => {
|
|
893
|
+
const pageUrlValue = page.url;
|
|
894
|
+
if (typeof pageUrlValue === 'function') {
|
|
895
|
+
return String(pageUrlValue.call(page) ?? '');
|
|
896
|
+
}
|
|
897
|
+
return typeof pageUrlValue === 'string' ? pageUrlValue : '';
|
|
898
|
+
})();
|
|
387
899
|
for (const iframe of frames) {
|
|
388
900
|
throwIfAborted(signal);
|
|
389
901
|
try {
|
|
390
|
-
// Wait for iframe to load with aggressive timeout
|
|
391
902
|
await runWithTimeoutAndSignal(async () => {
|
|
392
903
|
await iframe.waitForLoadState?.('load');
|
|
393
|
-
},
|
|
904
|
+
}, 1000, signal, 'Iframe load timeout');
|
|
394
905
|
}
|
|
395
906
|
catch (error) {
|
|
396
907
|
if (isAbortError(error)) {
|
|
397
908
|
throw error;
|
|
398
909
|
}
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
910
|
+
}
|
|
911
|
+
const iframeUrl = typeof iframe.url === 'function'
|
|
912
|
+
? iframe.url()
|
|
913
|
+
: typeof iframe.url === 'string'
|
|
914
|
+
? iframe.url
|
|
915
|
+
: '';
|
|
916
|
+
if (!iframeUrl ||
|
|
917
|
+
iframeUrl === currentUrl ||
|
|
918
|
+
iframeUrl.startsWith('data:') ||
|
|
919
|
+
iframeUrl.startsWith('about:')) {
|
|
920
|
+
continue;
|
|
921
|
+
}
|
|
922
|
+
try {
|
|
923
|
+
const iframeHtml = await runWithTimeoutAndSignal(async () => {
|
|
924
|
+
const value = await iframe.content?.();
|
|
925
|
+
return typeof value === 'string' ? value : '';
|
|
926
|
+
}, 2000, signal, 'Iframe content extraction timeout');
|
|
927
|
+
if (!iframeHtml) {
|
|
928
|
+
continue;
|
|
413
929
|
}
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
930
|
+
combinedHtml += `\n<section><h2>IFRAME ${iframeUrl}</h2>${iframeHtml}</section>`;
|
|
931
|
+
}
|
|
932
|
+
catch (error) {
|
|
933
|
+
if (isAbortError(error)) {
|
|
934
|
+
throw error;
|
|
419
935
|
}
|
|
420
936
|
}
|
|
421
937
|
}
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
938
|
+
const extracted = extractCleanMarkdownFromHtml(combinedHtml, {
|
|
939
|
+
extract_links: params.extract_links,
|
|
940
|
+
method: 'page_content',
|
|
941
|
+
url: currentUrl || undefined,
|
|
942
|
+
});
|
|
943
|
+
let content = extracted.content;
|
|
944
|
+
const contentStats = extracted.stats;
|
|
945
|
+
const finalFilteredLength = contentStats.final_filtered_chars;
|
|
946
|
+
const startFromChar = Math.max(0, params.start_from_char ?? 0);
|
|
947
|
+
const maxChars = 100000;
|
|
948
|
+
const chunks = chunkMarkdownByStructure(content, maxChars, 5, startFromChar);
|
|
949
|
+
if (!chunks.length) {
|
|
950
|
+
return new ActionResult({
|
|
951
|
+
error: `start_from_char (${startFromChar}) exceeds content length ${finalFilteredLength} characters.`,
|
|
952
|
+
});
|
|
953
|
+
}
|
|
954
|
+
const chunk = chunks[0];
|
|
955
|
+
content = chunk.content;
|
|
956
|
+
const wasTruncated = chunk.has_more;
|
|
957
|
+
if (chunk.overlap_prefix) {
|
|
958
|
+
content = `${chunk.overlap_prefix}\n${content}`;
|
|
959
|
+
}
|
|
960
|
+
if (startFromChar > 0) {
|
|
961
|
+
contentStats.started_from_char = startFromChar;
|
|
962
|
+
}
|
|
963
|
+
if (wasTruncated) {
|
|
964
|
+
contentStats.truncated_at_char = chunk.char_offset_end;
|
|
965
|
+
contentStats.next_start_char = chunk.char_offset_end;
|
|
966
|
+
contentStats.chunk_index = chunk.chunk_index;
|
|
967
|
+
contentStats.total_chunks = chunk.total_chunks;
|
|
968
|
+
}
|
|
969
|
+
const originalHtmlLength = contentStats.original_html_chars;
|
|
970
|
+
const initialMarkdownLength = contentStats.initial_markdown_chars;
|
|
971
|
+
const charsFiltered = contentStats.filtered_chars_removed;
|
|
972
|
+
let statsSummary = `Content processed: ${originalHtmlLength.toLocaleString()} HTML chars ` +
|
|
973
|
+
`→ ${initialMarkdownLength.toLocaleString()} initial markdown ` +
|
|
974
|
+
`→ ${finalFilteredLength.toLocaleString()} filtered markdown`;
|
|
975
|
+
if (startFromChar > 0) {
|
|
976
|
+
statsSummary += ` (started from char ${startFromChar.toLocaleString()})`;
|
|
977
|
+
}
|
|
978
|
+
if (wasTruncated &&
|
|
979
|
+
contentStats.next_start_char != null &&
|
|
980
|
+
contentStats.chunk_index != null &&
|
|
981
|
+
contentStats.total_chunks != null) {
|
|
982
|
+
const chunkInfo = `chunk ${contentStats.chunk_index + 1} of ${contentStats.total_chunks}, `;
|
|
983
|
+
statsSummary +=
|
|
984
|
+
` → ${content.length.toLocaleString()} final chars ` +
|
|
985
|
+
`(${chunkInfo}use start_from_char=${contentStats.next_start_char} to continue)`;
|
|
986
|
+
}
|
|
987
|
+
else if (charsFiltered > 0) {
|
|
988
|
+
statsSummary += ` (filtered ${charsFiltered.toLocaleString()} chars of noise)`;
|
|
989
|
+
}
|
|
990
|
+
content = sanitize_surrogates(content);
|
|
991
|
+
const sanitizedQuery = sanitize_surrogates(params.query);
|
|
992
|
+
const parseJsonFromCompletion = (completion) => {
|
|
993
|
+
const trimmed = completion.trim();
|
|
994
|
+
const fencedMatch = trimmed.match(/```(?:json)?\s*([\s\S]*?)```/i);
|
|
995
|
+
const candidate = fencedMatch?.[1]?.trim() || trimmed;
|
|
996
|
+
return JSON.parse(candidate);
|
|
997
|
+
};
|
|
998
|
+
let effectiveOutputSchema = params.output_schema ?? extraction_schema;
|
|
999
|
+
if (effectiveOutputSchema != null) {
|
|
1000
|
+
const unsupportedKeyword = findUnsupportedJsonSchemaKeyword(effectiveOutputSchema);
|
|
1001
|
+
if (unsupportedKeyword) {
|
|
1002
|
+
contentLogger.warning(`Invalid output_schema, falling back to free-text extraction: unsupported keyword '${unsupportedKeyword}'`);
|
|
1003
|
+
effectiveOutputSchema = null;
|
|
1004
|
+
}
|
|
1005
|
+
}
|
|
1006
|
+
const pageUrl = currentUrl || '';
|
|
1007
|
+
const maxMemoryLength = 10000;
|
|
1008
|
+
if (effectiveOutputSchema != null) {
|
|
1009
|
+
const systemPrompt = `
|
|
1010
|
+
You are an expert at extracting structured data from the markdown of a webpage.
|
|
1011
|
+
|
|
1012
|
+
<input>
|
|
1013
|
+
You will be given a query, a JSON Schema, and the markdown of a webpage that has been filtered to remove noise and advertising content.
|
|
1014
|
+
</input>
|
|
1015
|
+
|
|
1016
|
+
<instructions>
|
|
1017
|
+
- Extract ONLY information present in the webpage. Do not guess or fabricate values.
|
|
1018
|
+
- Your response MUST conform to the provided JSON Schema exactly.
|
|
1019
|
+
- If a required field's value cannot be found on the page, use null (if the schema allows it) or an empty string / empty array as appropriate.
|
|
1020
|
+
- If the content was truncated, extract what is available from the visible portion.
|
|
1021
|
+
</instructions>`.trim();
|
|
1022
|
+
const schemaJson = JSON.stringify(effectiveOutputSchema, null, 2);
|
|
1023
|
+
const prompt = `<query>\n${sanitizedQuery}\n</query>\n\n` +
|
|
1024
|
+
`<output_schema>\n${schemaJson}\n</output_schema>\n\n` +
|
|
1025
|
+
`<content_stats>\n${statsSummary}\n</content_stats>\n\n` +
|
|
1026
|
+
`<webpage_content>\n${content}\n</webpage_content>`;
|
|
1027
|
+
const response = await page_extraction_llm.ainvoke([new SystemMessage(systemPrompt), new UserMessage(prompt)], undefined, { signal: signal ?? undefined });
|
|
1028
|
+
throwIfAborted(signal);
|
|
1029
|
+
const completion = response?.completion;
|
|
1030
|
+
const completionText = typeof completion === 'string'
|
|
1031
|
+
? completion
|
|
1032
|
+
: JSON.stringify(completion ?? {});
|
|
1033
|
+
let parsedResult;
|
|
1034
|
+
try {
|
|
1035
|
+
parsedResult = parseJsonFromCompletion(completionText);
|
|
1036
|
+
}
|
|
1037
|
+
catch (error) {
|
|
1038
|
+
throw new BrowserError(`Structured extraction returned invalid JSON: ${error.message}`);
|
|
1039
|
+
}
|
|
1040
|
+
const schemaValidation = validateJsonSchema(parsedResult, effectiveOutputSchema);
|
|
1041
|
+
if (!schemaValidation.valid) {
|
|
1042
|
+
const details = (schemaValidation.errors ?? [])
|
|
1043
|
+
.slice(0, 3)
|
|
1044
|
+
.map((item) => String(item?.error ?? '').trim())
|
|
1045
|
+
.filter(Boolean)
|
|
1046
|
+
.join('; ');
|
|
1047
|
+
const suffix = details ? `: ${details}` : '';
|
|
1048
|
+
throw new BrowserError(`Structured extraction result does not match output_schema${suffix}`);
|
|
1049
|
+
}
|
|
1050
|
+
const normalizedResult = normalizeStructuredDataBySchema(parsedResult, effectiveOutputSchema);
|
|
1051
|
+
const resultJson = JSON.stringify(normalizedResult);
|
|
1052
|
+
const extractedContent = `<url>\n${pageUrl}\n</url>\n` +
|
|
1053
|
+
`<query>\n${sanitizedQuery}\n</query>\n` +
|
|
1054
|
+
`<structured_result>\n${resultJson}\n</structured_result>`;
|
|
1055
|
+
const extractionMeta = {
|
|
1056
|
+
data: normalizedResult,
|
|
1057
|
+
schema_used: effectiveOutputSchema,
|
|
1058
|
+
is_partial: wasTruncated,
|
|
1059
|
+
source_url: pageUrl,
|
|
1060
|
+
content_stats: contentStats,
|
|
1061
|
+
};
|
|
1062
|
+
const includeOnce = extractedContent.length >= maxMemoryLength;
|
|
1063
|
+
const memory = includeOnce
|
|
1064
|
+
? `Query: ${sanitizedQuery}\nContent in ${await fsInstance.save_extracted_content(extractedContent)} and once in <read_state>.`
|
|
1065
|
+
: extractedContent;
|
|
1066
|
+
return new ActionResult({
|
|
1067
|
+
extracted_content: extractedContent,
|
|
1068
|
+
include_extracted_content_only_once: includeOnce,
|
|
1069
|
+
long_term_memory: memory,
|
|
1070
|
+
metadata: {
|
|
1071
|
+
structured_extraction: true,
|
|
1072
|
+
extraction_result: extractionMeta,
|
|
1073
|
+
},
|
|
1074
|
+
});
|
|
429
1075
|
}
|
|
430
|
-
const
|
|
431
|
-
|
|
432
|
-
2. Does not make sense for the page
|
|
433
|
-
3. Some/all of the information is not available
|
|
1076
|
+
const systemPrompt = `
|
|
1077
|
+
You are an expert at extracting data from the markdown of a webpage.
|
|
434
1078
|
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
1079
|
+
<input>
|
|
1080
|
+
You will be given a query and the markdown of a webpage that has been filtered to remove noise and advertising content.
|
|
1081
|
+
</input>
|
|
1082
|
+
|
|
1083
|
+
<instructions>
|
|
1084
|
+
- You are tasked to extract information from the webpage that is relevant to the query.
|
|
1085
|
+
- You should ONLY use the information available in the webpage to answer the query. Do not make up information or provide guess from your own knowledge.
|
|
1086
|
+
- If the information relevant to the query is not available in the page, your response should mention that.
|
|
1087
|
+
- If the query asks for all items, products, etc., make sure to directly list all of them.
|
|
1088
|
+
- If the content was truncated and you need more information, note that the user can use start_from_char parameter to continue from where truncation occurred.
|
|
1089
|
+
</instructions>
|
|
1090
|
+
|
|
1091
|
+
<output>
|
|
1092
|
+
- Your output should present ALL the information relevant to the query in a concise way.
|
|
1093
|
+
- Do not answer in conversational format - directly output the relevant information or that the information is unavailable.
|
|
1094
|
+
</output>`.trim();
|
|
1095
|
+
const prompt = `<query>\n${sanitizedQuery}\n</query>\n\n` +
|
|
1096
|
+
`<content_stats>\n${statsSummary}\n</content_stats>\n\n` +
|
|
1097
|
+
`<webpage_content>\n${content}\n</webpage_content>`;
|
|
1098
|
+
const response = await page_extraction_llm.ainvoke([new SystemMessage(systemPrompt), new UserMessage(prompt)], undefined, { signal: signal ?? undefined });
|
|
440
1099
|
throwIfAborted(signal);
|
|
441
|
-
const completion =
|
|
442
|
-
const
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
const
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
1100
|
+
const completion = response?.completion;
|
|
1101
|
+
const completionText = typeof completion === 'string'
|
|
1102
|
+
? completion
|
|
1103
|
+
: JSON.stringify(completion ?? {});
|
|
1104
|
+
const extractedContent = `<url>\n${pageUrl}\n</url>\n` +
|
|
1105
|
+
`<query>\n${sanitizedQuery}\n</query>\n` +
|
|
1106
|
+
`<result>\n${completionText}\n</result>`;
|
|
1107
|
+
const includeOnce = extractedContent.length >= maxMemoryLength;
|
|
1108
|
+
const memory = includeOnce
|
|
1109
|
+
? `Query: ${sanitizedQuery}\nContent in ${await fsInstance.save_extracted_content(extractedContent)} and once in <read_state>.`
|
|
1110
|
+
: extractedContent;
|
|
1111
|
+
return new ActionResult({
|
|
1112
|
+
extracted_content: extractedContent,
|
|
1113
|
+
include_extracted_content_only_once: includeOnce,
|
|
1114
|
+
long_term_memory: memory,
|
|
1115
|
+
});
|
|
1116
|
+
});
|
|
1117
|
+
// Alias registered under the action name `extract`: it forwards the call to
// the `extract_structured_data` action through the registry.
this.registry.action(extractStructuredDescription, {
    param_model: ExtractStructuredDataActionSchema,
    action_name: 'extract',
})(async function extract(params, { browser_session, page_extraction_llm, extraction_schema, file_system, available_file_paths, sensitive_data, signal, }) {
    // Forward exactly the context fields this handler receives.
    const forwardedContext = {
        browser_session,
        page_extraction_llm,
        extraction_schema,
        file_system,
        available_file_paths,
        sensitive_data,
        signal,
    };
    return registry.execute_action('extract_structured_data', params, forwardedContext);
});
|
|
1131
|
+
}
|
|
1132
|
+
registerExplorationActions() {
|
|
1133
|
+
this.registry.action('Search page text for a pattern (like grep). Zero LLM cost and instant.', { param_model: SearchPageActionSchema })(async function search_page(params, { browser_session, signal }) {
|
|
1134
|
+
if (!browser_session)
|
|
1135
|
+
throw new Error('Browser session missing');
|
|
1136
|
+
throwIfAborted(signal);
|
|
1137
|
+
const page = await browser_session.get_current_page();
|
|
1138
|
+
if (!page?.evaluate) {
|
|
1139
|
+
throw new BrowserError('No active page for search_page.');
|
|
1140
|
+
}
|
|
1141
|
+
const searchResult = (await page.evaluate(({ pattern, regex, caseSensitive, contextChars, cssScope, maxResults, }) => {
|
|
1142
|
+
const sourceNode = cssScope
|
|
1143
|
+
? document.querySelector(cssScope)
|
|
1144
|
+
: document.body;
|
|
1145
|
+
if (!sourceNode) {
|
|
1146
|
+
return {
|
|
1147
|
+
error: `CSS scope not found: ${cssScope}`,
|
|
1148
|
+
matches: [],
|
|
1149
|
+
total: 0,
|
|
1150
|
+
};
|
|
1151
|
+
}
|
|
1152
|
+
const sourceText = sourceNode.innerText ||
|
|
1153
|
+
sourceNode.textContent ||
|
|
1154
|
+
'';
|
|
1155
|
+
if (!sourceText.trim()) {
|
|
1156
|
+
return {
|
|
1157
|
+
matches: [],
|
|
1158
|
+
total: 0,
|
|
1159
|
+
};
|
|
1160
|
+
}
|
|
1161
|
+
const safePattern = regex
|
|
1162
|
+
? pattern
|
|
1163
|
+
: pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
1164
|
+
const flags = caseSensitive ? 'g' : 'gi';
|
|
1165
|
+
let matcher;
|
|
1166
|
+
try {
|
|
1167
|
+
matcher = new RegExp(safePattern, flags);
|
|
455
1168
|
}
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
1169
|
+
catch (error) {
|
|
1170
|
+
return {
|
|
1171
|
+
error: `Invalid regex pattern: ${String(error)}`,
|
|
1172
|
+
matches: [],
|
|
1173
|
+
total: 0,
|
|
1174
|
+
};
|
|
1175
|
+
}
|
|
1176
|
+
const matches = [];
|
|
1177
|
+
let foundTotal = 0;
|
|
1178
|
+
let m;
|
|
1179
|
+
while ((m = matcher.exec(sourceText)) !== null) {
|
|
1180
|
+
foundTotal += 1;
|
|
1181
|
+
if (matches.length < Math.max(1, maxResults)) {
|
|
1182
|
+
const start = Math.max(0, m.index - Math.max(0, contextChars));
|
|
1183
|
+
const end = Math.min(sourceText.length, m.index + m[0].length + Math.max(0, contextChars));
|
|
1184
|
+
matches.push({
|
|
1185
|
+
position: m.index,
|
|
1186
|
+
match: m[0],
|
|
1187
|
+
snippet: sourceText.slice(start, end),
|
|
1188
|
+
});
|
|
1189
|
+
}
|
|
1190
|
+
if (m[0].length === 0) {
|
|
1191
|
+
matcher.lastIndex += 1;
|
|
1192
|
+
}
|
|
1193
|
+
}
|
|
1194
|
+
return {
|
|
1195
|
+
matches,
|
|
1196
|
+
total: foundTotal,
|
|
1197
|
+
truncated: foundTotal > matches.length,
|
|
1198
|
+
};
|
|
1199
|
+
}, {
|
|
1200
|
+
pattern: params.pattern,
|
|
1201
|
+
regex: params.regex,
|
|
1202
|
+
caseSensitive: params.case_sensitive,
|
|
1203
|
+
contextChars: params.context_chars,
|
|
1204
|
+
cssScope: params.css_scope ?? null,
|
|
1205
|
+
maxResults: params.max_results,
|
|
1206
|
+
}));
|
|
1207
|
+
if (!searchResult) {
|
|
1208
|
+
return new ActionResult({ error: 'search_page returned no result' });
|
|
1209
|
+
}
|
|
1210
|
+
if (searchResult.error) {
|
|
1211
|
+
return new ActionResult({
|
|
1212
|
+
error: `search_page: ${searchResult.error}`,
|
|
1213
|
+
});
|
|
1214
|
+
}
|
|
1215
|
+
const total = searchResult.total ?? 0;
|
|
1216
|
+
const matches = searchResult.matches ?? [];
|
|
1217
|
+
if (total === 0 || !matches.length) {
|
|
1218
|
+
const noMatchMessage = `No matches found for "${params.pattern}".`;
|
|
1219
|
+
return new ActionResult({
|
|
1220
|
+
extracted_content: noMatchMessage,
|
|
1221
|
+
long_term_memory: `Searched page for "${params.pattern}": 0 matches found.`,
|
|
1222
|
+
});
|
|
1223
|
+
}
|
|
1224
|
+
const lines = [
|
|
1225
|
+
`Found ${total} matches for "${params.pattern}" in page text:`,
|
|
1226
|
+
];
|
|
1227
|
+
for (let i = 0; i < matches.length; i += 1) {
|
|
1228
|
+
const match = matches[i];
|
|
1229
|
+
const compactSnippet = match.snippet.replace(/\s+/g, ' ').trim();
|
|
1230
|
+
lines.push(`${i + 1}. [pos ${match.position}] "${match.match}" -> ${compactSnippet}`);
|
|
1231
|
+
}
|
|
1232
|
+
if (searchResult.truncated) {
|
|
1233
|
+
lines.push(`... showing first ${matches.length} matches (increase max_results to see more).`);
|
|
1234
|
+
}
|
|
1235
|
+
const memory = `Searched page for "${params.pattern}": ${total} match${total === 1 ? '' : 'es'} found.`;
|
|
1236
|
+
return new ActionResult({
|
|
1237
|
+
extracted_content: lines.join('\n'),
|
|
1238
|
+
long_term_memory: memory,
|
|
1239
|
+
});
|
|
1240
|
+
});
|
|
1241
|
+
this.registry.action('Query DOM elements by CSS selector (like find). Zero LLM cost and instant.', { param_model: FindElementsActionSchema })(async function find_elements(params, { browser_session, signal }) {
|
|
1242
|
+
if (!browser_session)
|
|
1243
|
+
throw new Error('Browser session missing');
|
|
1244
|
+
throwIfAborted(signal);
|
|
1245
|
+
const page = await browser_session.get_current_page();
|
|
1246
|
+
if (!page?.evaluate) {
|
|
1247
|
+
throw new BrowserError('No active page for find_elements.');
|
|
1248
|
+
}
|
|
1249
|
+
const result = (await page.evaluate(({ selector, attributes, maxResults, includeText, }) => {
|
|
1250
|
+
let elements;
|
|
1251
|
+
try {
|
|
1252
|
+
elements = Array.from(document.querySelectorAll(selector));
|
|
1253
|
+
}
|
|
1254
|
+
catch (error) {
|
|
1255
|
+
return {
|
|
1256
|
+
error: `Invalid selector: ${String(error)}`,
|
|
1257
|
+
elements: [],
|
|
1258
|
+
total: 0,
|
|
1259
|
+
};
|
|
1260
|
+
}
|
|
1261
|
+
const selected = elements.slice(0, Math.max(1, maxResults));
|
|
1262
|
+
const payload = selected.map((el, idx) => {
|
|
1263
|
+
const attrs = {};
|
|
1264
|
+
if (attributes?.length) {
|
|
1265
|
+
for (const attr of attributes) {
|
|
1266
|
+
const value = el.getAttribute(attr);
|
|
1267
|
+
if (value != null) {
|
|
1268
|
+
attrs[attr] = value;
|
|
1269
|
+
}
|
|
1270
|
+
}
|
|
1271
|
+
}
|
|
1272
|
+
return {
|
|
1273
|
+
index: idx + 1,
|
|
1274
|
+
tag: el.tagName.toLowerCase(),
|
|
1275
|
+
text: includeText
|
|
1276
|
+
? (el.textContent || '').replace(/\s+/g, ' ').trim()
|
|
1277
|
+
: '',
|
|
1278
|
+
attributes: attrs,
|
|
1279
|
+
};
|
|
1280
|
+
});
|
|
1281
|
+
return {
|
|
1282
|
+
elements: payload,
|
|
1283
|
+
total: elements.length,
|
|
1284
|
+
truncated: elements.length > selected.length,
|
|
1285
|
+
};
|
|
1286
|
+
}, {
|
|
1287
|
+
selector: params.selector,
|
|
1288
|
+
attributes: params.attributes ?? null,
|
|
1289
|
+
maxResults: params.max_results,
|
|
1290
|
+
includeText: params.include_text,
|
|
1291
|
+
}));
|
|
1292
|
+
if (!result) {
|
|
1293
|
+
return new ActionResult({ error: 'find_elements returned no result' });
|
|
1294
|
+
}
|
|
1295
|
+
if (result.error) {
|
|
1296
|
+
return new ActionResult({ error: `find_elements: ${result.error}` });
|
|
1297
|
+
}
|
|
1298
|
+
const elements = result.elements ?? [];
|
|
1299
|
+
const total = result.total ?? 0;
|
|
1300
|
+
if (!elements.length) {
|
|
1301
|
+
const msg = `No elements found for selector "${params.selector}".`;
|
|
1302
|
+
return new ActionResult({
|
|
1303
|
+
extracted_content: msg,
|
|
1304
|
+
long_term_memory: msg,
|
|
1305
|
+
});
|
|
1306
|
+
}
|
|
1307
|
+
const lines = [
|
|
1308
|
+
`Found ${total} element${total === 1 ? '' : 's'} for selector "${params.selector}":`,
|
|
1309
|
+
];
|
|
1310
|
+
for (const el of elements) {
|
|
1311
|
+
const attrs = Object.entries(el.attributes || {})
|
|
1312
|
+
.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
|
|
1313
|
+
.join(' ');
|
|
1314
|
+
const text = params.include_text && el.text
|
|
1315
|
+
? ` text=${JSON.stringify(el.text)}`
|
|
1316
|
+
: '';
|
|
1317
|
+
lines.push(`${el.index}. <${el.tag}>${text}${attrs ? ` ${attrs}` : ''}`.trim());
|
|
1318
|
+
}
|
|
1319
|
+
if (result.truncated) {
|
|
1320
|
+
lines.push(`... showing first ${elements.length} elements (increase max_results to see more).`);
|
|
461
1321
|
}
|
|
462
1322
|
return new ActionResult({
|
|
463
|
-
extracted_content,
|
|
464
|
-
|
|
465
|
-
long_term_memory: memory,
|
|
1323
|
+
extracted_content: lines.join('\n'),
|
|
1324
|
+
long_term_memory: `Queried selector "${params.selector}" and found ${total} element${total === 1 ? '' : 's'}.`,
|
|
466
1325
|
});
|
|
467
1326
|
});
|
|
468
1327
|
}
|
|
469
1328
|
registerScrollActions() {
|
|
1329
|
+
const registry = this.registry;
|
|
470
1330
|
const scrollLogger = this.logger; // Capture logger reference for use in named function
|
|
471
1331
|
// Define the scroll handler implementation (shared by multiple action names for LLM compatibility)
|
|
472
1332
|
const scrollImpl = async (params, { browser_session, signal }) => {
|
|
@@ -487,7 +1347,9 @@ ${content}`;
|
|
|
487
1347
|
}
|
|
488
1348
|
catch (error) {
|
|
489
1349
|
if (i === retries - 1) {
|
|
490
|
-
throw new Error(`Scroll failed due to an error: ${error}
|
|
1350
|
+
throw new Error(`Scroll failed due to an error: ${error}`, {
|
|
1351
|
+
cause: error,
|
|
1352
|
+
});
|
|
491
1353
|
}
|
|
492
1354
|
await waitWithSignal(1000, signal);
|
|
493
1355
|
}
|
|
@@ -495,8 +1357,8 @@ ${content}`;
|
|
|
495
1357
|
return 0;
|
|
496
1358
|
};
|
|
497
1359
|
const windowHeight = await getWindowHeight();
|
|
498
|
-
const
|
|
499
|
-
const
|
|
1360
|
+
const pagesScrolled = params.pages ?? params.num_pages ?? 1;
|
|
1361
|
+
const scrollAmount = Math.floor(windowHeight * pagesScrolled);
|
|
500
1362
|
const dy = params.down ? scrollAmount : -scrollAmount;
|
|
501
1363
|
const direction = params.down ? 'down' : 'up';
|
|
502
1364
|
let scrollTarget = 'the page';
|
|
@@ -505,7 +1367,9 @@ ${content}`;
|
|
|
505
1367
|
try {
|
|
506
1368
|
const elementNode = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
507
1369
|
if (!elementNode) {
|
|
508
|
-
|
|
1370
|
+
return new ActionResult({
|
|
1371
|
+
error: `Element index ${params.index} not found in browser state`,
|
|
1372
|
+
});
|
|
509
1373
|
}
|
|
510
1374
|
// Try direct container scrolling (no events that might close dropdowns)
|
|
511
1375
|
const containerScrollJs = `
|
|
@@ -518,8 +1382,6 @@ ${content}`;
|
|
|
518
1382
|
return { success: false, reason: 'Element not found by XPath' };
|
|
519
1383
|
}
|
|
520
1384
|
|
|
521
|
-
console.log('[SCROLL DEBUG] Starting direct container scroll for element:', targetElement.tagName);
|
|
522
|
-
|
|
523
1385
|
// Try to find scrollable containers in the hierarchy (starting from element itself)
|
|
524
1386
|
let currentElement = targetElement;
|
|
525
1387
|
let scrollSuccess = false;
|
|
@@ -533,12 +1395,6 @@ ${content}`;
|
|
|
533
1395
|
const hasScrollableY = /(auto|scroll|overlay)/.test(computedStyle.overflowY);
|
|
534
1396
|
const canScrollVertically = currentElement.scrollHeight > currentElement.clientHeight;
|
|
535
1397
|
|
|
536
|
-
console.log('[SCROLL DEBUG] Checking element:', currentElement.tagName,
|
|
537
|
-
'hasScrollableY:', hasScrollableY,
|
|
538
|
-
'canScrollVertically:', canScrollVertically,
|
|
539
|
-
'scrollHeight:', currentElement.scrollHeight,
|
|
540
|
-
'clientHeight:', currentElement.clientHeight);
|
|
541
|
-
|
|
542
1398
|
if (hasScrollableY && canScrollVertically) {
|
|
543
1399
|
const beforeScroll = currentElement.scrollTop;
|
|
544
1400
|
const maxScroll = currentElement.scrollHeight - currentElement.clientHeight;
|
|
@@ -559,14 +1415,10 @@ ${content}`;
|
|
|
559
1415
|
const afterScroll = currentElement.scrollTop;
|
|
560
1416
|
const actualScrollDelta = afterScroll - beforeScroll;
|
|
561
1417
|
|
|
562
|
-
console.log('[SCROLL DEBUG] Scroll attempt:', currentElement.tagName,
|
|
563
|
-
'before:', beforeScroll, 'after:', afterScroll, 'delta:', actualScrollDelta);
|
|
564
|
-
|
|
565
1418
|
if (Math.abs(actualScrollDelta) > 0.5) {
|
|
566
1419
|
scrollSuccess = true;
|
|
567
1420
|
scrolledElement = currentElement;
|
|
568
1421
|
scrollDelta = actualScrollDelta;
|
|
569
|
-
console.log('[SCROLL DEBUG] Successfully scrolled container:', currentElement.tagName, 'delta:', actualScrollDelta);
|
|
570
1422
|
break;
|
|
571
1423
|
}
|
|
572
1424
|
}
|
|
@@ -592,7 +1444,6 @@ ${content}`;
|
|
|
592
1444
|
};
|
|
593
1445
|
} else {
|
|
594
1446
|
// No container found or could scroll
|
|
595
|
-
console.log('[SCROLL DEBUG] No scrollable container found for element');
|
|
596
1447
|
return {
|
|
597
1448
|
success: false,
|
|
598
1449
|
reason: 'No scrollable container found',
|
|
@@ -639,7 +1490,10 @@ ${content}`;
|
|
|
639
1490
|
scrollTarget.includes('mouse wheel failed')) {
|
|
640
1491
|
scrollLogger.debug(`🔄 Performing page-level scrolling. Reason: ${scrollTarget}`);
|
|
641
1492
|
try {
|
|
642
|
-
await browser_session
|
|
1493
|
+
await dispatchBrowserEventIfAvailable(browser_session, new ScrollEvent({
|
|
1494
|
+
direction,
|
|
1495
|
+
amount: Math.abs(dy),
|
|
1496
|
+
}), () => browser_session._scrollContainer(dy));
|
|
643
1497
|
}
|
|
644
1498
|
catch (error) {
|
|
645
1499
|
// Hard fallback: always works on root scroller
|
|
@@ -709,27 +1563,33 @@ ${content}`;
|
|
|
709
1563
|
})(async function scroll_to_text(params, { browser_session }) {
|
|
710
1564
|
if (!browser_session)
|
|
711
1565
|
throw new Error('Browser session missing');
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
}
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
el
|
|
725
|
-
|
|
1566
|
+
await dispatchBrowserEventIfAvailable(browser_session, new ScrollToTextEvent({
|
|
1567
|
+
text: params.text,
|
|
1568
|
+
direction: 'down',
|
|
1569
|
+
}), async () => {
|
|
1570
|
+
const page = await browser_session.get_current_page();
|
|
1571
|
+
if (!page?.evaluate) {
|
|
1572
|
+
throw new BrowserError('Unable to access page for scrolling.');
|
|
1573
|
+
}
|
|
1574
|
+
const success = await page.evaluate(({ text }) => {
|
|
1575
|
+
const iterator = document.createNodeIterator(document.body, NodeFilter.SHOW_ELEMENT);
|
|
1576
|
+
let node;
|
|
1577
|
+
while ((node = iterator.nextNode())) {
|
|
1578
|
+
const el = node;
|
|
1579
|
+
if (!el || !el.textContent)
|
|
1580
|
+
continue;
|
|
1581
|
+
if (el.textContent.toLowerCase().includes(text.toLowerCase())) {
|
|
1582
|
+
el.scrollIntoView({ behavior: 'smooth', block: 'center' });
|
|
1583
|
+
return true;
|
|
1584
|
+
}
|
|
726
1585
|
}
|
|
1586
|
+
return false;
|
|
1587
|
+
}, { text: params.text });
|
|
1588
|
+
if (!success) {
|
|
1589
|
+
throw new BrowserError(`Text '${params.text}' not found on page`);
|
|
727
1590
|
}
|
|
728
|
-
return
|
|
729
|
-
}
|
|
730
|
-
if (!success) {
|
|
731
|
-
throw new BrowserError(`Text '${params.text}' not found on page`);
|
|
732
|
-
}
|
|
1591
|
+
return null;
|
|
1592
|
+
});
|
|
733
1593
|
const msg = `🔍 Scrolled to text: ${params.text}`;
|
|
734
1594
|
return new ActionResult({
|
|
735
1595
|
extracted_content: msg,
|
|
@@ -737,18 +1597,49 @@ ${content}`;
|
|
|
737
1597
|
long_term_memory: msg,
|
|
738
1598
|
});
|
|
739
1599
|
});
|
|
1600
|
+
this.registry.action('Scroll to text.', {
|
|
1601
|
+
param_model: ScrollToTextActionSchema,
|
|
1602
|
+
action_name: 'find_text',
|
|
1603
|
+
})(async function find_text(params, ctx) {
|
|
1604
|
+
try {
|
|
1605
|
+
return await registry.execute_action('scroll_to_text', params, ctx);
|
|
1606
|
+
}
|
|
1607
|
+
catch (error) {
|
|
1608
|
+
if (isAbortError(error)) {
|
|
1609
|
+
throw error;
|
|
1610
|
+
}
|
|
1611
|
+
const msg = `Text '${params.text}' not found or not visible on page`;
|
|
1612
|
+
return new ActionResult({
|
|
1613
|
+
extracted_content: msg,
|
|
1614
|
+
long_term_memory: `Tried scrolling to text '${params.text}' but it was not found`,
|
|
1615
|
+
});
|
|
1616
|
+
}
|
|
1617
|
+
});
|
|
740
1618
|
}
|
|
741
1619
|
registerFileSystemActions() {
|
|
742
|
-
this.registry
|
|
1620
|
+
const registry = this.registry;
|
|
1621
|
+
this.registry.action('Read the complete content of a file. Use this to view file contents before editing or to retrieve data from files. Supports text files (txt, md, json, csv, jsonl), documents (pdf, docx), and images (jpg, png).', {
|
|
743
1622
|
param_model: ReadFileActionSchema,
|
|
744
1623
|
})(async function read_file(params, { file_system, available_file_paths }) {
|
|
745
1624
|
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
746
1625
|
const allowed = Array.isArray(available_file_paths) &&
|
|
747
1626
|
available_file_paths.includes(params.file_name);
|
|
748
|
-
const
|
|
1627
|
+
const structuredResult = typeof fsInstance.read_file_structured === 'function'
|
|
1628
|
+
? await fsInstance.read_file_structured(params.file_name, allowed)
|
|
1629
|
+
: {
|
|
1630
|
+
message: await fsInstance.read_file(params.file_name, allowed),
|
|
1631
|
+
images: null,
|
|
1632
|
+
};
|
|
1633
|
+
const result = String(structuredResult?.message ?? '');
|
|
1634
|
+
const images = Array.isArray(structuredResult?.images)
|
|
1635
|
+
? structuredResult.images
|
|
1636
|
+
: null;
|
|
749
1637
|
const MAX_MEMORY_SIZE = 1000;
|
|
750
1638
|
let memory = result;
|
|
751
|
-
if (
|
|
1639
|
+
if (images && images.length > 0) {
|
|
1640
|
+
memory = `Read image file ${params.file_name}`;
|
|
1641
|
+
}
|
|
1642
|
+
else if (result.length > MAX_MEMORY_SIZE) {
|
|
752
1643
|
const lines = result.split('\n');
|
|
753
1644
|
let preview = '';
|
|
754
1645
|
let used = 0;
|
|
@@ -764,12 +1655,373 @@ ${content}`;
|
|
|
764
1655
|
}
|
|
765
1656
|
return new ActionResult({
|
|
766
1657
|
extracted_content: result,
|
|
767
|
-
include_in_memory: true,
|
|
768
1658
|
long_term_memory: memory,
|
|
1659
|
+
images,
|
|
769
1660
|
include_extracted_content_only_once: true,
|
|
770
1661
|
});
|
|
771
1662
|
});
|
|
772
|
-
this.registry.action('
|
|
1663
|
+
this.registry.action('Intelligently read long content to find specific information. Works on current page (source="page") or files. For large content, uses search to identify relevant sections. Best for long articles, documents, or any content where you know what you are looking for.', { param_model: ReadLongContentActionSchema })(async function read_long_content(params, { browser_session, page_extraction_llm, available_file_paths, signal }) {
|
|
1664
|
+
throwIfAborted(signal);
|
|
1665
|
+
const goal = params.goal.trim();
|
|
1666
|
+
const source = (params.source || 'page').trim();
|
|
1667
|
+
const context = (params.context || '').trim();
|
|
1668
|
+
const maxChars = 50000;
|
|
1669
|
+
const chunkSize = 2000;
|
|
1670
|
+
const fallbackSearchTerms = (() => {
|
|
1671
|
+
const tokens = `${goal} ${context}`
|
|
1672
|
+
.toLowerCase()
|
|
1673
|
+
.match(/[a-z0-9][a-z0-9-]{2,}/g);
|
|
1674
|
+
if (!tokens?.length) {
|
|
1675
|
+
return goal ? [goal] : ['content'];
|
|
1676
|
+
}
|
|
1677
|
+
return Array.from(new Set(tokens)).slice(0, 5);
|
|
1678
|
+
})();
|
|
1679
|
+
const extractSearchTerms = async () => {
|
|
1680
|
+
const extractionLlm = page_extraction_llm;
|
|
1681
|
+
if (!extractionLlm || typeof extractionLlm.ainvoke !== 'function') {
|
|
1682
|
+
return fallbackSearchTerms;
|
|
1683
|
+
}
|
|
1684
|
+
const prompt = `Extract 3-5 key search terms from this goal that would help find relevant sections.
|
|
1685
|
+
Return only the terms, one per line, no numbering or bullets.
|
|
1686
|
+
|
|
1687
|
+
Goal: ${goal}
|
|
1688
|
+
|
|
1689
|
+
Context: ${context}`;
|
|
1690
|
+
try {
|
|
1691
|
+
const response = await runWithTimeoutAndSignal(async () => (await extractionLlm.ainvoke([new UserMessage(prompt)], undefined, { signal: signal ?? undefined })), 12000, signal, 'Timed out extracting search terms');
|
|
1692
|
+
const parsed = (response?.completion ?? '')
|
|
1693
|
+
.split('\n')
|
|
1694
|
+
.map((line) => line
|
|
1695
|
+
.trim()
|
|
1696
|
+
.replace(/^[-\d.)\s]+/, '')
|
|
1697
|
+
.trim())
|
|
1698
|
+
.filter(Boolean);
|
|
1699
|
+
const unique = Array.from(new Set(parsed)).slice(0, 5);
|
|
1700
|
+
return unique.length ? unique : fallbackSearchTerms;
|
|
1701
|
+
}
|
|
1702
|
+
catch (error) {
|
|
1703
|
+
if (isAbortError(error)) {
|
|
1704
|
+
throw error;
|
|
1705
|
+
}
|
|
1706
|
+
return fallbackSearchTerms;
|
|
1707
|
+
}
|
|
1708
|
+
};
|
|
1709
|
+
const escapeRegExp = (value) => value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
1710
|
+
const searchText = (value, pattern, contextChars = 100) => {
|
|
1711
|
+
let regex;
|
|
1712
|
+
try {
|
|
1713
|
+
regex = new RegExp(pattern, 'gi');
|
|
1714
|
+
}
|
|
1715
|
+
catch {
|
|
1716
|
+
regex = new RegExp(escapeRegExp(pattern), 'gi');
|
|
1717
|
+
}
|
|
1718
|
+
const matches = [];
|
|
1719
|
+
let match = regex.exec(value);
|
|
1720
|
+
while (match != null) {
|
|
1721
|
+
const start = Math.max(0, match.index - contextChars);
|
|
1722
|
+
const end = Math.min(value.length, match.index + match[0].length + contextChars);
|
|
1723
|
+
matches.push({
|
|
1724
|
+
position: match.index,
|
|
1725
|
+
snippet: value.slice(start, end),
|
|
1726
|
+
});
|
|
1727
|
+
if (!regex.global) {
|
|
1728
|
+
break;
|
|
1729
|
+
}
|
|
1730
|
+
match = regex.exec(value);
|
|
1731
|
+
}
|
|
1732
|
+
return matches;
|
|
1733
|
+
};
|
|
1734
|
+
const chunkContent = (value, size = chunkSize) => {
|
|
1735
|
+
const chunks = [];
|
|
1736
|
+
for (let start = 0; start < value.length; start += size) {
|
|
1737
|
+
chunks.push({
|
|
1738
|
+
start,
|
|
1739
|
+
end: Math.min(start + size, value.length),
|
|
1740
|
+
text: value.slice(start, start + size),
|
|
1741
|
+
});
|
|
1742
|
+
}
|
|
1743
|
+
return chunks;
|
|
1744
|
+
};
|
|
1745
|
+
const getCurrentPageUrl = (page) => {
|
|
1746
|
+
const value = page?.url;
|
|
1747
|
+
if (typeof value === 'function') {
|
|
1748
|
+
return String(value.call(page) ?? '');
|
|
1749
|
+
}
|
|
1750
|
+
return typeof value === 'string' ? value : '';
|
|
1751
|
+
};
|
|
1752
|
+
const readPdfByPage = async (filePath) => {
|
|
1753
|
+
const buffer = await fsp.readFile(filePath);
|
|
1754
|
+
try {
|
|
1755
|
+
const pdfParseModule = (await import('pdf-parse'));
|
|
1756
|
+
if (typeof pdfParseModule.PDFParse === 'function') {
|
|
1757
|
+
const Parser = pdfParseModule.PDFParse;
|
|
1758
|
+
const parser = new Parser({ data: buffer });
|
|
1759
|
+
try {
|
|
1760
|
+
let numPages = 0;
|
|
1761
|
+
try {
|
|
1762
|
+
const info = await parser.getInfo?.({ parsePageInfo: false });
|
|
1763
|
+
numPages = Number(info?.total ?? 0);
|
|
1764
|
+
}
|
|
1765
|
+
catch {
|
|
1766
|
+
numPages = 0;
|
|
1767
|
+
}
|
|
1768
|
+
if (!Number.isFinite(numPages) || numPages <= 0) {
|
|
1769
|
+
const full = await parser.getText();
|
|
1770
|
+
const text = typeof full?.text === 'string' ? full.text : '';
|
|
1771
|
+
return {
|
|
1772
|
+
numPages: 1,
|
|
1773
|
+
pageTexts: [text],
|
|
1774
|
+
totalChars: text.length,
|
|
1775
|
+
};
|
|
1776
|
+
}
|
|
1777
|
+
const pageTexts = [];
|
|
1778
|
+
let totalChars = 0;
|
|
1779
|
+
for (let pageNumber = 1; pageNumber <= numPages; pageNumber += 1) {
|
|
1780
|
+
const pageResult = await parser.getText({
|
|
1781
|
+
partial: [pageNumber],
|
|
1782
|
+
});
|
|
1783
|
+
const text = typeof pageResult?.text === 'string' ? pageResult.text : '';
|
|
1784
|
+
pageTexts.push(text);
|
|
1785
|
+
totalChars += text.length;
|
|
1786
|
+
}
|
|
1787
|
+
return {
|
|
1788
|
+
numPages,
|
|
1789
|
+
pageTexts,
|
|
1790
|
+
totalChars,
|
|
1791
|
+
};
|
|
1792
|
+
}
|
|
1793
|
+
finally {
|
|
1794
|
+
if (typeof parser.destroy === 'function') {
|
|
1795
|
+
await parser.destroy();
|
|
1796
|
+
}
|
|
1797
|
+
}
|
|
1798
|
+
}
|
|
1799
|
+
}
|
|
1800
|
+
catch {
|
|
1801
|
+
// Fall back to the compatibility parser.
|
|
1802
|
+
}
|
|
1803
|
+
const parsed = await extractPdfText(buffer);
|
|
1804
|
+
const text = parsed.text ?? '';
|
|
1805
|
+
return {
|
|
1806
|
+
numPages: Math.max(parsed.totalPages, 1),
|
|
1807
|
+
pageTexts: [text],
|
|
1808
|
+
totalChars: text.length,
|
|
1809
|
+
};
|
|
1810
|
+
};
|
|
1811
|
+
try {
|
|
1812
|
+
let content = '';
|
|
1813
|
+
let sourceName = 'content';
|
|
1814
|
+
if (source.toLowerCase() === 'page') {
|
|
1815
|
+
if (!browser_session) {
|
|
1816
|
+
throw new BrowserError('Browser session missing for page content.');
|
|
1817
|
+
}
|
|
1818
|
+
const page = await browser_session.get_current_page();
|
|
1819
|
+
if (!page?.content) {
|
|
1820
|
+
throw new BrowserError('No active page available to read content.');
|
|
1821
|
+
}
|
|
1822
|
+
const html = await page.content();
|
|
1823
|
+
const pageUrl = getCurrentPageUrl(page);
|
|
1824
|
+
const markdown = extractCleanMarkdownFromHtml(html || '', {
|
|
1825
|
+
extract_links: false,
|
|
1826
|
+
method: 'page_content',
|
|
1827
|
+
url: pageUrl || undefined,
|
|
1828
|
+
});
|
|
1829
|
+
content = markdown.content;
|
|
1830
|
+
sourceName = 'current page';
|
|
1831
|
+
if (!content) {
|
|
1832
|
+
return new ActionResult({
|
|
1833
|
+
extracted_content: 'Error: No page content available',
|
|
1834
|
+
long_term_memory: 'Failed to read page: no content',
|
|
1835
|
+
});
|
|
1836
|
+
}
|
|
1837
|
+
}
|
|
1838
|
+
else {
|
|
1839
|
+
const allowedPaths = new Set(Array.isArray(available_file_paths) ? available_file_paths : []);
|
|
1840
|
+
const downloadedFiles = Array.isArray(browser_session?.downloaded_files)
|
|
1841
|
+
? browser_session.downloaded_files
|
|
1842
|
+
: [];
|
|
1843
|
+
for (const filePath of downloadedFiles) {
|
|
1844
|
+
allowedPaths.add(filePath);
|
|
1845
|
+
}
|
|
1846
|
+
if (!allowedPaths.has(source)) {
|
|
1847
|
+
const message = `Error: File path not in available_file_paths: ${source}. ` +
|
|
1848
|
+
'The user must add this path to available_file_paths when creating the Agent.';
|
|
1849
|
+
return new ActionResult({
|
|
1850
|
+
extracted_content: message,
|
|
1851
|
+
long_term_memory: `Failed to read: file path not allowed: ${source}`,
|
|
1852
|
+
});
|
|
1853
|
+
}
|
|
1854
|
+
if (!fs.existsSync(source)) {
|
|
1855
|
+
return new ActionResult({
|
|
1856
|
+
extracted_content: `Error: File not found: ${source}`,
|
|
1857
|
+
long_term_memory: 'Failed to read: file not found',
|
|
1858
|
+
});
|
|
1859
|
+
}
|
|
1860
|
+
const ext = path.extname(source).toLowerCase();
|
|
1861
|
+
sourceName = path.basename(source);
|
|
1862
|
+
if (ext === '.pdf') {
|
|
1863
|
+
const pdfData = await readPdfByPage(source);
|
|
1864
|
+
const numPages = pdfData.numPages;
|
|
1865
|
+
const pageTexts = pdfData.pageTexts;
|
|
1866
|
+
const totalChars = pdfData.totalChars;
|
|
1867
|
+
if (totalChars <= maxChars) {
|
|
1868
|
+
const contentParts = [];
|
|
1869
|
+
for (let pageIndex = 0; pageIndex < pageTexts.length; pageIndex += 1) {
|
|
1870
|
+
const pageText = pageTexts[pageIndex] ?? '';
|
|
1871
|
+
if (!pageText.trim()) {
|
|
1872
|
+
continue;
|
|
1873
|
+
}
|
|
1874
|
+
contentParts.push(`--- Page ${pageIndex + 1} ---\n${pageText}`);
|
|
1875
|
+
}
|
|
1876
|
+
const allContent = contentParts.join('\n\n');
|
|
1877
|
+
return new ActionResult({
|
|
1878
|
+
extracted_content: `PDF: ${sourceName} (${numPages} pages)\n\n${allContent}`,
|
|
1879
|
+
long_term_memory: `Read ${sourceName} (${numPages} pages, ${totalChars.toLocaleString()} chars) for goal: ${goal.slice(0, 50)}`,
|
|
1880
|
+
include_extracted_content_only_once: true,
|
|
1881
|
+
});
|
|
1882
|
+
}
|
|
1883
|
+
const searchTerms = await extractSearchTerms();
|
|
1884
|
+
const pageScores = new Map();
|
|
1885
|
+
for (const term of searchTerms) {
|
|
1886
|
+
if (!term.trim()) {
|
|
1887
|
+
continue;
|
|
1888
|
+
}
|
|
1889
|
+
const pattern = new RegExp(escapeRegExp(term), 'i');
|
|
1890
|
+
for (let pageIndex = 0; pageIndex < pageTexts.length; pageIndex += 1) {
|
|
1891
|
+
const pageText = pageTexts[pageIndex] ?? '';
|
|
1892
|
+
if (pattern.test(pageText)) {
|
|
1893
|
+
const pageNumber = pageIndex + 1;
|
|
1894
|
+
pageScores.set(pageNumber, (pageScores.get(pageNumber) ?? 0) + 1);
|
|
1895
|
+
}
|
|
1896
|
+
}
|
|
1897
|
+
}
|
|
1898
|
+
const pagesToRead = [1];
|
|
1899
|
+
const sortedPages = Array.from(pageScores.entries()).sort((a, b) => b[1] - a[1]);
|
|
1900
|
+
for (const [pageNumber] of sortedPages) {
|
|
1901
|
+
if (!pagesToRead.includes(pageNumber)) {
|
|
1902
|
+
pagesToRead.push(pageNumber);
|
|
1903
|
+
}
|
|
1904
|
+
}
|
|
1905
|
+
const contentParts = [];
|
|
1906
|
+
let charsUsed = 0;
|
|
1907
|
+
const pagesIncluded = [];
|
|
1908
|
+
const pageOrder = Array.from(new Set(pagesToRead)).sort((a, b) => a - b);
|
|
1909
|
+
for (const pageNumber of pageOrder) {
|
|
1910
|
+
const pageText = pageTexts[pageNumber - 1] ?? '';
|
|
1911
|
+
const pageHeader = `--- Page ${pageNumber} ---\n`;
|
|
1912
|
+
const remaining = maxChars - charsUsed;
|
|
1913
|
+
if (remaining < pageHeader.length + 50) {
|
|
1914
|
+
break;
|
|
1915
|
+
}
|
|
1916
|
+
let pageContent = `${pageHeader}${pageText}`;
|
|
1917
|
+
if (pageContent.length > remaining) {
|
|
1918
|
+
const truncationSuffix = '\n[...truncated]';
|
|
1919
|
+
pageContent =
|
|
1920
|
+
pageContent.slice(0, remaining - truncationSuffix.length) +
|
|
1921
|
+
truncationSuffix;
|
|
1922
|
+
}
|
|
1923
|
+
contentParts.push(pageContent);
|
|
1924
|
+
charsUsed += pageContent.length;
|
|
1925
|
+
pagesIncluded.push(pageNumber);
|
|
1926
|
+
}
|
|
1927
|
+
const partialPdfContent = contentParts.join('\n\n');
|
|
1928
|
+
return new ActionResult({
|
|
1929
|
+
extracted_content: `PDF: ${sourceName} (${numPages} pages, showing ${pagesIncluded.length} relevant)\n\n` +
|
|
1930
|
+
partialPdfContent,
|
|
1931
|
+
long_term_memory: `Read ${sourceName} (${pagesIncluded.length} relevant pages of ${numPages}) ` +
|
|
1932
|
+
`for goal: ${goal.slice(0, 50)}`,
|
|
1933
|
+
include_extracted_content_only_once: true,
|
|
1934
|
+
});
|
|
1935
|
+
}
|
|
1936
|
+
const fileBuffer = await fsp.readFile(source);
|
|
1937
|
+
content = fileBuffer.toString('utf-8');
|
|
1938
|
+
}
|
|
1939
|
+
if (!content.trim()) {
|
|
1940
|
+
return new ActionResult({
|
|
1941
|
+
extracted_content: `Error: No readable content found in ${sourceName}`,
|
|
1942
|
+
long_term_memory: `Failed to read ${sourceName}: no content`,
|
|
1943
|
+
});
|
|
1944
|
+
}
|
|
1945
|
+
if (content.length <= maxChars) {
|
|
1946
|
+
return new ActionResult({
|
|
1947
|
+
extracted_content: `Content from ${sourceName} (${content.length.toLocaleString()} chars):\n\n${content}`,
|
|
1948
|
+
long_term_memory: `Read ${sourceName} (${content.length.toLocaleString()} chars) for goal: ${goal.slice(0, 50)}`,
|
|
1949
|
+
include_extracted_content_only_once: true,
|
|
1950
|
+
});
|
|
1951
|
+
}
|
|
1952
|
+
const searchTerms = await extractSearchTerms();
|
|
1953
|
+
const chunks = chunkContent(content, chunkSize);
|
|
1954
|
+
const chunkScores = new Map();
|
|
1955
|
+
for (const term of searchTerms) {
|
|
1956
|
+
const matches = searchText(content, term);
|
|
1957
|
+
for (const match of matches) {
|
|
1958
|
+
for (let index = 0; index < chunks.length; index += 1) {
|
|
1959
|
+
const chunk = chunks[index];
|
|
1960
|
+
if (chunk &&
|
|
1961
|
+
chunk.start <= match.position &&
|
|
1962
|
+
match.position < chunk.end) {
|
|
1963
|
+
chunkScores.set(index, (chunkScores.get(index) ?? 0) + 1);
|
|
1964
|
+
break;
|
|
1965
|
+
}
|
|
1966
|
+
}
|
|
1967
|
+
}
|
|
1968
|
+
}
|
|
1969
|
+
if (!chunkScores.size) {
|
|
1970
|
+
const truncated = content.slice(0, maxChars);
|
|
1971
|
+
return new ActionResult({
|
|
1972
|
+
extracted_content: `Content from ${sourceName} (first ${maxChars.toLocaleString()} of ${content.length.toLocaleString()} chars):\n\n${truncated}`,
|
|
1973
|
+
long_term_memory: `Read ${sourceName} (truncated to ${maxChars.toLocaleString()} chars, no matches for search terms)`,
|
|
1974
|
+
include_extracted_content_only_once: true,
|
|
1975
|
+
});
|
|
1976
|
+
}
|
|
1977
|
+
const sortedChunks = Array.from(chunkScores.entries()).sort((a, b) => b[1] - a[1]);
|
|
1978
|
+
const selectedIndices = new Set([0]);
|
|
1979
|
+
for (const [chunkIndex] of sortedChunks) {
|
|
1980
|
+
selectedIndices.add(chunkIndex);
|
|
1981
|
+
}
|
|
1982
|
+
const resultParts = [];
|
|
1983
|
+
let totalChars = 0;
|
|
1984
|
+
const orderedIndices = Array.from(selectedIndices).sort((a, b) => a - b);
|
|
1985
|
+
for (const index of orderedIndices) {
|
|
1986
|
+
const chunk = chunks[index];
|
|
1987
|
+
if (!chunk) {
|
|
1988
|
+
continue;
|
|
1989
|
+
}
|
|
1990
|
+
if (totalChars + chunk.text.length > maxChars) {
|
|
1991
|
+
break;
|
|
1992
|
+
}
|
|
1993
|
+
const previousIndex = index - 1;
|
|
1994
|
+
if (index > 0 && !selectedIndices.has(previousIndex)) {
|
|
1995
|
+
resultParts.push('\n[...]\n');
|
|
1996
|
+
}
|
|
1997
|
+
resultParts.push(chunk.text);
|
|
1998
|
+
totalChars += chunk.text.length;
|
|
1999
|
+
}
|
|
2000
|
+
const resultContent = resultParts.join('');
|
|
2001
|
+
return new ActionResult({
|
|
2002
|
+
extracted_content: `Content from ${sourceName} (relevant sections, ${totalChars.toLocaleString()} of ${content.length.toLocaleString()} chars):\n\n` +
|
|
2003
|
+
resultContent,
|
|
2004
|
+
long_term_memory: `Read ${sourceName} (${selectedIndices.size} relevant sections of ${chunks.length}) ` +
|
|
2005
|
+
`for goal: ${goal.slice(0, 50)}`,
|
|
2006
|
+
include_extracted_content_only_once: true,
|
|
2007
|
+
});
|
|
2008
|
+
}
|
|
2009
|
+
catch (error) {
|
|
2010
|
+
if (isAbortError(error)) {
|
|
2011
|
+
throw error;
|
|
2012
|
+
}
|
|
2013
|
+
const errorMessage = `Error reading content: ${String(error.message ?? error)}`;
|
|
2014
|
+
return new ActionResult({
|
|
2015
|
+
extracted_content: errorMessage,
|
|
2016
|
+
long_term_memory: errorMessage,
|
|
2017
|
+
});
|
|
2018
|
+
}
|
|
2019
|
+
});
|
|
2020
|
+
this.registry.action('Write content to a file. By default this OVERWRITES the entire file - use append=true to add to an existing file, or use replace_file for targeted edits within a file. ' +
|
|
2021
|
+
'FILENAME RULES: Use only letters, numbers, underscores, hyphens, dots, parentheses. Spaces are auto-converted to hyphens. ' +
|
|
2022
|
+
'SUPPORTED EXTENSIONS: .txt, .md, .json, .jsonl, .csv, .html, .xml, .pdf, .docx. ' +
|
|
2023
|
+
'CANNOT write binary/image files (.png, .jpg, .mp4, etc.) - do not attempt to save screenshots as files. ' +
|
|
2024
|
+
'For PDF files, write content in markdown format and it will be auto-converted to PDF.', {
|
|
773
2025
|
param_model: WriteFileActionSchema,
|
|
774
2026
|
})(async function write_file(params, { file_system }) {
|
|
775
2027
|
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
@@ -786,49 +2038,168 @@ ${content}`;
|
|
|
786
2038
|
const result = append
|
|
787
2039
|
? await fsInstance.append_file(params.file_name, content)
|
|
788
2040
|
: await fsInstance.write_file(params.file_name, content);
|
|
789
|
-
const msg = `📝 ${result}`;
|
|
790
2041
|
return new ActionResult({
|
|
791
2042
|
extracted_content: result,
|
|
792
|
-
include_in_memory: true,
|
|
793
2043
|
long_term_memory: result,
|
|
794
2044
|
});
|
|
795
2045
|
});
|
|
796
|
-
this.registry.action('Replace text within
|
|
2046
|
+
// Registers the `replace_file_str` action: a targeted in-file replacement of
// `old_str` with `new_str`, delegated to the file-system object's
// `replace_file_str` method. The string that method resolves to is returned
// unchanged as both the extracted content and the long-term memory entry.
this.registry.action('Replace specific text within a file by searching for old_str and replacing with new_str. Use this for targeted edits like updating todo checkboxes or modifying specific lines without rewriting the entire file.', { param_model: ReplaceFileStrActionSchema })(async function replace_file_str(params, { file_system }) {
    // Use the file system supplied by the execution context when present;
    // otherwise build one rooted at the current working directory.
    const targetFs = file_system ?? new FileSystem(process.cwd(), false);
    const outcome = await targetFs.replace_file_str(
        params.file_name,
        params.old_str,
        params.new_str,
    );
    return new ActionResult({
        extracted_content: outcome,
        long_term_memory: outcome,
    });
});
|
|
2056
|
+
// Registers `replace_file` as a second action name for the same operation as
// `replace_file_str`: the handler does no work of its own and simply forwards
// its params and context to the `replace_file_str` action.
// NOTE(review): `registry` is expected to be a local declared earlier in the
// enclosing method - confirm it is in scope here.
this.registry.action('Replace specific text within a file by searching for old_str and replacing with new_str. Use this for targeted edits like updating todo checkboxes or modifying specific lines without rewriting the entire file.', {
    param_model: ReplaceFileStrActionSchema,
    action_name: 'replace_file',
})(async function replace_file(params, ctx) {
    const forwarded = registry.execute_action('replace_file_str', params, ctx);
    return forwarded;
});
|
|
807
2062
|
}
|
|
808
|
-
|
|
809
|
-
this.registry.action('
|
|
810
|
-
param_model: SendKeysActionSchema,
|
|
811
|
-
})(async function send_keys(params, { browser_session }) {
|
|
2063
|
+
registerUtilityActions() {
|
|
2064
|
+
this.registry.action('Take a screenshot of the current viewport. If file_name is provided, saves to that file and returns the path. Otherwise, screenshot is included in the next browser_state observation.', { param_model: ScreenshotActionSchema })(async function screenshot(params, { browser_session, file_system, signal }) {
|
|
812
2065
|
if (!browser_session)
|
|
813
2066
|
throw new Error('Browser session missing');
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
2067
|
+
throwIfAborted(signal);
|
|
2068
|
+
if (params.file_name) {
|
|
2069
|
+
const screenshotB64 = await dispatchBrowserEventIfAvailable(browser_session, new ScreenshotEvent({
|
|
2070
|
+
full_page: false,
|
|
2071
|
+
}), async () => (await browser_session.take_screenshot?.(false)) ?? null);
|
|
2072
|
+
if (!screenshotB64) {
|
|
2073
|
+
return new ActionResult({
|
|
2074
|
+
error: 'Failed to capture screenshot.',
|
|
2075
|
+
});
|
|
2076
|
+
}
|
|
2077
|
+
const fsInstance = file_system ?? new FileSystem(process.cwd(), false);
|
|
2078
|
+
let fileName = params.file_name;
|
|
2079
|
+
if (!fileName.toLowerCase().endsWith('.png')) {
|
|
2080
|
+
fileName = `${fileName}.png`;
|
|
2081
|
+
}
|
|
2082
|
+
fileName = FileSystem.sanitize_filename(fileName);
|
|
2083
|
+
const filePath = path.join(fsInstance.get_dir(), fileName);
|
|
2084
|
+
await fsp.writeFile(filePath, Buffer.from(screenshotB64, 'base64'));
|
|
2085
|
+
const msg = `📸 Saved screenshot to ${filePath}`;
|
|
2086
|
+
return new ActionResult({
|
|
2087
|
+
extracted_content: msg,
|
|
2088
|
+
long_term_memory: msg,
|
|
2089
|
+
attachments: [filePath],
|
|
2090
|
+
});
|
|
818
2091
|
}
|
|
819
|
-
|
|
820
|
-
|
|
2092
|
+
return new ActionResult({
|
|
2093
|
+
extracted_content: 'Requested screenshot for next observation',
|
|
2094
|
+
metadata: {
|
|
2095
|
+
include_screenshot: true,
|
|
2096
|
+
},
|
|
2097
|
+
});
|
|
2098
|
+
});
|
|
2099
|
+
this.registry.action('Execute browser JavaScript on the current page and return the result.', { param_model: EvaluateActionSchema })(async function evaluate(params, { browser_session, signal }) {
|
|
2100
|
+
if (!browser_session)
|
|
2101
|
+
throw new Error('Browser session missing');
|
|
2102
|
+
throwIfAborted(signal);
|
|
2103
|
+
const page = await browser_session.get_current_page();
|
|
2104
|
+
if (!page?.evaluate) {
|
|
2105
|
+
throw new BrowserError('No active page available for evaluate.');
|
|
821
2106
|
}
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
2107
|
+
const validatedCode = validateAndFixJavaScript(params.code);
|
|
2108
|
+
const payload = (await page.evaluate(async ({ code }) => {
|
|
2109
|
+
const serialize = (value) => {
|
|
2110
|
+
if (value === undefined) {
|
|
2111
|
+
return null;
|
|
2112
|
+
}
|
|
2113
|
+
try {
|
|
2114
|
+
return JSON.parse(JSON.stringify(value));
|
|
826
2115
|
}
|
|
2116
|
+
catch {
|
|
2117
|
+
return String(value);
|
|
2118
|
+
}
|
|
2119
|
+
};
|
|
2120
|
+
try {
|
|
2121
|
+
const raw = await Promise.resolve((0, eval)(code));
|
|
2122
|
+
return { ok: true, result: serialize(raw) };
|
|
827
2123
|
}
|
|
828
|
-
|
|
829
|
-
|
|
2124
|
+
catch (error) {
|
|
2125
|
+
return {
|
|
2126
|
+
ok: false,
|
|
2127
|
+
error: error instanceof Error
|
|
2128
|
+
? error.message
|
|
2129
|
+
: String(error ?? 'Unknown evaluate error'),
|
|
2130
|
+
};
|
|
2131
|
+
}
|
|
2132
|
+
}, { code: validatedCode }));
|
|
2133
|
+
if (!payload) {
|
|
2134
|
+
return new ActionResult({ error: 'evaluate returned no result' });
|
|
2135
|
+
}
|
|
2136
|
+
if (!payload.ok) {
|
|
2137
|
+
const codePreview = validatedCode.length > 500
|
|
2138
|
+
? `${validatedCode.slice(0, 500)}...`
|
|
2139
|
+
: validatedCode;
|
|
2140
|
+
return new ActionResult({
|
|
2141
|
+
error: `JavaScript Execution Failed:\n` +
|
|
2142
|
+
`JavaScript execution error: ${payload.error ?? 'Unknown error'}\n\n` +
|
|
2143
|
+
`Validated Code (after quote fixing):\n${codePreview}`,
|
|
2144
|
+
});
|
|
2145
|
+
}
|
|
2146
|
+
let rendered = typeof payload.result === 'string'
|
|
2147
|
+
? payload.result
|
|
2148
|
+
: JSON.stringify(payload.result);
|
|
2149
|
+
const imagePattern = /(data:image\/[^;]+;base64,[A-Za-z0-9+/=]+)/g;
|
|
2150
|
+
const foundImages = rendered.match(imagePattern) ?? [];
|
|
2151
|
+
let metadata = null;
|
|
2152
|
+
if (foundImages.length > 0) {
|
|
2153
|
+
metadata = { images: foundImages };
|
|
2154
|
+
for (const imageData of foundImages) {
|
|
2155
|
+
rendered = rendered.split(imageData).join('[Image]');
|
|
830
2156
|
}
|
|
831
2157
|
}
|
|
2158
|
+
const maxChars = 20000;
|
|
2159
|
+
if (rendered.length > maxChars) {
|
|
2160
|
+
rendered = `${rendered.slice(0, maxChars - 50)}\n... [Truncated after 20000 characters]`;
|
|
2161
|
+
}
|
|
2162
|
+
const maxMemoryChars = 10000;
|
|
2163
|
+
const includeExtractedContentOnlyOnce = rendered.length >= maxMemoryChars;
|
|
2164
|
+
const longTermMemory = includeExtractedContentOnlyOnce
|
|
2165
|
+
? `JavaScript executed successfully, result length: ${rendered.length} characters.`
|
|
2166
|
+
: rendered;
|
|
2167
|
+
return new ActionResult({
|
|
2168
|
+
extracted_content: rendered,
|
|
2169
|
+
long_term_memory: longTermMemory,
|
|
2170
|
+
include_extracted_content_only_once: includeExtractedContentOnlyOnce,
|
|
2171
|
+
metadata,
|
|
2172
|
+
});
|
|
2173
|
+
});
|
|
2174
|
+
}
|
|
2175
|
+
registerKeyboardActions() {
|
|
2176
|
+
this.registry.action('Send keys to the active page', {
|
|
2177
|
+
param_model: SendKeysActionSchema,
|
|
2178
|
+
})(async function send_keys(params, { browser_session }) {
|
|
2179
|
+
if (!browser_session)
|
|
2180
|
+
throw new Error('Browser session missing');
|
|
2181
|
+
await dispatchBrowserEventIfAvailable(browser_session, new SendKeysEvent({ keys: params.keys }), async () => {
|
|
2182
|
+
const page = await browser_session.get_current_page();
|
|
2183
|
+
const keyboard = page?.keyboard;
|
|
2184
|
+
if (!keyboard) {
|
|
2185
|
+
throw new BrowserError('Keyboard input is not available on the current page.');
|
|
2186
|
+
}
|
|
2187
|
+
try {
|
|
2188
|
+
await keyboard.press(params.keys);
|
|
2189
|
+
}
|
|
2190
|
+
catch (error) {
|
|
2191
|
+
if (error instanceof Error &&
|
|
2192
|
+
error.message.includes('Unknown key')) {
|
|
2193
|
+
for (const char of params.keys) {
|
|
2194
|
+
await keyboard.press(char);
|
|
2195
|
+
}
|
|
2196
|
+
}
|
|
2197
|
+
else {
|
|
2198
|
+
throw error;
|
|
2199
|
+
}
|
|
2200
|
+
}
|
|
2201
|
+
return null;
|
|
2202
|
+
});
|
|
832
2203
|
const msg = `⌨️ Sent keys: ${params.keys}`;
|
|
833
2204
|
return new ActionResult({
|
|
834
2205
|
extracted_content: msg,
|
|
@@ -838,15 +2209,42 @@ ${content}`;
|
|
|
838
2209
|
});
|
|
839
2210
|
}
|
|
840
2211
|
registerDropdownActions() {
|
|
2212
|
+
const registry = this.registry;
|
|
2213
|
+
const dropdownLogger = this.logger;
|
|
2214
|
+
// Renders dropdown options as a newline-separated bullet list, one line per
// option, showing its index plus the JSON-quoted text and value.
const formatAvailableOptions = (options) => {
    const describeOption = (opt) => {
        const label = `[${opt.index}]`;
        const quotedText = JSON.stringify(opt.text);
        const quotedValue = JSON.stringify(opt.value);
        return ` - ${label} text=${quotedText} value=${quotedValue}`;
    };
    return options.map((opt) => describeOption(opt)).join('\n');
};
|
|
841
2217
|
this.registry.action('Get all options from a native dropdown or ARIA menu', { param_model: DropdownOptionsActionSchema })(async function get_dropdown_options(params, { browser_session, signal }) {
|
|
842
2218
|
if (!browser_session)
|
|
843
2219
|
throw new Error('Browser session missing');
|
|
844
2220
|
throwIfAborted(signal);
|
|
845
|
-
const page = await browser_session.get_current_page();
|
|
846
2221
|
const domElement = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
847
2222
|
if (!domElement) {
|
|
848
|
-
|
|
2223
|
+
const msg = `Element index ${params.index} not available - page may have changed. Try refreshing browser state.`;
|
|
2224
|
+
dropdownLogger.warning(`⚠️ ${msg}`);
|
|
2225
|
+
return new ActionResult({
|
|
2226
|
+
extracted_content: msg,
|
|
2227
|
+
});
|
|
2228
|
+
}
|
|
2229
|
+
if (typeof browser_session.dispatch_browser_event === 'function') {
|
|
2230
|
+
const dispatchResult = await browser_session.dispatch_browser_event(new GetDropdownOptionsEvent({ node: domElement }));
|
|
2231
|
+
const eventResult = dispatchResult?.event?.event_result;
|
|
2232
|
+
const eventMessage = eventResult?.message ??
|
|
2233
|
+
eventResult?.short_term_memory ??
|
|
2234
|
+
eventResult?.formatted_options ??
|
|
2235
|
+
null;
|
|
2236
|
+
if (eventMessage) {
|
|
2237
|
+
const memory = eventResult?.long_term_memory ??
|
|
2238
|
+
`Found dropdown options for index ${params.index}.`;
|
|
2239
|
+
return new ActionResult({
|
|
2240
|
+
extracted_content: eventMessage,
|
|
2241
|
+
include_in_memory: true,
|
|
2242
|
+
include_extracted_content_only_once: true,
|
|
2243
|
+
long_term_memory: memory,
|
|
2244
|
+
});
|
|
2245
|
+
}
|
|
849
2246
|
}
|
|
2247
|
+
const page = await browser_session.get_current_page();
|
|
850
2248
|
if (!page?.evaluate) {
|
|
851
2249
|
throw new BrowserError('Unable to evaluate dropdown options on current page.');
|
|
852
2250
|
}
|
|
@@ -859,8 +2257,8 @@ ${content}`;
|
|
|
859
2257
|
return null;
|
|
860
2258
|
if (element.tagName?.toLowerCase() === 'select') {
|
|
861
2259
|
const options = Array.from(element.options).map((opt, index) => ({
|
|
862
|
-
text: opt.
|
|
863
|
-
value: opt.value,
|
|
2260
|
+
text: opt.textContent?.trim() ?? '',
|
|
2261
|
+
value: (opt.value ?? '').trim(),
|
|
864
2262
|
index,
|
|
865
2263
|
}));
|
|
866
2264
|
return { type: 'select', options };
|
|
@@ -881,8 +2279,8 @@ ${content}`;
|
|
|
881
2279
|
if (!payload || !payload.options?.length) {
|
|
882
2280
|
throw new BrowserError('No options found for the specified dropdown.');
|
|
883
2281
|
}
|
|
884
|
-
const formatted = payload.options.map((opt) => `${opt.index}: text=${JSON.stringify(opt.text ?? '')}`);
|
|
885
|
-
formatted.push('
|
|
2282
|
+
const formatted = payload.options.map((opt) => `${opt.index}: text=${JSON.stringify(opt.text ?? '')}, value=${JSON.stringify(opt.value ?? '')}`);
|
|
2283
|
+
formatted.push('Prefer exact text first; if needed select_dropdown_option also supports case-insensitive text/value matching.');
|
|
886
2284
|
const message = formatted.join('\n');
|
|
887
2285
|
return new ActionResult({
|
|
888
2286
|
extracted_content: message,
|
|
@@ -891,17 +2289,49 @@ ${content}`;
|
|
|
891
2289
|
long_term_memory: `Found dropdown options for index ${params.index}.`,
|
|
892
2290
|
});
|
|
893
2291
|
});
|
|
2292
|
+
// Registers `dropdown_options` as a second action name for
// `get_dropdown_options`: the handler forwards its params and context to that
// action through the registry and returns whatever it produces.
this.registry.action('Get all options from a native dropdown or ARIA menu', {
    param_model: DropdownOptionsActionSchema,
    action_name: 'dropdown_options',
})(async function dropdown_options(params, ctx) {
    const forwarded = registry.execute_action('get_dropdown_options', params, ctx);
    return forwarded;
});
|
|
894
2298
|
this.registry.action('Select dropdown option or ARIA menu item by text', {
|
|
895
2299
|
param_model: SelectDropdownActionSchema,
|
|
896
2300
|
})(async function select_dropdown_option(params, { browser_session, signal }) {
|
|
897
2301
|
if (!browser_session)
|
|
898
2302
|
throw new Error('Browser session missing');
|
|
899
2303
|
throwIfAborted(signal);
|
|
900
|
-
const page = await browser_session.get_current_page();
|
|
901
2304
|
const domElement = await browser_session.get_dom_element_by_index(params.index, { signal });
|
|
2305
|
+
if (!domElement) {
|
|
2306
|
+
const msg = `Element index ${params.index} not available - page may have changed. Try refreshing browser state.`;
|
|
2307
|
+
dropdownLogger.warning(`⚠️ ${msg}`);
|
|
2308
|
+
return new ActionResult({
|
|
2309
|
+
extracted_content: msg,
|
|
2310
|
+
});
|
|
2311
|
+
}
|
|
902
2312
|
if (!domElement?.xpath) {
|
|
903
2313
|
throw new BrowserError('DOM element does not include an XPath selector.');
|
|
904
2314
|
}
|
|
2315
|
+
if (typeof browser_session.dispatch_browser_event === 'function') {
|
|
2316
|
+
const dispatchResult = await browser_session.dispatch_browser_event(new SelectDropdownOptionEvent({
|
|
2317
|
+
node: domElement,
|
|
2318
|
+
text: params.text,
|
|
2319
|
+
}));
|
|
2320
|
+
const eventResult = dispatchResult?.event?.event_result;
|
|
2321
|
+
const eventMessage = eventResult?.message ??
|
|
2322
|
+
eventResult?.short_term_memory ??
|
|
2323
|
+
eventResult?.matched_text ??
|
|
2324
|
+
null;
|
|
2325
|
+
if (eventMessage) {
|
|
2326
|
+
const memory = eventResult?.long_term_memory ?? eventMessage;
|
|
2327
|
+
return new ActionResult({
|
|
2328
|
+
extracted_content: eventMessage,
|
|
2329
|
+
include_in_memory: true,
|
|
2330
|
+
long_term_memory: memory,
|
|
2331
|
+
});
|
|
2332
|
+
}
|
|
2333
|
+
}
|
|
2334
|
+
const page = await browser_session.get_current_page();
|
|
905
2335
|
if (!page) {
|
|
906
2336
|
throw new BrowserError('No active page for selection.');
|
|
907
2337
|
}
|
|
@@ -922,45 +2352,121 @@ ${content}`;
|
|
|
922
2352
|
if (!typeInfo?.found)
|
|
923
2353
|
continue;
|
|
924
2354
|
if (typeInfo.type === 'select') {
|
|
925
|
-
await frame
|
|
926
|
-
.
|
|
927
|
-
.
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
2355
|
+
const selection = await frame.evaluate(({ xpath, text }) => {
|
|
2356
|
+
const root = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
2357
|
+
if (!root || root.tagName?.toLowerCase() !== 'select') {
|
|
2358
|
+
return { found: false };
|
|
2359
|
+
}
|
|
2360
|
+
const options = Array.from(root.options).map((opt, index) => ({
|
|
2361
|
+
index,
|
|
2362
|
+
text: opt.textContent?.trim() ?? '',
|
|
2363
|
+
value: (opt.value ?? '').trim(),
|
|
2364
|
+
}));
|
|
2365
|
+
const normalize = (value) => value.trim().toLowerCase();
|
|
2366
|
+
const targetRaw = text.trim();
|
|
2367
|
+
const targetLower = normalize(text);
|
|
2368
|
+
let matchedIndex = options.findIndex((opt) => opt.text === targetRaw || opt.value === targetRaw);
|
|
2369
|
+
if (matchedIndex < 0) {
|
|
2370
|
+
matchedIndex = options.findIndex((opt) => normalize(opt.text) === targetLower ||
|
|
2371
|
+
normalize(opt.value) === targetLower);
|
|
2372
|
+
}
|
|
2373
|
+
if (matchedIndex < 0) {
|
|
2374
|
+
return { found: true, success: false, options };
|
|
2375
|
+
}
|
|
2376
|
+
const matched = options[matchedIndex];
|
|
2377
|
+
root.value = matched.value;
|
|
2378
|
+
root.dispatchEvent(new Event('input', { bubbles: true }));
|
|
2379
|
+
root.dispatchEvent(new Event('change', { bubbles: true }));
|
|
2380
|
+
const selectedOption = root.selectedIndex >= 0
|
|
2381
|
+
? root.options[root.selectedIndex]
|
|
2382
|
+
: null;
|
|
2383
|
+
const selectedText = selectedOption?.textContent?.trim() ?? '';
|
|
2384
|
+
const selectedValue = (root.value ?? '').trim();
|
|
2385
|
+
const verified = normalize(selectedValue) === normalize(matched.value) ||
|
|
2386
|
+
normalize(selectedText) === normalize(matched.text);
|
|
2387
|
+
return {
|
|
2388
|
+
found: true,
|
|
2389
|
+
success: verified,
|
|
2390
|
+
options,
|
|
2391
|
+
selectedText,
|
|
2392
|
+
selectedValue,
|
|
2393
|
+
matched,
|
|
2394
|
+
};
|
|
2395
|
+
}, { xpath: domElement.xpath, text: params.text });
|
|
2396
|
+
if (selection?.found && selection.success) {
|
|
2397
|
+
const matchedText = selection.matched?.text ?? params.text;
|
|
2398
|
+
const matchedValue = selection.matched?.value ?? '';
|
|
2399
|
+
const msg = `Selected option ${matchedText} (${matchedValue})`;
|
|
2400
|
+
return new ActionResult({
|
|
2401
|
+
extracted_content: msg,
|
|
2402
|
+
include_in_memory: true,
|
|
2403
|
+
long_term_memory: msg,
|
|
2404
|
+
});
|
|
2405
|
+
}
|
|
2406
|
+
if (selection?.found) {
|
|
2407
|
+
const details = formatAvailableOptions(selection.options ?? []);
|
|
2408
|
+
throw new BrowserError(`Could not select option '${params.text}' for index ${params.index}.\nAvailable options:\n${details}`);
|
|
2409
|
+
}
|
|
2410
|
+
continue;
|
|
935
2411
|
}
|
|
936
2412
|
const clicked = await frame.evaluate(({ xpath, text }) => {
|
|
937
2413
|
const root = document.evaluate(xpath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
938
2414
|
if (!root)
|
|
939
2415
|
return false;
|
|
940
2416
|
const nodes = root.querySelectorAll('[role="menuitem"],[role="option"]');
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
2417
|
+
const options = Array.from(nodes).map((node, index) => ({
|
|
2418
|
+
index,
|
|
2419
|
+
text: node.textContent?.trim() ?? '',
|
|
2420
|
+
value: node.textContent?.trim() ?? '',
|
|
2421
|
+
}));
|
|
2422
|
+
const normalize = (value) => value.trim().toLowerCase();
|
|
2423
|
+
const targetRaw = text.trim();
|
|
2424
|
+
const targetLower = normalize(text);
|
|
2425
|
+
let matchedIndex = options.findIndex((opt) => opt.text === targetRaw || opt.value === targetRaw);
|
|
2426
|
+
if (matchedIndex < 0) {
|
|
2427
|
+
matchedIndex = options.findIndex((opt) => normalize(opt.text) === targetLower ||
|
|
2428
|
+
normalize(opt.value) === targetLower);
|
|
2429
|
+
}
|
|
2430
|
+
if (matchedIndex < 0) {
|
|
2431
|
+
return { found: true, success: false, options };
|
|
946
2432
|
}
|
|
947
|
-
|
|
2433
|
+
nodes[matchedIndex].click();
|
|
2434
|
+
return {
|
|
2435
|
+
found: true,
|
|
2436
|
+
success: true,
|
|
2437
|
+
options,
|
|
2438
|
+
matched: options[matchedIndex],
|
|
2439
|
+
};
|
|
948
2440
|
}, { xpath: domElement.xpath, text: params.text });
|
|
949
|
-
if (clicked) {
|
|
950
|
-
const
|
|
2441
|
+
if (clicked?.found && clicked.success) {
|
|
2442
|
+
const matchedText = clicked.matched?.text ?? params.text;
|
|
2443
|
+
const msg = `Selected menu item ${matchedText}`;
|
|
951
2444
|
return new ActionResult({
|
|
952
2445
|
extracted_content: msg,
|
|
953
2446
|
include_in_memory: true,
|
|
954
2447
|
long_term_memory: msg,
|
|
955
2448
|
});
|
|
956
2449
|
}
|
|
2450
|
+
if (clicked?.found) {
|
|
2451
|
+
const details = formatAvailableOptions(clicked.options ?? []);
|
|
2452
|
+
throw new BrowserError(`Could not select option '${params.text}' for index ${params.index}.\nAvailable options:\n${details}`);
|
|
2453
|
+
}
|
|
957
2454
|
}
|
|
958
2455
|
catch (error) {
|
|
2456
|
+
if (error instanceof BrowserError) {
|
|
2457
|
+
throw error;
|
|
2458
|
+
}
|
|
959
2459
|
continue;
|
|
960
2460
|
}
|
|
961
2461
|
}
|
|
962
2462
|
throw new BrowserError(`Could not select option '${params.text}' for index ${params.index}`);
|
|
963
2463
|
});
|
|
2464
|
+
// Registers `select_dropdown` as a second action name for
// `select_dropdown_option`: the handler forwards its params and context to
// that action through the registry and returns whatever it produces.
this.registry.action('Select dropdown option or ARIA menu item by text', {
    param_model: SelectDropdownActionSchema,
    action_name: 'select_dropdown',
})(async function select_dropdown(params, ctx) {
    const forwarded = registry.execute_action('select_dropdown_option', params, ctx);
    return forwarded;
});
|
|
964
2470
|
}
|
|
965
2471
|
registerSheetsActions() {
|
|
966
2472
|
const gotoSheetsRange = this.gotoSheetsRange.bind(this);
|
|
@@ -1090,13 +2596,11 @@ ${content}`;
|
|
|
1090
2596
|
if (outputModel) {
|
|
1091
2597
|
const structuredSchema = StructuredOutputActionSchema(outputModel);
|
|
1092
2598
|
this.registry.action('Complete task - with return text and success flag.', { param_model: structuredSchema })(async function done(params) {
|
|
1093
|
-
const payload =
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
|
|
1097
|
-
|
|
1098
|
-
}
|
|
1099
|
-
}
|
|
2599
|
+
const payload = params.data &&
|
|
2600
|
+
typeof params.data === 'object' &&
|
|
2601
|
+
!Array.isArray(params.data)
|
|
2602
|
+
? params.data
|
|
2603
|
+
: {};
|
|
1100
2604
|
return new ActionResult({
|
|
1101
2605
|
is_done: true,
|
|
1102
2606
|
success: params.success,
|
|
@@ -1121,9 +2625,6 @@ ${content}`;
|
|
|
1121
2625
|
if (displayFilesInDoneText) {
|
|
1122
2626
|
let attachmentText = '';
|
|
1123
2627
|
for (const fileName of params.files_to_display) {
|
|
1124
|
-
if (fileName === 'todo.md') {
|
|
1125
|
-
continue;
|
|
1126
|
-
}
|
|
1127
2628
|
const content = fsInstance.display_file(fileName);
|
|
1128
2629
|
if (content) {
|
|
1129
2630
|
attachmentText += `\n\n${fileName}:\n${content}`;
|
|
@@ -1137,9 +2638,6 @@ ${content}`;
|
|
|
1137
2638
|
}
|
|
1138
2639
|
else {
|
|
1139
2640
|
for (const fileName of params.files_to_display) {
|
|
1140
|
-
if (fileName === 'todo.md') {
|
|
1141
|
-
continue;
|
|
1142
|
-
}
|
|
1143
2641
|
const content = fsInstance.display_file(fileName);
|
|
1144
2642
|
if (content) {
|
|
1145
2643
|
attachments.push(fileName);
|
|
@@ -1158,8 +2656,24 @@ ${content}`;
|
|
|
1158
2656
|
});
|
|
1159
2657
|
}
|
|
1160
2658
|
use_structured_output_action(outputModel) {
|
|
2659
|
+
this.outputModel = outputModel;
|
|
1161
2660
|
this.registerDoneAction(outputModel);
|
|
1162
2661
|
}
|
|
2662
|
+
get_output_model() {
|
|
2663
|
+
return this.outputModel;
|
|
2664
|
+
}
|
|
2665
|
+
exclude_action(actionName) {
|
|
2666
|
+
this.registry.exclude_action(actionName);
|
|
2667
|
+
}
|
|
2668
|
+
set_coordinate_clicking(enabled) {
|
|
2669
|
+
const resolved = Boolean(enabled);
|
|
2670
|
+
if (resolved === this.coordinateClickingEnabled) {
|
|
2671
|
+
return;
|
|
2672
|
+
}
|
|
2673
|
+
this.coordinateClickingEnabled = resolved;
|
|
2674
|
+
this.registerClickActions();
|
|
2675
|
+
this.logger.debug(`Coordinate clicking ${resolved ? 'enabled' : 'disabled'}`);
|
|
2676
|
+
}
|
|
1163
2677
|
action(description, options = {}) {
|
|
1164
2678
|
return this.registry.action(description, options);
|
|
1165
2679
|
}
|
|
@@ -1185,11 +2699,36 @@ ${content}`;
|
|
|
1185
2699
|
if (result == null) {
|
|
1186
2700
|
return new ActionResult();
|
|
1187
2701
|
}
|
|
1188
|
-
|
|
2702
|
+
const resultType = result && typeof result === 'object'
|
|
2703
|
+
? (result.constructor?.name ?? typeof result)
|
|
2704
|
+
: typeof result;
|
|
2705
|
+
throw new Error(`Invalid action result type: ${resultType} of ${String(result)}`);
|
|
1189
2706
|
}
|
|
1190
2707
|
catch (error) {
|
|
2708
|
+
if (error instanceof BrowserError) {
|
|
2709
|
+
if (error.long_term_memory != null) {
|
|
2710
|
+
if (error.short_term_memory != null) {
|
|
2711
|
+
return new ActionResult({
|
|
2712
|
+
extracted_content: error.short_term_memory,
|
|
2713
|
+
error: error.long_term_memory,
|
|
2714
|
+
include_extracted_content_only_once: true,
|
|
2715
|
+
});
|
|
2716
|
+
}
|
|
2717
|
+
return new ActionResult({
|
|
2718
|
+
error: error.long_term_memory,
|
|
2719
|
+
});
|
|
2720
|
+
}
|
|
2721
|
+
throw error;
|
|
2722
|
+
}
|
|
2723
|
+
const message = String(error?.message ?? error ?? '');
|
|
2724
|
+
if (error instanceof Error &&
|
|
2725
|
+
message === `Error executing action ${actionName} due to timeout.`) {
|
|
2726
|
+
return new ActionResult({
|
|
2727
|
+
error: `${actionName} was not executed due to timeout.`,
|
|
2728
|
+
});
|
|
2729
|
+
}
|
|
1191
2730
|
return new ActionResult({
|
|
1192
|
-
error:
|
|
2731
|
+
error: message,
|
|
1193
2732
|
});
|
|
1194
2733
|
}
|
|
1195
2734
|
}
|