browser-use 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +295 -686
- package/dist/actor/element.d.ts +19 -0
- package/dist/actor/element.js +46 -0
- package/dist/actor/index.d.ts +4 -0
- package/dist/actor/index.js +4 -0
- package/dist/actor/mouse.d.ts +19 -0
- package/dist/actor/mouse.js +39 -0
- package/dist/actor/page.d.ts +29 -0
- package/dist/actor/page.js +88 -0
- package/dist/actor/utils.d.ts +4 -0
- package/dist/actor/utils.js +35 -0
- package/dist/agent/cloud-events.d.ts +18 -0
- package/dist/agent/cloud-events.js +65 -2
- package/dist/agent/gif.d.ts +1 -0
- package/dist/agent/gif.js +24 -2
- package/dist/agent/judge.d.ts +17 -0
- package/dist/agent/judge.js +197 -0
- package/dist/agent/message-manager/service.d.ts +12 -4
- package/dist/agent/message-manager/service.js +205 -39
- package/dist/agent/message-manager/utils.js +0 -1
- package/dist/agent/message-manager/views.d.ts +4 -0
- package/dist/agent/message-manager/views.js +11 -7
- package/dist/agent/prompts.d.ts +24 -3
- package/dist/agent/prompts.js +274 -59
- package/dist/agent/service.d.ts +103 -41
- package/dist/agent/service.js +2336 -472
- package/dist/agent/variable-detector.d.ts +12 -0
- package/dist/agent/variable-detector.js +211 -0
- package/dist/agent/views.d.ts +237 -18
- package/dist/agent/views.js +446 -33
- package/dist/browser/cloud/cloud.d.ts +20 -0
- package/dist/browser/cloud/cloud.js +129 -0
- package/dist/browser/cloud/index.d.ts +2 -0
- package/dist/browser/cloud/index.js +2 -0
- package/dist/browser/cloud/views.d.ts +41 -0
- package/dist/browser/cloud/views.js +35 -0
- package/dist/browser/events.d.ts +345 -0
- package/dist/browser/events.js +566 -0
- package/dist/browser/extensions.js +17 -17
- package/dist/browser/index.d.ts +4 -0
- package/dist/browser/index.js +4 -0
- package/dist/browser/profile.d.ts +10 -4
- package/dist/browser/profile.js +79 -12
- package/dist/browser/session-manager.d.ts +85 -0
- package/dist/browser/session-manager.js +208 -0
- package/dist/browser/session.d.ts +105 -9
- package/dist/browser/session.js +1166 -95
- package/dist/browser/types.d.ts +153 -156
- package/dist/browser/views.d.ts +39 -0
- package/dist/browser/views.js +32 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.d.ts +12 -0
- package/dist/browser/watchdogs/aboutblank-watchdog.js +131 -0
- package/dist/browser/watchdogs/base.d.ts +21 -0
- package/dist/browser/watchdogs/base.js +81 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.d.ts +14 -0
- package/dist/browser/watchdogs/cdp-session-watchdog.js +177 -0
- package/dist/browser/watchdogs/crash-watchdog.d.ts +38 -0
- package/dist/browser/watchdogs/crash-watchdog.js +296 -0
- package/dist/browser/watchdogs/default-action-watchdog.d.ts +49 -0
- package/dist/browser/watchdogs/default-action-watchdog.js +212 -0
- package/dist/browser/watchdogs/dom-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/dom-watchdog.js +31 -0
- package/dist/browser/watchdogs/downloads-watchdog.d.ts +77 -0
- package/dist/browser/watchdogs/downloads-watchdog.js +409 -0
- package/dist/browser/watchdogs/har-recording-watchdog.d.ts +19 -0
- package/dist/browser/watchdogs/har-recording-watchdog.js +317 -0
- package/dist/browser/watchdogs/index.d.ts +15 -0
- package/dist/browser/watchdogs/index.js +15 -0
- package/dist/browser/watchdogs/local-browser-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/local-browser-watchdog.js +32 -0
- package/dist/browser/watchdogs/permissions-watchdog.d.ts +8 -0
- package/dist/browser/watchdogs/permissions-watchdog.js +73 -0
- package/dist/browser/watchdogs/popups-watchdog.d.ts +13 -0
- package/dist/browser/watchdogs/popups-watchdog.js +77 -0
- package/dist/browser/watchdogs/recording-watchdog.d.ts +27 -0
- package/dist/browser/watchdogs/recording-watchdog.js +249 -0
- package/dist/browser/watchdogs/screenshot-watchdog.d.ts +6 -0
- package/dist/browser/watchdogs/screenshot-watchdog.js +13 -0
- package/dist/browser/watchdogs/security-watchdog.d.ts +10 -0
- package/dist/browser/watchdogs/security-watchdog.js +84 -0
- package/dist/browser/watchdogs/storage-state-watchdog.d.ts +24 -0
- package/dist/browser/watchdogs/storage-state-watchdog.js +288 -0
- package/dist/cli.d.ts +7 -2
- package/dist/cli.js +182 -25
- package/dist/code-use/formatting.d.ts +3 -0
- package/dist/code-use/formatting.js +18 -0
- package/dist/code-use/index.d.ts +6 -0
- package/dist/code-use/index.js +6 -0
- package/dist/code-use/namespace.d.ts +5 -0
- package/dist/code-use/namespace.js +81 -0
- package/dist/code-use/notebook-export.d.ts +3 -0
- package/dist/code-use/notebook-export.js +56 -0
- package/dist/code-use/service.d.ts +24 -0
- package/dist/code-use/service.js +104 -0
- package/dist/code-use/utils.d.ts +4 -0
- package/dist/code-use/utils.js +98 -0
- package/dist/code-use/views.d.ts +108 -0
- package/dist/code-use/views.js +165 -0
- package/dist/config.d.ts +15 -0
- package/dist/config.js +109 -7
- package/dist/controller/registry/service.d.ts +10 -1
- package/dist/controller/registry/service.js +266 -10
- package/dist/controller/registry/views.d.ts +4 -1
- package/dist/controller/registry/views.js +25 -2
- package/dist/controller/service.d.ts +10 -1
- package/dist/controller/service.js +1814 -268
- package/dist/controller/views.d.ts +78 -155
- package/dist/controller/views.js +61 -12
- package/dist/dom/history-tree-processor/service.d.ts +5 -0
- package/dist/dom/history-tree-processor/service.js +169 -14
- package/dist/dom/history-tree-processor/view.d.ts +7 -1
- package/dist/dom/history-tree-processor/view.js +10 -1
- package/dist/dom/markdown-extractor.d.ts +37 -0
- package/dist/dom/markdown-extractor.js +345 -0
- package/dist/dom/service.d.ts +3 -1
- package/dist/dom/service.js +76 -0
- package/dist/dom/views.d.ts +1 -0
- package/dist/dom/views.js +45 -0
- package/dist/event-bus.d.ts +107 -7
- package/dist/event-bus.js +313 -10
- package/dist/exceptions.d.ts +0 -3
- package/dist/exceptions.js +0 -7
- package/dist/filesystem/file-system.d.ts +18 -0
- package/dist/filesystem/file-system.js +503 -42
- package/dist/index.d.ts +7 -0
- package/dist/index.js +6 -0
- package/dist/integrations/gmail/actions.d.ts +3 -3
- package/dist/integrations/gmail/actions.js +4 -4
- package/dist/llm/anthropic/chat.d.ts +18 -1
- package/dist/llm/anthropic/chat.js +123 -55
- package/dist/llm/anthropic/serializer.d.ts +2 -0
- package/dist/llm/anthropic/serializer.js +81 -9
- package/dist/llm/aws/chat-anthropic.d.ts +17 -0
- package/dist/llm/aws/chat-anthropic.js +126 -26
- package/dist/llm/aws/chat-bedrock.d.ts +28 -1
- package/dist/llm/aws/chat-bedrock.js +161 -34
- package/dist/llm/aws/serializer.d.ts +13 -1
- package/dist/llm/aws/serializer.js +56 -17
- package/dist/llm/azure/chat.d.ts +53 -2
- package/dist/llm/azure/chat.js +366 -54
- package/dist/llm/base.d.ts +2 -0
- package/dist/llm/browser-use/chat.d.ts +40 -0
- package/dist/llm/browser-use/chat.js +305 -0
- package/dist/llm/browser-use/index.d.ts +1 -0
- package/dist/llm/browser-use/index.js +1 -0
- package/dist/llm/cerebras/chat.d.ts +39 -0
- package/dist/llm/cerebras/chat.js +178 -0
- package/dist/llm/cerebras/index.d.ts +2 -0
- package/dist/llm/cerebras/index.js +2 -0
- package/dist/llm/cerebras/serializer.d.ts +7 -0
- package/dist/llm/cerebras/serializer.js +82 -0
- package/dist/llm/deepseek/chat.d.ts +19 -2
- package/dist/llm/deepseek/chat.js +138 -25
- package/dist/llm/google/chat.d.ts +46 -2
- package/dist/llm/google/chat.js +267 -64
- package/dist/llm/google/serializer.d.ts +9 -1
- package/dist/llm/google/serializer.js +141 -34
- package/dist/llm/groq/chat.d.ts +21 -2
- package/dist/llm/groq/chat.js +125 -26
- package/dist/llm/groq/parser.js +3 -1
- package/dist/llm/mistral/chat.d.ts +43 -0
- package/dist/llm/mistral/chat.js +154 -0
- package/dist/llm/mistral/index.d.ts +2 -0
- package/dist/llm/mistral/index.js +2 -0
- package/dist/llm/mistral/schema.d.ts +8 -0
- package/dist/llm/mistral/schema.js +27 -0
- package/dist/llm/models.d.ts +2 -0
- package/dist/llm/models.js +317 -0
- package/dist/llm/ollama/chat.d.ts +13 -1
- package/dist/llm/ollama/chat.js +110 -19
- package/dist/llm/ollama/serializer.d.ts +1 -0
- package/dist/llm/ollama/serializer.js +34 -12
- package/dist/llm/openai/chat.d.ts +16 -0
- package/dist/llm/openai/chat.js +94 -44
- package/dist/llm/openai/like.d.ts +5 -3
- package/dist/llm/openai/like.js +7 -3
- package/dist/llm/openai/responses-serializer.d.ts +18 -0
- package/dist/llm/openai/responses-serializer.js +72 -0
- package/dist/llm/openrouter/chat.d.ts +28 -2
- package/dist/llm/openrouter/chat.js +115 -29
- package/dist/llm/schema.d.ts +11 -1
- package/dist/llm/schema.js +109 -4
- package/dist/llm/vercel/chat.d.ts +50 -0
- package/dist/llm/vercel/chat.js +276 -0
- package/dist/llm/vercel/index.d.ts +1 -0
- package/dist/llm/vercel/index.js +1 -0
- package/dist/llm/vercel/serializer.d.ts +5 -0
- package/dist/llm/vercel/serializer.js +7 -0
- package/dist/llm/views.d.ts +2 -1
- package/dist/llm/views.js +3 -1
- package/dist/logging-config.d.ts +2 -0
- package/dist/logging-config.js +82 -29
- package/dist/mcp/client.d.ts +10 -5
- package/dist/mcp/client.js +14 -9
- package/dist/mcp/controller.d.ts +42 -3
- package/dist/mcp/controller.js +56 -31
- package/dist/mcp/server.d.ts +15 -0
- package/dist/mcp/server.js +261 -52
- package/dist/observability.js +10 -4
- package/dist/sandbox/index.d.ts +2 -0
- package/dist/sandbox/index.js +2 -0
- package/dist/sandbox/sandbox.d.ts +19 -0
- package/dist/sandbox/sandbox.js +140 -0
- package/dist/sandbox/views.d.ts +67 -0
- package/dist/sandbox/views.js +121 -0
- package/dist/skill-cli/index.d.ts +3 -0
- package/dist/skill-cli/index.js +3 -0
- package/dist/skill-cli/protocol.d.ts +30 -0
- package/dist/skill-cli/protocol.js +48 -0
- package/dist/skill-cli/server.d.ts +11 -0
- package/dist/skill-cli/server.js +85 -0
- package/dist/skill-cli/sessions.d.ts +24 -0
- package/dist/skill-cli/sessions.js +47 -0
- package/dist/skills/index.d.ts +3 -0
- package/dist/skills/index.js +3 -0
- package/dist/skills/service.d.ts +27 -0
- package/dist/skills/service.js +266 -0
- package/dist/skills/utils.d.ts +6 -0
- package/dist/skills/utils.js +53 -0
- package/dist/skills/views.d.ts +40 -0
- package/dist/skills/views.js +10 -0
- package/dist/sync/auth.js +8 -3
- package/dist/sync/service.d.ts +6 -6
- package/dist/sync/service.js +54 -89
- package/dist/telemetry/views.d.ts +20 -6
- package/dist/telemetry/views.js +23 -5
- package/dist/tokens/custom-pricing.d.ts +2 -0
- package/dist/tokens/custom-pricing.js +22 -0
- package/dist/tokens/index.d.ts +2 -0
- package/dist/tokens/index.js +2 -0
- package/dist/tokens/mappings.d.ts +1 -0
- package/dist/tokens/mappings.js +3 -0
- package/dist/tokens/service.js +27 -8
- package/dist/tools/extraction/index.d.ts +2 -0
- package/dist/tools/extraction/index.js +2 -0
- package/dist/tools/extraction/schema-utils.d.ts +6 -0
- package/dist/tools/extraction/schema-utils.js +237 -0
- package/dist/tools/extraction/views.d.ts +7 -0
- package/dist/tools/index.d.ts +5 -0
- package/dist/tools/index.js +5 -0
- package/dist/tools/registry/index.d.ts +2 -0
- package/dist/tools/registry/index.js +2 -0
- package/dist/tools/registry/service.d.ts +1 -0
- package/dist/tools/registry/service.js +1 -0
- package/dist/tools/registry/views.d.ts +1 -0
- package/dist/tools/registry/views.js +1 -0
- package/dist/tools/service.d.ts +2 -0
- package/dist/tools/service.js +1 -0
- package/dist/tools/utils.d.ts +2 -0
- package/dist/tools/utils.js +57 -0
- package/dist/tools/views.d.ts +1 -0
- package/dist/tools/views.js +1 -0
- package/dist/utils.d.ts +10 -1
- package/dist/utils.js +70 -3
- package/package.json +116 -49
- package/dist/dom/playground/process-dom.js +0 -5
- package/dist/dom/playground/test-accessibility.d.ts +0 -44
- package/dist/dom/playground/test-accessibility.js +0 -111
- /package/dist/{dom/playground/process-dom.d.ts → tools/extraction/views.js} +0 -0
package/dist/agent/prompts.d.ts
CHANGED
|
@@ -1,17 +1,20 @@
|
|
|
1
|
-
import { SystemMessage, UserMessage } from '../llm/messages.js';
|
|
1
|
+
import { SystemMessage, UserMessage, ContentPartTextParam, ContentPartImageParam } from '../llm/messages.js';
|
|
2
2
|
import type { AgentStepInfo } from './views.js';
|
|
3
3
|
import type { BrowserStateSummary } from '../browser/views.js';
|
|
4
4
|
import type { FileSystem } from '../filesystem/file-system.js';
|
|
5
5
|
export declare class SystemPrompt {
|
|
6
|
-
private readonly actionDescription;
|
|
7
6
|
private readonly maxActionsPerStep;
|
|
8
7
|
private readonly overrideSystemMessage;
|
|
9
8
|
private readonly extendSystemMessage;
|
|
10
9
|
private readonly useThinking;
|
|
11
10
|
private readonly flashMode;
|
|
11
|
+
private readonly isAnthropic;
|
|
12
|
+
private readonly isBrowserUseModel;
|
|
13
|
+
private readonly modelName;
|
|
12
14
|
private promptTemplate;
|
|
13
15
|
private systemMessage;
|
|
14
|
-
constructor(
|
|
16
|
+
constructor(maxActionsPerStep?: number, overrideSystemMessage?: string | null, extendSystemMessage?: string | null, useThinking?: boolean, flashMode?: boolean, isAnthropic?: boolean, isBrowserUseModel?: boolean, modelName?: string | null);
|
|
17
|
+
private isAnthropic45Model;
|
|
15
18
|
private loadPromptTemplate;
|
|
16
19
|
get_system_message(): SystemMessage;
|
|
17
20
|
}
|
|
@@ -29,6 +32,12 @@ interface AgentMessagePromptInit {
|
|
|
29
32
|
available_file_paths?: string[] | null;
|
|
30
33
|
screenshots?: string[] | null;
|
|
31
34
|
vision_detail_level?: 'auto' | 'low' | 'high';
|
|
35
|
+
include_recent_events?: boolean;
|
|
36
|
+
sample_images?: Array<ContentPartTextParam | ContentPartImageParam> | null;
|
|
37
|
+
read_state_images?: Array<Record<string, unknown>> | null;
|
|
38
|
+
llm_screenshot_size?: [number, number] | null;
|
|
39
|
+
unavailable_skills_info?: string | null;
|
|
40
|
+
plan_description?: string | null;
|
|
32
41
|
}
|
|
33
42
|
export declare class AgentMessagePrompt {
|
|
34
43
|
private readonly browserState;
|
|
@@ -44,9 +53,21 @@ export declare class AgentMessagePrompt {
|
|
|
44
53
|
private readonly availableFilePaths?;
|
|
45
54
|
private readonly screenshots;
|
|
46
55
|
private readonly visionDetailLevel;
|
|
56
|
+
private readonly includeRecentEvents;
|
|
57
|
+
private readonly sampleImages;
|
|
58
|
+
private readonly readStateImages;
|
|
59
|
+
private readonly llmScreenshotSize;
|
|
60
|
+
private readonly unavailableSkillsInfo;
|
|
61
|
+
private readonly planDescription;
|
|
47
62
|
constructor(init: AgentMessagePromptInit);
|
|
63
|
+
private extractPageStatistics;
|
|
48
64
|
private browserStateDescription;
|
|
49
65
|
private agentStateDescription;
|
|
66
|
+
private resizeScreenshotForLlm;
|
|
50
67
|
get_user_message(use_vision?: boolean): UserMessage;
|
|
51
68
|
}
|
|
69
|
+
export declare const get_rerun_summary_prompt: (originalTask: string, totalSteps: number, successCount: number, errorCount: number) => string;
|
|
70
|
+
export declare const get_rerun_summary_message: (prompt: string, screenshotB64?: string | null) => UserMessage;
|
|
71
|
+
export declare const get_ai_step_system_prompt: () => string;
|
|
72
|
+
export declare const get_ai_step_user_prompt: (query: string, statsSummary: string, content: string) => string;
|
|
52
73
|
export {};
|
package/dist/agent/prompts.js
CHANGED
|
@@ -6,30 +6,38 @@ var __decorate = (this && this.__decorate) || function (decorators, target, key,
|
|
|
6
6
|
};
|
|
7
7
|
import fs from 'node:fs';
|
|
8
8
|
import { fileURLToPath } from 'node:url';
|
|
9
|
+
import { Image, createCanvas } from 'canvas';
|
|
9
10
|
import { SystemMessage, UserMessage, ContentPartTextParam, ContentPartImageParam, ImageURL, } from '../llm/messages.js';
|
|
10
11
|
import { observe_debug } from '../observability.js';
|
|
11
|
-
import { is_new_tab_page } from '../utils.js';
|
|
12
|
+
import { is_new_tab_page, sanitize_surrogates } from '../utils.js';
|
|
13
|
+
import { createLogger } from '../logging-config.js';
|
|
14
|
+
import { DOMElementNode } from '../dom/views.js';
|
|
15
|
+
const logger = createLogger('browser_use.agent.prompts');
|
|
12
16
|
const readPromptTemplate = (filename) => {
|
|
13
17
|
const filePath = fileURLToPath(new URL(filename, import.meta.url));
|
|
14
18
|
return fs.readFileSync(filePath, 'utf-8');
|
|
15
19
|
};
|
|
16
20
|
export class SystemPrompt {
|
|
17
|
-
actionDescription;
|
|
18
21
|
maxActionsPerStep;
|
|
19
22
|
overrideSystemMessage;
|
|
20
23
|
extendSystemMessage;
|
|
21
24
|
useThinking;
|
|
22
25
|
flashMode;
|
|
26
|
+
isAnthropic;
|
|
27
|
+
isBrowserUseModel;
|
|
28
|
+
modelName;
|
|
23
29
|
promptTemplate = '';
|
|
24
30
|
systemMessage;
|
|
25
|
-
constructor(
|
|
26
|
-
this.actionDescription = actionDescription;
|
|
31
|
+
constructor(maxActionsPerStep = 3, overrideSystemMessage = null, extendSystemMessage = null, useThinking = true, flashMode = false, isAnthropic = false, isBrowserUseModel = false, modelName = null) {
|
|
27
32
|
this.maxActionsPerStep = maxActionsPerStep;
|
|
28
33
|
this.overrideSystemMessage = overrideSystemMessage;
|
|
29
34
|
this.extendSystemMessage = extendSystemMessage;
|
|
30
35
|
this.useThinking = useThinking;
|
|
31
36
|
this.flashMode = flashMode;
|
|
32
|
-
|
|
37
|
+
this.isAnthropic = isAnthropic;
|
|
38
|
+
this.isBrowserUseModel = isBrowserUseModel;
|
|
39
|
+
this.modelName = modelName;
|
|
40
|
+
if (overrideSystemMessage !== null) {
|
|
33
41
|
this.promptTemplate = overrideSystemMessage;
|
|
34
42
|
}
|
|
35
43
|
else {
|
|
@@ -42,17 +50,47 @@ export class SystemPrompt {
|
|
|
42
50
|
this.systemMessage = new SystemMessage(prompt);
|
|
43
51
|
this.systemMessage.cache = true;
|
|
44
52
|
}
|
|
53
|
+
isAnthropic45Model() {
|
|
54
|
+
if (!this.modelName) {
|
|
55
|
+
return false;
|
|
56
|
+
}
|
|
57
|
+
const modelLower = this.modelName.toLowerCase();
|
|
58
|
+
const isOpus45 = modelLower.includes('opus') &&
|
|
59
|
+
(modelLower.includes('4.5') || modelLower.includes('4-5'));
|
|
60
|
+
const isHaiku45 = modelLower.includes('haiku') &&
|
|
61
|
+
(modelLower.includes('4.5') || modelLower.includes('4-5'));
|
|
62
|
+
return isOpus45 || isHaiku45;
|
|
63
|
+
}
|
|
45
64
|
loadPromptTemplate() {
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
65
|
+
let templateName = './system_prompt.md';
|
|
66
|
+
if (this.isBrowserUseModel) {
|
|
67
|
+
if (this.flashMode) {
|
|
68
|
+
templateName = './system_prompt_browser_use_flash.md';
|
|
69
|
+
}
|
|
70
|
+
else if (this.useThinking) {
|
|
71
|
+
templateName = './system_prompt_browser_use.md';
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
templateName = './system_prompt_browser_use_no_thinking.md';
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
else if (this.flashMode && this.isAnthropic45Model()) {
|
|
78
|
+
templateName = './system_prompt_anthropic_flash.md';
|
|
79
|
+
}
|
|
80
|
+
else if (this.flashMode && this.isAnthropic) {
|
|
81
|
+
templateName = './system_prompt_flash_anthropic.md';
|
|
82
|
+
}
|
|
83
|
+
else if (this.flashMode) {
|
|
84
|
+
templateName = './system_prompt_flash.md';
|
|
85
|
+
}
|
|
86
|
+
else if (!this.useThinking) {
|
|
87
|
+
templateName = './system_prompt_no_thinking.md';
|
|
88
|
+
}
|
|
51
89
|
try {
|
|
52
90
|
this.promptTemplate = readPromptTemplate(templateName);
|
|
53
91
|
}
|
|
54
92
|
catch (error) {
|
|
55
|
-
throw new Error(`Failed to load system prompt template: ${error.message}
|
|
93
|
+
throw new Error(`Failed to load system prompt template: ${error.message}`, { cause: error });
|
|
56
94
|
}
|
|
57
95
|
}
|
|
58
96
|
get_system_message() {
|
|
@@ -73,6 +111,12 @@ export class AgentMessagePrompt {
|
|
|
73
111
|
availableFilePaths;
|
|
74
112
|
screenshots;
|
|
75
113
|
visionDetailLevel;
|
|
114
|
+
includeRecentEvents;
|
|
115
|
+
sampleImages;
|
|
116
|
+
readStateImages;
|
|
117
|
+
llmScreenshotSize;
|
|
118
|
+
unavailableSkillsInfo;
|
|
119
|
+
planDescription;
|
|
76
120
|
constructor(init) {
|
|
77
121
|
this.browserState = init.browser_state_summary;
|
|
78
122
|
this.fileSystem = init.file_system;
|
|
@@ -88,48 +132,97 @@ export class AgentMessagePrompt {
|
|
|
88
132
|
this.availableFilePaths = init.available_file_paths ?? null;
|
|
89
133
|
this.screenshots = init.screenshots ?? [];
|
|
90
134
|
this.visionDetailLevel = init.vision_detail_level ?? 'auto';
|
|
135
|
+
this.includeRecentEvents = init.include_recent_events ?? false;
|
|
136
|
+
this.sampleImages = init.sample_images ?? [];
|
|
137
|
+
this.readStateImages = init.read_state_images ?? [];
|
|
138
|
+
this.llmScreenshotSize = init.llm_screenshot_size ?? null;
|
|
139
|
+
this.unavailableSkillsInfo = init.unavailable_skills_info ?? null;
|
|
140
|
+
this.planDescription = init.plan_description ?? null;
|
|
141
|
+
}
|
|
142
|
+
extractPageStatistics() {
|
|
143
|
+
const stats = {
|
|
144
|
+
links: 0,
|
|
145
|
+
iframes: 0,
|
|
146
|
+
shadow_open: 0,
|
|
147
|
+
shadow_closed: 0,
|
|
148
|
+
scroll_containers: 0,
|
|
149
|
+
images: 0,
|
|
150
|
+
interactive_elements: 0,
|
|
151
|
+
total_elements: 0,
|
|
152
|
+
};
|
|
153
|
+
const root = this.browserState.element_tree;
|
|
154
|
+
if (!root) {
|
|
155
|
+
return stats;
|
|
156
|
+
}
|
|
157
|
+
const traverseNode = (node) => {
|
|
158
|
+
stats.total_elements += 1;
|
|
159
|
+
const tag = String(node.tag_name ?? '').toLowerCase();
|
|
160
|
+
if (tag === 'a') {
|
|
161
|
+
stats.links += 1;
|
|
162
|
+
}
|
|
163
|
+
else if (tag === 'iframe' || tag === 'frame') {
|
|
164
|
+
stats.iframes += 1;
|
|
165
|
+
}
|
|
166
|
+
else if (tag === 'img') {
|
|
167
|
+
stats.images += 1;
|
|
168
|
+
}
|
|
169
|
+
if (node.is_interactive) {
|
|
170
|
+
stats.interactive_elements += 1;
|
|
171
|
+
}
|
|
172
|
+
if (node.shadow_root) {
|
|
173
|
+
// The TS DOM snapshot currently tracks presence of a shadow root, but
|
|
174
|
+
// does not expose open-vs-closed mode; count these as open for parity.
|
|
175
|
+
stats.shadow_open += 1;
|
|
176
|
+
}
|
|
177
|
+
for (const child of node.children) {
|
|
178
|
+
if (child instanceof DOMElementNode) {
|
|
179
|
+
traverseNode(child);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
};
|
|
183
|
+
traverseNode(root);
|
|
184
|
+
return stats;
|
|
91
185
|
}
|
|
92
186
|
browserStateDescription() {
|
|
93
|
-
|
|
187
|
+
const pageStats = this.extractPageStatistics();
|
|
188
|
+
let statsText = '<page_stats>';
|
|
189
|
+
if (pageStats.total_elements < 10) {
|
|
190
|
+
statsText += 'Page appears empty (SPA not loaded?) - ';
|
|
191
|
+
}
|
|
192
|
+
statsText += `${pageStats.links} links, ${pageStats.interactive_elements} interactive, ${pageStats.iframes} iframes`;
|
|
193
|
+
if (pageStats.shadow_open > 0 || pageStats.shadow_closed > 0) {
|
|
194
|
+
statsText += `, ${pageStats.shadow_open} shadow(open), ${pageStats.shadow_closed} shadow(closed)`;
|
|
195
|
+
}
|
|
196
|
+
if (pageStats.images > 0) {
|
|
197
|
+
statsText += `, ${pageStats.images} images`;
|
|
198
|
+
}
|
|
199
|
+
statsText += `, ${pageStats.total_elements} total elements`;
|
|
200
|
+
statsText += '</page_stats>\n';
|
|
201
|
+
let elementsText = this.browserState.llm_representation(this.includeAttributes ?? undefined);
|
|
94
202
|
let truncatedText = '';
|
|
95
203
|
if (elementsText.length > this.maxClickableElementsLength) {
|
|
96
204
|
elementsText = elementsText.slice(0, this.maxClickableElementsLength);
|
|
97
205
|
truncatedText = ` (truncated to ${this.maxClickableElementsLength} characters)`;
|
|
98
206
|
}
|
|
99
|
-
|
|
100
|
-
|
|
207
|
+
let hasContentAbove = false;
|
|
208
|
+
let hasContentBelow = false;
|
|
101
209
|
const pi = this.browserState.page_info;
|
|
102
210
|
let pageInfoText = '';
|
|
103
211
|
if (pi) {
|
|
104
212
|
const pagesAbove = pi.viewport_height > 0 ? pi.pixels_above / pi.viewport_height : 0;
|
|
105
213
|
const pagesBelow = pi.viewport_height > 0 ? pi.pixels_below / pi.viewport_height : 0;
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
pageInfoText =
|
|
214
|
+
hasContentAbove = pagesAbove > 0;
|
|
215
|
+
hasContentBelow = pagesBelow > 0;
|
|
216
|
+
pageInfoText = '<page_info>';
|
|
217
|
+
pageInfoText += `${pagesAbove.toFixed(1)} above, `;
|
|
218
|
+
pageInfoText += `${pagesBelow.toFixed(1)} below `;
|
|
219
|
+
pageInfoText += '</page_info>\n';
|
|
109
220
|
}
|
|
110
221
|
if (elementsText) {
|
|
111
|
-
if (hasContentAbove) {
|
|
112
|
-
if (pi) {
|
|
113
|
-
const pagesAbove = pi.viewport_height > 0 ? pi.pixels_above / pi.viewport_height : 0;
|
|
114
|
-
elementsText = `... ${this.browserState.pixels_above} pixels above (${pagesAbove.toFixed(1)} pages) - scroll to see more or extract structured data if you are looking for specific information ...\n${elementsText}`;
|
|
115
|
-
}
|
|
116
|
-
else {
|
|
117
|
-
elementsText = `... ${this.browserState.pixels_above} pixels above - scroll to see more or extract structured data if you are looking for specific information ...\n${elementsText}`;
|
|
118
|
-
}
|
|
119
|
-
}
|
|
120
|
-
else {
|
|
222
|
+
if (!hasContentAbove) {
|
|
121
223
|
elementsText = `[Start of page]\n${elementsText}`;
|
|
122
224
|
}
|
|
123
|
-
if (hasContentBelow) {
|
|
124
|
-
if (pi) {
|
|
125
|
-
const pagesBelow = pi.viewport_height > 0 ? pi.pixels_below / pi.viewport_height : 0;
|
|
126
|
-
elementsText = `${elementsText}\n... ${this.browserState.pixels_below} pixels below (${pagesBelow.toFixed(1)} pages) - scroll to see more or extract structured data if you are looking for specific information ...`;
|
|
127
|
-
}
|
|
128
|
-
else {
|
|
129
|
-
elementsText = `${elementsText}\n... ${this.browserState.pixels_below} pixels below - scroll to see more or extract structured data if you are looking for specific information ...`;
|
|
130
|
-
}
|
|
131
|
-
}
|
|
132
|
-
else {
|
|
225
|
+
if (!hasContentBelow) {
|
|
133
226
|
elementsText = `${elementsText}\n[End of page]`;
|
|
134
227
|
}
|
|
135
228
|
}
|
|
@@ -137,38 +230,54 @@ export class AgentMessagePrompt {
|
|
|
137
230
|
elementsText = 'empty page';
|
|
138
231
|
}
|
|
139
232
|
let tabsText = '';
|
|
233
|
+
const resolveTabIdentifier = (tab) => typeof tab.tab_id === 'string' && tab.tab_id.trim()
|
|
234
|
+
? tab.tab_id.trim().slice(-4)
|
|
235
|
+
: String(tab.page_id);
|
|
140
236
|
const currentTabCandidates = [];
|
|
141
237
|
for (const tab of this.browserState.tabs) {
|
|
142
238
|
if (tab.url === this.browserState.url &&
|
|
143
239
|
tab.title === this.browserState.title) {
|
|
144
|
-
currentTabCandidates.push(tab
|
|
240
|
+
currentTabCandidates.push(resolveTabIdentifier(tab));
|
|
145
241
|
}
|
|
146
242
|
}
|
|
147
243
|
const currentTabId = currentTabCandidates.length === 1 ? currentTabCandidates[0] : null;
|
|
148
244
|
for (const tab of this.browserState.tabs) {
|
|
149
|
-
tabsText += `Tab ${tab
|
|
245
|
+
tabsText += `Tab ${resolveTabIdentifier(tab)}: ${tab.url} - ${tab.title.slice(0, 30)}\n`;
|
|
150
246
|
}
|
|
151
247
|
const currentTabText = currentTabId !== null ? `Current tab: ${currentTabId}` : '';
|
|
152
248
|
const pdfMessage = this.browserState.is_pdf_viewer
|
|
153
|
-
? 'PDF viewer cannot be rendered.
|
|
249
|
+
? 'PDF viewer cannot be rendered. In this page, DO NOT use the extract action as PDF content cannot be rendered. Use the read_file action on the downloaded PDF in available_file_paths to read the full text content.\n\n'
|
|
250
|
+
: '';
|
|
251
|
+
const recentEventsText = this.includeRecentEvents && this.browserState.recent_events
|
|
252
|
+
? `Recent browser events: ${this.browserState.recent_events}\n`
|
|
154
253
|
: '';
|
|
155
|
-
|
|
254
|
+
let closedPopupsText = '';
|
|
255
|
+
if (Array.isArray(this.browserState.closed_popup_messages) &&
|
|
256
|
+
this.browserState.closed_popup_messages.length > 0) {
|
|
257
|
+
closedPopupsText = 'Auto-closed JavaScript dialogs:\n';
|
|
258
|
+
for (const popupMessage of this.browserState.closed_popup_messages) {
|
|
259
|
+
closedPopupsText += ` - ${popupMessage}\n`;
|
|
260
|
+
}
|
|
261
|
+
closedPopupsText += '\n';
|
|
262
|
+
}
|
|
263
|
+
return `${statsText}${currentTabText}
|
|
156
264
|
Available tabs:
|
|
157
265
|
${tabsText}
|
|
158
266
|
${pageInfoText}
|
|
159
|
-
${pdfMessage}Interactive elements
|
|
267
|
+
${recentEventsText}${closedPopupsText}${pdfMessage}Interactive elements${truncatedText}:
|
|
160
268
|
${elementsText}
|
|
161
269
|
`;
|
|
162
270
|
}
|
|
163
271
|
agentStateDescription() {
|
|
164
272
|
const todoContents = this.fileSystem.get_todo_contents();
|
|
165
|
-
const todoText = todoContents ||
|
|
166
|
-
|
|
167
|
-
const
|
|
168
|
-
const
|
|
169
|
-
|
|
273
|
+
const todoText = todoContents || '[empty todo.md, fill it when applicable]';
|
|
274
|
+
const now = new Date();
|
|
275
|
+
const pad = (value) => String(value).padStart(2, '0');
|
|
276
|
+
const dateString = `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())}`;
|
|
277
|
+
let stepInfoDescription = this.stepInfo != null
|
|
278
|
+
? `Step${this.stepInfo.step_number + 1} maximum:${this.stepInfo.max_steps}\n`
|
|
170
279
|
: '';
|
|
171
|
-
|
|
280
|
+
stepInfoDescription += `Today:${dateString}`;
|
|
172
281
|
let agentState = `<user_request>
|
|
173
282
|
${this.task ?? ''}
|
|
174
283
|
</user_request>
|
|
@@ -179,24 +288,47 @@ ${this.fileSystem.describe()}
|
|
|
179
288
|
${todoText}
|
|
180
289
|
</todo_contents>
|
|
181
290
|
`;
|
|
291
|
+
if (this.planDescription) {
|
|
292
|
+
agentState += `<plan>
|
|
293
|
+
${this.planDescription}
|
|
294
|
+
</plan>
|
|
295
|
+
`;
|
|
296
|
+
}
|
|
182
297
|
if (this.sensitiveData) {
|
|
183
|
-
agentState += `<sensitive_data>
|
|
184
|
-
${this.sensitiveData}
|
|
185
|
-
</sensitive_data>
|
|
298
|
+
agentState += `<sensitive_data>${this.sensitiveData}</sensitive_data>
|
|
186
299
|
`;
|
|
187
300
|
}
|
|
188
|
-
agentState += `<step_info>
|
|
189
|
-
${stepInfoDescription}
|
|
190
|
-
</step_info>
|
|
301
|
+
agentState += `<step_info>${stepInfoDescription}</step_info>
|
|
191
302
|
`;
|
|
192
303
|
if (this.availableFilePaths?.length) {
|
|
193
|
-
agentState += `<available_file_paths
|
|
194
|
-
|
|
195
|
-
</available_file_paths>
|
|
304
|
+
agentState += `<available_file_paths>${this.availableFilePaths.join('\n')}
|
|
305
|
+
Use with absolute paths</available_file_paths>
|
|
196
306
|
`;
|
|
197
307
|
}
|
|
198
308
|
return agentState;
|
|
199
309
|
}
|
|
310
|
+
resizeScreenshotForLlm(screenshotB64) {
|
|
311
|
+
if (!this.llmScreenshotSize) {
|
|
312
|
+
return screenshotB64;
|
|
313
|
+
}
|
|
314
|
+
try {
|
|
315
|
+
const [targetWidth, targetHeight] = this.llmScreenshotSize;
|
|
316
|
+
const image = new Image();
|
|
317
|
+
image.src = Buffer.from(screenshotB64, 'base64');
|
|
318
|
+
if (image.width === targetWidth && image.height === targetHeight) {
|
|
319
|
+
return screenshotB64;
|
|
320
|
+
}
|
|
321
|
+
logger.info(`Resizing screenshot from ${image.width}x${image.height} to ${targetWidth}x${targetHeight} for LLM`);
|
|
322
|
+
const canvas = createCanvas(targetWidth, targetHeight);
|
|
323
|
+
const context = canvas.getContext('2d');
|
|
324
|
+
context.drawImage(image, 0, 0, targetWidth, targetHeight);
|
|
325
|
+
return canvas.toBuffer('image/png').toString('base64');
|
|
326
|
+
}
|
|
327
|
+
catch (error) {
|
|
328
|
+
logger.warning(`Failed to resize screenshot: ${error.message}, using original`);
|
|
329
|
+
return screenshotB64;
|
|
330
|
+
}
|
|
331
|
+
}
|
|
200
332
|
// @ts-ignore - Decorator type mismatch with TypeScript strict mode
|
|
201
333
|
get_user_message(use_vision = true) {
|
|
202
334
|
if (is_new_tab_page(this.browserState.url) &&
|
|
@@ -208,6 +340,7 @@ ${this.availableFilePaths.join('\n')}
|
|
|
208
340
|
let stateDescription = `<agent_history>
|
|
209
341
|
${(this.agentHistoryDescription ?? '').trim()}
|
|
210
342
|
</agent_history>
|
|
343
|
+
|
|
211
344
|
`;
|
|
212
345
|
stateDescription += `<agent_state>
|
|
213
346
|
${this.agentStateDescription().trim()}
|
|
@@ -230,17 +363,37 @@ ${this.pageFilteredActions}
|
|
|
230
363
|
</page_specific_actions>
|
|
231
364
|
`;
|
|
232
365
|
}
|
|
233
|
-
if (
|
|
366
|
+
if (this.unavailableSkillsInfo) {
|
|
367
|
+
stateDescription += `\n${this.unavailableSkillsInfo}\n`;
|
|
368
|
+
}
|
|
369
|
+
stateDescription = sanitize_surrogates(stateDescription);
|
|
370
|
+
const hasReadStateImages = this.readStateImages.length > 0;
|
|
371
|
+
if ((use_vision === true && this.screenshots.length > 0) ||
|
|
372
|
+
hasReadStateImages) {
|
|
234
373
|
const parts = [
|
|
235
374
|
new ContentPartTextParam(stateDescription),
|
|
236
375
|
];
|
|
376
|
+
parts.push(...this.sampleImages);
|
|
237
377
|
this.screenshots.forEach((shot, index) => {
|
|
238
378
|
const label = index === this.screenshots.length - 1
|
|
239
379
|
? 'Current screenshot:'
|
|
240
380
|
: 'Previous screenshot:';
|
|
381
|
+
const processedScreenshot = this.resizeScreenshotForLlm(shot);
|
|
241
382
|
parts.push(new ContentPartTextParam(label));
|
|
242
|
-
parts.push(new ContentPartImageParam(new ImageURL(`data:image/png;base64,${
|
|
383
|
+
parts.push(new ContentPartImageParam(new ImageURL(`data:image/png;base64,${processedScreenshot}`, this.visionDetailLevel, 'image/png')));
|
|
243
384
|
});
|
|
385
|
+
for (const imageInfo of this.readStateImages) {
|
|
386
|
+
const imageName = typeof imageInfo.name === 'string' ? imageInfo.name : 'unknown';
|
|
387
|
+
const imageData = typeof imageInfo.data === 'string' ? imageInfo.data : null;
|
|
388
|
+
if (!imageData) {
|
|
389
|
+
continue;
|
|
390
|
+
}
|
|
391
|
+
const mediaType = imageName.toLowerCase().endsWith('.png')
|
|
392
|
+
? 'image/png'
|
|
393
|
+
: 'image/jpeg';
|
|
394
|
+
parts.push(new ContentPartTextParam(`Image from file: ${imageName}`));
|
|
395
|
+
parts.push(new ContentPartImageParam(new ImageURL(`data:${mediaType};base64,${imageData}`, this.visionDetailLevel, mediaType)));
|
|
396
|
+
}
|
|
244
397
|
const message = new UserMessage(parts);
|
|
245
398
|
message.cache = true;
|
|
246
399
|
return message;
|
|
@@ -257,3 +410,65 @@ __decorate([
|
|
|
257
410
|
ignore_output: true,
|
|
258
411
|
})
|
|
259
412
|
], AgentMessagePrompt.prototype, "get_user_message", null);
|
|
413
|
+
/**
 * Build the prompt that asks the model to summarise a finished rerun.
 *
 * The arguments are interpolated into the text verbatim; no escaping or
 * validation is performed here.
 *
 * @param {*} originalTask - Task description the rerun was executing.
 * @param {*} totalSteps - Value shown as "Total steps".
 * @param {*} successCount - Value shown as "Successful steps".
 * @param {*} errorCount - Value shown as "Failed steps".
 * @returns {string} The complete prompt text.
 */
export const get_rerun_summary_prompt = (originalTask, totalSteps, successCount, errorCount) => `You are analyzing the completion of a rerun task. Based on the screenshot and execution info, provide a summary.

Original task: ${originalTask}

Execution statistics:
- Total steps: ${totalSteps}
- Successful steps: ${successCount}
- Failed steps: ${errorCount}

Analyze the screenshot to determine:
1. Whether the task completed successfully
2. What the final state shows
3. Overall completion status (complete/partial/failed)

Respond with:
- summary: A clear, concise summary of what happened during the rerun
- success: Whether the task completed successfully (true/false)
- completion_status: One of "complete", "partial", or "failed"`;
|
|
431
|
+
/**
 * Wrap a rerun-summary prompt in a `UserMessage`.
 *
 * When a screenshot is supplied, the message content is a two-part list:
 * the prompt text followed by the screenshot as a PNG data URL. Otherwise
 * the message carries the prompt string alone.
 *
 * @param {string} prompt - Prompt text to send.
 * @param {string|null} [screenshotB64=null] - Base64 screenshot data; any
 *   falsy value (null, undefined, empty string) means "no screenshot".
 * @returns {UserMessage} The message to send to the model.
 */
export const get_rerun_summary_message = (prompt, screenshotB64 = null) => {
    if (!screenshotB64) {
        return new UserMessage(prompt);
    }
    const parts = [
        new ContentPartTextParam(prompt),
        new ContentPartImageParam(new ImageURL(`data:image/png;base64,${screenshotB64}`)),
    ];
    return new UserMessage(parts);
};
|
|
441
|
+
/**
 * Return the system prompt for the data-extraction step.
 *
 * The template starts and ends with a newline for readability in source;
 * `.trim()` removes both so the result begins with "You are an expert" and
 * ends with "</output>".
 *
 * @returns {string} The trimmed system prompt.
 */
export const get_ai_step_system_prompt = () => `
You are an expert at extracting data from webpages.

<input>
You will be given:
1. A query describing what to extract
2. The markdown of the webpage (filtered to remove noise)
3. Optionally, a screenshot of the current page state
</input>

<instructions>
- Extract information from the webpage that is relevant to the query
- ONLY use the information available in the webpage - do not make up information
- If the information is not available, mention that clearly
- If the query asks for all items, list all of them
</instructions>

<output>
- Present ALL relevant information in a concise way
- Do not use conversational format - directly output the relevant information
- If information is unavailable, state that clearly
</output>
`.trim();
|
|
464
|
+
/**
 * Build the user prompt for the data-extraction step.
 *
 * Each argument is placed on its own line inside a dedicated tag
 * (`<query>`, `<content_stats>`, `<webpage_content>`), with a blank line
 * between sections. Values are interpolated verbatim; nothing is escaped.
 *
 * @param {*} query - What to extract from the page.
 * @param {*} statsSummary - Text placed inside `<content_stats>`.
 * @param {*} content - Text placed inside `<webpage_content>`.
 * @returns {string} The assembled prompt.
 */
export const get_ai_step_user_prompt = (query, statsSummary, content) => `<query>
${query}
</query>

<content_stats>
${statsSummary}
</content_stats>

<webpage_content>
${content}
</webpage_content>`;
|