camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +2217 -519
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/agent_memories.py +48 -4
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/context_creators/score_based.py +25 -384
- camel/memories/records.py +88 -8
- camel/messages/base.py +153 -34
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +6 -19
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +114 -89
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +32 -14
- camel/models/cohere_model.py +1 -16
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +36 -18
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +105 -24
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +62 -41
- camel/models/openai_model.py +62 -57
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/__init__.py +2 -0
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +146 -66
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +302 -65
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +163 -27
- camel/societies/workforce/utils.py +107 -13
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +1949 -579
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +168 -145
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/base.py +6 -4
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dappier_toolkit.py +5 -1
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/excel_toolkit.py +1 -1
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
- camel/toolkits/function_tool.py +13 -3
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +366 -71
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_integration.py +18 -13
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +19 -10
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/search_toolkit.py +264 -91
- camel/toolkits/slack_toolkit.py +64 -10
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/enums.py +274 -7
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +15 -0
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/token_counting.py +43 -20
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1550
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
|
@@ -30,6 +30,7 @@ export interface BrowserConfig {
|
|
|
30
30
|
// Tab management
|
|
31
31
|
tabIdPrefix: string;
|
|
32
32
|
tabCounterPadding: number;
|
|
33
|
+
consoleLogLimit: number;
|
|
33
34
|
|
|
34
35
|
// Scroll and positioning
|
|
35
36
|
scrollPositionScale: number;
|
|
@@ -72,12 +73,14 @@ export interface BrowserConfig {
|
|
|
72
73
|
// CDP connection options
|
|
73
74
|
connectOverCdp: boolean;
|
|
74
75
|
cdpUrl?: string;
|
|
76
|
+
cdpKeepCurrentPage: boolean;
|
|
75
77
|
}
|
|
76
78
|
|
|
77
79
|
export interface WebSocketConfig {
|
|
78
80
|
browser_log_to_file: boolean;
|
|
79
81
|
session_id?: string;
|
|
80
82
|
viewport_limit: boolean;
|
|
83
|
+
fullVisualMode?: boolean;
|
|
81
84
|
}
|
|
82
85
|
|
|
83
86
|
// Default stealth configuration
|
|
@@ -113,9 +116,10 @@ function getDefaultBrowserConfig(): BrowserConfig {
|
|
|
113
116
|
clickTimeout: 3000,
|
|
114
117
|
tabIdPrefix: 'tab-',
|
|
115
118
|
tabCounterPadding: 3,
|
|
119
|
+
consoleLogLimit: 1000,
|
|
116
120
|
scrollPositionScale: 0.1,
|
|
117
121
|
navigationDelay: 100,
|
|
118
|
-
blankPageUrls: ['
|
|
122
|
+
blankPageUrls: ['chrome://newtab/', 'edge://newtab/', 'chrome://new-tab-page/'],
|
|
119
123
|
dataUrlPrefix: 'data:',
|
|
120
124
|
domContentLoadedState: 'domcontentloaded',
|
|
121
125
|
networkIdleState: 'networkidle',
|
|
@@ -136,7 +140,8 @@ function getDefaultBrowserConfig(): BrowserConfig {
|
|
|
136
140
|
height: 720
|
|
137
141
|
},
|
|
138
142
|
connectOverCdp: false,
|
|
139
|
-
cdpUrl: undefined
|
|
143
|
+
cdpUrl: undefined,
|
|
144
|
+
cdpKeepCurrentPage: false
|
|
140
145
|
};
|
|
141
146
|
}
|
|
142
147
|
|
|
@@ -210,10 +215,12 @@ export class ConfigLoader {
|
|
|
210
215
|
if (config.browser_log_to_file !== undefined) wsConfig.browser_log_to_file = config.browser_log_to_file;
|
|
211
216
|
if (config.session_id !== undefined) wsConfig.session_id = config.session_id;
|
|
212
217
|
if (config.viewport_limit !== undefined) wsConfig.viewport_limit = config.viewport_limit;
|
|
218
|
+
if (config.fullVisualMode !== undefined) wsConfig.fullVisualMode = config.fullVisualMode;
|
|
213
219
|
|
|
214
220
|
// CDP connection options
|
|
215
221
|
if (config.connectOverCdp !== undefined) browserConfig.connectOverCdp = config.connectOverCdp;
|
|
216
222
|
if (config.cdpUrl !== undefined) browserConfig.cdpUrl = config.cdpUrl;
|
|
223
|
+
if (config.cdpKeepCurrentPage !== undefined) browserConfig.cdpKeepCurrentPage = config.cdpKeepCurrentPage;
|
|
217
224
|
|
|
218
225
|
return new ConfigLoader(browserConfig, wsConfig);
|
|
219
226
|
}
|
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
import {HybridBrowserSession} from './browser-session';
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
|
+
import {ConsoleMessage} from 'playwright';
|
|
5
|
+
import {SomScreenshotInjected} from './som-screenshot-injected';
|
|
6
|
+
import {filterClickableByHierarchy} from './snapshot-parser';
|
|
4
7
|
|
|
5
8
|
export class HybridBrowserToolkit {
|
|
6
9
|
private session: HybridBrowserSession;
|
|
7
10
|
private config: BrowserToolkitConfig;
|
|
8
11
|
private configLoader: ConfigLoader;
|
|
9
12
|
private viewportLimit: boolean;
|
|
13
|
+
private fullVisualMode: boolean;
|
|
10
14
|
|
|
11
15
|
constructor(config: BrowserToolkitConfig = {}) {
|
|
12
16
|
this.configLoader = ConfigLoader.fromPythonConfig(config);
|
|
13
17
|
this.config = config; // Store original config for backward compatibility
|
|
14
|
-
this.session = new HybridBrowserSession(
|
|
18
|
+
this.session = new HybridBrowserSession(config); // Pass original config
|
|
15
19
|
this.viewportLimit = this.configLoader.getWebSocketConfig().viewport_limit;
|
|
20
|
+
this.fullVisualMode = this.configLoader.getWebSocketConfig().fullVisualMode || false;
|
|
16
21
|
}
|
|
17
22
|
|
|
18
23
|
async openBrowser(startUrl?: string): Promise<ActionResult> {
|
|
@@ -21,22 +26,66 @@ export class HybridBrowserToolkit {
|
|
|
21
26
|
try {
|
|
22
27
|
await this.session.ensureBrowser();
|
|
23
28
|
|
|
24
|
-
|
|
25
|
-
const
|
|
29
|
+
// Check if we should skip navigation in CDP keep-current-page mode
|
|
30
|
+
const browserConfig = this.configLoader.getBrowserConfig();
|
|
31
|
+
if (browserConfig.cdpUrl && browserConfig.cdpKeepCurrentPage && !startUrl) {
|
|
32
|
+
// In CDP keep-current-page mode without explicit URL, just ensure browser and return current page
|
|
33
|
+
const snapshotStart = Date.now();
|
|
34
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
35
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
36
|
+
|
|
37
|
+
const page = await this.session.getCurrentPage();
|
|
38
|
+
const currentUrl = page ? await page.url() : 'unknown';
|
|
39
|
+
|
|
40
|
+
const totalTime = Date.now() - startTime;
|
|
41
|
+
|
|
42
|
+
return {
|
|
43
|
+
success: true,
|
|
44
|
+
message: `Browser opened in CDP keep-current-page mode (current page: ${currentUrl})`,
|
|
45
|
+
snapshot,
|
|
46
|
+
timing: {
|
|
47
|
+
total_time_ms: totalTime,
|
|
48
|
+
snapshot_time_ms: snapshotTime,
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
}
|
|
26
52
|
|
|
53
|
+
// For normal mode or CDP with cdpKeepCurrentPage=false: navigate to URL
|
|
54
|
+
if (!browserConfig.cdpUrl || !browserConfig.cdpKeepCurrentPage) {
|
|
55
|
+
const url = startUrl || this.config.defaultStartUrl || 'https://google.com/';
|
|
56
|
+
const result = await this.session.visitPage(url);
|
|
57
|
+
|
|
58
|
+
const snapshotStart = Date.now();
|
|
59
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
60
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
61
|
+
|
|
62
|
+
const totalTime = Date.now() - startTime;
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
success: true,
|
|
66
|
+
message: result.message,
|
|
67
|
+
snapshot,
|
|
68
|
+
timing: {
|
|
69
|
+
total_time_ms: totalTime,
|
|
70
|
+
page_load_time_ms: result.timing?.page_load_time_ms || 0,
|
|
71
|
+
snapshot_time_ms: snapshotTime,
|
|
72
|
+
},
|
|
73
|
+
};
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
// Fallback: Just return current page snapshot without any navigation
|
|
27
77
|
const snapshotStart = Date.now();
|
|
28
|
-
const snapshot = await this.
|
|
78
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
29
79
|
const snapshotTime = Date.now() - snapshotStart;
|
|
30
80
|
|
|
31
81
|
const totalTime = Date.now() - startTime;
|
|
32
82
|
|
|
33
83
|
return {
|
|
34
84
|
success: true,
|
|
35
|
-
message: `Browser opened
|
|
85
|
+
message: `Browser opened without navigation`,
|
|
36
86
|
snapshot,
|
|
37
87
|
timing: {
|
|
38
88
|
total_time_ms: totalTime,
|
|
39
|
-
...result.timing,
|
|
40
89
|
snapshot_time_ms: snapshotTime,
|
|
41
90
|
},
|
|
42
91
|
};
|
|
@@ -68,39 +117,57 @@ export class HybridBrowserToolkit {
|
|
|
68
117
|
}
|
|
69
118
|
|
|
70
119
|
async visitPage(url: string): Promise<any> {
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
result
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
const snapshotTime = Date.now() - snapshotStart;
|
|
120
|
+
try {
|
|
121
|
+
// Ensure browser is initialized before visiting page
|
|
122
|
+
await this.session.ensureBrowser();
|
|
123
|
+
|
|
124
|
+
const result = await this.session.visitPage(url);
|
|
125
|
+
|
|
126
|
+
// Format response for Python layer compatibility
|
|
127
|
+
const response: any = {
|
|
128
|
+
result: result.message,
|
|
129
|
+
snapshot: '',
|
|
130
|
+
};
|
|
83
131
|
|
|
132
|
+
if (result.success) {
|
|
133
|
+
const snapshotStart = Date.now();
|
|
134
|
+
response.snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
135
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
136
|
+
|
|
137
|
+
if (result.timing) {
|
|
138
|
+
result.timing.snapshot_time_ms = snapshotTime;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Include timing if available
|
|
84
143
|
if (result.timing) {
|
|
85
|
-
|
|
144
|
+
response.timing = result.timing;
|
|
86
145
|
}
|
|
146
|
+
|
|
147
|
+
// Include newTabId if present
|
|
148
|
+
if (result.newTabId) {
|
|
149
|
+
response.newTabId = result.newTabId;
|
|
150
|
+
}
|
|
151
|
+
|
|
152
|
+
return response;
|
|
153
|
+
} catch (error) {
|
|
154
|
+
console.error('[visitPage] Error:', error);
|
|
155
|
+
return {
|
|
156
|
+
result: `Navigation to ${url} failed: ${error}`,
|
|
157
|
+
snapshot: '',
|
|
158
|
+
timing: {
|
|
159
|
+
total_time_ms: 0,
|
|
160
|
+
navigation_time_ms: 0,
|
|
161
|
+
dom_content_loaded_time_ms: 0,
|
|
162
|
+
network_idle_time_ms: 0,
|
|
163
|
+
}
|
|
164
|
+
};
|
|
87
165
|
}
|
|
88
|
-
|
|
89
|
-
// Include timing if available
|
|
90
|
-
if (result.timing) {
|
|
91
|
-
response.timing = result.timing;
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
// Include newTabId if present
|
|
95
|
-
if (result.newTabId) {
|
|
96
|
-
response.newTabId = result.newTabId;
|
|
97
|
-
}
|
|
98
|
-
|
|
99
|
-
return response;
|
|
100
166
|
}
|
|
101
167
|
|
|
102
168
|
async getPageSnapshot(viewportLimit: boolean = false): Promise<string> {
|
|
103
169
|
try {
|
|
170
|
+
// Always return real snapshot when explicitly called
|
|
104
171
|
// If viewport limiting is enabled, we need coordinates for filtering
|
|
105
172
|
const snapshotResult = await this.session.getSnapshotForAI(viewportLimit, viewportLimit);
|
|
106
173
|
return snapshotResult.snapshot;
|
|
@@ -108,6 +175,14 @@ export class HybridBrowserToolkit {
|
|
|
108
175
|
return `Error capturing snapshot: ${error}`;
|
|
109
176
|
}
|
|
110
177
|
}
|
|
178
|
+
|
|
179
|
+
// Internal method for getting snapshot in actions (respects fullVisualMode)
|
|
180
|
+
private async getSnapshotForAction(viewportLimit: boolean = false): Promise<string> {
|
|
181
|
+
if (this.fullVisualMode) {
|
|
182
|
+
return 'full visual mode';
|
|
183
|
+
}
|
|
184
|
+
return this.getPageSnapshot(viewportLimit);
|
|
185
|
+
}
|
|
111
186
|
|
|
112
187
|
|
|
113
188
|
async getSnapshotForAI(): Promise<SnapshotResult> {
|
|
@@ -116,35 +191,34 @@ export class HybridBrowserToolkit {
|
|
|
116
191
|
|
|
117
192
|
async getSomScreenshot(): Promise<VisualMarkResult & { timing: any }> {
|
|
118
193
|
const startTime = Date.now();
|
|
194
|
+
console.log('[HybridBrowserToolkit] Starting getSomScreenshot...');
|
|
119
195
|
|
|
120
196
|
try {
|
|
121
|
-
|
|
122
|
-
const
|
|
123
|
-
|
|
124
|
-
// Add visual marks using improved method
|
|
125
|
-
const markingStart = Date.now();
|
|
126
|
-
const markedImageBuffer = await this.addVisualMarksOptimized(screenshotResult.buffer, snapshotResult);
|
|
127
|
-
const markingTime = Date.now() - markingStart;
|
|
128
|
-
|
|
129
|
-
const base64Image = markedImageBuffer.toString('base64');
|
|
130
|
-
const dataUrl = `data:image/png;base64,${base64Image}`;
|
|
131
|
-
|
|
132
|
-
const totalTime = Date.now() - startTime;
|
|
197
|
+
// Get page and snapshot data
|
|
198
|
+
const page = await this.session.getCurrentPage();
|
|
199
|
+
const snapshotResult = await this.session.getSnapshotForAI(true); // Include coordinates
|
|
133
200
|
|
|
134
|
-
//
|
|
135
|
-
const
|
|
201
|
+
// Parse clickable elements from snapshot text
|
|
202
|
+
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
203
|
+
console.log(`[HybridBrowserToolkit] Found ${clickableElements.size} clickable elements`);
|
|
136
204
|
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
205
|
+
// Apply hierarchy-based filtering
|
|
206
|
+
const filteredElements = filterClickableByHierarchy(snapshotResult.snapshot, clickableElements);
|
|
207
|
+
console.log(`[HybridBrowserToolkit] After filtering: ${filteredElements.size} elements remain`);
|
|
208
|
+
|
|
209
|
+
// Use injected SOM-screenshot method without export path
|
|
210
|
+
const result = await SomScreenshotInjected.captureOptimized(
|
|
211
|
+
page,
|
|
212
|
+
snapshotResult,
|
|
213
|
+
filteredElements,
|
|
214
|
+
undefined // No export path - don't generate files
|
|
215
|
+
);
|
|
216
|
+
|
|
217
|
+
// Add snapshot timing info to result
|
|
218
|
+
result.timing.snapshot_time_ms = snapshotResult.timing.snapshot_time_ms;
|
|
219
|
+
result.timing.coordinate_enrichment_time_ms = snapshotResult.timing.coordinate_enrichment_time_ms;
|
|
220
|
+
|
|
221
|
+
return result;
|
|
148
222
|
} catch (error) {
|
|
149
223
|
const totalTime = Date.now() - startTime;
|
|
150
224
|
return {
|
|
@@ -161,98 +235,6 @@ export class HybridBrowserToolkit {
|
|
|
161
235
|
}
|
|
162
236
|
}
|
|
163
237
|
|
|
164
|
-
private async addVisualMarksOptimized(screenshotBuffer: Buffer, snapshotResult: SnapshotResult): Promise<Buffer> {
|
|
165
|
-
try {
|
|
166
|
-
|
|
167
|
-
// Check if we have any elements with coordinates
|
|
168
|
-
const elementsWithCoords = Object.entries(snapshotResult.elements)
|
|
169
|
-
.filter(([ref, element]) => element.coordinates);
|
|
170
|
-
|
|
171
|
-
if (elementsWithCoords.length === 0) {
|
|
172
|
-
return screenshotBuffer;
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
// Parse clickable elements from snapshot text
|
|
176
|
-
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
177
|
-
|
|
178
|
-
// Use sharp for image processing
|
|
179
|
-
const sharp = require('sharp');
|
|
180
|
-
const page = await this.session.getCurrentPage();
|
|
181
|
-
const viewport = page.viewportSize() || { width: 1280, height: 720 };
|
|
182
|
-
|
|
183
|
-
// Filter elements visible in viewport
|
|
184
|
-
const visibleElements = elementsWithCoords.filter(([ref, element]) => {
|
|
185
|
-
const coords = element.coordinates!;
|
|
186
|
-
return coords.x < viewport.width &&
|
|
187
|
-
coords.y < viewport.height &&
|
|
188
|
-
coords.x + coords.width > 0 &&
|
|
189
|
-
coords.y + coords.height > 0;
|
|
190
|
-
});
|
|
191
|
-
|
|
192
|
-
// Remove overlapped elements (only keep topmost)
|
|
193
|
-
const nonOverlappedElements = this.removeOverlappedElements(visibleElements);
|
|
194
|
-
|
|
195
|
-
// Create SVG overlay with all the marks
|
|
196
|
-
const marks = nonOverlappedElements.map(([ref, element]) => {
|
|
197
|
-
const coords = element.coordinates!;
|
|
198
|
-
const isClickable = clickableElements.has(ref);
|
|
199
|
-
|
|
200
|
-
// Use original coordinates for elements within viewport
|
|
201
|
-
// Clamp only to prevent marks from extending beyond screenshot bounds
|
|
202
|
-
const x = Math.max(0, coords.x);
|
|
203
|
-
const y = Math.max(0, coords.y);
|
|
204
|
-
const maxWidth = viewport.width - x;
|
|
205
|
-
const maxHeight = viewport.height - y;
|
|
206
|
-
const width = Math.min(coords.width, maxWidth);
|
|
207
|
-
const height = Math.min(coords.height, maxHeight);
|
|
208
|
-
|
|
209
|
-
// Position text to be visible even if element is partially cut off
|
|
210
|
-
const textX = Math.max(2, Math.min(x + 2, viewport.width - 40));
|
|
211
|
-
const textY = Math.max(14, Math.min(y + 14, viewport.height - 4));
|
|
212
|
-
|
|
213
|
-
// Different colors for clickable vs non-clickable elements
|
|
214
|
-
const colors = isClickable ? {
|
|
215
|
-
fill: 'rgba(0, 150, 255, 0.15)', // Blue for clickable
|
|
216
|
-
stroke: '#0096FF',
|
|
217
|
-
textFill: '#0096FF'
|
|
218
|
-
} : {
|
|
219
|
-
fill: 'rgba(255, 107, 107, 0.1)', // Red for non-clickable
|
|
220
|
-
stroke: '#FF6B6B',
|
|
221
|
-
textFill: '#FF6B6B'
|
|
222
|
-
};
|
|
223
|
-
|
|
224
|
-
return `
|
|
225
|
-
<rect x="${x}" y="${y}" width="${width}" height="${height}"
|
|
226
|
-
fill="${colors.fill}" stroke="${colors.stroke}" stroke-width="2" rx="2"/>
|
|
227
|
-
<text x="${textX}" y="${textY}" font-family="Arial, sans-serif"
|
|
228
|
-
font-size="12" fill="${colors.textFill}" font-weight="bold">${ref}</text>
|
|
229
|
-
`;
|
|
230
|
-
}).join('');
|
|
231
|
-
|
|
232
|
-
const svgOverlay = `
|
|
233
|
-
<svg width="${viewport.width}" height="${viewport.height}" xmlns="http://www.w3.org/2000/svg">
|
|
234
|
-
${marks}
|
|
235
|
-
</svg>
|
|
236
|
-
`;
|
|
237
|
-
|
|
238
|
-
// Composite the overlay onto the screenshot
|
|
239
|
-
const markedImageBuffer = await sharp(screenshotBuffer)
|
|
240
|
-
.composite([{
|
|
241
|
-
input: Buffer.from(svgOverlay),
|
|
242
|
-
top: 0,
|
|
243
|
-
left: 0
|
|
244
|
-
}])
|
|
245
|
-
.png()
|
|
246
|
-
.toBuffer();
|
|
247
|
-
|
|
248
|
-
return markedImageBuffer;
|
|
249
|
-
|
|
250
|
-
} catch (error) {
|
|
251
|
-
// Error adding visual marks, falling back to original screenshot
|
|
252
|
-
// Return original screenshot if marking fails
|
|
253
|
-
return screenshotBuffer;
|
|
254
|
-
}
|
|
255
|
-
}
|
|
256
238
|
|
|
257
239
|
/**
|
|
258
240
|
* Parse clickable elements from snapshot text
|
|
@@ -262,8 +244,8 @@ export class HybridBrowserToolkit {
|
|
|
262
244
|
const lines = snapshotText.split('\n');
|
|
263
245
|
|
|
264
246
|
for (const line of lines) {
|
|
265
|
-
// Look for lines containing [cursor=pointer] and extract ref
|
|
266
|
-
if (line.includes('[cursor=pointer]')) {
|
|
247
|
+
// Look for lines containing [cursor=pointer] or [active] and extract ref
|
|
248
|
+
if (line.includes('[cursor=pointer]') || line.includes('[active]')) {
|
|
267
249
|
const refMatch = line.match(/\[ref=([^\]]+)\]/);
|
|
268
250
|
if (refMatch) {
|
|
269
251
|
clickableElements.add(refMatch[1]);
|
|
@@ -274,73 +256,31 @@ export class HybridBrowserToolkit {
|
|
|
274
256
|
return clickableElements;
|
|
275
257
|
}
|
|
276
258
|
|
|
277
|
-
/**
|
|
278
|
-
* Remove overlapped elements, keeping only the topmost (last in DOM order)
|
|
279
|
-
*/
|
|
280
|
-
private removeOverlappedElements(elements: Array<[string, any]>): Array<[string, any]> {
|
|
281
|
-
const result: Array<[string, any]> = [];
|
|
282
|
-
|
|
283
|
-
for (let i = 0; i < elements.length; i++) {
|
|
284
|
-
const [refA, elementA] = elements[i];
|
|
285
|
-
const coordsA = elementA.coordinates!;
|
|
286
|
-
let isOverlapped = false;
|
|
287
|
-
|
|
288
|
-
// Check if this element is completely overlapped by any later element
|
|
289
|
-
for (let j = i + 1; j < elements.length; j++) {
|
|
290
|
-
const [refB, elementB] = elements[j];
|
|
291
|
-
const coordsB = elementB.coordinates!;
|
|
292
|
-
|
|
293
|
-
// Check if element A is completely covered by element B
|
|
294
|
-
if (this.isCompletelyOverlapped(coordsA, coordsB)) {
|
|
295
|
-
isOverlapped = true;
|
|
296
|
-
break;
|
|
297
|
-
}
|
|
298
|
-
}
|
|
299
|
-
|
|
300
|
-
if (!isOverlapped) {
|
|
301
|
-
result.push(elements[i]);
|
|
302
|
-
}
|
|
303
|
-
}
|
|
304
|
-
|
|
305
|
-
return result;
|
|
306
|
-
}
|
|
307
|
-
|
|
308
|
-
/**
|
|
309
|
-
* Check if element A is completely overlapped by element B
|
|
310
|
-
*/
|
|
311
|
-
private isCompletelyOverlapped(
|
|
312
|
-
coordsA: { x: number; y: number; width: number; height: number },
|
|
313
|
-
coordsB: { x: number; y: number; width: number; height: number }
|
|
314
|
-
): boolean {
|
|
315
|
-
// A is completely overlapped by B if:
|
|
316
|
-
// B's left edge is <= A's left edge AND
|
|
317
|
-
// B's top edge is <= A's top edge AND
|
|
318
|
-
// B's right edge is >= A's right edge AND
|
|
319
|
-
// B's bottom edge is >= A's bottom edge
|
|
320
|
-
return (
|
|
321
|
-
coordsB.x <= coordsA.x &&
|
|
322
|
-
coordsB.y <= coordsA.y &&
|
|
323
|
-
coordsB.x + coordsB.width >= coordsA.x + coordsA.width &&
|
|
324
|
-
coordsB.y + coordsB.height >= coordsA.y + coordsA.height
|
|
325
|
-
);
|
|
326
|
-
}
|
|
327
259
|
|
|
328
260
|
private async executeActionWithSnapshot(action: BrowserAction): Promise<any> {
|
|
329
261
|
const result = await this.session.executeAction(action);
|
|
330
262
|
|
|
331
|
-
// Format response for Python layer compatibility
|
|
332
263
|
const response: any = {
|
|
333
264
|
result: result.message,
|
|
334
265
|
snapshot: '',
|
|
335
266
|
};
|
|
336
267
|
|
|
337
268
|
if (result.success) {
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
269
|
+
if (result.details?.diffSnapshot) {
|
|
270
|
+
response.snapshot = result.details.diffSnapshot;
|
|
271
|
+
|
|
272
|
+
if (result.timing) {
|
|
273
|
+
result.timing.snapshot_time_ms = 0; // Diff snapshot time is included in action time
|
|
274
|
+
}
|
|
275
|
+
} else {
|
|
276
|
+
// Get full snapshot as usual
|
|
277
|
+
const snapshotStart = Date.now();
|
|
278
|
+
response.snapshot = await this.getPageSnapshot(this.viewportLimit);
|
|
279
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
280
|
+
|
|
281
|
+
if (result.timing) {
|
|
282
|
+
result.timing.snapshot_time_ms = snapshotTime;
|
|
283
|
+
}
|
|
344
284
|
}
|
|
345
285
|
}
|
|
346
286
|
|
|
@@ -354,6 +294,14 @@ export class HybridBrowserToolkit {
|
|
|
354
294
|
response.newTabId = result.newTabId;
|
|
355
295
|
}
|
|
356
296
|
|
|
297
|
+
// Include details if present (excluding diffSnapshot as it's already in snapshot)
|
|
298
|
+
if (result.details) {
|
|
299
|
+
const { diffSnapshot, ...otherDetails } = result.details;
|
|
300
|
+
if (Object.keys(otherDetails).length > 0) {
|
|
301
|
+
response.details = otherDetails;
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
357
305
|
return response;
|
|
358
306
|
}
|
|
359
307
|
|
|
@@ -362,8 +310,20 @@ export class HybridBrowserToolkit {
|
|
|
362
310
|
return this.executeActionWithSnapshot(action);
|
|
363
311
|
}
|
|
364
312
|
|
|
365
|
-
async type(ref: string
|
|
366
|
-
|
|
313
|
+
async type(refOrInputs: string | Array<{ ref: string; text: string }>, text?: string): Promise<any> {
|
|
314
|
+
let action: BrowserAction;
|
|
315
|
+
|
|
316
|
+
if (typeof refOrInputs === 'string') {
|
|
317
|
+
// Single input mode (backward compatibility)
|
|
318
|
+
if (text === undefined) {
|
|
319
|
+
throw new Error('Text parameter is required when ref is a string');
|
|
320
|
+
}
|
|
321
|
+
action = { type: 'type', ref: refOrInputs, text };
|
|
322
|
+
} else {
|
|
323
|
+
// Multiple inputs mode
|
|
324
|
+
action = { type: 'type', inputs: refOrInputs };
|
|
325
|
+
}
|
|
326
|
+
|
|
367
327
|
return this.executeActionWithSnapshot(action);
|
|
368
328
|
}
|
|
369
329
|
|
|
@@ -382,6 +342,25 @@ export class HybridBrowserToolkit {
|
|
|
382
342
|
return this.executeActionWithSnapshot(action);
|
|
383
343
|
}
|
|
384
344
|
|
|
345
|
+
async mouseControl(control: 'click' | 'right_click'| 'dblclick', x: number, y: number): Promise<any> {
|
|
346
|
+
const action: BrowserAction = { type: 'mouse_control', control, x, y };
|
|
347
|
+
return this.executeActionWithSnapshot(action);
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
async mouseDrag(from_ref: string, to_ref: string): Promise<any> {
|
|
351
|
+
const action: BrowserAction = { type: 'mouse_drag', from_ref, to_ref };
|
|
352
|
+
return this.executeActionWithSnapshot(action);
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
async pressKeys(keys: string[]): Promise<any> {
|
|
356
|
+
const action: BrowserAction = { type: 'press_key', keys};
|
|
357
|
+
return this.executeActionWithSnapshot(action);
|
|
358
|
+
}
|
|
359
|
+
|
|
360
|
+
async batchKeyboardInput(operations: Array<{type: string, keys?: string[], text?: string, delay?: number}>, skipStabilityWait: boolean = true): Promise<any> {
|
|
361
|
+
return this.session.batchKeyboardInput(operations, skipStabilityWait);
|
|
362
|
+
}
|
|
363
|
+
|
|
385
364
|
async back(): Promise<ActionResult> {
|
|
386
365
|
const startTime = Date.now();
|
|
387
366
|
|
|
@@ -393,7 +372,7 @@ export class HybridBrowserToolkit {
|
|
|
393
372
|
const navigationTime = Date.now() - navigationStart;
|
|
394
373
|
|
|
395
374
|
const snapshotStart = Date.now();
|
|
396
|
-
const snapshot = await this.
|
|
375
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
397
376
|
const snapshotTime = Date.now() - snapshotStart;
|
|
398
377
|
|
|
399
378
|
const totalTime = Date.now() - startTime;
|
|
@@ -433,7 +412,7 @@ export class HybridBrowserToolkit {
|
|
|
433
412
|
const navigationTime = Date.now() - navigationStart;
|
|
434
413
|
|
|
435
414
|
const snapshotStart = Date.now();
|
|
436
|
-
const snapshot = await this.
|
|
415
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
437
416
|
const snapshotTime = Date.now() - snapshotStart;
|
|
438
417
|
|
|
439
418
|
const totalTime = Date.now() - startTime;
|
|
@@ -505,7 +484,7 @@ export class HybridBrowserToolkit {
|
|
|
505
484
|
return {
|
|
506
485
|
success: true,
|
|
507
486
|
message: `Closed tab ${tabId}`,
|
|
508
|
-
snapshot: await this.
|
|
487
|
+
snapshot: await this.getSnapshotForAction(this.viewportLimit),
|
|
509
488
|
};
|
|
510
489
|
} else {
|
|
511
490
|
return {
|
|
@@ -519,4 +498,93 @@ export class HybridBrowserToolkit {
|
|
|
519
498
|
return await this.session.getTabInfo();
|
|
520
499
|
}
|
|
521
500
|
|
|
522
|
-
|
|
501
|
+
/**
 * Return the session's current console logs as plain `{ type, text }` objects.
 *
 * @returns One object per log entry, built from the entry's `type()` and
 *   `text()` accessors.
 */
async getConsoleView(): Promise<any> {
  const logEntries = await this.session.getCurrentLogs();

  // Flatten each entry into a plain object of its type and text.
  return logEntries.map((entry) => {
    const kind = entry.type();
    const body = entry.text();
    return { type: kind, text: body };
  });
}
|
|
509
|
+
|
|
510
|
+
/**
 * Execute a JavaScript snippet in the current page and report the outcome.
 *
 * The snippet is first evaluated as an expression/program via `eval`. If that
 * raises a `SyntaxError` (for example because the snippet uses a top-level
 * `return`), it is retried as the body of a function. `console.log` calls made
 * while the snippet runs are captured and returned alongside the result.
 *
 * NOTE(security): this runs caller-supplied code verbatim inside the page.
 * That is the purpose of the method; callers must treat `code` as trusted.
 *
 * @param code - JavaScript source to run in the page.
 * @returns An object with `result` (a human-readable string), `console_output`
 *   (captured `console.log` lines), `snapshot`, and `timing`. Failures are
 *   reported in `result` rather than thrown.
 */
async consoleExecute(code: string): Promise<any> {
  const startTime = Date.now();
  try {
    const page = await this.session.getCurrentPage();

    // Wrap the code to capture console.log output.
    //
    // Two details matter here:
    //  * The function-body fallback only runs when eval raised a SyntaxError.
    //    Falling back on any error would execute a snippet that failed at
    //    runtime a second time and duplicate its side effects. (A snippet
    //    that itself throws a SyntaxError at runtime, e.g. from JSON.parse,
    //    is still retried.)
    //  * The interpolated code sits on its own lines so that a trailing
    //    `//` comment in the snippet cannot comment out the wrapper's
    //    closing `})()` and make the whole script unparsable.
    const wrappedCode = `
      (function() {
        const _logs = [];
        const originalLog = console.log;
        console.log = function(...args) {
          _logs.push(args.map(arg => {
            try {
              return typeof arg === 'object' ? JSON.stringify(arg) : String(arg);
            } catch (e) {
              return String(arg);
            }
          }).join(' '));
          originalLog.apply(console, args);
        };

        let result;
        try {
          try {
            result = eval(${JSON.stringify(code)});
          } catch (e) {
            if (!(e instanceof SyntaxError)) {
              throw e;
            }
            result = (function() {
${code}
            })();
          }
        } finally {
          // Always restore the page's console.log, on success or failure.
          console.log = originalLog;
        }

        return { result, logs: _logs };
      })()
    `;

    const evalResult = await page.evaluate(wrappedCode) as { result: any; logs: string[] };
    const { result, logs } = evalResult;

    const snapshotStart = Date.now();
    const snapshot = await this.getSnapshotForAction(this.viewportLimit);
    const snapshotTime = Date.now() - snapshotStart;
    const totalTime = Date.now() - startTime;

    // Properly serialize the result
    let resultStr: string;
    try {
      // JSON.stringify yields undefined (not a string) for values such as
      // `undefined` or functions; fall back to String() so resultStr is
      // always a real string.
      const serialized = JSON.stringify(result, null, 2);
      resultStr = serialized === undefined ? String(result) : serialized;
    } catch (e) {
      // Fallback for non-serializable values
      resultStr = String(result);
    }

    return {
      result: `Console execution result: ${resultStr}`,
      console_output: logs,
      snapshot: snapshot,
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: snapshotTime,
      },
    };

  } catch (error) {
    // Any failure (page lookup, evaluation, snapshot) is reported in-band
    // with an empty snapshot instead of being rethrown.
    const totalTime = Date.now() - startTime;
    return {
      result: `Console execution failed: ${error}`,
      console_output: [],
      snapshot: '',
      timing: {
        total_time_ms: totalTime,
        snapshot_time_ms: 0,
      },
    };
  }
}
|
|
588
|
+
|
|
589
|
+
}
|
|
590
|
+
|