camel-ai 0.2.75a6__py3-none-any.whl → 0.2.76__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +1001 -205
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +6 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/__init__.py +2 -1
- camel/memories/agent_memories.py +3 -1
- camel/memories/blocks/chat_history_block.py +21 -3
- camel/memories/records.py +88 -8
- camel/messages/base.py +127 -34
- camel/models/__init__.py +4 -0
- camel/models/amd_model.py +101 -0
- camel/models/azure_openai_model.py +0 -6
- camel/models/base_model.py +30 -0
- camel/models/cometapi_model.py +83 -0
- camel/models/model_factory.py +4 -0
- camel/models/openai_compatible_model.py +0 -6
- camel/models/openai_model.py +0 -6
- camel/models/zhipuai_model.py +61 -2
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/workforce/prompts.py +131 -50
- camel/societies/workforce/single_agent_worker.py +434 -49
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +43 -0
- camel/societies/workforce/utils.py +105 -12
- camel/societies/workforce/workforce.py +1322 -311
- camel/societies/workforce/workforce_logger.py +24 -5
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +10 -11
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +18 -5
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +194 -34
- camel/toolkits/function_tool.py +6 -1
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +12 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +79 -2
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +95 -59
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +619 -95
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +7 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +115 -219
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +1 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +405 -131
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +9 -5
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/mcp_toolkit.py +348 -348
- camel/toolkits/message_integration.py +3 -0
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +18 -8
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/slack_toolkit.py +50 -1
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +924 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/types/enums.py +124 -1
- camel/types/unified_model_type.py +5 -0
- camel/utils/commons.py +17 -0
- camel/utils/context_utils.py +804 -0
- camel/utils/mcp.py +136 -2
- camel/utils/token_counting.py +25 -17
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76.dist-info}/METADATA +158 -59
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76.dist-info}/RECORD +95 -76
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/terminal_toolkit.py +0 -1788
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76.dist-info}/licenses/LICENSE +0 -0
|
@@ -73,12 +73,14 @@ export interface BrowserConfig {
|
|
|
73
73
|
// CDP connection options
|
|
74
74
|
connectOverCdp: boolean;
|
|
75
75
|
cdpUrl?: string;
|
|
76
|
+
cdpKeepCurrentPage: boolean;
|
|
76
77
|
}
|
|
77
78
|
|
|
78
79
|
export interface WebSocketConfig {
|
|
79
80
|
browser_log_to_file: boolean;
|
|
80
81
|
session_id?: string;
|
|
81
82
|
viewport_limit: boolean;
|
|
83
|
+
fullVisualMode?: boolean;
|
|
82
84
|
}
|
|
83
85
|
|
|
84
86
|
// Default stealth configuration
|
|
@@ -117,7 +119,7 @@ function getDefaultBrowserConfig(): BrowserConfig {
|
|
|
117
119
|
consoleLogLimit: 1000,
|
|
118
120
|
scrollPositionScale: 0.1,
|
|
119
121
|
navigationDelay: 100,
|
|
120
|
-
blankPageUrls: [],
|
|
122
|
+
blankPageUrls: ['chrome://newtab/', 'edge://newtab/', 'chrome://new-tab-page/'],
|
|
121
123
|
dataUrlPrefix: 'data:',
|
|
122
124
|
domContentLoadedState: 'domcontentloaded',
|
|
123
125
|
networkIdleState: 'networkidle',
|
|
@@ -138,7 +140,8 @@ function getDefaultBrowserConfig(): BrowserConfig {
|
|
|
138
140
|
height: 720
|
|
139
141
|
},
|
|
140
142
|
connectOverCdp: false,
|
|
141
|
-
cdpUrl: undefined
|
|
143
|
+
cdpUrl: undefined,
|
|
144
|
+
cdpKeepCurrentPage: false
|
|
142
145
|
};
|
|
143
146
|
}
|
|
144
147
|
|
|
@@ -212,10 +215,12 @@ export class ConfigLoader {
|
|
|
212
215
|
if (config.browser_log_to_file !== undefined) wsConfig.browser_log_to_file = config.browser_log_to_file;
|
|
213
216
|
if (config.session_id !== undefined) wsConfig.session_id = config.session_id;
|
|
214
217
|
if (config.viewport_limit !== undefined) wsConfig.viewport_limit = config.viewport_limit;
|
|
218
|
+
if (config.fullVisualMode !== undefined) wsConfig.fullVisualMode = config.fullVisualMode;
|
|
215
219
|
|
|
216
220
|
// CDP connection options
|
|
217
221
|
if (config.connectOverCdp !== undefined) browserConfig.connectOverCdp = config.connectOverCdp;
|
|
218
222
|
if (config.cdpUrl !== undefined) browserConfig.cdpUrl = config.cdpUrl;
|
|
223
|
+
if (config.cdpKeepCurrentPage !== undefined) browserConfig.cdpKeepCurrentPage = config.cdpKeepCurrentPage;
|
|
219
224
|
|
|
220
225
|
return new ConfigLoader(browserConfig, wsConfig);
|
|
221
226
|
}
|
|
@@ -2,18 +2,22 @@ import {HybridBrowserSession} from './browser-session';
|
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
4
|
import {ConsoleMessage} from 'playwright';
|
|
5
|
+
import {SomScreenshotInjected} from './som-screenshot-injected';
|
|
6
|
+
import {filterClickableByHierarchy} from './snapshot-parser';
|
|
5
7
|
|
|
6
8
|
export class HybridBrowserToolkit {
|
|
7
9
|
private session: HybridBrowserSession;
|
|
8
10
|
private config: BrowserToolkitConfig;
|
|
9
11
|
private configLoader: ConfigLoader;
|
|
10
12
|
private viewportLimit: boolean;
|
|
13
|
+
private fullVisualMode: boolean;
|
|
11
14
|
|
|
12
15
|
constructor(config: BrowserToolkitConfig = {}) {
|
|
13
16
|
this.configLoader = ConfigLoader.fromPythonConfig(config);
|
|
14
17
|
this.config = config; // Store original config for backward compatibility
|
|
15
|
-
this.session = new HybridBrowserSession(
|
|
18
|
+
this.session = new HybridBrowserSession(config); // Pass original config
|
|
16
19
|
this.viewportLimit = this.configLoader.getWebSocketConfig().viewport_limit;
|
|
20
|
+
this.fullVisualMode = this.configLoader.getWebSocketConfig().fullVisualMode || false;
|
|
17
21
|
}
|
|
18
22
|
|
|
19
23
|
async openBrowser(startUrl?: string): Promise<ActionResult> {
|
|
@@ -22,22 +26,66 @@ export class HybridBrowserToolkit {
|
|
|
22
26
|
try {
|
|
23
27
|
await this.session.ensureBrowser();
|
|
24
28
|
|
|
25
|
-
|
|
26
|
-
const
|
|
29
|
+
// Check if we should skip navigation in CDP keep-current-page mode
|
|
30
|
+
const browserConfig = this.configLoader.getBrowserConfig();
|
|
31
|
+
if (browserConfig.cdpUrl && browserConfig.cdpKeepCurrentPage && !startUrl) {
|
|
32
|
+
// In CDP keep-current-page mode without explicit URL, just ensure browser and return current page
|
|
33
|
+
const snapshotStart = Date.now();
|
|
34
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
35
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
36
|
+
|
|
37
|
+
const page = await this.session.getCurrentPage();
|
|
38
|
+
const currentUrl = page ? await page.url() : 'unknown';
|
|
39
|
+
|
|
40
|
+
const totalTime = Date.now() - startTime;
|
|
41
|
+
|
|
42
|
+
return {
|
|
43
|
+
success: true,
|
|
44
|
+
message: `Browser opened in CDP keep-current-page mode (current page: ${currentUrl})`,
|
|
45
|
+
snapshot,
|
|
46
|
+
timing: {
|
|
47
|
+
total_time_ms: totalTime,
|
|
48
|
+
snapshot_time_ms: snapshotTime,
|
|
49
|
+
},
|
|
50
|
+
};
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
// For normal mode or CDP with cdpKeepCurrentPage=false: navigate to URL
|
|
54
|
+
if (!browserConfig.cdpUrl || !browserConfig.cdpKeepCurrentPage) {
|
|
55
|
+
const url = startUrl || this.config.defaultStartUrl || 'https://google.com/';
|
|
56
|
+
const result = await this.session.visitPage(url);
|
|
57
|
+
|
|
58
|
+
const snapshotStart = Date.now();
|
|
59
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
60
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
61
|
+
|
|
62
|
+
const totalTime = Date.now() - startTime;
|
|
63
|
+
|
|
64
|
+
return {
|
|
65
|
+
success: true,
|
|
66
|
+
message: result.message,
|
|
67
|
+
snapshot,
|
|
68
|
+
timing: {
|
|
69
|
+
total_time_ms: totalTime,
|
|
70
|
+
page_load_time_ms: result.timing?.page_load_time_ms || 0,
|
|
71
|
+
snapshot_time_ms: snapshotTime,
|
|
72
|
+
},
|
|
73
|
+
};
|
|
74
|
+
}
|
|
27
75
|
|
|
76
|
+
// Fallback: Just return current page snapshot without any navigation
|
|
28
77
|
const snapshotStart = Date.now();
|
|
29
|
-
const snapshot = await this.
|
|
78
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
30
79
|
const snapshotTime = Date.now() - snapshotStart;
|
|
31
80
|
|
|
32
81
|
const totalTime = Date.now() - startTime;
|
|
33
82
|
|
|
34
83
|
return {
|
|
35
84
|
success: true,
|
|
36
|
-
message: `Browser opened
|
|
85
|
+
message: `Browser opened without navigation`,
|
|
37
86
|
snapshot,
|
|
38
87
|
timing: {
|
|
39
88
|
total_time_ms: totalTime,
|
|
40
|
-
...result.timing,
|
|
41
89
|
snapshot_time_ms: snapshotTime,
|
|
42
90
|
},
|
|
43
91
|
};
|
|
@@ -83,7 +131,7 @@ export class HybridBrowserToolkit {
|
|
|
83
131
|
|
|
84
132
|
if (result.success) {
|
|
85
133
|
const snapshotStart = Date.now();
|
|
86
|
-
response.snapshot = await this.
|
|
134
|
+
response.snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
87
135
|
const snapshotTime = Date.now() - snapshotStart;
|
|
88
136
|
|
|
89
137
|
if (result.timing) {
|
|
@@ -119,6 +167,7 @@ export class HybridBrowserToolkit {
|
|
|
119
167
|
|
|
120
168
|
async getPageSnapshot(viewportLimit: boolean = false): Promise<string> {
|
|
121
169
|
try {
|
|
170
|
+
// Always return real snapshot when explicitly called
|
|
122
171
|
// If viewport limiting is enabled, we need coordinates for filtering
|
|
123
172
|
const snapshotResult = await this.session.getSnapshotForAI(viewportLimit, viewportLimit);
|
|
124
173
|
return snapshotResult.snapshot;
|
|
@@ -126,6 +175,14 @@ export class HybridBrowserToolkit {
|
|
|
126
175
|
return `Error capturing snapshot: ${error}`;
|
|
127
176
|
}
|
|
128
177
|
}
|
|
178
|
+
|
|
179
|
+
// Internal method for getting snapshot in actions (respects fullVisualMode)
|
|
180
|
+
private async getSnapshotForAction(viewportLimit: boolean = false): Promise<string> {
|
|
181
|
+
if (this.fullVisualMode) {
|
|
182
|
+
return 'full visual mode';
|
|
183
|
+
}
|
|
184
|
+
return this.getPageSnapshot(viewportLimit);
|
|
185
|
+
}
|
|
129
186
|
|
|
130
187
|
|
|
131
188
|
async getSnapshotForAI(): Promise<SnapshotResult> {
|
|
@@ -134,35 +191,34 @@ export class HybridBrowserToolkit {
|
|
|
134
191
|
|
|
135
192
|
async getSomScreenshot(): Promise<VisualMarkResult & { timing: any }> {
|
|
136
193
|
const startTime = Date.now();
|
|
194
|
+
console.log('[HybridBrowserToolkit] Starting getSomScreenshot...');
|
|
137
195
|
|
|
138
196
|
try {
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
// Add visual marks using improved method
|
|
143
|
-
const markingStart = Date.now();
|
|
144
|
-
const markedImageBuffer = await this.addVisualMarksOptimized(screenshotResult.buffer, snapshotResult);
|
|
145
|
-
const markingTime = Date.now() - markingStart;
|
|
197
|
+
// Get page and snapshot data
|
|
198
|
+
const page = await this.session.getCurrentPage();
|
|
199
|
+
const snapshotResult = await this.session.getSnapshotForAI(true); // Include coordinates
|
|
146
200
|
|
|
147
|
-
|
|
148
|
-
const
|
|
201
|
+
// Parse clickable elements from snapshot text
|
|
202
|
+
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
203
|
+
console.log(`[HybridBrowserToolkit] Found ${clickableElements.size} clickable elements`);
|
|
149
204
|
|
|
150
|
-
|
|
205
|
+
// Apply hierarchy-based filtering
|
|
206
|
+
const filteredElements = filterClickableByHierarchy(snapshotResult.snapshot, clickableElements);
|
|
207
|
+
console.log(`[HybridBrowserToolkit] After filtering: ${filteredElements.size} elements remain`);
|
|
208
|
+
|
|
209
|
+
// Use injected SOM-screenshot method without export path
|
|
210
|
+
const result = await SomScreenshotInjected.captureOptimized(
|
|
211
|
+
page,
|
|
212
|
+
snapshotResult,
|
|
213
|
+
filteredElements,
|
|
214
|
+
undefined // No export path - don't generate files
|
|
215
|
+
);
|
|
151
216
|
|
|
152
|
-
//
|
|
153
|
-
|
|
217
|
+
// Add snapshot timing info to result
|
|
218
|
+
result.timing.snapshot_time_ms = snapshotResult.timing.snapshot_time_ms;
|
|
219
|
+
result.timing.coordinate_enrichment_time_ms = snapshotResult.timing.coordinate_enrichment_time_ms;
|
|
154
220
|
|
|
155
|
-
return
|
|
156
|
-
text: `Visual webpage screenshot captured with ${Object.keys(snapshotResult.elements).length} interactive elements (${elementsWithCoords} marked visually)`,
|
|
157
|
-
images: [dataUrl],
|
|
158
|
-
timing: {
|
|
159
|
-
total_time_ms: totalTime,
|
|
160
|
-
screenshot_time_ms: screenshotResult.timing.screenshot_time_ms,
|
|
161
|
-
snapshot_time_ms: snapshotResult.timing.snapshot_time_ms,
|
|
162
|
-
coordinate_enrichment_time_ms: snapshotResult.timing.coordinate_enrichment_time_ms,
|
|
163
|
-
visual_marking_time_ms: markingTime,
|
|
164
|
-
},
|
|
165
|
-
};
|
|
221
|
+
return result;
|
|
166
222
|
} catch (error) {
|
|
167
223
|
const totalTime = Date.now() - startTime;
|
|
168
224
|
return {
|
|
@@ -179,132 +235,6 @@ export class HybridBrowserToolkit {
|
|
|
179
235
|
}
|
|
180
236
|
}
|
|
181
237
|
|
|
182
|
-
private async addVisualMarksOptimized(screenshotBuffer: Buffer, snapshotResult: SnapshotResult): Promise<Buffer> {
|
|
183
|
-
try {
|
|
184
|
-
|
|
185
|
-
// Check if we have any elements with coordinates
|
|
186
|
-
const elementsWithCoords = Object.entries(snapshotResult.elements)
|
|
187
|
-
.filter(([ref, element]) => element.coordinates);
|
|
188
|
-
|
|
189
|
-
if (elementsWithCoords.length === 0) {
|
|
190
|
-
return screenshotBuffer;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// Parse clickable elements from snapshot text
|
|
194
|
-
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
195
|
-
|
|
196
|
-
// Use sharp for image processing
|
|
197
|
-
const sharp = require('sharp');
|
|
198
|
-
const page = await this.session.getCurrentPage();
|
|
199
|
-
let viewport = page.viewportSize();
|
|
200
|
-
|
|
201
|
-
// In CDP mode, viewportSize might be null, get it from window dimensions
|
|
202
|
-
if (!viewport) {
|
|
203
|
-
const windowSize = await page.evaluate(() => ({
|
|
204
|
-
width: window.innerWidth,
|
|
205
|
-
height: window.innerHeight
|
|
206
|
-
}));
|
|
207
|
-
viewport = windowSize;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// Get device pixel ratio to handle high DPI screens
|
|
211
|
-
const dpr = await page.evaluate(() => window.devicePixelRatio) || 1;
|
|
212
|
-
|
|
213
|
-
// Get actual screenshot dimensions
|
|
214
|
-
const metadata = await sharp(screenshotBuffer).metadata();
|
|
215
|
-
const screenshotWidth = metadata.width || viewport.width;
|
|
216
|
-
const screenshotHeight = metadata.height || viewport.height;
|
|
217
|
-
|
|
218
|
-
// Calculate scaling factor between CSS pixels and screenshot pixels
|
|
219
|
-
const scaleX = screenshotWidth / viewport.width;
|
|
220
|
-
const scaleY = screenshotHeight / viewport.height;
|
|
221
|
-
|
|
222
|
-
// Debug logging for CDP mode
|
|
223
|
-
if (process.env.HYBRID_BROWSER_DEBUG === '1') {
|
|
224
|
-
console.log('[CDP Debug] Viewport size:', viewport);
|
|
225
|
-
console.log('[CDP Debug] Device pixel ratio:', dpr);
|
|
226
|
-
console.log('[CDP Debug] Screenshot dimensions:', { width: screenshotWidth, height: screenshotHeight });
|
|
227
|
-
console.log('[CDP Debug] Scale factors:', { scaleX, scaleY });
|
|
228
|
-
console.log('[CDP Debug] Elements with coordinates:', elementsWithCoords.length);
|
|
229
|
-
elementsWithCoords.slice(0, 3).forEach(([ref, element]) => {
|
|
230
|
-
console.log(`[CDP Debug] Element ${ref}:`, element.coordinates);
|
|
231
|
-
});
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// Filter elements visible in viewport
|
|
235
|
-
const visibleElements = elementsWithCoords.filter(([ref, element]) => {
|
|
236
|
-
const coords = element.coordinates!;
|
|
237
|
-
return coords.x < viewport.width &&
|
|
238
|
-
coords.y < viewport.height &&
|
|
239
|
-
coords.x + coords.width > 0 &&
|
|
240
|
-
coords.y + coords.height > 0;
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
// Remove overlapped elements (only keep topmost)
|
|
244
|
-
const nonOverlappedElements = this.removeOverlappedElements(visibleElements);
|
|
245
|
-
|
|
246
|
-
// Create SVG overlay with all the marks
|
|
247
|
-
const marks = nonOverlappedElements.map(([ref, element]) => {
|
|
248
|
-
const coords = element.coordinates!;
|
|
249
|
-
const isClickable = clickableElements.has(ref);
|
|
250
|
-
|
|
251
|
-
// Scale coordinates from CSS pixels to screenshot pixels
|
|
252
|
-
const x = Math.max(0, coords.x * scaleX);
|
|
253
|
-
const y = Math.max(0, coords.y * scaleY);
|
|
254
|
-
const width = coords.width * scaleX;
|
|
255
|
-
const height = coords.height * scaleY;
|
|
256
|
-
|
|
257
|
-
// Clamp to screenshot bounds
|
|
258
|
-
const clampedWidth = Math.min(width, screenshotWidth - x);
|
|
259
|
-
const clampedHeight = Math.min(height, screenshotHeight - y);
|
|
260
|
-
|
|
261
|
-
// Position text to be visible even if element is partially cut off
|
|
262
|
-
const textX = Math.max(2, Math.min(x + 2, screenshotWidth - 40));
|
|
263
|
-
const textY = Math.max(14, Math.min(y + 14, screenshotHeight - 4));
|
|
264
|
-
|
|
265
|
-
// Different colors for clickable vs non-clickable elements
|
|
266
|
-
const colors = isClickable ? {
|
|
267
|
-
fill: 'rgba(0, 150, 255, 0.15)', // Blue for clickable
|
|
268
|
-
stroke: '#0096FF',
|
|
269
|
-
textFill: '#0096FF'
|
|
270
|
-
} : {
|
|
271
|
-
fill: 'rgba(255, 107, 107, 0.1)', // Red for non-clickable
|
|
272
|
-
stroke: '#FF6B6B',
|
|
273
|
-
textFill: '#FF6B6B'
|
|
274
|
-
};
|
|
275
|
-
|
|
276
|
-
return `
|
|
277
|
-
<rect x="${x}" y="${y}" width="${clampedWidth}" height="${clampedHeight}"
|
|
278
|
-
fill="${colors.fill}" stroke="${colors.stroke}" stroke-width="2" rx="2"/>
|
|
279
|
-
<text x="${textX}" y="${textY}" font-family="Arial, sans-serif"
|
|
280
|
-
font-size="12" fill="${colors.textFill}" font-weight="bold">${ref}</text>
|
|
281
|
-
`;
|
|
282
|
-
}).join('');
|
|
283
|
-
|
|
284
|
-
const svgOverlay = `
|
|
285
|
-
<svg width="${screenshotWidth}" height="${screenshotHeight}" xmlns="http://www.w3.org/2000/svg">
|
|
286
|
-
${marks}
|
|
287
|
-
</svg>
|
|
288
|
-
`;
|
|
289
|
-
|
|
290
|
-
// Composite the overlay onto the screenshot
|
|
291
|
-
const markedImageBuffer = await sharp(screenshotBuffer)
|
|
292
|
-
.composite([{
|
|
293
|
-
input: Buffer.from(svgOverlay),
|
|
294
|
-
top: 0,
|
|
295
|
-
left: 0
|
|
296
|
-
}])
|
|
297
|
-
.png()
|
|
298
|
-
.toBuffer();
|
|
299
|
-
|
|
300
|
-
return markedImageBuffer;
|
|
301
|
-
|
|
302
|
-
} catch (error) {
|
|
303
|
-
// Error adding visual marks, falling back to original screenshot
|
|
304
|
-
// Return original screenshot if marking fails
|
|
305
|
-
return screenshotBuffer;
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
238
|
|
|
309
239
|
/**
|
|
310
240
|
* Parse clickable elements from snapshot text
|
|
@@ -314,8 +244,8 @@ export class HybridBrowserToolkit {
|
|
|
314
244
|
const lines = snapshotText.split('\n');
|
|
315
245
|
|
|
316
246
|
for (const line of lines) {
|
|
317
|
-
// Look for lines containing [cursor=pointer] and extract ref
|
|
318
|
-
if (line.includes('[cursor=pointer]')) {
|
|
247
|
+
// Look for lines containing [cursor=pointer] or [active] and extract ref
|
|
248
|
+
if (line.includes('[cursor=pointer]') || line.includes('[active]')) {
|
|
319
249
|
const refMatch = line.match(/\[ref=([^\]]+)\]/);
|
|
320
250
|
if (refMatch) {
|
|
321
251
|
clickableElements.add(refMatch[1]);
|
|
@@ -326,73 +256,31 @@ export class HybridBrowserToolkit {
|
|
|
326
256
|
return clickableElements;
|
|
327
257
|
}
|
|
328
258
|
|
|
329
|
-
/**
|
|
330
|
-
* Remove overlapped elements, keeping only the topmost (last in DOM order)
|
|
331
|
-
*/
|
|
332
|
-
private removeOverlappedElements(elements: Array<[string, any]>): Array<[string, any]> {
|
|
333
|
-
const result: Array<[string, any]> = [];
|
|
334
|
-
|
|
335
|
-
for (let i = 0; i < elements.length; i++) {
|
|
336
|
-
const [refA, elementA] = elements[i];
|
|
337
|
-
const coordsA = elementA.coordinates!;
|
|
338
|
-
let isOverlapped = false;
|
|
339
|
-
|
|
340
|
-
// Check if this element is completely overlapped by any later element
|
|
341
|
-
for (let j = i + 1; j < elements.length; j++) {
|
|
342
|
-
const [refB, elementB] = elements[j];
|
|
343
|
-
const coordsB = elementB.coordinates!;
|
|
344
|
-
|
|
345
|
-
// Check if element A is completely covered by element B
|
|
346
|
-
if (this.isCompletelyOverlapped(coordsA, coordsB)) {
|
|
347
|
-
isOverlapped = true;
|
|
348
|
-
break;
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
if (!isOverlapped) {
|
|
353
|
-
result.push(elements[i]);
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
return result;
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
/**
|
|
361
|
-
* Check if element A is completely overlapped by element B
|
|
362
|
-
*/
|
|
363
|
-
private isCompletelyOverlapped(
|
|
364
|
-
coordsA: { x: number; y: number; width: number; height: number },
|
|
365
|
-
coordsB: { x: number; y: number; width: number; height: number }
|
|
366
|
-
): boolean {
|
|
367
|
-
// A is completely overlapped by B if:
|
|
368
|
-
// B's left edge is <= A's left edge AND
|
|
369
|
-
// B's top edge is <= A's top edge AND
|
|
370
|
-
// B's right edge is >= A's right edge AND
|
|
371
|
-
// B's bottom edge is >= A's bottom edge
|
|
372
|
-
return (
|
|
373
|
-
coordsB.x <= coordsA.x &&
|
|
374
|
-
coordsB.y <= coordsA.y &&
|
|
375
|
-
coordsB.x + coordsB.width >= coordsA.x + coordsA.width &&
|
|
376
|
-
coordsB.y + coordsB.height >= coordsA.y + coordsA.height
|
|
377
|
-
);
|
|
378
|
-
}
|
|
379
259
|
|
|
380
260
|
private async executeActionWithSnapshot(action: BrowserAction): Promise<any> {
|
|
381
261
|
const result = await this.session.executeAction(action);
|
|
382
262
|
|
|
383
|
-
// Format response for Python layer compatibility
|
|
384
263
|
const response: any = {
|
|
385
264
|
result: result.message,
|
|
386
265
|
snapshot: '',
|
|
387
266
|
};
|
|
388
267
|
|
|
389
268
|
if (result.success) {
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
269
|
+
if (result.details?.diffSnapshot) {
|
|
270
|
+
response.snapshot = result.details.diffSnapshot;
|
|
271
|
+
|
|
272
|
+
if (result.timing) {
|
|
273
|
+
result.timing.snapshot_time_ms = 0; // Diff snapshot time is included in action time
|
|
274
|
+
}
|
|
275
|
+
} else {
|
|
276
|
+
// Get full snapshot as usual
|
|
277
|
+
const snapshotStart = Date.now();
|
|
278
|
+
response.snapshot = await this.getPageSnapshot(this.viewportLimit);
|
|
279
|
+
const snapshotTime = Date.now() - snapshotStart;
|
|
280
|
+
|
|
281
|
+
if (result.timing) {
|
|
282
|
+
result.timing.snapshot_time_ms = snapshotTime;
|
|
283
|
+
}
|
|
396
284
|
}
|
|
397
285
|
}
|
|
398
286
|
|
|
@@ -406,6 +294,14 @@ export class HybridBrowserToolkit {
|
|
|
406
294
|
response.newTabId = result.newTabId;
|
|
407
295
|
}
|
|
408
296
|
|
|
297
|
+
// Include details if present (excluding diffSnapshot as it's already in snapshot)
|
|
298
|
+
if (result.details) {
|
|
299
|
+
const { diffSnapshot, ...otherDetails } = result.details;
|
|
300
|
+
if (Object.keys(otherDetails).length > 0) {
|
|
301
|
+
response.details = otherDetails;
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
|
|
409
305
|
return response;
|
|
410
306
|
}
|
|
411
307
|
|
|
@@ -472,7 +368,7 @@ export class HybridBrowserToolkit {
|
|
|
472
368
|
const navigationTime = Date.now() - navigationStart;
|
|
473
369
|
|
|
474
370
|
const snapshotStart = Date.now();
|
|
475
|
-
const snapshot = await this.
|
|
371
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
476
372
|
const snapshotTime = Date.now() - snapshotStart;
|
|
477
373
|
|
|
478
374
|
const totalTime = Date.now() - startTime;
|
|
@@ -512,7 +408,7 @@ export class HybridBrowserToolkit {
|
|
|
512
408
|
const navigationTime = Date.now() - navigationStart;
|
|
513
409
|
|
|
514
410
|
const snapshotStart = Date.now();
|
|
515
|
-
const snapshot = await this.
|
|
411
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
516
412
|
const snapshotTime = Date.now() - snapshotStart;
|
|
517
413
|
|
|
518
414
|
const totalTime = Date.now() - startTime;
|
|
@@ -584,7 +480,7 @@ export class HybridBrowserToolkit {
|
|
|
584
480
|
return {
|
|
585
481
|
success: true,
|
|
586
482
|
message: `Closed tab ${tabId}`,
|
|
587
|
-
snapshot: await this.
|
|
483
|
+
snapshot: await this.getSnapshotForAction(this.viewportLimit),
|
|
588
484
|
};
|
|
589
485
|
} else {
|
|
590
486
|
return {
|
|
@@ -649,7 +545,7 @@ export class HybridBrowserToolkit {
|
|
|
649
545
|
const { result, logs } = evalResult;
|
|
650
546
|
|
|
651
547
|
const snapshotStart = Date.now();
|
|
652
|
-
const snapshot = await this.
|
|
548
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
653
549
|
const snapshotTime = Date.now() - snapshotStart;
|
|
654
550
|
const totalTime = Date.now() - startTime;
|
|
655
551
|
|