camel-ai 0.2.75a5__py3-none-any.whl → 0.2.76a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +298 -130
- camel/configs/__init__.py +6 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/nebius_config.py +103 -0
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/models/__init__.py +4 -0
- camel/models/amd_model.py +101 -0
- camel/models/model_factory.py +4 -0
- camel/models/nebius_model.py +83 -0
- camel/models/ollama_model.py +3 -3
- camel/models/openai_model.py +0 -6
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/workforce/task_channel.py +120 -27
- camel/societies/workforce/workforce.py +35 -3
- camel/toolkits/__init__.py +5 -3
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/function_tool.py +6 -1
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +8 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +12 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +33 -14
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +135 -40
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +43 -207
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +231 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +248 -58
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +5 -1
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +39 -14
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/search_toolkit.py +13 -2
- camel/toolkits/terminal_toolkit.py +12 -2
- camel/toolkits/video_analysis_toolkit.py +16 -10
- camel/types/enums.py +42 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/commons.py +2 -0
- camel/utils/mcp.py +136 -2
- {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76a0.dist-info}/METADATA +5 -11
- {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76a0.dist-info}/RECORD +47 -38
- {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76a0.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76a0.dist-info}/licenses/LICENSE +0 -0
|
@@ -59,14 +59,30 @@ export class HybridBrowserSession {
|
|
|
59
59
|
const contexts = this.browser.contexts();
|
|
60
60
|
if (contexts.length > 0) {
|
|
61
61
|
this.context = contexts[0];
|
|
62
|
+
|
|
63
|
+
// Apply stealth headers to existing context if configured
|
|
64
|
+
// Note: userAgent cannot be changed on an existing context
|
|
65
|
+
if (stealthConfig.enabled) {
|
|
66
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
67
|
+
await this.context.setExtraHTTPHeaders(stealthConfig.extraHTTPHeaders);
|
|
68
|
+
}
|
|
69
|
+
if (stealthConfig.userAgent) {
|
|
70
|
+
console.warn('[HybridBrowserSession] Cannot apply userAgent to existing context. Consider creating a new context if userAgent customization is required.');
|
|
71
|
+
}
|
|
72
|
+
}
|
|
62
73
|
} else {
|
|
63
74
|
const contextOptions: any = {
|
|
64
75
|
viewport: browserConfig.viewport
|
|
65
76
|
};
|
|
66
77
|
|
|
67
|
-
// Apply stealth headers if configured
|
|
68
|
-
if (stealthConfig.enabled
|
|
69
|
-
|
|
78
|
+
// Apply stealth headers and UA if configured
|
|
79
|
+
if (stealthConfig.enabled) {
|
|
80
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
81
|
+
contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
|
|
82
|
+
}
|
|
83
|
+
if (stealthConfig.userAgent) {
|
|
84
|
+
contextOptions.userAgent = stealthConfig.userAgent;
|
|
85
|
+
}
|
|
70
86
|
}
|
|
71
87
|
|
|
72
88
|
this.context = await this.browser.newContext(contextOptions);
|
|
@@ -105,13 +121,18 @@ export class HybridBrowserSession {
|
|
|
105
121
|
if (stealthConfig.enabled) {
|
|
106
122
|
launchOptions.args = stealthConfig.args || [];
|
|
107
123
|
|
|
108
|
-
// Apply stealth user agent if configured
|
|
124
|
+
// Apply stealth user agent/headers if configured
|
|
109
125
|
if (stealthConfig.userAgent) {
|
|
110
126
|
launchOptions.userAgent = stealthConfig.userAgent;
|
|
111
127
|
}
|
|
128
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
129
|
+
launchOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
|
|
130
|
+
}
|
|
112
131
|
}
|
|
113
132
|
|
|
114
133
|
if (browserConfig.userDataDir) {
|
|
134
|
+
// Ensure viewport is honored in persistent context
|
|
135
|
+
launchOptions.viewport = browserConfig.viewport;
|
|
115
136
|
this.context = await chromium.launchPersistentContext(
|
|
116
137
|
browserConfig.userDataDir,
|
|
117
138
|
launchOptions
|
|
@@ -129,9 +150,14 @@ export class HybridBrowserSession {
|
|
|
129
150
|
viewport: browserConfig.viewport
|
|
130
151
|
};
|
|
131
152
|
|
|
132
|
-
// Apply stealth headers if configured
|
|
133
|
-
if (stealthConfig.enabled
|
|
134
|
-
|
|
153
|
+
// Apply stealth headers and UA if configured
|
|
154
|
+
if (stealthConfig.enabled) {
|
|
155
|
+
if (stealthConfig.extraHTTPHeaders) {
|
|
156
|
+
contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
|
|
157
|
+
}
|
|
158
|
+
if (stealthConfig.userAgent) {
|
|
159
|
+
contextOptions.userAgent = stealthConfig.userAgent;
|
|
160
|
+
}
|
|
135
161
|
}
|
|
136
162
|
|
|
137
163
|
this.context = await this.browser.newContext(contextOptions);
|
|
@@ -173,26 +199,10 @@ export class HybridBrowserSession {
|
|
|
173
199
|
|
|
174
200
|
async getCurrentPage(): Promise<Page> {
|
|
175
201
|
if (!this.currentTabId || !this.pages.has(this.currentTabId)) {
|
|
176
|
-
// In CDP mode,
|
|
202
|
+
// In CDP mode, we cannot create new pages
|
|
177
203
|
const browserConfig = this.configLoader.getBrowserConfig();
|
|
178
|
-
if (browserConfig.connectOverCdp
|
|
179
|
-
|
|
180
|
-
try {
|
|
181
|
-
const newPage = await this.context.newPage();
|
|
182
|
-
const newTabId = this.generateTabId();
|
|
183
|
-
this.registerNewPage(newTabId, newPage);
|
|
184
|
-
this.currentTabId = newTabId;
|
|
185
|
-
|
|
186
|
-
// Set page timeouts
|
|
187
|
-
newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
|
|
188
|
-
newPage.setDefaultTimeout(browserConfig.navigationTimeout);
|
|
189
|
-
|
|
190
|
-
console.log(`[CDP] Created new page with tab ID: ${newTabId}`);
|
|
191
|
-
return newPage;
|
|
192
|
-
} catch (error) {
|
|
193
|
-
console.error('[CDP] Failed to create new page:', error);
|
|
194
|
-
throw new Error('No active page available and failed to create new page in CDP mode');
|
|
195
|
-
}
|
|
204
|
+
if (browserConfig.connectOverCdp) {
|
|
205
|
+
throw new Error('No active page available in CDP mode; frontend must pre-create blank tabs.');
|
|
196
206
|
}
|
|
197
207
|
throw new Error('No active page available');
|
|
198
208
|
}
|
|
@@ -235,6 +245,36 @@ export class HybridBrowserSession {
|
|
|
235
245
|
return this.getSnapshotForAINative(includeCoordinates, viewportLimit);
|
|
236
246
|
}
|
|
237
247
|
|
|
248
|
+
private parseElementFromSnapshot(snapshotText: string, ref: string): { role?: string; text?: string } {
|
|
249
|
+
const lines = snapshotText.split('\n');
|
|
250
|
+
for (const line of lines) {
|
|
251
|
+
if (line.includes(`[ref=${ref}]`)) {
|
|
252
|
+
const typeMatch = line.match(/^\s*-?\s*([\w-]+)/);
|
|
253
|
+
const role = typeMatch ? typeMatch[1] : undefined;
|
|
254
|
+
const textMatch = line.match(/"([^"]*)"/);
|
|
255
|
+
const text = textMatch ? textMatch[1] : undefined;
|
|
256
|
+
return { role, text };
|
|
257
|
+
}
|
|
258
|
+
}
|
|
259
|
+
return {};
|
|
260
|
+
}
|
|
261
|
+
|
|
262
|
+
private buildSnapshotIndex(snapshotText: string): Map<string, { role?: string; text?: string }> {
|
|
263
|
+
const index = new Map<string, { role?: string; text?: string }>();
|
|
264
|
+
const refRe = /\[ref=([^\]]+)\]/i;
|
|
265
|
+
for (const line of snapshotText.split('\n')) {
|
|
266
|
+
const m = line.match(refRe);
|
|
267
|
+
if (!m) continue;
|
|
268
|
+
const ref = m[1];
|
|
269
|
+
const roleMatch = line.match(/^\s*-?\s*([a-z0-9_-]+)/i);
|
|
270
|
+
const role = roleMatch ? roleMatch[1].toLowerCase() : undefined;
|
|
271
|
+
const textMatch = line.match(/"([^"]*)"/);
|
|
272
|
+
const text = textMatch ? textMatch[1] : undefined;
|
|
273
|
+
index.set(ref, { role, text });
|
|
274
|
+
}
|
|
275
|
+
return index;
|
|
276
|
+
}
|
|
277
|
+
|
|
238
278
|
private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
|
|
239
279
|
const startTime = Date.now();
|
|
240
280
|
const page = await this.getCurrentPage();
|
|
@@ -257,6 +297,17 @@ export class HybridBrowserSession {
|
|
|
257
297
|
const mappingStart = Date.now();
|
|
258
298
|
const playwrightMapping: Record<string, any> = {};
|
|
259
299
|
|
|
300
|
+
// Parse element info in a single pass
|
|
301
|
+
const snapshotIndex = this.buildSnapshotIndex(snapshotText);
|
|
302
|
+
for (const ref of refs) {
|
|
303
|
+
const elementInfo = snapshotIndex.get(ref) || {};
|
|
304
|
+
playwrightMapping[ref] = {
|
|
305
|
+
ref,
|
|
306
|
+
role: elementInfo.role || 'unknown',
|
|
307
|
+
text: elementInfo.text || '',
|
|
308
|
+
};
|
|
309
|
+
}
|
|
310
|
+
|
|
260
311
|
if (includeCoordinates) {
|
|
261
312
|
// Get coordinates for each ref using aria-ref selector
|
|
262
313
|
for (const ref of refs) {
|
|
@@ -270,8 +321,9 @@ export class HybridBrowserSession {
|
|
|
270
321
|
const boundingBox = await element.boundingBox();
|
|
271
322
|
|
|
272
323
|
if (boundingBox) {
|
|
324
|
+
// Add coordinates to existing element info
|
|
273
325
|
playwrightMapping[ref] = {
|
|
274
|
-
ref,
|
|
326
|
+
...playwrightMapping[ref],
|
|
275
327
|
coordinates: {
|
|
276
328
|
x: Math.round(boundingBox.x),
|
|
277
329
|
y: Math.round(boundingBox.y),
|
|
@@ -388,7 +440,6 @@ export class HybridBrowserSession {
|
|
|
388
440
|
|
|
389
441
|
if (shouldOpenNewTab) {
|
|
390
442
|
// Handle new tab opening
|
|
391
|
-
|
|
392
443
|
// If it's a link that doesn't naturally open in new tab, force it
|
|
393
444
|
if (isNavigableLink && !naturallyOpensNewTab) {
|
|
394
445
|
await element.evaluate((el, blankTarget) => {
|
|
@@ -803,6 +854,55 @@ export class HybridBrowserSession {
|
|
|
803
854
|
}
|
|
804
855
|
}
|
|
805
856
|
|
|
857
|
+
/**
|
|
858
|
+
* Wait for DOM to stop changing for a specified duration
|
|
859
|
+
*/
|
|
860
|
+
private async waitForDOMStability(page: Page, maxWaitTime: number = 500): Promise<void> {
|
|
861
|
+
const startTime = Date.now();
|
|
862
|
+
const stabilityThreshold = 100; // Consider stable if no changes for 100ms
|
|
863
|
+
let lastChangeTime = Date.now();
|
|
864
|
+
|
|
865
|
+
try {
|
|
866
|
+
// Monitor DOM changes
|
|
867
|
+
await page.evaluate(() => {
|
|
868
|
+
let changeCount = 0;
|
|
869
|
+
(window as any).__domStabilityCheck = { changeCount: 0, lastChange: Date.now() };
|
|
870
|
+
|
|
871
|
+
const observer = new MutationObserver(() => {
|
|
872
|
+
(window as any).__domStabilityCheck.changeCount++;
|
|
873
|
+
(window as any).__domStabilityCheck.lastChange = Date.now();
|
|
874
|
+
});
|
|
875
|
+
|
|
876
|
+
observer.observe(document.body, {
|
|
877
|
+
childList: true,
|
|
878
|
+
subtree: true,
|
|
879
|
+
attributes: true,
|
|
880
|
+
characterData: true
|
|
881
|
+
});
|
|
882
|
+
|
|
883
|
+
(window as any).__domStabilityObserver = observer;
|
|
884
|
+
});
|
|
885
|
+
|
|
886
|
+
// Wait until no changes for stabilityThreshold or timeout
|
|
887
|
+
await page.waitForFunction(
|
|
888
|
+
(threshold) => {
|
|
889
|
+
const check = (window as any).__domStabilityCheck;
|
|
890
|
+
return check && (Date.now() - check.lastChange) > threshold;
|
|
891
|
+
},
|
|
892
|
+
stabilityThreshold,
|
|
893
|
+
{ timeout: Math.max(0, maxWaitTime) }
|
|
894
|
+
).catch(() => {});
|
|
895
|
+
} finally {
|
|
896
|
+
// Cleanup
|
|
897
|
+
await page.evaluate(() => {
|
|
898
|
+
const observer = (window as any).__domStabilityObserver;
|
|
899
|
+
if (observer) observer.disconnect();
|
|
900
|
+
delete (window as any).__domStabilityObserver;
|
|
901
|
+
delete (window as any).__domStabilityCheck;
|
|
902
|
+
}).catch(() => {});
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
806
906
|
private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
|
|
807
907
|
let domContentLoadedTime = 0;
|
|
808
908
|
let networkIdleTime = 0;
|
|
@@ -1132,12 +1232,12 @@ export class HybridBrowserSession {
|
|
|
1132
1232
|
const filtered: Record<string, SnapshotElement> = {};
|
|
1133
1233
|
|
|
1134
1234
|
|
|
1135
|
-
// Apply viewport filtering
|
|
1136
|
-
|
|
1137
|
-
const
|
|
1138
|
-
|
|
1139
|
-
|
|
1140
|
-
|
|
1235
|
+
// Apply viewport filtering
|
|
1236
|
+
// boundingBox() returns viewport-relative coordinates, so we don't need to add scroll offsets
|
|
1237
|
+
const viewportLeft = 0;
|
|
1238
|
+
const viewportTop = 0;
|
|
1239
|
+
const viewportRight = viewport.width;
|
|
1240
|
+
const viewportBottom = viewport.height;
|
|
1141
1241
|
|
|
1142
1242
|
for (const [ref, element] of Object.entries(elements)) {
|
|
1143
1243
|
// If element has no coordinates, include it (fallback)
|
|
@@ -1148,14 +1248,9 @@ export class HybridBrowserSession {
|
|
|
1148
1248
|
|
|
1149
1249
|
const { x, y, width, height } = element.coordinates;
|
|
1150
1250
|
|
|
1151
|
-
// Calculate viewport bounds using adjusted scroll position
|
|
1152
|
-
const viewportLeft = adjustedScrollPos.x;
|
|
1153
|
-
const viewportTop = adjustedScrollPos.y;
|
|
1154
|
-
const viewportRight = adjustedScrollPos.x + viewport.width;
|
|
1155
|
-
const viewportBottom = adjustedScrollPos.y + viewport.height;
|
|
1156
|
-
|
|
1157
1251
|
// Check if element is visible in current viewport
|
|
1158
1252
|
// Element is visible if it overlaps with viewport bounds
|
|
1253
|
+
// Since boundingBox() coords are viewport-relative, we compare directly
|
|
1159
1254
|
const isVisible = (
|
|
1160
1255
|
x < viewportRight && // Left edge is before viewport right
|
|
1161
1256
|
y < viewportBottom && // Top edge is before viewport bottom
|
|
@@ -79,6 +79,7 @@ export interface WebSocketConfig {
|
|
|
79
79
|
browser_log_to_file: boolean;
|
|
80
80
|
session_id?: string;
|
|
81
81
|
viewport_limit: boolean;
|
|
82
|
+
fullVisualMode?: boolean;
|
|
82
83
|
}
|
|
83
84
|
|
|
84
85
|
// Default stealth configuration
|
|
@@ -212,6 +213,7 @@ export class ConfigLoader {
|
|
|
212
213
|
if (config.browser_log_to_file !== undefined) wsConfig.browser_log_to_file = config.browser_log_to_file;
|
|
213
214
|
if (config.session_id !== undefined) wsConfig.session_id = config.session_id;
|
|
214
215
|
if (config.viewport_limit !== undefined) wsConfig.viewport_limit = config.viewport_limit;
|
|
216
|
+
if (config.fullVisualMode !== undefined) wsConfig.fullVisualMode = config.fullVisualMode;
|
|
215
217
|
|
|
216
218
|
// CDP connection options
|
|
217
219
|
if (config.connectOverCdp !== undefined) browserConfig.connectOverCdp = config.connectOverCdp;
|
|
@@ -2,18 +2,22 @@ import {HybridBrowserSession} from './browser-session';
|
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
4
|
import {ConsoleMessage} from 'playwright';
|
|
5
|
+
import {SomScreenshotInjected} from './som-screenshot-injected';
|
|
6
|
+
import {filterClickableByHierarchy} from './snapshot-parser';
|
|
5
7
|
|
|
6
8
|
export class HybridBrowserToolkit {
|
|
7
9
|
private session: HybridBrowserSession;
|
|
8
10
|
private config: BrowserToolkitConfig;
|
|
9
11
|
private configLoader: ConfigLoader;
|
|
10
12
|
private viewportLimit: boolean;
|
|
13
|
+
private fullVisualMode: boolean;
|
|
11
14
|
|
|
12
15
|
constructor(config: BrowserToolkitConfig = {}) {
|
|
13
16
|
this.configLoader = ConfigLoader.fromPythonConfig(config);
|
|
14
17
|
this.config = config; // Store original config for backward compatibility
|
|
15
18
|
this.session = new HybridBrowserSession(this.configLoader.getBrowserConfig()); // Pass processed config
|
|
16
19
|
this.viewportLimit = this.configLoader.getWebSocketConfig().viewport_limit;
|
|
20
|
+
this.fullVisualMode = this.configLoader.getWebSocketConfig().fullVisualMode || false;
|
|
17
21
|
}
|
|
18
22
|
|
|
19
23
|
async openBrowser(startUrl?: string): Promise<ActionResult> {
|
|
@@ -26,7 +30,7 @@ export class HybridBrowserToolkit {
|
|
|
26
30
|
const result = await this.session.visitPage(url);
|
|
27
31
|
|
|
28
32
|
const snapshotStart = Date.now();
|
|
29
|
-
const snapshot = await this.
|
|
33
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
30
34
|
const snapshotTime = Date.now() - snapshotStart;
|
|
31
35
|
|
|
32
36
|
const totalTime = Date.now() - startTime;
|
|
@@ -83,7 +87,7 @@ export class HybridBrowserToolkit {
|
|
|
83
87
|
|
|
84
88
|
if (result.success) {
|
|
85
89
|
const snapshotStart = Date.now();
|
|
86
|
-
response.snapshot = await this.
|
|
90
|
+
response.snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
87
91
|
const snapshotTime = Date.now() - snapshotStart;
|
|
88
92
|
|
|
89
93
|
if (result.timing) {
|
|
@@ -119,6 +123,7 @@ export class HybridBrowserToolkit {
|
|
|
119
123
|
|
|
120
124
|
async getPageSnapshot(viewportLimit: boolean = false): Promise<string> {
|
|
121
125
|
try {
|
|
126
|
+
// Always return real snapshot when explicitly called
|
|
122
127
|
// If viewport limiting is enabled, we need coordinates for filtering
|
|
123
128
|
const snapshotResult = await this.session.getSnapshotForAI(viewportLimit, viewportLimit);
|
|
124
129
|
return snapshotResult.snapshot;
|
|
@@ -126,6 +131,14 @@ export class HybridBrowserToolkit {
|
|
|
126
131
|
return `Error capturing snapshot: ${error}`;
|
|
127
132
|
}
|
|
128
133
|
}
|
|
134
|
+
|
|
135
|
+
// Internal method for getting snapshot in actions (respects fullVisualMode)
|
|
136
|
+
private async getSnapshotForAction(viewportLimit: boolean = false): Promise<string> {
|
|
137
|
+
if (this.fullVisualMode) {
|
|
138
|
+
return 'full visual mode';
|
|
139
|
+
}
|
|
140
|
+
return this.getPageSnapshot(viewportLimit);
|
|
141
|
+
}
|
|
129
142
|
|
|
130
143
|
|
|
131
144
|
async getSnapshotForAI(): Promise<SnapshotResult> {
|
|
@@ -134,35 +147,34 @@ export class HybridBrowserToolkit {
|
|
|
134
147
|
|
|
135
148
|
async getSomScreenshot(): Promise<VisualMarkResult & { timing: any }> {
|
|
136
149
|
const startTime = Date.now();
|
|
150
|
+
console.log('[HybridBrowserToolkit] Starting getSomScreenshot...');
|
|
137
151
|
|
|
138
152
|
try {
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
// Add visual marks using improved method
|
|
143
|
-
const markingStart = Date.now();
|
|
144
|
-
const markedImageBuffer = await this.addVisualMarksOptimized(screenshotResult.buffer, snapshotResult);
|
|
145
|
-
const markingTime = Date.now() - markingStart;
|
|
153
|
+
// Get page and snapshot data
|
|
154
|
+
const page = await this.session.getCurrentPage();
|
|
155
|
+
const snapshotResult = await this.session.getSnapshotForAI(true); // Include coordinates
|
|
146
156
|
|
|
147
|
-
|
|
148
|
-
const
|
|
157
|
+
// Parse clickable elements from snapshot text
|
|
158
|
+
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
159
|
+
console.log(`[HybridBrowserToolkit] Found ${clickableElements.size} clickable elements`);
|
|
149
160
|
|
|
150
|
-
|
|
161
|
+
// Apply hierarchy-based filtering
|
|
162
|
+
const filteredElements = filterClickableByHierarchy(snapshotResult.snapshot, clickableElements);
|
|
163
|
+
console.log(`[HybridBrowserToolkit] After filtering: ${filteredElements.size} elements remain`);
|
|
164
|
+
|
|
165
|
+
// Use injected SOM-screenshot method without export path
|
|
166
|
+
const result = await SomScreenshotInjected.captureOptimized(
|
|
167
|
+
page,
|
|
168
|
+
snapshotResult,
|
|
169
|
+
filteredElements,
|
|
170
|
+
undefined // No export path - don't generate files
|
|
171
|
+
);
|
|
151
172
|
|
|
152
|
-
//
|
|
153
|
-
|
|
173
|
+
// Add snapshot timing info to result
|
|
174
|
+
result.timing.snapshot_time_ms = snapshotResult.timing.snapshot_time_ms;
|
|
175
|
+
result.timing.coordinate_enrichment_time_ms = snapshotResult.timing.coordinate_enrichment_time_ms;
|
|
154
176
|
|
|
155
|
-
return
|
|
156
|
-
text: `Visual webpage screenshot captured with ${Object.keys(snapshotResult.elements).length} interactive elements (${elementsWithCoords} marked visually)`,
|
|
157
|
-
images: [dataUrl],
|
|
158
|
-
timing: {
|
|
159
|
-
total_time_ms: totalTime,
|
|
160
|
-
screenshot_time_ms: screenshotResult.timing.screenshot_time_ms,
|
|
161
|
-
snapshot_time_ms: snapshotResult.timing.snapshot_time_ms,
|
|
162
|
-
coordinate_enrichment_time_ms: snapshotResult.timing.coordinate_enrichment_time_ms,
|
|
163
|
-
visual_marking_time_ms: markingTime,
|
|
164
|
-
},
|
|
165
|
-
};
|
|
177
|
+
return result;
|
|
166
178
|
} catch (error) {
|
|
167
179
|
const totalTime = Date.now() - startTime;
|
|
168
180
|
return {
|
|
@@ -179,132 +191,6 @@ export class HybridBrowserToolkit {
|
|
|
179
191
|
}
|
|
180
192
|
}
|
|
181
193
|
|
|
182
|
-
private async addVisualMarksOptimized(screenshotBuffer: Buffer, snapshotResult: SnapshotResult): Promise<Buffer> {
|
|
183
|
-
try {
|
|
184
|
-
|
|
185
|
-
// Check if we have any elements with coordinates
|
|
186
|
-
const elementsWithCoords = Object.entries(snapshotResult.elements)
|
|
187
|
-
.filter(([ref, element]) => element.coordinates);
|
|
188
|
-
|
|
189
|
-
if (elementsWithCoords.length === 0) {
|
|
190
|
-
return screenshotBuffer;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// Parse clickable elements from snapshot text
|
|
194
|
-
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
195
|
-
|
|
196
|
-
// Use sharp for image processing
|
|
197
|
-
const sharp = require('sharp');
|
|
198
|
-
const page = await this.session.getCurrentPage();
|
|
199
|
-
let viewport = page.viewportSize();
|
|
200
|
-
|
|
201
|
-
// In CDP mode, viewportSize might be null, get it from window dimensions
|
|
202
|
-
if (!viewport) {
|
|
203
|
-
const windowSize = await page.evaluate(() => ({
|
|
204
|
-
width: window.innerWidth,
|
|
205
|
-
height: window.innerHeight
|
|
206
|
-
}));
|
|
207
|
-
viewport = windowSize;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// Get device pixel ratio to handle high DPI screens
|
|
211
|
-
const dpr = await page.evaluate(() => window.devicePixelRatio) || 1;
|
|
212
|
-
|
|
213
|
-
// Get actual screenshot dimensions
|
|
214
|
-
const metadata = await sharp(screenshotBuffer).metadata();
|
|
215
|
-
const screenshotWidth = metadata.width || viewport.width;
|
|
216
|
-
const screenshotHeight = metadata.height || viewport.height;
|
|
217
|
-
|
|
218
|
-
// Calculate scaling factor between CSS pixels and screenshot pixels
|
|
219
|
-
const scaleX = screenshotWidth / viewport.width;
|
|
220
|
-
const scaleY = screenshotHeight / viewport.height;
|
|
221
|
-
|
|
222
|
-
// Debug logging for CDP mode
|
|
223
|
-
if (process.env.HYBRID_BROWSER_DEBUG === '1') {
|
|
224
|
-
console.log('[CDP Debug] Viewport size:', viewport);
|
|
225
|
-
console.log('[CDP Debug] Device pixel ratio:', dpr);
|
|
226
|
-
console.log('[CDP Debug] Screenshot dimensions:', { width: screenshotWidth, height: screenshotHeight });
|
|
227
|
-
console.log('[CDP Debug] Scale factors:', { scaleX, scaleY });
|
|
228
|
-
console.log('[CDP Debug] Elements with coordinates:', elementsWithCoords.length);
|
|
229
|
-
elementsWithCoords.slice(0, 3).forEach(([ref, element]) => {
|
|
230
|
-
console.log(`[CDP Debug] Element ${ref}:`, element.coordinates);
|
|
231
|
-
});
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// Filter elements visible in viewport
|
|
235
|
-
const visibleElements = elementsWithCoords.filter(([ref, element]) => {
|
|
236
|
-
const coords = element.coordinates!;
|
|
237
|
-
return coords.x < viewport.width &&
|
|
238
|
-
coords.y < viewport.height &&
|
|
239
|
-
coords.x + coords.width > 0 &&
|
|
240
|
-
coords.y + coords.height > 0;
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
// Remove overlapped elements (only keep topmost)
|
|
244
|
-
const nonOverlappedElements = this.removeOverlappedElements(visibleElements);
|
|
245
|
-
|
|
246
|
-
// Create SVG overlay with all the marks
|
|
247
|
-
const marks = nonOverlappedElements.map(([ref, element]) => {
|
|
248
|
-
const coords = element.coordinates!;
|
|
249
|
-
const isClickable = clickableElements.has(ref);
|
|
250
|
-
|
|
251
|
-
// Scale coordinates from CSS pixels to screenshot pixels
|
|
252
|
-
const x = Math.max(0, coords.x * scaleX);
|
|
253
|
-
const y = Math.max(0, coords.y * scaleY);
|
|
254
|
-
const width = coords.width * scaleX;
|
|
255
|
-
const height = coords.height * scaleY;
|
|
256
|
-
|
|
257
|
-
// Clamp to screenshot bounds
|
|
258
|
-
const clampedWidth = Math.min(width, screenshotWidth - x);
|
|
259
|
-
const clampedHeight = Math.min(height, screenshotHeight - y);
|
|
260
|
-
|
|
261
|
-
// Position text to be visible even if element is partially cut off
|
|
262
|
-
const textX = Math.max(2, Math.min(x + 2, screenshotWidth - 40));
|
|
263
|
-
const textY = Math.max(14, Math.min(y + 14, screenshotHeight - 4));
|
|
264
|
-
|
|
265
|
-
// Different colors for clickable vs non-clickable elements
|
|
266
|
-
const colors = isClickable ? {
|
|
267
|
-
fill: 'rgba(0, 150, 255, 0.15)', // Blue for clickable
|
|
268
|
-
stroke: '#0096FF',
|
|
269
|
-
textFill: '#0096FF'
|
|
270
|
-
} : {
|
|
271
|
-
fill: 'rgba(255, 107, 107, 0.1)', // Red for non-clickable
|
|
272
|
-
stroke: '#FF6B6B',
|
|
273
|
-
textFill: '#FF6B6B'
|
|
274
|
-
};
|
|
275
|
-
|
|
276
|
-
return `
|
|
277
|
-
<rect x="${x}" y="${y}" width="${clampedWidth}" height="${clampedHeight}"
|
|
278
|
-
fill="${colors.fill}" stroke="${colors.stroke}" stroke-width="2" rx="2"/>
|
|
279
|
-
<text x="${textX}" y="${textY}" font-family="Arial, sans-serif"
|
|
280
|
-
font-size="12" fill="${colors.textFill}" font-weight="bold">${ref}</text>
|
|
281
|
-
`;
|
|
282
|
-
}).join('');
|
|
283
|
-
|
|
284
|
-
const svgOverlay = `
|
|
285
|
-
<svg width="${screenshotWidth}" height="${screenshotHeight}" xmlns="http://www.w3.org/2000/svg">
|
|
286
|
-
${marks}
|
|
287
|
-
</svg>
|
|
288
|
-
`;
|
|
289
|
-
|
|
290
|
-
// Composite the overlay onto the screenshot
|
|
291
|
-
const markedImageBuffer = await sharp(screenshotBuffer)
|
|
292
|
-
.composite([{
|
|
293
|
-
input: Buffer.from(svgOverlay),
|
|
294
|
-
top: 0,
|
|
295
|
-
left: 0
|
|
296
|
-
}])
|
|
297
|
-
.png()
|
|
298
|
-
.toBuffer();
|
|
299
|
-
|
|
300
|
-
return markedImageBuffer;
|
|
301
|
-
|
|
302
|
-
} catch (error) {
|
|
303
|
-
// Error adding visual marks, falling back to original screenshot
|
|
304
|
-
// Return original screenshot if marking fails
|
|
305
|
-
return screenshotBuffer;
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
194
|
|
|
309
195
|
/**
|
|
310
196
|
* Parse clickable elements from snapshot text
|
|
@@ -314,8 +200,8 @@ export class HybridBrowserToolkit {
|
|
|
314
200
|
const lines = snapshotText.split('\n');
|
|
315
201
|
|
|
316
202
|
for (const line of lines) {
|
|
317
|
-
// Look for lines containing [cursor=pointer] and extract ref
|
|
318
|
-
if (line.includes('[cursor=pointer]')) {
|
|
203
|
+
// Look for lines containing [cursor=pointer] or [active] and extract ref
|
|
204
|
+
if (line.includes('[cursor=pointer]') || line.includes('[active]')) {
|
|
319
205
|
const refMatch = line.match(/\[ref=([^\]]+)\]/);
|
|
320
206
|
if (refMatch) {
|
|
321
207
|
clickableElements.add(refMatch[1]);
|
|
@@ -326,56 +212,6 @@ export class HybridBrowserToolkit {
|
|
|
326
212
|
return clickableElements;
|
|
327
213
|
}
|
|
328
214
|
|
|
329
|
-
/**
|
|
330
|
-
* Remove overlapped elements, keeping only the topmost (last in DOM order)
|
|
331
|
-
*/
|
|
332
|
-
private removeOverlappedElements(elements: Array<[string, any]>): Array<[string, any]> {
|
|
333
|
-
const result: Array<[string, any]> = [];
|
|
334
|
-
|
|
335
|
-
for (let i = 0; i < elements.length; i++) {
|
|
336
|
-
const [refA, elementA] = elements[i];
|
|
337
|
-
const coordsA = elementA.coordinates!;
|
|
338
|
-
let isOverlapped = false;
|
|
339
|
-
|
|
340
|
-
// Check if this element is completely overlapped by any later element
|
|
341
|
-
for (let j = i + 1; j < elements.length; j++) {
|
|
342
|
-
const [refB, elementB] = elements[j];
|
|
343
|
-
const coordsB = elementB.coordinates!;
|
|
344
|
-
|
|
345
|
-
// Check if element A is completely covered by element B
|
|
346
|
-
if (this.isCompletelyOverlapped(coordsA, coordsB)) {
|
|
347
|
-
isOverlapped = true;
|
|
348
|
-
break;
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
if (!isOverlapped) {
|
|
353
|
-
result.push(elements[i]);
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
return result;
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
/**
|
|
361
|
-
* Check if element A is completely overlapped by element B
|
|
362
|
-
*/
|
|
363
|
-
private isCompletelyOverlapped(
|
|
364
|
-
coordsA: { x: number; y: number; width: number; height: number },
|
|
365
|
-
coordsB: { x: number; y: number; width: number; height: number }
|
|
366
|
-
): boolean {
|
|
367
|
-
// A is completely overlapped by B if:
|
|
368
|
-
// B's left edge is <= A's left edge AND
|
|
369
|
-
// B's top edge is <= A's top edge AND
|
|
370
|
-
// B's right edge is >= A's right edge AND
|
|
371
|
-
// B's bottom edge is >= A's bottom edge
|
|
372
|
-
return (
|
|
373
|
-
coordsB.x <= coordsA.x &&
|
|
374
|
-
coordsB.y <= coordsA.y &&
|
|
375
|
-
coordsB.x + coordsB.width >= coordsA.x + coordsA.width &&
|
|
376
|
-
coordsB.y + coordsB.height >= coordsA.y + coordsA.height
|
|
377
|
-
);
|
|
378
|
-
}
|
|
379
215
|
|
|
380
216
|
private async executeActionWithSnapshot(action: BrowserAction): Promise<any> {
|
|
381
217
|
const result = await this.session.executeAction(action);
|
|
@@ -472,7 +308,7 @@ export class HybridBrowserToolkit {
|
|
|
472
308
|
const navigationTime = Date.now() - navigationStart;
|
|
473
309
|
|
|
474
310
|
const snapshotStart = Date.now();
|
|
475
|
-
const snapshot = await this.
|
|
311
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
476
312
|
const snapshotTime = Date.now() - snapshotStart;
|
|
477
313
|
|
|
478
314
|
const totalTime = Date.now() - startTime;
|
|
@@ -512,7 +348,7 @@ export class HybridBrowserToolkit {
|
|
|
512
348
|
const navigationTime = Date.now() - navigationStart;
|
|
513
349
|
|
|
514
350
|
const snapshotStart = Date.now();
|
|
515
|
-
const snapshot = await this.
|
|
351
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
516
352
|
const snapshotTime = Date.now() - snapshotStart;
|
|
517
353
|
|
|
518
354
|
const totalTime = Date.now() - startTime;
|
|
@@ -584,7 +420,7 @@ export class HybridBrowserToolkit {
|
|
|
584
420
|
return {
|
|
585
421
|
success: true,
|
|
586
422
|
message: `Closed tab ${tabId}`,
|
|
587
|
-
snapshot: await this.
|
|
423
|
+
snapshot: await this.getSnapshotForAction(this.viewportLimit),
|
|
588
424
|
};
|
|
589
425
|
} else {
|
|
590
426
|
return {
|
|
@@ -649,7 +485,7 @@ export class HybridBrowserToolkit {
|
|
|
649
485
|
const { result, logs } = evalResult;
|
|
650
486
|
|
|
651
487
|
const snapshotStart = Date.now();
|
|
652
|
-
const snapshot = await this.
|
|
488
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
653
489
|
const snapshotTime = Date.now() - snapshotStart;
|
|
654
490
|
const totalTime = Date.now() - startTime;
|
|
655
491
|
|