camel-ai 0.2.75a6__py3-none-any.whl → 0.2.76a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +151 -37
- camel/configs/__init__.py +3 -0
- camel/configs/amd_config.py +70 -0
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/models/__init__.py +2 -0
- camel/models/amd_model.py +101 -0
- camel/models/model_factory.py +2 -0
- camel/models/openai_model.py +0 -6
- camel/runtimes/daytona_runtime.py +11 -12
- camel/toolkits/__init__.py +5 -3
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/function_tool.py +6 -1
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +8 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +12 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +33 -14
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +135 -40
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +43 -207
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +231 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +241 -56
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +5 -1
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
- camel/toolkits/mcp_toolkit.py +39 -14
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/terminal_toolkit.py +12 -2
- camel/toolkits/video_analysis_toolkit.py +16 -10
- camel/types/enums.py +11 -0
- camel/utils/commons.py +2 -0
- camel/utils/mcp.py +136 -2
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/METADATA +5 -3
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/RECORD +38 -31
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,18 +2,22 @@ import {HybridBrowserSession} from './browser-session';
|
|
|
2
2
|
import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
|
|
3
3
|
import {ConfigLoader} from './config-loader';
|
|
4
4
|
import {ConsoleMessage} from 'playwright';
|
|
5
|
+
import {SomScreenshotInjected} from './som-screenshot-injected';
|
|
6
|
+
import {filterClickableByHierarchy} from './snapshot-parser';
|
|
5
7
|
|
|
6
8
|
export class HybridBrowserToolkit {
|
|
7
9
|
private session: HybridBrowserSession;
|
|
8
10
|
private config: BrowserToolkitConfig;
|
|
9
11
|
private configLoader: ConfigLoader;
|
|
10
12
|
private viewportLimit: boolean;
|
|
13
|
+
private fullVisualMode: boolean;
|
|
11
14
|
|
|
12
15
|
constructor(config: BrowserToolkitConfig = {}) {
|
|
13
16
|
this.configLoader = ConfigLoader.fromPythonConfig(config);
|
|
14
17
|
this.config = config; // Store original config for backward compatibility
|
|
15
18
|
this.session = new HybridBrowserSession(this.configLoader.getBrowserConfig()); // Pass processed config
|
|
16
19
|
this.viewportLimit = this.configLoader.getWebSocketConfig().viewport_limit;
|
|
20
|
+
this.fullVisualMode = this.configLoader.getWebSocketConfig().fullVisualMode || false;
|
|
17
21
|
}
|
|
18
22
|
|
|
19
23
|
async openBrowser(startUrl?: string): Promise<ActionResult> {
|
|
@@ -26,7 +30,7 @@ export class HybridBrowserToolkit {
|
|
|
26
30
|
const result = await this.session.visitPage(url);
|
|
27
31
|
|
|
28
32
|
const snapshotStart = Date.now();
|
|
29
|
-
const snapshot = await this.
|
|
33
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
30
34
|
const snapshotTime = Date.now() - snapshotStart;
|
|
31
35
|
|
|
32
36
|
const totalTime = Date.now() - startTime;
|
|
@@ -83,7 +87,7 @@ export class HybridBrowserToolkit {
|
|
|
83
87
|
|
|
84
88
|
if (result.success) {
|
|
85
89
|
const snapshotStart = Date.now();
|
|
86
|
-
response.snapshot = await this.
|
|
90
|
+
response.snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
87
91
|
const snapshotTime = Date.now() - snapshotStart;
|
|
88
92
|
|
|
89
93
|
if (result.timing) {
|
|
@@ -119,6 +123,7 @@ export class HybridBrowserToolkit {
|
|
|
119
123
|
|
|
120
124
|
async getPageSnapshot(viewportLimit: boolean = false): Promise<string> {
|
|
121
125
|
try {
|
|
126
|
+
// Always return real snapshot when explicitly called
|
|
122
127
|
// If viewport limiting is enabled, we need coordinates for filtering
|
|
123
128
|
const snapshotResult = await this.session.getSnapshotForAI(viewportLimit, viewportLimit);
|
|
124
129
|
return snapshotResult.snapshot;
|
|
@@ -126,6 +131,14 @@ export class HybridBrowserToolkit {
|
|
|
126
131
|
return `Error capturing snapshot: ${error}`;
|
|
127
132
|
}
|
|
128
133
|
}
|
|
134
|
+
|
|
135
|
+
// Internal method for getting snapshot in actions (respects fullVisualMode)
|
|
136
|
+
private async getSnapshotForAction(viewportLimit: boolean = false): Promise<string> {
|
|
137
|
+
if (this.fullVisualMode) {
|
|
138
|
+
return 'full visual mode';
|
|
139
|
+
}
|
|
140
|
+
return this.getPageSnapshot(viewportLimit);
|
|
141
|
+
}
|
|
129
142
|
|
|
130
143
|
|
|
131
144
|
async getSnapshotForAI(): Promise<SnapshotResult> {
|
|
@@ -134,35 +147,34 @@ export class HybridBrowserToolkit {
|
|
|
134
147
|
|
|
135
148
|
async getSomScreenshot(): Promise<VisualMarkResult & { timing: any }> {
|
|
136
149
|
const startTime = Date.now();
|
|
150
|
+
console.log('[HybridBrowserToolkit] Starting getSomScreenshot...');
|
|
137
151
|
|
|
138
152
|
try {
|
|
139
|
-
|
|
140
|
-
const
|
|
141
|
-
|
|
142
|
-
// Add visual marks using improved method
|
|
143
|
-
const markingStart = Date.now();
|
|
144
|
-
const markedImageBuffer = await this.addVisualMarksOptimized(screenshotResult.buffer, snapshotResult);
|
|
145
|
-
const markingTime = Date.now() - markingStart;
|
|
153
|
+
// Get page and snapshot data
|
|
154
|
+
const page = await this.session.getCurrentPage();
|
|
155
|
+
const snapshotResult = await this.session.getSnapshotForAI(true); // Include coordinates
|
|
146
156
|
|
|
147
|
-
|
|
148
|
-
const
|
|
157
|
+
// Parse clickable elements from snapshot text
|
|
158
|
+
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
159
|
+
console.log(`[HybridBrowserToolkit] Found ${clickableElements.size} clickable elements`);
|
|
149
160
|
|
|
150
|
-
|
|
161
|
+
// Apply hierarchy-based filtering
|
|
162
|
+
const filteredElements = filterClickableByHierarchy(snapshotResult.snapshot, clickableElements);
|
|
163
|
+
console.log(`[HybridBrowserToolkit] After filtering: ${filteredElements.size} elements remain`);
|
|
164
|
+
|
|
165
|
+
// Use injected SOM-screenshot method without export path
|
|
166
|
+
const result = await SomScreenshotInjected.captureOptimized(
|
|
167
|
+
page,
|
|
168
|
+
snapshotResult,
|
|
169
|
+
filteredElements,
|
|
170
|
+
undefined // No export path - don't generate files
|
|
171
|
+
);
|
|
151
172
|
|
|
152
|
-
//
|
|
153
|
-
|
|
173
|
+
// Add snapshot timing info to result
|
|
174
|
+
result.timing.snapshot_time_ms = snapshotResult.timing.snapshot_time_ms;
|
|
175
|
+
result.timing.coordinate_enrichment_time_ms = snapshotResult.timing.coordinate_enrichment_time_ms;
|
|
154
176
|
|
|
155
|
-
return
|
|
156
|
-
text: `Visual webpage screenshot captured with ${Object.keys(snapshotResult.elements).length} interactive elements (${elementsWithCoords} marked visually)`,
|
|
157
|
-
images: [dataUrl],
|
|
158
|
-
timing: {
|
|
159
|
-
total_time_ms: totalTime,
|
|
160
|
-
screenshot_time_ms: screenshotResult.timing.screenshot_time_ms,
|
|
161
|
-
snapshot_time_ms: snapshotResult.timing.snapshot_time_ms,
|
|
162
|
-
coordinate_enrichment_time_ms: snapshotResult.timing.coordinate_enrichment_time_ms,
|
|
163
|
-
visual_marking_time_ms: markingTime,
|
|
164
|
-
},
|
|
165
|
-
};
|
|
177
|
+
return result;
|
|
166
178
|
} catch (error) {
|
|
167
179
|
const totalTime = Date.now() - startTime;
|
|
168
180
|
return {
|
|
@@ -179,132 +191,6 @@ export class HybridBrowserToolkit {
|
|
|
179
191
|
}
|
|
180
192
|
}
|
|
181
193
|
|
|
182
|
-
private async addVisualMarksOptimized(screenshotBuffer: Buffer, snapshotResult: SnapshotResult): Promise<Buffer> {
|
|
183
|
-
try {
|
|
184
|
-
|
|
185
|
-
// Check if we have any elements with coordinates
|
|
186
|
-
const elementsWithCoords = Object.entries(snapshotResult.elements)
|
|
187
|
-
.filter(([ref, element]) => element.coordinates);
|
|
188
|
-
|
|
189
|
-
if (elementsWithCoords.length === 0) {
|
|
190
|
-
return screenshotBuffer;
|
|
191
|
-
}
|
|
192
|
-
|
|
193
|
-
// Parse clickable elements from snapshot text
|
|
194
|
-
const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
|
|
195
|
-
|
|
196
|
-
// Use sharp for image processing
|
|
197
|
-
const sharp = require('sharp');
|
|
198
|
-
const page = await this.session.getCurrentPage();
|
|
199
|
-
let viewport = page.viewportSize();
|
|
200
|
-
|
|
201
|
-
// In CDP mode, viewportSize might be null, get it from window dimensions
|
|
202
|
-
if (!viewport) {
|
|
203
|
-
const windowSize = await page.evaluate(() => ({
|
|
204
|
-
width: window.innerWidth,
|
|
205
|
-
height: window.innerHeight
|
|
206
|
-
}));
|
|
207
|
-
viewport = windowSize;
|
|
208
|
-
}
|
|
209
|
-
|
|
210
|
-
// Get device pixel ratio to handle high DPI screens
|
|
211
|
-
const dpr = await page.evaluate(() => window.devicePixelRatio) || 1;
|
|
212
|
-
|
|
213
|
-
// Get actual screenshot dimensions
|
|
214
|
-
const metadata = await sharp(screenshotBuffer).metadata();
|
|
215
|
-
const screenshotWidth = metadata.width || viewport.width;
|
|
216
|
-
const screenshotHeight = metadata.height || viewport.height;
|
|
217
|
-
|
|
218
|
-
// Calculate scaling factor between CSS pixels and screenshot pixels
|
|
219
|
-
const scaleX = screenshotWidth / viewport.width;
|
|
220
|
-
const scaleY = screenshotHeight / viewport.height;
|
|
221
|
-
|
|
222
|
-
// Debug logging for CDP mode
|
|
223
|
-
if (process.env.HYBRID_BROWSER_DEBUG === '1') {
|
|
224
|
-
console.log('[CDP Debug] Viewport size:', viewport);
|
|
225
|
-
console.log('[CDP Debug] Device pixel ratio:', dpr);
|
|
226
|
-
console.log('[CDP Debug] Screenshot dimensions:', { width: screenshotWidth, height: screenshotHeight });
|
|
227
|
-
console.log('[CDP Debug] Scale factors:', { scaleX, scaleY });
|
|
228
|
-
console.log('[CDP Debug] Elements with coordinates:', elementsWithCoords.length);
|
|
229
|
-
elementsWithCoords.slice(0, 3).forEach(([ref, element]) => {
|
|
230
|
-
console.log(`[CDP Debug] Element ${ref}:`, element.coordinates);
|
|
231
|
-
});
|
|
232
|
-
}
|
|
233
|
-
|
|
234
|
-
// Filter elements visible in viewport
|
|
235
|
-
const visibleElements = elementsWithCoords.filter(([ref, element]) => {
|
|
236
|
-
const coords = element.coordinates!;
|
|
237
|
-
return coords.x < viewport.width &&
|
|
238
|
-
coords.y < viewport.height &&
|
|
239
|
-
coords.x + coords.width > 0 &&
|
|
240
|
-
coords.y + coords.height > 0;
|
|
241
|
-
});
|
|
242
|
-
|
|
243
|
-
// Remove overlapped elements (only keep topmost)
|
|
244
|
-
const nonOverlappedElements = this.removeOverlappedElements(visibleElements);
|
|
245
|
-
|
|
246
|
-
// Create SVG overlay with all the marks
|
|
247
|
-
const marks = nonOverlappedElements.map(([ref, element]) => {
|
|
248
|
-
const coords = element.coordinates!;
|
|
249
|
-
const isClickable = clickableElements.has(ref);
|
|
250
|
-
|
|
251
|
-
// Scale coordinates from CSS pixels to screenshot pixels
|
|
252
|
-
const x = Math.max(0, coords.x * scaleX);
|
|
253
|
-
const y = Math.max(0, coords.y * scaleY);
|
|
254
|
-
const width = coords.width * scaleX;
|
|
255
|
-
const height = coords.height * scaleY;
|
|
256
|
-
|
|
257
|
-
// Clamp to screenshot bounds
|
|
258
|
-
const clampedWidth = Math.min(width, screenshotWidth - x);
|
|
259
|
-
const clampedHeight = Math.min(height, screenshotHeight - y);
|
|
260
|
-
|
|
261
|
-
// Position text to be visible even if element is partially cut off
|
|
262
|
-
const textX = Math.max(2, Math.min(x + 2, screenshotWidth - 40));
|
|
263
|
-
const textY = Math.max(14, Math.min(y + 14, screenshotHeight - 4));
|
|
264
|
-
|
|
265
|
-
// Different colors for clickable vs non-clickable elements
|
|
266
|
-
const colors = isClickable ? {
|
|
267
|
-
fill: 'rgba(0, 150, 255, 0.15)', // Blue for clickable
|
|
268
|
-
stroke: '#0096FF',
|
|
269
|
-
textFill: '#0096FF'
|
|
270
|
-
} : {
|
|
271
|
-
fill: 'rgba(255, 107, 107, 0.1)', // Red for non-clickable
|
|
272
|
-
stroke: '#FF6B6B',
|
|
273
|
-
textFill: '#FF6B6B'
|
|
274
|
-
};
|
|
275
|
-
|
|
276
|
-
return `
|
|
277
|
-
<rect x="${x}" y="${y}" width="${clampedWidth}" height="${clampedHeight}"
|
|
278
|
-
fill="${colors.fill}" stroke="${colors.stroke}" stroke-width="2" rx="2"/>
|
|
279
|
-
<text x="${textX}" y="${textY}" font-family="Arial, sans-serif"
|
|
280
|
-
font-size="12" fill="${colors.textFill}" font-weight="bold">${ref}</text>
|
|
281
|
-
`;
|
|
282
|
-
}).join('');
|
|
283
|
-
|
|
284
|
-
const svgOverlay = `
|
|
285
|
-
<svg width="${screenshotWidth}" height="${screenshotHeight}" xmlns="http://www.w3.org/2000/svg">
|
|
286
|
-
${marks}
|
|
287
|
-
</svg>
|
|
288
|
-
`;
|
|
289
|
-
|
|
290
|
-
// Composite the overlay onto the screenshot
|
|
291
|
-
const markedImageBuffer = await sharp(screenshotBuffer)
|
|
292
|
-
.composite([{
|
|
293
|
-
input: Buffer.from(svgOverlay),
|
|
294
|
-
top: 0,
|
|
295
|
-
left: 0
|
|
296
|
-
}])
|
|
297
|
-
.png()
|
|
298
|
-
.toBuffer();
|
|
299
|
-
|
|
300
|
-
return markedImageBuffer;
|
|
301
|
-
|
|
302
|
-
} catch (error) {
|
|
303
|
-
// Error adding visual marks, falling back to original screenshot
|
|
304
|
-
// Return original screenshot if marking fails
|
|
305
|
-
return screenshotBuffer;
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
194
|
|
|
309
195
|
/**
|
|
310
196
|
* Parse clickable elements from snapshot text
|
|
@@ -314,8 +200,8 @@ export class HybridBrowserToolkit {
|
|
|
314
200
|
const lines = snapshotText.split('\n');
|
|
315
201
|
|
|
316
202
|
for (const line of lines) {
|
|
317
|
-
// Look for lines containing [cursor=pointer] and extract ref
|
|
318
|
-
if (line.includes('[cursor=pointer]')) {
|
|
203
|
+
// Look for lines containing [cursor=pointer] or [active] and extract ref
|
|
204
|
+
if (line.includes('[cursor=pointer]') || line.includes('[active]')) {
|
|
319
205
|
const refMatch = line.match(/\[ref=([^\]]+)\]/);
|
|
320
206
|
if (refMatch) {
|
|
321
207
|
clickableElements.add(refMatch[1]);
|
|
@@ -326,56 +212,6 @@ export class HybridBrowserToolkit {
|
|
|
326
212
|
return clickableElements;
|
|
327
213
|
}
|
|
328
214
|
|
|
329
|
-
/**
|
|
330
|
-
* Remove overlapped elements, keeping only the topmost (last in DOM order)
|
|
331
|
-
*/
|
|
332
|
-
private removeOverlappedElements(elements: Array<[string, any]>): Array<[string, any]> {
|
|
333
|
-
const result: Array<[string, any]> = [];
|
|
334
|
-
|
|
335
|
-
for (let i = 0; i < elements.length; i++) {
|
|
336
|
-
const [refA, elementA] = elements[i];
|
|
337
|
-
const coordsA = elementA.coordinates!;
|
|
338
|
-
let isOverlapped = false;
|
|
339
|
-
|
|
340
|
-
// Check if this element is completely overlapped by any later element
|
|
341
|
-
for (let j = i + 1; j < elements.length; j++) {
|
|
342
|
-
const [refB, elementB] = elements[j];
|
|
343
|
-
const coordsB = elementB.coordinates!;
|
|
344
|
-
|
|
345
|
-
// Check if element A is completely covered by element B
|
|
346
|
-
if (this.isCompletelyOverlapped(coordsA, coordsB)) {
|
|
347
|
-
isOverlapped = true;
|
|
348
|
-
break;
|
|
349
|
-
}
|
|
350
|
-
}
|
|
351
|
-
|
|
352
|
-
if (!isOverlapped) {
|
|
353
|
-
result.push(elements[i]);
|
|
354
|
-
}
|
|
355
|
-
}
|
|
356
|
-
|
|
357
|
-
return result;
|
|
358
|
-
}
|
|
359
|
-
|
|
360
|
-
/**
|
|
361
|
-
* Check if element A is completely overlapped by element B
|
|
362
|
-
*/
|
|
363
|
-
private isCompletelyOverlapped(
|
|
364
|
-
coordsA: { x: number; y: number; width: number; height: number },
|
|
365
|
-
coordsB: { x: number; y: number; width: number; height: number }
|
|
366
|
-
): boolean {
|
|
367
|
-
// A is completely overlapped by B if:
|
|
368
|
-
// B's left edge is <= A's left edge AND
|
|
369
|
-
// B's top edge is <= A's top edge AND
|
|
370
|
-
// B's right edge is >= A's right edge AND
|
|
371
|
-
// B's bottom edge is >= A's bottom edge
|
|
372
|
-
return (
|
|
373
|
-
coordsB.x <= coordsA.x &&
|
|
374
|
-
coordsB.y <= coordsA.y &&
|
|
375
|
-
coordsB.x + coordsB.width >= coordsA.x + coordsA.width &&
|
|
376
|
-
coordsB.y + coordsB.height >= coordsA.y + coordsA.height
|
|
377
|
-
);
|
|
378
|
-
}
|
|
379
215
|
|
|
380
216
|
private async executeActionWithSnapshot(action: BrowserAction): Promise<any> {
|
|
381
217
|
const result = await this.session.executeAction(action);
|
|
@@ -472,7 +308,7 @@ export class HybridBrowserToolkit {
|
|
|
472
308
|
const navigationTime = Date.now() - navigationStart;
|
|
473
309
|
|
|
474
310
|
const snapshotStart = Date.now();
|
|
475
|
-
const snapshot = await this.
|
|
311
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
476
312
|
const snapshotTime = Date.now() - snapshotStart;
|
|
477
313
|
|
|
478
314
|
const totalTime = Date.now() - startTime;
|
|
@@ -512,7 +348,7 @@ export class HybridBrowserToolkit {
|
|
|
512
348
|
const navigationTime = Date.now() - navigationStart;
|
|
513
349
|
|
|
514
350
|
const snapshotStart = Date.now();
|
|
515
|
-
const snapshot = await this.
|
|
351
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
516
352
|
const snapshotTime = Date.now() - snapshotStart;
|
|
517
353
|
|
|
518
354
|
const totalTime = Date.now() - startTime;
|
|
@@ -584,7 +420,7 @@ export class HybridBrowserToolkit {
|
|
|
584
420
|
return {
|
|
585
421
|
success: true,
|
|
586
422
|
message: `Closed tab ${tabId}`,
|
|
587
|
-
snapshot: await this.
|
|
423
|
+
snapshot: await this.getSnapshotForAction(this.viewportLimit),
|
|
588
424
|
};
|
|
589
425
|
} else {
|
|
590
426
|
return {
|
|
@@ -649,7 +485,7 @@ export class HybridBrowserToolkit {
|
|
|
649
485
|
const { result, logs } = evalResult;
|
|
650
486
|
|
|
651
487
|
const snapshotStart = Date.now();
|
|
652
|
-
const snapshot = await this.
|
|
488
|
+
const snapshot = await this.getSnapshotForAction(this.viewportLimit);
|
|
653
489
|
const snapshotTime = Date.now() - snapshotStart;
|
|
654
490
|
const totalTime = Date.now() - startTime;
|
|
655
491
|
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Parent-child filtering logic for SOM-labels
|
|
3
|
+
* Filters out child elements that are contained within propagating parent elements
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
export interface ElementInfo {
|
|
7
|
+
ref: string;
|
|
8
|
+
coordinates?: {
|
|
9
|
+
x: number;
|
|
10
|
+
y: number;
|
|
11
|
+
width: number;
|
|
12
|
+
height: number;
|
|
13
|
+
};
|
|
14
|
+
role?: string;
|
|
15
|
+
type?: string;
|
|
16
|
+
tagName?: string;
|
|
17
|
+
attributes?: Record<string, any>;
|
|
18
|
+
text?: string;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
// Elements that propagate bounds to their children
|
|
22
|
+
const PROPAGATING_ELEMENTS = [
|
|
23
|
+
{ tag: 'a', role: null },
|
|
24
|
+
{ tag: 'button', role: null },
|
|
25
|
+
{ tag: 'div', role: 'button' },
|
|
26
|
+
{ tag: 'div', role: 'combobox' },
|
|
27
|
+
{ tag: 'span', role: 'button' },
|
|
28
|
+
{ tag: 'span', role: 'combobox' },
|
|
29
|
+
{ tag: 'input', role: 'combobox' },
|
|
30
|
+
];
|
|
31
|
+
|
|
32
|
+
const CONTAINMENT_THRESHOLD = 0.99; // 99% containment required
|
|
33
|
+
|
|
34
|
+
/**
|
|
35
|
+
* Check if element is a propagating element
|
|
36
|
+
*/
|
|
37
|
+
function isPropagatingElement(element: ElementInfo): boolean {
|
|
38
|
+
const tagName = element.tagName || element.type || '';
|
|
39
|
+
const tag = tagName.toLowerCase();
|
|
40
|
+
const role = element.role || element.attributes?.role || null;
|
|
41
|
+
|
|
42
|
+
// For generic elements with cursor=pointer, we need to be more selective
|
|
43
|
+
// Only treat them as propagating if they don't have text content
|
|
44
|
+
// (text-containing generics are usually labels, not containers)
|
|
45
|
+
if ((tag === 'generic' || element.type === 'generic') &&
|
|
46
|
+
element.attributes?.['cursor'] === 'pointer') {
|
|
47
|
+
// If element has direct text content, it's likely a label, not a container
|
|
48
|
+
if (element.text && element.text.trim()) {
|
|
49
|
+
return false;
|
|
50
|
+
}
|
|
51
|
+
// If no text, it might be a container
|
|
52
|
+
return true;
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
for (const pattern of PROPAGATING_ELEMENTS) {
|
|
56
|
+
if (pattern.tag === tag) {
|
|
57
|
+
if (pattern.role === null || pattern.role === role) {
|
|
58
|
+
return true;
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
return false;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Check if child bounds are contained within parent bounds
|
|
67
|
+
*/
|
|
68
|
+
function isContained(
|
|
69
|
+
childBounds: { x: number; y: number; width: number; height: number },
|
|
70
|
+
parentBounds: { x: number; y: number; width: number; height: number },
|
|
71
|
+
threshold: number
|
|
72
|
+
): boolean {
|
|
73
|
+
// Calculate intersection
|
|
74
|
+
const xOverlap = Math.max(0,
|
|
75
|
+
Math.min(childBounds.x + childBounds.width, parentBounds.x + parentBounds.width) -
|
|
76
|
+
Math.max(childBounds.x, parentBounds.x)
|
|
77
|
+
);
|
|
78
|
+
const yOverlap = Math.max(0,
|
|
79
|
+
Math.min(childBounds.y + childBounds.height, parentBounds.y + parentBounds.height) -
|
|
80
|
+
Math.max(childBounds.y, parentBounds.y)
|
|
81
|
+
);
|
|
82
|
+
|
|
83
|
+
const intersectionArea = xOverlap * yOverlap;
|
|
84
|
+
const childArea = childBounds.width * childBounds.height;
|
|
85
|
+
|
|
86
|
+
if (childArea === 0) return false;
|
|
87
|
+
|
|
88
|
+
return (intersectionArea / childArea) >= threshold;
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
/**
|
|
92
|
+
* Check if child element should be filtered out
|
|
93
|
+
*/
|
|
94
|
+
function shouldFilterChild(childEl: ElementInfo, parentEl: ElementInfo): boolean {
|
|
95
|
+
// Never filter if parent is not a propagating element
|
|
96
|
+
if (!isPropagatingElement(parentEl)) {
|
|
97
|
+
return false;
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
// Never filter if elements don't have coordinates
|
|
101
|
+
if (!childEl.coordinates || !parentEl.coordinates) {
|
|
102
|
+
return false;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// Check containment
|
|
106
|
+
if (!isContained(childEl.coordinates, parentEl.coordinates, CONTAINMENT_THRESHOLD)) {
|
|
107
|
+
return false;
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
const childTag = (childEl.tagName || childEl.type || '').toLowerCase();
|
|
111
|
+
const childRole = childEl.role || childEl.attributes?.role || null;
|
|
112
|
+
|
|
113
|
+
// Exception rules - never filter these:
|
|
114
|
+
|
|
115
|
+
// 1. Form elements (need individual interaction)
|
|
116
|
+
if (['input', 'select', 'textarea', 'label'].includes(childTag)) {
|
|
117
|
+
return false;
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
// 2. Child is also a propagating element (might have stopPropagation)
|
|
121
|
+
if (isPropagatingElement(childEl)) {
|
|
122
|
+
return false;
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
// 3. Has onclick handler
|
|
126
|
+
if (childEl.attributes?.onclick) {
|
|
127
|
+
return false;
|
|
128
|
+
}
|
|
129
|
+
|
|
130
|
+
// 4. Has meaningful aria-label
|
|
131
|
+
if (childEl.attributes?.['aria-label']?.trim()) {
|
|
132
|
+
return false;
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
// 5. Has interactive role
|
|
136
|
+
if (['button', 'link', 'checkbox', 'radio', 'tab', 'menuitem'].includes(childRole || '')) {
|
|
137
|
+
return false;
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
// Default: filter this child
|
|
141
|
+
return true;
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/**
|
|
145
|
+
* Filter clickable elements based on parent-child relationships
|
|
146
|
+
* @param elements - Map of all elements with their info
|
|
147
|
+
* @param clickableRefs - Set of refs that are clickable
|
|
148
|
+
* @returns Filtered set of element refs and debug info
|
|
149
|
+
*/
|
|
150
|
+
export function filterParentChildElements(
|
|
151
|
+
elements: Record<string, ElementInfo>,
|
|
152
|
+
clickableRefs: Set<string>
|
|
153
|
+
): {
|
|
154
|
+
filteredElements: Set<string>;
|
|
155
|
+
debugInfo: any[];
|
|
156
|
+
} {
|
|
157
|
+
const elementRefs = Array.from(clickableRefs);
|
|
158
|
+
const filteredElements = new Set<string>(elementRefs);
|
|
159
|
+
const debugInfo: any[] = [];
|
|
160
|
+
|
|
161
|
+
console.log(`[Parent-Child Filter] Analyzing ${elementRefs.length} clickable elements`);
|
|
162
|
+
|
|
163
|
+
// Check each pair of elements for parent-child filtering
|
|
164
|
+
for (let i = 0; i < elementRefs.length; i++) {
|
|
165
|
+
const parentRef = elementRefs[i];
|
|
166
|
+
const parentEl = elements[parentRef];
|
|
167
|
+
|
|
168
|
+
if (!parentEl?.coordinates) continue;
|
|
169
|
+
|
|
170
|
+
const isParentPropagating = isPropagatingElement(parentEl);
|
|
171
|
+
|
|
172
|
+
for (let j = 0; j < elementRefs.length; j++) {
|
|
173
|
+
if (i === j) continue;
|
|
174
|
+
|
|
175
|
+
const childRef = elementRefs[j];
|
|
176
|
+
const childEl = elements[childRef];
|
|
177
|
+
|
|
178
|
+
if (!childEl?.coordinates) continue;
|
|
179
|
+
|
|
180
|
+
// Debug parent-child relationships when enabled
|
|
181
|
+
const DEBUG_PARENT_CHILD = process.env.DEBUG_PARENT_CHILD === 'true';
|
|
182
|
+
if (DEBUG_PARENT_CHILD) {
|
|
183
|
+
const shouldFilter = shouldFilterChild(childEl, parentEl);
|
|
184
|
+
console.log(`\n[Debug] Checking ${parentRef} -> ${childRef}:`);
|
|
185
|
+
console.log(`Parent:`, {
|
|
186
|
+
ref: parentRef,
|
|
187
|
+
type: parentEl.type || parentEl.tagName,
|
|
188
|
+
role: parentEl.role,
|
|
189
|
+
coords: parentEl.coordinates,
|
|
190
|
+
isPropagating: isParentPropagating
|
|
191
|
+
});
|
|
192
|
+
console.log(`Child:`, {
|
|
193
|
+
ref: childRef,
|
|
194
|
+
type: childEl.type || childEl.tagName,
|
|
195
|
+
role: childEl.role,
|
|
196
|
+
coords: childEl.coordinates
|
|
197
|
+
});
|
|
198
|
+
console.log(`Should filter? ${shouldFilter}`);
|
|
199
|
+
}
|
|
200
|
+
|
|
201
|
+
if (shouldFilterChild(childEl, parentEl)) {
|
|
202
|
+
filteredElements.delete(childRef);
|
|
203
|
+
|
|
204
|
+
debugInfo.push({
|
|
205
|
+
type: 'filtered',
|
|
206
|
+
childRef,
|
|
207
|
+
parentRef,
|
|
208
|
+
reason: 'Contained within propagating parent',
|
|
209
|
+
parentType: parentEl.type || parentEl.tagName,
|
|
210
|
+
childType: childEl.type || childEl.tagName,
|
|
211
|
+
parentRole: parentEl.role,
|
|
212
|
+
childRole: childEl.role,
|
|
213
|
+
containment: isContained(childEl.coordinates, parentEl.coordinates, CONTAINMENT_THRESHOLD)
|
|
214
|
+
});
|
|
215
|
+
}
|
|
216
|
+
}
|
|
217
|
+
}
|
|
218
|
+
|
|
219
|
+
const filteredCount = elementRefs.length - filteredElements.size;
|
|
220
|
+
console.log(`[Parent-Child Filter] Filtered out ${filteredCount} child elements`);
|
|
221
|
+
|
|
222
|
+
return {
|
|
223
|
+
filteredElements,
|
|
224
|
+
debugInfo
|
|
225
|
+
};
|
|
226
|
+
}
|