camel-ai 0.2.75a6__py3-none-any.whl → 0.2.76a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (38)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +151 -37
  3. camel/configs/__init__.py +3 -0
  4. camel/configs/amd_config.py +70 -0
  5. camel/interpreters/__init__.py +2 -0
  6. camel/interpreters/microsandbox_interpreter.py +395 -0
  7. camel/models/__init__.py +2 -0
  8. camel/models/amd_model.py +101 -0
  9. camel/models/model_factory.py +2 -0
  10. camel/models/openai_model.py +0 -6
  11. camel/runtimes/daytona_runtime.py +11 -12
  12. camel/toolkits/__init__.py +5 -3
  13. camel/toolkits/code_execution.py +28 -1
  14. camel/toolkits/function_tool.py +6 -1
  15. camel/toolkits/hybrid_browser_toolkit/config_loader.py +8 -0
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +12 -0
  17. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +33 -14
  18. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +135 -40
  19. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
  20. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +43 -207
  21. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  22. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +231 -0
  23. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  24. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
  25. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +241 -56
  26. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +5 -1
  27. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
  28. camel/toolkits/mcp_toolkit.py +39 -14
  29. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  30. camel/toolkits/terminal_toolkit.py +12 -2
  31. camel/toolkits/video_analysis_toolkit.py +16 -10
  32. camel/types/enums.py +11 -0
  33. camel/utils/commons.py +2 -0
  34. camel/utils/mcp.py +136 -2
  35. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/METADATA +5 -3
  36. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/RECORD +38 -31
  37. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/WHEEL +0 -0
  38. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/licenses/LICENSE +0 -0
@@ -2,18 +2,22 @@ import {HybridBrowserSession} from './browser-session';
2
2
  import {ActionResult, BrowserAction, BrowserToolkitConfig, SnapshotResult, TabInfo, VisualMarkResult} from './types';
3
3
  import {ConfigLoader} from './config-loader';
4
4
  import {ConsoleMessage} from 'playwright';
5
+ import {SomScreenshotInjected} from './som-screenshot-injected';
6
+ import {filterClickableByHierarchy} from './snapshot-parser';
5
7
 
6
8
  export class HybridBrowserToolkit {
7
9
  private session: HybridBrowserSession;
8
10
  private config: BrowserToolkitConfig;
9
11
  private configLoader: ConfigLoader;
10
12
  private viewportLimit: boolean;
13
+ private fullVisualMode: boolean;
11
14
 
12
15
  constructor(config: BrowserToolkitConfig = {}) {
13
16
  this.configLoader = ConfigLoader.fromPythonConfig(config);
14
17
  this.config = config; // Store original config for backward compatibility
15
18
  this.session = new HybridBrowserSession(this.configLoader.getBrowserConfig()); // Pass processed config
16
19
  this.viewportLimit = this.configLoader.getWebSocketConfig().viewport_limit;
20
+ this.fullVisualMode = this.configLoader.getWebSocketConfig().fullVisualMode || false;
17
21
  }
18
22
 
19
23
  async openBrowser(startUrl?: string): Promise<ActionResult> {
@@ -26,7 +30,7 @@ export class HybridBrowserToolkit {
26
30
  const result = await this.session.visitPage(url);
27
31
 
28
32
  const snapshotStart = Date.now();
29
- const snapshot = await this.getPageSnapshot(this.viewportLimit);
33
+ const snapshot = await this.getSnapshotForAction(this.viewportLimit);
30
34
  const snapshotTime = Date.now() - snapshotStart;
31
35
 
32
36
  const totalTime = Date.now() - startTime;
@@ -83,7 +87,7 @@ export class HybridBrowserToolkit {
83
87
 
84
88
  if (result.success) {
85
89
  const snapshotStart = Date.now();
86
- response.snapshot = await this.getPageSnapshot(this.viewportLimit);
90
+ response.snapshot = await this.getSnapshotForAction(this.viewportLimit);
87
91
  const snapshotTime = Date.now() - snapshotStart;
88
92
 
89
93
  if (result.timing) {
@@ -119,6 +123,7 @@ export class HybridBrowserToolkit {
119
123
 
120
124
  async getPageSnapshot(viewportLimit: boolean = false): Promise<string> {
121
125
  try {
126
+ // Always return real snapshot when explicitly called
122
127
  // If viewport limiting is enabled, we need coordinates for filtering
123
128
  const snapshotResult = await this.session.getSnapshotForAI(viewportLimit, viewportLimit);
124
129
  return snapshotResult.snapshot;
@@ -126,6 +131,14 @@ export class HybridBrowserToolkit {
126
131
  return `Error capturing snapshot: ${error}`;
127
132
  }
128
133
  }
134
+
135
+ // Internal method for getting snapshot in actions (respects fullVisualMode)
136
+ private async getSnapshotForAction(viewportLimit: boolean = false): Promise<string> {
137
+ if (this.fullVisualMode) {
138
+ return 'full visual mode';
139
+ }
140
+ return this.getPageSnapshot(viewportLimit);
141
+ }
129
142
 
130
143
 
131
144
  async getSnapshotForAI(): Promise<SnapshotResult> {
@@ -134,35 +147,34 @@ export class HybridBrowserToolkit {
134
147
 
135
148
  async getSomScreenshot(): Promise<VisualMarkResult & { timing: any }> {
136
149
  const startTime = Date.now();
150
+ console.log('[HybridBrowserToolkit] Starting getSomScreenshot...');
137
151
 
138
152
  try {
139
- const screenshotResult = await this.session.takeScreenshot();
140
- const snapshotResult = await this.session.getSnapshotForAI(true); // Include coordinates for SOM_mark
141
-
142
- // Add visual marks using improved method
143
- const markingStart = Date.now();
144
- const markedImageBuffer = await this.addVisualMarksOptimized(screenshotResult.buffer, snapshotResult);
145
- const markingTime = Date.now() - markingStart;
153
+ // Get page and snapshot data
154
+ const page = await this.session.getCurrentPage();
155
+ const snapshotResult = await this.session.getSnapshotForAI(true); // Include coordinates
146
156
 
147
- const base64Image = markedImageBuffer.toString('base64');
148
- const dataUrl = `data:image/png;base64,${base64Image}`;
157
+ // Parse clickable elements from snapshot text
158
+ const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
159
+ console.log(`[HybridBrowserToolkit] Found ${clickableElements.size} clickable elements`);
149
160
 
150
- const totalTime = Date.now() - startTime;
161
+ // Apply hierarchy-based filtering
162
+ const filteredElements = filterClickableByHierarchy(snapshotResult.snapshot, clickableElements);
163
+ console.log(`[HybridBrowserToolkit] After filtering: ${filteredElements.size} elements remain`);
164
+
165
+ // Use injected SOM-screenshot method without export path
166
+ const result = await SomScreenshotInjected.captureOptimized(
167
+ page,
168
+ snapshotResult,
169
+ filteredElements,
170
+ undefined // No export path - don't generate files
171
+ );
151
172
 
152
- // Count elements with coordinates
153
- const elementsWithCoords = Object.values(snapshotResult.elements).filter(el => el.coordinates).length;
173
+ // Add snapshot timing info to result
174
+ result.timing.snapshot_time_ms = snapshotResult.timing.snapshot_time_ms;
175
+ result.timing.coordinate_enrichment_time_ms = snapshotResult.timing.coordinate_enrichment_time_ms;
154
176
 
155
- return {
156
- text: `Visual webpage screenshot captured with ${Object.keys(snapshotResult.elements).length} interactive elements (${elementsWithCoords} marked visually)`,
157
- images: [dataUrl],
158
- timing: {
159
- total_time_ms: totalTime,
160
- screenshot_time_ms: screenshotResult.timing.screenshot_time_ms,
161
- snapshot_time_ms: snapshotResult.timing.snapshot_time_ms,
162
- coordinate_enrichment_time_ms: snapshotResult.timing.coordinate_enrichment_time_ms,
163
- visual_marking_time_ms: markingTime,
164
- },
165
- };
177
+ return result;
166
178
  } catch (error) {
167
179
  const totalTime = Date.now() - startTime;
168
180
  return {
@@ -179,132 +191,6 @@ export class HybridBrowserToolkit {
179
191
  }
180
192
  }
181
193
 
182
- private async addVisualMarksOptimized(screenshotBuffer: Buffer, snapshotResult: SnapshotResult): Promise<Buffer> {
183
- try {
184
-
185
- // Check if we have any elements with coordinates
186
- const elementsWithCoords = Object.entries(snapshotResult.elements)
187
- .filter(([ref, element]) => element.coordinates);
188
-
189
- if (elementsWithCoords.length === 0) {
190
- return screenshotBuffer;
191
- }
192
-
193
- // Parse clickable elements from snapshot text
194
- const clickableElements = this.parseClickableElements(snapshotResult.snapshot);
195
-
196
- // Use sharp for image processing
197
- const sharp = require('sharp');
198
- const page = await this.session.getCurrentPage();
199
- let viewport = page.viewportSize();
200
-
201
- // In CDP mode, viewportSize might be null, get it from window dimensions
202
- if (!viewport) {
203
- const windowSize = await page.evaluate(() => ({
204
- width: window.innerWidth,
205
- height: window.innerHeight
206
- }));
207
- viewport = windowSize;
208
- }
209
-
210
- // Get device pixel ratio to handle high DPI screens
211
- const dpr = await page.evaluate(() => window.devicePixelRatio) || 1;
212
-
213
- // Get actual screenshot dimensions
214
- const metadata = await sharp(screenshotBuffer).metadata();
215
- const screenshotWidth = metadata.width || viewport.width;
216
- const screenshotHeight = metadata.height || viewport.height;
217
-
218
- // Calculate scaling factor between CSS pixels and screenshot pixels
219
- const scaleX = screenshotWidth / viewport.width;
220
- const scaleY = screenshotHeight / viewport.height;
221
-
222
- // Debug logging for CDP mode
223
- if (process.env.HYBRID_BROWSER_DEBUG === '1') {
224
- console.log('[CDP Debug] Viewport size:', viewport);
225
- console.log('[CDP Debug] Device pixel ratio:', dpr);
226
- console.log('[CDP Debug] Screenshot dimensions:', { width: screenshotWidth, height: screenshotHeight });
227
- console.log('[CDP Debug] Scale factors:', { scaleX, scaleY });
228
- console.log('[CDP Debug] Elements with coordinates:', elementsWithCoords.length);
229
- elementsWithCoords.slice(0, 3).forEach(([ref, element]) => {
230
- console.log(`[CDP Debug] Element ${ref}:`, element.coordinates);
231
- });
232
- }
233
-
234
- // Filter elements visible in viewport
235
- const visibleElements = elementsWithCoords.filter(([ref, element]) => {
236
- const coords = element.coordinates!;
237
- return coords.x < viewport.width &&
238
- coords.y < viewport.height &&
239
- coords.x + coords.width > 0 &&
240
- coords.y + coords.height > 0;
241
- });
242
-
243
- // Remove overlapped elements (only keep topmost)
244
- const nonOverlappedElements = this.removeOverlappedElements(visibleElements);
245
-
246
- // Create SVG overlay with all the marks
247
- const marks = nonOverlappedElements.map(([ref, element]) => {
248
- const coords = element.coordinates!;
249
- const isClickable = clickableElements.has(ref);
250
-
251
- // Scale coordinates from CSS pixels to screenshot pixels
252
- const x = Math.max(0, coords.x * scaleX);
253
- const y = Math.max(0, coords.y * scaleY);
254
- const width = coords.width * scaleX;
255
- const height = coords.height * scaleY;
256
-
257
- // Clamp to screenshot bounds
258
- const clampedWidth = Math.min(width, screenshotWidth - x);
259
- const clampedHeight = Math.min(height, screenshotHeight - y);
260
-
261
- // Position text to be visible even if element is partially cut off
262
- const textX = Math.max(2, Math.min(x + 2, screenshotWidth - 40));
263
- const textY = Math.max(14, Math.min(y + 14, screenshotHeight - 4));
264
-
265
- // Different colors for clickable vs non-clickable elements
266
- const colors = isClickable ? {
267
- fill: 'rgba(0, 150, 255, 0.15)', // Blue for clickable
268
- stroke: '#0096FF',
269
- textFill: '#0096FF'
270
- } : {
271
- fill: 'rgba(255, 107, 107, 0.1)', // Red for non-clickable
272
- stroke: '#FF6B6B',
273
- textFill: '#FF6B6B'
274
- };
275
-
276
- return `
277
- <rect x="${x}" y="${y}" width="${clampedWidth}" height="${clampedHeight}"
278
- fill="${colors.fill}" stroke="${colors.stroke}" stroke-width="2" rx="2"/>
279
- <text x="${textX}" y="${textY}" font-family="Arial, sans-serif"
280
- font-size="12" fill="${colors.textFill}" font-weight="bold">${ref}</text>
281
- `;
282
- }).join('');
283
-
284
- const svgOverlay = `
285
- <svg width="${screenshotWidth}" height="${screenshotHeight}" xmlns="http://www.w3.org/2000/svg">
286
- ${marks}
287
- </svg>
288
- `;
289
-
290
- // Composite the overlay onto the screenshot
291
- const markedImageBuffer = await sharp(screenshotBuffer)
292
- .composite([{
293
- input: Buffer.from(svgOverlay),
294
- top: 0,
295
- left: 0
296
- }])
297
- .png()
298
- .toBuffer();
299
-
300
- return markedImageBuffer;
301
-
302
- } catch (error) {
303
- // Error adding visual marks, falling back to original screenshot
304
- // Return original screenshot if marking fails
305
- return screenshotBuffer;
306
- }
307
- }
308
194
 
309
195
  /**
310
196
  * Parse clickable elements from snapshot text
@@ -314,8 +200,8 @@ export class HybridBrowserToolkit {
314
200
  const lines = snapshotText.split('\n');
315
201
 
316
202
  for (const line of lines) {
317
- // Look for lines containing [cursor=pointer] and extract ref
318
- if (line.includes('[cursor=pointer]')) {
203
+ // Look for lines containing [cursor=pointer] or [active] and extract ref
204
+ if (line.includes('[cursor=pointer]') || line.includes('[active]')) {
319
205
  const refMatch = line.match(/\[ref=([^\]]+)\]/);
320
206
  if (refMatch) {
321
207
  clickableElements.add(refMatch[1]);
@@ -326,56 +212,6 @@ export class HybridBrowserToolkit {
326
212
  return clickableElements;
327
213
  }
328
214
 
329
- /**
330
- * Remove overlapped elements, keeping only the topmost (last in DOM order)
331
- */
332
- private removeOverlappedElements(elements: Array<[string, any]>): Array<[string, any]> {
333
- const result: Array<[string, any]> = [];
334
-
335
- for (let i = 0; i < elements.length; i++) {
336
- const [refA, elementA] = elements[i];
337
- const coordsA = elementA.coordinates!;
338
- let isOverlapped = false;
339
-
340
- // Check if this element is completely overlapped by any later element
341
- for (let j = i + 1; j < elements.length; j++) {
342
- const [refB, elementB] = elements[j];
343
- const coordsB = elementB.coordinates!;
344
-
345
- // Check if element A is completely covered by element B
346
- if (this.isCompletelyOverlapped(coordsA, coordsB)) {
347
- isOverlapped = true;
348
- break;
349
- }
350
- }
351
-
352
- if (!isOverlapped) {
353
- result.push(elements[i]);
354
- }
355
- }
356
-
357
- return result;
358
- }
359
-
360
- /**
361
- * Check if element A is completely overlapped by element B
362
- */
363
- private isCompletelyOverlapped(
364
- coordsA: { x: number; y: number; width: number; height: number },
365
- coordsB: { x: number; y: number; width: number; height: number }
366
- ): boolean {
367
- // A is completely overlapped by B if:
368
- // B's left edge is <= A's left edge AND
369
- // B's top edge is <= A's top edge AND
370
- // B's right edge is >= A's right edge AND
371
- // B's bottom edge is >= A's bottom edge
372
- return (
373
- coordsB.x <= coordsA.x &&
374
- coordsB.y <= coordsA.y &&
375
- coordsB.x + coordsB.width >= coordsA.x + coordsA.width &&
376
- coordsB.y + coordsB.height >= coordsA.y + coordsA.height
377
- );
378
- }
379
215
 
380
216
  private async executeActionWithSnapshot(action: BrowserAction): Promise<any> {
381
217
  const result = await this.session.executeAction(action);
@@ -472,7 +308,7 @@ export class HybridBrowserToolkit {
472
308
  const navigationTime = Date.now() - navigationStart;
473
309
 
474
310
  const snapshotStart = Date.now();
475
- const snapshot = await this.getPageSnapshot(this.viewportLimit);
311
+ const snapshot = await this.getSnapshotForAction(this.viewportLimit);
476
312
  const snapshotTime = Date.now() - snapshotStart;
477
313
 
478
314
  const totalTime = Date.now() - startTime;
@@ -512,7 +348,7 @@ export class HybridBrowserToolkit {
512
348
  const navigationTime = Date.now() - navigationStart;
513
349
 
514
350
  const snapshotStart = Date.now();
515
- const snapshot = await this.getPageSnapshot(this.viewportLimit);
351
+ const snapshot = await this.getSnapshotForAction(this.viewportLimit);
516
352
  const snapshotTime = Date.now() - snapshotStart;
517
353
 
518
354
  const totalTime = Date.now() - startTime;
@@ -584,7 +420,7 @@ export class HybridBrowserToolkit {
584
420
  return {
585
421
  success: true,
586
422
  message: `Closed tab ${tabId}`,
587
- snapshot: await this.getPageSnapshot(this.viewportLimit),
423
+ snapshot: await this.getSnapshotForAction(this.viewportLimit),
588
424
  };
589
425
  } else {
590
426
  return {
@@ -649,7 +485,7 @@ export class HybridBrowserToolkit {
649
485
  const { result, logs } = evalResult;
650
486
 
651
487
  const snapshotStart = Date.now();
652
- const snapshot = await this.getPageSnapshot(this.viewportLimit);
488
+ const snapshot = await this.getSnapshotForAction(this.viewportLimit);
653
489
  const snapshotTime = Date.now() - snapshotStart;
654
490
  const totalTime = Date.now() - startTime;
655
491
 
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Parent-child filtering logic for SOM-labels
3
+ * Filters out child elements that are contained within propagating parent elements
4
+ */
5
+
6
/**
 * Description of one page element as consumed by the parent-child filter.
 * Only `ref` is required; every other field may be absent.
 */
export interface ElementInfo {
  /** Reference id of the element. */
  ref: string;
  /** Bounding rectangle of the element, when known. */
  coordinates?: { x: number; y: number; width: number; height: number };
  /** Role reported for the element. */
  role?: string;
  /** Element type; used as a fallback when `tagName` is missing. */
  type?: string;
  /** Tag name of the element. */
  tagName?: string;
  /** Arbitrary attribute bag (e.g. `role`, `cursor`, `onclick`, `aria-label`). */
  attributes?: Record<string, any>;
  /** Text content associated with the element. */
  text?: string;
}
20
+
21
// Tag/role combinations whose bounds propagate to their children.
// A `null` role means the tag matches whatever role the element has.
const PROPAGATING_ELEMENTS: Array<{ tag: string; role: string | null }> = [
  // Native links and buttons
  { tag: 'a', role: null },
  { tag: 'button', role: null },
  // Generic containers acting as buttons or comboboxes
  { tag: 'div', role: 'button' },
  { tag: 'div', role: 'combobox' },
  { tag: 'span', role: 'button' },
  { tag: 'span', role: 'combobox' },
  // Inputs acting as comboboxes
  { tag: 'input', role: 'combobox' },
];

// Minimum fraction of a child's area that must lie inside the parent (0.99 = 99%).
const CONTAINMENT_THRESHOLD = 0.99;
33
+
34
/**
 * Decide whether an element counts as a "propagating" element.
 *
 * The tag is taken from `tagName`, falling back to `type`, and compared
 * case-insensitively. The role is taken from `role`, falling back to
 * `attributes.role`.
 *
 * @param element - Element to classify
 * @returns `true` when the element matches an entry of PROPAGATING_ELEMENTS,
 *          or is a text-less generic element with `cursor: pointer`
 */
function isPropagatingElement(element: ElementInfo): boolean {
  const normalizedTag = (element.tagName || element.type || '').toLowerCase();
  const effectiveRole = element.role || element.attributes?.role || null;

  // Generic elements with cursor=pointer are decided purely by their text:
  // with text they are treated as labels, without text as possible containers.
  const isGeneric = normalizedTag === 'generic' || element.type === 'generic';
  if (isGeneric && element.attributes?.['cursor'] === 'pointer') {
    const hasOwnText = Boolean(element.text && element.text.trim());
    return !hasOwnText;
  }

  // Otherwise the element must match one of the known tag/role patterns.
  return PROPAGATING_ELEMENTS.some(
    ({ tag, role }) => tag === normalizedTag && (role === null || role === effectiveRole)
  );
}
64
+
65
/**
 * Check whether a child rectangle lies (almost) entirely inside a parent rectangle.
 *
 * @param childBounds - Rectangle of the candidate child
 * @param parentBounds - Rectangle of the candidate parent
 * @param threshold - Minimum ratio of (intersection area / child area) required
 * @returns `true` when the overlapping share of the child's area is at least
 *          `threshold`; a child with zero area is never considered contained
 */
function isContained(
  childBounds: { x: number; y: number; width: number; height: number },
  parentBounds: { x: number; y: number; width: number; height: number },
  threshold: number
): boolean {
  const childArea = childBounds.width * childBounds.height;
  if (childArea === 0) {
    return false;
  }

  // Edges of the intersection rectangle (may be inverted when there is no overlap).
  const left = Math.max(childBounds.x, parentBounds.x);
  const right = Math.min(childBounds.x + childBounds.width, parentBounds.x + parentBounds.width);
  const top = Math.max(childBounds.y, parentBounds.y);
  const bottom = Math.min(childBounds.y + childBounds.height, parentBounds.y + parentBounds.height);

  // Clamp to zero so disjoint rectangles yield an empty intersection.
  const overlapWidth = Math.max(0, right - left);
  const overlapHeight = Math.max(0, bottom - top);

  return (overlapWidth * overlapHeight) / childArea >= threshold;
}
90
+
91
/**
 * Decide whether a child element should be dropped in favour of its parent.
 *
 * A child is filtered only when the parent is a propagating element, both
 * elements have coordinates, the child is contained in the parent (per
 * CONTAINMENT_THRESHOLD), and none of the exception rules below applies.
 *
 * @param childEl - Candidate child element
 * @param parentEl - Candidate parent element
 * @returns `true` when the child should be removed
 */
function shouldFilterChild(childEl: ElementInfo, parentEl: ElementInfo): boolean {
  // Preconditions: propagating parent, coordinates on both sides, containment.
  if (!isPropagatingElement(parentEl)) return false;

  const childBox = childEl.coordinates;
  const parentBox = parentEl.coordinates;
  if (!childBox || !parentBox) return false;

  if (!isContained(childBox, parentBox, CONTAINMENT_THRESHOLD)) return false;

  const formTags = ['input', 'select', 'textarea', 'label'];
  const interactiveRoles = ['button', 'link', 'checkbox', 'radio', 'tab', 'menuitem'];

  const childTag = (childEl.tagName || childEl.type || '').toLowerCase();
  const childRole = childEl.role || childEl.attributes?.role || null;

  // Exception rules, evaluated in order; any hit keeps the child.
  const mustKeep =
    // 1. Form elements need individual interaction
    formTags.includes(childTag) ||
    // 2. Child is itself propagating (might have stopPropagation)
    isPropagatingElement(childEl) ||
    // 3. Child carries an onclick handler
    Boolean(childEl.attributes?.onclick) ||
    // 4. Child has a meaningful aria-label
    Boolean(childEl.attributes?.['aria-label']?.trim()) ||
    // 5. Child has an interactive role
    interactiveRoles.includes(childRole || '');

  return !mustKeep;
}
143
+
144
/**
 * Filter clickable elements based on parent-child relationships.
 *
 * Every ordered pair of distinct clickable refs is treated as a candidate
 * (parent, child) pair; the child ref is removed from the result whenever
 * `shouldFilterChild(child, parent)` is true. Refs that have no entry in
 * `elements`, or whose entry has no coordinates, are never used as a parent
 * and are never removed.
 *
 * Setting the environment variable `DEBUG_PARENT_CHILD` to `'true'` logs the
 * details of every examined pair to the console.
 *
 * @param elements - Map of all elements with their info
 * @param clickableRefs - Set of refs that are clickable (not mutated)
 * @returns `filteredElements`: the refs that remain after filtering;
 *          `debugInfo`: one record per (child, parent) pair that triggered filtering
 */
export function filterParentChildElements(
  elements: Record<string, ElementInfo>,
  clickableRefs: Set<string>
): {
  filteredElements: Set<string>;
  debugInfo: any[];
} {
  const elementRefs = Array.from(clickableRefs);
  const filteredElements = new Set<string>(elementRefs);
  const debugInfo: any[] = [];

  // Loop-invariant: read the debug flag once instead of once per pair.
  const debugEnabled = process.env.DEBUG_PARENT_CHILD === 'true';

  console.log(`[Parent-Child Filter] Analyzing ${elementRefs.length} clickable elements`);

  // Check each pair of elements for parent-child filtering
  for (const parentRef of elementRefs) {
    const parentEl = elements[parentRef];
    if (!parentEl?.coordinates) continue;

    const isParentPropagating = isPropagatingElement(parentEl);

    // shouldFilterChild() always answers false for a non-propagating parent,
    // so the child scan is only worth running when pair-level logging is on.
    if (!isParentPropagating && !debugEnabled) continue;

    for (const childRef of elementRefs) {
      // Refs originate from a Set, so equal refs means the same element.
      if (childRef === parentRef) continue;

      const childEl = elements[childRef];
      if (!childEl?.coordinates) continue;

      // Evaluate once and reuse for both logging and filtering.
      const shouldFilter = shouldFilterChild(childEl, parentEl);

      // Debug parent-child relationships when enabled
      if (debugEnabled) {
        console.log(`\n[Debug] Checking ${parentRef} -> ${childRef}:`);
        console.log(`Parent:`, {
          ref: parentRef,
          type: parentEl.type || parentEl.tagName,
          role: parentEl.role,
          coords: parentEl.coordinates,
          isPropagating: isParentPropagating
        });
        console.log(`Child:`, {
          ref: childRef,
          type: childEl.type || childEl.tagName,
          role: childEl.role,
          coords: childEl.coordinates
        });
        console.log(`Should filter? ${shouldFilter}`);
      }

      if (shouldFilter) {
        filteredElements.delete(childRef);

        debugInfo.push({
          type: 'filtered',
          childRef,
          parentRef,
          reason: 'Contained within propagating parent',
          parentType: parentEl.type || parentEl.tagName,
          childType: childEl.type || childEl.tagName,
          parentRole: parentEl.role,
          childRole: childEl.role,
          // shouldFilterChild() only returns true after its containment check
          // has passed, so this is always true here; no need to recompute it.
          containment: true
        });
      }
    }
  }

  const filteredCount = elementRefs.length - filteredElements.size;
  console.log(`[Parent-Child Filter] Filtered out ${filteredCount} child elements`);

  return {
    filteredElements,
    debugInfo
  };
}