camel-ai 0.2.75a6__py3-none-any.whl → 0.2.76a0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (38)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +151 -37
  3. camel/configs/__init__.py +3 -0
  4. camel/configs/amd_config.py +70 -0
  5. camel/interpreters/__init__.py +2 -0
  6. camel/interpreters/microsandbox_interpreter.py +395 -0
  7. camel/models/__init__.py +2 -0
  8. camel/models/amd_model.py +101 -0
  9. camel/models/model_factory.py +2 -0
  10. camel/models/openai_model.py +0 -6
  11. camel/runtimes/daytona_runtime.py +11 -12
  12. camel/toolkits/__init__.py +5 -3
  13. camel/toolkits/code_execution.py +28 -1
  14. camel/toolkits/function_tool.py +6 -1
  15. camel/toolkits/hybrid_browser_toolkit/config_loader.py +8 -0
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +12 -0
  17. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +33 -14
  18. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +135 -40
  19. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +2 -0
  20. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +43 -207
  21. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  22. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +231 -0
  23. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  24. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
  25. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +241 -56
  26. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +5 -1
  27. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
  28. camel/toolkits/mcp_toolkit.py +39 -14
  29. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  30. camel/toolkits/terminal_toolkit.py +12 -2
  31. camel/toolkits/video_analysis_toolkit.py +16 -10
  32. camel/types/enums.py +11 -0
  33. camel/utils/commons.py +2 -0
  34. camel/utils/mcp.py +136 -2
  35. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/METADATA +5 -3
  36. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/RECORD +38 -31
  37. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/WHEEL +0 -0
  38. {camel_ai-0.2.75a6.dist-info → camel_ai-0.2.76a0.dist-info}/licenses/LICENSE +0 -0
@@ -18,6 +18,7 @@ from camel.interpreters import (
18
18
  E2BInterpreter,
19
19
  InternalPythonInterpreter,
20
20
  JupyterKernelInterpreter,
21
+ MicrosandboxInterpreter,
21
22
  SubprocessInterpreter,
22
23
  )
23
24
  from camel.logger import get_logger
@@ -43,18 +44,31 @@ class CodeExecutionToolkit(BaseToolkit):
43
44
  (default: :obj:`None`)
44
45
  require_confirm (bool): Whether to require confirmation before
45
46
  executing code. (default: :obj:`False`)
47
+ timeout (Optional[float]): General timeout for toolkit operations.
48
+ (default: :obj:`None`)
49
+ microsandbox_config (Optional[dict]): Configuration for microsandbox
50
+ interpreter. Available keys: 'server_url', 'api_key',
51
+ 'namespace', 'sandbox_name', 'timeout'.
52
+ If None, uses default configuration. (default: :obj:`None`)
46
53
  """
47
54
 
48
55
  def __init__(
49
56
  self,
50
57
  sandbox: Literal[
51
- "internal_python", "jupyter", "docker", "subprocess", "e2b"
58
+ "internal_python",
59
+ "jupyter",
60
+ "docker",
61
+ "subprocess",
62
+ "e2b",
63
+ "microsandbox",
52
64
  ] = "subprocess",
53
65
  verbose: bool = False,
54
66
  unsafe_mode: bool = False,
55
67
  import_white_list: Optional[List[str]] = None,
56
68
  require_confirm: bool = False,
57
69
  timeout: Optional[float] = None,
70
+ # Microsandbox configuration dictionary
71
+ microsandbox_config: Optional[dict] = None,
58
72
  ) -> None:
59
73
  super().__init__(timeout=timeout)
60
74
  self.verbose = verbose
@@ -68,6 +82,7 @@ class CodeExecutionToolkit(BaseToolkit):
68
82
  DockerInterpreter,
69
83
  SubprocessInterpreter,
70
84
  E2BInterpreter,
85
+ MicrosandboxInterpreter,
71
86
  ]
72
87
 
73
88
  if sandbox == "internal_python":
@@ -95,6 +110,18 @@ class CodeExecutionToolkit(BaseToolkit):
95
110
  )
96
111
  elif sandbox == "e2b":
97
112
  self.interpreter = E2BInterpreter(require_confirm=require_confirm)
113
+ elif sandbox == "microsandbox":
114
+ # Extract parameters with proper types for microsandbox
115
+ config = microsandbox_config or {}
116
+
117
+ self.interpreter = MicrosandboxInterpreter(
118
+ require_confirm=require_confirm,
119
+ server_url=config.get("server_url"),
120
+ api_key=config.get("api_key"),
121
+ namespace=config.get("namespace", "default"),
122
+ sandbox_name=config.get("sandbox_name"),
123
+ timeout=config.get("timeout", 30),
124
+ )
98
125
  else:
99
126
  raise RuntimeError(
100
127
  f"The sandbox type `{sandbox}` is not supported."
@@ -156,7 +156,12 @@ def get_openai_tool_schema(func: Callable) -> Dict[str, Any]:
156
156
  if (name := param.arg_name) in parameters_dict["properties"] and (
157
157
  description := param.description
158
158
  ):
159
- parameters_dict["properties"][name]["description"] = description
159
+ # OpenAI does not allow descriptions on properties that use $ref.
160
+ # To avoid schema errors, we only add the description if "$ref" is
161
+ # not present.
162
+ prop = parameters_dict["properties"][name]
163
+ if "$ref" not in prop:
164
+ prop["description"] = description
160
165
 
161
166
  short_description = docstring.short_description or ""
162
167
  long_description = docstring.long_description or ""
@@ -44,6 +44,9 @@ class BrowserConfig:
44
44
  connect_over_cdp: bool = False
45
45
  cdp_url: Optional[str] = None
46
46
 
47
+ # Full visual mode configuration
48
+ full_visual_mode: bool = False
49
+
47
50
 
48
51
  @dataclass
49
52
  class ToolkitConfig:
@@ -51,6 +54,7 @@ class ToolkitConfig:
51
54
 
52
55
  cache_dir: str = "tmp/"
53
56
  browser_log_to_file: bool = False
57
+ log_dir: Optional[str] = None
54
58
  session_id: Optional[str] = None
55
59
  enabled_tools: Optional[list] = None
56
60
 
@@ -116,6 +120,8 @@ class ConfigLoader:
116
120
  toolkit_kwargs["session_id"] = value
117
121
  elif key == "enabledTools":
118
122
  toolkit_kwargs["enabled_tools"] = value
123
+ elif key == "fullVisualMode":
124
+ browser_kwargs["full_visual_mode"] = value
119
125
 
120
126
  browser_config = BrowserConfig(**browser_kwargs)
121
127
  toolkit_config = ToolkitConfig(**toolkit_kwargs)
@@ -142,10 +148,12 @@ class ConfigLoader:
142
148
  "screenshotTimeout": self.browser_config.screenshot_timeout,
143
149
  "pageStabilityTimeout": self.browser_config.page_stability_timeout,
144
150
  "browser_log_to_file": self.toolkit_config.browser_log_to_file,
151
+ "log_dir": self.toolkit_config.log_dir,
145
152
  "session_id": self.toolkit_config.session_id,
146
153
  "viewport_limit": self.browser_config.viewport_limit,
147
154
  "connectOverCdp": self.browser_config.connect_over_cdp,
148
155
  "cdpUrl": self.browser_config.cdp_url,
156
+ "fullVisualMode": self.browser_config.full_visual_mode,
149
157
  }
150
158
 
151
159
  def get_timeout_config(self) -> Dict[str, Optional[int]]:
@@ -38,6 +38,7 @@ class HybridBrowserToolkit(BaseToolkit):
38
38
  cache_dir: str = "tmp/",
39
39
  enabled_tools: Optional[List[str]] = None,
40
40
  browser_log_to_file: bool = False,
41
+ log_dir: Optional[str] = None,
41
42
  session_id: Optional[str] = None,
42
43
  default_start_url: str = "https://google.com/",
43
44
  default_timeout: Optional[int] = None,
@@ -50,6 +51,7 @@ class HybridBrowserToolkit(BaseToolkit):
50
51
  viewport_limit: bool = False,
51
52
  connect_over_cdp: bool = False,
52
53
  cdp_url: Optional[str] = None,
54
+ full_visual_mode: bool = False,
53
55
  **kwargs: Any,
54
56
  ) -> Any:
55
57
  r"""Create a HybridBrowserToolkit instance with the specified mode.
@@ -72,6 +74,8 @@ class HybridBrowserToolkit(BaseToolkit):
72
74
  Defaults to None.
73
75
  browser_log_to_file (bool): Whether to log browser actions to
74
76
  file. Defaults to False.
77
+ log_dir (Optional[str]): Custom directory path for log files.
78
+ If None, defaults to "browser_log". Defaults to None.
75
79
  session_id (Optional[str]): Session identifier. Defaults to None.
76
80
  default_start_url (str): Default URL to start with. Defaults
77
81
  to "https://google.com/".
@@ -98,6 +102,11 @@ class HybridBrowserToolkit(BaseToolkit):
98
102
  cdp_url (Optional[str]): WebSocket endpoint URL for CDP
99
103
  connection. Required when connect_over_cdp is True.
100
104
  Defaults to None. (Only supported in TypeScript mode)
105
+ full_visual_mode (bool): When True, browser actions like click,
106
+ browser_open, visit_page, etc. will return 'full visual mode'
107
+ as snapshot instead of actual page content. The
108
+ browser_get_page_snapshot method will still return the actual
109
+ snapshot. Defaults to False.
101
110
  **kwargs: Additional keyword arguments passed to the
102
111
  implementation.
103
112
 
@@ -117,6 +126,7 @@ class HybridBrowserToolkit(BaseToolkit):
117
126
  cache_dir=cache_dir,
118
127
  enabled_tools=enabled_tools,
119
128
  browser_log_to_file=browser_log_to_file,
129
+ log_dir=log_dir,
120
130
  session_id=session_id,
121
131
  default_start_url=default_start_url,
122
132
  default_timeout=default_timeout,
@@ -129,6 +139,7 @@ class HybridBrowserToolkit(BaseToolkit):
129
139
  viewport_limit=viewport_limit,
130
140
  connect_over_cdp=connect_over_cdp,
131
141
  cdp_url=cdp_url,
142
+ full_visual_mode=full_visual_mode,
132
143
  **kwargs,
133
144
  )
134
145
  elif mode == "python":
@@ -160,6 +171,7 @@ class HybridBrowserToolkit(BaseToolkit):
160
171
  cache_dir=cache_dir,
161
172
  enabled_tools=enabled_tools,
162
173
  browser_log_to_file=browser_log_to_file,
174
+ log_dir=log_dir,
163
175
  session_id=session_id,
164
176
  default_start_url=default_start_url,
165
177
  default_timeout=default_timeout,
@@ -86,6 +86,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
86
86
  cache_dir: str = "tmp/",
87
87
  enabled_tools: Optional[List[str]] = None,
88
88
  browser_log_to_file: bool = False,
89
+ log_dir: Optional[str] = None,
89
90
  session_id: Optional[str] = None,
90
91
  default_start_url: str = "https://google.com/",
91
92
  default_timeout: Optional[int] = None,
@@ -98,6 +99,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
98
99
  viewport_limit: bool = False,
99
100
  connect_over_cdp: bool = False,
100
101
  cdp_url: Optional[str] = None,
102
+ full_visual_mode: bool = False,
101
103
  ) -> None:
102
104
  r"""Initialize the HybridBrowserToolkit.
103
105
 
@@ -115,6 +117,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
115
117
  Defaults to None.
116
118
  browser_log_to_file (bool): Whether to log browser actions to
117
119
  file. Defaults to False.
120
+ log_dir (Optional[str]): Custom directory path for log files.
121
+ If None, defaults to "browser_log". Defaults to None.
118
122
  session_id (Optional[str]): Session identifier. Defaults to None.
119
123
  default_start_url (str): Default URL to start with. Defaults
120
124
  to "https://google.com/".
@@ -143,6 +147,9 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
143
147
  cdp_url (Optional[str]): WebSocket endpoint URL for CDP
144
148
  connection (e.g., 'ws://localhost:9222/devtools/browser/...').
145
149
  Required when connect_over_cdp is True. Defaults to None.
150
+ full_visual_mode (bool): When True, browser actions like click,
151
+ browser_open, visit_page, etc. will not return snapshots.
152
+ Defaults to False.
146
153
  """
147
154
  super().__init__()
148
155
  RegisteredAgentToolkit.__init__(self)
@@ -163,10 +170,12 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
163
170
  viewport_limit=viewport_limit,
164
171
  cache_dir=cache_dir,
165
172
  browser_log_to_file=browser_log_to_file,
173
+ log_dir=log_dir,
166
174
  session_id=session_id,
167
175
  enabled_tools=enabled_tools,
168
176
  connect_over_cdp=connect_over_cdp,
169
177
  cdp_url=cdp_url,
178
+ full_visual_mode=full_visual_mode,
170
179
  )
171
180
 
172
181
  # Legacy attribute access for backward compatibility
@@ -182,6 +191,7 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
182
191
  self._default_start_url = browser_config.default_start_url
183
192
  self._session_id = toolkit_config.session_id or "default"
184
193
  self._viewport_limit = browser_config.viewport_limit
194
+ self._full_visual_mode = browser_config.full_visual_mode
185
195
 
186
196
  # Store timeout configuration for backward compatibility
187
197
  self._default_timeout = browser_config.default_timeout
@@ -648,22 +658,29 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
648
658
 
649
659
  # Add tab information
650
660
  tab_info = await ws_wrapper.get_tab_info()
651
- result.update(
652
- {
653
- "tabs": tab_info,
654
- "current_tab": next(
655
- (
656
- i
657
- for i, tab in enumerate(tab_info)
658
- if tab.get("is_current")
659
- ),
660
- 0,
661
+
662
+ response = {
663
+ "result": result.get("result", ""),
664
+ "snapshot": result.get("snapshot", ""),
665
+ "tabs": tab_info,
666
+ "current_tab": next(
667
+ (
668
+ i
669
+ for i, tab in enumerate(tab_info)
670
+ if tab.get("is_current")
661
671
  ),
662
- "total_tabs": len(tab_info),
663
- }
664
- )
672
+ 0,
673
+ ),
674
+ "total_tabs": len(tab_info),
675
+ }
665
676
 
666
- return result
677
+ if "newTabId" in result:
678
+ response["newTabId"] = result["newTabId"]
679
+
680
+ if "timing" in result:
681
+ response["timing"] = result["timing"]
682
+
683
+ return response
667
684
  except Exception as e:
668
685
  logger.error(f"Failed to click element: {e}")
669
686
  return {
@@ -1377,6 +1394,8 @@ class HybridBrowserToolkit(BaseToolkit, RegisteredAgentToolkit):
1377
1394
  screenshot_timeout=self._screenshot_timeout,
1378
1395
  page_stability_timeout=self._page_stability_timeout,
1379
1396
  dom_content_loaded_timeout=self._dom_content_loaded_timeout,
1397
+ viewport_limit=self._viewport_limit,
1398
+ full_visual_mode=self._full_visual_mode,
1380
1399
  )
1381
1400
 
1382
1401
  def get_tools(self) -> List[FunctionTool]:
@@ -59,14 +59,30 @@ export class HybridBrowserSession {
59
59
  const contexts = this.browser.contexts();
60
60
  if (contexts.length > 0) {
61
61
  this.context = contexts[0];
62
+
63
+ // Apply stealth headers to existing context if configured
64
+ // Note: userAgent cannot be changed on an existing context
65
+ if (stealthConfig.enabled) {
66
+ if (stealthConfig.extraHTTPHeaders) {
67
+ await this.context.setExtraHTTPHeaders(stealthConfig.extraHTTPHeaders);
68
+ }
69
+ if (stealthConfig.userAgent) {
70
+ console.warn('[HybridBrowserSession] Cannot apply userAgent to existing context. Consider creating a new context if userAgent customization is required.');
71
+ }
72
+ }
62
73
  } else {
63
74
  const contextOptions: any = {
64
75
  viewport: browserConfig.viewport
65
76
  };
66
77
 
67
- // Apply stealth headers if configured
68
- if (stealthConfig.enabled && stealthConfig.extraHTTPHeaders) {
69
- contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
78
+ // Apply stealth headers and UA if configured
79
+ if (stealthConfig.enabled) {
80
+ if (stealthConfig.extraHTTPHeaders) {
81
+ contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
82
+ }
83
+ if (stealthConfig.userAgent) {
84
+ contextOptions.userAgent = stealthConfig.userAgent;
85
+ }
70
86
  }
71
87
 
72
88
  this.context = await this.browser.newContext(contextOptions);
@@ -105,13 +121,18 @@ export class HybridBrowserSession {
105
121
  if (stealthConfig.enabled) {
106
122
  launchOptions.args = stealthConfig.args || [];
107
123
 
108
- // Apply stealth user agent if configured
124
+ // Apply stealth user agent/headers if configured
109
125
  if (stealthConfig.userAgent) {
110
126
  launchOptions.userAgent = stealthConfig.userAgent;
111
127
  }
128
+ if (stealthConfig.extraHTTPHeaders) {
129
+ launchOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
130
+ }
112
131
  }
113
132
 
114
133
  if (browserConfig.userDataDir) {
134
+ // Ensure viewport is honored in persistent context
135
+ launchOptions.viewport = browserConfig.viewport;
115
136
  this.context = await chromium.launchPersistentContext(
116
137
  browserConfig.userDataDir,
117
138
  launchOptions
@@ -129,9 +150,14 @@ export class HybridBrowserSession {
129
150
  viewport: browserConfig.viewport
130
151
  };
131
152
 
132
- // Apply stealth headers if configured
133
- if (stealthConfig.enabled && stealthConfig.extraHTTPHeaders) {
134
- contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
153
+ // Apply stealth headers and UA if configured
154
+ if (stealthConfig.enabled) {
155
+ if (stealthConfig.extraHTTPHeaders) {
156
+ contextOptions.extraHTTPHeaders = stealthConfig.extraHTTPHeaders;
157
+ }
158
+ if (stealthConfig.userAgent) {
159
+ contextOptions.userAgent = stealthConfig.userAgent;
160
+ }
135
161
  }
136
162
 
137
163
  this.context = await this.browser.newContext(contextOptions);
@@ -173,26 +199,10 @@ export class HybridBrowserSession {
173
199
 
174
200
  async getCurrentPage(): Promise<Page> {
175
201
  if (!this.currentTabId || !this.pages.has(this.currentTabId)) {
176
- // In CDP mode, try to create a new page if none exists
202
+ // In CDP mode, we cannot create new pages
177
203
  const browserConfig = this.configLoader.getBrowserConfig();
178
- if (browserConfig.connectOverCdp && this.context) {
179
- console.log('[CDP] No active page found, attempting to create new page...');
180
- try {
181
- const newPage = await this.context.newPage();
182
- const newTabId = this.generateTabId();
183
- this.registerNewPage(newTabId, newPage);
184
- this.currentTabId = newTabId;
185
-
186
- // Set page timeouts
187
- newPage.setDefaultNavigationTimeout(browserConfig.navigationTimeout);
188
- newPage.setDefaultTimeout(browserConfig.navigationTimeout);
189
-
190
- console.log(`[CDP] Created new page with tab ID: ${newTabId}`);
191
- return newPage;
192
- } catch (error) {
193
- console.error('[CDP] Failed to create new page:', error);
194
- throw new Error('No active page available and failed to create new page in CDP mode');
195
- }
204
+ if (browserConfig.connectOverCdp) {
205
+ throw new Error('No active page available in CDP mode; frontend must pre-create blank tabs.');
196
206
  }
197
207
  throw new Error('No active page available');
198
208
  }
@@ -235,6 +245,36 @@ export class HybridBrowserSession {
235
245
  return this.getSnapshotForAINative(includeCoordinates, viewportLimit);
236
246
  }
237
247
 
248
+ private parseElementFromSnapshot(snapshotText: string, ref: string): { role?: string; text?: string } {
249
+ const lines = snapshotText.split('\n');
250
+ for (const line of lines) {
251
+ if (line.includes(`[ref=${ref}]`)) {
252
+ const typeMatch = line.match(/^\s*-?\s*([\w-]+)/);
253
+ const role = typeMatch ? typeMatch[1] : undefined;
254
+ const textMatch = line.match(/"([^"]*)"/);
255
+ const text = textMatch ? textMatch[1] : undefined;
256
+ return { role, text };
257
+ }
258
+ }
259
+ return {};
260
+ }
261
+
262
+ private buildSnapshotIndex(snapshotText: string): Map<string, { role?: string; text?: string }> {
263
+ const index = new Map<string, { role?: string; text?: string }>();
264
+ const refRe = /\[ref=([^\]]+)\]/i;
265
+ for (const line of snapshotText.split('\n')) {
266
+ const m = line.match(refRe);
267
+ if (!m) continue;
268
+ const ref = m[1];
269
+ const roleMatch = line.match(/^\s*-?\s*([a-z0-9_-]+)/i);
270
+ const role = roleMatch ? roleMatch[1].toLowerCase() : undefined;
271
+ const textMatch = line.match(/"([^"]*)"/);
272
+ const text = textMatch ? textMatch[1] : undefined;
273
+ index.set(ref, { role, text });
274
+ }
275
+ return index;
276
+ }
277
+
238
278
  private async getSnapshotForAINative(includeCoordinates = false, viewportLimit = false): Promise<SnapshotResult & { timing: DetailedTiming }> {
239
279
  const startTime = Date.now();
240
280
  const page = await this.getCurrentPage();
@@ -257,6 +297,17 @@ export class HybridBrowserSession {
257
297
  const mappingStart = Date.now();
258
298
  const playwrightMapping: Record<string, any> = {};
259
299
 
300
+ // Parse element info in a single pass
301
+ const snapshotIndex = this.buildSnapshotIndex(snapshotText);
302
+ for (const ref of refs) {
303
+ const elementInfo = snapshotIndex.get(ref) || {};
304
+ playwrightMapping[ref] = {
305
+ ref,
306
+ role: elementInfo.role || 'unknown',
307
+ text: elementInfo.text || '',
308
+ };
309
+ }
310
+
260
311
  if (includeCoordinates) {
261
312
  // Get coordinates for each ref using aria-ref selector
262
313
  for (const ref of refs) {
@@ -270,8 +321,9 @@ export class HybridBrowserSession {
270
321
  const boundingBox = await element.boundingBox();
271
322
 
272
323
  if (boundingBox) {
324
+ // Add coordinates to existing element info
273
325
  playwrightMapping[ref] = {
274
- ref,
326
+ ...playwrightMapping[ref],
275
327
  coordinates: {
276
328
  x: Math.round(boundingBox.x),
277
329
  y: Math.round(boundingBox.y),
@@ -388,7 +440,6 @@ export class HybridBrowserSession {
388
440
 
389
441
  if (shouldOpenNewTab) {
390
442
  // Handle new tab opening
391
-
392
443
  // If it's a link that doesn't naturally open in new tab, force it
393
444
  if (isNavigableLink && !naturallyOpensNewTab) {
394
445
  await element.evaluate((el, blankTarget) => {
@@ -803,6 +854,55 @@ export class HybridBrowserSession {
803
854
  }
804
855
  }
805
856
 
857
+ /**
858
+ * Wait for DOM to stop changing for a specified duration
859
+ */
860
+ private async waitForDOMStability(page: Page, maxWaitTime: number = 500): Promise<void> {
861
+ const startTime = Date.now();
862
+ const stabilityThreshold = 100; // Consider stable if no changes for 100ms
863
+ let lastChangeTime = Date.now();
864
+
865
+ try {
866
+ // Monitor DOM changes
867
+ await page.evaluate(() => {
868
+ let changeCount = 0;
869
+ (window as any).__domStabilityCheck = { changeCount: 0, lastChange: Date.now() };
870
+
871
+ const observer = new MutationObserver(() => {
872
+ (window as any).__domStabilityCheck.changeCount++;
873
+ (window as any).__domStabilityCheck.lastChange = Date.now();
874
+ });
875
+
876
+ observer.observe(document.body, {
877
+ childList: true,
878
+ subtree: true,
879
+ attributes: true,
880
+ characterData: true
881
+ });
882
+
883
+ (window as any).__domStabilityObserver = observer;
884
+ });
885
+
886
+ // Wait until no changes for stabilityThreshold or timeout
887
+ await page.waitForFunction(
888
+ (threshold) => {
889
+ const check = (window as any).__domStabilityCheck;
890
+ return check && (Date.now() - check.lastChange) > threshold;
891
+ },
892
+ stabilityThreshold,
893
+ { timeout: Math.max(0, maxWaitTime) }
894
+ ).catch(() => {});
895
+ } finally {
896
+ // Cleanup
897
+ await page.evaluate(() => {
898
+ const observer = (window as any).__domStabilityObserver;
899
+ if (observer) observer.disconnect();
900
+ delete (window as any).__domStabilityObserver;
901
+ delete (window as any).__domStabilityCheck;
902
+ }).catch(() => {});
903
+ }
904
+ }
905
+
806
906
  private async waitForPageStability(page: Page): Promise<{ domContentLoadedTime: number; networkIdleTime: number }> {
807
907
  let domContentLoadedTime = 0;
808
908
  let networkIdleTime = 0;
@@ -1132,12 +1232,12 @@ export class HybridBrowserSession {
1132
1232
  const filtered: Record<string, SnapshotElement> = {};
1133
1233
 
1134
1234
 
1135
- // Apply viewport filtering with scroll position adjustment
1136
- const browserConfig = this.configLoader.getBrowserConfig();
1137
- const adjustedScrollPos = {
1138
- x: scrollPos.x * browserConfig.scrollPositionScale,
1139
- y: scrollPos.y * browserConfig.scrollPositionScale
1140
- };
1235
+ // Apply viewport filtering
1236
+ // boundingBox() returns viewport-relative coordinates, so we don't need to add scroll offsets
1237
+ const viewportLeft = 0;
1238
+ const viewportTop = 0;
1239
+ const viewportRight = viewport.width;
1240
+ const viewportBottom = viewport.height;
1141
1241
 
1142
1242
  for (const [ref, element] of Object.entries(elements)) {
1143
1243
  // If element has no coordinates, include it (fallback)
@@ -1148,14 +1248,9 @@ export class HybridBrowserSession {
1148
1248
 
1149
1249
  const { x, y, width, height } = element.coordinates;
1150
1250
 
1151
- // Calculate viewport bounds using adjusted scroll position
1152
- const viewportLeft = adjustedScrollPos.x;
1153
- const viewportTop = adjustedScrollPos.y;
1154
- const viewportRight = adjustedScrollPos.x + viewport.width;
1155
- const viewportBottom = adjustedScrollPos.y + viewport.height;
1156
-
1157
1251
  // Check if element is visible in current viewport
1158
1252
  // Element is visible if it overlaps with viewport bounds
1253
+ // Since boundingBox() coords are viewport-relative, we compare directly
1159
1254
  const isVisible = (
1160
1255
  x < viewportRight && // Left edge is before viewport right
1161
1256
  y < viewportBottom && // Top edge is before viewport bottom
@@ -79,6 +79,7 @@ export interface WebSocketConfig {
79
79
  browser_log_to_file: boolean;
80
80
  session_id?: string;
81
81
  viewport_limit: boolean;
82
+ fullVisualMode?: boolean;
82
83
  }
83
84
 
84
85
  // Default stealth configuration
@@ -212,6 +213,7 @@ export class ConfigLoader {
212
213
  if (config.browser_log_to_file !== undefined) wsConfig.browser_log_to_file = config.browser_log_to_file;
213
214
  if (config.session_id !== undefined) wsConfig.session_id = config.session_id;
214
215
  if (config.viewport_limit !== undefined) wsConfig.viewport_limit = config.viewport_limit;
216
+ if (config.fullVisualMode !== undefined) wsConfig.fullVisualMode = config.fullVisualMode;
215
217
 
216
218
  // CDP connection options
217
219
  if (config.connectOverCdp !== undefined) browserConfig.connectOverCdp = config.connectOverCdp;