camel-ai 0.2.73a1__py3-none-any.whl → 0.2.73a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

@@ -78,8 +78,52 @@ class WebSocketBrowserServer {
78
78
  switch (command) {
79
79
  case 'init':
80
80
  console.log('Initializing toolkit with params:', JSON.stringify(params, null, 2));
81
- this.toolkit = new HybridBrowserToolkit(params);
82
- return { message: 'Toolkit initialized' };
81
+
82
+ // Check if CDP is available first
83
+ let useCdp = false;
84
+ let cdpUrl = params.cdpUrl || 'http://localhost:9222';
85
+
86
+ // Extract base URL and port for validation
87
+ const baseUrl = cdpUrl.includes('/devtools/') ? cdpUrl.split('/devtools/')[0] : cdpUrl;
88
+
89
+ try {
90
+ // Test if Chrome debug port is accessible and get page URL
91
+ const response = await fetch(`${baseUrl}/json`);
92
+ if (response.ok) {
93
+ const pages = await response.json();
94
+ if (pages && pages.length > 0) {
95
+ // If user provided a specific page URL, use it; otherwise use first available
96
+ if (cdpUrl.includes('/devtools/page/') || cdpUrl.includes('/devtools/browser/')) {
97
+ useCdp = true;
98
+ console.log(`Using provided CDP URL: ${cdpUrl}`);
99
+ } else {
100
+ // Use the first available page
101
+ const firstPage = pages[0];
102
+ const pageUrl = firstPage.devtoolsFrontendUrl;
103
+ const pageId = pageUrl.match(/ws=localhost:\d+(.*)$/)?.[1];
104
+
105
+ if (pageId) {
106
+ useCdp = true;
107
+ cdpUrl = `${baseUrl}${pageId}`;
108
+ console.log(`Chrome debug port detected, using CDP connection to: ${pageId}`);
109
+ }
110
+ }
111
+ }
112
+ }
113
+ } catch (error) {
114
+ console.log('Chrome debug port not accessible, will start new browser instance');
115
+ }
116
+
117
+ const config = {
118
+ connectOverCdp: useCdp,
119
+ cdpUrl: useCdp ? cdpUrl : undefined,
120
+ headless: false,
121
+ ...params
122
+ };
123
+
124
+ console.log('Final config:', JSON.stringify(config, null, 2));
125
+ this.toolkit = new HybridBrowserToolkit(config);
126
+ return { message: 'Toolkit initialized with CDP connection' };
83
127
 
84
128
  case 'open_browser':
85
129
  if (!this.toolkit) throw new Error('Toolkit not initialized');
@@ -110,6 +110,11 @@ class WebSocketBrowserWrapper:
110
110
  self.process: Optional[subprocess.Popen] = None
111
111
  self.websocket = None
112
112
  self.server_port = None
113
+ self._send_lock = asyncio.Lock() # Lock for sending messages
114
+ self._receive_task = None # Background task for receiving messages
115
+ self._pending_responses: Dict[
116
+ str, asyncio.Future[Dict[str, Any]]
117
+ ] = {} # Message ID -> Future
113
118
 
114
119
  # Logging configuration
115
120
  self.browser_log_to_file = (config or {}).get(
@@ -251,11 +256,22 @@ class WebSocketBrowserWrapper:
251
256
  f"Failed to connect to WebSocket server: {e}"
252
257
  ) from e
253
258
 
259
+ # Start the background receiver task
260
+ self._receive_task = asyncio.create_task(self._receive_loop())
261
+
254
262
  # Initialize the browser toolkit
255
263
  await self._send_command('init', self.config)
256
264
 
257
265
  async def stop(self):
258
266
  """Stop the WebSocket connection and server."""
267
+ # Cancel the receiver task
268
+ if self._receive_task and not self._receive_task.done():
269
+ self._receive_task.cancel()
270
+ try:
271
+ await self._receive_task
272
+ except asyncio.CancelledError:
273
+ pass
274
+
259
275
  if self.websocket:
260
276
  try:
261
277
  await self._send_command('shutdown', {})
@@ -327,6 +343,39 @@ class WebSocketBrowserWrapper:
327
343
  except Exception as e:
328
344
  logger.error(f"Failed to write to log file: {e}")
329
345
 
346
async def _receive_loop(self):
    r"""Background task that routes incoming WebSocket messages.

    Each frame read from ``self.websocket`` is decoded as JSON and, when
    its ``id`` matches an entry in ``self._pending_responses``, the
    corresponding future is resolved with the decoded message.

    A frame that cannot be decoded, or that is not a JSON object, is
    logged and skipped so that one bad frame does not abort every
    in-flight command. A failure of the socket itself ends the loop.
    Whenever the loop ends (socket error, cancellation, or the socket
    being cleared), every future still pending is failed so its waiter
    does not block until its own timeout.
    """
    failure = None
    try:
        while self.websocket:
            try:
                response_data = await self.websocket.recv()
            except asyncio.CancelledError:
                break
            except Exception as e:
                # The transport is unusable; remember why so the waiters
                # receive the real cause.
                logger.error(f"Error in receive loop: {e}")
                failure = e
                break

            try:
                response = json.loads(response_data)
            except (TypeError, ValueError) as e:
                # json.JSONDecodeError and UnicodeDecodeError are both
                # ValueError subclasses.
                logger.warning(f"Failed to decode WebSocket response: {e}")
                continue

            if not isinstance(response, dict):
                logger.warning(f"Received unexpected message: {response}")
                continue

            message_id = response.get('id')
            future = None
            # IDs are generated as strings; anything else cannot match.
            if isinstance(message_id, str) and message_id:
                future = self._pending_responses.pop(message_id, None)

            if future is None:
                # Log unexpected messages
                logger.warning(f"Received unexpected message: {response}")
            elif not future.done():
                # Set the result for the waiting coroutine
                future.set_result(response)
    finally:
        if self._pending_responses:
            error = failure or RuntimeError(
                "WebSocket receive loop terminated"
            )
            # Iterate over a snapshot; the dict is cleared right after.
            for future in list(self._pending_responses.values()):
                if not future.done():
                    future.set_exception(error)
            self._pending_responses.clear()
        logger.debug("Receive loop terminated")
378
+
330
379
  async def _ensure_connection(self) -> None:
331
380
  """Ensure WebSocket connection is alive."""
332
381
  if not self.websocket:
@@ -350,39 +399,39 @@ class WebSocketBrowserWrapper:
350
399
  message_id = str(uuid.uuid4())
351
400
  message = {'id': message_id, 'command': command, 'params': params}
352
401
 
353
- try:
354
- # Send command
355
- if self.websocket is None:
356
- raise RuntimeError("WebSocket connection not established")
357
- await self.websocket.send(json.dumps(message))
402
+ # Create a future for this message
403
+ future: asyncio.Future[Dict[str, Any]] = asyncio.Future()
404
+ self._pending_responses[message_id] = future
358
405
 
359
- # Wait for response with matching ID
360
- while True:
361
- try:
362
- if self.websocket is None:
363
- raise RuntimeError("WebSocket connection lost")
364
- response_data = await asyncio.wait_for(
365
- self.websocket.recv(), timeout=60.0
366
- )
367
- response = json.loads(response_data)
368
-
369
- # Check if this is the response we're waiting for
370
- if response.get('id') == message_id:
371
- if not response.get('success'):
372
- raise RuntimeError(
373
- f"Command failed: {response.get('error')}"
374
- )
375
- return response['result']
406
+ try:
407
+ # Use lock only for sending to prevent interleaved messages
408
+ async with self._send_lock:
409
+ if self.websocket is None:
410
+ raise RuntimeError("WebSocket connection not established")
411
+ await self.websocket.send(json.dumps(message))
412
+
413
+ # Wait for response (no lock needed, handled by background
414
+ # receiver)
415
+ try:
416
+ response = await asyncio.wait_for(future, timeout=60.0)
376
417
 
377
- except asyncio.TimeoutError:
418
+ if not response.get('success'):
378
419
  raise RuntimeError(
379
- f"Timeout waiting for response to command: {command}"
420
+ f"Command failed: {response.get('error')}"
380
421
  )
381
- except json.JSONDecodeError as e:
382
- logger.warning(f"Failed to decode WebSocket response: {e}")
383
- continue
422
+ return response['result']
423
+
424
+ except asyncio.TimeoutError:
425
+ # Remove from pending if timeout
426
+ self._pending_responses.pop(message_id, None)
427
+ raise RuntimeError(
428
+ f"Timeout waiting for response to command: {command}"
429
+ )
384
430
 
385
431
  except Exception as e:
432
+ # Clean up the pending response
433
+ self._pending_responses.pop(message_id, None)
434
+
386
435
  # Check if it's a connection closed error
387
436
  if (
388
437
  "close frame" in str(e)
@@ -0,0 +1,17 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+
15
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
16
+
17
+ __all__ = ["HybridBrowserToolkit"]
@@ -0,0 +1,417 @@
1
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ # you may not use this file except in compliance with the License.
4
+ # You may obtain a copy of the License at
5
+ #
6
+ # http://www.apache.org/licenses/LICENSE-2.0
7
+ #
8
+ # Unless required by applicable law or agreed to in writing, software
9
+ # distributed under the License is distributed on an "AS IS" BASIS,
10
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11
+ # See the License for the specific language governing permissions and
12
+ # limitations under the License.
13
+ # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
+ import asyncio
15
+ from typing import TYPE_CHECKING, Any, Dict, Optional
16
+
17
+ from .config_loader import ConfigLoader
18
+
19
+ if TYPE_CHECKING:
20
+ from playwright.async_api import Page
21
+
22
+
23
class ActionExecutor:
    r"""Executes high-level actions (click, type, ...) on a Playwright Page.

    Every action is a dict with a ``"type"`` key selecting the handler and
    handler-specific keys (``ref``, ``selector``, ``text``, ...). Results are
    dicts with ``success``, ``message`` and ``details`` keys.
    """

    def __init__(
        self,
        page: "Page",
        session: Optional[Any] = None,
        default_timeout: Optional[int] = None,
        short_timeout: Optional[int] = None,
        max_scroll_amount: Optional[int] = None,
    ):
        r"""Store the page/session and resolve timeouts via ConfigLoader.

        Args:
            page (Page): Page the actions are executed against.
            session (Optional[Any]): Session object used to register and
                switch to tabs opened by a click. (default: :obj:`None`)
            default_timeout (Optional[int]): Override passed to
                ``ConfigLoader.get_action_timeout``. (default: :obj:`None`)
            short_timeout (Optional[int]): Override passed to
                ``ConfigLoader.get_short_timeout``. (default: :obj:`None`)
            max_scroll_amount (Optional[int]): Override passed to
                ``ConfigLoader.get_max_scroll_amount``.
                (default: :obj:`None`)
        """
        self.page = page
        self.session = session  # HybridBrowserSession instance

        # Configure timeouts using the config file with optional overrides
        self.default_timeout = ConfigLoader.get_action_timeout(default_timeout)
        self.short_timeout = ConfigLoader.get_short_timeout(short_timeout)
        self.max_scroll_amount = ConfigLoader.get_max_scroll_amount(
            max_scroll_amount
        )

    # ------------------------------------------------------------------
    # Public helpers
    # ------------------------------------------------------------------
    async def execute(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Execute an action and return detailed result information.

        Args:
            action (Dict[str, Any]): Action description; ``action["type"]``
                selects one of click/type/select/wait/extract/scroll/enter.

        Returns:
            Dict[str, Any]: ``{"success", "message", "details"}``. ``success``
                is ``False`` for an empty action, a missing or unknown type,
                a handler that raised, and a handler that returned an error
                result (see ``_is_failure``).
        """
        if not action:
            return {
                "success": False,
                "message": "No action to execute",
                "details": {},
            }

        action_type = action.get("type")
        if not action_type:
            return {
                "success": False,
                "message": "Error: action has no type",
                "details": {},
            }

        try:
            handler = {
                "click": self._click,
                "type": self._type,
                "select": self._select,
                "wait": self._wait,
                "extract": self._extract,
                "scroll": self._scroll,
                "enter": self._enter,
            }.get(action_type)

            if handler is None:
                return {
                    "success": False,
                    "message": f"Error: Unknown action type '{action_type}'",
                    "details": {"action_type": action_type},
                }

            result = await handler(action)
            message = result["message"]
            details = result.get("details", {})
            return {
                # Handlers report failures through their return value, not
                # by raising, so the outcome must be derived from it.
                "success": not self._is_failure(message, details),
                "message": message,
                "details": details,
            }
        except Exception as exc:
            return {
                "success": False,
                "message": f"Error executing {action_type}: {exc}",
                "details": {"action_type": action_type, "error": str(exc)},
            }

    @staticmethod
    def _is_failure(message: Any, details: Any) -> bool:
        r"""Return True when a handler result describes a failed action.

        A result is a failure when its details carry a top-level ``"error"``
        key or its message starts with ``"Error"``; these are the two ways
        the handlers in this class signal a problem.
        """
        if isinstance(details, dict) and "error" in details:
            return True
        return isinstance(message, str) and message.startswith("Error")

    @staticmethod
    def _timeout_error_types() -> tuple:
        r"""Return the exception types that signal an expired wait.

        Playwright raises its own ``TimeoutError``; it is imported lazily so
        this module still loads when Playwright is not installed.
        """
        try:
            from playwright.async_api import (
                TimeoutError as PlaywrightTimeoutError,
            )
        except ImportError:
            return (asyncio.TimeoutError,)
        return (asyncio.TimeoutError, PlaywrightTimeoutError)

    # ------------------------------------------------------------------
    # Internal handlers
    # ------------------------------------------------------------------
    async def _click(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle click actions with new tab support for any clickable
        element.

        The element is located by ``ref``, then ``selector``, then ``text``.
        A ctrl/meta click is attempted first; with a session, a page opened
        by that click is registered and becomes the active page. If the
        ctrl/meta click itself fails, a forced plain click is used as the
        fallback. The element is never clicked a second time once the first
        click has gone through.

        Args:
            action (Dict[str, Any]): May contain ``ref``, ``selector`` and
                ``text``; at least one is required.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``; ``details["error"]``
                is set when the click could not be performed.
        """
        ref = action.get("ref")
        text = action.get("text")
        selector = action.get("selector")
        if not (ref or text or selector):
            return {
                "message": "Error: click requires ref/text/selector",
                "details": {"error": "missing_selector"},
            }

        # Build strategies in priority order
        strategies = []
        if ref:
            strategies.append(f"[aria-ref='{ref}']")
        if selector:
            strategies.append(selector)
        if text:
            strategies.append(f'text="{text}"')

        details: Dict[str, Any] = {
            "ref": ref,
            "selector": selector,
            "text": text,
            "strategies_tried": [],
            "successful_strategy": None,
            "click_method": None,
            "new_tab_created": False,
        }

        # Find the first selector that matches at least one element
        found_selector = None
        for sel in strategies:
            if await self.page.locator(sel).count() > 0:
                found_selector = sel
                break

        if not found_selector:
            details['error'] = "Element not found with any strategy"
            return {
                "message": "Error: Click failed, element not found",
                "details": details,
            }

        element = self.page.locator(found_selector).first
        details['successful_strategy'] = found_selector
        timeout_errors = self._timeout_error_types()

        # Attempt ctrl+click first (always)
        clicked = False
        try:
            if not self.session:
                await element.click(modifiers=["ControlOrMeta"])
                details["click_method"] = "ctrl_click_no_session"
                return {
                    "message": f"Clicked element (ctrl click, no"
                    f" session): {found_selector}",
                    "details": details,
                }

            try:
                async with self.page.context.expect_page(
                    timeout=self.short_timeout
                ) as new_page_info:
                    await element.click(modifiers=["ControlOrMeta"])
                    clicked = True
                new_page = await new_page_info.value
            except timeout_errors:
                if not clicked:
                    # The click itself timed out; let the fallback try.
                    raise
                # No new tab was opened, but the click went through.
                details["click_method"] = "ctrl_click_same_tab"
                return {
                    "message": f"Clicked element (ctrl click, "
                    f"same tab): {found_selector}",
                    "details": details,
                }

            await new_page.wait_for_load_state('domcontentloaded')
            new_tab_index = await self.session.register_page(new_page)
            if new_tab_index is not None:
                await self.session.switch_to_tab(new_tab_index)
                self.page = new_page
            details.update(
                {
                    "click_method": "ctrl_click_new_tab",
                    "new_tab_created": True,
                    "new_tab_index": new_tab_index,
                }
            )
            return {
                "message": f"Clicked element (ctrl click), opened in new "
                f"tab {new_tab_index}",
                "details": details,
            }
        except Exception as e:
            details['strategies_tried'].append(
                {
                    'selector': found_selector,
                    'method': 'ctrl_click',
                    'error': str(e),
                }
            )
            if clicked:
                # The click already happened; clicking again could trigger
                # the element's action twice, so report the failure instead.
                details["click_method"] = "ctrl_click"
                details["error"] = str(e)
                return {
                    "message": f"Error: Clicked {found_selector} but "
                    f"handling the new tab failed",
                    "details": details,
                }
            # Fall through to fallback

        # Fallback to normal force click if ctrl+click fails
        try:
            await element.click(force=True, timeout=self.default_timeout)
            details["click_method"] = "playwright_force_click"
            return {
                "message": f"Fallback clicked element: {found_selector}",
                "details": details,
            }
        except Exception as e:
            details["click_method"] = "playwright_force_click_failed"
            details["error"] = str(e)
            return {
                "message": f"Error: All click strategies "
                f"failed for {found_selector}",
                "details": details,
            }

    async def _type(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle typing text into input fields.

        Args:
            action (Dict[str, Any]): Needs ``ref`` or ``selector`` (the
                selector wins when both are given); ``text`` defaults to
                an empty string.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``; ``details["error"]``
                is set when filling the field failed.
        """
        ref = action.get("ref")
        selector = action.get("selector")
        text = action.get("text", "")
        if not (ref or selector):
            return {
                "message": "Error: type requires ref/selector",
                "details": {"error": "missing_selector"},
            }

        target = selector or f"[aria-ref='{ref}']"
        details = {
            "ref": ref,
            "selector": selector,
            "target": target,
            "text": text,
            "text_length": len(text),
        }

        try:
            await self.page.fill(target, text, timeout=self.short_timeout)
            return {
                "message": f"Typed '{text}' into {target}",
                "details": details,
            }
        except Exception as exc:
            details["error"] = str(exc)
            return {"message": f"Type failed: {exc}", "details": details}

    async def _select(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle selecting options from dropdowns.

        Args:
            action (Dict[str, Any]): Needs ``ref`` or ``selector`` (the
                selector wins when both are given); ``value`` defaults to
                an empty string.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``; ``details["error"]``
                is set when selecting the option failed.
        """
        ref = action.get("ref")
        selector = action.get("selector")
        value = action.get("value", "")
        if not (ref or selector):
            return {
                "message": "Error: select requires ref/selector",
                "details": {"error": "missing_selector"},
            }

        target = selector or f"[aria-ref='{ref}']"
        details = {
            "ref": ref,
            "selector": selector,
            "target": target,
            "value": value,
        }

        try:
            await self.page.select_option(
                target, value, timeout=self.default_timeout
            )
            return {
                "message": f"Selected '{value}' in {target}",
                "details": details,
            }
        except Exception as exc:
            details["error"] = str(exc)
            return {"message": f"Select failed: {exc}", "details": details}

    async def _wait(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle wait actions.

        Args:
            action (Dict[str, Any]): Either ``timeout`` (milliseconds to
                sleep) or ``selector`` (element to wait for); ``timeout``
                takes precedence when both are present.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``.
        """
        details: Dict[str, Any] = {
            "wait_type": None,
            "timeout": None,
            "selector": None,
        }

        if "timeout" in action:
            ms = int(action["timeout"])
            details["wait_type"] = "timeout"
            details["timeout"] = ms
            await asyncio.sleep(ms / 1000)
            return {"message": f"Waited {ms}ms", "details": details}
        if "selector" in action:
            sel = action["selector"]
            details["wait_type"] = "selector"
            details["selector"] = sel
            await self.page.wait_for_selector(
                sel, timeout=self.default_timeout
            )
            return {"message": f"Waited for {sel}", "details": details}
        return {
            "message": "Error: wait requires timeout/selector",
            "details": details,
        }

    async def _extract(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle text extraction from elements.

        Args:
            action (Dict[str, Any]): Needs ``ref`` identifying the element.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``; the full text is in
                ``details["extracted_text"]`` and the message carries at
                most its first 100 characters.
        """
        ref = action.get("ref")
        if not ref:
            return {
                "message": "Error: extract requires ref",
                "details": {"error": "missing_ref"},
            }

        target = f"[aria-ref='{ref}']"
        details = {"ref": ref, "target": target}

        await self.page.wait_for_selector(target, timeout=self.default_timeout)
        txt = await self.page.text_content(target)

        details["extracted_text"] = txt
        details["text_length"] = len(txt) if txt else 0

        return {
            "message": f"Extracted: {txt[:100] if txt else 'None'}",
            "details": details,
        }

    async def _scroll(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle page scrolling with safe parameter validation.

        Args:
            action (Dict[str, Any]): ``direction`` is ``"up"`` or ``"down"``
                (default ``"down"``); ``amount`` is the distance in pixels
                (default 300), clamped to ``+/- self.max_scroll_amount``.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``.
        """
        direction = action.get("direction", "down")
        amount = action.get("amount", 300)

        details = {
            "direction": direction,
            "requested_amount": amount,
            "actual_amount": None,
            "scroll_offset": None,
        }

        # Validate inputs to prevent injection
        if direction not in ("up", "down"):
            return {
                "message": "Error: direction must be 'up' or 'down'",
                "details": details,
            }

        try:
            # Safely convert amount to integer and clamp to reasonable range
            amount_int = int(amount)
            amount_int = max(
                -self.max_scroll_amount,
                min(self.max_scroll_amount, amount_int),
            )  # Clamp to max_scroll_amount range
            details["actual_amount"] = amount_int
        except (ValueError, TypeError):
            return {
                "message": "Error: amount must be a valid number",
                "details": details,
            }

        # The offset is passed as an argument, never spliced into the script
        scroll_offset = amount_int if direction == "down" else -amount_int
        details["scroll_offset"] = scroll_offset

        await self.page.evaluate(
            "offset => window.scrollBy(0, offset)", scroll_offset
        )
        await asyncio.sleep(0.5)
        return {
            "message": f"Scrolled {direction} by {abs(amount_int)}px",
            "details": details,
        }

    async def _enter(self, action: Dict[str, Any]) -> Dict[str, Any]:
        r"""Handle Enter key press on the currently focused element.

        Args:
            action (Dict[str, Any]): Unused; accepted for handler
                signature uniformity.

        Returns:
            Dict[str, Any]: ``{"message", "details"}``.
        """
        details = {"action_type": "enter", "target": "focused_element"}

        # Press Enter on whatever element currently has focus
        await self.page.keyboard.press("Enter")
        return {
            "message": "Pressed Enter on focused element",
            "details": details,
        }

    # utilities
    async def _wait_dom_stable(self) -> None:
        r"""Wait for DOM to become stable before executing actions.

        Best effort: waits for ``domcontentloaded`` and then ``networkidle``,
        each bounded by ``self.short_timeout``; failures are ignored.
        """
        try:
            # Wait for basic DOM content loading
            await self.page.wait_for_load_state(
                'domcontentloaded', timeout=self.short_timeout
            )

            # Try to wait for network idle briefly
            try:
                await self.page.wait_for_load_state(
                    'networkidle', timeout=self.short_timeout
                )
            except Exception:
                pass  # Network idle is optional

        except Exception:
            pass  # Don't fail if wait times out

    # static helpers
    @staticmethod
    def should_update_snapshot(action: Dict[str, Any]) -> bool:
        r"""Determine if an action requires a snapshot update.

        Args:
            action (Dict[str, Any]): Action whose ``"type"`` is inspected.

        Returns:
            bool: True for click, type, select, scroll, navigate and enter.
        """
        change_types = {
            "click",
            "type",
            "select",
            "scroll",
            "navigate",
            "enter",
        }
        return action.get("type") in change_types