camel-ai 0.2.71a2__py3-none-any.whl → 0.2.71a4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic; see the registry's advisory page for more details.

Files changed (32)
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/chat_agent.py +297 -16
  4. camel/interpreters/docker_interpreter.py +3 -2
  5. camel/loaders/base_loader.py +85 -0
  6. camel/messages/base.py +2 -6
  7. camel/services/agent_openapi_server.py +380 -0
  8. camel/societies/workforce/workforce.py +144 -33
  9. camel/toolkits/__init__.py +7 -4
  10. camel/toolkits/craw4ai_toolkit.py +2 -2
  11. camel/toolkits/file_write_toolkit.py +6 -6
  12. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
  13. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
  14. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
  15. camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
  16. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1008 -0
  17. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
  18. camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +202 -23
  19. camel/toolkits/note_taking_toolkit.py +90 -0
  20. camel/toolkits/openai_image_toolkit.py +292 -0
  21. camel/toolkits/slack_toolkit.py +4 -4
  22. camel/toolkits/terminal_toolkit.py +223 -73
  23. camel/types/agents/tool_calling_record.py +4 -1
  24. camel/types/enums.py +24 -24
  25. camel/utils/mcp_client.py +37 -1
  26. camel/utils/tool_result.py +44 -0
  27. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/METADATA +58 -5
  28. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/RECORD +30 -26
  29. camel/toolkits/dalle_toolkit.py +0 -175
  30. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
  31. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/WHEEL +0 -0
  32. {camel_ai-0.2.71a2.dist-info → camel_ai-0.2.71a4.dist-info}/licenses/LICENSE +0 -0
@@ -19,7 +19,16 @@ import time
19
19
  import uuid
20
20
  from collections import deque
21
21
  from enum import Enum
22
- from typing import Any, Coroutine, Deque, Dict, List, Optional, Set, Tuple
22
+ from typing import (
23
+ Any,
24
+ Coroutine,
25
+ Deque,
26
+ Dict,
27
+ List,
28
+ Optional,
29
+ Set,
30
+ Tuple,
31
+ )
23
32
 
24
33
  from colorama import Fore
25
34
 
@@ -200,7 +209,7 @@ class Workforce(BaseNode):
200
209
  children: Optional[List[BaseNode]] = None,
201
210
  coordinator_agent: Optional[ChatAgent] = None,
202
211
  task_agent: Optional[ChatAgent] = None,
203
- new_worker_agent: Optional[ChatAgent] = None, # TODO: use MCP Agent
212
+ new_worker_agent: Optional[ChatAgent] = None,
204
213
  graceful_shutdown_timeout: float = 15.0,
205
214
  share_memory: bool = False,
206
215
  ) -> None:
@@ -325,9 +334,10 @@ class Workforce(BaseNode):
325
334
  "settings (ModelPlatformType.DEFAULT, ModelType.DEFAULT) "
326
335
  "with default system message and TaskPlanningToolkit."
327
336
  )
337
+ task_tools = TaskPlanningToolkit().get_tools()
328
338
  self.task_agent = ChatAgent(
329
339
  task_sys_msg,
330
- tools=TaskPlanningToolkit().get_tools(), # type: ignore[arg-type]
340
+ tools=task_tools, # type: ignore[arg-type]
331
341
  )
332
342
  else:
333
343
  logger.info(
@@ -563,6 +573,44 @@ class Workforce(BaseNode):
563
573
  except Exception as e:
564
574
  logger.warning(f"Error synchronizing shared memory: {e}")
565
575
 
576
+ def _update_dependencies_for_decomposition(
577
+ self, original_task: Task, subtasks: List[Task]
578
+ ) -> None:
579
+ r"""Update dependency tracking when a task is decomposed into subtasks.
580
+ Tasks that depended on the original task should now depend on all
581
+ subtasks. The last subtask inherits the original task's dependencies.
582
+ """
583
+ if not subtasks:
584
+ return
585
+
586
+ original_task_id = original_task.id
587
+ subtask_ids = [subtask.id for subtask in subtasks]
588
+
589
+ # Find tasks that depend on the original task
590
+ dependent_task_ids = [
591
+ task_id
592
+ for task_id, deps in self._task_dependencies.items()
593
+ if original_task_id in deps
594
+ ]
595
+
596
+ # Update dependent tasks to depend on all subtasks
597
+ for task_id in dependent_task_ids:
598
+ dependencies = self._task_dependencies[task_id]
599
+ dependencies.remove(original_task_id)
600
+ dependencies.extend(subtask_ids)
601
+
602
+ # The last subtask inherits original task's dependencies (if any)
603
+ if original_task_id in self._task_dependencies:
604
+ original_dependencies = self._task_dependencies[original_task_id]
605
+ if original_dependencies:
606
+ # Set dependencies for the last subtask to maintain execution
607
+ # order
608
+ self._task_dependencies[subtask_ids[-1]] = (
609
+ original_dependencies.copy()
610
+ )
611
+ # Remove original task dependencies as it's now decomposed
612
+ del self._task_dependencies[original_task_id]
613
+
566
614
  def _cleanup_task_tracking(self, task_id: str) -> None:
567
615
  r"""Clean up tracking data for a task to prevent memory leaks.
568
616
 
@@ -590,6 +638,10 @@ class Workforce(BaseNode):
590
638
  for subtask in subtasks:
591
639
  subtask.parent = task
592
640
 
641
+ # Update dependency tracking for decomposed task
642
+ if subtasks:
643
+ self._update_dependencies_for_decomposition(task, subtasks)
644
+
593
645
  return subtasks
594
646
 
595
647
  # Human intervention methods
@@ -1436,7 +1488,9 @@ class Workforce(BaseNode):
1436
1488
 
1437
1489
  return valid_assignments, invalid_assignments
1438
1490
 
1439
- def _handle_task_assignment_fallbacks(self, tasks: List[Task]) -> List:
1491
+ async def _handle_task_assignment_fallbacks(
1492
+ self, tasks: List[Task]
1493
+ ) -> List:
1440
1494
  r"""Create new workers for unassigned tasks as fallback.
1441
1495
 
1442
1496
  Args:
@@ -1449,7 +1503,7 @@ class Workforce(BaseNode):
1449
1503
 
1450
1504
  for task in tasks:
1451
1505
  logger.info(f"Creating new worker for unassigned task {task.id}")
1452
- new_worker = self._create_worker_node_for_task(task)
1506
+ new_worker = await self._create_worker_node_for_task(task)
1453
1507
 
1454
1508
  assignment = TaskAssignment(
1455
1509
  task_id=task.id,
@@ -1460,7 +1514,7 @@ class Workforce(BaseNode):
1460
1514
 
1461
1515
  return fallback_assignments
1462
1516
 
1463
- def _handle_assignment_retry_and_fallback(
1517
+ async def _handle_assignment_retry_and_fallback(
1464
1518
  self,
1465
1519
  invalid_assignments: List[TaskAssignment],
1466
1520
  tasks: List[Task],
@@ -1531,14 +1585,14 @@ class Workforce(BaseNode):
1531
1585
  f"Creating fallback workers for {len(unassigned_tasks)} "
1532
1586
  f"unassigned tasks"
1533
1587
  )
1534
- fallback_assignments = self._handle_task_assignment_fallbacks(
1535
- unassigned_tasks
1588
+ fallback_assignments = (
1589
+ await self._handle_task_assignment_fallbacks(unassigned_tasks)
1536
1590
  )
1537
1591
  final_assignments.extend(fallback_assignments)
1538
1592
 
1539
1593
  return final_assignments
1540
1594
 
1541
- def _find_assignee(
1595
+ async def _find_assignee(
1542
1596
  self,
1543
1597
  tasks: List[Task],
1544
1598
  ) -> TaskAssignResult:
@@ -1580,7 +1634,7 @@ class Workforce(BaseNode):
1580
1634
  # invalid assignments and unassigned tasks
1581
1635
  all_problem_assignments = invalid_assignments
1582
1636
  retry_and_fallback_assignments = (
1583
- self._handle_assignment_retry_and_fallback(
1637
+ await self._handle_assignment_retry_and_fallback(
1584
1638
  all_problem_assignments, tasks, valid_worker_ids
1585
1639
  )
1586
1640
  )
@@ -1616,7 +1670,7 @@ class Workforce(BaseNode):
1616
1670
  async def _post_dependency(self, dependency: Task) -> None:
1617
1671
  await self._channel.post_dependency(dependency, self.node_id)
1618
1672
 
1619
- def _create_worker_node_for_task(self, task: Task) -> Worker:
1673
+ async def _create_worker_node_for_task(self, task: Task) -> Worker:
1620
1674
  r"""Creates a new worker node for a given task and add it to the
1621
1675
  children list of this node. This is one of the actions that
1622
1676
  the coordinator can take when a task has failed.
@@ -1662,7 +1716,7 @@ class Workforce(BaseNode):
1662
1716
  f"Coordinator agent returned malformed JSON response. "
1663
1717
  )
1664
1718
 
1665
- new_agent = self._create_new_agent(
1719
+ new_agent = await self._create_new_agent(
1666
1720
  new_node_conf.role,
1667
1721
  new_node_conf.sys_msg,
1668
1722
  )
@@ -1689,14 +1743,19 @@ class Workforce(BaseNode):
1689
1743
  )
1690
1744
  return new_node
1691
1745
 
1692
- def _create_new_agent(self, role: str, sys_msg: str) -> ChatAgent:
1746
+ async def _create_new_agent(self, role: str, sys_msg: str) -> ChatAgent:
1693
1747
  worker_sys_msg = BaseMessage.make_assistant_message(
1694
1748
  role_name=role,
1695
1749
  content=sys_msg,
1696
1750
  )
1697
1751
 
1698
1752
  if self.new_worker_agent is not None:
1699
- return self.new_worker_agent
1753
+ # Clone the template agent to create an independent instance
1754
+ cloned_agent = self.new_worker_agent.clone(with_memory=False)
1755
+ # Update the system message for the specific role
1756
+ cloned_agent._system_message = worker_sys_msg
1757
+ cloned_agent.init_messages() # Initialize with new system message
1758
+ return cloned_agent
1700
1759
  else:
1701
1760
  # Default tools for a new agent
1702
1761
  function_list = [
@@ -1712,7 +1771,7 @@ class Workforce(BaseNode):
1712
1771
  )
1713
1772
 
1714
1773
  return ChatAgent(
1715
- worker_sys_msg,
1774
+ system_message=worker_sys_msg,
1716
1775
  model=model,
1717
1776
  tools=function_list, # type: ignore[arg-type]
1718
1777
  pause_event=self._pause_event,
@@ -1765,7 +1824,7 @@ class Workforce(BaseNode):
1765
1824
  f"Found {len(tasks_to_assign)} new tasks. "
1766
1825
  f"Requesting assignment..."
1767
1826
  )
1768
- batch_result = self._find_assignee(tasks_to_assign)
1827
+ batch_result = await self._find_assignee(tasks_to_assign)
1769
1828
  logger.debug(
1770
1829
  f"Coordinator returned assignments:\n"
1771
1830
  f"{json.dumps(batch_result.dict(), indent=2)}"
@@ -1788,17 +1847,19 @@ class Workforce(BaseNode):
1788
1847
  # Step 2: Iterate through all pending tasks and post those that are
1789
1848
  # ready
1790
1849
  posted_tasks = []
1791
- # Pre-compute completed task IDs set for O(1) lookups
1792
- completed_task_ids = {t.id for t in self._completed_tasks}
1850
+ # Pre-compute completed task IDs and their states for O(1) lookups
1851
+ completed_tasks_info = {t.id: t.state for t in self._completed_tasks}
1793
1852
 
1794
1853
  for task in self._pending_tasks:
1795
1854
  # A task must be assigned to be considered for posting
1796
1855
  if task.id in self._task_dependencies:
1797
1856
  dependencies = self._task_dependencies[task.id]
1798
1857
  # Check if all dependencies for this task are in the completed
1799
- # set
1858
+ # set and their state is DONE
1800
1859
  if all(
1801
- dep_id in completed_task_ids for dep_id in dependencies
1860
+ dep_id in completed_tasks_info
1861
+ and completed_tasks_info[dep_id] == TaskState.DONE
1862
+ for dep_id in dependencies
1802
1863
  ):
1803
1864
  assignee_id = self._assignees[task.id]
1804
1865
  logger.debug(
@@ -1885,7 +1946,7 @@ class Workforce(BaseNode):
1885
1946
 
1886
1947
  if task.get_depth() > 3:
1887
1948
  # Create a new worker node and reassign
1888
- assignee = self._create_worker_node_for_task(task)
1949
+ assignee = await self._create_worker_node_for_task(task)
1889
1950
 
1890
1951
  # Sync shared memory after creating new worker to provide context
1891
1952
  if self.share_memory:
@@ -1915,19 +1976,35 @@ class Workforce(BaseNode):
1915
1976
  # Insert packets at the head of the queue
1916
1977
  self._pending_tasks.extendleft(reversed(subtasks))
1917
1978
 
1979
+ await self._post_ready_tasks()
1980
+ action_taken = f"decomposed into {len(subtasks)} subtasks"
1981
+
1982
+ # Handle task completion differently for decomposed tasks
1983
+ if task.id in self._assignees:
1984
+ await self._channel.archive_task(task.id)
1985
+
1986
+ self._cleanup_task_tracking(task.id)
1987
+ logger.debug(
1988
+ f"Task {task.id} failed and was {action_taken}. "
1989
+ f"Dependencies updated for subtasks."
1990
+ )
1991
+
1918
1992
  # Sync shared memory after task decomposition
1919
1993
  if self.share_memory:
1920
1994
  logger.info(
1921
- f"Syncing shared memory after decomposing failed "
1922
- f"task {task.id}"
1995
+ f"Syncing shared memory after task {task.id} decomposition"
1923
1996
  )
1924
1997
  self._sync_shared_memory()
1925
1998
 
1999
+ # Check if any pending tasks are now ready to execute
1926
2000
  await self._post_ready_tasks()
1927
- action_taken = f"decomposed into {len(subtasks)} subtasks"
2001
+ return False
2002
+
2003
+ # For reassigned tasks (depth > 3), handle normally
1928
2004
  if task.id in self._assignees:
1929
2005
  await self._channel.archive_task(task.id)
1930
2006
 
2007
+ self._cleanup_task_tracking(task.id)
1931
2008
  logger.debug(
1932
2009
  f"Task {task.id} failed and was {action_taken}. "
1933
2010
  f"Updating dependency state."
@@ -2020,31 +2097,65 @@ class Workforce(BaseNode):
2020
2097
  break
2021
2098
 
2022
2099
  if not found_and_removed:
2023
- # Task was already removed from pending queue (expected case when
2024
- # it had been popped immediately after posting). No need to
2025
- # draw user attention with a warning; record at debug level.
2100
+ # Task was already removed from pending queue (common case when
2101
+ # it was posted and removed immediately).
2026
2102
  logger.debug(
2027
2103
  f"Completed task {task.id} was already removed from pending "
2028
- "queue."
2104
+ "queue (normal for posted tasks)."
2029
2105
  )
2030
2106
 
2031
2107
  # Archive the task and update dependency tracking
2032
2108
  if task.id in self._assignees:
2033
2109
  await self._channel.archive_task(task.id)
2034
2110
 
2035
- # Ensure it's in completed tasks set
2036
- self._completed_tasks.append(task)
2111
+ # Ensure it's in completed tasks set by updating if it exists or
2112
+ # appending if it's new.
2113
+ task_found_in_completed = False
2114
+ for i, t in enumerate(self._completed_tasks):
2115
+ if t.id == task.id:
2116
+ self._completed_tasks[i] = task
2117
+ task_found_in_completed = True
2118
+ break
2119
+ if not task_found_in_completed:
2120
+ self._completed_tasks.append(task)
2037
2121
 
2038
2122
  # Handle parent task completion logic
2039
2123
  parent = task.parent
2040
- if parent and parent.id not in {t.id for t in self._completed_tasks}:
2124
+ if parent:
2125
+ # Check if all subtasks are completed and successful
2041
2126
  all_subtasks_done = all(
2042
- sub.id in {t.id for t in self._completed_tasks}
2127
+ any(
2128
+ t.id == sub.id and t.state == TaskState.DONE
2129
+ for t in self._completed_tasks
2130
+ )
2043
2131
  for sub in parent.subtasks
2044
2132
  )
2045
2133
  if all_subtasks_done:
2046
- # Set the parent task state to done
2134
+ # Collect results from successful subtasks only
2135
+ successful_results = []
2136
+ for sub in parent.subtasks:
2137
+ completed_subtask = next(
2138
+ (
2139
+ t
2140
+ for t in self._completed_tasks
2141
+ if t.id == sub.id and t.state == TaskState.DONE
2142
+ ),
2143
+ None,
2144
+ )
2145
+ if completed_subtask and completed_subtask.result:
2146
+ successful_results.append(
2147
+ f"--- Subtask {sub.id} Result ---\n"
2148
+ f"{completed_subtask.result}"
2149
+ )
2150
+
2151
+ # Set parent task state and result
2047
2152
  parent.state = TaskState.DONE
2153
+ parent.result = (
2154
+ "\n\n".join(successful_results)
2155
+ if successful_results
2156
+ else "All subtasks completed"
2157
+ )
2158
+
2048
2159
  logger.debug(
2049
2160
  f"All subtasks of {parent.id} are done. "
2050
2161
  f"Marking parent as complete."
@@ -23,7 +23,7 @@ from .open_api_specs.security_config import openapi_security_config
23
23
  from .math_toolkit import MathToolkit
24
24
  from .search_toolkit import SearchToolkit
25
25
  from .weather_toolkit import WeatherToolkit
26
- from .dalle_toolkit import DalleToolkit
26
+ from .openai_image_toolkit import OpenAIImageToolkit
27
27
  from .ask_news_toolkit import AskNewsToolkit, AsyncAskNewsToolkit
28
28
  from .linkedin_toolkit import LinkedInToolkit
29
29
  from .reddit_toolkit import RedditToolkit
@@ -77,11 +77,12 @@ from .aci_toolkit import ACIToolkit
77
77
  from .playwright_mcp_toolkit import PlaywrightMCPToolkit
78
78
  from .wolfram_alpha_toolkit import WolframAlphaToolkit
79
79
  from .task_planning_toolkit import TaskPlanningToolkit
80
- from .non_visual_browser_toolkit import BrowserNonVisualToolkit
80
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
81
81
  from .edgeone_pages_mcp_toolkit import EdgeOnePagesMCPToolkit
82
82
  from .google_drive_mcp_toolkit import GoogleDriveMCPToolkit
83
83
  from .craw4ai_toolkit import Crawl4AIToolkit
84
84
  from .markitdown_toolkit import MarkItDownToolkit
85
+ from .note_taking_toolkit import NoteTakingToolkit
85
86
 
86
87
  __all__ = [
87
88
  'BaseToolkit',
@@ -96,7 +97,7 @@ __all__ = [
96
97
  'SearchToolkit',
97
98
  'SlackToolkit',
98
99
  'WhatsAppToolkit',
99
- 'DalleToolkit',
100
+ 'OpenAIImageToolkit',
100
101
  'TwitterToolkit',
101
102
  'WeatherToolkit',
102
103
  'RetrievalToolkit',
@@ -145,10 +146,12 @@ __all__ = [
145
146
  'PlaywrightMCPToolkit',
146
147
  'WolframAlphaToolkit',
147
148
  'BohriumToolkit',
149
+ 'OpenAIImageToolkit',
148
150
  'TaskPlanningToolkit',
149
- 'BrowserNonVisualToolkit',
151
+ 'HybridBrowserToolkit',
150
152
  'EdgeOnePagesMCPToolkit',
151
153
  'GoogleDriveMCPToolkit',
152
154
  'Crawl4AIToolkit',
153
155
  'MarkItDownToolkit',
156
+ 'NoteTakingToolkit',
154
157
  ]
@@ -71,11 +71,11 @@ class Crawl4AIToolkit(BaseToolkit):
71
71
  return f"Error scraping {url}: {e}"
72
72
 
73
73
  async def __aenter__(self):
74
- """Async context manager entry."""
74
+ r"""Async context manager entry."""
75
75
  return self
76
76
 
77
77
  async def __aexit__(self, exc_type, exc_val, exc_tb):
78
- """Async context manager exit - cleanup the client."""
78
+ r"""Async context manager exit - cleanup the client."""
79
79
  if self._client is not None:
80
80
  await self._client.__aexit__(exc_type, exc_val, exc_tb)
81
81
  self._client = None
@@ -160,9 +160,9 @@ class FileWriteToolkit(BaseToolkit):
160
160
  file_path (Path): The target file path.
161
161
  title (str): The title of the document.
162
162
  content (str): The text content to write.
163
- use_latex (bool): Whether to use LaTeX for rendering. (requires
164
- LaTeX toolchain). If False, uses PyMuPDF for simpler PDF
165
- generation. (default: :obj:`False`)
163
+ use_latex (bool): Whether to use LaTeX for rendering. Only
164
+ Recommended for documents with mathematical formulas or
165
+ complex typesetting needs. (default: :obj:`False`)
166
166
  """
167
167
  # TODO: table generation need to be improved
168
168
  if use_latex:
@@ -439,9 +439,9 @@ class FileWriteToolkit(BaseToolkit):
439
439
  supplied, it is resolved to self.output_dir.
440
440
  encoding (Optional[str]): The character encoding to use. (default:
441
441
  :obj: `None`)
442
- use_latex (bool): For PDF files, whether to use LaTeX rendering
443
- (True) or simple FPDF rendering (False). (default: :obj:
444
- `False`)
442
+ use_latex (bool): For PDF files, whether to use LaTeX rendering.
443
+ Only recommended for documents with mathematical formulas or
444
+ complex typesetting needs. (default: :obj:`False`)
445
445
 
446
446
  Returns:
447
447
  str: A message indicating success or error details.
@@ -11,8 +11,8 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
- from .browser_non_visual_toolkit import BrowserNonVisualToolkit
14
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
15
15
 
16
16
  __all__ = [
17
- "BrowserNonVisualToolkit",
17
+ "HybridBrowserToolkit",
18
18
  ]
@@ -24,6 +24,7 @@ class ActionExecutor:
24
24
  # Configuration constants
25
25
  DEFAULT_TIMEOUT = 5000 # 5 seconds
26
26
  SHORT_TIMEOUT = 2000 # 2 seconds
27
+ MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
27
28
 
28
29
  def __init__(self, page: "Page"):
29
30
  self.page = page
@@ -32,6 +33,7 @@ class ActionExecutor:
32
33
  # Public helpers
33
34
  # ------------------------------------------------------------------
34
35
  async def execute(self, action: Dict[str, Any]) -> str:
36
+ r"""Execute an action and return the result description."""
35
37
  if not action:
36
38
  return "No action to execute"
37
39
 
@@ -64,32 +66,46 @@ class ActionExecutor:
64
66
  # Internal handlers
65
67
  # ------------------------------------------------------------------
66
68
  async def _click(self, action: Dict[str, Any]) -> str:
69
+ r"""Handle click actions with multiple fallback strategies."""
67
70
  ref = action.get("ref")
68
71
  text = action.get("text")
69
72
  selector = action.get("selector")
70
73
  if not (ref or text or selector):
71
74
  return "Error: click requires ref/text/selector"
72
75
 
76
+ # Build strategies in priority order: ref > selector > text
73
77
  strategies = []
78
+ if ref:
79
+ strategies.append(f"[aria-ref='{ref}']")
74
80
  if selector:
75
81
  strategies.append(selector)
76
82
  if text:
77
83
  strategies.append(f'text="{text}"')
78
- if ref:
79
- strategies.append(f"[aria-ref='{ref}']")
80
84
 
85
+ # Strategy 1: Try Playwright force click for each selector
81
86
  for sel in strategies:
82
87
  try:
83
88
  if await self.page.locator(sel).count() > 0:
84
89
  await self.page.click(
85
- sel, timeout=self.SHORT_TIMEOUT, force=True
90
+ sel, timeout=self.DEFAULT_TIMEOUT, force=True
86
91
  )
87
- return f"Clicked element via {sel}"
92
+ return f"Clicked element via force: {sel}"
88
93
  except Exception:
89
- pass
90
- return "Error: Could not click element"
94
+ continue
95
+
96
+ # Strategy 2: Try JavaScript click as fallback
97
+ for sel in strategies:
98
+ try:
99
+ await self.page.locator(sel).first.evaluate("el => el.click()")
100
+ await asyncio.sleep(0.1) # Brief wait for effects
101
+ return f"Clicked element via JS: {sel}"
102
+ except Exception:
103
+ continue
104
+
105
+ return "Error: All click strategies failed"
91
106
 
92
107
  async def _type(self, action: Dict[str, Any]) -> str:
108
+ r"""Handle typing text into input fields."""
93
109
  ref = action.get("ref")
94
110
  selector = action.get("selector")
95
111
  text = action.get("text", "")
@@ -103,6 +119,7 @@ class ActionExecutor:
103
119
  return f"Type failed: {exc}"
104
120
 
105
121
  async def _select(self, action: Dict[str, Any]) -> str:
122
+ r"""Handle selecting options from dropdowns."""
106
123
  ref = action.get("ref")
107
124
  selector = action.get("selector")
108
125
  value = action.get("value", "")
@@ -118,8 +135,9 @@ class ActionExecutor:
118
135
  return f"Select failed: {exc}"
119
136
 
120
137
  async def _wait(self, action: Dict[str, Any]) -> str:
138
+ r"""Handle wait actions."""
121
139
  if "timeout" in action:
122
- ms = action["timeout"]
140
+ ms = int(action["timeout"])
123
141
  await asyncio.sleep(ms / 1000)
124
142
  return f"Waited {ms}ms"
125
143
  if "selector" in action:
@@ -131,6 +149,7 @@ class ActionExecutor:
131
149
  return "Error: wait requires timeout/selector"
132
150
 
133
151
  async def _extract(self, action: Dict[str, Any]) -> str:
152
+ r"""Handle text extraction from elements."""
134
153
  ref = action.get("ref")
135
154
  if not ref:
136
155
  return "Error: extract requires ref"
@@ -140,6 +159,7 @@ class ActionExecutor:
140
159
  return f"Extracted: {txt[:100] if txt else 'None'}"
141
160
 
142
161
  async def _scroll(self, action: Dict[str, Any]) -> str:
162
+ r"""Handle page scrolling with safe parameter validation."""
143
163
  direction = action.get("direction", "down")
144
164
  amount = action.get("amount", 300)
145
165
 
@@ -151,18 +171,22 @@ class ActionExecutor:
151
171
  # Safely convert amount to integer and clamp to reasonable range
152
172
  amount_int = int(amount)
153
173
  amount_int = max(
154
- -5000, min(5000, amount_int)
155
- ) # Clamp between -5000 and 5000
174
+ -self.MAX_SCROLL_AMOUNT,
175
+ min(self.MAX_SCROLL_AMOUNT, amount_int),
176
+ ) # Clamp to MAX_SCROLL_AMOUNT range
156
177
  except (ValueError, TypeError):
157
178
  return "Error: amount must be a valid number"
158
179
 
159
180
  # Use safe evaluation with bound parameters
160
181
  scroll_offset = amount_int if direction == "down" else -amount_int
161
- await self.page.evaluate(f"window.scrollBy(0, {scroll_offset})")
182
+ await self.page.evaluate(
183
+ "offset => window.scrollBy(0, offset)", scroll_offset
184
+ )
162
185
  await asyncio.sleep(0.5)
163
186
  return f"Scrolled {direction} by {abs(amount_int)}px"
164
187
 
165
188
  async def _enter(self, action: Dict[str, Any]) -> str:
189
+ r"""Handle Enter key press actions."""
166
190
  ref = action.get("ref")
167
191
  selector = action.get("selector")
168
192
  if ref:
@@ -175,16 +199,28 @@ class ActionExecutor:
175
199
 
176
200
  # utilities
177
201
  async def _wait_dom_stable(self) -> None:
202
+ r"""Wait for DOM to become stable before executing actions."""
178
203
  try:
204
+ # Wait for basic DOM content loading
179
205
  await self.page.wait_for_load_state(
180
206
  'domcontentloaded', timeout=self.SHORT_TIMEOUT
181
207
  )
208
+
209
+ # Try to wait for network idle briefly
210
+ try:
211
+ await self.page.wait_for_load_state(
212
+ 'networkidle', timeout=self.SHORT_TIMEOUT
213
+ )
214
+ except Exception:
215
+ pass # Network idle is optional
216
+
182
217
  except Exception:
183
- pass
218
+ pass # Don't fail if wait times out
184
219
 
185
220
  # static helpers
186
221
  @staticmethod
187
222
  def should_update_snapshot(action: Dict[str, Any]) -> bool:
223
+ r"""Determine if an action requires a snapshot update."""
188
224
  change_types = {
189
225
  "click",
190
226
  "type",