camel-ai 0.2.71a5__py3-none-any.whl → 0.2.71a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

@@ -15,9 +15,11 @@
15
15
  import base64
16
16
  import datetime
17
17
  import io
18
+ import json
18
19
  import os
19
20
  import time
20
21
  import urllib.parse
22
+ from functools import wraps
21
23
  from typing import Any, Callable, ClassVar, Dict, List, Optional, cast
22
24
 
23
25
  from camel.logger import get_logger
@@ -26,9 +28,10 @@ from camel.toolkits.base import BaseToolkit
26
28
  from camel.toolkits.function_tool import FunctionTool
27
29
  from camel.utils import sanitize_filename
28
30
  from camel.utils.commons import dependencies_required
31
+ from camel.utils.tool_result import ToolResult
29
32
 
30
33
  from .agent import PlaywrightLLMAgent
31
- from .browser_session import NVBrowserSession
34
+ from .browser_session import HybridBrowserSession
32
35
 
33
36
  logger = get_logger(__name__)
34
37
 
@@ -54,9 +57,11 @@ class HybridBrowserToolkit(BaseToolkit):
54
57
  "open_browser",
55
58
  "close_browser",
56
59
  "visit_page",
60
+ "back",
61
+ "forward",
57
62
  "click",
58
63
  "type",
59
- "enter",
64
+ "switch_tab",
60
65
  ]
61
66
 
62
67
  # All available tools
@@ -64,6 +69,8 @@ class HybridBrowserToolkit(BaseToolkit):
64
69
  "open_browser",
65
70
  "close_browser",
66
71
  "visit_page",
72
+ "back",
73
+ "forward",
67
74
  "get_page_snapshot",
68
75
  "get_som_screenshot",
69
76
  "get_page_links",
@@ -74,6 +81,9 @@ class HybridBrowserToolkit(BaseToolkit):
74
81
  "enter",
75
82
  "wait_user",
76
83
  "solve_task",
84
+ "switch_tab",
85
+ "close_tab",
86
+ "get_tab_info",
77
87
  ]
78
88
 
79
89
  def __init__(
@@ -81,9 +91,13 @@ class HybridBrowserToolkit(BaseToolkit):
81
91
  *,
82
92
  headless: bool = True,
83
93
  user_data_dir: Optional[str] = None,
94
+ stealth: bool = False,
84
95
  web_agent_model: Optional[BaseModelBackend] = None,
85
96
  cache_dir: str = "tmp/",
86
97
  enabled_tools: Optional[List[str]] = None,
98
+ browser_log_to_file: bool = False,
99
+ session_id: Optional[str] = None,
100
+ default_start_url: str = "https://google.com/",
87
101
  ) -> None:
88
102
  r"""Initialize the HybridBrowserToolkit.
89
103
 
@@ -94,6 +108,12 @@ class HybridBrowserToolkit(BaseToolkit):
94
108
  browser data like cookies and local storage. Useful for
95
109
  maintaining sessions across runs. Defaults to `None` (a
96
110
  temporary directory is used).
111
+ stealth (bool): Whether to run the browser in stealth mode to avoid
112
+ bot detection. When enabled, hides WebDriver characteristics,
113
+ spoofs navigator properties, and implements various
114
+ anti-detection
115
+ measures. Highly recommended for production use and when
116
+ accessing sites with bot detection. Defaults to `False`.
97
117
  web_agent_model (Optional[BaseModelBackend]): The language model
98
118
  backend to use for the high-level `solve_task` agent. This is
99
119
  required only if you plan to use `solve_task`.
@@ -102,18 +122,57 @@ class HybridBrowserToolkit(BaseToolkit):
102
122
  screenshots. Defaults to `"tmp/"`.
103
123
  enabled_tools (Optional[List[str]]): List of tool names to enable.
104
124
  If None, uses DEFAULT_TOOLS. Available tools: open_browser,
105
- close_browser, visit_page, get_page_snapshot,
125
+ close_browser, visit_page, back, forward, get_page_snapshot,
106
126
  get_som_screenshot, get_page_links, click, type, select,
107
127
  scroll, enter, wait_user, solve_task.
108
128
  Defaults to `None`.
129
+ browser_log_to_file (bool): Whether to save detailed browser
130
+ action logs to file.
131
+ When enabled, logs action inputs/outputs, execution times,
132
+ and page loading times.
133
+ Logs are saved to an auto-generated timestamped file.
134
+ Defaults to `False`.
135
+ session_id (Optional[str]): A unique identifier for this browser
136
+ session. When multiple HybridBrowserToolkit instances are used
137
+ concurrently, different session IDs prevent them from sharing
138
+ the same browser session and causing conflicts. If None, a
139
+ default session will be used. Defaults to `None`.
140
+ default_start_url (str): The default URL to navigate to when
141
+ open_browser() is called without a start_url parameter or with
142
+ None. Defaults to `"https://google.com/"`.
109
143
  """
110
144
  super().__init__()
111
145
  self._headless = headless
112
146
  self._user_data_dir = user_data_dir
113
147
  self.web_agent_model = web_agent_model
114
148
  self.cache_dir = cache_dir
149
+ self.default_start_url = default_start_url
115
150
  os.makedirs(self.cache_dir, exist_ok=True)
116
151
 
152
+ # Logging configuration - fixed values for simplicity
153
+ self.enable_action_logging = True
154
+ self.enable_timing_logging = True
155
+ self.enable_page_loading_logging = True
156
+ self.log_to_console = False # Always disabled for cleaner output
157
+ self.log_to_file = browser_log_to_file
158
+ self.max_log_length = None # No truncation for file logs
159
+
160
+ # Set up log file if needed
161
+ if self.log_to_file:
162
+ # Create log directory if it doesn't exist
163
+ log_dir = "browser_log"
164
+ os.makedirs(log_dir, exist_ok=True)
165
+
166
+ timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
167
+ self.log_file_path: Optional[str] = os.path.join(
168
+ log_dir, f"hybrid_browser_toolkit_{timestamp}_{session_id}.log"
169
+ )
170
+ else:
171
+ self.log_file_path = None
172
+
173
+ # Initialize log buffer for in-memory storage
174
+ self.log_buffer: List[Dict[str, Any]] = []
175
+
117
176
  # Configure enabled tools
118
177
  if enabled_tools is None:
119
178
  self.enabled_tools = self.DEFAULT_TOOLS.copy()
@@ -131,10 +190,23 @@ class HybridBrowserToolkit(BaseToolkit):
131
190
 
132
191
  logger.info(f"Enabled tools: {self.enabled_tools}")
133
192
 
193
+ # Log initialization if file logging is enabled
194
+ if self.log_to_file:
195
+ logger.info(
196
+ "HybridBrowserToolkit initialized with file logging enabled"
197
+ )
198
+ logger.info(f"Log file path: {self.log_file_path}")
199
+
134
200
  # Core components
135
- self._session = NVBrowserSession(
136
- headless=headless, user_data_dir=user_data_dir
201
+ temp_session = HybridBrowserSession(
202
+ headless=headless,
203
+ user_data_dir=user_data_dir,
204
+ stealth=stealth,
205
+ session_id=session_id,
137
206
  )
207
+ # Use the session directly - singleton logic is handled in
208
+ # ensure_browser
209
+ self._session = temp_session
138
210
  self._agent: Optional[PlaywrightLLMAgent] = None
139
211
  self._unified_script = self._load_unified_analyzer()
140
212
 
@@ -151,7 +223,13 @@ class HybridBrowserToolkit(BaseToolkit):
151
223
  try:
152
224
  loop = asyncio.get_event_loop()
153
225
  if not loop.is_closed() and not loop.is_running():
154
- loop.run_until_complete(self.close_browser())
226
+ # Try to close browser with a timeout to prevent hanging
227
+ try:
228
+ loop.run_until_complete(
229
+ asyncio.wait_for(self.close_browser(), timeout=2.0)
230
+ )
231
+ except asyncio.TimeoutError:
232
+ pass # Skip cleanup if it takes too long
155
233
  except (RuntimeError, ImportError):
156
234
  pass # Event loop unavailable, skip cleanup
157
235
  except Exception:
@@ -186,12 +264,176 @@ class HybridBrowserToolkit(BaseToolkit):
186
264
  f"{method_name}: 'ref' must be a non-empty string"
187
265
  )
188
266
 
267
+ def _truncate_if_needed(self, content: Any) -> str:
268
+ r"""Truncate content if max_log_length is set."""
269
+ content_str = str(content)
270
+ if (
271
+ self.max_log_length is not None
272
+ and len(content_str) > self.max_log_length
273
+ ):
274
+ return content_str[: self.max_log_length] + "... [TRUNCATED]"
275
+ return content_str
276
+
277
+ async def _get_current_url(self) -> Optional[str]:
278
+ r"""Safely get the current URL of the active page."""
279
+ try:
280
+ page = await self._session.get_page()
281
+ if page and not page.is_closed():
282
+ return page.url
283
+ return None # Return None if page is closed
284
+ except Exception:
285
+ # This can happen if browser is not open.
286
+ return None
287
+
288
+ async def _log_action(
289
+ self,
290
+ action_name: str,
291
+ inputs: Dict[str, Any],
292
+ outputs: Any,
293
+ execution_time: float,
294
+ page_load_time: Optional[float] = None,
295
+ error: Optional[str] = None,
296
+ ) -> None:
297
+ r"""Log action details with comprehensive information."""
298
+ if not (self.enable_action_logging or self.enable_timing_logging):
299
+ return
300
+
301
+ current_url = await self._get_current_url()
302
+
303
+ log_entry: Dict[str, Any] = {
304
+ "timestamp": datetime.datetime.now().isoformat(),
305
+ "action": action_name,
306
+ "url": current_url,
307
+ "execution_time_ms": round(execution_time * 1000, 2),
308
+ }
309
+
310
+ if self.enable_action_logging:
311
+ log_entry["inputs"] = inputs
312
+ if error:
313
+ log_entry["error"] = str(error)
314
+ elif isinstance(outputs, dict):
315
+ # Unpack dictionary items into the log entry
316
+ log_entry.update(outputs)
317
+ elif isinstance(outputs, ToolResult):
318
+ log_entry["outputs"] = {
319
+ "text": outputs.text,
320
+ "images": outputs.images,
321
+ }
322
+ else:
323
+ # For non-dict outputs, assign to 'outputs' key
324
+ log_entry["outputs"] = outputs
325
+
326
+ if page_load_time is not None and self.enable_page_loading_logging:
327
+ log_entry["page_load_time_ms"] = round(page_load_time * 1000, 2)
328
+
329
+ # Add to buffer
330
+ self.log_buffer.append(log_entry)
331
+
332
+ # Console logging
333
+ if self.log_to_console:
334
+ log_msg = f"[BROWSER ACTION] {action_name}"
335
+ if self.enable_timing_logging:
336
+ log_msg += f" | Execution: {log_entry['execution_time_ms']}ms"
337
+ if page_load_time is not None and self.enable_page_loading_logging:
338
+ log_msg += f" | Page Load: {log_entry['page_load_time_ms']}ms"
339
+ if error:
340
+ log_msg += f" | ERROR: {error}"
341
+
342
+ logger.info(log_msg)
343
+
344
+ if self.enable_action_logging:
345
+ logger.info(f" Inputs: {self._truncate_if_needed(inputs)}")
346
+ if not error:
347
+ if isinstance(outputs, dict):
348
+ for key, value in outputs.items():
349
+ logger.info(
350
+ f" - {key}: "
351
+ f"{self._truncate_if_needed(value)}"
352
+ )
353
+ else:
354
+ logger.info(
355
+ f" Outputs: {self._truncate_if_needed(outputs)}"
356
+ )
357
+
358
+ # File logging
359
+ if self.log_to_file and self.log_file_path:
360
+ try:
361
+ with open(self.log_file_path, 'a', encoding='utf-8') as f:
362
+ # Write full log entry to file without truncation
363
+ f.write(
364
+ json.dumps(log_entry, ensure_ascii=False, indent=2)
365
+ + '\n'
366
+ )
367
+ except Exception as e:
368
+ logger.error(f"Failed to write to log file: {e}")
369
+
370
+ @staticmethod
371
+ def action_logger(func: Callable[..., Any]) -> Callable[..., Any]:
372
+ r"""Decorator to add logging to action methods."""
373
+
374
+ @wraps(func)
375
+ async def wrapper(self, *args, **kwargs):
376
+ action_name = func.__name__
377
+ start_time = time.time()
378
+
379
+ # Log inputs
380
+ inputs = {
381
+ "args": args, # Don't skip self since it's already handled
382
+ "kwargs": kwargs,
383
+ }
384
+
385
+ try:
386
+ # Execute the original function
387
+ result = await func(self, *args, **kwargs)
388
+ execution_time = time.time() - start_time
389
+
390
+ # Log success
391
+ await self._log_action(
392
+ action_name=action_name,
393
+ inputs=inputs,
394
+ outputs=result,
395
+ execution_time=execution_time,
396
+ )
397
+
398
+ return result
399
+
400
+ except Exception as e:
401
+ execution_time = time.time() - start_time
402
+ error_msg = f"{type(e).__name__}: {e!s}"
403
+
404
+ # Log error
405
+ await self._log_action(
406
+ action_name=action_name,
407
+ inputs=inputs,
408
+ outputs=None,
409
+ execution_time=execution_time,
410
+ error=error_msg,
411
+ )
412
+
413
+ raise
414
+
415
+ return wrapper
416
+
417
+ async def _get_session(self) -> "HybridBrowserSession":
418
+ """Get the correct singleton session instance."""
419
+ singleton = await HybridBrowserSession._get_or_create_instance(
420
+ self._session
421
+ )
422
+ if singleton is not self._session:
423
+ logger.debug("Updating to singleton session instance")
424
+ self._session = singleton
425
+ return self._session
426
+
189
427
  async def _ensure_browser(self):
190
- await self._session.ensure_browser()
428
+ # Get singleton instance and update self._session if needed
429
+ session = await self._get_session()
430
+ await session.ensure_browser()
191
431
 
192
432
  async def _require_page(self):
193
- await self._session.ensure_browser()
194
- return await self._session.get_page()
433
+ # Get singleton instance and update self._session if needed
434
+ session = await self._get_session()
435
+ await session.ensure_browser()
436
+ return await session.get_page()
195
437
 
196
438
  async def _wait_for_page_stability(self):
197
439
  r"""Wait for page to become stable after actions that might trigger
@@ -370,8 +612,108 @@ class HybridBrowserToolkit(BaseToolkit):
370
612
 
371
613
  return "\n".join(lines)
372
614
 
615
+ async def _get_tab_info_for_output(self) -> Dict[str, Any]:
616
+ r"""Get tab information to include in action outputs."""
617
+ try:
618
+ # Ensure we have the correct singleton session instance first
619
+ session = await self._get_session()
620
+
621
+ # Add debug info for tab info retrieval
622
+ logger.debug("Attempting to get tab info from session...")
623
+ tab_info = await session.get_tab_info()
624
+ current_tab_index = await session.get_current_tab_index()
625
+
626
+ # Debug log the successful retrieval
627
+ logger.debug(
628
+ f"Successfully retrieved {len(tab_info)} tabs, current: "
629
+ f"{current_tab_index}"
630
+ )
631
+
632
+ return {
633
+ "tabs": tab_info,
634
+ "current_tab": current_tab_index,
635
+ "total_tabs": len(tab_info),
636
+ }
637
+ except Exception as e:
638
+ logger.warning(
639
+ f"Failed to get tab info from session: {type(e).__name__}: {e}"
640
+ )
641
+
642
+ # Try to get actual tab count from session pages directly
643
+ try:
644
+ # Get the correct session instance for fallback
645
+ fallback_session = await self._get_session()
646
+
647
+ # Check browser session state
648
+ session_state = {
649
+ "has_session": fallback_session is not None,
650
+ "has_pages_attr": hasattr(fallback_session, '_pages'),
651
+ "pages_count": len(fallback_session._pages)
652
+ if hasattr(fallback_session, '_pages')
653
+ else "unknown",
654
+ "has_page": hasattr(fallback_session, '_page')
655
+ and fallback_session._page is not None,
656
+ "session_id": getattr(
657
+ fallback_session, '_session_id', 'unknown'
658
+ ),
659
+ }
660
+ logger.debug(f"Browser session state: {session_state}")
661
+
662
+ actual_tab_count = 0
663
+ if (
664
+ hasattr(fallback_session, '_pages')
665
+ and fallback_session._pages
666
+ ):
667
+ actual_tab_count = len(fallback_session._pages)
668
+ # Also try to filter out closed pages
669
+ try:
670
+ open_pages = [
671
+ p
672
+ for p in fallback_session._pages
673
+ if not p.is_closed()
674
+ ]
675
+ actual_tab_count = len(open_pages)
676
+ logger.debug(
677
+ f"Found {actual_tab_count} open tabs out of "
678
+ f"{len(fallback_session._pages)} total"
679
+ )
680
+ except Exception:
681
+ # Keep the original count if we can't check page status
682
+ pass
683
+
684
+ if actual_tab_count == 0:
685
+ # If no pages, check if browser is even initialized
686
+ if (
687
+ hasattr(fallback_session, '_page')
688
+ and fallback_session._page is not None
689
+ ):
690
+ actual_tab_count = 1
691
+ logger.debug(
692
+ "No pages in list but main page exists, assuming "
693
+ "1 tab"
694
+ )
695
+ else:
696
+ actual_tab_count = 1
697
+ logger.debug("No pages found, defaulting to 1 tab")
698
+
699
+ logger.debug(f"Using fallback tab count: {actual_tab_count}")
700
+ return {
701
+ "tabs": [],
702
+ "current_tab": 0,
703
+ "total_tabs": actual_tab_count,
704
+ }
705
+
706
+ except Exception as fallback_error:
707
+ logger.warning(
708
+ f"Fallback tab count also failed: "
709
+ f"{type(fallback_error).__name__}: {fallback_error}"
710
+ )
711
+ return {"tabs": [], "current_tab": 0, "total_tabs": 1}
712
+
373
713
  async def _exec_with_snapshot(
374
- self, action: Dict[str, Any]
714
+ self,
715
+ action: Dict[str, Any],
716
+ element_details: Optional[Dict[str, Any]] = None,
375
717
  ) -> Dict[str, str]:
376
718
  r"""Execute action and return result with snapshot comparison."""
377
719
 
@@ -379,72 +721,166 @@ class HybridBrowserToolkit(BaseToolkit):
379
721
  action_type = action.get("type", "unknown")
380
722
  logger.info(f"Executing action: {action_type}")
381
723
 
382
- # Get before snapshot
383
- logger.info("Capturing pre-action snapshot...")
384
- snapshot_start = time.time()
385
- before_snapshot = await self._session.get_snapshot(
386
- force_refresh=True, diff_only=False
387
- )
388
- snapshot_time = time.time() - snapshot_start
389
- logger.info(f"Pre-action snapshot captured in {snapshot_time:.2f}s")
724
+ action_start_time = time.time()
725
+ inputs: Dict[str, Any] = {"action": action}
726
+ page_load_time = None
390
727
 
391
- # Execute action
392
- logger.info(f"Executing {action_type} action...")
393
- action_start = time.time()
394
- result = await self._session.exec_action(action)
395
- action_time = time.time() - action_start
396
- logger.info(f"Action {action_type} completed in {action_time:.2f}s")
397
-
398
- # Wait for page stability after action (especially important for click)
399
- if action_type in ["click", "type", "select", "enter"]:
400
- logger.info(
401
- f"Waiting for page stability " f"after {action_type}..."
728
+ try:
729
+ # Get before snapshot
730
+ logger.info("Capturing pre-action snapshot...")
731
+ snapshot_start = time.time()
732
+ before_snapshot = await self._session.get_snapshot(
733
+ force_refresh=True, diff_only=False
402
734
  )
403
- stability_start = time.time()
404
- await self._wait_for_page_stability()
405
- stability_time = time.time() - stability_start
735
+ snapshot_time = time.time() - snapshot_start
406
736
  logger.info(
407
- f"Page stability wait " f"completed in {stability_time:.2f}s"
737
+ f"Pre-action snapshot captured in {snapshot_time:.2f}s"
408
738
  )
409
739
 
410
- # Get after snapshot
411
- logger.info("Capturing post-action snapshot...")
412
- snapshot_start = time.time()
413
- after_snapshot = await self._session.get_snapshot(
414
- force_refresh=True, diff_only=False
415
- )
416
- snapshot_time = time.time() - snapshot_start
417
- logger.info(
418
- f"Post-action snapshot " f"captured in {snapshot_time:.2f}s"
419
- )
420
-
421
- # Check for snapshot quality and log warnings
422
- if before_snapshot == after_snapshot:
423
- snapshot = "snapshot not changed"
424
- logger.debug("Page snapshot unchanged after action")
425
- else:
426
- snapshot = after_snapshot
427
- # Check if snapshot is empty or problematic
428
- if "<empty>" in after_snapshot:
429
- logger.warning(
430
- f"Action {action_type} resulted "
431
- f"in empty snapshot - "
432
- f"page may still be loading"
740
+ # Execute action
741
+ logger.info(f"Executing {action_type} action...")
742
+ exec_start = time.time()
743
+ exec_result = await self._session.exec_action(action)
744
+ exec_time = time.time() - exec_start
745
+ logger.info(f"Action {action_type} completed in {exec_time:.2f}s")
746
+
747
+ # Parse the detailed result from ActionExecutor
748
+ if isinstance(exec_result, dict):
749
+ result_message = exec_result.get("message", str(exec_result))
750
+ action_details = exec_result.get("details", {})
751
+ success = exec_result.get("success", True)
752
+ else:
753
+ result_message = str(exec_result)
754
+ action_details = {}
755
+ success = True
756
+
757
+ # Wait for page stability after action (especially important for
758
+ # click)
759
+ stability_time: float = 0.0
760
+ if action_type in ["click", "type", "select", "enter"]:
761
+ logger.info(
762
+ f"Waiting for page stability " f"after {action_type}..."
433
763
  )
434
- elif len(after_snapshot.strip()) < 50:
435
- logger.warning(
436
- f"Action {action_type} resulted "
437
- f"in very short snapshot:"
438
- f" {len(after_snapshot)} chars"
764
+ stability_start = time.time()
765
+ await self._wait_for_page_stability()
766
+ stability_time = time.time() - stability_start
767
+ logger.info(
768
+ f"Page stability wait "
769
+ f"completed in "
770
+ f"{stability_time:.2f}s"
439
771
  )
772
+ page_load_time = stability_time
773
+
774
+ # Enhanced logging for page loading times
775
+ if self.enable_page_loading_logging and self.log_to_console:
776
+ logger.info(
777
+ f"[PAGE LOADING] Page stability for {action_type}: "
778
+ f"{round(stability_time * 1000, 2)}ms"
779
+ )
780
+
781
+ # Get after snapshot
782
+ logger.info("Capturing post-action snapshot...")
783
+ snapshot_start = time.time()
784
+ after_snapshot = await self._session.get_snapshot(
785
+ force_refresh=True, diff_only=False
786
+ )
787
+ snapshot_time = time.time() - snapshot_start
788
+ logger.info(
789
+ f"Post-action snapshot " f"captured in {snapshot_time:.2f}s"
790
+ )
791
+
792
+ # Check for snapshot quality and log warnings
793
+ if before_snapshot == after_snapshot:
794
+ snapshot = "snapshot not changed"
795
+ logger.debug("Page snapshot unchanged after action")
440
796
  else:
797
+ snapshot = after_snapshot
798
+ # Check if snapshot is empty or problematic
799
+ if "<empty>" in after_snapshot:
800
+ logger.warning(
801
+ f"Action {action_type} resulted "
802
+ f"in empty snapshot - "
803
+ f"page may still be loading"
804
+ )
805
+ elif len(after_snapshot.strip()) < 50:
806
+ logger.warning(
807
+ f"Action {action_type} resulted "
808
+ f"in very short snapshot:"
809
+ f" {len(after_snapshot)} chars"
810
+ )
811
+ else:
812
+ logger.debug(
813
+ f"Action {action_type} resulted "
814
+ f"in updated snapshot: "
815
+ f"{len(after_snapshot)} chars"
816
+ )
817
+
818
+ # Get tab information for output
819
+ tab_info = await self._get_tab_info_for_output()
820
+
821
+ # Create comprehensive output for logging
822
+ execution_time = time.time() - action_start_time
823
+ outputs = {
824
+ "result": result_message,
825
+ "snapshot": snapshot,
826
+ "success": success,
827
+ "action_details": action_details,
828
+ "execution_stats": {
829
+ "exec_time_ms": round(exec_time * 1000, 2),
830
+ "stability_time_ms": round(stability_time * 1000, 2)
831
+ if stability_time > 0
832
+ else None,
833
+ "total_time_ms": round(execution_time * 1000, 2),
834
+ },
835
+ **tab_info, # Include tab information
836
+ }
837
+
838
+ # If snapshot is unchanged after click, add element details to log
839
+ if (
840
+ snapshot == "snapshot not changed"
841
+ and action_type == "click"
842
+ and element_details
843
+ ):
441
844
  logger.debug(
442
- f"Action {action_type} resulted "
443
- f"in updated snapshot: "
444
- f"{len(after_snapshot)} chars"
845
+ "Snapshot unchanged after click. "
846
+ "Adding element details to log."
847
+ )
848
+ outputs["clicked_element_tag"] = element_details.get(
849
+ "tagName", "N/A"
445
850
  )
851
+ outputs["clicked_element_content"] = element_details.get(
852
+ "name", ""
853
+ )
854
+ outputs["clicked_element_type"] = element_details.get(
855
+ "role", "generic"
856
+ )
857
+
858
+ # Log the action with all details
859
+ await self._log_action(
860
+ action_name=f"_exec_with_snapshot_{action_type}",
861
+ inputs=inputs,
862
+ outputs=outputs,
863
+ execution_time=execution_time,
864
+ page_load_time=page_load_time,
865
+ )
866
+
867
+ return {"result": result_message, "snapshot": snapshot}
868
+
869
+ except Exception as e:
870
+ execution_time = time.time() - action_start_time
871
+ error_msg = f"{type(e).__name__}: {e!s}"
872
+
873
+ # Log error
874
+ await self._log_action(
875
+ action_name=f"_exec_with_snapshot_{action_type}",
876
+ inputs=inputs,
877
+ outputs=None,
878
+ execution_time=execution_time,
879
+ page_load_time=page_load_time,
880
+ error=error_msg,
881
+ )
446
882
 
447
- return {"result": result, "snapshot": snapshot}
883
+ raise
448
884
 
449
885
  async def _extract_links_by_refs(
450
886
  self, snapshot: str, page, refs: List[str]
@@ -509,29 +945,31 @@ class HybridBrowserToolkit(BaseToolkit):
509
945
 
510
946
  # Public API Methods
511
947
 
512
- async def open_browser(
513
- self, start_url: Optional[str] = "https://search.brave.com/"
514
- ) -> Dict[str, str]:
515
- r"""Launches a new browser session, making it ready for web automation.
516
-
517
- This method initializes the underlying browser instance. If a
518
- `start_url` is provided, it will also navigate to that URL. If you
519
- don't have a specific URL to start with, you can use a search engine
520
- like 'https://search.brave.com/'.
948
+ async def open_browser(self) -> Dict[str, Any]:
949
+ r"""Launches a new browser session and navigates to the configured
950
+ default page.
521
951
 
522
- Args:
523
- start_url (Optional[str]): The initial URL to navigate to after the
524
- browser is launched. If not provided, the browser will start
525
- with a blank page. (default: :obj:`https://search.brave.com/`)
952
+ This method initializes the underlying browser instance and
953
+ automatically
954
+ navigates to the default start URL that was configured during toolkit
955
+ initialization. Agents cannot specify a custom URL - they must use the
956
+ visit_page tool for navigation to other URLs.
526
957
 
527
958
  Returns:
528
- Dict[str, str]: A dictionary containing:
959
+ Dict[str, Any]: A dictionary containing:
529
960
  - "result": A string confirming that the browser session has
530
- started.
961
+ started and the default page has been loaded.
531
962
  - "snapshot": A textual representation of the current page's
532
963
  interactive elements. This snapshot is crucial for
533
964
  identifying elements for subsequent actions.
965
+ - "tabs": List of all open tabs with their information.
966
+ - "current_tab": Index of the currently active tab.
967
+ - "total_tabs": Total number of open tabs.
534
968
  """
969
+ # Add logging if enabled
970
+ action_start = time.time()
971
+ inputs: Dict[str, Any] = {} # No input parameters for agents
972
+
535
973
  logger.info("Starting browser session...")
536
974
 
537
975
  browser_start = time.time()
@@ -539,20 +977,42 @@ class HybridBrowserToolkit(BaseToolkit):
539
977
  browser_time = time.time() - browser_start
540
978
  logger.info(f"Browser session started in {browser_time:.2f}s")
541
979
 
542
- if start_url:
543
- logger.info(f"Auto-navigating to start URL: {start_url}")
544
- return await self.visit_page(start_url)
980
+ try:
981
+ # Always use the configured default start URL
982
+ start_url = self.default_start_url
983
+ logger.info(f"Navigating to configured default page: {start_url}")
984
+
985
+ result = await self.visit_page(start_url)
986
+
987
+ # Log success
988
+ if self.enable_action_logging or self.enable_timing_logging:
989
+ execution_time = time.time() - action_start
990
+ await self._log_action(
991
+ action_name="open_browser",
992
+ inputs=inputs,
993
+ outputs={
994
+ "result": "Browser opened and navigated to default "
995
+ "page."
996
+ },
997
+ execution_time=execution_time,
998
+ )
545
999
 
546
- logger.info("Capturing initial browser snapshot...")
547
- snapshot_start = time.time()
548
- snapshot = await self._session.get_snapshot(
549
- force_refresh=True, diff_only=False
550
- )
551
- snapshot_time = time.time() - snapshot_start
552
- logger.info(f"Initial snapshot captured in {snapshot_time:.2f}s")
1000
+ return result
553
1001
 
554
- return {"result": "Browser session started.", "snapshot": snapshot}
1002
+ except Exception as e:
1003
+ # Log error
1004
+ if self.enable_action_logging or self.enable_timing_logging:
1005
+ execution_time = time.time() - action_start
1006
+ await self._log_action(
1007
+ action_name="open_browser",
1008
+ inputs=inputs,
1009
+ outputs=None,
1010
+ execution_time=execution_time,
1011
+ error=f"{type(e).__name__}: {e!s}",
1012
+ )
1013
+ raise
555
1014
 
1015
+ @action_logger
556
1016
  async def close_browser(self) -> str:
557
1017
  r"""Closes the current browser session and releases all associated
558
1018
  resources.
@@ -573,7 +1033,8 @@ class HybridBrowserToolkit(BaseToolkit):
573
1033
  await self._session.close()
574
1034
  return "Browser session closed."
575
1035
 
576
- async def visit_page(self, url: str) -> Dict[str, str]:
1036
+ @action_logger
1037
+ async def visit_page(self, url: str) -> Dict[str, Any]:
577
1038
  r"""Navigates the current browser page to a specified URL.
578
1039
 
579
1040
  Args:
@@ -581,16 +1042,22 @@ class HybridBrowserToolkit(BaseToolkit):
581
1042
  valid URL.
582
1043
 
583
1044
  Returns:
584
- Dict[str, str]: A dictionary containing:
1045
+ Dict[str, Any]: A dictionary containing:
585
1046
  - "result": A message indicating the outcome of the navigation,
586
1047
  e.g., "Navigation successful.".
587
1048
  - "snapshot": A new textual snapshot of the page's interactive
588
1049
  elements after the new page has loaded.
1050
+ - "tabs": List of all open tabs with their information.
1051
+ - "current_tab": Index of the currently active tab.
1052
+ - "total_tabs": Total number of open tabs.
589
1053
  """
590
1054
  if not url or not isinstance(url, str):
591
1055
  return {
592
1056
  "result": "Error: 'url' must be a non-empty string",
593
1057
  "snapshot": "",
1058
+ "tabs": [],
1059
+ "current_tab": 0,
1060
+ "total_tabs": 1,
594
1061
  }
595
1062
 
596
1063
  if '://' not in url:
@@ -613,8 +1080,142 @@ class HybridBrowserToolkit(BaseToolkit):
613
1080
  snapshot_time = time.time() - snapshot_start
614
1081
  logger.info(f"Navigation snapshot captured in {snapshot_time:.2f}s")
615
1082
 
616
- return {"result": nav_result, "snapshot": snapshot}
1083
+ # Get tab information
1084
+ tab_info = await self._get_tab_info_for_output()
1085
+
1086
+ return {"result": nav_result, "snapshot": snapshot, **tab_info}
1087
+
1088
@action_logger
async def back(self) -> Dict[str, Any]:
    r"""Navigates the browser back to the previous page in history.

    This function simulates clicking the browser's back button, taking
    you to the previously visited page if one exists in the browser
    history.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating the outcome of the back
              navigation, e.g., "Back navigation successful." or a
              "Back navigation failed: ..." message if no previous page
              exists or the navigation raised an error.
            - "snapshot": A full textual snapshot of the current page,
              captured after the navigation attempt (also on failure).
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    page = await self._require_page()

    try:
        logger.info("Navigating back in browser history...")
        url_before = page.url
        nav_start = time.time()
        response = await page.go_back(
            wait_until="domcontentloaded", timeout=30000
        )
        nav_time = time.time() - nav_start

        # go_back() does not raise when there is no previous history
        # entry: it resolves with None and the page stays where it was.
        # A None response alone is not conclusive (same-document
        # navigations may also yield None), so it only counts as a
        # failure when the URL did not change either. Raising here routes
        # this case through the shared failure handling below.
        if response is None and page.url == url_before:
            raise RuntimeError("no previous page in browser history")

        logger.info(f"Back navigation completed in {nav_time:.2f}s")

        # Wait for page stability
        await self._wait_for_page_stability()

        # Get snapshot
        logger.info("Capturing page snapshot after back navigation...")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Back navigation snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        return {
            "result": "Back navigation successful.",
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Back navigation failed: {e}")
        # Get current snapshot even if navigation failed, so the caller
        # still sees the state of the page it is actually on.
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        tab_info = await self._get_tab_info_for_output()
        return {
            "result": f"Back navigation failed: {e!s}",
            "snapshot": snapshot,
            **tab_info,
        }
1152
+
1153
@action_logger
async def forward(self) -> Dict[str, Any]:
    r"""Navigates the browser forward to the next page in history.

    This function simulates clicking the browser's forward button, taking
    you to the next page in the browser history if one exists (i.e.,
    if you have previously navigated back).

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating the outcome of the forward
              navigation, e.g., "Forward navigation successful." or a
              "Forward navigation failed: ..." message if no next page
              exists or the navigation raised an error.
            - "snapshot": A full textual snapshot of the current page,
              captured after the navigation attempt (also on failure).
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    page = await self._require_page()

    try:
        logger.info("Navigating forward in browser history...")
        url_before = page.url
        nav_start = time.time()
        response = await page.go_forward(
            wait_until="domcontentloaded", timeout=30000
        )
        nav_time = time.time() - nav_start

        # go_forward() does not raise when there is no next history
        # entry: it resolves with None and the page stays where it was.
        # A None response alone is not conclusive (same-document
        # navigations may also yield None), so it only counts as a
        # failure when the URL did not change either. Raising here routes
        # this case through the shared failure handling below.
        if response is None and page.url == url_before:
            raise RuntimeError("no next page in browser history")

        logger.info(f"Forward navigation completed in {nav_time:.2f}s")

        # Wait for page stability
        await self._wait_for_page_stability()

        # Get snapshot
        logger.info("Capturing page snapshot after forward navigation...")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Forward navigation snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        return {
            "result": "Forward navigation successful.",
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Forward navigation failed: {e}")
        # Get current snapshot even if navigation failed, so the caller
        # still sees the state of the page it is actually on.
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        tab_info = await self._get_tab_info_for_output()
        return {
            "result": f"Forward navigation failed: {e!s}",
            "snapshot": snapshot,
            **tab_info,
        }
1217
+
1218
+ @action_logger
618
1219
  async def get_page_snapshot(self) -> str:
619
1220
  r"""Captures a textual representation of the current page's content.
620
1221
 
@@ -650,6 +1251,7 @@ class HybridBrowserToolkit(BaseToolkit):
650
1251
  )
651
1252
 
652
1253
  @dependencies_required('PIL')
1254
+ @action_logger
653
1255
  async def get_som_screenshot(self):
654
1256
  r"""Captures a screenshot of the current webpage and visually marks all
655
1257
  interactive elements. "SoM" stands for "Set of Marks".
@@ -733,7 +1335,7 @@ class HybridBrowserToolkit(BaseToolkit):
733
1335
 
734
1336
  return ToolResult(text=text_result, images=[img_data_url])
735
1337
 
736
- async def click(self, *, ref: str) -> Dict[str, str]:
1338
+ async def click(self, *, ref: str) -> Dict[str, Any]:
737
1339
  r"""Clicks on an interactive element on the page.
738
1340
 
739
1341
  Args:
@@ -742,12 +1344,15 @@ class HybridBrowserToolkit(BaseToolkit):
742
1344
  `get_som_screenshot`).
743
1345
 
744
1346
  Returns:
745
- Dict[str, str]: A dictionary containing:
1347
+ Dict[str, Any]: A dictionary containing:
746
1348
  - "result": A message confirming the click action.
747
1349
  - "snapshot": A new textual snapshot of the page after the
748
1350
  click, which may have changed as a result of the action. If
749
1351
  the snapshot is unchanged, it will be the string "snapshot
750
1352
  not changed".
1353
+ - "tabs": List of all open tabs with their information.
1354
+ - "current_tab": Index of the currently active tab.
1355
+ - "total_tabs": Total number of open tabs.
751
1356
  """
752
1357
  self._validate_ref(ref, "click")
753
1358
 
@@ -755,19 +1360,30 @@ class HybridBrowserToolkit(BaseToolkit):
755
1360
  elements = analysis.get("elements", {})
756
1361
  if ref not in elements:
757
1362
  available_refs = list(elements.keys())
758
- logger.error(
759
- f"Error: Element reference '{ref}' not found. "
760
- f"Available refs: {available_refs}"
761
- )
1363
+ logger.error(f"Error: Element reference '{ref}' not found. ")
1364
+ # Added snapshot to give more context on failure
1365
+ snapshot = self._format_snapshot_from_analysis(analysis)
1366
+ tab_info = await self._get_tab_info_for_output()
762
1367
  return {
763
1368
  "result": f"Error: Element reference '{ref}' not found. "
764
- f"Available refs: {available_refs}"
1369
+ f"Available refs: {available_refs}",
1370
+ "snapshot": snapshot,
1371
+ **tab_info,
765
1372
  }
766
1373
 
1374
+ element_details = elements.get(ref)
767
1375
  action = {"type": "click", "ref": ref}
768
- return await self._exec_with_snapshot(action)
1376
+ result = await self._exec_with_snapshot(
1377
+ action, element_details=element_details
1378
+ )
1379
+
1380
+ # Add tab information to the result
1381
+ tab_info = await self._get_tab_info_for_output()
1382
+ result.update(tab_info)
769
1383
 
770
- async def type(self, *, ref: str, text: str) -> Dict[str, str]:
1384
+ return result
1385
+
1386
+ async def type(self, *, ref: str, text: str) -> Dict[str, Any]:
771
1387
  r"""Types text into an input field, such as a textbox or search bar.
772
1388
 
773
1389
  Args:
@@ -775,18 +1391,27 @@ class HybridBrowserToolkit(BaseToolkit):
775
1391
  text (str): The text to be typed into the element.
776
1392
 
777
1393
  Returns:
778
- Dict[str, str]: A dictionary containing:
1394
+ Dict[str, Any]: A dictionary containing:
779
1395
  - "result": A message confirming the type action.
780
1396
  - "snapshot": A new textual snapshot of the page after the
781
1397
  text has been entered.
1398
+ - "tabs": List of all open tabs with their information.
1399
+ - "current_tab": Index of the currently active tab.
1400
+ - "total_tabs": Total number of open tabs.
782
1401
  """
783
1402
  self._validate_ref(ref, "type")
784
1403
  await self._get_unified_analysis() # Ensure aria-ref attributes
785
1404
 
786
1405
  action = {"type": "type", "ref": ref, "text": text}
787
- return await self._exec_with_snapshot(action)
1406
+ result = await self._exec_with_snapshot(action)
1407
+
1408
+ # Add tab information to the result
1409
+ tab_info = await self._get_tab_info_for_output()
1410
+ result.update(tab_info)
1411
+
1412
+ return result
788
1413
 
789
- async def select(self, *, ref: str, value: str) -> Dict[str, str]:
1414
+ async def select(self, *, ref: str, value: str) -> Dict[str, Any]:
790
1415
  r"""Selects an option from a dropdown (`<select>`) element.
791
1416
 
792
1417
  Args:
@@ -796,17 +1421,26 @@ class HybridBrowserToolkit(BaseToolkit):
796
1421
  visible text.
797
1422
 
798
1423
  Returns:
799
- Dict[str, str]: A dictionary containing:
1424
+ Dict[str, Any]: A dictionary containing:
800
1425
  - "result": A message confirming the select action.
801
1426
  - "snapshot": A new snapshot of the page after the selection.
1427
+ - "tabs": List of all open tabs with their information.
1428
+ - "current_tab": Index of the currently active tab.
1429
+ - "total_tabs": Total number of open tabs.
802
1430
  """
803
1431
  self._validate_ref(ref, "select")
804
1432
  await self._get_unified_analysis()
805
1433
 
806
1434
  action = {"type": "select", "ref": ref, "value": value}
807
- return await self._exec_with_snapshot(action)
1435
+ result = await self._exec_with_snapshot(action)
1436
+
1437
+ # Add tab information to the result
1438
+ tab_info = await self._get_tab_info_for_output()
1439
+ result.update(tab_info)
808
1440
 
809
- async def scroll(self, *, direction: str, amount: int) -> Dict[str, str]:
1441
+ return result
1442
+
1443
+ async def scroll(self, *, direction: str, amount: int) -> Dict[str, Any]:
810
1444
  r"""Scrolls the page window up or down by a specified amount.
811
1445
 
812
1446
  Args:
@@ -815,40 +1449,68 @@ class HybridBrowserToolkit(BaseToolkit):
815
1449
  amount (int): The number of pixels to scroll.
816
1450
 
817
1451
  Returns:
818
- Dict[str, str]: A dictionary containing:
1452
+ Dict[str, Any]: A dictionary containing:
819
1453
  - "result": A confirmation of the scroll action.
820
1454
  - "snapshot": A new snapshot of the page after scrolling.
1455
+ - "tabs": List of all open tabs with their information.
1456
+ - "current_tab": Index of the currently active tab.
1457
+ - "total_tabs": Total number of open tabs.
821
1458
  """
822
1459
  if direction not in ("up", "down"):
1460
+ tab_info = await self._get_tab_info_for_output()
823
1461
  return {
824
1462
  "result": "Error: direction must be 'up' or 'down'",
825
1463
  "snapshot": "",
1464
+ **tab_info,
826
1465
  }
827
1466
 
828
1467
  action = {"type": "scroll", "direction": direction, "amount": amount}
829
- return await self._exec_with_snapshot(action)
1468
+ result = await self._exec_with_snapshot(action)
830
1469
 
831
- async def enter(self, *, ref: str) -> Dict[str, str]:
832
- r"""Simulates pressing the Enter key on a specific element.
1470
+ # Add tab information to the result
1471
+ tab_info = await self._get_tab_info_for_output()
1472
+ result.update(tab_info)
833
1473
 
834
- This is often used to submit forms after filling them out.
1474
+ return result
835
1475
 
836
- Args:
837
- ref (str): The reference ID of the element to press Enter on.
1476
async def enter(self) -> Dict[str, Any]:
    r"""Simulates pressing the Enter key on the currently focused element.

    This tool is used to execute or confirm an action after interacting
    with an element, such as:
    - Submitting a search query after typing in a search box.
    - Confirming a form submission.
    - Executing a command in a text input field.

    The common usage pattern is to first use the 'type' tool to input
    text, which sets the focus, and then call 'enter' without any
    parameters to trigger the action.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A confirmation of the Enter key action.
            - "snapshot": A new page snapshot, as this action often
              triggers navigation or page updates.
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    # The action carries no element reference: Enter always goes to
    # whatever element currently holds focus.
    outcome = await self._exec_with_snapshot({"type": "enter"})

    # Merge the tab overview into the action result before returning it.
    outcome.update(await self._get_tab_info_for_output())
    return outcome
848
1509
 
1510
+ @action_logger
849
1511
  async def wait_user(
850
1512
  self, timeout_sec: Optional[float] = None
851
- ) -> Dict[str, str]:
1513
+ ) -> Dict[str, Any]:
852
1514
  r"""Pauses the agent's execution and waits for human intervention.
853
1515
 
854
1516
  This is useful for tasks that require manual steps, like solving a
@@ -861,10 +1523,13 @@ class HybridBrowserToolkit(BaseToolkit):
861
1523
  automatically. If `None`, it will wait indefinitely.
862
1524
 
863
1525
  Returns:
864
- Dict[str, str]: A dictionary containing:
1526
+ Dict[str, Any]: A dictionary containing:
865
1527
  - "result": A message indicating how the wait ended (e.g.,
866
1528
  "User resumed." or "Timeout... reached, auto-resumed.").
867
1529
  - "snapshot": The current page snapshot after the wait.
1530
+ - "tabs": List of all open tabs with their information.
1531
+ - "current_tab": Index of the currently active tab.
1532
+ - "total_tabs": Total number of open tabs.
868
1533
  """
869
1534
  import asyncio
870
1535
 
@@ -905,8 +1570,11 @@ class HybridBrowserToolkit(BaseToolkit):
905
1570
  snapshot = await self._session.get_snapshot(
906
1571
  force_refresh=True, diff_only=False
907
1572
  )
908
- return {"result": result_msg, "snapshot": snapshot}
1573
+ tab_info = await self._get_tab_info_for_output()
1574
+
1575
+ return {"result": result_msg, "snapshot": snapshot, **tab_info}
909
1576
 
1577
+ @action_logger
910
1578
  async def get_page_links(self, *, ref: List[str]) -> Dict[str, Any]:
911
1579
  r"""Retrieves the full URLs for a given list of link reference IDs.
912
1580
 
@@ -938,6 +1606,7 @@ class HybridBrowserToolkit(BaseToolkit):
938
1606
 
939
1607
  return {"links": links}
940
1608
 
1609
+ @action_logger
941
1610
  async def solve_task(
942
1611
  self, task_prompt: str, start_url: str, max_steps: int = 15
943
1612
  ) -> str:
@@ -967,6 +1636,48 @@ class HybridBrowserToolkit(BaseToolkit):
967
1636
  await agent.process_command(task_prompt, max_steps=max_steps)
968
1637
  return "Task processing finished - see stdout for detailed trace."
969
1638
 
1639
def get_log_summary(self) -> Dict[str, Any]:
    r"""Get a summary of logged actions.

    Returns:
        Dict[str, Any]: If the log buffer is empty, a dictionary with
            "total_actions" set to 0 and a "summary" message. Otherwise
            a dictionary containing:
            - "total_actions": Number of entries in the log buffer.
            - "total_execution_time_ms": Sum of the entries'
              "execution_time_ms" values (entries without one count as
              0), rounded to 2 decimals.
            - "total_page_load_time_ms": Sum of "page_load_time_ms" over
              the entries that have it, rounded to 2 decimals.
            - "action_counts": Mapping of action name to the number of
              entries logged for it.
            - "error_count": Number of entries containing an "error" key.
            - "success_rate": Percentage of entries without an "error"
              key, rounded to 2 decimals.
    """
    from collections import Counter

    entries = self.log_buffer
    if not entries:
        return {"total_actions": 0, "summary": "No actions logged"}

    # The guard above guarantees at least one entry, so the success-rate
    # division below can never divide by zero.
    total_actions = len(entries)
    total_execution_time = sum(
        entry.get("execution_time_ms", 0) for entry in entries
    )
    total_page_load_time = sum(
        entry["page_load_time_ms"]
        for entry in entries
        if "page_load_time_ms" in entry
    )
    action_counts: Dict[str, int] = dict(
        Counter(entry["action"] for entry in entries)
    )
    error_count = sum(1 for entry in entries if "error" in entry)

    return {
        "total_actions": total_actions,
        "total_execution_time_ms": round(total_execution_time, 2),
        "total_page_load_time_ms": round(total_page_load_time, 2),
        "action_counts": action_counts,
        "error_count": error_count,
        "success_rate": round(
            (total_actions - error_count) / total_actions * 100, 2
        ),
    }
1675
+
1676
def clear_logs(self) -> None:
    r"""Empty the action log buffer.

    Calls ``clear()`` on ``self.log_buffer`` and then records an
    info-level message noting that the buffer was cleared.
    """
    buffer = self.log_buffer
    buffer.clear()
    logger.info("Log buffer cleared")
1680
+
970
1681
  def get_tools(self) -> List[FunctionTool]:
971
1682
  r"""Get available function tools
972
1683
  based on enabled_tools configuration."""
@@ -975,6 +1686,8 @@ class HybridBrowserToolkit(BaseToolkit):
975
1686
  "open_browser": self.open_browser,
976
1687
  "close_browser": self.close_browser,
977
1688
  "visit_page": self.visit_page,
1689
+ "back": self.back,
1690
+ "forward": self.forward,
978
1691
  "get_page_snapshot": self.get_page_snapshot,
979
1692
  "get_som_screenshot": self.get_som_screenshot,
980
1693
  "get_page_links": self.get_page_links,
@@ -985,6 +1698,9 @@ class HybridBrowserToolkit(BaseToolkit):
985
1698
  "enter": self.enter,
986
1699
  "wait_user": self.wait_user,
987
1700
  "solve_task": self.solve_task,
1701
+ "switch_tab": self.switch_tab,
1702
+ "close_tab": self.close_tab,
1703
+ "get_tab_info": self.get_tab_info,
988
1704
  }
989
1705
 
990
1706
  enabled_tools = []
@@ -998,11 +1714,165 @@ class HybridBrowserToolkit(BaseToolkit):
998
1714
  continue
999
1715
 
1000
1716
  if tool_name in tool_map:
1001
- enabled_tools.append(
1002
- FunctionTool(cast(Callable, tool_map[tool_name]))
1717
+ tool = FunctionTool(
1718
+ cast(Callable[..., Any], tool_map[tool_name])
1003
1719
  )
1720
+ enabled_tools.append(tool)
1004
1721
  else:
1005
1722
  logger.warning(f"Unknown tool name: {tool_name}")
1006
1723
 
1007
1724
  logger.info(f"Returning {len(enabled_tools)} enabled tools")
1008
1725
  return enabled_tools
1726
+
1727
def clone_for_new_session(
    self, new_session_id: Optional[str] = None
) -> "HybridBrowserToolkit":
    r"""Build a fresh HybridBrowserToolkit that reuses this toolkit's
    configuration under a different session ID.

    Args:
        new_session_id (Optional[str]): Session ID for the clone. If
            None, the first 8 characters of a random UUID4 string are
            used. (default: :obj:`None`)

    Returns:
        HybridBrowserToolkit: A new instance constructed from this
            toolkit's settings, with its own session ID and a cache
            directory suffixed with ``_clone_<session id>/``.
    """
    import uuid

    session_id = new_session_id
    if session_id is None:
        session_id = str(uuid.uuid4())[:8]

    # Collected in the same order the constructor receives them; the
    # stealth flag is read from the live session and falls back to False
    # when there is no session object.
    clone_kwargs = {
        "headless": self._headless,
        "user_data_dir": self._user_data_dir,
        "stealth": self._session._stealth if self._session else False,
        "web_agent_model": self.web_agent_model,
        "cache_dir": f"{self.cache_dir.rstrip('/')}_clone_{session_id}/",
        "enabled_tools": self.enabled_tools.copy(),
        "browser_log_to_file": self.log_to_file,
        "session_id": session_id,
        "default_start_url": self.default_start_url,
    }
    return HybridBrowserToolkit(**clone_kwargs)
1757
+
1758
@action_logger
async def switch_tab(self, *, tab_index: int) -> Dict[str, Any]:
    r"""Switches to a specific browser tab by its index.

    This makes the chosen tab the active one. After switching, all
    subsequent browser actions will operate on the newly selected tab.

    Args:
        tab_index (int): The zero-based index of the tab to switch to.
            Use `get_tab_info` to see available tabs and their indices.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating success or failure of the
              tab switch.
            - "snapshot": A textual snapshot of the newly active tab's
              content (empty string if the switch failed).
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    await self._ensure_browser()
    session = await self._get_session()

    # Failure path first: no snapshot is taken, only the tab overview.
    if not await session.switch_to_tab(tab_index):
        return {
            "result": f"Failed to switch to tab {tab_index}. Tab may not "
            f"exist.",
            "snapshot": "",
            **(await self._get_tab_info_for_output()),
        }

    page_snapshot = await session.get_snapshot(
        force_refresh=True, diff_only=False
    )
    tab_overview = await self._get_tab_info_for_output()
    return {
        "result": f"Successfully switched to tab {tab_index}",
        "snapshot": page_snapshot,
        **tab_overview,
    }
1806
+
1807
@action_logger
async def close_tab(self, *, tab_index: int) -> Dict[str, Any]:
    r"""Closes a specific browser tab by its index.

    After closing a tab, the browser will automatically switch to another
    available tab. If the closed tab was the only one open, the browser
    session will remain active but without any pages.

    Args:
        tab_index (int): The zero-based index of the tab to close.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating success or failure of the
              tab closure.
            - "snapshot": A textual snapshot of the currently active tab
              after the closure (empty if no snapshot could be taken,
              e.g. when no tabs remain, or if the closure failed).
            - "tabs": List of remaining open tabs.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of remaining open tabs.
    """
    await self._ensure_browser()
    session = await self._get_session()

    success = await session.close_tab(tab_index)

    if success:
        # Get current state after closing the tab
        try:
            snapshot = await session.get_snapshot(
                force_refresh=True, diff_only=False
            )
        except Exception as e:
            # Best effort: a missing active tab is expected after closing
            # the last one, so fall back to an empty snapshot. The error
            # is logged so genuine snapshot failures are not hidden.
            logger.warning(
                f"Could not capture snapshot after closing tab "
                f"{tab_index}: {e}"
            )
            snapshot = ""
        message = f"Successfully closed tab {tab_index}"
    else:
        snapshot = ""
        message = f"Failed to close tab {tab_index}. Tab may not exist."

    tab_info = await self._get_tab_info_for_output()

    return {
        "result": message,
        "snapshot": snapshot,
        **tab_info,
    }
1859
+
1860
@action_logger
async def get_tab_info(self) -> Dict[str, Any]:
    r"""Retrieves information about all currently open browser tabs.

    This gives an overview of the browser state: every open tab, its
    title and URL, and which tab is currently active.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "tabs": A list of dictionaries, each representing a tab with:
                - "index": The zero-based index of the tab
                - "title": The page title
                - "url": The current URL
                - "is_current": Whether this is the currently active tab
            - "current_tab": Index of the currently active tab
            - "total_tabs": Total number of open tabs
    """
    # Ensure the browser is available before asking for the tab overview.
    await self._ensure_browser()
    tab_overview = await self._get_tab_info_for_output()
    return tab_overview