camel-ai 0.2.71a5__py3-none-any.whl → 0.2.71a7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +51 -1
- camel/societies/workforce/role_playing_worker.py +64 -7
- camel/societies/workforce/single_agent_worker.py +73 -18
- camel/societies/workforce/structured_output_handler.py +500 -0
- camel/societies/workforce/worker.py +2 -0
- camel/societies/workforce/workforce.py +312 -147
- camel/tasks/task.py +1 -1
- camel/toolkits/file_write_toolkit.py +179 -124
- camel/toolkits/hybrid_browser_toolkit/actions.py +235 -60
- camel/toolkits/hybrid_browser_toolkit/agent.py +25 -8
- camel/toolkits/hybrid_browser_toolkit/browser_session.py +574 -164
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +996 -126
- camel/toolkits/hybrid_browser_toolkit/stealth_config.py +116 -0
- camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
- camel/toolkits/note_taking_toolkit.py +7 -13
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a7.dist-info}/METADATA +1 -1
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a7.dist-info}/RECORD +20 -17
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a7.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a7.dist-info}/licenses/LICENSE +0 -0
|
@@ -15,9 +15,11 @@
|
|
|
15
15
|
import base64
|
|
16
16
|
import datetime
|
|
17
17
|
import io
|
|
18
|
+
import json
|
|
18
19
|
import os
|
|
19
20
|
import time
|
|
20
21
|
import urllib.parse
|
|
22
|
+
from functools import wraps
|
|
21
23
|
from typing import Any, Callable, ClassVar, Dict, List, Optional, cast
|
|
22
24
|
|
|
23
25
|
from camel.logger import get_logger
|
|
@@ -26,9 +28,10 @@ from camel.toolkits.base import BaseToolkit
|
|
|
26
28
|
from camel.toolkits.function_tool import FunctionTool
|
|
27
29
|
from camel.utils import sanitize_filename
|
|
28
30
|
from camel.utils.commons import dependencies_required
|
|
31
|
+
from camel.utils.tool_result import ToolResult
|
|
29
32
|
|
|
30
33
|
from .agent import PlaywrightLLMAgent
|
|
31
|
-
from .browser_session import
|
|
34
|
+
from .browser_session import HybridBrowserSession
|
|
32
35
|
|
|
33
36
|
logger = get_logger(__name__)
|
|
34
37
|
|
|
@@ -54,9 +57,11 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
54
57
|
"open_browser",
|
|
55
58
|
"close_browser",
|
|
56
59
|
"visit_page",
|
|
60
|
+
"back",
|
|
61
|
+
"forward",
|
|
57
62
|
"click",
|
|
58
63
|
"type",
|
|
59
|
-
"
|
|
64
|
+
"switch_tab",
|
|
60
65
|
]
|
|
61
66
|
|
|
62
67
|
# All available tools
|
|
@@ -64,6 +69,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
64
69
|
"open_browser",
|
|
65
70
|
"close_browser",
|
|
66
71
|
"visit_page",
|
|
72
|
+
"back",
|
|
73
|
+
"forward",
|
|
67
74
|
"get_page_snapshot",
|
|
68
75
|
"get_som_screenshot",
|
|
69
76
|
"get_page_links",
|
|
@@ -74,6 +81,9 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
74
81
|
"enter",
|
|
75
82
|
"wait_user",
|
|
76
83
|
"solve_task",
|
|
84
|
+
"switch_tab",
|
|
85
|
+
"close_tab",
|
|
86
|
+
"get_tab_info",
|
|
77
87
|
]
|
|
78
88
|
|
|
79
89
|
def __init__(
|
|
@@ -81,9 +91,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
81
91
|
*,
|
|
82
92
|
headless: bool = True,
|
|
83
93
|
user_data_dir: Optional[str] = None,
|
|
94
|
+
stealth: bool = False,
|
|
84
95
|
web_agent_model: Optional[BaseModelBackend] = None,
|
|
85
96
|
cache_dir: str = "tmp/",
|
|
86
97
|
enabled_tools: Optional[List[str]] = None,
|
|
98
|
+
browser_log_to_file: bool = False,
|
|
99
|
+
session_id: Optional[str] = None,
|
|
100
|
+
default_start_url: str = "https://google.com/",
|
|
87
101
|
) -> None:
|
|
88
102
|
r"""Initialize the HybridBrowserToolkit.
|
|
89
103
|
|
|
@@ -94,6 +108,12 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
94
108
|
browser data like cookies and local storage. Useful for
|
|
95
109
|
maintaining sessions across runs. Defaults to `None` (a
|
|
96
110
|
temporary directory is used).
|
|
111
|
+
stealth (bool): Whether to run the browser in stealth mode to avoid
|
|
112
|
+
bot detection. When enabled, hides WebDriver characteristics,
|
|
113
|
+
spoofs navigator properties, and implements various
|
|
114
|
+
anti-detection
|
|
115
|
+
measures. Highly recommended for production use and when
|
|
116
|
+
accessing sites with bot detection. Defaults to `False`.
|
|
97
117
|
web_agent_model (Optional[BaseModelBackend]): The language model
|
|
98
118
|
backend to use for the high-level `solve_task` agent. This is
|
|
99
119
|
required only if you plan to use `solve_task`.
|
|
@@ -102,18 +122,57 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
102
122
|
screenshots. Defaults to `"tmp/"`.
|
|
103
123
|
enabled_tools (Optional[List[str]]): List of tool names to enable.
|
|
104
124
|
If None, uses DEFAULT_TOOLS. Available tools: open_browser,
|
|
105
|
-
close_browser, visit_page, get_page_snapshot,
|
|
125
|
+
close_browser, visit_page, back, forward, get_page_snapshot,
|
|
106
126
|
get_som_screenshot, get_page_links, click, type, select,
|
|
107
127
|
scroll, enter, wait_user, solve_task.
|
|
108
128
|
Defaults to `None`.
|
|
129
|
+
browser_log_to_file (bool): Whether to save detailed browser
|
|
130
|
+
action logs to file.
|
|
131
|
+
When enabled, logs action inputs/outputs, execution times,
|
|
132
|
+
and page loading times.
|
|
133
|
+
Logs are saved to an auto-generated timestamped file.
|
|
134
|
+
Defaults to `False`.
|
|
135
|
+
session_id (Optional[str]): A unique identifier for this browser
|
|
136
|
+
session. When multiple HybridBrowserToolkit instances are used
|
|
137
|
+
concurrently, different session IDs prevent them from sharing
|
|
138
|
+
the same browser session and causing conflicts. If None, a
|
|
139
|
+
default session will be used. Defaults to `None`.
|
|
140
|
+
default_start_url (str): The default URL to navigate to when
|
|
141
|
+
open_browser() is called without a start_url parameter or with
|
|
142
|
+
None. Defaults to `"https://google.com/"`.
|
|
109
143
|
"""
|
|
110
144
|
super().__init__()
|
|
111
145
|
self._headless = headless
|
|
112
146
|
self._user_data_dir = user_data_dir
|
|
113
147
|
self.web_agent_model = web_agent_model
|
|
114
148
|
self.cache_dir = cache_dir
|
|
149
|
+
self.default_start_url = default_start_url
|
|
115
150
|
os.makedirs(self.cache_dir, exist_ok=True)
|
|
116
151
|
|
|
152
|
+
# Logging configuration - fixed values for simplicity
|
|
153
|
+
self.enable_action_logging = True
|
|
154
|
+
self.enable_timing_logging = True
|
|
155
|
+
self.enable_page_loading_logging = True
|
|
156
|
+
self.log_to_console = False # Always disabled for cleaner output
|
|
157
|
+
self.log_to_file = browser_log_to_file
|
|
158
|
+
self.max_log_length = None # No truncation for file logs
|
|
159
|
+
|
|
160
|
+
# Set up log file if needed
|
|
161
|
+
if self.log_to_file:
|
|
162
|
+
# Create log directory if it doesn't exist
|
|
163
|
+
log_dir = "browser_log"
|
|
164
|
+
os.makedirs(log_dir, exist_ok=True)
|
|
165
|
+
|
|
166
|
+
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
|
|
167
|
+
self.log_file_path: Optional[str] = os.path.join(
|
|
168
|
+
log_dir, f"hybrid_browser_toolkit_{timestamp}_{session_id}.log"
|
|
169
|
+
)
|
|
170
|
+
else:
|
|
171
|
+
self.log_file_path = None
|
|
172
|
+
|
|
173
|
+
# Initialize log buffer for in-memory storage
|
|
174
|
+
self.log_buffer: List[Dict[str, Any]] = []
|
|
175
|
+
|
|
117
176
|
# Configure enabled tools
|
|
118
177
|
if enabled_tools is None:
|
|
119
178
|
self.enabled_tools = self.DEFAULT_TOOLS.copy()
|
|
@@ -131,10 +190,23 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
131
190
|
|
|
132
191
|
logger.info(f"Enabled tools: {self.enabled_tools}")
|
|
133
192
|
|
|
193
|
+
# Log initialization if file logging is enabled
|
|
194
|
+
if self.log_to_file:
|
|
195
|
+
logger.info(
|
|
196
|
+
"HybridBrowserToolkit initialized with file logging enabled"
|
|
197
|
+
)
|
|
198
|
+
logger.info(f"Log file path: {self.log_file_path}")
|
|
199
|
+
|
|
134
200
|
# Core components
|
|
135
|
-
|
|
136
|
-
headless=headless,
|
|
201
|
+
temp_session = HybridBrowserSession(
|
|
202
|
+
headless=headless,
|
|
203
|
+
user_data_dir=user_data_dir,
|
|
204
|
+
stealth=stealth,
|
|
205
|
+
session_id=session_id,
|
|
137
206
|
)
|
|
207
|
+
# Use the session directly - singleton logic is handled in
|
|
208
|
+
# ensure_browser
|
|
209
|
+
self._session = temp_session
|
|
138
210
|
self._agent: Optional[PlaywrightLLMAgent] = None
|
|
139
211
|
self._unified_script = self._load_unified_analyzer()
|
|
140
212
|
|
|
@@ -151,7 +223,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
151
223
|
try:
|
|
152
224
|
loop = asyncio.get_event_loop()
|
|
153
225
|
if not loop.is_closed() and not loop.is_running():
|
|
154
|
-
|
|
226
|
+
# Try to close browser with a timeout to prevent hanging
|
|
227
|
+
try:
|
|
228
|
+
loop.run_until_complete(
|
|
229
|
+
asyncio.wait_for(self.close_browser(), timeout=2.0)
|
|
230
|
+
)
|
|
231
|
+
except asyncio.TimeoutError:
|
|
232
|
+
pass # Skip cleanup if it takes too long
|
|
155
233
|
except (RuntimeError, ImportError):
|
|
156
234
|
pass # Event loop unavailable, skip cleanup
|
|
157
235
|
except Exception:
|
|
@@ -186,12 +264,176 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
186
264
|
f"{method_name}: 'ref' must be a non-empty string"
|
|
187
265
|
)
|
|
188
266
|
|
|
267
|
+
def _truncate_if_needed(self, content: Any) -> str:
|
|
268
|
+
r"""Truncate content if max_log_length is set."""
|
|
269
|
+
content_str = str(content)
|
|
270
|
+
if (
|
|
271
|
+
self.max_log_length is not None
|
|
272
|
+
and len(content_str) > self.max_log_length
|
|
273
|
+
):
|
|
274
|
+
return content_str[: self.max_log_length] + "... [TRUNCATED]"
|
|
275
|
+
return content_str
|
|
276
|
+
|
|
277
|
+
async def _get_current_url(self) -> Optional[str]:
|
|
278
|
+
r"""Safely get the current URL of the active page."""
|
|
279
|
+
try:
|
|
280
|
+
page = await self._session.get_page()
|
|
281
|
+
if page and not page.is_closed():
|
|
282
|
+
return page.url
|
|
283
|
+
return None # Return None if page is closed
|
|
284
|
+
except Exception:
|
|
285
|
+
# This can happen if browser is not open.
|
|
286
|
+
return None
|
|
287
|
+
|
|
288
|
+
async def _log_action(
|
|
289
|
+
self,
|
|
290
|
+
action_name: str,
|
|
291
|
+
inputs: Dict[str, Any],
|
|
292
|
+
outputs: Any,
|
|
293
|
+
execution_time: float,
|
|
294
|
+
page_load_time: Optional[float] = None,
|
|
295
|
+
error: Optional[str] = None,
|
|
296
|
+
) -> None:
|
|
297
|
+
r"""Log action details with comprehensive information."""
|
|
298
|
+
if not (self.enable_action_logging or self.enable_timing_logging):
|
|
299
|
+
return
|
|
300
|
+
|
|
301
|
+
current_url = await self._get_current_url()
|
|
302
|
+
|
|
303
|
+
log_entry: Dict[str, Any] = {
|
|
304
|
+
"timestamp": datetime.datetime.now().isoformat(),
|
|
305
|
+
"action": action_name,
|
|
306
|
+
"url": current_url,
|
|
307
|
+
"execution_time_ms": round(execution_time * 1000, 2),
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
if self.enable_action_logging:
|
|
311
|
+
log_entry["inputs"] = inputs
|
|
312
|
+
if error:
|
|
313
|
+
log_entry["error"] = str(error)
|
|
314
|
+
elif isinstance(outputs, dict):
|
|
315
|
+
# Unpack dictionary items into the log entry
|
|
316
|
+
log_entry.update(outputs)
|
|
317
|
+
elif isinstance(outputs, ToolResult):
|
|
318
|
+
log_entry["outputs"] = {
|
|
319
|
+
"text": outputs.text,
|
|
320
|
+
"images": outputs.images,
|
|
321
|
+
}
|
|
322
|
+
else:
|
|
323
|
+
# For non-dict outputs, assign to 'outputs' key
|
|
324
|
+
log_entry["outputs"] = outputs
|
|
325
|
+
|
|
326
|
+
if page_load_time is not None and self.enable_page_loading_logging:
|
|
327
|
+
log_entry["page_load_time_ms"] = round(page_load_time * 1000, 2)
|
|
328
|
+
|
|
329
|
+
# Add to buffer
|
|
330
|
+
self.log_buffer.append(log_entry)
|
|
331
|
+
|
|
332
|
+
# Console logging
|
|
333
|
+
if self.log_to_console:
|
|
334
|
+
log_msg = f"[BROWSER ACTION] {action_name}"
|
|
335
|
+
if self.enable_timing_logging:
|
|
336
|
+
log_msg += f" | Execution: {log_entry['execution_time_ms']}ms"
|
|
337
|
+
if page_load_time is not None and self.enable_page_loading_logging:
|
|
338
|
+
log_msg += f" | Page Load: {log_entry['page_load_time_ms']}ms"
|
|
339
|
+
if error:
|
|
340
|
+
log_msg += f" | ERROR: {error}"
|
|
341
|
+
|
|
342
|
+
logger.info(log_msg)
|
|
343
|
+
|
|
344
|
+
if self.enable_action_logging:
|
|
345
|
+
logger.info(f" Inputs: {self._truncate_if_needed(inputs)}")
|
|
346
|
+
if not error:
|
|
347
|
+
if isinstance(outputs, dict):
|
|
348
|
+
for key, value in outputs.items():
|
|
349
|
+
logger.info(
|
|
350
|
+
f" - {key}: "
|
|
351
|
+
f"{self._truncate_if_needed(value)}"
|
|
352
|
+
)
|
|
353
|
+
else:
|
|
354
|
+
logger.info(
|
|
355
|
+
f" Outputs: {self._truncate_if_needed(outputs)}"
|
|
356
|
+
)
|
|
357
|
+
|
|
358
|
+
# File logging
|
|
359
|
+
if self.log_to_file and self.log_file_path:
|
|
360
|
+
try:
|
|
361
|
+
with open(self.log_file_path, 'a', encoding='utf-8') as f:
|
|
362
|
+
# Write full log entry to file without truncation
|
|
363
|
+
f.write(
|
|
364
|
+
json.dumps(log_entry, ensure_ascii=False, indent=2)
|
|
365
|
+
+ '\n'
|
|
366
|
+
)
|
|
367
|
+
except Exception as e:
|
|
368
|
+
logger.error(f"Failed to write to log file: {e}")
|
|
369
|
+
|
|
370
|
+
@staticmethod
|
|
371
|
+
def action_logger(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
372
|
+
r"""Decorator to add logging to action methods."""
|
|
373
|
+
|
|
374
|
+
@wraps(func)
|
|
375
|
+
async def wrapper(self, *args, **kwargs):
|
|
376
|
+
action_name = func.__name__
|
|
377
|
+
start_time = time.time()
|
|
378
|
+
|
|
379
|
+
# Log inputs
|
|
380
|
+
inputs = {
|
|
381
|
+
"args": args, # Don't skip self since it's already handled
|
|
382
|
+
"kwargs": kwargs,
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
try:
|
|
386
|
+
# Execute the original function
|
|
387
|
+
result = await func(self, *args, **kwargs)
|
|
388
|
+
execution_time = time.time() - start_time
|
|
389
|
+
|
|
390
|
+
# Log success
|
|
391
|
+
await self._log_action(
|
|
392
|
+
action_name=action_name,
|
|
393
|
+
inputs=inputs,
|
|
394
|
+
outputs=result,
|
|
395
|
+
execution_time=execution_time,
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
return result
|
|
399
|
+
|
|
400
|
+
except Exception as e:
|
|
401
|
+
execution_time = time.time() - start_time
|
|
402
|
+
error_msg = f"{type(e).__name__}: {e!s}"
|
|
403
|
+
|
|
404
|
+
# Log error
|
|
405
|
+
await self._log_action(
|
|
406
|
+
action_name=action_name,
|
|
407
|
+
inputs=inputs,
|
|
408
|
+
outputs=None,
|
|
409
|
+
execution_time=execution_time,
|
|
410
|
+
error=error_msg,
|
|
411
|
+
)
|
|
412
|
+
|
|
413
|
+
raise
|
|
414
|
+
|
|
415
|
+
return wrapper
|
|
416
|
+
|
|
417
|
+
async def _get_session(self) -> "HybridBrowserSession":
|
|
418
|
+
"""Get the correct singleton session instance."""
|
|
419
|
+
singleton = await HybridBrowserSession._get_or_create_instance(
|
|
420
|
+
self._session
|
|
421
|
+
)
|
|
422
|
+
if singleton is not self._session:
|
|
423
|
+
logger.debug("Updating to singleton session instance")
|
|
424
|
+
self._session = singleton
|
|
425
|
+
return self._session
|
|
426
|
+
|
|
189
427
|
async def _ensure_browser(self):
|
|
190
|
-
|
|
428
|
+
# Get singleton instance and update self._session if needed
|
|
429
|
+
session = await self._get_session()
|
|
430
|
+
await session.ensure_browser()
|
|
191
431
|
|
|
192
432
|
async def _require_page(self):
|
|
193
|
-
|
|
194
|
-
|
|
433
|
+
# Get singleton instance and update self._session if needed
|
|
434
|
+
session = await self._get_session()
|
|
435
|
+
await session.ensure_browser()
|
|
436
|
+
return await session.get_page()
|
|
195
437
|
|
|
196
438
|
async def _wait_for_page_stability(self):
|
|
197
439
|
r"""Wait for page to become stable after actions that might trigger
|
|
@@ -370,8 +612,108 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
370
612
|
|
|
371
613
|
return "\n".join(lines)
|
|
372
614
|
|
|
615
|
+
async def _get_tab_info_for_output(self) -> Dict[str, Any]:
|
|
616
|
+
r"""Get tab information to include in action outputs."""
|
|
617
|
+
try:
|
|
618
|
+
# Ensure we have the correct singleton session instance first
|
|
619
|
+
session = await self._get_session()
|
|
620
|
+
|
|
621
|
+
# Add debug info for tab info retrieval
|
|
622
|
+
logger.debug("Attempting to get tab info from session...")
|
|
623
|
+
tab_info = await session.get_tab_info()
|
|
624
|
+
current_tab_index = await session.get_current_tab_index()
|
|
625
|
+
|
|
626
|
+
# Debug log the successful retrieval
|
|
627
|
+
logger.debug(
|
|
628
|
+
f"Successfully retrieved {len(tab_info)} tabs, current: "
|
|
629
|
+
f"{current_tab_index}"
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
return {
|
|
633
|
+
"tabs": tab_info,
|
|
634
|
+
"current_tab": current_tab_index,
|
|
635
|
+
"total_tabs": len(tab_info),
|
|
636
|
+
}
|
|
637
|
+
except Exception as e:
|
|
638
|
+
logger.warning(
|
|
639
|
+
f"Failed to get tab info from session: {type(e).__name__}: {e}"
|
|
640
|
+
)
|
|
641
|
+
|
|
642
|
+
# Try to get actual tab count from session pages directly
|
|
643
|
+
try:
|
|
644
|
+
# Get the correct session instance for fallback
|
|
645
|
+
fallback_session = await self._get_session()
|
|
646
|
+
|
|
647
|
+
# Check browser session state
|
|
648
|
+
session_state = {
|
|
649
|
+
"has_session": fallback_session is not None,
|
|
650
|
+
"has_pages_attr": hasattr(fallback_session, '_pages'),
|
|
651
|
+
"pages_count": len(fallback_session._pages)
|
|
652
|
+
if hasattr(fallback_session, '_pages')
|
|
653
|
+
else "unknown",
|
|
654
|
+
"has_page": hasattr(fallback_session, '_page')
|
|
655
|
+
and fallback_session._page is not None,
|
|
656
|
+
"session_id": getattr(
|
|
657
|
+
fallback_session, '_session_id', 'unknown'
|
|
658
|
+
),
|
|
659
|
+
}
|
|
660
|
+
logger.debug(f"Browser session state: {session_state}")
|
|
661
|
+
|
|
662
|
+
actual_tab_count = 0
|
|
663
|
+
if (
|
|
664
|
+
hasattr(fallback_session, '_pages')
|
|
665
|
+
and fallback_session._pages
|
|
666
|
+
):
|
|
667
|
+
actual_tab_count = len(fallback_session._pages)
|
|
668
|
+
# Also try to filter out closed pages
|
|
669
|
+
try:
|
|
670
|
+
open_pages = [
|
|
671
|
+
p
|
|
672
|
+
for p in fallback_session._pages
|
|
673
|
+
if not p.is_closed()
|
|
674
|
+
]
|
|
675
|
+
actual_tab_count = len(open_pages)
|
|
676
|
+
logger.debug(
|
|
677
|
+
f"Found {actual_tab_count} open tabs out of "
|
|
678
|
+
f"{len(fallback_session._pages)} total"
|
|
679
|
+
)
|
|
680
|
+
except Exception:
|
|
681
|
+
# Keep the original count if we can't check page status
|
|
682
|
+
pass
|
|
683
|
+
|
|
684
|
+
if actual_tab_count == 0:
|
|
685
|
+
# If no pages, check if browser is even initialized
|
|
686
|
+
if (
|
|
687
|
+
hasattr(fallback_session, '_page')
|
|
688
|
+
and fallback_session._page is not None
|
|
689
|
+
):
|
|
690
|
+
actual_tab_count = 1
|
|
691
|
+
logger.debug(
|
|
692
|
+
"No pages in list but main page exists, assuming "
|
|
693
|
+
"1 tab"
|
|
694
|
+
)
|
|
695
|
+
else:
|
|
696
|
+
actual_tab_count = 1
|
|
697
|
+
logger.debug("No pages found, defaulting to 1 tab")
|
|
698
|
+
|
|
699
|
+
logger.debug(f"Using fallback tab count: {actual_tab_count}")
|
|
700
|
+
return {
|
|
701
|
+
"tabs": [],
|
|
702
|
+
"current_tab": 0,
|
|
703
|
+
"total_tabs": actual_tab_count,
|
|
704
|
+
}
|
|
705
|
+
|
|
706
|
+
except Exception as fallback_error:
|
|
707
|
+
logger.warning(
|
|
708
|
+
f"Fallback tab count also failed: "
|
|
709
|
+
f"{type(fallback_error).__name__}: {fallback_error}"
|
|
710
|
+
)
|
|
711
|
+
return {"tabs": [], "current_tab": 0, "total_tabs": 1}
|
|
712
|
+
|
|
373
713
|
async def _exec_with_snapshot(
|
|
374
|
-
self,
|
|
714
|
+
self,
|
|
715
|
+
action: Dict[str, Any],
|
|
716
|
+
element_details: Optional[Dict[str, Any]] = None,
|
|
375
717
|
) -> Dict[str, str]:
|
|
376
718
|
r"""Execute action and return result with snapshot comparison."""
|
|
377
719
|
|
|
@@ -379,72 +721,166 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
379
721
|
action_type = action.get("type", "unknown")
|
|
380
722
|
logger.info(f"Executing action: {action_type}")
|
|
381
723
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
before_snapshot = await self._session.get_snapshot(
|
|
386
|
-
force_refresh=True, diff_only=False
|
|
387
|
-
)
|
|
388
|
-
snapshot_time = time.time() - snapshot_start
|
|
389
|
-
logger.info(f"Pre-action snapshot captured in {snapshot_time:.2f}s")
|
|
724
|
+
action_start_time = time.time()
|
|
725
|
+
inputs: Dict[str, Any] = {"action": action}
|
|
726
|
+
page_load_time = None
|
|
390
727
|
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
# Wait for page stability after action (especially important for click)
|
|
399
|
-
if action_type in ["click", "type", "select", "enter"]:
|
|
400
|
-
logger.info(
|
|
401
|
-
f"Waiting for page stability " f"after {action_type}..."
|
|
728
|
+
try:
|
|
729
|
+
# Get before snapshot
|
|
730
|
+
logger.info("Capturing pre-action snapshot...")
|
|
731
|
+
snapshot_start = time.time()
|
|
732
|
+
before_snapshot = await self._session.get_snapshot(
|
|
733
|
+
force_refresh=True, diff_only=False
|
|
402
734
|
)
|
|
403
|
-
|
|
404
|
-
await self._wait_for_page_stability()
|
|
405
|
-
stability_time = time.time() - stability_start
|
|
735
|
+
snapshot_time = time.time() - snapshot_start
|
|
406
736
|
logger.info(
|
|
407
|
-
f"
|
|
737
|
+
f"Pre-action snapshot captured in {snapshot_time:.2f}s"
|
|
408
738
|
)
|
|
409
739
|
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
#
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
f"page
|
|
740
|
+
# Execute action
|
|
741
|
+
logger.info(f"Executing {action_type} action...")
|
|
742
|
+
exec_start = time.time()
|
|
743
|
+
exec_result = await self._session.exec_action(action)
|
|
744
|
+
exec_time = time.time() - exec_start
|
|
745
|
+
logger.info(f"Action {action_type} completed in {exec_time:.2f}s")
|
|
746
|
+
|
|
747
|
+
# Parse the detailed result from ActionExecutor
|
|
748
|
+
if isinstance(exec_result, dict):
|
|
749
|
+
result_message = exec_result.get("message", str(exec_result))
|
|
750
|
+
action_details = exec_result.get("details", {})
|
|
751
|
+
success = exec_result.get("success", True)
|
|
752
|
+
else:
|
|
753
|
+
result_message = str(exec_result)
|
|
754
|
+
action_details = {}
|
|
755
|
+
success = True
|
|
756
|
+
|
|
757
|
+
# Wait for page stability after action (especially important for
|
|
758
|
+
# click)
|
|
759
|
+
stability_time: float = 0.0
|
|
760
|
+
if action_type in ["click", "type", "select", "enter"]:
|
|
761
|
+
logger.info(
|
|
762
|
+
f"Waiting for page stability " f"after {action_type}..."
|
|
433
763
|
)
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
f"
|
|
764
|
+
stability_start = time.time()
|
|
765
|
+
await self._wait_for_page_stability()
|
|
766
|
+
stability_time = time.time() - stability_start
|
|
767
|
+
logger.info(
|
|
768
|
+
f"Page stability wait "
|
|
769
|
+
f"completed in "
|
|
770
|
+
f"{stability_time:.2f}s"
|
|
439
771
|
)
|
|
772
|
+
page_load_time = stability_time
|
|
773
|
+
|
|
774
|
+
# Enhanced logging for page loading times
|
|
775
|
+
if self.enable_page_loading_logging and self.log_to_console:
|
|
776
|
+
logger.info(
|
|
777
|
+
f"[PAGE LOADING] Page stability for {action_type}: "
|
|
778
|
+
f"{round(stability_time * 1000, 2)}ms"
|
|
779
|
+
)
|
|
780
|
+
|
|
781
|
+
# Get after snapshot
|
|
782
|
+
logger.info("Capturing post-action snapshot...")
|
|
783
|
+
snapshot_start = time.time()
|
|
784
|
+
after_snapshot = await self._session.get_snapshot(
|
|
785
|
+
force_refresh=True, diff_only=False
|
|
786
|
+
)
|
|
787
|
+
snapshot_time = time.time() - snapshot_start
|
|
788
|
+
logger.info(
|
|
789
|
+
f"Post-action snapshot " f"captured in {snapshot_time:.2f}s"
|
|
790
|
+
)
|
|
791
|
+
|
|
792
|
+
# Check for snapshot quality and log warnings
|
|
793
|
+
if before_snapshot == after_snapshot:
|
|
794
|
+
snapshot = "snapshot not changed"
|
|
795
|
+
logger.debug("Page snapshot unchanged after action")
|
|
440
796
|
else:
|
|
797
|
+
snapshot = after_snapshot
|
|
798
|
+
# Check if snapshot is empty or problematic
|
|
799
|
+
if "<empty>" in after_snapshot:
|
|
800
|
+
logger.warning(
|
|
801
|
+
f"Action {action_type} resulted "
|
|
802
|
+
f"in empty snapshot - "
|
|
803
|
+
f"page may still be loading"
|
|
804
|
+
)
|
|
805
|
+
elif len(after_snapshot.strip()) < 50:
|
|
806
|
+
logger.warning(
|
|
807
|
+
f"Action {action_type} resulted "
|
|
808
|
+
f"in very short snapshot:"
|
|
809
|
+
f" {len(after_snapshot)} chars"
|
|
810
|
+
)
|
|
811
|
+
else:
|
|
812
|
+
logger.debug(
|
|
813
|
+
f"Action {action_type} resulted "
|
|
814
|
+
f"in updated snapshot: "
|
|
815
|
+
f"{len(after_snapshot)} chars"
|
|
816
|
+
)
|
|
817
|
+
|
|
818
|
+
# Get tab information for output
|
|
819
|
+
tab_info = await self._get_tab_info_for_output()
|
|
820
|
+
|
|
821
|
+
# Create comprehensive output for logging
|
|
822
|
+
execution_time = time.time() - action_start_time
|
|
823
|
+
outputs = {
|
|
824
|
+
"result": result_message,
|
|
825
|
+
"snapshot": snapshot,
|
|
826
|
+
"success": success,
|
|
827
|
+
"action_details": action_details,
|
|
828
|
+
"execution_stats": {
|
|
829
|
+
"exec_time_ms": round(exec_time * 1000, 2),
|
|
830
|
+
"stability_time_ms": round(stability_time * 1000, 2)
|
|
831
|
+
if stability_time > 0
|
|
832
|
+
else None,
|
|
833
|
+
"total_time_ms": round(execution_time * 1000, 2),
|
|
834
|
+
},
|
|
835
|
+
**tab_info, # Include tab information
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
# If snapshot is unchanged after click, add element details to log
|
|
839
|
+
if (
|
|
840
|
+
snapshot == "snapshot not changed"
|
|
841
|
+
and action_type == "click"
|
|
842
|
+
and element_details
|
|
843
|
+
):
|
|
441
844
|
logger.debug(
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
845
|
+
"Snapshot unchanged after click. "
|
|
846
|
+
"Adding element details to log."
|
|
847
|
+
)
|
|
848
|
+
outputs["clicked_element_tag"] = element_details.get(
|
|
849
|
+
"tagName", "N/A"
|
|
445
850
|
)
|
|
851
|
+
outputs["clicked_element_content"] = element_details.get(
|
|
852
|
+
"name", ""
|
|
853
|
+
)
|
|
854
|
+
outputs["clicked_element_type"] = element_details.get(
|
|
855
|
+
"role", "generic"
|
|
856
|
+
)
|
|
857
|
+
|
|
858
|
+
# Log the action with all details
|
|
859
|
+
await self._log_action(
|
|
860
|
+
action_name=f"_exec_with_snapshot_{action_type}",
|
|
861
|
+
inputs=inputs,
|
|
862
|
+
outputs=outputs,
|
|
863
|
+
execution_time=execution_time,
|
|
864
|
+
page_load_time=page_load_time,
|
|
865
|
+
)
|
|
866
|
+
|
|
867
|
+
return {"result": result_message, "snapshot": snapshot}
|
|
868
|
+
|
|
869
|
+
except Exception as e:
|
|
870
|
+
execution_time = time.time() - action_start_time
|
|
871
|
+
error_msg = f"{type(e).__name__}: {e!s}"
|
|
872
|
+
|
|
873
|
+
# Log error
|
|
874
|
+
await self._log_action(
|
|
875
|
+
action_name=f"_exec_with_snapshot_{action_type}",
|
|
876
|
+
inputs=inputs,
|
|
877
|
+
outputs=None,
|
|
878
|
+
execution_time=execution_time,
|
|
879
|
+
page_load_time=page_load_time,
|
|
880
|
+
error=error_msg,
|
|
881
|
+
)
|
|
446
882
|
|
|
447
|
-
|
|
883
|
+
raise
|
|
448
884
|
|
|
449
885
|
async def _extract_links_by_refs(
|
|
450
886
|
self, snapshot: str, page, refs: List[str]
|
|
@@ -509,29 +945,31 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
509
945
|
|
|
510
946
|
# Public API Methods
|
|
511
947
|
|
|
512
|
-
async def open_browser(
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
r"""Launches a new browser session, making it ready for web automation.
|
|
516
|
-
|
|
517
|
-
This method initializes the underlying browser instance. If a
|
|
518
|
-
`start_url` is provided, it will also navigate to that URL. If you
|
|
519
|
-
don't have a specific URL to start with, you can use a search engine
|
|
520
|
-
like 'https://search.brave.com/'.
|
|
948
|
+
async def open_browser(self) -> Dict[str, Any]:
|
|
949
|
+
r"""Launches a new browser session and navigates to the configured
|
|
950
|
+
default page.
|
|
521
951
|
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
952
|
+
This method initializes the underlying browser instance and
|
|
953
|
+
automatically
|
|
954
|
+
navigates to the default start URL that was configured during toolkit
|
|
955
|
+
initialization. Agents cannot specify a custom URL - they must use the
|
|
956
|
+
visit_page tool for navigation to other URLs.
|
|
526
957
|
|
|
527
958
|
Returns:
|
|
528
|
-
Dict[str,
|
|
959
|
+
Dict[str, Any]: A dictionary containing:
|
|
529
960
|
- "result": A string confirming that the browser session has
|
|
530
|
-
started.
|
|
961
|
+
started and the default page has been loaded.
|
|
531
962
|
- "snapshot": A textual representation of the current page's
|
|
532
963
|
interactive elements. This snapshot is crucial for
|
|
533
964
|
identifying elements for subsequent actions.
|
|
965
|
+
- "tabs": List of all open tabs with their information.
|
|
966
|
+
- "current_tab": Index of the currently active tab.
|
|
967
|
+
- "total_tabs": Total number of open tabs.
|
|
534
968
|
"""
|
|
969
|
+
# Add logging if enabled
|
|
970
|
+
action_start = time.time()
|
|
971
|
+
inputs: Dict[str, Any] = {} # No input parameters for agents
|
|
972
|
+
|
|
535
973
|
logger.info("Starting browser session...")
|
|
536
974
|
|
|
537
975
|
browser_start = time.time()
|
|
@@ -539,20 +977,42 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
539
977
|
browser_time = time.time() - browser_start
|
|
540
978
|
logger.info(f"Browser session started in {browser_time:.2f}s")
|
|
541
979
|
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
980
|
+
try:
|
|
981
|
+
# Always use the configured default start URL
|
|
982
|
+
start_url = self.default_start_url
|
|
983
|
+
logger.info(f"Navigating to configured default page: {start_url}")
|
|
984
|
+
|
|
985
|
+
result = await self.visit_page(start_url)
|
|
986
|
+
|
|
987
|
+
# Log success
|
|
988
|
+
if self.enable_action_logging or self.enable_timing_logging:
|
|
989
|
+
execution_time = time.time() - action_start
|
|
990
|
+
await self._log_action(
|
|
991
|
+
action_name="open_browser",
|
|
992
|
+
inputs=inputs,
|
|
993
|
+
outputs={
|
|
994
|
+
"result": "Browser opened and navigated to default "
|
|
995
|
+
"page."
|
|
996
|
+
},
|
|
997
|
+
execution_time=execution_time,
|
|
998
|
+
)
|
|
545
999
|
|
|
546
|
-
|
|
547
|
-
snapshot_start = time.time()
|
|
548
|
-
snapshot = await self._session.get_snapshot(
|
|
549
|
-
force_refresh=True, diff_only=False
|
|
550
|
-
)
|
|
551
|
-
snapshot_time = time.time() - snapshot_start
|
|
552
|
-
logger.info(f"Initial snapshot captured in {snapshot_time:.2f}s")
|
|
1000
|
+
return result
|
|
553
1001
|
|
|
554
|
-
|
|
1002
|
+
except Exception as e:
|
|
1003
|
+
# Log error
|
|
1004
|
+
if self.enable_action_logging or self.enable_timing_logging:
|
|
1005
|
+
execution_time = time.time() - action_start
|
|
1006
|
+
await self._log_action(
|
|
1007
|
+
action_name="open_browser",
|
|
1008
|
+
inputs=inputs,
|
|
1009
|
+
outputs=None,
|
|
1010
|
+
execution_time=execution_time,
|
|
1011
|
+
error=f"{type(e).__name__}: {e!s}",
|
|
1012
|
+
)
|
|
1013
|
+
raise
|
|
555
1014
|
|
|
1015
|
+
@action_logger
|
|
556
1016
|
async def close_browser(self) -> str:
|
|
557
1017
|
r"""Closes the current browser session and releases all associated
|
|
558
1018
|
resources.
|
|
@@ -573,7 +1033,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
573
1033
|
await self._session.close()
|
|
574
1034
|
return "Browser session closed."
|
|
575
1035
|
|
|
576
|
-
|
|
1036
|
+
@action_logger
|
|
1037
|
+
async def visit_page(self, url: str) -> Dict[str, Any]:
|
|
577
1038
|
r"""Navigates the current browser page to a specified URL.
|
|
578
1039
|
|
|
579
1040
|
Args:
|
|
@@ -581,16 +1042,22 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
581
1042
|
valid URL.
|
|
582
1043
|
|
|
583
1044
|
Returns:
|
|
584
|
-
Dict[str,
|
|
1045
|
+
Dict[str, Any]: A dictionary containing:
|
|
585
1046
|
- "result": A message indicating the outcome of the navigation,
|
|
586
1047
|
e.g., "Navigation successful.".
|
|
587
1048
|
- "snapshot": A new textual snapshot of the page's interactive
|
|
588
1049
|
elements after the new page has loaded.
|
|
1050
|
+
- "tabs": List of all open tabs with their information.
|
|
1051
|
+
- "current_tab": Index of the currently active tab.
|
|
1052
|
+
- "total_tabs": Total number of open tabs.
|
|
589
1053
|
"""
|
|
590
1054
|
if not url or not isinstance(url, str):
|
|
591
1055
|
return {
|
|
592
1056
|
"result": "Error: 'url' must be a non-empty string",
|
|
593
1057
|
"snapshot": "",
|
|
1058
|
+
"tabs": [],
|
|
1059
|
+
"current_tab": 0,
|
|
1060
|
+
"total_tabs": 1,
|
|
594
1061
|
}
|
|
595
1062
|
|
|
596
1063
|
if '://' not in url:
|
|
@@ -613,8 +1080,142 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
613
1080
|
snapshot_time = time.time() - snapshot_start
|
|
614
1081
|
logger.info(f"Navigation snapshot captured in {snapshot_time:.2f}s")
|
|
615
1082
|
|
|
616
|
-
|
|
1083
|
+
# Get tab information
|
|
1084
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1085
|
+
|
|
1086
|
+
return {"result": nav_result, "snapshot": snapshot, **tab_info}
|
|
1087
|
+
|
|
1088
|
+
@action_logger
async def back(self) -> Dict[str, Any]:
    r"""Navigates the browser back to the previous page in history.

    This function simulates clicking the browser's back button, taking
    you to the previously visited page if one exists in the browser
    history.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating the outcome of the back
              navigation: "Back navigation successful.", or a message
              starting with "Back navigation failed:" when there is no
              previous page in history or the navigation raised an
              error.
            - "snapshot": A textual snapshot of the page captured after
              the navigation attempt (the current page if navigation
              failed).
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    page = await self._require_page()

    try:
        logger.info("Navigating back in browser history...")
        # Remember the starting URL so a no-op navigation can be detected.
        url_before = page.url
        nav_start = time.time()
        response = await page.go_back(
            wait_until="domcontentloaded", timeout=30000
        )
        nav_time = time.time() - nav_start
        logger.info(f"Back navigation completed in {nav_time:.2f}s")

        # go_back() resolves to None instead of raising when there is no
        # history entry to return to. A None response may also accompany
        # same-document navigations, so failure is only reported when the
        # URL did not change either.
        if response is None and page.url == url_before:
            logger.warning(
                "Back navigation failed: no previous page in history"
            )
            result_msg = (
                "Back navigation failed: no previous page in history."
            )
        else:
            result_msg = "Back navigation successful."

        # Wait for page stability
        await self._wait_for_page_stability()

        # Get snapshot
        logger.info("Capturing page snapshot after back navigation...")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Back navigation snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        return {
            "result": result_msg,
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Back navigation failed: {e}")
        # Get current snapshot even if navigation failed, so the caller
        # still sees the state of the page it is on.
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        tab_info = await self._get_tab_info_for_output()
        return {
            "result": f"Back navigation failed: {e!s}",
            "snapshot": snapshot,
            **tab_info,
        }
|
|
1152
|
+
|
|
1153
|
+
@action_logger
async def forward(self) -> Dict[str, Any]:
    r"""Navigates the browser forward to the next page in history.

    This function simulates clicking the browser's forward button, taking
    you to the next page in the browser history if one exists (i.e.,
    if you have previously navigated back).

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating the outcome of the forward
              navigation: "Forward navigation successful.", or a message
              starting with "Forward navigation failed:" when there is
              no next page in history or the navigation raised an error.
            - "snapshot": A textual snapshot of the page captured after
              the navigation attempt (the current page if navigation
              failed).
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    page = await self._require_page()

    try:
        logger.info("Navigating forward in browser history...")
        # Remember the starting URL so a no-op navigation can be detected.
        url_before = page.url
        nav_start = time.time()
        response = await page.go_forward(
            wait_until="domcontentloaded", timeout=30000
        )
        nav_time = time.time() - nav_start
        logger.info(f"Forward navigation completed in {nav_time:.2f}s")

        # go_forward() resolves to None instead of raising when there is
        # no history entry ahead. A None response may also accompany
        # same-document navigations, so failure is only reported when the
        # URL did not change either.
        if response is None and page.url == url_before:
            logger.warning(
                "Forward navigation failed: no next page in history"
            )
            result_msg = (
                "Forward navigation failed: no next page in history."
            )
        else:
            result_msg = "Forward navigation successful."

        # Wait for page stability
        await self._wait_for_page_stability()

        # Get snapshot
        logger.info("Capturing page snapshot after forward navigation...")
        snapshot_start = time.time()
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        snapshot_time = time.time() - snapshot_start
        logger.info(
            f"Forward navigation snapshot captured in {snapshot_time:.2f}s"
        )

        # Get tab information
        tab_info = await self._get_tab_info_for_output()

        return {
            "result": result_msg,
            "snapshot": snapshot,
            **tab_info,
        }

    except Exception as e:
        logger.warning(f"Forward navigation failed: {e}")
        # Get current snapshot even if navigation failed, so the caller
        # still sees the state of the page it is on.
        snapshot = await self._session.get_snapshot(
            force_refresh=True, diff_only=False
        )
        tab_info = await self._get_tab_info_for_output()
        return {
            "result": f"Forward navigation failed: {e!s}",
            "snapshot": snapshot,
            **tab_info,
        }
|
|
1217
|
+
|
|
1218
|
+
@action_logger
|
|
618
1219
|
async def get_page_snapshot(self) -> str:
|
|
619
1220
|
r"""Captures a textual representation of the current page's content.
|
|
620
1221
|
|
|
@@ -650,6 +1251,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
650
1251
|
)
|
|
651
1252
|
|
|
652
1253
|
@dependencies_required('PIL')
|
|
1254
|
+
@action_logger
|
|
653
1255
|
async def get_som_screenshot(self):
|
|
654
1256
|
r"""Captures a screenshot of the current webpage and visually marks all
|
|
655
1257
|
interactive elements. "SoM" stands for "Set of Marks".
|
|
@@ -733,7 +1335,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
733
1335
|
|
|
734
1336
|
return ToolResult(text=text_result, images=[img_data_url])
|
|
735
1337
|
|
|
736
|
-
async def click(self, *, ref: str) -> Dict[str,
|
|
1338
|
+
async def click(self, *, ref: str) -> Dict[str, Any]:
|
|
737
1339
|
r"""Clicks on an interactive element on the page.
|
|
738
1340
|
|
|
739
1341
|
Args:
|
|
@@ -742,12 +1344,15 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
742
1344
|
`get_som_screenshot`).
|
|
743
1345
|
|
|
744
1346
|
Returns:
|
|
745
|
-
Dict[str,
|
|
1347
|
+
Dict[str, Any]: A dictionary containing:
|
|
746
1348
|
- "result": A message confirming the click action.
|
|
747
1349
|
- "snapshot": A new textual snapshot of the page after the
|
|
748
1350
|
click, which may have changed as a result of the action. If
|
|
749
1351
|
the snapshot is unchanged, it will be the string "snapshot
|
|
750
1352
|
not changed".
|
|
1353
|
+
- "tabs": List of all open tabs with their information.
|
|
1354
|
+
- "current_tab": Index of the currently active tab.
|
|
1355
|
+
- "total_tabs": Total number of open tabs.
|
|
751
1356
|
"""
|
|
752
1357
|
self._validate_ref(ref, "click")
|
|
753
1358
|
|
|
@@ -755,19 +1360,30 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
755
1360
|
elements = analysis.get("elements", {})
|
|
756
1361
|
if ref not in elements:
|
|
757
1362
|
available_refs = list(elements.keys())
|
|
758
|
-
logger.error(
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
)
|
|
1363
|
+
logger.error(f"Error: Element reference '{ref}' not found. ")
|
|
1364
|
+
# Added snapshot to give more context on failure
|
|
1365
|
+
snapshot = self._format_snapshot_from_analysis(analysis)
|
|
1366
|
+
tab_info = await self._get_tab_info_for_output()
|
|
762
1367
|
return {
|
|
763
1368
|
"result": f"Error: Element reference '{ref}' not found. "
|
|
764
|
-
f"Available refs: {available_refs}"
|
|
1369
|
+
f"Available refs: {available_refs}",
|
|
1370
|
+
"snapshot": snapshot,
|
|
1371
|
+
**tab_info,
|
|
765
1372
|
}
|
|
766
1373
|
|
|
1374
|
+
element_details = elements.get(ref)
|
|
767
1375
|
action = {"type": "click", "ref": ref}
|
|
768
|
-
|
|
1376
|
+
result = await self._exec_with_snapshot(
|
|
1377
|
+
action, element_details=element_details
|
|
1378
|
+
)
|
|
1379
|
+
|
|
1380
|
+
# Add tab information to the result
|
|
1381
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1382
|
+
result.update(tab_info)
|
|
769
1383
|
|
|
770
|
-
|
|
1384
|
+
return result
|
|
1385
|
+
|
|
1386
|
+
async def type(self, *, ref: str, text: str) -> Dict[str, Any]:
|
|
771
1387
|
r"""Types text into an input field, such as a textbox or search bar.
|
|
772
1388
|
|
|
773
1389
|
Args:
|
|
@@ -775,18 +1391,27 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
775
1391
|
text (str): The text to be typed into the element.
|
|
776
1392
|
|
|
777
1393
|
Returns:
|
|
778
|
-
Dict[str,
|
|
1394
|
+
Dict[str, Any]: A dictionary containing:
|
|
779
1395
|
- "result": A message confirming the type action.
|
|
780
1396
|
- "snapshot": A new textual snapshot of the page after the
|
|
781
1397
|
text has been entered.
|
|
1398
|
+
- "tabs": List of all open tabs with their information.
|
|
1399
|
+
- "current_tab": Index of the currently active tab.
|
|
1400
|
+
- "total_tabs": Total number of open tabs.
|
|
782
1401
|
"""
|
|
783
1402
|
self._validate_ref(ref, "type")
|
|
784
1403
|
await self._get_unified_analysis() # Ensure aria-ref attributes
|
|
785
1404
|
|
|
786
1405
|
action = {"type": "type", "ref": ref, "text": text}
|
|
787
|
-
|
|
1406
|
+
result = await self._exec_with_snapshot(action)
|
|
1407
|
+
|
|
1408
|
+
# Add tab information to the result
|
|
1409
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1410
|
+
result.update(tab_info)
|
|
1411
|
+
|
|
1412
|
+
return result
|
|
788
1413
|
|
|
789
|
-
async def select(self, *, ref: str, value: str) -> Dict[str,
|
|
1414
|
+
async def select(self, *, ref: str, value: str) -> Dict[str, Any]:
|
|
790
1415
|
r"""Selects an option from a dropdown (`<select>`) element.
|
|
791
1416
|
|
|
792
1417
|
Args:
|
|
@@ -796,17 +1421,26 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
796
1421
|
visible text.
|
|
797
1422
|
|
|
798
1423
|
Returns:
|
|
799
|
-
Dict[str,
|
|
1424
|
+
Dict[str, Any]: A dictionary containing:
|
|
800
1425
|
- "result": A message confirming the select action.
|
|
801
1426
|
- "snapshot": A new snapshot of the page after the selection.
|
|
1427
|
+
- "tabs": List of all open tabs with their information.
|
|
1428
|
+
- "current_tab": Index of the currently active tab.
|
|
1429
|
+
- "total_tabs": Total number of open tabs.
|
|
802
1430
|
"""
|
|
803
1431
|
self._validate_ref(ref, "select")
|
|
804
1432
|
await self._get_unified_analysis()
|
|
805
1433
|
|
|
806
1434
|
action = {"type": "select", "ref": ref, "value": value}
|
|
807
|
-
|
|
1435
|
+
result = await self._exec_with_snapshot(action)
|
|
1436
|
+
|
|
1437
|
+
# Add tab information to the result
|
|
1438
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1439
|
+
result.update(tab_info)
|
|
808
1440
|
|
|
809
|
-
|
|
1441
|
+
return result
|
|
1442
|
+
|
|
1443
|
+
async def scroll(self, *, direction: str, amount: int) -> Dict[str, Any]:
|
|
810
1444
|
r"""Scrolls the page window up or down by a specified amount.
|
|
811
1445
|
|
|
812
1446
|
Args:
|
|
@@ -815,40 +1449,68 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
815
1449
|
amount (int): The number of pixels to scroll.
|
|
816
1450
|
|
|
817
1451
|
Returns:
|
|
818
|
-
Dict[str,
|
|
1452
|
+
Dict[str, Any]: A dictionary containing:
|
|
819
1453
|
- "result": A confirmation of the scroll action.
|
|
820
1454
|
- "snapshot": A new snapshot of the page after scrolling.
|
|
1455
|
+
- "tabs": List of all open tabs with their information.
|
|
1456
|
+
- "current_tab": Index of the currently active tab.
|
|
1457
|
+
- "total_tabs": Total number of open tabs.
|
|
821
1458
|
"""
|
|
822
1459
|
if direction not in ("up", "down"):
|
|
1460
|
+
tab_info = await self._get_tab_info_for_output()
|
|
823
1461
|
return {
|
|
824
1462
|
"result": "Error: direction must be 'up' or 'down'",
|
|
825
1463
|
"snapshot": "",
|
|
1464
|
+
**tab_info,
|
|
826
1465
|
}
|
|
827
1466
|
|
|
828
1467
|
action = {"type": "scroll", "direction": direction, "amount": amount}
|
|
829
|
-
|
|
1468
|
+
result = await self._exec_with_snapshot(action)
|
|
830
1469
|
|
|
831
|
-
|
|
832
|
-
|
|
1470
|
+
# Add tab information to the result
|
|
1471
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1472
|
+
result.update(tab_info)
|
|
833
1473
|
|
|
834
|
-
|
|
1474
|
+
return result
|
|
835
1475
|
|
|
836
|
-
|
|
837
|
-
|
|
1476
|
+
async def enter(self) -> Dict[str, Any]:
    r"""Simulates pressing the Enter key on the currently focused element.

    Use this tool to execute or confirm an action after interacting with
    an element, for example:
    - Submitting a search query after typing in a search box.
    - Confirming a form submission.
    - Executing a command in a text input field.

    A typical sequence is to call the 'type' tool first to input text
    (which sets the focus) and then call 'enter' without any parameters
    to trigger the action.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A confirmation of the Enter key action.
            - "snapshot": A new page snapshot, as this action often
              triggers navigation or page updates.
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    # No element reference is sent: the key press goes to whichever
    # element currently holds focus.
    outcome = await self._exec_with_snapshot({"type": "enter"})

    # Merge the tab overview into the action result before returning it.
    tabs_overview = await self._get_tab_info_for_output()
    outcome.update(tabs_overview)
    return outcome
|
|
848
1509
|
|
|
1510
|
+
@action_logger
|
|
849
1511
|
async def wait_user(
|
|
850
1512
|
self, timeout_sec: Optional[float] = None
|
|
851
|
-
) -> Dict[str,
|
|
1513
|
+
) -> Dict[str, Any]:
|
|
852
1514
|
r"""Pauses the agent's execution and waits for human intervention.
|
|
853
1515
|
|
|
854
1516
|
This is useful for tasks that require manual steps, like solving a
|
|
@@ -861,10 +1523,13 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
861
1523
|
automatically. If `None`, it will wait indefinitely.
|
|
862
1524
|
|
|
863
1525
|
Returns:
|
|
864
|
-
Dict[str,
|
|
1526
|
+
Dict[str, Any]: A dictionary containing:
|
|
865
1527
|
- "result": A message indicating how the wait ended (e.g.,
|
|
866
1528
|
"User resumed." or "Timeout... reached, auto-resumed.").
|
|
867
1529
|
- "snapshot": The current page snapshot after the wait.
|
|
1530
|
+
- "tabs": List of all open tabs with their information.
|
|
1531
|
+
- "current_tab": Index of the currently active tab.
|
|
1532
|
+
- "total_tabs": Total number of open tabs.
|
|
868
1533
|
"""
|
|
869
1534
|
import asyncio
|
|
870
1535
|
|
|
@@ -905,8 +1570,11 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
905
1570
|
snapshot = await self._session.get_snapshot(
|
|
906
1571
|
force_refresh=True, diff_only=False
|
|
907
1572
|
)
|
|
908
|
-
|
|
1573
|
+
tab_info = await self._get_tab_info_for_output()
|
|
1574
|
+
|
|
1575
|
+
return {"result": result_msg, "snapshot": snapshot, **tab_info}
|
|
909
1576
|
|
|
1577
|
+
@action_logger
|
|
910
1578
|
async def get_page_links(self, *, ref: List[str]) -> Dict[str, Any]:
|
|
911
1579
|
r"""Retrieves the full URLs for a given list of link reference IDs.
|
|
912
1580
|
|
|
@@ -938,6 +1606,7 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
938
1606
|
|
|
939
1607
|
return {"links": links}
|
|
940
1608
|
|
|
1609
|
+
@action_logger
|
|
941
1610
|
async def solve_task(
|
|
942
1611
|
self, task_prompt: str, start_url: str, max_steps: int = 15
|
|
943
1612
|
) -> str:
|
|
@@ -967,6 +1636,48 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
967
1636
|
await agent.process_command(task_prompt, max_steps=max_steps)
|
|
968
1637
|
return "Task processing finished - see stdout for detailed trace."
|
|
969
1638
|
|
|
1639
|
+
def get_log_summary(self) -> Dict[str, Any]:
    r"""Build aggregate statistics over the entries in the log buffer.

    Returns:
        Dict[str, Any]: For an empty buffer, a dictionary with
            "total_actions" set to 0 and a "summary" message. Otherwise
            a dictionary containing:
            - "total_actions": Number of logged entries.
            - "total_execution_time_ms": Sum of the entries'
              "execution_time_ms" values (missing values count as 0),
              rounded to two decimals.
            - "total_page_load_time_ms": Sum of "page_load_time_ms" over
              the entries that carry that key, rounded to two decimals.
            - "action_counts": Number of entries per "action" name.
            - "error_count": Number of entries that have an "error" key.
            - "success_rate": Percentage of entries without an "error"
              key, rounded to two decimals.
    """
    entries = self.log_buffer
    if not entries:
        return {"total_actions": 0, "summary": "No actions logged"}

    entry_count = len(entries)
    execution_ms = sum(
        record.get("execution_time_ms", 0) for record in entries
    )
    # Only entries that actually recorded a page load contribute here.
    page_load_ms = sum(
        record["page_load_time_ms"]
        for record in entries
        if "page_load_time_ms" in record
    )

    per_action: Dict[str, int] = {}
    for record in entries:
        name = record["action"]
        per_action[name] = per_action.get(name, 0) + 1
    failures = sum(1 for record in entries if "error" in record)

    if entry_count > 0:
        success_rate = round(
            (entry_count - failures) / entry_count * 100, 2
        )
    else:
        success_rate = 0

    return {
        "total_actions": entry_count,
        "total_execution_time_ms": round(execution_ms, 2),
        "total_page_load_time_ms": round(page_load_ms, 2),
        "action_counts": per_action,
        "error_count": failures,
        "success_rate": success_rate,
    }
|
|
1675
|
+
|
|
1676
|
+
def clear_logs(self) -> None:
    r"""Remove every entry from the action log buffer.

    The buffer object itself is kept and emptied in place; an info-level
    message is logged afterwards.
    """
    buffer = self.log_buffer
    buffer.clear()
    logger.info("Log buffer cleared")
|
|
1680
|
+
|
|
970
1681
|
def get_tools(self) -> List[FunctionTool]:
|
|
971
1682
|
r"""Get available function tools
|
|
972
1683
|
based on enabled_tools configuration."""
|
|
@@ -975,6 +1686,8 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
975
1686
|
"open_browser": self.open_browser,
|
|
976
1687
|
"close_browser": self.close_browser,
|
|
977
1688
|
"visit_page": self.visit_page,
|
|
1689
|
+
"back": self.back,
|
|
1690
|
+
"forward": self.forward,
|
|
978
1691
|
"get_page_snapshot": self.get_page_snapshot,
|
|
979
1692
|
"get_som_screenshot": self.get_som_screenshot,
|
|
980
1693
|
"get_page_links": self.get_page_links,
|
|
@@ -985,6 +1698,9 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
985
1698
|
"enter": self.enter,
|
|
986
1699
|
"wait_user": self.wait_user,
|
|
987
1700
|
"solve_task": self.solve_task,
|
|
1701
|
+
"switch_tab": self.switch_tab,
|
|
1702
|
+
"close_tab": self.close_tab,
|
|
1703
|
+
"get_tab_info": self.get_tab_info,
|
|
988
1704
|
}
|
|
989
1705
|
|
|
990
1706
|
enabled_tools = []
|
|
@@ -998,11 +1714,165 @@ class HybridBrowserToolkit(BaseToolkit):
|
|
|
998
1714
|
continue
|
|
999
1715
|
|
|
1000
1716
|
if tool_name in tool_map:
|
|
1001
|
-
|
|
1002
|
-
|
|
1717
|
+
tool = FunctionTool(
|
|
1718
|
+
cast(Callable[..., Any], tool_map[tool_name])
|
|
1003
1719
|
)
|
|
1720
|
+
enabled_tools.append(tool)
|
|
1004
1721
|
else:
|
|
1005
1722
|
logger.warning(f"Unknown tool name: {tool_name}")
|
|
1006
1723
|
|
|
1007
1724
|
logger.info(f"Returning {len(enabled_tools)} enabled tools")
|
|
1008
1725
|
return enabled_tools
|
|
1726
|
+
|
|
1727
|
+
def clone_for_new_session(
    self, new_session_id: Optional[str] = None
) -> "HybridBrowserToolkit":
    r"""Build another HybridBrowserToolkit that uses its own session.

    Args:
        new_session_id: Optional identifier for the new session. If
            None, the first eight characters of a random UUID are used.

    Returns:
        A new HybridBrowserToolkit instance constructed from this
        toolkit's settings, with the given (or generated) session ID and
        a cache directory suffixed with ``_clone_<session id>``.
    """
    import uuid

    session_id = new_session_id
    if session_id is None:
        # The first eight characters of a UUID string are its first
        # eight hex digits.
        session_id = uuid.uuid4().hex[:8]

    # Stealth mode is read from the existing session object; without a
    # session it falls back to False.
    if self._session:
        stealth_enabled = self._session._stealth
    else:
        stealth_enabled = False

    clone_cache_dir = f"{self.cache_dir.rstrip('/')}_clone_{session_id}/"

    return HybridBrowserToolkit(
        headless=self._headless,
        user_data_dir=self._user_data_dir,
        stealth=stealth_enabled,
        web_agent_model=self.web_agent_model,
        cache_dir=clone_cache_dir,
        enabled_tools=self.enabled_tools.copy(),
        browser_log_to_file=self.log_to_file,
        session_id=session_id,
        default_start_url=self.default_start_url,
    )
|
|
1757
|
+
|
|
1758
|
+
@action_logger
async def switch_tab(self, *, tab_index: int) -> Dict[str, Any]:
    r"""Switches to a specific browser tab by its index.

    Use this to choose which tab is active. After a successful switch,
    subsequent browser actions operate on the newly selected tab.

    Args:
        tab_index (int): The zero-based index of the tab to switch to.
            Use `get_tab_info` to see available tabs and their indices.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating success or failure of the
              tab switch.
            - "snapshot": A textual snapshot of the newly active tab's
              content (an empty string if the switch failed).
            - "tabs": List of all open tabs with their information.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of open tabs.
    """
    await self._ensure_browser()
    session = await self._get_session()

    if await session.switch_to_tab(tab_index):
        outcome = f"Successfully switched to tab {tab_index}"
        page_snapshot = await session.get_snapshot(
            force_refresh=True, diff_only=False
        )
    else:
        outcome = (
            f"Failed to switch to tab {tab_index}. Tab may not "
            f"exist."
        )
        page_snapshot = ""

    # Tab details are reported in both the success and the failure case.
    tabs_overview = await self._get_tab_info_for_output()
    return {
        "result": outcome,
        "snapshot": page_snapshot,
        **tabs_overview,
    }
|
|
1806
|
+
|
|
1807
|
+
@action_logger
async def close_tab(self, *, tab_index: int) -> Dict[str, Any]:
    r"""Closes a specific browser tab by its index.

    After the tab is closed, the returned tab information shows which
    tab (if any) is now active. If no snapshot can be taken afterwards,
    for example because no tab remains open, the snapshot is returned as
    an empty string.

    Args:
        tab_index (int): The zero-based index of the tab to close.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "result": A message indicating success or failure of the
              tab closure.
            - "snapshot": A textual snapshot of the currently active tab
              after the closure (empty if the closure failed or no
              snapshot could be captured).
            - "tabs": List of remaining open tabs.
            - "current_tab": Index of the currently active tab.
            - "total_tabs": Total number of remaining open tabs.
    """
    await self._ensure_browser()
    session = await self._get_session()

    success = await session.close_tab(tab_index)

    if success:
        result_msg = f"Successfully closed tab {tab_index}"
        # Get current state after closing the tab. This is best effort:
        # a failure must not turn a successful close into an error, but
        # it is logged so real snapshot problems are not hidden.
        try:
            snapshot = await session.get_snapshot(
                force_refresh=True, diff_only=False
            )
        except Exception as e:
            logger.debug(
                f"Could not capture snapshot after closing tab "
                f"{tab_index}: {type(e).__name__}: {e!s}"
            )
            snapshot = ""  # No active tab
    else:
        result_msg = (
            f"Failed to close tab {tab_index}. Tab may not "
            f"exist."
        )
        snapshot = ""

    # Tab details are reported in both the success and the failure case.
    tab_info = await self._get_tab_info_for_output()

    return {
        "result": result_msg,
        "snapshot": snapshot,
        **tab_info,
    }
|
|
1859
|
+
|
|
1860
|
+
@action_logger
async def get_tab_info(self) -> Dict[str, Any]:
    r"""Retrieves information about all currently open browser tabs.

    Gives an overview of the browser state: every open tab with its
    title and URL, and which tab is currently active.

    Returns:
        Dict[str, Any]: A dictionary containing:
            - "tabs": A list of dictionaries, each representing a tab
              with:
                - "index": The zero-based index of the tab
                - "title": The page title
                - "url": The current URL
                - "is_current": Whether this is the currently active tab
            - "current_tab": Index of the currently active tab
            - "total_tabs": Total number of open tabs
    """
    # The browser must be running before tab details can be collected.
    await self._ensure_browser()
    tabs_overview = await self._get_tab_info_for_output()
    return tabs_overview
|