camel-ai 0.2.71a5__py3-none-any.whl → 0.2.71a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +51 -1
- camel/societies/workforce/role_playing_worker.py +64 -7
- camel/societies/workforce/single_agent_worker.py +71 -18
- camel/societies/workforce/structured_output_handler.py +500 -0
- camel/societies/workforce/workforce.py +309 -130
- camel/tasks/task.py +1 -1
- camel/toolkits/hybrid_browser_toolkit/actions.py +235 -60
- camel/toolkits/hybrid_browser_toolkit/agent.py +25 -8
- camel/toolkits/hybrid_browser_toolkit/browser_session.py +574 -164
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +996 -126
- camel/toolkits/hybrid_browser_toolkit/stealth_config.py +116 -0
- camel/toolkits/hybrid_browser_toolkit/stealth_script.js +0 -0
- camel/toolkits/note_taking_toolkit.py +7 -13
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a6.dist-info}/METADATA +1 -1
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a6.dist-info}/RECORD +18 -15
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a6.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a5.dist-info → camel_ai-0.2.71a6.dist-info}/licenses/LICENSE +0 -0
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
16
|
import asyncio
|
|
17
|
-
from
|
|
18
|
-
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional
|
|
17
|
+
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Optional, Tuple
|
|
19
18
|
|
|
20
19
|
from camel.logger import get_logger
|
|
21
20
|
|
|
22
21
|
from .actions import ActionExecutor
|
|
23
22
|
from .snapshot import PageSnapshot
|
|
23
|
+
from .stealth_config import StealthConfig
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
26
26
|
from playwright.async_api import (
|
|
@@ -30,42 +30,102 @@ if TYPE_CHECKING:
|
|
|
30
30
|
Playwright,
|
|
31
31
|
)
|
|
32
32
|
|
|
33
|
-
|
|
34
33
|
logger = get_logger(__name__)
|
|
35
34
|
|
|
36
35
|
|
|
37
|
-
class
|
|
38
|
-
"""Lightweight wrapper around Playwright for
|
|
39
|
-
browsing.
|
|
36
|
+
class HybridBrowserSession:
|
|
37
|
+
"""Lightweight wrapper around Playwright for
|
|
38
|
+
browsing with multi-tab support.
|
|
40
39
|
|
|
41
|
-
It provides
|
|
40
|
+
It provides multiple *Page* instances plus helper utilities (snapshot &
|
|
42
41
|
executor). Multiple toolkits or agents can reuse this class without
|
|
43
42
|
duplicating Playwright setup code.
|
|
44
43
|
|
|
45
|
-
This class is a singleton per event-loop.
|
|
44
|
+
This class is a singleton per event-loop and session-id combination.
|
|
46
45
|
"""
|
|
47
46
|
|
|
48
47
|
# Configuration constants
|
|
49
48
|
DEFAULT_NAVIGATION_TIMEOUT = 10000 # 10 seconds
|
|
50
49
|
NETWORK_IDLE_TIMEOUT = 5000 # 5 seconds
|
|
51
50
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
] = {}
|
|
51
|
+
# Class-level registry for singleton instances
|
|
52
|
+
# Format: {(loop_id, session_id): HybridBrowserSession}
|
|
53
|
+
_instances: ClassVar[Dict[Tuple[Any, str], "HybridBrowserSession"]] = {}
|
|
54
|
+
_instances_lock: ClassVar[asyncio.Lock] = asyncio.Lock()
|
|
55
55
|
|
|
56
56
|
_initialized: bool
|
|
57
|
+
_creation_params: Dict[str, Any]
|
|
57
58
|
|
|
58
59
|
def __new__(
|
|
59
|
-
cls,
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
60
|
+
cls,
|
|
61
|
+
*,
|
|
62
|
+
headless: bool = True,
|
|
63
|
+
user_data_dir: Optional[str] = None,
|
|
64
|
+
stealth: bool = False,
|
|
65
|
+
session_id: Optional[str] = None,
|
|
66
|
+
) -> "HybridBrowserSession":
|
|
67
|
+
# Create a unique key for this event loop and session combination
|
|
68
|
+
# We defer the event loop lookup to avoid issues with creation
|
|
69
|
+
# outside async context
|
|
63
70
|
instance = super().__new__(cls)
|
|
64
71
|
instance._initialized = False
|
|
72
|
+
instance._session_id = session_id or "default"
|
|
73
|
+
instance._creation_params = {
|
|
74
|
+
"headless": headless,
|
|
75
|
+
"user_data_dir": user_data_dir,
|
|
76
|
+
"stealth": stealth,
|
|
77
|
+
"session_id": session_id,
|
|
78
|
+
}
|
|
65
79
|
return instance
|
|
66
80
|
|
|
81
|
+
@classmethod
|
|
82
|
+
async def _get_or_create_instance(
|
|
83
|
+
cls,
|
|
84
|
+
instance: "HybridBrowserSession",
|
|
85
|
+
) -> "HybridBrowserSession":
|
|
86
|
+
"""Get or create singleton instance for the current event loop and
|
|
87
|
+
session."""
|
|
88
|
+
try:
|
|
89
|
+
loop = asyncio.get_running_loop()
|
|
90
|
+
loop_id = str(id(loop))
|
|
91
|
+
except RuntimeError:
|
|
92
|
+
# No event loop running, use a unique identifier for sync context
|
|
93
|
+
import threading
|
|
94
|
+
|
|
95
|
+
loop_id = f"sync_{threading.current_thread().ident}"
|
|
96
|
+
|
|
97
|
+
# Ensure session_id is never None for the key
|
|
98
|
+
session_id = (
|
|
99
|
+
instance._session_id
|
|
100
|
+
if instance._session_id is not None
|
|
101
|
+
else "default"
|
|
102
|
+
)
|
|
103
|
+
session_key = (loop_id, session_id)
|
|
104
|
+
|
|
105
|
+
# Use class-level lock to protect the instances registry
|
|
106
|
+
async with cls._instances_lock:
|
|
107
|
+
if session_key in cls._instances:
|
|
108
|
+
existing_instance = cls._instances[session_key]
|
|
109
|
+
logger.debug(
|
|
110
|
+
f"Reusing existing browser session for session_id: "
|
|
111
|
+
f"{session_id}"
|
|
112
|
+
)
|
|
113
|
+
return existing_instance
|
|
114
|
+
|
|
115
|
+
# Register this new instance
|
|
116
|
+
cls._instances[session_key] = instance
|
|
117
|
+
logger.debug(
|
|
118
|
+
f"Created new browser session for session_id: {session_id}"
|
|
119
|
+
)
|
|
120
|
+
return instance
|
|
121
|
+
|
|
67
122
|
def __init__(
|
|
68
|
-
self,
|
|
123
|
+
self,
|
|
124
|
+
*,
|
|
125
|
+
headless: bool = True,
|
|
126
|
+
user_data_dir: Optional[str] = None,
|
|
127
|
+
stealth: bool = False,
|
|
128
|
+
session_id: Optional[str] = None,
|
|
69
129
|
):
|
|
70
130
|
if self._initialized:
|
|
71
131
|
return
|
|
@@ -73,66 +133,324 @@ class NVBrowserSession:
|
|
|
73
133
|
|
|
74
134
|
self._headless = headless
|
|
75
135
|
self._user_data_dir = user_data_dir
|
|
136
|
+
self._stealth = stealth
|
|
137
|
+
self._session_id = session_id or "default"
|
|
138
|
+
|
|
139
|
+
# Initialize _creation_params to fix linter error
|
|
140
|
+
self._creation_params = {
|
|
141
|
+
"headless": headless,
|
|
142
|
+
"user_data_dir": user_data_dir,
|
|
143
|
+
"stealth": stealth,
|
|
144
|
+
"session_id": session_id,
|
|
145
|
+
}
|
|
76
146
|
|
|
77
147
|
self._playwright: Optional[Playwright] = None
|
|
78
148
|
self._browser: Optional[Browser] = None
|
|
79
149
|
self._context: Optional[BrowserContext] = None
|
|
80
150
|
self._page: Optional[Page] = None
|
|
81
151
|
|
|
152
|
+
# Multi-tab support
|
|
153
|
+
self._pages: List[Page] = [] # All tabs
|
|
154
|
+
self._current_tab_index: int = 0 # Current active tab index
|
|
155
|
+
|
|
82
156
|
self.snapshot: Optional[PageSnapshot] = None
|
|
83
157
|
self.executor: Optional[ActionExecutor] = None
|
|
84
158
|
|
|
85
159
|
# Protect browser initialisation against concurrent calls
|
|
86
160
|
self._ensure_lock: "asyncio.Lock" = asyncio.Lock()
|
|
87
161
|
|
|
162
|
+
# Load stealth script and config on initialization
|
|
163
|
+
self._stealth_script: Optional[str] = None
|
|
164
|
+
self._stealth_config: Optional[Dict[str, Any]] = None
|
|
165
|
+
if self._stealth:
|
|
166
|
+
self._stealth_script = self._load_stealth_script()
|
|
167
|
+
self._stealth_config = StealthConfig.get_all_config()
|
|
168
|
+
|
|
169
|
+
def _load_stealth_script(self) -> str:
|
|
170
|
+
r"""Load the stealth JavaScript script from file."""
|
|
171
|
+
import os
|
|
172
|
+
|
|
173
|
+
script_path = os.path.join(
|
|
174
|
+
os.path.dirname(os.path.abspath(__file__)), "stealth_script.js"
|
|
175
|
+
)
|
|
176
|
+
|
|
177
|
+
try:
|
|
178
|
+
with open(
|
|
179
|
+
script_path, "r", encoding='utf-8', errors='replace'
|
|
180
|
+
) as f:
|
|
181
|
+
script_content = f.read()
|
|
182
|
+
|
|
183
|
+
if not script_content.strip():
|
|
184
|
+
raise ValueError(f"Stealth script is empty: {script_path}")
|
|
185
|
+
|
|
186
|
+
logger.debug(
|
|
187
|
+
f"Loaded stealth script ({len(script_content)} chars)"
|
|
188
|
+
)
|
|
189
|
+
return script_content
|
|
190
|
+
except FileNotFoundError:
|
|
191
|
+
logger.error(f"Stealth script not found: {script_path}")
|
|
192
|
+
raise FileNotFoundError(f"Stealth script not found: {script_path}")
|
|
193
|
+
except Exception as e:
|
|
194
|
+
logger.error(f"Error loading stealth script: {e}")
|
|
195
|
+
raise RuntimeError(f"Failed to load stealth script: {e}") from e
|
|
196
|
+
|
|
88
197
|
# ------------------------------------------------------------------
|
|
89
|
-
#
|
|
198
|
+
# Multi-tab management methods
|
|
90
199
|
# ------------------------------------------------------------------
|
|
91
|
-
async def
|
|
92
|
-
r"""
|
|
93
|
-
|
|
200
|
+
async def create_new_tab(self, url: Optional[str] = None) -> int:
|
|
201
|
+
r"""Create a new tab and optionally navigate to a URL.
|
|
202
|
+
|
|
203
|
+
Args:
|
|
204
|
+
url: Optional URL to navigate to in the new tab
|
|
205
|
+
|
|
206
|
+
Returns:
|
|
207
|
+
int: Index of the newly created tab
|
|
94
208
|
"""
|
|
95
|
-
|
|
209
|
+
await self.ensure_browser()
|
|
210
|
+
|
|
211
|
+
if self._context is None:
|
|
212
|
+
raise RuntimeError("Browser context is not available")
|
|
213
|
+
|
|
214
|
+
# Create new page
|
|
215
|
+
new_page = await self._context.new_page()
|
|
216
|
+
|
|
217
|
+
# Apply stealth modifications if enabled
|
|
218
|
+
if self._stealth and self._stealth_script:
|
|
219
|
+
try:
|
|
220
|
+
await new_page.add_init_script(self._stealth_script)
|
|
221
|
+
logger.debug("Applied stealth script to new tab")
|
|
222
|
+
except Exception as e:
|
|
223
|
+
logger.warning(
|
|
224
|
+
f"Failed to apply stealth script to new tab: {e}"
|
|
225
|
+
)
|
|
226
|
+
|
|
227
|
+
# Add to our pages list
|
|
228
|
+
self._pages.append(new_page)
|
|
229
|
+
new_tab_index = len(self._pages) - 1
|
|
230
|
+
|
|
231
|
+
# Navigate if URL provided
|
|
232
|
+
if url:
|
|
233
|
+
try:
|
|
234
|
+
await new_page.goto(
|
|
235
|
+
url, timeout=self.DEFAULT_NAVIGATION_TIMEOUT
|
|
236
|
+
)
|
|
237
|
+
await new_page.wait_for_load_state('domcontentloaded')
|
|
238
|
+
except Exception as e:
|
|
239
|
+
logger.warning(f"Failed to navigate new tab to {url}: {e}")
|
|
240
|
+
|
|
241
|
+
logger.info(
|
|
242
|
+
f"Created new tab {new_tab_index}, total tabs: {len(self._pages)}"
|
|
243
|
+
)
|
|
244
|
+
return new_tab_index
|
|
245
|
+
|
|
246
|
+
async def register_page(self, new_page: "Page") -> int:
|
|
247
|
+
r"""Register a page that was created externally (e.g., by a click).
|
|
248
|
+
|
|
249
|
+
Args:
|
|
250
|
+
new_page (Page): The new page object to register.
|
|
251
|
+
|
|
252
|
+
Returns:
|
|
253
|
+
int: The index of the (newly) registered tab.
|
|
254
|
+
"""
|
|
255
|
+
if new_page in self._pages:
|
|
256
|
+
try:
|
|
257
|
+
# Page is already known, just return its index
|
|
258
|
+
return self._pages.index(new_page)
|
|
259
|
+
except ValueError:
|
|
260
|
+
# Should not happen if `in` check passed, but handle anyway
|
|
261
|
+
pass
|
|
262
|
+
|
|
263
|
+
# Add new page to our list
|
|
264
|
+
self._pages.append(new_page)
|
|
265
|
+
new_tab_index = len(self._pages) - 1
|
|
266
|
+
logger.info(
|
|
267
|
+
f"Registered new tab {new_tab_index} (opened by user action). "
|
|
268
|
+
f"Total tabs: {len(self._pages)}"
|
|
269
|
+
)
|
|
270
|
+
return new_tab_index
|
|
271
|
+
|
|
272
|
+
async def switch_to_tab(self, tab_index: int) -> bool:
|
|
273
|
+
r"""Switch to a specific tab by index.
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
tab_index: Index of the tab to switch to
|
|
277
|
+
|
|
278
|
+
Returns:
|
|
279
|
+
bool: True if successful, False if tab index is invalid
|
|
280
|
+
"""
|
|
281
|
+
# Use a more robust bounds check to prevent race conditions
|
|
96
282
|
try:
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
283
|
+
if not self._pages:
|
|
284
|
+
logger.warning("No tabs available")
|
|
285
|
+
return False
|
|
286
|
+
|
|
287
|
+
# Capture current state to avoid race conditions
|
|
288
|
+
current_pages = self._pages.copy()
|
|
289
|
+
pages_count = len(current_pages)
|
|
290
|
+
|
|
291
|
+
if tab_index < 0 or tab_index >= pages_count:
|
|
292
|
+
logger.warning(
|
|
293
|
+
f"Invalid tab index {tab_index}, available "
|
|
294
|
+
f"tabs: {pages_count}"
|
|
295
|
+
)
|
|
296
|
+
return False
|
|
297
|
+
|
|
298
|
+
# Check if the page is still valid
|
|
299
|
+
page = current_pages[tab_index]
|
|
300
|
+
if page.is_closed():
|
|
301
|
+
logger.warning(
|
|
302
|
+
f"Tab {tab_index} is closed, removing from list"
|
|
303
|
+
)
|
|
304
|
+
# Remove closed page from original list
|
|
109
305
|
if (
|
|
110
|
-
|
|
111
|
-
and
|
|
112
|
-
and existing._playwright is not None
|
|
306
|
+
tab_index < len(self._pages)
|
|
307
|
+
and self._pages[tab_index] is page
|
|
113
308
|
):
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
self.
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
309
|
+
self._pages.pop(tab_index)
|
|
310
|
+
# Adjust current tab index if necessary
|
|
311
|
+
if self._current_tab_index >= len(self._pages):
|
|
312
|
+
self._current_tab_index = max(0, len(self._pages) - 1)
|
|
313
|
+
return False
|
|
314
|
+
|
|
315
|
+
self._current_tab_index = tab_index
|
|
316
|
+
self._page = page
|
|
317
|
+
|
|
318
|
+
# Bring the tab to the front in the browser window
|
|
319
|
+
await self._page.bring_to_front()
|
|
320
|
+
|
|
321
|
+
# Update executor and snapshot for new tab
|
|
322
|
+
self.executor = ActionExecutor(self._page, self)
|
|
323
|
+
self.snapshot = PageSnapshot(self._page)
|
|
324
|
+
|
|
325
|
+
logger.info(f"Switched to tab {tab_index}")
|
|
326
|
+
return True
|
|
327
|
+
|
|
328
|
+
except Exception as e:
|
|
329
|
+
logger.warning(f"Error switching to tab {tab_index}: {e}")
|
|
330
|
+
return False
|
|
331
|
+
|
|
332
|
+
async def close_tab(self, tab_index: int) -> bool:
|
|
333
|
+
r"""Close a specific tab.
|
|
334
|
+
|
|
335
|
+
Args:
|
|
336
|
+
tab_index: Index of the tab to close
|
|
337
|
+
|
|
338
|
+
Returns:
|
|
339
|
+
bool: True if successful, False if tab index is invalid
|
|
340
|
+
"""
|
|
341
|
+
if not self._pages or tab_index < 0 or tab_index >= len(self._pages):
|
|
342
|
+
return False
|
|
343
|
+
|
|
344
|
+
try:
|
|
345
|
+
page = self._pages[tab_index]
|
|
346
|
+
if not page.is_closed():
|
|
347
|
+
await page.close()
|
|
348
|
+
|
|
349
|
+
# Remove from our list
|
|
350
|
+
self._pages.pop(tab_index)
|
|
351
|
+
|
|
352
|
+
# If we closed the current tab, switch to another one
|
|
353
|
+
if tab_index == self._current_tab_index:
|
|
354
|
+
if self._pages:
|
|
355
|
+
# Switch to the previous tab, or first tab if we closed
|
|
356
|
+
# the first one
|
|
357
|
+
new_index = max(
|
|
358
|
+
0, min(tab_index - 1, len(self._pages) - 1)
|
|
359
|
+
)
|
|
360
|
+
await self.switch_to_tab(new_index)
|
|
361
|
+
else:
|
|
362
|
+
# No tabs left
|
|
363
|
+
self._current_tab_index = 0
|
|
364
|
+
self._page = None
|
|
365
|
+
self.executor = None
|
|
366
|
+
self.snapshot = None
|
|
367
|
+
elif tab_index < self._current_tab_index:
|
|
368
|
+
# Adjust current tab index since we removed a tab before it
|
|
369
|
+
self._current_tab_index -= 1
|
|
370
|
+
|
|
371
|
+
logger.info(
|
|
372
|
+
f"Closed tab {tab_index}, remaining tabs: {len(self._pages)}"
|
|
373
|
+
)
|
|
374
|
+
return True
|
|
375
|
+
|
|
376
|
+
except Exception as e:
|
|
377
|
+
logger.warning(f"Error closing tab {tab_index}: {e}")
|
|
378
|
+
return False
|
|
379
|
+
|
|
380
|
+
async def get_tab_info(self) -> List[Dict[str, Any]]:
|
|
381
|
+
r"""Get information about all open tabs.
|
|
382
|
+
|
|
383
|
+
Returns:
|
|
384
|
+
List of dictionaries containing tab information
|
|
385
|
+
"""
|
|
386
|
+
tab_info = []
|
|
387
|
+
for i, page in enumerate(self._pages):
|
|
388
|
+
try:
|
|
389
|
+
if not page.is_closed():
|
|
390
|
+
title = await page.title()
|
|
391
|
+
url = page.url
|
|
392
|
+
is_current = i == self._current_tab_index
|
|
393
|
+
tab_info.append(
|
|
394
|
+
{
|
|
395
|
+
"index": i,
|
|
396
|
+
"title": title,
|
|
397
|
+
"url": url,
|
|
398
|
+
"is_current": is_current,
|
|
399
|
+
}
|
|
400
|
+
)
|
|
401
|
+
else:
|
|
402
|
+
# Mark closed tab for removal
|
|
403
|
+
tab_info.append(
|
|
404
|
+
{
|
|
405
|
+
"index": i,
|
|
406
|
+
"title": "[CLOSED]",
|
|
407
|
+
"url": "",
|
|
408
|
+
"is_current": False,
|
|
409
|
+
}
|
|
410
|
+
)
|
|
411
|
+
except Exception as e:
|
|
412
|
+
logger.warning(f"Error getting info for tab {i}: {e}")
|
|
413
|
+
tab_info.append(
|
|
414
|
+
{
|
|
415
|
+
"index": i,
|
|
416
|
+
"title": "[ERROR]",
|
|
417
|
+
"url": "",
|
|
418
|
+
"is_current": False,
|
|
419
|
+
}
|
|
420
|
+
)
|
|
421
|
+
|
|
422
|
+
return tab_info
|
|
423
|
+
|
|
424
|
+
async def get_current_tab_index(self) -> int:
|
|
425
|
+
r"""Get the index of the current active tab."""
|
|
426
|
+
return self._current_tab_index
|
|
427
|
+
|
|
428
|
+
# ------------------------------------------------------------------
|
|
429
|
+
# Browser lifecycle helpers
|
|
430
|
+
# ------------------------------------------------------------------
|
|
431
|
+
async def ensure_browser(self) -> None:
|
|
432
|
+
r"""Ensure browser is ready. Each session_id gets its own browser
|
|
433
|
+
instance."""
|
|
434
|
+
# First, get the singleton instance for this session
|
|
435
|
+
singleton_instance = await self._get_or_create_instance(self)
|
|
436
|
+
|
|
437
|
+
# If this isn't the singleton instance, delegate to the singleton
|
|
438
|
+
if singleton_instance is not self:
|
|
439
|
+
await singleton_instance.ensure_browser()
|
|
440
|
+
# Copy the singleton's browser state to this instance
|
|
441
|
+
self._playwright = singleton_instance._playwright
|
|
442
|
+
self._browser = singleton_instance._browser
|
|
443
|
+
self._context = singleton_instance._context
|
|
444
|
+
self._page = singleton_instance._page
|
|
445
|
+
self._pages = singleton_instance._pages
|
|
446
|
+
self._current_tab_index = singleton_instance._current_tab_index
|
|
447
|
+
self.snapshot = singleton_instance.snapshot
|
|
448
|
+
self.executor = singleton_instance.executor
|
|
449
|
+
return
|
|
132
450
|
|
|
133
451
|
# Serialise initialisation to avoid race conditions where multiple
|
|
134
452
|
# concurrent coroutine calls create multiple browser instances for
|
|
135
|
-
# the same
|
|
453
|
+
# the same HybridBrowserSession.
|
|
136
454
|
async with self._ensure_lock:
|
|
137
455
|
await self._ensure_browser_inner()
|
|
138
456
|
|
|
@@ -145,155 +463,247 @@ class NVBrowserSession:
|
|
|
145
463
|
return
|
|
146
464
|
|
|
147
465
|
self._playwright = await async_playwright().start()
|
|
466
|
+
|
|
467
|
+
# Prepare stealth options
|
|
468
|
+
launch_options: Dict[str, Any] = {"headless": self._headless}
|
|
469
|
+
context_options: Dict[str, Any] = {}
|
|
470
|
+
if self._stealth and self._stealth_config:
|
|
471
|
+
# Use preloaded stealth configuration
|
|
472
|
+
launch_options['args'] = self._stealth_config['launch_args']
|
|
473
|
+
context_options.update(self._stealth_config['context_options'])
|
|
474
|
+
|
|
148
475
|
if self._user_data_dir:
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
476
|
+
context = (
|
|
477
|
+
await self._playwright.chromium.launch_persistent_context(
|
|
478
|
+
user_data_dir=self._user_data_dir,
|
|
479
|
+
**launch_options,
|
|
480
|
+
**context_options,
|
|
481
|
+
)
|
|
155
482
|
)
|
|
156
|
-
self.
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
if self._context.pages:
|
|
166
|
-
self._page = self._context.pages[0]
|
|
483
|
+
self._context = context
|
|
484
|
+
# Get the first (default) page
|
|
485
|
+
pages = context.pages
|
|
486
|
+
if pages:
|
|
487
|
+
self._page = pages[0]
|
|
488
|
+
self._pages = list(pages)
|
|
489
|
+
else:
|
|
490
|
+
self._page = await context.new_page()
|
|
491
|
+
self._pages = [self._page]
|
|
167
492
|
else:
|
|
493
|
+
self._browser = await self._playwright.chromium.launch(
|
|
494
|
+
**launch_options
|
|
495
|
+
)
|
|
496
|
+
self._context = await self._browser.new_context(**context_options)
|
|
168
497
|
self._page = await self._context.new_page()
|
|
498
|
+
self._pages = [self._page]
|
|
169
499
|
|
|
170
|
-
#
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
500
|
+
# Apply stealth modifications if enabled
|
|
501
|
+
if self._stealth and self._stealth_script:
|
|
502
|
+
try:
|
|
503
|
+
await self._page.add_init_script(self._stealth_script)
|
|
504
|
+
logger.debug("Applied stealth script to main page")
|
|
505
|
+
except Exception as e:
|
|
506
|
+
logger.warning(f"Failed to apply stealth script: {e}")
|
|
507
|
+
|
|
508
|
+
# Set up timeout for navigation
|
|
509
|
+
self._page.set_default_navigation_timeout(
|
|
510
|
+
self.DEFAULT_NAVIGATION_TIMEOUT
|
|
178
511
|
)
|
|
512
|
+
self._page.set_default_timeout(self.DEFAULT_NAVIGATION_TIMEOUT)
|
|
179
513
|
|
|
180
|
-
#
|
|
514
|
+
# Initialize utilities
|
|
181
515
|
self.snapshot = PageSnapshot(self._page)
|
|
182
|
-
self.executor = ActionExecutor(self._page)
|
|
516
|
+
self.executor = ActionExecutor(self._page, self)
|
|
517
|
+
self._current_tab_index = 0
|
|
518
|
+
|
|
519
|
+
logger.info("Browser session initialized successfully")
|
|
183
520
|
|
|
184
521
|
async def close(self) -> None:
|
|
185
|
-
r"""Close
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
# Remove this session from the sessions dict and close resources
|
|
189
|
-
try:
|
|
190
|
-
loop = asyncio.get_running_loop()
|
|
191
|
-
if loop in self._sessions and self._sessions[loop] is self:
|
|
192
|
-
del self._sessions[loop]
|
|
193
|
-
except RuntimeError:
|
|
194
|
-
pass # No running loop, that's okay
|
|
522
|
+
r"""Close browser session and clean up resources."""
|
|
523
|
+
if self._page is None:
|
|
524
|
+
return
|
|
195
525
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
del self._sessions[loop]
|
|
526
|
+
try:
|
|
527
|
+
logger.debug("Closing browser session...")
|
|
528
|
+
await self._close_session()
|
|
200
529
|
|
|
201
|
-
|
|
530
|
+
# Remove from singleton registry
|
|
531
|
+
try:
|
|
532
|
+
try:
|
|
533
|
+
loop = asyncio.get_running_loop()
|
|
534
|
+
loop_id = str(id(loop))
|
|
535
|
+
except RuntimeError:
|
|
536
|
+
# Use same logic as _get_or_create_instance
|
|
537
|
+
import threading
|
|
538
|
+
|
|
539
|
+
loop_id = f"sync_{threading.current_thread().ident}"
|
|
540
|
+
|
|
541
|
+
session_id = (
|
|
542
|
+
self._session_id
|
|
543
|
+
if self._session_id is not None
|
|
544
|
+
else "default"
|
|
545
|
+
)
|
|
546
|
+
session_key = (loop_id, session_id)
|
|
547
|
+
|
|
548
|
+
async with self._instances_lock:
|
|
549
|
+
if (
|
|
550
|
+
session_key in self._instances
|
|
551
|
+
and self._instances[session_key] is self
|
|
552
|
+
):
|
|
553
|
+
del self._instances[session_key]
|
|
554
|
+
logger.debug(
|
|
555
|
+
f"Removed session {session_id} from registry"
|
|
556
|
+
)
|
|
557
|
+
|
|
558
|
+
except Exception as registry_error:
|
|
559
|
+
logger.warning(f"Error cleaning up registry: {registry_error}")
|
|
560
|
+
|
|
561
|
+
logger.debug("Browser session closed successfully")
|
|
562
|
+
except Exception as e:
|
|
563
|
+
logger.error(f"Error during browser session close: {e}")
|
|
564
|
+
finally:
|
|
565
|
+
self._page = None
|
|
566
|
+
self._pages = []
|
|
567
|
+
self._current_tab_index = 0
|
|
568
|
+
self.snapshot = None
|
|
569
|
+
self.executor = None
|
|
202
570
|
|
|
203
571
|
async def _close_session(self) -> None:
|
|
204
|
-
r"""Internal session
|
|
205
|
-
|
|
572
|
+
r"""Internal session close logic with thorough cleanup."""
|
|
573
|
+
try:
|
|
574
|
+
# Close all pages first
|
|
575
|
+
pages_to_close = self._pages.copy()
|
|
576
|
+
for page in pages_to_close:
|
|
577
|
+
try:
|
|
578
|
+
if not page.is_closed():
|
|
579
|
+
await page.close()
|
|
580
|
+
logger.debug(
|
|
581
|
+
f"Closed page: "
|
|
582
|
+
f"{page.url if hasattr(page, 'url') else 'unknown'}" # noqa: E501
|
|
583
|
+
)
|
|
584
|
+
except Exception as e:
|
|
585
|
+
logger.warning(f"Error closing page: {e}")
|
|
206
586
|
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
try:
|
|
210
|
-
await self._context.close()
|
|
211
|
-
except Exception as e:
|
|
212
|
-
errors.append(f"Context close error: {e}")
|
|
587
|
+
# Clear the pages list
|
|
588
|
+
self._pages.clear()
|
|
213
589
|
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
590
|
+
# Close context with explicit wait
|
|
591
|
+
if self._context:
|
|
592
|
+
try:
|
|
593
|
+
await self._context.close()
|
|
594
|
+
logger.debug("Browser context closed")
|
|
595
|
+
except Exception as e:
|
|
596
|
+
logger.warning(f"Error closing context: {e}")
|
|
597
|
+
finally:
|
|
598
|
+
self._context = None
|
|
220
599
|
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
600
|
+
# Close browser with explicit wait
|
|
601
|
+
if self._browser:
|
|
602
|
+
try:
|
|
603
|
+
await self._browser.close()
|
|
604
|
+
logger.debug("Browser instance closed")
|
|
605
|
+
except Exception as e:
|
|
606
|
+
logger.warning(f"Error closing browser: {e}")
|
|
607
|
+
finally:
|
|
608
|
+
self._browser = None
|
|
227
609
|
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
610
|
+
# Stop playwright with increased delay for cleanup
|
|
611
|
+
if self._playwright:
|
|
612
|
+
try:
|
|
613
|
+
await self._playwright.stop()
|
|
614
|
+
logger.debug("Playwright stopped")
|
|
231
615
|
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
616
|
+
# Give more time for complete subprocess cleanup
|
|
617
|
+
import asyncio
|
|
618
|
+
|
|
619
|
+
await asyncio.sleep(0.5)
|
|
620
|
+
|
|
621
|
+
except Exception as e:
|
|
622
|
+
logger.warning(f"Error stopping playwright: {e}")
|
|
623
|
+
finally:
|
|
624
|
+
self._playwright = None
|
|
625
|
+
|
|
626
|
+
except Exception as e:
|
|
627
|
+
logger.error(f"Error during session cleanup: {e}")
|
|
628
|
+
finally:
|
|
629
|
+
# Ensure all attributes are cleared regardless of errors
|
|
630
|
+
self._page = None
|
|
631
|
+
self._pages = []
|
|
632
|
+
self._context = None
|
|
633
|
+
self._browser = None
|
|
634
|
+
self._playwright = None
|
|
237
635
|
|
|
238
636
|
@classmethod
|
|
239
637
|
async def close_all_sessions(cls) -> None:
|
|
240
|
-
r"""
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
638
|
+
r"""Close all browser sessions and clean up the singleton registry."""
|
|
639
|
+
logger.debug("Closing all browser sessions...")
|
|
640
|
+
async with cls._instances_lock:
|
|
641
|
+
# Close all active sessions
|
|
642
|
+
instances_to_close = list(cls._instances.values())
|
|
643
|
+
cls._instances.clear()
|
|
644
|
+
logger.debug(f"Closing {len(instances_to_close)} sessions.")
|
|
645
|
+
|
|
646
|
+
# Close sessions outside the lock to avoid deadlock
|
|
647
|
+
for instance in instances_to_close:
|
|
648
|
+
try:
|
|
649
|
+
await instance._close_session()
|
|
650
|
+
logger.debug(f"Closed session: {instance._session_id}")
|
|
651
|
+
except Exception as e:
|
|
652
|
+
logger.error(
|
|
653
|
+
f"Error closing session {instance._session_id}: {e}"
|
|
654
|
+
)
|
|
655
|
+
|
|
656
|
+
logger.debug("All browser sessions closed and registry cleared")
|
|
657
|
+
|
|
658
|
+
@classmethod
|
|
659
|
+
async def close_all(cls) -> None:
|
|
660
|
+
"""Alias for close_all_sessions for backward compatibility."""
|
|
661
|
+
await cls.close_all_sessions()
|
|
255
662
|
|
|
256
663
|
# ------------------------------------------------------------------
|
|
257
|
-
#
|
|
664
|
+
# Page interaction
|
|
258
665
|
# ------------------------------------------------------------------
|
|
259
666
|
async def visit(self, url: str) -> str:
|
|
260
|
-
r"""Navigate to
|
|
667
|
+
r"""Navigate current tab to URL."""
|
|
261
668
|
await self.ensure_browser()
|
|
262
|
-
|
|
669
|
+
page = await self.get_page()
|
|
670
|
+
|
|
671
|
+
await page.goto(url, timeout=self.DEFAULT_NAVIGATION_TIMEOUT)
|
|
672
|
+
await page.wait_for_load_state('domcontentloaded')
|
|
263
673
|
|
|
674
|
+
# Try to wait for network idle
|
|
264
675
|
try:
|
|
265
|
-
await
|
|
266
|
-
|
|
267
|
-
wait_until="domcontentloaded",
|
|
268
|
-
timeout=self.DEFAULT_NAVIGATION_TIMEOUT,
|
|
676
|
+
await page.wait_for_load_state(
|
|
677
|
+
'networkidle', timeout=self.NETWORK_IDLE_TIMEOUT
|
|
269
678
|
)
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
)
|
|
275
|
-
except Exception:
|
|
276
|
-
pass # Network idle timeout is not critical
|
|
277
|
-
return f"Visited {url}"
|
|
278
|
-
except Exception as e:
|
|
279
|
-
return f"Error visiting {url}: {e}"
|
|
679
|
+
except Exception:
|
|
680
|
+
logger.debug("Network idle timeout - continuing anyway")
|
|
681
|
+
|
|
682
|
+
return f"Navigated to {url}"
|
|
280
683
|
|
|
281
684
|
async def get_snapshot(
|
|
282
685
|
self, *, force_refresh: bool = False, diff_only: bool = False
|
|
283
686
|
) -> str:
|
|
284
|
-
|
|
285
|
-
|
|
687
|
+
r"""Get snapshot for current tab."""
|
|
688
|
+
if not self.snapshot:
|
|
689
|
+
return "<empty>"
|
|
286
690
|
return await self.snapshot.capture(
|
|
287
691
|
force_refresh=force_refresh, diff_only=diff_only
|
|
288
692
|
)
|
|
289
693
|
|
|
290
|
-
async def exec_action(self, action: Dict[str, Any]) -> str:
|
|
291
|
-
|
|
292
|
-
|
|
694
|
+
async def exec_action(self, action: Dict[str, Any]) -> Dict[str, Any]:
|
|
695
|
+
r"""Execute action on current tab."""
|
|
696
|
+
if not self.executor:
|
|
697
|
+
return {
|
|
698
|
+
"success": False,
|
|
699
|
+
"message": "No executor available",
|
|
700
|
+
"details": {},
|
|
701
|
+
}
|
|
293
702
|
return await self.executor.execute(action)
|
|
294
703
|
|
|
295
|
-
# Low-level accessors -------------------------------------------------
|
|
296
704
|
async def get_page(self) -> "Page":
|
|
705
|
+
r"""Get current active page."""
|
|
297
706
|
await self.ensure_browser()
|
|
298
|
-
|
|
707
|
+
if self._page is None:
|
|
708
|
+
raise RuntimeError("No active page available")
|
|
299
709
|
return self._page
|