camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of camel-ai might be problematic. Click here for more details.

Files changed (32)
  1. camel/__init__.py +1 -1
  2. camel/agents/_types.py +6 -2
  3. camel/agents/chat_agent.py +357 -18
  4. camel/messages/base.py +2 -6
  5. camel/messages/func_message.py +32 -5
  6. camel/services/agent_openapi_server.py +380 -0
  7. camel/societies/workforce/single_agent_worker.py +1 -5
  8. camel/societies/workforce/workforce.py +68 -8
  9. camel/tasks/task.py +2 -2
  10. camel/toolkits/__init__.py +2 -2
  11. camel/toolkits/craw4ai_toolkit.py +27 -7
  12. camel/toolkits/file_write_toolkit.py +110 -31
  13. camel/toolkits/human_toolkit.py +19 -14
  14. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
  15. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
  16. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
  17. camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
  18. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
  19. camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
  20. camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
  21. camel/toolkits/jina_reranker_toolkit.py +3 -4
  22. camel/toolkits/terminal_toolkit.py +189 -48
  23. camel/toolkits/video_download_toolkit.py +1 -2
  24. camel/types/agents/tool_calling_record.py +4 -1
  25. camel/types/enums.py +24 -24
  26. camel/utils/message_summarizer.py +148 -0
  27. camel/utils/tool_result.py +44 -0
  28. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +19 -5
  29. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +31 -28
  30. camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
  31. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
  32. {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0
@@ -146,23 +146,25 @@ class FileWriteToolkit(BaseToolkit):
146
146
  document.save(str(file_path))
147
147
  logger.debug(f"Wrote DOCX to {file_path} with default formatting")
148
148
 
149
- @dependencies_required('pylatex', 'fpdf')
149
+ @dependencies_required('pylatex', 'pymupdf')
150
150
  def _write_pdf_file(
151
- self, file_path: Path, content: str, use_latex: bool = False
151
+ self,
152
+ file_path: Path,
153
+ title: str,
154
+ content: str,
155
+ use_latex: bool = False,
152
156
  ) -> None:
153
157
  r"""Write text content to a PDF file with default formatting.
154
158
 
155
159
  Args:
156
160
  file_path (Path): The target file path.
161
+ title (str): The title of the document.
157
162
  content (str): The text content to write.
158
163
  use_latex (bool): Whether to use LaTeX for rendering. (requires
159
- LaTeX toolchain). If False, uses FPDF for simpler PDF
164
+ LaTeX toolchain). If False, uses PyMuPDF for simpler PDF
160
165
  generation. (default: :obj:`False`)
161
-
162
- Raises:
163
- RuntimeError: If the 'pylatex' or 'fpdf' library is not installed
164
- when use_latex=True.
165
166
  """
167
+ # TODO: table generation need to be improved
166
168
  if use_latex:
167
169
  from pylatex import (
168
170
  Command,
@@ -213,30 +215,105 @@ class FileWriteToolkit(BaseToolkit):
213
215
 
214
216
  logger.info(f"Wrote PDF (with LaTeX) to {file_path}")
215
217
  else:
216
- from fpdf import FPDF
217
-
218
- # Use default formatting values
219
- font_family = 'Arial'
220
- font_size = 12
221
- font_style = ''
222
- line_height = 10
223
- margin = 10
224
-
225
- pdf = FPDF()
226
- pdf.set_margins(margin, margin, margin)
227
-
228
- pdf.add_page()
229
- pdf.set_font(font_family, style=font_style, size=font_size)
230
-
231
- # Split content into paragraphs and add them
232
- for para in content.split('\n'):
233
- if para.strip(): # Skip empty paragraphs
234
- pdf.multi_cell(0, line_height, para)
218
+ import pymupdf
219
+
220
+ # Create a new PDF document
221
+ doc = pymupdf.open()
222
+
223
+ # Add a page
224
+ page = doc.new_page()
225
+
226
+ # Process the content
227
+ lines = content.strip().split('\n')
228
+ document_title = title
229
+
230
+ # Create a TextWriter for writing text to the page
231
+ text_writer = pymupdf.TextWriter(page.rect)
232
+
233
+ # Define fonts
234
+ normal_font = pymupdf.Font(
235
+ "helv"
236
+ ) # Standard font with multilingual support
237
+ bold_font = pymupdf.Font("helv")
238
+
239
+ # Start position for text
240
+ y_pos = 50
241
+ x_pos = 50
242
+
243
+ # Add title
244
+ text_writer.fill_textbox(
245
+ pymupdf.Rect(
246
+ x_pos, y_pos, page.rect.width - x_pos, y_pos + 30
247
+ ),
248
+ document_title,
249
+ fontsize=16,
250
+ )
251
+ y_pos += 40
252
+
253
+ # Process content
254
+ for line in lines:
255
+ stripped_line = line.strip()
256
+
257
+ # Skip empty lines but add some space
258
+ if not stripped_line:
259
+ y_pos += 10
260
+ continue
261
+
262
+ # Handle headers
263
+ if stripped_line.startswith('## '):
264
+ text_writer.fill_textbox(
265
+ pymupdf.Rect(
266
+ x_pos, y_pos, page.rect.width - x_pos, y_pos + 20
267
+ ),
268
+ stripped_line[3:].strip(),
269
+ font=bold_font,
270
+ fontsize=14,
271
+ )
272
+ y_pos += 25
273
+ elif stripped_line.startswith('# '):
274
+ text_writer.fill_textbox(
275
+ pymupdf.Rect(
276
+ x_pos, y_pos, page.rect.width - x_pos, y_pos + 25
277
+ ),
278
+ stripped_line[2:].strip(),
279
+ font=bold_font,
280
+ fontsize=16,
281
+ )
282
+ y_pos += 30
283
+ # Handle horizontal rule
284
+ elif stripped_line == '---':
285
+ page.draw_line(
286
+ pymupdf.Point(x_pos, y_pos + 5),
287
+ pymupdf.Point(page.rect.width - x_pos, y_pos + 5),
288
+ )
289
+ y_pos += 15
290
+ # Regular text
235
291
  else:
236
- pdf.ln(line_height) # Add empty line
237
-
238
- pdf.output(str(file_path))
239
- logger.debug(f"Wrote PDF to {file_path} with custom formatting")
292
+ # Check if we need a new page
293
+ if y_pos > page.rect.height - 50:
294
+ text_writer.write_text(page)
295
+ page = doc.new_page()
296
+ text_writer = pymupdf.TextWriter(page.rect)
297
+ y_pos = 50
298
+
299
+ # Add text to the current page
300
+ text_writer.fill_textbox(
301
+ pymupdf.Rect(
302
+ x_pos, y_pos, page.rect.width - x_pos, y_pos + 15
303
+ ),
304
+ stripped_line,
305
+ font=normal_font,
306
+ )
307
+ y_pos += 15
308
+
309
+ # Write the accumulated text to the last page
310
+ text_writer.write_text(page)
311
+
312
+ # Save the PDF
313
+ doc.save(str(file_path))
314
+ doc.close()
315
+
316
+ logger.debug(f"Wrote PDF to {file_path} with PyMuPDF formatting")
240
317
 
241
318
  def _write_csv_file(
242
319
  self,
@@ -338,6 +415,7 @@ class FileWriteToolkit(BaseToolkit):
338
415
 
339
416
  def write_to_file(
340
417
  self,
418
+ title: str,
341
419
  content: Union[str, List[List[str]]],
342
420
  filename: str,
343
421
  encoding: Optional[str] = None,
@@ -351,6 +429,7 @@ class FileWriteToolkit(BaseToolkit):
351
429
  and HTML (.html, .htm).
352
430
 
353
431
  Args:
432
+ title (str): The title of the document.
354
433
  content (Union[str, List[List[str]]]): The content to write to the
355
434
  file. Content format varies by file type:
356
435
  - Text formats (txt, md, html, yaml): string
@@ -388,7 +467,7 @@ class FileWriteToolkit(BaseToolkit):
388
467
  self._write_docx_file(file_path, str(content))
389
468
  elif extension == ".pdf":
390
469
  self._write_pdf_file(
391
- file_path, str(content), use_latex=use_latex
470
+ file_path, title, str(content), use_latex=use_latex
392
471
  )
393
472
  elif extension == ".csv":
394
473
  self._write_csv_file(
@@ -22,7 +22,12 @@ logger = logging.getLogger(__name__)
22
22
 
23
23
 
24
24
  class HumanToolkit(BaseToolkit):
25
- r"""A class representing a toolkit for human interaction."""
25
+ r"""A class representing a toolkit for human interaction.
26
+
27
+ Note:
28
+ This toolkit should be called to send a tidy message to the user to
29
+ keep them informed.
30
+ """
26
31
 
27
32
  def ask_human_via_console(self, question: str) -> str:
28
33
  r"""Use this tool to ask a question to the user when you are stuck,
@@ -48,21 +53,21 @@ class HumanToolkit(BaseToolkit):
48
53
  return reply
49
54
 
50
55
  def send_message_to_user(self, message: str) -> None:
51
- r"""Use this tool to send a message to the user to keep them
52
- informed about your progress, decisions, or actions.
53
- This is a one-way communication channel from you to the user and does
54
- not require a response. You should use it to:
55
- - Announce what you are about to do
56
- (e.g., "I will now search for papers on GUI Agents.")
57
- - Report the result of an action
58
- (e.g., "I have found 15 relevant papers.")
59
- - State a decision
60
- (e.g., "I will now analyze the top 10 papers.")
61
- - Inform the user about your current state if you are performing a
62
- task.
56
+ r"""Use this tool to send a tidy message to the user in one short
57
+ sentence.
58
+
59
+ This one-way tool keeps the user informed about your progress,
60
+ decisions, or actions. It does not require a response.
61
+ You should use it to:
62
+ - Announce what you are about to do (e.g., "I will now search for
63
+ papers on GUI Agents.").
64
+ - Report the result of an action (e.g., "I have found 15 relevant
65
+ papers.").
66
+ - State a decision (e.g., "I will now analyze the top 10 papers.").
67
+ - Give a status update during a long-running task.
63
68
 
64
69
  Args:
65
- message (str): The message to send to the user.
70
+ message (str): The tidy and informative message for the user.
66
71
  """
67
72
  print(f"\nAgent Message:\n{message}")
68
73
  logger.info(f"\nAgent Message:\n{message}")
@@ -11,8 +11,8 @@
11
11
  # See the License for the specific language governing permissions and
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
- from .browser_non_visual_toolkit import BrowserNonVisualToolkit
14
+ from .hybrid_browser_toolkit import HybridBrowserToolkit
15
15
 
16
16
  __all__ = [
17
- "BrowserNonVisualToolkit",
17
+ "HybridBrowserToolkit",
18
18
  ]
@@ -24,6 +24,7 @@ class ActionExecutor:
24
24
  # Configuration constants
25
25
  DEFAULT_TIMEOUT = 5000 # 5 seconds
26
26
  SHORT_TIMEOUT = 2000 # 2 seconds
27
+ MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
27
28
 
28
29
  def __init__(self, page: "Page"):
29
30
  self.page = page
@@ -32,6 +33,7 @@ class ActionExecutor:
32
33
  # Public helpers
33
34
  # ------------------------------------------------------------------
34
35
  async def execute(self, action: Dict[str, Any]) -> str:
36
+ r"""Execute an action and return the result description."""
35
37
  if not action:
36
38
  return "No action to execute"
37
39
 
@@ -64,32 +66,46 @@ class ActionExecutor:
64
66
  # Internal handlers
65
67
  # ------------------------------------------------------------------
66
68
  async def _click(self, action: Dict[str, Any]) -> str:
69
+ r"""Handle click actions with multiple fallback strategies."""
67
70
  ref = action.get("ref")
68
71
  text = action.get("text")
69
72
  selector = action.get("selector")
70
73
  if not (ref or text or selector):
71
74
  return "Error: click requires ref/text/selector"
72
75
 
76
+ # Build strategies in priority order: ref > selector > text
73
77
  strategies = []
78
+ if ref:
79
+ strategies.append(f"[aria-ref='{ref}']")
74
80
  if selector:
75
81
  strategies.append(selector)
76
82
  if text:
77
83
  strategies.append(f'text="{text}"')
78
- if ref:
79
- strategies.append(f"[aria-ref='{ref}']")
80
84
 
85
+ # Strategy 1: Try Playwright force click for each selector
81
86
  for sel in strategies:
82
87
  try:
83
88
  if await self.page.locator(sel).count() > 0:
84
89
  await self.page.click(
85
- sel, timeout=self.SHORT_TIMEOUT, force=True
90
+ sel, timeout=self.DEFAULT_TIMEOUT, force=True
86
91
  )
87
- return f"Clicked element via {sel}"
92
+ return f"Clicked element via force: {sel}"
88
93
  except Exception:
89
- pass
90
- return "Error: Could not click element"
94
+ continue
95
+
96
+ # Strategy 2: Try JavaScript click as fallback
97
+ for sel in strategies:
98
+ try:
99
+ await self.page.locator(sel).first.evaluate("el => el.click()")
100
+ await asyncio.sleep(0.1) # Brief wait for effects
101
+ return f"Clicked element via JS: {sel}"
102
+ except Exception:
103
+ continue
104
+
105
+ return "Error: All click strategies failed"
91
106
 
92
107
  async def _type(self, action: Dict[str, Any]) -> str:
108
+ r"""Handle typing text into input fields."""
93
109
  ref = action.get("ref")
94
110
  selector = action.get("selector")
95
111
  text = action.get("text", "")
@@ -103,6 +119,7 @@ class ActionExecutor:
103
119
  return f"Type failed: {exc}"
104
120
 
105
121
  async def _select(self, action: Dict[str, Any]) -> str:
122
+ r"""Handle selecting options from dropdowns."""
106
123
  ref = action.get("ref")
107
124
  selector = action.get("selector")
108
125
  value = action.get("value", "")
@@ -118,8 +135,9 @@ class ActionExecutor:
118
135
  return f"Select failed: {exc}"
119
136
 
120
137
  async def _wait(self, action: Dict[str, Any]) -> str:
138
+ r"""Handle wait actions."""
121
139
  if "timeout" in action:
122
- ms = action["timeout"]
140
+ ms = int(action["timeout"])
123
141
  await asyncio.sleep(ms / 1000)
124
142
  return f"Waited {ms}ms"
125
143
  if "selector" in action:
@@ -131,6 +149,7 @@ class ActionExecutor:
131
149
  return "Error: wait requires timeout/selector"
132
150
 
133
151
  async def _extract(self, action: Dict[str, Any]) -> str:
152
+ r"""Handle text extraction from elements."""
134
153
  ref = action.get("ref")
135
154
  if not ref:
136
155
  return "Error: extract requires ref"
@@ -140,6 +159,7 @@ class ActionExecutor:
140
159
  return f"Extracted: {txt[:100] if txt else 'None'}"
141
160
 
142
161
  async def _scroll(self, action: Dict[str, Any]) -> str:
162
+ r"""Handle page scrolling with safe parameter validation."""
143
163
  direction = action.get("direction", "down")
144
164
  amount = action.get("amount", 300)
145
165
 
@@ -151,18 +171,22 @@ class ActionExecutor:
151
171
  # Safely convert amount to integer and clamp to reasonable range
152
172
  amount_int = int(amount)
153
173
  amount_int = max(
154
- -5000, min(5000, amount_int)
155
- ) # Clamp between -5000 and 5000
174
+ -self.MAX_SCROLL_AMOUNT,
175
+ min(self.MAX_SCROLL_AMOUNT, amount_int),
176
+ ) # Clamp to MAX_SCROLL_AMOUNT range
156
177
  except (ValueError, TypeError):
157
178
  return "Error: amount must be a valid number"
158
179
 
159
180
  # Use safe evaluation with bound parameters
160
181
  scroll_offset = amount_int if direction == "down" else -amount_int
161
- await self.page.evaluate(f"window.scrollBy(0, {scroll_offset})")
182
+ await self.page.evaluate(
183
+ "offset => window.scrollBy(0, offset)", scroll_offset
184
+ )
162
185
  await asyncio.sleep(0.5)
163
186
  return f"Scrolled {direction} by {abs(amount_int)}px"
164
187
 
165
188
  async def _enter(self, action: Dict[str, Any]) -> str:
189
+ r"""Handle Enter key press actions."""
166
190
  ref = action.get("ref")
167
191
  selector = action.get("selector")
168
192
  if ref:
@@ -175,16 +199,28 @@ class ActionExecutor:
175
199
 
176
200
  # utilities
177
201
  async def _wait_dom_stable(self) -> None:
202
+ r"""Wait for DOM to become stable before executing actions."""
178
203
  try:
204
+ # Wait for basic DOM content loading
179
205
  await self.page.wait_for_load_state(
180
206
  'domcontentloaded', timeout=self.SHORT_TIMEOUT
181
207
  )
208
+
209
+ # Try to wait for network idle briefly
210
+ try:
211
+ await self.page.wait_for_load_state(
212
+ 'networkidle', timeout=self.SHORT_TIMEOUT
213
+ )
214
+ except Exception:
215
+ pass # Network idle is optional
216
+
182
217
  except Exception:
183
- pass
218
+ pass # Don't fail if wait times out
184
219
 
185
220
  # static helpers
186
221
  @staticmethod
187
222
  def should_update_snapshot(action: Dict[str, Any]) -> bool:
223
+ r"""Determine if an action requires a snapshot update."""
188
224
  change_types = {
189
225
  "click",
190
226
  "type",
@@ -12,24 +12,24 @@
12
12
  # limitations under the License.
13
13
  # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
14
14
  import json
15
- import logging
16
15
  import re
17
16
  from typing import TYPE_CHECKING, Any, Dict, List, Optional
18
17
 
18
+ from camel.logger import get_logger
19
19
  from camel.models import BaseModelBackend, ModelFactory
20
20
  from camel.types import ModelPlatformType, ModelType
21
21
 
22
22
  from .actions import ActionExecutor
23
- from .nv_browser_session import NVBrowserSession
23
+ from .browser_session import NVBrowserSession
24
24
 
25
25
  if TYPE_CHECKING:
26
26
  from camel.agents import ChatAgent
27
27
 
28
- logger = logging.getLogger(__name__)
28
+ logger = get_logger(__name__)
29
29
 
30
30
 
31
31
  class PlaywrightLLMAgent:
32
- """High-level orchestration: snapshot ↔ LLM ↔ action executor."""
32
+ r"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
33
33
 
34
34
  # System prompt as class constant to avoid recreation
35
35
  SYSTEM_PROMPT = """
@@ -90,8 +90,8 @@ what was accomplished
90
90
  self.action_history: List[Dict[str, Any]] = []
91
91
  if model_backend is None:
92
92
  model_backend = ModelFactory.create(
93
- model_platform=ModelPlatformType.OPENAI,
94
- model_type=ModelType.GPT_4O_MINI,
93
+ model_platform=ModelPlatformType.DEFAULT,
94
+ model_type=ModelType.DEFAULT,
95
95
  model_config_dict={"temperature": 0, "top_p": 1},
96
96
  )
97
97
  self.model_backend = model_backend
@@ -99,16 +99,19 @@ what was accomplished
99
99
  self._chat_agent: Optional[ChatAgent] = None
100
100
 
101
101
  async def navigate(self, url: str) -> str:
102
+ r"""Navigate to a URL and return the snapshot."""
102
103
  try:
103
104
  # NVBrowserSession handles waits internally
104
105
  logger.debug("Navigated to URL: %s", url)
105
106
  await self._session.visit(url)
106
107
  return await self._session.get_snapshot(force_refresh=True)
107
108
  except Exception as exc:
108
- return f"Error: could not navigate - {exc}"
109
+ error_msg = f"Error: could not navigate to {url} - {exc}"
110
+ logger.error(error_msg)
111
+ return error_msg
109
112
 
110
113
  def _get_chat_agent(self) -> "ChatAgent":
111
- """Get or create the ChatAgent instance."""
114
+ r"""Get or create the ChatAgent instance."""
112
115
  from camel.agents import ChatAgent
113
116
 
114
117
  if self._chat_agent is None:
@@ -165,12 +168,16 @@ what was accomplished
165
168
  logger.warning(
166
169
  "Could not parse JSON from LLM response: %s", content[:200]
167
170
  )
171
+ return self._get_fallback_response("Parsing error")
172
+
173
+ def _get_fallback_response(self, error_msg: str) -> Dict[str, Any]:
174
+ r"""Generate a fallback response structure."""
168
175
  return {
169
- "plan": ["Could not parse response"],
176
+ "plan": [f"Could not parse response: {error_msg}"],
170
177
  "action": {
171
178
  "type": "finish",
172
179
  "ref": None,
173
- "summary": "Parsing error",
180
+ "summary": f"Parsing error: {error_msg}",
174
181
  },
175
182
  }
176
183
 
@@ -181,7 +188,7 @@ what was accomplished
181
188
  is_initial: bool,
182
189
  history: Optional[List[Dict[str, Any]]] = None,
183
190
  ) -> Dict[str, Any]:
184
- """Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
191
+ r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
185
192
  # Build user message
186
193
  if is_initial:
187
194
  user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
@@ -208,6 +215,7 @@ what was accomplished
208
215
  return self._safe_parse_json(content)
209
216
 
210
217
  async def process_command(self, prompt: str, max_steps: int = 15):
218
+ r"""Process a command using LLM-guided browser automation."""
211
219
  # initial full snapshot
212
220
  full_snapshot = await self._session.get_snapshot()
213
221
  assert self._session.snapshot is not None
@@ -270,9 +278,11 @@ what was accomplished
270
278
  logger.info("Process completed with %d steps", steps)
271
279
 
272
280
  async def _run_action(self, action: Dict[str, Any]) -> str:
281
+ r"""Execute a single action and return the result."""
273
282
  if action.get("type") == "navigate":
274
283
  return await self.navigate(action.get("url", ""))
275
284
  return await self._session.exec_action(action)
276
285
 
277
286
  async def close(self):
287
+ r"""Clean up browser session and resources."""
278
288
  await self._session.close()
@@ -57,13 +57,12 @@ class NVBrowserSession:
57
57
 
58
58
  def __new__(
59
59
  cls, *, headless: bool = True, user_data_dir: Optional[str] = None
60
- ):
61
- loop = asyncio.get_running_loop()
62
- if loop not in cls._sessions:
63
- instance = super().__new__(cls)
64
- instance._initialized = False
65
- cls._sessions[loop] = instance
66
- return cls._sessions[loop]
60
+ ) -> "NVBrowserSession":
61
+ # Defer event loop lookup until we actually need it
62
+ # This allows creation outside of async context
63
+ instance = super().__new__(cls)
64
+ instance._initialized = False
65
+ return instance
67
66
 
68
67
  def __init__(
69
68
  self, *, headless: bool = True, user_data_dir: Optional[str] = None
@@ -90,6 +89,47 @@ class NVBrowserSession:
90
89
  # Browser lifecycle helpers
91
90
  # ------------------------------------------------------------------
92
91
  async def ensure_browser(self) -> None:
92
+ r"""Ensure browser is ready, implementing singleton pattern per event
93
+ loop.
94
+ """
95
+ # Check if we need to reuse or create a session for this event loop
96
+ try:
97
+ loop = asyncio.get_running_loop()
98
+ except RuntimeError as e:
99
+ raise RuntimeError(
100
+ "ensure_browser() must be called from within an async context"
101
+ ) from e
102
+
103
+ # Check if there's already a session for this loop
104
+ if loop in self._sessions and self._sessions[loop] is not self:
105
+ # Copy the existing session's browser resources
106
+ existing = self._sessions[loop]
107
+ # Wait for existing session to be fully initialized
108
+ async with existing._ensure_lock:
109
+ if (
110
+ existing._initialized
111
+ and existing._page is not None
112
+ and existing._playwright is not None
113
+ ):
114
+ try:
115
+ # Verify the page is still responsive
116
+ await existing._page.title()
117
+ self._playwright = existing._playwright
118
+ self._browser = existing._browser
119
+ self._context = existing._context
120
+ self._page = existing._page
121
+ self.snapshot = existing.snapshot
122
+ self.executor = existing.executor
123
+ self._initialized = True
124
+ return
125
+ except Exception:
126
+ # Existing session is broken, continue with new
127
+ # initialization
128
+ pass
129
+
130
+ # Register this instance for the current loop
131
+ self._sessions[loop] = self
132
+
93
133
  # Serialise initialisation to avoid race conditions where multiple
94
134
  # concurrent coroutine calls create multiple browser instances for
95
135
  # the same NVBrowserSession.
@@ -98,6 +138,7 @@ class NVBrowserSession:
98
138
 
99
139
  # Moved original logic to helper
100
140
  async def _ensure_browser_inner(self) -> None:
141
+ r"""Internal browser initialization logic."""
101
142
  from playwright.async_api import async_playwright
102
143
 
103
144
  if self._page is not None:
@@ -144,11 +185,23 @@ class NVBrowserSession:
144
185
  r"""Close all browser resources, ensuring cleanup even if some
145
186
  operations fail.
146
187
  """
147
- # The close method will now only close the *current* event-loop's
148
- # browser instance. Use `close_all_sessions` for a full cleanup.
188
+ # Remove this session from the sessions dict and close resources
189
+ try:
190
+ loop = asyncio.get_running_loop()
191
+ if loop in self._sessions and self._sessions[loop] is self:
192
+ del self._sessions[loop]
193
+ except RuntimeError:
194
+ pass # No running loop, that's okay
195
+
196
+ # Clean up any stale loop references
197
+ stale_loops = [loop for loop in self._sessions if loop.is_closed()]
198
+ for loop in stale_loops:
199
+ del self._sessions[loop]
200
+
149
201
  await self._close_session()
150
202
 
151
203
  async def _close_session(self) -> None:
204
+ r"""Internal session cleanup with comprehensive error handling."""
152
205
  errors: list[str] = []
153
206
 
154
207
  # Close context first (which closes pages)
@@ -204,6 +257,7 @@ class NVBrowserSession:
204
257
  # Convenience wrappers around common actions
205
258
  # ------------------------------------------------------------------
206
259
  async def visit(self, url: str) -> str:
260
+ r"""Navigate to a URL with proper error handling."""
207
261
  await self.ensure_browser()
208
262
  assert self._page is not None
209
263
 
@@ -233,7 +287,7 @@ class NVBrowserSession:
233
287
  force_refresh=force_refresh, diff_only=diff_only
234
288
  )
235
289
 
236
- async def exec_action(self, action: dict[str, Any]) -> str:
290
+ async def exec_action(self, action: Dict[str, Any]) -> str:
237
291
  await self.ensure_browser()
238
292
  assert self.executor is not None
239
293
  return await self.executor.execute(action)