camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/_types.py +6 -2
- camel/agents/chat_agent.py +357 -18
- camel/messages/base.py +2 -6
- camel/messages/func_message.py +32 -5
- camel/services/agent_openapi_server.py +380 -0
- camel/societies/workforce/single_agent_worker.py +1 -5
- camel/societies/workforce/workforce.py +68 -8
- camel/tasks/task.py +2 -2
- camel/toolkits/__init__.py +2 -2
- camel/toolkits/craw4ai_toolkit.py +27 -7
- camel/toolkits/file_write_toolkit.py +110 -31
- camel/toolkits/human_toolkit.py +19 -14
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/__init__.py +2 -2
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/actions.py +47 -11
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/agent.py +21 -11
- camel/toolkits/{non_visual_browser_toolkit/nv_browser_session.py → hybrid_browser_toolkit/browser_session.py} +64 -10
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +1002 -0
- camel/toolkits/{non_visual_browser_toolkit → hybrid_browser_toolkit}/snapshot.py +16 -4
- camel/toolkits/{non_visual_browser_toolkit/snapshot.js → hybrid_browser_toolkit/unified_analyzer.js} +171 -15
- camel/toolkits/jina_reranker_toolkit.py +3 -4
- camel/toolkits/terminal_toolkit.py +189 -48
- camel/toolkits/video_download_toolkit.py +1 -2
- camel/types/agents/tool_calling_record.py +4 -1
- camel/types/enums.py +24 -24
- camel/utils/message_summarizer.py +148 -0
- camel/utils/tool_result.py +44 -0
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/METADATA +19 -5
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/RECORD +31 -28
- camel/toolkits/non_visual_browser_toolkit/browser_non_visual_toolkit.py +0 -446
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a3.dist-info}/licenses/LICENSE +0 -0
|
@@ -146,23 +146,25 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
146
146
|
document.save(str(file_path))
|
|
147
147
|
logger.debug(f"Wrote DOCX to {file_path} with default formatting")
|
|
148
148
|
|
|
149
|
-
@dependencies_required('pylatex', '
|
|
149
|
+
@dependencies_required('pylatex', 'pymupdf')
|
|
150
150
|
def _write_pdf_file(
|
|
151
|
-
self,
|
|
151
|
+
self,
|
|
152
|
+
file_path: Path,
|
|
153
|
+
title: str,
|
|
154
|
+
content: str,
|
|
155
|
+
use_latex: bool = False,
|
|
152
156
|
) -> None:
|
|
153
157
|
r"""Write text content to a PDF file with default formatting.
|
|
154
158
|
|
|
155
159
|
Args:
|
|
156
160
|
file_path (Path): The target file path.
|
|
161
|
+
title (str): The title of the document.
|
|
157
162
|
content (str): The text content to write.
|
|
158
163
|
use_latex (bool): Whether to use LaTeX for rendering. (requires
|
|
159
|
-
LaTeX toolchain). If False, uses
|
|
164
|
+
LaTeX toolchain). If False, uses PyMuPDF for simpler PDF
|
|
160
165
|
generation. (default: :obj:`False`)
|
|
161
|
-
|
|
162
|
-
Raises:
|
|
163
|
-
RuntimeError: If the 'pylatex' or 'fpdf' library is not installed
|
|
164
|
-
when use_latex=True.
|
|
165
166
|
"""
|
|
167
|
+
# TODO: table generation need to be improved
|
|
166
168
|
if use_latex:
|
|
167
169
|
from pylatex import (
|
|
168
170
|
Command,
|
|
@@ -213,30 +215,105 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
213
215
|
|
|
214
216
|
logger.info(f"Wrote PDF (with LaTeX) to {file_path}")
|
|
215
217
|
else:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
#
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
#
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
218
|
+
import pymupdf
|
|
219
|
+
|
|
220
|
+
# Create a new PDF document
|
|
221
|
+
doc = pymupdf.open()
|
|
222
|
+
|
|
223
|
+
# Add a page
|
|
224
|
+
page = doc.new_page()
|
|
225
|
+
|
|
226
|
+
# Process the content
|
|
227
|
+
lines = content.strip().split('\n')
|
|
228
|
+
document_title = title
|
|
229
|
+
|
|
230
|
+
# Create a TextWriter for writing text to the page
|
|
231
|
+
text_writer = pymupdf.TextWriter(page.rect)
|
|
232
|
+
|
|
233
|
+
# Define fonts
|
|
234
|
+
normal_font = pymupdf.Font(
|
|
235
|
+
"helv"
|
|
236
|
+
) # Standard font with multilingual support
|
|
237
|
+
bold_font = pymupdf.Font("helv")
|
|
238
|
+
|
|
239
|
+
# Start position for text
|
|
240
|
+
y_pos = 50
|
|
241
|
+
x_pos = 50
|
|
242
|
+
|
|
243
|
+
# Add title
|
|
244
|
+
text_writer.fill_textbox(
|
|
245
|
+
pymupdf.Rect(
|
|
246
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 30
|
|
247
|
+
),
|
|
248
|
+
document_title,
|
|
249
|
+
fontsize=16,
|
|
250
|
+
)
|
|
251
|
+
y_pos += 40
|
|
252
|
+
|
|
253
|
+
# Process content
|
|
254
|
+
for line in lines:
|
|
255
|
+
stripped_line = line.strip()
|
|
256
|
+
|
|
257
|
+
# Skip empty lines but add some space
|
|
258
|
+
if not stripped_line:
|
|
259
|
+
y_pos += 10
|
|
260
|
+
continue
|
|
261
|
+
|
|
262
|
+
# Handle headers
|
|
263
|
+
if stripped_line.startswith('## '):
|
|
264
|
+
text_writer.fill_textbox(
|
|
265
|
+
pymupdf.Rect(
|
|
266
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 20
|
|
267
|
+
),
|
|
268
|
+
stripped_line[3:].strip(),
|
|
269
|
+
font=bold_font,
|
|
270
|
+
fontsize=14,
|
|
271
|
+
)
|
|
272
|
+
y_pos += 25
|
|
273
|
+
elif stripped_line.startswith('# '):
|
|
274
|
+
text_writer.fill_textbox(
|
|
275
|
+
pymupdf.Rect(
|
|
276
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 25
|
|
277
|
+
),
|
|
278
|
+
stripped_line[2:].strip(),
|
|
279
|
+
font=bold_font,
|
|
280
|
+
fontsize=16,
|
|
281
|
+
)
|
|
282
|
+
y_pos += 30
|
|
283
|
+
# Handle horizontal rule
|
|
284
|
+
elif stripped_line == '---':
|
|
285
|
+
page.draw_line(
|
|
286
|
+
pymupdf.Point(x_pos, y_pos + 5),
|
|
287
|
+
pymupdf.Point(page.rect.width - x_pos, y_pos + 5),
|
|
288
|
+
)
|
|
289
|
+
y_pos += 15
|
|
290
|
+
# Regular text
|
|
235
291
|
else:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
292
|
+
# Check if we need a new page
|
|
293
|
+
if y_pos > page.rect.height - 50:
|
|
294
|
+
text_writer.write_text(page)
|
|
295
|
+
page = doc.new_page()
|
|
296
|
+
text_writer = pymupdf.TextWriter(page.rect)
|
|
297
|
+
y_pos = 50
|
|
298
|
+
|
|
299
|
+
# Add text to the current page
|
|
300
|
+
text_writer.fill_textbox(
|
|
301
|
+
pymupdf.Rect(
|
|
302
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 15
|
|
303
|
+
),
|
|
304
|
+
stripped_line,
|
|
305
|
+
font=normal_font,
|
|
306
|
+
)
|
|
307
|
+
y_pos += 15
|
|
308
|
+
|
|
309
|
+
# Write the accumulated text to the last page
|
|
310
|
+
text_writer.write_text(page)
|
|
311
|
+
|
|
312
|
+
# Save the PDF
|
|
313
|
+
doc.save(str(file_path))
|
|
314
|
+
doc.close()
|
|
315
|
+
|
|
316
|
+
logger.debug(f"Wrote PDF to {file_path} with PyMuPDF formatting")
|
|
240
317
|
|
|
241
318
|
def _write_csv_file(
|
|
242
319
|
self,
|
|
@@ -338,6 +415,7 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
338
415
|
|
|
339
416
|
def write_to_file(
|
|
340
417
|
self,
|
|
418
|
+
title: str,
|
|
341
419
|
content: Union[str, List[List[str]]],
|
|
342
420
|
filename: str,
|
|
343
421
|
encoding: Optional[str] = None,
|
|
@@ -351,6 +429,7 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
351
429
|
and HTML (.html, .htm).
|
|
352
430
|
|
|
353
431
|
Args:
|
|
432
|
+
title (str): The title of the document.
|
|
354
433
|
content (Union[str, List[List[str]]]): The content to write to the
|
|
355
434
|
file. Content format varies by file type:
|
|
356
435
|
- Text formats (txt, md, html, yaml): string
|
|
@@ -388,7 +467,7 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
388
467
|
self._write_docx_file(file_path, str(content))
|
|
389
468
|
elif extension == ".pdf":
|
|
390
469
|
self._write_pdf_file(
|
|
391
|
-
file_path, str(content), use_latex=use_latex
|
|
470
|
+
file_path, title, str(content), use_latex=use_latex
|
|
392
471
|
)
|
|
393
472
|
elif extension == ".csv":
|
|
394
473
|
self._write_csv_file(
|
camel/toolkits/human_toolkit.py
CHANGED
|
@@ -22,7 +22,12 @@ logger = logging.getLogger(__name__)
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class HumanToolkit(BaseToolkit):
|
|
25
|
-
r"""A class representing a toolkit for human interaction.
|
|
25
|
+
r"""A class representing a toolkit for human interaction.
|
|
26
|
+
|
|
27
|
+
Note:
|
|
28
|
+
This toolkit should be called to send a tidy message to the user to
|
|
29
|
+
keep them informed.
|
|
30
|
+
"""
|
|
26
31
|
|
|
27
32
|
def ask_human_via_console(self, question: str) -> str:
|
|
28
33
|
r"""Use this tool to ask a question to the user when you are stuck,
|
|
@@ -48,21 +53,21 @@ class HumanToolkit(BaseToolkit):
|
|
|
48
53
|
return reply
|
|
49
54
|
|
|
50
55
|
def send_message_to_user(self, message: str) -> None:
|
|
51
|
-
r"""Use this tool to send a message to the user
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
-
|
|
58
|
-
|
|
59
|
-
-
|
|
60
|
-
|
|
61
|
-
-
|
|
62
|
-
task.
|
|
56
|
+
r"""Use this tool to send a tidy message to the user in one short
|
|
57
|
+
sentence.
|
|
58
|
+
|
|
59
|
+
This one-way tool keeps the user informed about your progress,
|
|
60
|
+
decisions, or actions. It does not require a response.
|
|
61
|
+
You should use it to:
|
|
62
|
+
- Announce what you are about to do (e.g., "I will now search for
|
|
63
|
+
papers on GUI Agents.").
|
|
64
|
+
- Report the result of an action (e.g., "I have found 15 relevant
|
|
65
|
+
papers.").
|
|
66
|
+
- State a decision (e.g., "I will now analyze the top 10 papers.").
|
|
67
|
+
- Give a status update during a long-running task.
|
|
63
68
|
|
|
64
69
|
Args:
|
|
65
|
-
message (str): The
|
|
70
|
+
message (str): The tidy and informative message for the user.
|
|
66
71
|
"""
|
|
67
72
|
print(f"\nAgent Message:\n{message}")
|
|
68
73
|
logger.info(f"\nAgent Message:\n{message}")
|
|
@@ -11,8 +11,8 @@
|
|
|
11
11
|
# See the License for the specific language governing permissions and
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
-
from .
|
|
14
|
+
from .hybrid_browser_toolkit import HybridBrowserToolkit
|
|
15
15
|
|
|
16
16
|
__all__ = [
|
|
17
|
-
"
|
|
17
|
+
"HybridBrowserToolkit",
|
|
18
18
|
]
|
|
@@ -24,6 +24,7 @@ class ActionExecutor:
|
|
|
24
24
|
# Configuration constants
|
|
25
25
|
DEFAULT_TIMEOUT = 5000 # 5 seconds
|
|
26
26
|
SHORT_TIMEOUT = 2000 # 2 seconds
|
|
27
|
+
MAX_SCROLL_AMOUNT = 5000 # Maximum scroll distance in pixels
|
|
27
28
|
|
|
28
29
|
def __init__(self, page: "Page"):
|
|
29
30
|
self.page = page
|
|
@@ -32,6 +33,7 @@ class ActionExecutor:
|
|
|
32
33
|
# Public helpers
|
|
33
34
|
# ------------------------------------------------------------------
|
|
34
35
|
async def execute(self, action: Dict[str, Any]) -> str:
|
|
36
|
+
r"""Execute an action and return the result description."""
|
|
35
37
|
if not action:
|
|
36
38
|
return "No action to execute"
|
|
37
39
|
|
|
@@ -64,32 +66,46 @@ class ActionExecutor:
|
|
|
64
66
|
# Internal handlers
|
|
65
67
|
# ------------------------------------------------------------------
|
|
66
68
|
async def _click(self, action: Dict[str, Any]) -> str:
|
|
69
|
+
r"""Handle click actions with multiple fallback strategies."""
|
|
67
70
|
ref = action.get("ref")
|
|
68
71
|
text = action.get("text")
|
|
69
72
|
selector = action.get("selector")
|
|
70
73
|
if not (ref or text or selector):
|
|
71
74
|
return "Error: click requires ref/text/selector"
|
|
72
75
|
|
|
76
|
+
# Build strategies in priority order: ref > selector > text
|
|
73
77
|
strategies = []
|
|
78
|
+
if ref:
|
|
79
|
+
strategies.append(f"[aria-ref='{ref}']")
|
|
74
80
|
if selector:
|
|
75
81
|
strategies.append(selector)
|
|
76
82
|
if text:
|
|
77
83
|
strategies.append(f'text="{text}"')
|
|
78
|
-
if ref:
|
|
79
|
-
strategies.append(f"[aria-ref='{ref}']")
|
|
80
84
|
|
|
85
|
+
# Strategy 1: Try Playwright force click for each selector
|
|
81
86
|
for sel in strategies:
|
|
82
87
|
try:
|
|
83
88
|
if await self.page.locator(sel).count() > 0:
|
|
84
89
|
await self.page.click(
|
|
85
|
-
sel, timeout=self.
|
|
90
|
+
sel, timeout=self.DEFAULT_TIMEOUT, force=True
|
|
86
91
|
)
|
|
87
|
-
return f"Clicked element via {sel}"
|
|
92
|
+
return f"Clicked element via force: {sel}"
|
|
88
93
|
except Exception:
|
|
89
|
-
|
|
90
|
-
|
|
94
|
+
continue
|
|
95
|
+
|
|
96
|
+
# Strategy 2: Try JavaScript click as fallback
|
|
97
|
+
for sel in strategies:
|
|
98
|
+
try:
|
|
99
|
+
await self.page.locator(sel).first.evaluate("el => el.click()")
|
|
100
|
+
await asyncio.sleep(0.1) # Brief wait for effects
|
|
101
|
+
return f"Clicked element via JS: {sel}"
|
|
102
|
+
except Exception:
|
|
103
|
+
continue
|
|
104
|
+
|
|
105
|
+
return "Error: All click strategies failed"
|
|
91
106
|
|
|
92
107
|
async def _type(self, action: Dict[str, Any]) -> str:
|
|
108
|
+
r"""Handle typing text into input fields."""
|
|
93
109
|
ref = action.get("ref")
|
|
94
110
|
selector = action.get("selector")
|
|
95
111
|
text = action.get("text", "")
|
|
@@ -103,6 +119,7 @@ class ActionExecutor:
|
|
|
103
119
|
return f"Type failed: {exc}"
|
|
104
120
|
|
|
105
121
|
async def _select(self, action: Dict[str, Any]) -> str:
|
|
122
|
+
r"""Handle selecting options from dropdowns."""
|
|
106
123
|
ref = action.get("ref")
|
|
107
124
|
selector = action.get("selector")
|
|
108
125
|
value = action.get("value", "")
|
|
@@ -118,8 +135,9 @@ class ActionExecutor:
|
|
|
118
135
|
return f"Select failed: {exc}"
|
|
119
136
|
|
|
120
137
|
async def _wait(self, action: Dict[str, Any]) -> str:
|
|
138
|
+
r"""Handle wait actions."""
|
|
121
139
|
if "timeout" in action:
|
|
122
|
-
ms = action["timeout"]
|
|
140
|
+
ms = int(action["timeout"])
|
|
123
141
|
await asyncio.sleep(ms / 1000)
|
|
124
142
|
return f"Waited {ms}ms"
|
|
125
143
|
if "selector" in action:
|
|
@@ -131,6 +149,7 @@ class ActionExecutor:
|
|
|
131
149
|
return "Error: wait requires timeout/selector"
|
|
132
150
|
|
|
133
151
|
async def _extract(self, action: Dict[str, Any]) -> str:
|
|
152
|
+
r"""Handle text extraction from elements."""
|
|
134
153
|
ref = action.get("ref")
|
|
135
154
|
if not ref:
|
|
136
155
|
return "Error: extract requires ref"
|
|
@@ -140,6 +159,7 @@ class ActionExecutor:
|
|
|
140
159
|
return f"Extracted: {txt[:100] if txt else 'None'}"
|
|
141
160
|
|
|
142
161
|
async def _scroll(self, action: Dict[str, Any]) -> str:
|
|
162
|
+
r"""Handle page scrolling with safe parameter validation."""
|
|
143
163
|
direction = action.get("direction", "down")
|
|
144
164
|
amount = action.get("amount", 300)
|
|
145
165
|
|
|
@@ -151,18 +171,22 @@ class ActionExecutor:
|
|
|
151
171
|
# Safely convert amount to integer and clamp to reasonable range
|
|
152
172
|
amount_int = int(amount)
|
|
153
173
|
amount_int = max(
|
|
154
|
-
-
|
|
155
|
-
|
|
174
|
+
-self.MAX_SCROLL_AMOUNT,
|
|
175
|
+
min(self.MAX_SCROLL_AMOUNT, amount_int),
|
|
176
|
+
) # Clamp to MAX_SCROLL_AMOUNT range
|
|
156
177
|
except (ValueError, TypeError):
|
|
157
178
|
return "Error: amount must be a valid number"
|
|
158
179
|
|
|
159
180
|
# Use safe evaluation with bound parameters
|
|
160
181
|
scroll_offset = amount_int if direction == "down" else -amount_int
|
|
161
|
-
await self.page.evaluate(
|
|
182
|
+
await self.page.evaluate(
|
|
183
|
+
"offset => window.scrollBy(0, offset)", scroll_offset
|
|
184
|
+
)
|
|
162
185
|
await asyncio.sleep(0.5)
|
|
163
186
|
return f"Scrolled {direction} by {abs(amount_int)}px"
|
|
164
187
|
|
|
165
188
|
async def _enter(self, action: Dict[str, Any]) -> str:
|
|
189
|
+
r"""Handle Enter key press actions."""
|
|
166
190
|
ref = action.get("ref")
|
|
167
191
|
selector = action.get("selector")
|
|
168
192
|
if ref:
|
|
@@ -175,16 +199,28 @@ class ActionExecutor:
|
|
|
175
199
|
|
|
176
200
|
# utilities
|
|
177
201
|
async def _wait_dom_stable(self) -> None:
|
|
202
|
+
r"""Wait for DOM to become stable before executing actions."""
|
|
178
203
|
try:
|
|
204
|
+
# Wait for basic DOM content loading
|
|
179
205
|
await self.page.wait_for_load_state(
|
|
180
206
|
'domcontentloaded', timeout=self.SHORT_TIMEOUT
|
|
181
207
|
)
|
|
208
|
+
|
|
209
|
+
# Try to wait for network idle briefly
|
|
210
|
+
try:
|
|
211
|
+
await self.page.wait_for_load_state(
|
|
212
|
+
'networkidle', timeout=self.SHORT_TIMEOUT
|
|
213
|
+
)
|
|
214
|
+
except Exception:
|
|
215
|
+
pass # Network idle is optional
|
|
216
|
+
|
|
182
217
|
except Exception:
|
|
183
|
-
pass
|
|
218
|
+
pass # Don't fail if wait times out
|
|
184
219
|
|
|
185
220
|
# static helpers
|
|
186
221
|
@staticmethod
|
|
187
222
|
def should_update_snapshot(action: Dict[str, Any]) -> bool:
|
|
223
|
+
r"""Determine if an action requires a snapshot update."""
|
|
188
224
|
change_types = {
|
|
189
225
|
"click",
|
|
190
226
|
"type",
|
|
@@ -12,24 +12,24 @@
|
|
|
12
12
|
# limitations under the License.
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
import json
|
|
15
|
-
import logging
|
|
16
15
|
import re
|
|
17
16
|
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
18
17
|
|
|
18
|
+
from camel.logger import get_logger
|
|
19
19
|
from camel.models import BaseModelBackend, ModelFactory
|
|
20
20
|
from camel.types import ModelPlatformType, ModelType
|
|
21
21
|
|
|
22
22
|
from .actions import ActionExecutor
|
|
23
|
-
from .
|
|
23
|
+
from .browser_session import NVBrowserSession
|
|
24
24
|
|
|
25
25
|
if TYPE_CHECKING:
|
|
26
26
|
from camel.agents import ChatAgent
|
|
27
27
|
|
|
28
|
-
logger =
|
|
28
|
+
logger = get_logger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
31
|
class PlaywrightLLMAgent:
|
|
32
|
-
"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
|
|
32
|
+
r"""High-level orchestration: snapshot ↔ LLM ↔ action executor."""
|
|
33
33
|
|
|
34
34
|
# System prompt as class constant to avoid recreation
|
|
35
35
|
SYSTEM_PROMPT = """
|
|
@@ -90,8 +90,8 @@ what was accomplished
|
|
|
90
90
|
self.action_history: List[Dict[str, Any]] = []
|
|
91
91
|
if model_backend is None:
|
|
92
92
|
model_backend = ModelFactory.create(
|
|
93
|
-
model_platform=ModelPlatformType.
|
|
94
|
-
model_type=ModelType.
|
|
93
|
+
model_platform=ModelPlatformType.DEFAULT,
|
|
94
|
+
model_type=ModelType.DEFAULT,
|
|
95
95
|
model_config_dict={"temperature": 0, "top_p": 1},
|
|
96
96
|
)
|
|
97
97
|
self.model_backend = model_backend
|
|
@@ -99,16 +99,19 @@ what was accomplished
|
|
|
99
99
|
self._chat_agent: Optional[ChatAgent] = None
|
|
100
100
|
|
|
101
101
|
async def navigate(self, url: str) -> str:
|
|
102
|
+
r"""Navigate to a URL and return the snapshot."""
|
|
102
103
|
try:
|
|
103
104
|
# NVBrowserSession handles waits internally
|
|
104
105
|
logger.debug("Navigated to URL: %s", url)
|
|
105
106
|
await self._session.visit(url)
|
|
106
107
|
return await self._session.get_snapshot(force_refresh=True)
|
|
107
108
|
except Exception as exc:
|
|
108
|
-
|
|
109
|
+
error_msg = f"Error: could not navigate to {url} - {exc}"
|
|
110
|
+
logger.error(error_msg)
|
|
111
|
+
return error_msg
|
|
109
112
|
|
|
110
113
|
def _get_chat_agent(self) -> "ChatAgent":
|
|
111
|
-
"""Get or create the ChatAgent instance."""
|
|
114
|
+
r"""Get or create the ChatAgent instance."""
|
|
112
115
|
from camel.agents import ChatAgent
|
|
113
116
|
|
|
114
117
|
if self._chat_agent is None:
|
|
@@ -165,12 +168,16 @@ what was accomplished
|
|
|
165
168
|
logger.warning(
|
|
166
169
|
"Could not parse JSON from LLM response: %s", content[:200]
|
|
167
170
|
)
|
|
171
|
+
return self._get_fallback_response("Parsing error")
|
|
172
|
+
|
|
173
|
+
def _get_fallback_response(self, error_msg: str) -> Dict[str, Any]:
|
|
174
|
+
r"""Generate a fallback response structure."""
|
|
168
175
|
return {
|
|
169
|
-
"plan": ["Could not parse response"],
|
|
176
|
+
"plan": [f"Could not parse response: {error_msg}"],
|
|
170
177
|
"action": {
|
|
171
178
|
"type": "finish",
|
|
172
179
|
"ref": None,
|
|
173
|
-
"summary": "Parsing error",
|
|
180
|
+
"summary": f"Parsing error: {error_msg}",
|
|
174
181
|
},
|
|
175
182
|
}
|
|
176
183
|
|
|
@@ -181,7 +188,7 @@ what was accomplished
|
|
|
181
188
|
is_initial: bool,
|
|
182
189
|
history: Optional[List[Dict[str, Any]]] = None,
|
|
183
190
|
) -> Dict[str, Any]:
|
|
184
|
-
"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
|
|
191
|
+
r"""Call the LLM (via CAMEL ChatAgent) to get plan & next action."""
|
|
185
192
|
# Build user message
|
|
186
193
|
if is_initial:
|
|
187
194
|
user_content = f"Snapshot:\n{snapshot}\n\nTask: {prompt}"
|
|
@@ -208,6 +215,7 @@ what was accomplished
|
|
|
208
215
|
return self._safe_parse_json(content)
|
|
209
216
|
|
|
210
217
|
async def process_command(self, prompt: str, max_steps: int = 15):
|
|
218
|
+
r"""Process a command using LLM-guided browser automation."""
|
|
211
219
|
# initial full snapshot
|
|
212
220
|
full_snapshot = await self._session.get_snapshot()
|
|
213
221
|
assert self._session.snapshot is not None
|
|
@@ -270,9 +278,11 @@ what was accomplished
|
|
|
270
278
|
logger.info("Process completed with %d steps", steps)
|
|
271
279
|
|
|
272
280
|
async def _run_action(self, action: Dict[str, Any]) -> str:
|
|
281
|
+
r"""Execute a single action and return the result."""
|
|
273
282
|
if action.get("type") == "navigate":
|
|
274
283
|
return await self.navigate(action.get("url", ""))
|
|
275
284
|
return await self._session.exec_action(action)
|
|
276
285
|
|
|
277
286
|
async def close(self):
|
|
287
|
+
r"""Clean up browser session and resources."""
|
|
278
288
|
await self._session.close()
|
|
@@ -57,13 +57,12 @@ class NVBrowserSession:
|
|
|
57
57
|
|
|
58
58
|
def __new__(
|
|
59
59
|
cls, *, headless: bool = True, user_data_dir: Optional[str] = None
|
|
60
|
-
):
|
|
61
|
-
loop
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
return cls._sessions[loop]
|
|
60
|
+
) -> "NVBrowserSession":
|
|
61
|
+
# Defer event loop lookup until we actually need it
|
|
62
|
+
# This allows creation outside of async context
|
|
63
|
+
instance = super().__new__(cls)
|
|
64
|
+
instance._initialized = False
|
|
65
|
+
return instance
|
|
67
66
|
|
|
68
67
|
def __init__(
|
|
69
68
|
self, *, headless: bool = True, user_data_dir: Optional[str] = None
|
|
@@ -90,6 +89,47 @@ class NVBrowserSession:
|
|
|
90
89
|
# Browser lifecycle helpers
|
|
91
90
|
# ------------------------------------------------------------------
|
|
92
91
|
async def ensure_browser(self) -> None:
|
|
92
|
+
r"""Ensure browser is ready, implementing singleton pattern per event
|
|
93
|
+
loop.
|
|
94
|
+
"""
|
|
95
|
+
# Check if we need to reuse or create a session for this event loop
|
|
96
|
+
try:
|
|
97
|
+
loop = asyncio.get_running_loop()
|
|
98
|
+
except RuntimeError as e:
|
|
99
|
+
raise RuntimeError(
|
|
100
|
+
"ensure_browser() must be called from within an async context"
|
|
101
|
+
) from e
|
|
102
|
+
|
|
103
|
+
# Check if there's already a session for this loop
|
|
104
|
+
if loop in self._sessions and self._sessions[loop] is not self:
|
|
105
|
+
# Copy the existing session's browser resources
|
|
106
|
+
existing = self._sessions[loop]
|
|
107
|
+
# Wait for existing session to be fully initialized
|
|
108
|
+
async with existing._ensure_lock:
|
|
109
|
+
if (
|
|
110
|
+
existing._initialized
|
|
111
|
+
and existing._page is not None
|
|
112
|
+
and existing._playwright is not None
|
|
113
|
+
):
|
|
114
|
+
try:
|
|
115
|
+
# Verify the page is still responsive
|
|
116
|
+
await existing._page.title()
|
|
117
|
+
self._playwright = existing._playwright
|
|
118
|
+
self._browser = existing._browser
|
|
119
|
+
self._context = existing._context
|
|
120
|
+
self._page = existing._page
|
|
121
|
+
self.snapshot = existing.snapshot
|
|
122
|
+
self.executor = existing.executor
|
|
123
|
+
self._initialized = True
|
|
124
|
+
return
|
|
125
|
+
except Exception:
|
|
126
|
+
# Existing session is broken, continue with new
|
|
127
|
+
# initialization
|
|
128
|
+
pass
|
|
129
|
+
|
|
130
|
+
# Register this instance for the current loop
|
|
131
|
+
self._sessions[loop] = self
|
|
132
|
+
|
|
93
133
|
# Serialise initialisation to avoid race conditions where multiple
|
|
94
134
|
# concurrent coroutine calls create multiple browser instances for
|
|
95
135
|
# the same NVBrowserSession.
|
|
@@ -98,6 +138,7 @@ class NVBrowserSession:
|
|
|
98
138
|
|
|
99
139
|
# Moved original logic to helper
|
|
100
140
|
async def _ensure_browser_inner(self) -> None:
|
|
141
|
+
r"""Internal browser initialization logic."""
|
|
101
142
|
from playwright.async_api import async_playwright
|
|
102
143
|
|
|
103
144
|
if self._page is not None:
|
|
@@ -144,11 +185,23 @@ class NVBrowserSession:
|
|
|
144
185
|
r"""Close all browser resources, ensuring cleanup even if some
|
|
145
186
|
operations fail.
|
|
146
187
|
"""
|
|
147
|
-
#
|
|
148
|
-
|
|
188
|
+
# Remove this session from the sessions dict and close resources
|
|
189
|
+
try:
|
|
190
|
+
loop = asyncio.get_running_loop()
|
|
191
|
+
if loop in self._sessions and self._sessions[loop] is self:
|
|
192
|
+
del self._sessions[loop]
|
|
193
|
+
except RuntimeError:
|
|
194
|
+
pass # No running loop, that's okay
|
|
195
|
+
|
|
196
|
+
# Clean up any stale loop references
|
|
197
|
+
stale_loops = [loop for loop in self._sessions if loop.is_closed()]
|
|
198
|
+
for loop in stale_loops:
|
|
199
|
+
del self._sessions[loop]
|
|
200
|
+
|
|
149
201
|
await self._close_session()
|
|
150
202
|
|
|
151
203
|
async def _close_session(self) -> None:
|
|
204
|
+
r"""Internal session cleanup with comprehensive error handling."""
|
|
152
205
|
errors: list[str] = []
|
|
153
206
|
|
|
154
207
|
# Close context first (which closes pages)
|
|
@@ -204,6 +257,7 @@ class NVBrowserSession:
|
|
|
204
257
|
# Convenience wrappers around common actions
|
|
205
258
|
# ------------------------------------------------------------------
|
|
206
259
|
async def visit(self, url: str) -> str:
|
|
260
|
+
r"""Navigate to a URL with proper error handling."""
|
|
207
261
|
await self.ensure_browser()
|
|
208
262
|
assert self._page is not None
|
|
209
263
|
|
|
@@ -233,7 +287,7 @@ class NVBrowserSession:
|
|
|
233
287
|
force_refresh=force_refresh, diff_only=diff_only
|
|
234
288
|
)
|
|
235
289
|
|
|
236
|
-
async def exec_action(self, action:
|
|
290
|
+
async def exec_action(self, action: Dict[str, Any]) -> str:
|
|
237
291
|
await self.ensure_browser()
|
|
238
292
|
assert self.executor is not None
|
|
239
293
|
return await self.executor.execute(action)
|