camel-ai 0.2.71a1__py3-none-any.whl → 0.2.71a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +61 -3
- camel/messages/func_message.py +32 -5
- camel/societies/workforce/single_agent_worker.py +1 -5
- camel/societies/workforce/workforce.py +68 -8
- camel/tasks/task.py +2 -2
- camel/toolkits/craw4ai_toolkit.py +27 -7
- camel/toolkits/file_write_toolkit.py +110 -31
- camel/toolkits/human_toolkit.py +19 -14
- camel/toolkits/jina_reranker_toolkit.py +3 -4
- camel/toolkits/terminal_toolkit.py +189 -48
- camel/toolkits/video_download_toolkit.py +1 -2
- camel/utils/message_summarizer.py +148 -0
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a2.dist-info}/METADATA +4 -4
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a2.dist-info}/RECORD +17 -16
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.71a1.dist-info → camel_ai-0.2.71a2.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/chat_agent.py
CHANGED
|
@@ -13,10 +13,12 @@
|
|
|
13
13
|
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
14
|
from __future__ import annotations
|
|
15
15
|
|
|
16
|
+
import asyncio
|
|
16
17
|
import json
|
|
17
18
|
import logging
|
|
18
19
|
import textwrap
|
|
19
20
|
import threading
|
|
21
|
+
import time
|
|
20
22
|
import uuid
|
|
21
23
|
from collections import defaultdict
|
|
22
24
|
from pathlib import Path
|
|
@@ -173,6 +175,11 @@ class ChatAgent(BaseAgent):
|
|
|
173
175
|
stop_event (Optional[threading.Event], optional): Event to signal
|
|
174
176
|
termination of the agent's operation. When set, the agent will
|
|
175
177
|
terminate its execution. (default: :obj:`None`)
|
|
178
|
+
mask_tool_output (Optional[bool]): Whether to return a sanitized
|
|
179
|
+
placeholder instead of the raw tool output. (default: :obj:`False`)
|
|
180
|
+
pause_event (Optional[asyncio.Event]): Event to signal pause of the
|
|
181
|
+
agent's operation. When clear, the agent will pause its execution.
|
|
182
|
+
(default: :obj:`None`)
|
|
176
183
|
"""
|
|
177
184
|
|
|
178
185
|
def __init__(
|
|
@@ -206,6 +213,8 @@ class ChatAgent(BaseAgent):
|
|
|
206
213
|
max_iteration: Optional[int] = None,
|
|
207
214
|
agent_id: Optional[str] = None,
|
|
208
215
|
stop_event: Optional[threading.Event] = None,
|
|
216
|
+
mask_tool_output: bool = False,
|
|
217
|
+
pause_event: Optional[asyncio.Event] = None,
|
|
209
218
|
) -> None:
|
|
210
219
|
if isinstance(model, ModelManager):
|
|
211
220
|
self.model_backend = model
|
|
@@ -280,6 +289,9 @@ class ChatAgent(BaseAgent):
|
|
|
280
289
|
self.response_terminators = response_terminators or []
|
|
281
290
|
self.max_iteration = max_iteration
|
|
282
291
|
self.stop_event = stop_event
|
|
292
|
+
self.mask_tool_output = mask_tool_output
|
|
293
|
+
self._secure_result_store: Dict[str, Any] = {}
|
|
294
|
+
self.pause_event = pause_event
|
|
283
295
|
|
|
284
296
|
def reset(self):
|
|
285
297
|
r"""Resets the :obj:`ChatAgent` to its initial state."""
|
|
@@ -1143,6 +1155,10 @@ class ChatAgent(BaseAgent):
|
|
|
1143
1155
|
iteration_count = 0
|
|
1144
1156
|
|
|
1145
1157
|
while True:
|
|
1158
|
+
if self.pause_event is not None and not self.pause_event.is_set():
|
|
1159
|
+
while not self.pause_event.is_set():
|
|
1160
|
+
time.sleep(0.001)
|
|
1161
|
+
|
|
1146
1162
|
try:
|
|
1147
1163
|
openai_messages, num_tokens = self.memory.get_context()
|
|
1148
1164
|
accumulated_context_tokens += num_tokens
|
|
@@ -1184,6 +1200,12 @@ class ChatAgent(BaseAgent):
|
|
|
1184
1200
|
external_tool_call_requests = []
|
|
1185
1201
|
external_tool_call_requests.append(tool_call_request)
|
|
1186
1202
|
else:
|
|
1203
|
+
if (
|
|
1204
|
+
self.pause_event is not None
|
|
1205
|
+
and not self.pause_event.is_set()
|
|
1206
|
+
):
|
|
1207
|
+
while not self.pause_event.is_set():
|
|
1208
|
+
time.sleep(0.001)
|
|
1187
1209
|
tool_call_records.append(
|
|
1188
1210
|
self._execute_tool(tool_call_request)
|
|
1189
1211
|
)
|
|
@@ -1287,6 +1309,8 @@ class ChatAgent(BaseAgent):
|
|
|
1287
1309
|
step_token_usage = self._create_token_usage_tracker()
|
|
1288
1310
|
iteration_count = 0
|
|
1289
1311
|
while True:
|
|
1312
|
+
if self.pause_event is not None and not self.pause_event.is_set():
|
|
1313
|
+
await self.pause_event.wait()
|
|
1290
1314
|
try:
|
|
1291
1315
|
openai_messages, num_tokens = self.memory.get_context()
|
|
1292
1316
|
accumulated_context_tokens += num_tokens
|
|
@@ -1328,6 +1352,11 @@ class ChatAgent(BaseAgent):
|
|
|
1328
1352
|
external_tool_call_requests = []
|
|
1329
1353
|
external_tool_call_requests.append(tool_call_request)
|
|
1330
1354
|
else:
|
|
1355
|
+
if (
|
|
1356
|
+
self.pause_event is not None
|
|
1357
|
+
and not self.pause_event.is_set()
|
|
1358
|
+
):
|
|
1359
|
+
await self.pause_event.wait()
|
|
1331
1360
|
tool_call_record = await self._aexecute_tool(
|
|
1332
1361
|
tool_call_request
|
|
1333
1362
|
)
|
|
@@ -1958,14 +1987,27 @@ class ChatAgent(BaseAgent):
|
|
|
1958
1987
|
tool_call_id = tool_call_request.tool_call_id
|
|
1959
1988
|
tool = self._internal_tools[func_name]
|
|
1960
1989
|
try:
|
|
1961
|
-
|
|
1990
|
+
raw_result = tool(**args)
|
|
1991
|
+
if self.mask_tool_output:
|
|
1992
|
+
self._secure_result_store[tool_call_id] = raw_result
|
|
1993
|
+
result = (
|
|
1994
|
+
"[The tool has been executed successfully, but the output"
|
|
1995
|
+
" from the tool is masked. You can move forward]"
|
|
1996
|
+
)
|
|
1997
|
+
mask_flag = True
|
|
1998
|
+
else:
|
|
1999
|
+
result = raw_result
|
|
2000
|
+
mask_flag = False
|
|
1962
2001
|
except Exception as e:
|
|
1963
2002
|
# Capture the error message to prevent framework crash
|
|
1964
2003
|
error_msg = f"Error executing tool '{func_name}': {e!s}"
|
|
1965
|
-
result =
|
|
2004
|
+
result = f"Tool execution failed: {error_msg}"
|
|
2005
|
+
mask_flag = False
|
|
1966
2006
|
logging.warning(error_msg)
|
|
1967
2007
|
|
|
1968
|
-
return self._record_tool_calling(
|
|
2008
|
+
return self._record_tool_calling(
|
|
2009
|
+
func_name, args, result, tool_call_id, mask_output=mask_flag
|
|
2010
|
+
)
|
|
1969
2011
|
|
|
1970
2012
|
async def _aexecute_tool(
|
|
1971
2013
|
self,
|
|
@@ -2015,9 +2057,23 @@ class ChatAgent(BaseAgent):
|
|
|
2015
2057
|
args: Dict[str, Any],
|
|
2016
2058
|
result: Any,
|
|
2017
2059
|
tool_call_id: str,
|
|
2060
|
+
mask_output: bool = False,
|
|
2018
2061
|
):
|
|
2019
2062
|
r"""Record the tool calling information in the memory, and return the
|
|
2020
2063
|
tool calling record.
|
|
2064
|
+
|
|
2065
|
+
Args:
|
|
2066
|
+
func_name (str): The name of the tool function called.
|
|
2067
|
+
args (Dict[str, Any]): The arguments passed to the tool.
|
|
2068
|
+
result (Any): The result returned by the tool execution.
|
|
2069
|
+
tool_call_id (str): A unique identifier for the tool call.
|
|
2070
|
+
mask_output (bool, optional): Whether to return a sanitized
|
|
2071
|
+
placeholder instead of the raw tool output.
|
|
2072
|
+
(default: :obj:`False`)
|
|
2073
|
+
|
|
2074
|
+
Returns:
|
|
2075
|
+
ToolCallingRecord: A struct containing information about
|
|
2076
|
+
this tool call.
|
|
2021
2077
|
"""
|
|
2022
2078
|
assist_msg = FunctionCallingMessage(
|
|
2023
2079
|
role_name=self.role_name,
|
|
@@ -2036,6 +2092,7 @@ class ChatAgent(BaseAgent):
|
|
|
2036
2092
|
func_name=func_name,
|
|
2037
2093
|
result=result,
|
|
2038
2094
|
tool_call_id=tool_call_id,
|
|
2095
|
+
mask_output=mask_output,
|
|
2039
2096
|
)
|
|
2040
2097
|
|
|
2041
2098
|
# Use precise timestamps to ensure correct ordering
|
|
@@ -2140,6 +2197,7 @@ class ChatAgent(BaseAgent):
|
|
|
2140
2197
|
),
|
|
2141
2198
|
max_iteration=self.max_iteration,
|
|
2142
2199
|
stop_event=self.stop_event,
|
|
2200
|
+
pause_event=self.pause_event,
|
|
2143
2201
|
)
|
|
2144
2202
|
|
|
2145
2203
|
# Copy memory if requested
|
camel/messages/func_message.py
CHANGED
|
@@ -47,12 +47,16 @@ class FunctionCallingMessage(BaseMessage):
|
|
|
47
47
|
(default: :obj:`None`)
|
|
48
48
|
tool_call_id (Optional[str]): The ID of the tool call, if available.
|
|
49
49
|
(default: :obj:`None`)
|
|
50
|
+
mask_output (Optional[bool]): Whether to return a sanitized placeholder
|
|
51
|
+
instead of the raw tool output.
|
|
52
|
+
(default: :obj:`False`)
|
|
50
53
|
"""
|
|
51
54
|
|
|
52
55
|
func_name: Optional[str] = None
|
|
53
56
|
args: Optional[Dict] = None
|
|
54
57
|
result: Optional[Any] = None
|
|
55
58
|
tool_call_id: Optional[str] = None
|
|
59
|
+
mask_output: Optional[bool] = False
|
|
56
60
|
|
|
57
61
|
def to_openai_message(
|
|
58
62
|
self,
|
|
@@ -105,10 +109,13 @@ class FunctionCallingMessage(BaseMessage):
|
|
|
105
109
|
# This is a function response
|
|
106
110
|
# TODO: Allow for more flexible setting of tool role,
|
|
107
111
|
# optionally to be the same as assistant messages
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
+
if self.mask_output:
|
|
113
|
+
content = "[MASKED]"
|
|
114
|
+
else:
|
|
115
|
+
content = function_format.format_tool_response(
|
|
116
|
+
self.func_name, # type: ignore[arg-type]
|
|
117
|
+
self.result, # type: ignore[arg-type]
|
|
118
|
+
)
|
|
112
119
|
return ShareGPTMessage(from_="tool", value=content) # type: ignore[call-arg]
|
|
113
120
|
|
|
114
121
|
def to_openai_assistant_message(self) -> OpenAIAssistantMessage:
|
|
@@ -154,10 +161,30 @@ class FunctionCallingMessage(BaseMessage):
|
|
|
154
161
|
" due to missing function name."
|
|
155
162
|
)
|
|
156
163
|
|
|
157
|
-
|
|
164
|
+
if self.mask_output:
|
|
165
|
+
result_content = "[MASKED]"
|
|
166
|
+
else:
|
|
167
|
+
result_content = str(self.result)
|
|
158
168
|
|
|
159
169
|
return {
|
|
160
170
|
"role": "tool",
|
|
161
171
|
"content": result_content,
|
|
162
172
|
"tool_call_id": self.tool_call_id or "null",
|
|
163
173
|
}
|
|
174
|
+
|
|
175
|
+
def to_dict(self) -> Dict:
|
|
176
|
+
r"""Converts the message to a dictionary.
|
|
177
|
+
|
|
178
|
+
Returns:
|
|
179
|
+
dict: The converted dictionary.
|
|
180
|
+
"""
|
|
181
|
+
base = super().to_dict()
|
|
182
|
+
base["func_name"] = self.func_name
|
|
183
|
+
if self.args is not None:
|
|
184
|
+
base["args"] = self.args
|
|
185
|
+
if self.result is not None:
|
|
186
|
+
base["result"] = self.result
|
|
187
|
+
if self.tool_call_id is not None:
|
|
188
|
+
base["tool_call_id"] = self.tool_call_id
|
|
189
|
+
base["mask_output"] = self.mask_output
|
|
190
|
+
return base
|
camel/societies/workforce/single_agent_worker.py
CHANGED
|
@@ -43,8 +43,6 @@ class AgentPool:
|
|
|
43
43
|
(default: :obj:`10`)
|
|
44
44
|
auto_scale (bool): Whether to automatically scale the pool size.
|
|
45
45
|
(default: :obj:`True`)
|
|
46
|
-
scale_factor (float): Factor by which to scale the pool when needed.
|
|
47
|
-
(default: :obj:`1.5`)
|
|
48
46
|
idle_timeout (float): Time in seconds after which idle agents are
|
|
49
47
|
removed. (default: :obj:`180.0`)
|
|
50
48
|
"""
|
|
@@ -55,13 +53,11 @@ class AgentPool:
|
|
|
55
53
|
initial_size: int = 1,
|
|
56
54
|
max_size: int = 10,
|
|
57
55
|
auto_scale: bool = True,
|
|
58
|
-
scale_factor: float = 1.5,
|
|
59
56
|
idle_timeout: float = 180.0, # 3 minutes
|
|
60
57
|
):
|
|
61
58
|
self.base_agent = base_agent
|
|
62
59
|
self.max_size = max_size
|
|
63
60
|
self.auto_scale = auto_scale
|
|
64
|
-
self.scale_factor = scale_factor
|
|
65
61
|
self.idle_timeout = idle_timeout
|
|
66
62
|
|
|
67
63
|
# Pool management
|
|
@@ -332,7 +328,7 @@ class SingleAgentWorker(Worker):
|
|
|
332
328
|
# Store the actual token usage for this specific task
|
|
333
329
|
task.additional_info["token_usage"] = {"total_tokens": total_tokens}
|
|
334
330
|
|
|
335
|
-
print(f"======\n{Fore.GREEN}
|
|
331
|
+
print(f"======\n{Fore.GREEN}Response from {self}:{Fore.RESET}")
|
|
336
332
|
|
|
337
333
|
try:
|
|
338
334
|
result_dict = json.loads(response.msg.content)
|
camel/societies/workforce/workforce.py
CHANGED
|
@@ -395,6 +395,40 @@ class Workforce(BaseNode):
|
|
|
395
395
|
"better context continuity during task handoffs."
|
|
396
396
|
)
|
|
397
397
|
|
|
398
|
+
# ------------------------------------------------------------------
|
|
399
|
+
# Helper for propagating pause control to externally supplied agents
|
|
400
|
+
# ------------------------------------------------------------------
|
|
401
|
+
|
|
402
|
+
def _attach_pause_event_to_agent(self, agent: ChatAgent) -> None:
|
|
403
|
+
r"""Ensure the given ChatAgent shares this workforce's pause_event.
|
|
404
|
+
|
|
405
|
+
If the agent already has a different pause_event we overwrite it and
|
|
406
|
+
emit a debug log (it is unlikely an agent needs multiple independent
|
|
407
|
+
pause controls once managed by this workforce)."""
|
|
408
|
+
try:
|
|
409
|
+
existing_pause_event = getattr(agent, "pause_event", None)
|
|
410
|
+
if existing_pause_event is not self._pause_event:
|
|
411
|
+
if existing_pause_event is not None:
|
|
412
|
+
logger.debug(
|
|
413
|
+
f"Overriding pause_event for agent {agent.agent_id} "
|
|
414
|
+
f"(had different pause_event: "
|
|
415
|
+
f"{id(existing_pause_event)} "
|
|
416
|
+
f"-> {id(self._pause_event)})"
|
|
417
|
+
)
|
|
418
|
+
agent.pause_event = self._pause_event
|
|
419
|
+
except AttributeError:
|
|
420
|
+
# Should not happen, but guard against unexpected objects
|
|
421
|
+
logger.warning(
|
|
422
|
+
f"Cannot attach pause_event to object {type(agent)} - "
|
|
423
|
+
f"missing pause_event attribute"
|
|
424
|
+
)
|
|
425
|
+
|
|
426
|
+
def _ensure_pause_event_in_kwargs(self, kwargs: Optional[Dict]) -> Dict:
|
|
427
|
+
r"""Insert pause_event into kwargs dict for ChatAgent construction."""
|
|
428
|
+
new_kwargs = dict(kwargs) if kwargs else {}
|
|
429
|
+
new_kwargs.setdefault("pause_event", self._pause_event)
|
|
430
|
+
return new_kwargs
|
|
431
|
+
|
|
398
432
|
def __repr__(self):
|
|
399
433
|
return (
|
|
400
434
|
f"Workforce {self.node_id} ({self.description}) - "
|
|
@@ -1138,6 +1172,9 @@ class Workforce(BaseNode):
|
|
|
1138
1172
|
Returns:
|
|
1139
1173
|
Workforce: The workforce node itself.
|
|
1140
1174
|
"""
|
|
1175
|
+
# Ensure the worker agent shares this workforce's pause control
|
|
1176
|
+
self._attach_pause_event_to_agent(worker)
|
|
1177
|
+
|
|
1141
1178
|
worker_node = SingleAgentWorker(
|
|
1142
1179
|
description=description,
|
|
1143
1180
|
worker=worker,
|
|
@@ -1184,6 +1221,18 @@ class Workforce(BaseNode):
|
|
|
1184
1221
|
Returns:
|
|
1185
1222
|
Workforce: The workforce node itself.
|
|
1186
1223
|
"""
|
|
1224
|
+
# Ensure provided kwargs carry pause_event so that internally created
|
|
1225
|
+
# ChatAgents (assistant/user/summarizer) inherit it.
|
|
1226
|
+
assistant_agent_kwargs = self._ensure_pause_event_in_kwargs(
|
|
1227
|
+
assistant_agent_kwargs
|
|
1228
|
+
)
|
|
1229
|
+
user_agent_kwargs = self._ensure_pause_event_in_kwargs(
|
|
1230
|
+
user_agent_kwargs
|
|
1231
|
+
)
|
|
1232
|
+
summarize_agent_kwargs = self._ensure_pause_event_in_kwargs(
|
|
1233
|
+
summarize_agent_kwargs
|
|
1234
|
+
)
|
|
1235
|
+
|
|
1187
1236
|
worker_node = RolePlayingWorker(
|
|
1188
1237
|
description=description,
|
|
1189
1238
|
assistant_role_name=assistant_role_name,
|
|
@@ -1212,6 +1261,9 @@ class Workforce(BaseNode):
|
|
|
1212
1261
|
Returns:
|
|
1213
1262
|
Workforce: The workforce node itself.
|
|
1214
1263
|
"""
|
|
1264
|
+
# Align child workforce's pause_event with this one for unified
|
|
1265
|
+
# control of worker agents only.
|
|
1266
|
+
workforce._pause_event = self._pause_event
|
|
1215
1267
|
self._children.append(workforce)
|
|
1216
1268
|
return self
|
|
1217
1269
|
|
|
@@ -1245,14 +1297,17 @@ class Workforce(BaseNode):
|
|
|
1245
1297
|
# Handle asyncio.Event in a thread-safe way
|
|
1246
1298
|
if self._loop and not self._loop.is_closed():
|
|
1247
1299
|
# If we have a loop, use it to set the event safely
|
|
1248
|
-
asyncio.run_coroutine_threadsafe(
|
|
1249
|
-
self._async_reset(), self._loop
|
|
1250
|
-
).result()
|
|
1251
|
-
else:
|
|
1252
1300
|
try:
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1301
|
+
asyncio.run_coroutine_threadsafe(
|
|
1302
|
+
self._async_reset(), self._loop
|
|
1303
|
+
).result()
|
|
1304
|
+
except RuntimeError as e:
|
|
1305
|
+
logger.warning(f"Failed to reset via existing loop: {e}")
|
|
1306
|
+
# Fallback to direct event manipulation
|
|
1307
|
+
self._pause_event.set()
|
|
1308
|
+
else:
|
|
1309
|
+
# No active loop, directly set the event
|
|
1310
|
+
self._pause_event.set()
|
|
1256
1311
|
|
|
1257
1312
|
if hasattr(self, 'metrics_logger') and self.metrics_logger is not None:
|
|
1258
1313
|
self.metrics_logger.reset_task_data()
|
|
@@ -1656,7 +1711,12 @@ class Workforce(BaseNode):
|
|
|
1656
1711
|
model_config_dict={"temperature": 0},
|
|
1657
1712
|
)
|
|
1658
1713
|
|
|
1659
|
-
return ChatAgent(
|
|
1714
|
+
return ChatAgent(
|
|
1715
|
+
worker_sys_msg,
|
|
1716
|
+
model=model,
|
|
1717
|
+
tools=function_list, # type: ignore[arg-type]
|
|
1718
|
+
pause_event=self._pause_event,
|
|
1719
|
+
)
|
|
1660
1720
|
|
|
1661
1721
|
async def _get_returned_task(self) -> Optional[Task]:
|
|
1662
1722
|
r"""Get the task that's published by this node and just get returned
|
camel/tasks/task.py
CHANGED
|
@@ -56,7 +56,7 @@ class TaskValidationMode(Enum):
|
|
|
56
56
|
def validate_task_content(
|
|
57
57
|
content: str,
|
|
58
58
|
task_id: str = "unknown",
|
|
59
|
-
min_length: int =
|
|
59
|
+
min_length: int = 1,
|
|
60
60
|
mode: TaskValidationMode = TaskValidationMode.INPUT,
|
|
61
61
|
check_failure_patterns: bool = True,
|
|
62
62
|
) -> bool:
|
|
@@ -69,7 +69,7 @@ def validate_task_content(
|
|
|
69
69
|
task_id (str): Task ID for logging purposes.
|
|
70
70
|
(default: :obj:`"unknown"`)
|
|
71
71
|
min_length (int): Minimum content length after stripping whitespace.
|
|
72
|
-
(default: :obj:`
|
|
72
|
+
(default: :obj:`1`)
|
|
73
73
|
mode (TaskValidationMode): Validation mode - INPUT for task content,
|
|
74
74
|
OUTPUT for task results. (default: :obj:`TaskValidationMode.INPUT`)
|
|
75
75
|
check_failure_patterns (bool): Whether to check for failure indicators
|
camel/toolkits/craw4ai_toolkit.py
CHANGED
|
@@ -31,6 +31,16 @@ class Crawl4AIToolkit(BaseToolkit):
|
|
|
31
31
|
timeout: Optional[float] = None,
|
|
32
32
|
):
|
|
33
33
|
super().__init__(timeout=timeout)
|
|
34
|
+
self._client = None
|
|
35
|
+
|
|
36
|
+
async def _get_client(self):
|
|
37
|
+
r"""Get or create the AsyncWebCrawler client."""
|
|
38
|
+
if self._client is None:
|
|
39
|
+
from crawl4ai import AsyncWebCrawler
|
|
40
|
+
|
|
41
|
+
self._client = AsyncWebCrawler()
|
|
42
|
+
await self._client.__aenter__()
|
|
43
|
+
return self._client
|
|
34
44
|
|
|
35
45
|
async def scrape(self, url: str) -> str:
|
|
36
46
|
r"""Scrapes a webpage and returns its content.
|
|
@@ -47,19 +57,29 @@ class Crawl4AIToolkit(BaseToolkit):
|
|
|
47
57
|
str: The scraped content of the webpage as a string. If the
|
|
48
58
|
scraping fails, it will return an error message.
|
|
49
59
|
"""
|
|
50
|
-
from crawl4ai import
|
|
60
|
+
from crawl4ai import CrawlerRunConfig
|
|
51
61
|
|
|
52
62
|
try:
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
63
|
+
client = await self._get_client()
|
|
64
|
+
config = CrawlerRunConfig(
|
|
65
|
+
only_text=True,
|
|
66
|
+
)
|
|
67
|
+
content = await client.arun(url, crawler_config=config)
|
|
68
|
+
return str(content.markdown) if content.markdown else ""
|
|
59
69
|
except Exception as e:
|
|
60
70
|
logger.error(f"Error scraping {url}: {e}")
|
|
61
71
|
return f"Error scraping {url}: {e}"
|
|
62
72
|
|
|
73
|
+
async def __aenter__(self):
|
|
74
|
+
"""Async context manager entry."""
|
|
75
|
+
return self
|
|
76
|
+
|
|
77
|
+
async def __aexit__(self, exc_type, exc_val, exc_tb):
|
|
78
|
+
"""Async context manager exit - cleanup the client."""
|
|
79
|
+
if self._client is not None:
|
|
80
|
+
await self._client.__aexit__(exc_type, exc_val, exc_tb)
|
|
81
|
+
self._client = None
|
|
82
|
+
|
|
63
83
|
def get_tools(self) -> List[FunctionTool]:
|
|
64
84
|
r"""Returns a list of FunctionTool objects representing the
|
|
65
85
|
functions in the toolkit.
|
camel/toolkits/file_write_toolkit.py
CHANGED
|
@@ -146,23 +146,25 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
146
146
|
document.save(str(file_path))
|
|
147
147
|
logger.debug(f"Wrote DOCX to {file_path} with default formatting")
|
|
148
148
|
|
|
149
|
-
@dependencies_required('pylatex', '
|
|
149
|
+
@dependencies_required('pylatex', 'pymupdf')
|
|
150
150
|
def _write_pdf_file(
|
|
151
|
-
self,
|
|
151
|
+
self,
|
|
152
|
+
file_path: Path,
|
|
153
|
+
title: str,
|
|
154
|
+
content: str,
|
|
155
|
+
use_latex: bool = False,
|
|
152
156
|
) -> None:
|
|
153
157
|
r"""Write text content to a PDF file with default formatting.
|
|
154
158
|
|
|
155
159
|
Args:
|
|
156
160
|
file_path (Path): The target file path.
|
|
161
|
+
title (str): The title of the document.
|
|
157
162
|
content (str): The text content to write.
|
|
158
163
|
use_latex (bool): Whether to use LaTeX for rendering. (requires
|
|
159
|
-
LaTeX toolchain). If False, uses
|
|
164
|
+
LaTeX toolchain). If False, uses PyMuPDF for simpler PDF
|
|
160
165
|
generation. (default: :obj:`False`)
|
|
161
|
-
|
|
162
|
-
Raises:
|
|
163
|
-
RuntimeError: If the 'pylatex' or 'fpdf' library is not installed
|
|
164
|
-
when use_latex=True.
|
|
165
166
|
"""
|
|
167
|
+
# TODO: table generation need to be improved
|
|
166
168
|
if use_latex:
|
|
167
169
|
from pylatex import (
|
|
168
170
|
Command,
|
|
@@ -213,30 +215,105 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
213
215
|
|
|
214
216
|
logger.info(f"Wrote PDF (with LaTeX) to {file_path}")
|
|
215
217
|
else:
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
#
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
#
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
218
|
+
import pymupdf
|
|
219
|
+
|
|
220
|
+
# Create a new PDF document
|
|
221
|
+
doc = pymupdf.open()
|
|
222
|
+
|
|
223
|
+
# Add a page
|
|
224
|
+
page = doc.new_page()
|
|
225
|
+
|
|
226
|
+
# Process the content
|
|
227
|
+
lines = content.strip().split('\n')
|
|
228
|
+
document_title = title
|
|
229
|
+
|
|
230
|
+
# Create a TextWriter for writing text to the page
|
|
231
|
+
text_writer = pymupdf.TextWriter(page.rect)
|
|
232
|
+
|
|
233
|
+
# Define fonts
|
|
234
|
+
normal_font = pymupdf.Font(
|
|
235
|
+
"helv"
|
|
236
|
+
) # Standard font with multilingual support
|
|
237
|
+
bold_font = pymupdf.Font("helv")
|
|
238
|
+
|
|
239
|
+
# Start position for text
|
|
240
|
+
y_pos = 50
|
|
241
|
+
x_pos = 50
|
|
242
|
+
|
|
243
|
+
# Add title
|
|
244
|
+
text_writer.fill_textbox(
|
|
245
|
+
pymupdf.Rect(
|
|
246
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 30
|
|
247
|
+
),
|
|
248
|
+
document_title,
|
|
249
|
+
fontsize=16,
|
|
250
|
+
)
|
|
251
|
+
y_pos += 40
|
|
252
|
+
|
|
253
|
+
# Process content
|
|
254
|
+
for line in lines:
|
|
255
|
+
stripped_line = line.strip()
|
|
256
|
+
|
|
257
|
+
# Skip empty lines but add some space
|
|
258
|
+
if not stripped_line:
|
|
259
|
+
y_pos += 10
|
|
260
|
+
continue
|
|
261
|
+
|
|
262
|
+
# Handle headers
|
|
263
|
+
if stripped_line.startswith('## '):
|
|
264
|
+
text_writer.fill_textbox(
|
|
265
|
+
pymupdf.Rect(
|
|
266
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 20
|
|
267
|
+
),
|
|
268
|
+
stripped_line[3:].strip(),
|
|
269
|
+
font=bold_font,
|
|
270
|
+
fontsize=14,
|
|
271
|
+
)
|
|
272
|
+
y_pos += 25
|
|
273
|
+
elif stripped_line.startswith('# '):
|
|
274
|
+
text_writer.fill_textbox(
|
|
275
|
+
pymupdf.Rect(
|
|
276
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 25
|
|
277
|
+
),
|
|
278
|
+
stripped_line[2:].strip(),
|
|
279
|
+
font=bold_font,
|
|
280
|
+
fontsize=16,
|
|
281
|
+
)
|
|
282
|
+
y_pos += 30
|
|
283
|
+
# Handle horizontal rule
|
|
284
|
+
elif stripped_line == '---':
|
|
285
|
+
page.draw_line(
|
|
286
|
+
pymupdf.Point(x_pos, y_pos + 5),
|
|
287
|
+
pymupdf.Point(page.rect.width - x_pos, y_pos + 5),
|
|
288
|
+
)
|
|
289
|
+
y_pos += 15
|
|
290
|
+
# Regular text
|
|
235
291
|
else:
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
292
|
+
# Check if we need a new page
|
|
293
|
+
if y_pos > page.rect.height - 50:
|
|
294
|
+
text_writer.write_text(page)
|
|
295
|
+
page = doc.new_page()
|
|
296
|
+
text_writer = pymupdf.TextWriter(page.rect)
|
|
297
|
+
y_pos = 50
|
|
298
|
+
|
|
299
|
+
# Add text to the current page
|
|
300
|
+
text_writer.fill_textbox(
|
|
301
|
+
pymupdf.Rect(
|
|
302
|
+
x_pos, y_pos, page.rect.width - x_pos, y_pos + 15
|
|
303
|
+
),
|
|
304
|
+
stripped_line,
|
|
305
|
+
font=normal_font,
|
|
306
|
+
)
|
|
307
|
+
y_pos += 15
|
|
308
|
+
|
|
309
|
+
# Write the accumulated text to the last page
|
|
310
|
+
text_writer.write_text(page)
|
|
311
|
+
|
|
312
|
+
# Save the PDF
|
|
313
|
+
doc.save(str(file_path))
|
|
314
|
+
doc.close()
|
|
315
|
+
|
|
316
|
+
logger.debug(f"Wrote PDF to {file_path} with PyMuPDF formatting")
|
|
240
317
|
|
|
241
318
|
def _write_csv_file(
|
|
242
319
|
self,
|
|
@@ -338,6 +415,7 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
338
415
|
|
|
339
416
|
def write_to_file(
|
|
340
417
|
self,
|
|
418
|
+
title: str,
|
|
341
419
|
content: Union[str, List[List[str]]],
|
|
342
420
|
filename: str,
|
|
343
421
|
encoding: Optional[str] = None,
|
|
@@ -351,6 +429,7 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
351
429
|
and HTML (.html, .htm).
|
|
352
430
|
|
|
353
431
|
Args:
|
|
432
|
+
title (str): The title of the document.
|
|
354
433
|
content (Union[str, List[List[str]]]): The content to write to the
|
|
355
434
|
file. Content format varies by file type:
|
|
356
435
|
- Text formats (txt, md, html, yaml): string
|
|
@@ -388,7 +467,7 @@ class FileWriteToolkit(BaseToolkit):
|
|
|
388
467
|
self._write_docx_file(file_path, str(content))
|
|
389
468
|
elif extension == ".pdf":
|
|
390
469
|
self._write_pdf_file(
|
|
391
|
-
file_path, str(content), use_latex=use_latex
|
|
470
|
+
file_path, title, str(content), use_latex=use_latex
|
|
392
471
|
)
|
|
393
472
|
elif extension == ".csv":
|
|
394
473
|
self._write_csv_file(
|