camel-ai 0.2.60__py3-none-any.whl → 0.2.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +158 -7
- camel/configs/anthropic_config.py +6 -5
- camel/datagen/evol_instruct/scorer.py +22 -23
- camel/datagen/evol_instruct/templates.py +46 -46
- camel/datasets/static_dataset.py +144 -0
- camel/loaders/__init__.py +5 -2
- camel/loaders/chunkr_reader.py +117 -91
- camel/loaders/mistral_reader.py +148 -0
- camel/memories/blocks/chat_history_block.py +1 -2
- camel/models/model_manager.py +7 -3
- camel/societies/workforce/workforce.py +65 -24
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/faiss.py +712 -0
- camel/toolkits/__init__.py +2 -0
- camel/toolkits/async_browser_toolkit.py +75 -523
- camel/toolkits/bohrium_toolkit.py +318 -0
- camel/toolkits/browser_toolkit.py +215 -538
- camel/toolkits/browser_toolkit_commons.py +568 -0
- camel/toolkits/file_write_toolkit.py +76 -29
- camel/toolkits/mcp_toolkit.py +77 -1
- camel/toolkits/wolfram_alpha_toolkit.py +5 -1
- camel/types/enums.py +13 -1
- camel/utils/__init__.py +2 -0
- camel/utils/commons.py +27 -0
- {camel_ai-0.2.60.dist-info → camel_ai-0.2.61.dist-info}/METADATA +11 -1
- {camel_ai-0.2.60.dist-info → camel_ai-0.2.61.dist-info}/RECORD +30 -26
- {camel_ai-0.2.60.dist-info → camel_ai-0.2.61.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.60.dist-info → camel_ai-0.2.61.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/chat_agent.py
CHANGED
|
@@ -75,7 +75,10 @@ from camel.types import (
|
|
|
75
75
|
RoleType,
|
|
76
76
|
)
|
|
77
77
|
from camel.types.agents import ToolCallingRecord
|
|
78
|
-
from camel.utils import
|
|
78
|
+
from camel.utils import (
|
|
79
|
+
get_model_encoding,
|
|
80
|
+
model_from_json_schema,
|
|
81
|
+
)
|
|
79
82
|
|
|
80
83
|
if TYPE_CHECKING:
|
|
81
84
|
from camel.terminators import ResponseTerminator
|
|
@@ -163,6 +166,7 @@ class ChatAgent(BaseAgent):
|
|
|
163
166
|
model: Optional[
|
|
164
167
|
Union[
|
|
165
168
|
BaseModelBackend,
|
|
169
|
+
ModelManager,
|
|
166
170
|
Tuple[str, str],
|
|
167
171
|
str,
|
|
168
172
|
ModelType,
|
|
@@ -188,12 +192,15 @@ class ChatAgent(BaseAgent):
|
|
|
188
192
|
agent_id: Optional[str] = None,
|
|
189
193
|
stop_event: Optional[threading.Event] = None,
|
|
190
194
|
) -> None:
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
195
|
+
if isinstance(model, ModelManager):
|
|
196
|
+
self.model_backend = model
|
|
197
|
+
else:
|
|
198
|
+
# Resolve model backends and set up model manager
|
|
199
|
+
resolved_models = self._resolve_models(model)
|
|
200
|
+
self.model_backend = ModelManager(
|
|
201
|
+
resolved_models,
|
|
202
|
+
scheduling_strategy=scheduling_strategy,
|
|
203
|
+
)
|
|
197
204
|
self.model_type = self.model_backend.model_type
|
|
198
205
|
|
|
199
206
|
# Assign unique ID
|
|
@@ -1706,3 +1713,147 @@ class ChatAgent(BaseAgent):
|
|
|
1706
1713
|
return (
|
|
1707
1714
|
f"ChatAgent({self.role_name}, {self.role_type}, {self.model_type})"
|
|
1708
1715
|
)
|
|
1716
|
+
|
|
1717
|
+
def to_mcp(
|
|
1718
|
+
self,
|
|
1719
|
+
name: str = "CAMEL-ChatAgent",
|
|
1720
|
+
description: str = "A helpful assistant using the CAMEL AI framework.",
|
|
1721
|
+
dependencies: Optional[List[str]] = None,
|
|
1722
|
+
host: str = "localhost",
|
|
1723
|
+
port: int = 8000,
|
|
1724
|
+
):
|
|
1725
|
+
r"""Expose this ChatAgent as an MCP server.
|
|
1726
|
+
|
|
1727
|
+
Args:
|
|
1728
|
+
name (str): Name of the MCP server.
|
|
1729
|
+
(default: :obj:`CAMEL-ChatAgent`)
|
|
1730
|
+
description (str): Description of the agent, returned by the
|
|
1731
|
+
agent info tool and resource. (default: :obj:`A helpful
|
|
1732
|
+
assistant using the CAMEL AI framework.`)
|
|
1733
|
+
dependencies (Optional[List[str]]): Additional
|
|
1734
|
+
dependencies for the MCP server. (default: :obj:`None`)
|
|
1735
|
+
host (str): Host to bind to for HTTP transport.
|
|
1736
|
+
(default: :obj:`localhost`)
|
|
1737
|
+
port (int): Port to bind to for HTTP transport.
|
|
1738
|
+
(default: :obj:`8000`)
|
|
1739
|
+
|
|
1740
|
+
Returns:
|
|
1741
|
+
FastMCP: An MCP server instance that can be run.
|
|
1742
|
+
"""
|
|
1743
|
+
try:
|
|
1744
|
+
from mcp.server.fastmcp import FastMCP
|
|
1745
|
+
except ImportError:
|
|
1746
|
+
raise ImportError(
|
|
1747
|
+
"The 'mcp' package is required to use the to_mcp method. "
|
|
1748
|
+
"Install it with 'pip install mcp'."
|
|
1749
|
+
)
|
|
1750
|
+
|
|
1751
|
+
# Combine dependencies
|
|
1752
|
+
all_dependencies = ["camel-ai[all]"]
|
|
1753
|
+
if dependencies:
|
|
1754
|
+
all_dependencies.extend(dependencies)
|
|
1755
|
+
|
|
1756
|
+
mcp_server = FastMCP(
|
|
1757
|
+
name,
|
|
1758
|
+
dependencies=all_dependencies,
|
|
1759
|
+
host=host,
|
|
1760
|
+
port=port,
|
|
1761
|
+
)
|
|
1762
|
+
|
|
1763
|
+
# Store agent reference
|
|
1764
|
+
agent_instance = self
|
|
1765
|
+
|
|
1766
|
+
# Define functions first
|
|
1767
|
+
async def step(message, response_format=None):
|
|
1768
|
+
r"""Execute a single step in the chat session with the agent."""
|
|
1769
|
+
format_cls = None
|
|
1770
|
+
if response_format:
|
|
1771
|
+
format_cls = model_from_json_schema(
|
|
1772
|
+
"DynamicResponseFormat", response_format
|
|
1773
|
+
)
|
|
1774
|
+
response = await agent_instance.astep(message, format_cls)
|
|
1775
|
+
return {
|
|
1776
|
+
"status": "success",
|
|
1777
|
+
"messages": [msg.to_dict() for msg in response.msgs],
|
|
1778
|
+
"terminated": response.terminated,
|
|
1779
|
+
"info": response.info,
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
# Reset tool
|
|
1783
|
+
def reset():
|
|
1784
|
+
r"""Reset the chat agent to its initial state."""
|
|
1785
|
+
agent_instance.reset()
|
|
1786
|
+
return {"status": "success", "message": "Agent reset successfully"}
|
|
1787
|
+
|
|
1788
|
+
# Set language tool
|
|
1789
|
+
def set_output_language(language):
|
|
1790
|
+
r"""Set the output language for the chat agent."""
|
|
1791
|
+
agent_instance.output_language = language
|
|
1792
|
+
return {
|
|
1793
|
+
"status": "success",
|
|
1794
|
+
"message": f"Output language set to '{language}'",
|
|
1795
|
+
}
|
|
1796
|
+
|
|
1797
|
+
# Agent info resource and tool
|
|
1798
|
+
def get_agent_info():
|
|
1799
|
+
r"""Get information about the agent."""
|
|
1800
|
+
info = {
|
|
1801
|
+
"agent_id": agent_instance.agent_id,
|
|
1802
|
+
"model_type": str(agent_instance.model_type),
|
|
1803
|
+
"role_name": agent_instance.role_name,
|
|
1804
|
+
"role_type": str(agent_instance.role_type),
|
|
1805
|
+
"output_language": agent_instance.output_language or "None",
|
|
1806
|
+
"description": description,
|
|
1807
|
+
}
|
|
1808
|
+
return info
|
|
1809
|
+
|
|
1810
|
+
# Chat history resource and tool
|
|
1811
|
+
def get_chat_history():
|
|
1812
|
+
r"""Get the chat history for the agent."""
|
|
1813
|
+
# Convert messages to simple serializable format
|
|
1814
|
+
messages = []
|
|
1815
|
+
for msg in agent_instance.chat_history:
|
|
1816
|
+
# Create a simplified version of each message
|
|
1817
|
+
msg_dict = {
|
|
1818
|
+
"role": msg.get("role", ""),
|
|
1819
|
+
"content": msg.get("content", ""),
|
|
1820
|
+
}
|
|
1821
|
+
# Include function calls if present
|
|
1822
|
+
if "function_call" in msg:
|
|
1823
|
+
msg_dict["function_call"] = {
|
|
1824
|
+
"name": msg["function_call"].get("name", ""),
|
|
1825
|
+
"arguments": msg["function_call"].get("arguments", ""),
|
|
1826
|
+
}
|
|
1827
|
+
messages.append(msg_dict)
|
|
1828
|
+
return messages
|
|
1829
|
+
|
|
1830
|
+
# Available tools resource and tool
|
|
1831
|
+
def get_available_tools():
|
|
1832
|
+
r"""Get a list of available internal tools."""
|
|
1833
|
+
tool_info = {}
|
|
1834
|
+
for name, tool in agent_instance.tool_dict.items():
|
|
1835
|
+
tool_info[name] = {
|
|
1836
|
+
"name": name,
|
|
1837
|
+
"description": tool.get_function_description() or "",
|
|
1838
|
+
"parameters": [
|
|
1839
|
+
{"name": param_name, "type": str(param_type)}
|
|
1840
|
+
for param_name, param_type in tool.get_parameters().items() # noqa: E501
|
|
1841
|
+
],
|
|
1842
|
+
}
|
|
1843
|
+
return tool_info
|
|
1844
|
+
|
|
1845
|
+
# Now register everything using decorators
|
|
1846
|
+
mcp_server.tool()(step)
|
|
1847
|
+
mcp_server.tool()(reset)
|
|
1848
|
+
mcp_server.tool()(set_output_language)
|
|
1849
|
+
|
|
1850
|
+
mcp_server.resource("agent://")(get_agent_info)
|
|
1851
|
+
mcp_server.tool()(get_agent_info)
|
|
1852
|
+
|
|
1853
|
+
mcp_server.resource("history://")(get_chat_history)
|
|
1854
|
+
mcp_server.tool()(get_chat_history)
|
|
1855
|
+
|
|
1856
|
+
mcp_server.resource("tools://")(get_available_tools)
|
|
1857
|
+
mcp_server.tool()(get_available_tools)
|
|
1858
|
+
|
|
1859
|
+
return mcp_server
|
|
@@ -56,14 +56,14 @@ class AnthropicConfig(BaseConfig):
|
|
|
56
56
|
metadata about the request. Can include user_id as an external
|
|
57
57
|
identifier for the user associated with the request.
|
|
58
58
|
(default: :obj:`None`)
|
|
59
|
-
thinking (dict, optional): Configuration for enabling
|
|
60
|
-
Claude's extended thinking. When enabled, responses include
|
|
61
|
-
thinking content blocks showing Claude's thinking process.
|
|
62
|
-
(default: :obj:`None`)
|
|
63
59
|
tool_choice (dict, optional): How the model should
|
|
64
60
|
use the provided tools. The model can use a specific tool, any
|
|
65
61
|
available tool, decide by itself, or not use tools at all.
|
|
66
62
|
(default: :obj:`None`)
|
|
63
|
+
extra_headers (Optional[dict], optional): Additional headers for the
|
|
64
|
+
request. (default: :obj:`None`)
|
|
65
|
+
extra_body (dict, optional): Extra body parameters to be passed to
|
|
66
|
+
the Anthropic API. (default: :obj:`None`)
|
|
67
67
|
"""
|
|
68
68
|
|
|
69
69
|
max_tokens: Optional[int] = None
|
|
@@ -73,8 +73,9 @@ class AnthropicConfig(BaseConfig):
|
|
|
73
73
|
top_k: Optional[int] = None
|
|
74
74
|
stream: Optional[bool] = None
|
|
75
75
|
metadata: Optional[dict] = None
|
|
76
|
-
thinking: Optional[dict] = None
|
|
77
76
|
tool_choice: Optional[dict] = None
|
|
77
|
+
extra_headers: Optional[dict] = None
|
|
78
|
+
extra_body: Optional[dict] = None
|
|
78
79
|
|
|
79
80
|
|
|
80
81
|
ANTHROPIC_API_PARAMS = {param for param in AnthropicConfig.model_fields.keys()}
|
|
@@ -38,24 +38,27 @@ class BaseScorer(ABC):
|
|
|
38
38
|
|
|
39
39
|
class MathScorer(BaseScorer):
|
|
40
40
|
def __init__(self, agent: Optional[ChatAgent] = None):
|
|
41
|
-
self.system_msg =
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
41
|
+
self.system_msg = """
|
|
42
|
+
You are an evaluator for math problems. Your task is to compare a new math
|
|
43
|
+
problem against a reference math problem by trying to solve it, and rate it
|
|
44
|
+
in **three dimensions**.
|
|
45
|
+
|
|
46
|
+
1. Diversity (1-5): How novel is the new problem compared to the
|
|
47
|
+
reference? 1 = almost the same, 5 = completely different.
|
|
48
|
+
|
|
49
|
+
2. Difficulty (1-10): Rate the relative difficulty compared to the reference
|
|
50
|
+
problem. 1 = much less difficult, 5 = similar difficulty, 10 = much more
|
|
51
|
+
difficult. The difficulty should be based on the complexity of reasoning—i.e.,
|
|
52
|
+
problems that require multi-step reasoning or clever methods to solve.
|
|
53
|
+
|
|
54
|
+
3. Solvability (1-10): How likely is the problem solvable using standard math
|
|
55
|
+
techniques and only contain one question that could be answered by a number or
|
|
56
|
+
a formula? 1 = very unsolvable or ambiguous, 10 = solvable and could be
|
|
57
|
+
answered by a number or a formula.
|
|
58
|
+
|
|
59
|
+
Respond with a JSON object like:
|
|
60
|
+
{ "solution": ..., "diversity": ..., "difficulty": ..., "solvability": ... }
|
|
61
|
+
"""
|
|
59
62
|
self.agent = agent or ChatAgent(self.system_msg)
|
|
60
63
|
|
|
61
64
|
class MathScoreSchema(BaseModel):
|
|
@@ -69,10 +72,6 @@ class MathScorer(BaseScorer):
|
|
|
69
72
|
difficulty: int = Field(
|
|
70
73
|
..., description="Score for the relative difficulty"
|
|
71
74
|
)
|
|
72
|
-
validity: int = Field(
|
|
73
|
-
...,
|
|
74
|
-
description="Score for how well-defined and sound the problem is",
|
|
75
|
-
)
|
|
76
75
|
solvability: int = Field(
|
|
77
76
|
...,
|
|
78
77
|
description="Score for the solvability of the problem",
|
|
@@ -95,7 +94,7 @@ class MathScorer(BaseScorer):
|
|
|
95
94
|
query = (
|
|
96
95
|
f"Reference problem:\n{reference_problem}\n\n"
|
|
97
96
|
f"New problem:\n{new_problem}\n\n"
|
|
98
|
-
"
|
|
97
|
+
"Try to solve the new problem. Then provide scores in JSON format."
|
|
99
98
|
)
|
|
100
99
|
response = self.agent.step(query, response_format=self.MathScoreSchema)
|
|
101
100
|
score_data = json.loads(response.msg.content)
|
|
@@ -208,54 +208,54 @@ class MathEvolInstructTemplates(BaseEvolInstructTemplates):
|
|
|
208
208
|
r"""Contains templates for MathEvolInstruct prompt transformations."""
|
|
209
209
|
|
|
210
210
|
# Meta-instructions for in-depth evolving
|
|
211
|
-
INST_IN_DEPTH =
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
211
|
+
INST_IN_DEPTH = """
|
|
212
|
+
Please act as a math expert. Your objective is to create a new math problem
|
|
213
|
+
that is more challenging yet concise than the given math problem. Modify the
|
|
214
|
+
problem to increase its complexity and depth. The generated problem should be
|
|
215
|
+
clearly stated, strictly mathematical, and suitable for solving with symbolic
|
|
216
|
+
computation (e.g., using sympy). You will be given a method to guide your
|
|
217
|
+
creation. Make sure to follow the method strictly. Consolidate any multiple
|
|
218
|
+
parts into one integrated question that ask for one definitive answer. Do not
|
|
219
|
+
include multiple-choice, true/false, or proof-based questions. The final
|
|
220
|
+
answer should be a number or a formula. Respond with your generated problem
|
|
221
|
+
directly. The difficulty should be based on the complexity of reasoning—i.e.,
|
|
222
|
+
problems that require multi-step reasoning or clever methods to solve. The
|
|
223
|
+
challenge of a problem should not stem purely from computational complexity;
|
|
224
|
+
while complex calculations may be involved, a problem should not be considered
|
|
225
|
+
difficult solely because lengthy computations increase solving time.
|
|
226
|
+
#Original Problem#:
|
|
227
|
+
{prompt}
|
|
228
|
+
#Generated Problem#:
|
|
229
|
+
"""
|
|
226
230
|
|
|
227
231
|
EVOL_METHODS = {
|
|
228
|
-
"constraints":
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
"condense":
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
"necessary changes to let the problem could be answered with one "
|
|
256
|
-
"number or expression by removing the sub-questions or combining "
|
|
257
|
-
"them into one."
|
|
258
|
-
),
|
|
232
|
+
"constraints": """
|
|
233
|
+
Add one or more significant constraints or requirements into the
|
|
234
|
+
'#Given Prompt#'. The added constraints must meaningfully alter how the model
|
|
235
|
+
would respond. For example, specify additional rules, contexts, or limitations
|
|
236
|
+
that demand creative adjustments. This method should make the problem more
|
|
237
|
+
challenging in the reasoning and the solution of it should be clever and
|
|
238
|
+
elegant.
|
|
239
|
+
""",
|
|
240
|
+
"deepening": """
|
|
241
|
+
Increase the difficulty of the #Given Prompt# by integrating additional layers
|
|
242
|
+
of reasoning and rigor. Refine the problem so that all added difficulty is
|
|
243
|
+
consolidated into a single coherent question requiring one final answer,
|
|
244
|
+
avoiding fragmentation into multiple sub-problems.
|
|
245
|
+
""",
|
|
246
|
+
"expansion": """
|
|
247
|
+
Expand the #Given Prompt# by incorporating additional perspectives or layers
|
|
248
|
+
of complexity into the problem statement. Ensure that the revised problem
|
|
249
|
+
remains a single, unified question with one final answer, rather than a
|
|
250
|
+
series of separate sub-questions.
|
|
251
|
+
""",
|
|
252
|
+
"condense": """
|
|
253
|
+
Reformulate the given math problem into a well-structured and formally stated
|
|
254
|
+
mathematical question. Remove unnecessary instructions, explanations, or hints.
|
|
255
|
+
If the given problem contains several sub-questions, make necessary changes
|
|
256
|
+
to let the problem could be answered with one number or one expression by
|
|
257
|
+
removing the sub-questions or combining them into one.
|
|
258
|
+
""",
|
|
259
259
|
}
|
|
260
260
|
|
|
261
261
|
IN_DEPTH_KEYS = ['constraints', 'deepening', 'expansion']
|
camel/datasets/static_dataset.py
CHANGED
|
@@ -398,3 +398,147 @@ class StaticDataset(Dataset):
|
|
|
398
398
|
f"got {type(item).__name__}"
|
|
399
399
|
)
|
|
400
400
|
return data
|
|
401
|
+
|
|
402
|
+
def save_to_json(self, file_path: Union[str, Path]) -> None:
|
|
403
|
+
r"""Save the dataset to a local JSON file.
|
|
404
|
+
|
|
405
|
+
Args:
|
|
406
|
+
file_path (Union[str, Path]): Path to the output JSON file.
|
|
407
|
+
If a string is provided, it will be converted to a Path object.
|
|
408
|
+
|
|
409
|
+
Raises:
|
|
410
|
+
TypeError: If file_path is not a string or Path object.
|
|
411
|
+
OSError: If there's an error writing to the file.
|
|
412
|
+
"""
|
|
413
|
+
if isinstance(file_path, str):
|
|
414
|
+
file_path = Path(file_path)
|
|
415
|
+
elif not isinstance(file_path, Path):
|
|
416
|
+
raise TypeError(
|
|
417
|
+
f"Expected file_path to be a string or Path object, "
|
|
418
|
+
f"got {type(file_path).__name__}"
|
|
419
|
+
)
|
|
420
|
+
|
|
421
|
+
# Convert DataPoint objects to dictionaries
|
|
422
|
+
data_dicts = [datapoint.to_dict() for datapoint in self.data]
|
|
423
|
+
|
|
424
|
+
# Ensure the parent directory exists
|
|
425
|
+
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
426
|
+
|
|
427
|
+
try:
|
|
428
|
+
logger.debug(f"Saving dataset to {file_path}")
|
|
429
|
+
with file_path.open('w', encoding='utf-8') as f:
|
|
430
|
+
json.dump(data_dicts, f, ensure_ascii=False, indent=2)
|
|
431
|
+
logger.info(
|
|
432
|
+
f"Successfully saved {len(data_dicts)} items to {file_path}"
|
|
433
|
+
)
|
|
434
|
+
except OSError as e:
|
|
435
|
+
logger.error(f"Error saving dataset to {file_path}: {e}")
|
|
436
|
+
raise
|
|
437
|
+
|
|
438
|
+
def save_to_huggingface(
|
|
439
|
+
self,
|
|
440
|
+
dataset_name: str,
|
|
441
|
+
token: Optional[str] = None,
|
|
442
|
+
filepath: str = "records/records.json",
|
|
443
|
+
private: bool = False,
|
|
444
|
+
description: Optional[str] = None,
|
|
445
|
+
license: Optional[str] = None,
|
|
446
|
+
version: Optional[str] = None,
|
|
447
|
+
tags: Optional[List[str]] = None,
|
|
448
|
+
language: Optional[List[str]] = None,
|
|
449
|
+
task_categories: Optional[List[str]] = None,
|
|
450
|
+
authors: Optional[List[str]] = None,
|
|
451
|
+
**kwargs: Any,
|
|
452
|
+
) -> str:
|
|
453
|
+
r"""Save the dataset to the Hugging Face Hub using the project's
|
|
454
|
+
HuggingFaceDatasetManager.
|
|
455
|
+
|
|
456
|
+
Args:
|
|
457
|
+
dataset_name (str): The name of the dataset on Hugging Face Hub.
|
|
458
|
+
Should be in the format 'username/dataset_name'.
|
|
459
|
+
token (Optional[str]): The Hugging Face API token. If not provided,
|
|
460
|
+
the token will be read from the environment variable `HF_TOKEN`.
|
|
461
|
+
(default: :obj:`None`)
|
|
462
|
+
filepath (str): The path in the repository where the dataset
|
|
463
|
+
will be saved. (default: :obj:`"records/records.json"`)
|
|
464
|
+
private (bool): Whether the dataset should be private.
|
|
465
|
+
(default: :obj:`False`)
|
|
466
|
+
description (Optional[str]): A description of the dataset.
|
|
467
|
+
(default: :obj:`None`)
|
|
468
|
+
license (Optional[str]): The license of the dataset.
|
|
469
|
+
(default: :obj:`None`)
|
|
470
|
+
version (Optional[str]): The version of the dataset.
|
|
471
|
+
(default: :obj:`None`)
|
|
472
|
+
tags (Optional[List[str]]): A list of tags for the dataset.
|
|
473
|
+
(default: :obj:`None`)
|
|
474
|
+
language (Optional[List[str]]): A list of languages the dataset is
|
|
475
|
+
in. (default: :obj:`None`)
|
|
476
|
+
task_categories (Optional[List[str]]): A list of task categories.
|
|
477
|
+
(default: :obj:`None`)
|
|
478
|
+
authors (Optional[List[str]]): A list of authors of the dataset.
|
|
479
|
+
(default: :obj:`None`)
|
|
480
|
+
**kwargs (Any): Additional keyword arguments to pass to the
|
|
481
|
+
Hugging Face API.
|
|
482
|
+
|
|
483
|
+
Returns:
|
|
484
|
+
str: The URL of the dataset on the Hugging Face Hub.
|
|
485
|
+
|
|
486
|
+
Raises:
|
|
487
|
+
Exception: Any upload error is logged and then re-raised.
|
|
488
|
+
"""
|
|
489
|
+
# lazy import to avoid heavy dependencies
|
|
490
|
+
from camel.datahubs.huggingface import HuggingFaceDatasetManager
|
|
491
|
+
from camel.datahubs.models import Record
|
|
492
|
+
|
|
493
|
+
# Initialize the HuggingFaceDatasetManager
|
|
494
|
+
manager = HuggingFaceDatasetManager(token=token)
|
|
495
|
+
|
|
496
|
+
# Convert DataPoint objects to Record objects
|
|
497
|
+
records = []
|
|
498
|
+
for datapoint in self.data:
|
|
499
|
+
datapoint_dict = datapoint.to_dict()
|
|
500
|
+
|
|
501
|
+
record_dict = {
|
|
502
|
+
"question": datapoint_dict.get("question", ""),
|
|
503
|
+
"final_answer": datapoint_dict.get("final_answer", ""),
|
|
504
|
+
"rationale": datapoint_dict.get("rationale", ""),
|
|
505
|
+
"metadata": datapoint_dict.get("metadata", {}),
|
|
506
|
+
}
|
|
507
|
+
record = Record(**record_dict)
|
|
508
|
+
records.append(record)
|
|
509
|
+
|
|
510
|
+
logger.debug(f"Creating dataset {dataset_name}")
|
|
511
|
+
try:
|
|
512
|
+
# Create the dataset
|
|
513
|
+
dataset_url = manager.create_dataset(
|
|
514
|
+
name=dataset_name, private=private, **kwargs
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
# Add records to the dataset
|
|
518
|
+
manager.add_records(
|
|
519
|
+
dataset_name=dataset_name,
|
|
520
|
+
records=records,
|
|
521
|
+
filepath=filepath,
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
# Create dataset card if description is provided
|
|
525
|
+
if description:
|
|
526
|
+
manager.create_dataset_card(
|
|
527
|
+
dataset_name=dataset_name,
|
|
528
|
+
description=description,
|
|
529
|
+
license=license,
|
|
530
|
+
version=version,
|
|
531
|
+
tags=tags,
|
|
532
|
+
authors=authors,
|
|
533
|
+
language=language,
|
|
534
|
+
task_categories=task_categories,
|
|
535
|
+
)
|
|
536
|
+
|
|
537
|
+
logger.info(
|
|
538
|
+
f"Successfully uploaded dataset to {dataset_name}, "
|
|
539
|
+
f"the url is {dataset_url}"
|
|
540
|
+
)
|
|
541
|
+
return dataset_url
|
|
542
|
+
except Exception as e:
|
|
543
|
+
logger.error(f"Error uploading dataset to Hugging Face: {e}")
|
|
544
|
+
raise
|
camel/loaders/__init__.py
CHANGED
|
@@ -14,12 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
from .apify_reader import Apify
|
|
16
16
|
from .base_io import File, create_file, create_file_from_raw_bytes
|
|
17
|
-
from .chunkr_reader import ChunkrReader
|
|
17
|
+
from .chunkr_reader import ChunkrReader, ChunkrReaderConfig
|
|
18
18
|
from .crawl4ai_reader import Crawl4AI
|
|
19
19
|
from .firecrawl_reader import Firecrawl
|
|
20
20
|
from .jina_url_reader import JinaURLReader
|
|
21
21
|
from .markitdown import MarkItDownLoader
|
|
22
22
|
from .mineru_extractor import MinerU
|
|
23
|
+
from .mistral_reader import MistralReader
|
|
23
24
|
from .pandas_reader import PandasReader
|
|
24
25
|
from .scrapegraph_reader import ScrapeGraphAI
|
|
25
26
|
from .unstructured_io import UnstructuredIO
|
|
@@ -32,10 +33,12 @@ __all__ = [
|
|
|
32
33
|
'JinaURLReader',
|
|
33
34
|
'Firecrawl',
|
|
34
35
|
'Apify',
|
|
35
|
-
'ChunkrReader',
|
|
36
36
|
'PandasReader',
|
|
37
|
+
'ChunkrReader',
|
|
38
|
+
'ChunkrReaderConfig',
|
|
37
39
|
'MinerU',
|
|
38
40
|
'Crawl4AI',
|
|
39
41
|
'MarkItDownLoader',
|
|
40
42
|
'ScrapeGraphAI',
|
|
43
|
+
'MistralReader',
|
|
41
44
|
]
|