camel-ai 0.2.59__py3-none-any.whl → 0.2.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +158 -7
- camel/configs/anthropic_config.py +6 -5
- camel/configs/cohere_config.py +1 -1
- camel/configs/mistral_config.py +1 -1
- camel/configs/openai_config.py +3 -0
- camel/configs/reka_config.py +1 -1
- camel/configs/samba_config.py +2 -2
- camel/datagen/cot_datagen.py +29 -34
- camel/datagen/evol_instruct/scorer.py +22 -23
- camel/datagen/evol_instruct/templates.py +46 -46
- camel/datasets/static_dataset.py +144 -0
- camel/embeddings/jina_embedding.py +8 -1
- camel/embeddings/sentence_transformers_embeddings.py +2 -2
- camel/embeddings/vlm_embedding.py +9 -2
- camel/loaders/__init__.py +5 -2
- camel/loaders/chunkr_reader.py +117 -91
- camel/loaders/mistral_reader.py +148 -0
- camel/memories/blocks/chat_history_block.py +1 -2
- camel/memories/records.py +3 -0
- camel/messages/base.py +15 -3
- camel/models/azure_openai_model.py +1 -0
- camel/models/model_factory.py +2 -2
- camel/models/model_manager.py +7 -3
- camel/retrievers/bm25_retriever.py +1 -2
- camel/retrievers/hybrid_retrival.py +2 -2
- camel/societies/workforce/workforce.py +65 -24
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/faiss.py +712 -0
- camel/storages/vectordb_storages/oceanbase.py +1 -2
- camel/toolkits/__init__.py +2 -0
- camel/toolkits/async_browser_toolkit.py +80 -524
- camel/toolkits/bohrium_toolkit.py +318 -0
- camel/toolkits/browser_toolkit.py +221 -541
- camel/toolkits/browser_toolkit_commons.py +568 -0
- camel/toolkits/dalle_toolkit.py +4 -0
- camel/toolkits/excel_toolkit.py +8 -2
- camel/toolkits/file_write_toolkit.py +76 -29
- camel/toolkits/github_toolkit.py +43 -25
- camel/toolkits/image_analysis_toolkit.py +3 -0
- camel/toolkits/jina_reranker_toolkit.py +194 -77
- camel/toolkits/mcp_toolkit.py +134 -16
- camel/toolkits/page_script.js +40 -28
- camel/toolkits/twitter_toolkit.py +6 -1
- camel/toolkits/video_analysis_toolkit.py +3 -0
- camel/toolkits/video_download_toolkit.py +3 -0
- camel/toolkits/wolfram_alpha_toolkit.py +51 -23
- camel/types/enums.py +27 -6
- camel/utils/__init__.py +2 -0
- camel/utils/commons.py +27 -0
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.61.dist-info}/METADATA +17 -9
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.61.dist-info}/RECORD +55 -51
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.61.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.59.dist-info → camel_ai-0.2.61.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/chat_agent.py
CHANGED
|
@@ -75,7 +75,10 @@ from camel.types import (
|
|
|
75
75
|
RoleType,
|
|
76
76
|
)
|
|
77
77
|
from camel.types.agents import ToolCallingRecord
|
|
78
|
-
from camel.utils import
|
|
78
|
+
from camel.utils import (
|
|
79
|
+
get_model_encoding,
|
|
80
|
+
model_from_json_schema,
|
|
81
|
+
)
|
|
79
82
|
|
|
80
83
|
if TYPE_CHECKING:
|
|
81
84
|
from camel.terminators import ResponseTerminator
|
|
@@ -163,6 +166,7 @@ class ChatAgent(BaseAgent):
|
|
|
163
166
|
model: Optional[
|
|
164
167
|
Union[
|
|
165
168
|
BaseModelBackend,
|
|
169
|
+
ModelManager,
|
|
166
170
|
Tuple[str, str],
|
|
167
171
|
str,
|
|
168
172
|
ModelType,
|
|
@@ -188,12 +192,15 @@ class ChatAgent(BaseAgent):
|
|
|
188
192
|
agent_id: Optional[str] = None,
|
|
189
193
|
stop_event: Optional[threading.Event] = None,
|
|
190
194
|
) -> None:
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
195
|
+
if isinstance(model, ModelManager):
|
|
196
|
+
self.model_backend = model
|
|
197
|
+
else:
|
|
198
|
+
# Resolve model backends and set up model manager
|
|
199
|
+
resolved_models = self._resolve_models(model)
|
|
200
|
+
self.model_backend = ModelManager(
|
|
201
|
+
resolved_models,
|
|
202
|
+
scheduling_strategy=scheduling_strategy,
|
|
203
|
+
)
|
|
197
204
|
self.model_type = self.model_backend.model_type
|
|
198
205
|
|
|
199
206
|
# Assign unique ID
|
|
@@ -1706,3 +1713,147 @@ class ChatAgent(BaseAgent):
|
|
|
1706
1713
|
return (
|
|
1707
1714
|
f"ChatAgent({self.role_name}, {self.role_type}, {self.model_type})"
|
|
1708
1715
|
)
|
|
1716
|
+
|
|
1717
|
+
def to_mcp(
|
|
1718
|
+
self,
|
|
1719
|
+
name: str = "CAMEL-ChatAgent",
|
|
1720
|
+
description: str = "A helpful assistant using the CAMEL AI framework.",
|
|
1721
|
+
dependencies: Optional[List[str]] = None,
|
|
1722
|
+
host: str = "localhost",
|
|
1723
|
+
port: int = 8000,
|
|
1724
|
+
):
|
|
1725
|
+
r"""Expose this ChatAgent as an MCP server.
|
|
1726
|
+
|
|
1727
|
+
Args:
|
|
1728
|
+
name (str): Name of the MCP server.
|
|
1729
|
+
(default: :obj:`CAMEL-ChatAgent`)
|
|
1730
|
+
description (str): Description of the agent, reported by the
|
|
1731
|
+
agent info resource and tool. (default: :obj:`A helpful
|
|
1732
|
+
assistant using the CAMEL AI framework.`)
|
|
1733
|
+
dependencies (Optional[List[str]]): Additional
|
|
1734
|
+
dependencies for the MCP server. (default: :obj:`None`)
|
|
1735
|
+
host (str): Host to bind to for HTTP transport.
|
|
1736
|
+
(default: :obj:`localhost`)
|
|
1737
|
+
port (int): Port to bind to for HTTP transport.
|
|
1738
|
+
(default: :obj:`8000`)
|
|
1739
|
+
|
|
1740
|
+
Returns:
|
|
1741
|
+
FastMCP: An MCP server instance that can be run.
|
|
1742
|
+
"""
|
|
1743
|
+
try:
|
|
1744
|
+
from mcp.server.fastmcp import FastMCP
|
|
1745
|
+
except ImportError:
|
|
1746
|
+
raise ImportError(
|
|
1747
|
+
"The 'mcp' package is required to use the to_mcp method. "
|
|
1748
|
+
"Install it with 'pip install mcp'."
|
|
1749
|
+
)
|
|
1750
|
+
|
|
1751
|
+
# Combine dependencies
|
|
1752
|
+
all_dependencies = ["camel-ai[all]"]
|
|
1753
|
+
if dependencies:
|
|
1754
|
+
all_dependencies.extend(dependencies)
|
|
1755
|
+
|
|
1756
|
+
mcp_server = FastMCP(
|
|
1757
|
+
name,
|
|
1758
|
+
dependencies=all_dependencies,
|
|
1759
|
+
host=host,
|
|
1760
|
+
port=port,
|
|
1761
|
+
)
|
|
1762
|
+
|
|
1763
|
+
# Store agent reference
|
|
1764
|
+
agent_instance = self
|
|
1765
|
+
|
|
1766
|
+
# Define functions first
|
|
1767
|
+
async def step(message, response_format=None):
|
|
1768
|
+
r"""Execute a single step in the chat session with the agent."""
|
|
1769
|
+
format_cls = None
|
|
1770
|
+
if response_format:
|
|
1771
|
+
format_cls = model_from_json_schema(
|
|
1772
|
+
"DynamicResponseFormat", response_format
|
|
1773
|
+
)
|
|
1774
|
+
response = await agent_instance.astep(message, format_cls)
|
|
1775
|
+
return {
|
|
1776
|
+
"status": "success",
|
|
1777
|
+
"messages": [msg.to_dict() for msg in response.msgs],
|
|
1778
|
+
"terminated": response.terminated,
|
|
1779
|
+
"info": response.info,
|
|
1780
|
+
}
|
|
1781
|
+
|
|
1782
|
+
# Reset tool
|
|
1783
|
+
def reset():
|
|
1784
|
+
r"""Reset the chat agent to its initial state."""
|
|
1785
|
+
agent_instance.reset()
|
|
1786
|
+
return {"status": "success", "message": "Agent reset successfully"}
|
|
1787
|
+
|
|
1788
|
+
# Set language tool
|
|
1789
|
+
def set_output_language(language):
|
|
1790
|
+
r"""Set the output language for the chat agent."""
|
|
1791
|
+
agent_instance.output_language = language
|
|
1792
|
+
return {
|
|
1793
|
+
"status": "success",
|
|
1794
|
+
"message": f"Output language set to '{language}'",
|
|
1795
|
+
}
|
|
1796
|
+
|
|
1797
|
+
# Agent info resource and tool
|
|
1798
|
+
def get_agent_info():
|
|
1799
|
+
r"""Get information about the agent."""
|
|
1800
|
+
info = {
|
|
1801
|
+
"agent_id": agent_instance.agent_id,
|
|
1802
|
+
"model_type": str(agent_instance.model_type),
|
|
1803
|
+
"role_name": agent_instance.role_name,
|
|
1804
|
+
"role_type": str(agent_instance.role_type),
|
|
1805
|
+
"output_language": agent_instance.output_language or "None",
|
|
1806
|
+
"description": description,
|
|
1807
|
+
}
|
|
1808
|
+
return info
|
|
1809
|
+
|
|
1810
|
+
# Chat history resource and tool
|
|
1811
|
+
def get_chat_history():
|
|
1812
|
+
r"""Get the chat history for the agent."""
|
|
1813
|
+
# Convert messages to simple serializable format
|
|
1814
|
+
messages = []
|
|
1815
|
+
for msg in agent_instance.chat_history:
|
|
1816
|
+
# Create a simplified version of each message
|
|
1817
|
+
msg_dict = {
|
|
1818
|
+
"role": msg.get("role", ""),
|
|
1819
|
+
"content": msg.get("content", ""),
|
|
1820
|
+
}
|
|
1821
|
+
# Include function calls if present
|
|
1822
|
+
if "function_call" in msg:
|
|
1823
|
+
msg_dict["function_call"] = {
|
|
1824
|
+
"name": msg["function_call"].get("name", ""),
|
|
1825
|
+
"arguments": msg["function_call"].get("arguments", ""),
|
|
1826
|
+
}
|
|
1827
|
+
messages.append(msg_dict)
|
|
1828
|
+
return messages
|
|
1829
|
+
|
|
1830
|
+
# Available tools resource and tool
|
|
1831
|
+
def get_available_tools():
|
|
1832
|
+
r"""Get a list of available internal tools."""
|
|
1833
|
+
tool_info = {}
|
|
1834
|
+
for name, tool in agent_instance.tool_dict.items():
|
|
1835
|
+
tool_info[name] = {
|
|
1836
|
+
"name": name,
|
|
1837
|
+
"description": tool.get_function_description() or "",
|
|
1838
|
+
"parameters": [
|
|
1839
|
+
{"name": param_name, "type": str(param_type)}
|
|
1840
|
+
for param_name, param_type in tool.get_parameters().items() # noqa: E501
|
|
1841
|
+
],
|
|
1842
|
+
}
|
|
1843
|
+
return tool_info
|
|
1844
|
+
|
|
1845
|
+
# Now register everything using decorators
|
|
1846
|
+
mcp_server.tool()(step)
|
|
1847
|
+
mcp_server.tool()(reset)
|
|
1848
|
+
mcp_server.tool()(set_output_language)
|
|
1849
|
+
|
|
1850
|
+
mcp_server.resource("agent://")(get_agent_info)
|
|
1851
|
+
mcp_server.tool()(get_agent_info)
|
|
1852
|
+
|
|
1853
|
+
mcp_server.resource("history://")(get_chat_history)
|
|
1854
|
+
mcp_server.tool()(get_chat_history)
|
|
1855
|
+
|
|
1856
|
+
mcp_server.resource("tools://")(get_available_tools)
|
|
1857
|
+
mcp_server.tool()(get_available_tools)
|
|
1858
|
+
|
|
1859
|
+
return mcp_server
|
|
@@ -56,14 +56,14 @@ class AnthropicConfig(BaseConfig):
|
|
|
56
56
|
metadata about the request. Can include user_id as an external
|
|
57
57
|
identifier for the user associated with the request.
|
|
58
58
|
(default: :obj:`None`)
|
|
59
|
-
thinking (dict, optional): Configuration for enabling
|
|
60
|
-
Claude's extended thinking. When enabled, responses include
|
|
61
|
-
thinking content blocks showing Claude's thinking process.
|
|
62
|
-
(default: :obj:`None`)
|
|
63
59
|
tool_choice (dict, optional): How the model should
|
|
64
60
|
use the provided tools. The model can use a specific tool, any
|
|
65
61
|
available tool, decide by itself, or not use tools at all.
|
|
66
62
|
(default: :obj:`None`)
|
|
63
|
+
extra_headers (Optional[dict], optional): Additional headers for the
|
|
64
|
+
request. (default: :obj:`None`)
|
|
65
|
+
extra_body (dict, optional): Extra body parameters to be passed to
|
|
66
|
+
the Anthropic API. (default: :obj:`None`)
|
|
67
67
|
"""
|
|
68
68
|
|
|
69
69
|
max_tokens: Optional[int] = None
|
|
@@ -73,8 +73,9 @@ class AnthropicConfig(BaseConfig):
|
|
|
73
73
|
top_k: Optional[int] = None
|
|
74
74
|
stream: Optional[bool] = None
|
|
75
75
|
metadata: Optional[dict] = None
|
|
76
|
-
thinking: Optional[dict] = None
|
|
77
76
|
tool_choice: Optional[dict] = None
|
|
77
|
+
extra_headers: Optional[dict] = None
|
|
78
|
+
extra_body: Optional[dict] = None
|
|
78
79
|
|
|
79
80
|
|
|
80
81
|
ANTHROPIC_API_PARAMS = {param for param in AnthropicConfig.model_fields.keys()}
|
camel/configs/cohere_config.py
CHANGED
camel/configs/mistral_config.py
CHANGED
camel/configs/openai_config.py
CHANGED
|
@@ -104,6 +104,8 @@ class ChatGPTConfig(BaseConfig):
|
|
|
104
104
|
parallel_tool_calls (bool, optional): A parameter specifying whether
|
|
105
105
|
the model should call tools in parallel or not.
|
|
106
106
|
(default: :obj:`None`)
|
|
107
|
+
extra_headers (Optional[Dict[str, str]], optional): Extra headers to use for the
|
|
108
|
+
model. (default: :obj:`None`)
|
|
107
109
|
"""
|
|
108
110
|
|
|
109
111
|
temperature: Optional[float] = None
|
|
@@ -120,6 +122,7 @@ class ChatGPTConfig(BaseConfig):
|
|
|
120
122
|
tool_choice: Optional[Union[Dict[str, str], str]] = None
|
|
121
123
|
reasoning_effort: Optional[str] = None
|
|
122
124
|
parallel_tool_calls: Optional[bool] = None
|
|
125
|
+
extra_headers: Optional[Dict[str, str]] = None
|
|
123
126
|
|
|
124
127
|
|
|
125
128
|
OPENAI_API_PARAMS = {param for param in ChatGPTConfig.model_fields.keys()}
|
camel/configs/reka_config.py
CHANGED
camel/configs/samba_config.py
CHANGED
|
@@ -65,7 +65,7 @@ class SambaVerseAPIConfig(BaseConfig):
|
|
|
65
65
|
|
|
66
66
|
|
|
67
67
|
SAMBA_VERSE_API_PARAMS = {
|
|
68
|
-
param for param in SambaVerseAPIConfig
|
|
68
|
+
param for param in SambaVerseAPIConfig.model_fields.keys()
|
|
69
69
|
}
|
|
70
70
|
|
|
71
71
|
|
|
@@ -160,5 +160,5 @@ class SambaCloudAPIConfig(BaseConfig):
|
|
|
160
160
|
|
|
161
161
|
|
|
162
162
|
SAMBA_CLOUD_API_PARAMS = {
|
|
163
|
-
param for param in SambaCloudAPIConfig
|
|
163
|
+
param for param in SambaCloudAPIConfig.model_fields.keys()
|
|
164
164
|
}
|
camel/datagen/cot_datagen.py
CHANGED
|
@@ -204,26 +204,23 @@ class CoTDataGenerator:
|
|
|
204
204
|
logger.info("Answer verification result: %s", is_correct)
|
|
205
205
|
return is_correct
|
|
206
206
|
|
|
207
|
-
def
|
|
207
|
+
def evaluate_partial_solution(
|
|
208
208
|
self, question: str, partial_solution: str = ""
|
|
209
209
|
) -> float:
|
|
210
|
-
r"""
|
|
210
|
+
r"""Evaluate the quality of a partial solution against the
|
|
211
|
+
golden answer.
|
|
211
212
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
scores
|
|
215
|
-
b. Expansion: Generate new solution steps using the generator agent
|
|
216
|
-
c. Simulation: Evaluate solution quality using similarity scores
|
|
217
|
-
d. Backpropagation: Update solution tree with new findings
|
|
213
|
+
This function generates a similarity score between the given partial
|
|
214
|
+
solution and the correct answer (golden answer).
|
|
218
215
|
|
|
219
216
|
Args:
|
|
220
|
-
question (str): The question
|
|
221
|
-
partial_solution (str): The
|
|
217
|
+
question (str): The question being solved.
|
|
218
|
+
partial_solution (str): The partial solution generated so far.
|
|
222
219
|
(default: :obj:`""`)
|
|
223
220
|
|
|
224
221
|
Returns:
|
|
225
|
-
float:
|
|
226
|
-
solution
|
|
222
|
+
float: A similarity score between 0 and 1, indicating how close the
|
|
223
|
+
partial solution is to the golden answer.
|
|
227
224
|
"""
|
|
228
225
|
if question not in self.golden_answers:
|
|
229
226
|
raise ValueError(
|
|
@@ -293,10 +290,21 @@ class CoTDataGenerator:
|
|
|
293
290
|
r"""Solve a question using a multi-step approach.
|
|
294
291
|
|
|
295
292
|
The solution process follows these steps:
|
|
296
|
-
1. Try to solve directly - if correct, return the solution
|
|
297
|
-
2. If not correct,
|
|
298
|
-
|
|
299
|
-
|
|
293
|
+
1. Try to solve directly - if correct, return the solution.
|
|
294
|
+
2. If not correct, perform a search by iteratively generating
|
|
295
|
+
new solutions and evaluating their similarity scores to
|
|
296
|
+
find a good solution. The search process involves:
|
|
297
|
+
a. Generation: Generate new solution candidates using
|
|
298
|
+
the generator agent.
|
|
299
|
+
b. Evaluation: Score each solution candidate for similarity
|
|
300
|
+
to the golden answer.
|
|
301
|
+
c. Selection: Keep the best-scoring candidate found so far.
|
|
302
|
+
d. Early stopping: If a sufficiently high-scoring solution
|
|
303
|
+
is found (score > 0.9), stop early.
|
|
304
|
+
3. If the solution isn't perfect, use binary search to locate
|
|
305
|
+
errors.
|
|
306
|
+
4. Generate a new solution based on the correct part of the
|
|
307
|
+
initial solution.
|
|
300
308
|
|
|
301
309
|
Args:
|
|
302
310
|
question (str): The question to solve.
|
|
@@ -304,14 +312,14 @@ class CoTDataGenerator:
|
|
|
304
312
|
Returns:
|
|
305
313
|
str: The best solution found.
|
|
306
314
|
"""
|
|
315
|
+
|
|
307
316
|
# 1. Try direct solution first
|
|
308
317
|
solution = self.get_answer(question)
|
|
309
318
|
if self.verify_answer(question, solution):
|
|
310
319
|
logger.info("Initial solution is correct")
|
|
311
320
|
return solution
|
|
312
321
|
|
|
313
|
-
# 2. If direct solution fails,
|
|
314
|
-
# to find a solution with high similarity score
|
|
322
|
+
# 2. If direct solution fails, iteratively search for a better solution
|
|
315
323
|
best_solution = ""
|
|
316
324
|
best_score: float = 0.0
|
|
317
325
|
for i in range(self.search_limit):
|
|
@@ -319,23 +327,10 @@ class CoTDataGenerator:
|
|
|
319
327
|
current_solution = self.get_answer(question, best_solution)
|
|
320
328
|
|
|
321
329
|
# Evaluate solution similarity score
|
|
322
|
-
prompt = (
|
|
323
|
-
f"Please evaluate this solution and "
|
|
324
|
-
f"give a score between 0-1:\n"
|
|
325
|
-
f"Question: {question}\n"
|
|
326
|
-
f"Solution: {current_solution}\n"
|
|
327
|
-
f"Correct answer: {self.golden_answers.get(question, '')}\n"
|
|
328
|
-
f"Return a JSON object with a single field 'score' containing "
|
|
329
|
-
f"a float between 0 and 1, like this: {{'score': 0.85}}\n"
|
|
330
|
-
)
|
|
331
|
-
self.generator_agent.reset()
|
|
332
|
-
response = self.generator_agent.step(prompt)
|
|
333
330
|
try:
|
|
334
|
-
|
|
335
|
-
|
|
331
|
+
score = self.evaluate_partial_solution(
|
|
332
|
+
question, current_solution
|
|
336
333
|
)
|
|
337
|
-
agent_response = response.msgs[0].parsed.score # type: ignore [union-attr]
|
|
338
|
-
score = agent_response
|
|
339
334
|
|
|
340
335
|
# Exit early if we find a very good solution (score > 0.9)
|
|
341
336
|
if score > 0.9:
|
|
@@ -357,7 +352,7 @@ class CoTDataGenerator:
|
|
|
357
352
|
best_score,
|
|
358
353
|
)
|
|
359
354
|
except Exception as e:
|
|
360
|
-
logger.error("Error
|
|
355
|
+
logger.error("Error evaluating partial solution: %s", str(e))
|
|
361
356
|
continue
|
|
362
357
|
|
|
363
358
|
# 3. If the answer is not completely correct,
|
|
@@ -38,24 +38,27 @@ class BaseScorer(ABC):
|
|
|
38
38
|
|
|
39
39
|
class MathScorer(BaseScorer):
|
|
40
40
|
def __init__(self, agent: Optional[ChatAgent] = None):
|
|
41
|
-
self.system_msg =
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
41
|
+
self.system_msg = """
|
|
42
|
+
You are an evaluator for math problems. Your task is to compare a new math
|
|
43
|
+
problem against a reference math problem by trying to solve it, and rate it
|
|
44
|
+
in **three dimensions**.
|
|
45
|
+
|
|
46
|
+
1. Diversity (1-5): How novel is the new problem compared to the
|
|
47
|
+
reference? 1 = almost the same, 5 = completely different.
|
|
48
|
+
|
|
49
|
+
2. Difficulty (1-10): Rate the relative difficulty compared to the reference
|
|
50
|
+
problem. 1 = much less difficult, 5 = similar difficulty, 10 = much more
|
|
51
|
+
difficult. The difficulty should be based on the complexity of reasoning—i.e.,
|
|
52
|
+
problems that require multi-step reasoning or clever methods to solve.
|
|
53
|
+
|
|
54
|
+
3. Solvability (1-10): How likely is the problem solvable using standard math
|
|
55
|
+
techniques and only contain one question that could be answered by a number or
|
|
56
|
+
a formula? 1 = very unsolvable or ambiguous, 10 = solvable and could be
|
|
57
|
+
answered by a number or a formula.
|
|
58
|
+
|
|
59
|
+
Respond with a JSON object like:
|
|
60
|
+
{ "solution": ..., "diversity": ..., "difficulty": ..., "solvability": ... }
|
|
61
|
+
"""
|
|
59
62
|
self.agent = agent or ChatAgent(self.system_msg)
|
|
60
63
|
|
|
61
64
|
class MathScoreSchema(BaseModel):
|
|
@@ -69,10 +72,6 @@ class MathScorer(BaseScorer):
|
|
|
69
72
|
difficulty: int = Field(
|
|
70
73
|
..., description="Score for the relative difficulty"
|
|
71
74
|
)
|
|
72
|
-
validity: int = Field(
|
|
73
|
-
...,
|
|
74
|
-
description="Score for how well-defined and sound the problem is",
|
|
75
|
-
)
|
|
76
75
|
solvability: int = Field(
|
|
77
76
|
...,
|
|
78
77
|
description="Score for the solvability of the problem",
|
|
@@ -95,7 +94,7 @@ class MathScorer(BaseScorer):
|
|
|
95
94
|
query = (
|
|
96
95
|
f"Reference problem:\n{reference_problem}\n\n"
|
|
97
96
|
f"New problem:\n{new_problem}\n\n"
|
|
98
|
-
"
|
|
97
|
+
"Try to solve the new problem. Then provide scores in JSON format."
|
|
99
98
|
)
|
|
100
99
|
response = self.agent.step(query, response_format=self.MathScoreSchema)
|
|
101
100
|
score_data = json.loads(response.msg.content)
|
|
@@ -208,54 +208,54 @@ class MathEvolInstructTemplates(BaseEvolInstructTemplates):
|
|
|
208
208
|
r"""Contains templates for MathEvolInstruct prompt transformations."""
|
|
209
209
|
|
|
210
210
|
# Meta-instructions for in-depth evolving
|
|
211
|
-
INST_IN_DEPTH =
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
211
|
+
INST_IN_DEPTH = """
|
|
212
|
+
Please act as a math expert. Your objective is to create a new math problem
|
|
213
|
+
that is more challenging yet concise than the given math problem. Modify the
|
|
214
|
+
problem to increase its complexity and depth. The generated problem should be
|
|
215
|
+
clearly stated, strictly mathematical, and suitable for solving with symbolic
|
|
216
|
+
computation (e.g., using sympy). You will be given a method to guide your
|
|
217
|
+
creation. Make sure to follow the method strictly. Consolidate any multiple
|
|
218
|
+
parts into one integrated question that ask for one definitive answer. Do not
|
|
219
|
+
include multiple-choice, true/false, or proof-based questions. The final
|
|
220
|
+
answer should be a number or a formula. Respond with your generated problem
|
|
221
|
+
directly. The difficulty should be based on the complexity of reasoning—i.e.,
|
|
222
|
+
problems that require multi-step reasoning or clever methods to solve. The
|
|
223
|
+
challenge of a problem should not stem purely from computational complexity;
|
|
224
|
+
while complex calculations may be involved, a problem should not be considered
|
|
225
|
+
difficult solely because lengthy computations increase solving time.
|
|
226
|
+
#Original Problem#:
|
|
227
|
+
{prompt}
|
|
228
|
+
#Generated Problem#:
|
|
229
|
+
"""
|
|
226
230
|
|
|
227
231
|
EVOL_METHODS = {
|
|
228
|
-
"constraints":
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
"condense":
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
"necessary changes to let the problem could be answered with one "
|
|
256
|
-
"number or expression by removing the sub-questions or combining "
|
|
257
|
-
"them into one."
|
|
258
|
-
),
|
|
232
|
+
"constraints": """
|
|
233
|
+
Add one or more significant constraints or requirements into the
|
|
234
|
+
'#Given Prompt#'. The added constraints must meaningfully alter how the model
|
|
235
|
+
would respond. For example, specify additional rules, contexts, or limitations
|
|
236
|
+
that demand creative adjustments. This method should make the problem more
|
|
237
|
+
challenging in the reasoning and the solution of it should be clever and
|
|
238
|
+
elegant.
|
|
239
|
+
""",
|
|
240
|
+
"deepening": """
|
|
241
|
+
Increase the difficulty of the #Given Prompt# by integrating additional layers
|
|
242
|
+
of reasoning and rigor. Refine the problem so that all added difficulty is
|
|
243
|
+
consolidated into a single coherent question requiring one final answer,
|
|
244
|
+
avoiding fragmentation into multiple sub-problems.
|
|
245
|
+
""",
|
|
246
|
+
"expansion": """
|
|
247
|
+
Expand the #Given Prompt# by incorporating additional perspectives or layers
|
|
248
|
+
of complexity into the problem statement. Ensure that the revised problem
|
|
249
|
+
remains a single, unified question with one final answer, rather than a
|
|
250
|
+
series of separate sub-questions.
|
|
251
|
+
""",
|
|
252
|
+
"condense": """
|
|
253
|
+
Reformulate the given math problem into a well-structured and formally stated
|
|
254
|
+
mathematical question. Remove unnecessary instructions, explanations, or hints.
|
|
255
|
+
If the given problem contains several sub-questions, make necessary changes
|
|
256
|
+
to let the problem could be answered with one number or one expression by
|
|
257
|
+
removing the sub-questions or combining them into one.
|
|
258
|
+
""",
|
|
259
259
|
}
|
|
260
260
|
|
|
261
261
|
IN_DEPTH_KEYS = ['constraints', 'deepening', 'expansion']
|