camel-ai 0.2.37__py3-none-any.whl → 0.2.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +4 -0
- camel/agents/repo_agent.py +2 -2
- camel/benchmarks/apibank.py +1 -1
- camel/benchmarks/apibench.py +1 -1
- camel/configs/__init__.py +3 -0
- camel/configs/modelscope_config.py +59 -0
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/__init__.py +2 -0
- camel/datasets/base_generator.py +22 -9
- camel/datasets/few_shot_generator.py +2 -3
- camel/datasets/self_instruct_generator.py +415 -0
- camel/embeddings/openai_compatible_embedding.py +13 -5
- camel/environments/models.py +10 -4
- camel/environments/single_step.py +181 -41
- camel/interpreters/docker_interpreter.py +2 -2
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/internal_python_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/loaders/unstructured_io.py +2 -1
- camel/memories/blocks/chat_history_block.py +1 -1
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/__init__.py +2 -0
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +123 -0
- camel/models/modelscope_model.py +208 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +7 -5
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +58 -5
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +11 -5
- camel/models/vllm_model.py +10 -4
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +11 -5
- camel/retrievers/auto_retriever.py +14 -0
- camel/retrievers/vector_retriever.py +1 -1
- camel/storages/__init__.py +2 -0
- camel/storages/graph_storages/neo4j_graph.py +1 -1
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/qdrant.py +2 -2
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/tasks/task.py +2 -2
- camel/toolkits/__init__.py +9 -1
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +11 -3
- camel/toolkits/audio_analysis_toolkit.py +2 -0
- camel/toolkits/base.py +3 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/code_execution.py +3 -1
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +2 -0
- camel/toolkits/excel_toolkit.py +2 -0
- camel/toolkits/file_write_toolkit.py +2 -0
- camel/toolkits/github_toolkit.py +6 -4
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +17 -1
- camel/toolkits/image_analysis_toolkit.py +2 -0
- camel/toolkits/linkedin_toolkit.py +2 -1
- camel/toolkits/math_toolkit.py +2 -0
- camel/toolkits/mcp_toolkit.py +42 -52
- camel/toolkits/meshy_toolkit.py +20 -2
- camel/toolkits/networkx_toolkit.py +2 -0
- camel/toolkits/notion_toolkit.py +7 -0
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/openbb_toolkit.py +2 -1
- camel/toolkits/pubmed_toolkit.py +2 -0
- camel/toolkits/reddit_toolkit.py +2 -1
- camel/toolkits/retrieval_toolkit.py +2 -1
- camel/toolkits/search_toolkit.py +2 -1
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/semantic_scholar_toolkit.py +2 -0
- camel/toolkits/slack_toolkit.py +2 -0
- camel/toolkits/stripe_toolkit.py +2 -1
- camel/toolkits/sympy_toolkit.py +2 -0
- camel/toolkits/terminal_toolkit.py +2 -0
- camel/toolkits/thinking_toolkit.py +168 -12
- camel/toolkits/twitter_toolkit.py +2 -1
- camel/toolkits/video_analysis_toolkit.py +2 -1
- camel/toolkits/video_download_toolkit.py +2 -1
- camel/toolkits/weather_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +2 -1
- camel/toolkits/zapier_toolkit.py +2 -1
- camel/types/enums.py +66 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/__init__.py +2 -0
- camel/utils/chunker/code_chunker.py +9 -9
- camel/utils/commons.py +50 -30
- camel/utils/constants.py +2 -2
- camel/utils/mcp.py +79 -0
- camel/verifiers/__init__.py +2 -0
- camel/verifiers/base.py +15 -15
- camel/verifiers/math_verifier.py +182 -0
- camel/verifiers/python_verifier.py +28 -28
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/METADATA +54 -4
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/RECORD +122 -110
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
from abc import ABC, abstractmethod
|
|
17
|
+
from typing import Dict, Optional
|
|
18
|
+
|
|
19
|
+
from pydantic import BaseModel, Field
|
|
20
|
+
|
|
21
|
+
from camel.agents import ChatAgent
|
|
22
|
+
from camel.logger import get_logger
|
|
23
|
+
|
|
24
|
+
# Module-level logger, named after this module.
logger = get_logger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class BaseScorer(ABC):
    r"""Interface for scorers that rate a candidate prompt against a
    reference prompt.
    """

    @abstractmethod
    def score(
        self, reference_prompt: str, candidate_prompt: str
    ) -> Dict[str, int]:
        r"""Rate a candidate prompt relative to a reference prompt.

        Args:
            reference_prompt (str): The prompt used as the baseline.
            candidate_prompt (str): The prompt to be rated.

        Returns:
            Dict[str, int]: Scores keyed by dimension name (for example
                diversity, difficulty, feasibility). The higher the score,
                the better.
        """
        ...
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class MathScorer(BaseScorer):
    r"""Scorer that asks a chat agent to rate a new math problem against a
    reference math problem in four dimensions: diversity, difficulty,
    validity and solvability.

    Args:
        agent (Optional[ChatAgent]): Agent used to produce the scores. If
            not provided, a :obj:`ChatAgent` is created with
            :obj:`system_msg` as its system message.
            (default: :obj:`None`)
    """

    def __init__(self, agent: Optional[ChatAgent] = None):
        self.system_msg = (
            "You are an evaluator for math problems. Your task is to compare "
            "a new math problem against a reference math problem, and rate it "
            "in **four dimensions**, each scored from 1 to 5.\n\n"
            "1. Diversity (1-5): How novel is the new problem compared to the "
            "reference? 1 = very similar, 5 = completely different.\n"
            "2. Difficulty (1-5): Rate the relative difficulty compared to the"
            " reference problem. 1 = much less difficult, "
            "3 = similar difficulty, 5 = much more difficult.\n"
            # The trailing space keeps the question and its scale apart
            # once the adjacent literals are concatenated.
            "3. Validity (1-5): How well-defined and sound is the problem? "
            "1 = very vague or flawed, 5 = very clear and rigorous.\n"
            "4. Solvability (1-5): How likely is the problem solvable using "
            "standard math techniques? 1 = very unsolvable or ambiguous, "
            "5 = very clearly solvable.\n\n"
            "Respond with a JSON object like: "
            "{ \"diversity\": ..., \"difficulty\": ..., "
            "\"validity\": ..., \"solvability\": ... }"
        )
        # A caller-supplied agent is used as-is; `system_msg` is only passed
        # to the agent created here as the fallback.
        self.agent = agent or ChatAgent(self.system_msg)

    class MathScoreSchema(BaseModel):
        # Response schema passed to the agent as `response_format`.
        diversity: int = Field(
            ...,
            description=(
                "Score for the diversity of the math problem "
                "compared to the reference"
            ),
        )
        difficulty: int = Field(
            ..., description="Score for the relative difficulty"
        )
        validity: int = Field(
            ...,
            description="Score for how well-defined and sound the problem is",
        )
        solvability: int = Field(
            ...,
            description="Score for the solvability of the problem",
        )

    def score(
        self, reference_problem: str, new_problem: str
    ) -> Dict[str, int]:
        r"""Evaluates the new math problem relative to the reference math
        problem.

        Args:
            reference_problem (str): The reference math problem.
            new_problem (str): The new or evolved math problem.

        Returns:
            Dict[str, int]: A dictionary with scores for diversity,
                difficulty, validity, and solvability.

        Raises:
            json.JSONDecodeError: If the agent's reply content is not valid
                JSON.
        """
        query = (
            f"Reference problem:\n{reference_problem}\n\n"
            f"New problem:\n{new_problem}\n\n"
            "Provide scores in JSON format."
        )
        response = self.agent.step(query, response_format=self.MathScoreSchema)
        # The reply content is parsed as JSON and returned unchanged.
        score_data = json.loads(response.msg.content)
        return score_data
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
class GeneralScorer(BaseScorer):
    r"""Scorer that asks a chat agent to rate a new problem against a
    reference problem in three dimensions: diversity, complexity and
    validity.

    Args:
        agent (Optional[ChatAgent]): Agent used to produce the scores. If
            not provided, a :obj:`ChatAgent` is created with
            :obj:`system_msg` as its system message.
            (default: :obj:`None`)
    """

    def __init__(self, agent: Optional[ChatAgent] = None):
        self.system_msg = (
            "You are an evaluator for problems in various domains. Your task "
            "is to compare a new problem against a reference problem, and rate"
            " it in **three dimensions**, each scored from 1 to 5.\n\n"
            "1. Diversity (1-5): How novel is the new problem compared to the "
            "reference? 1 = very similar, 5 = completely different.\n"
            "2. Complexity (1-5): Relative to the reference problem. "
            "1 = much less complex, 3 = similar complexity, "
            "5 = much more complex.\n"
            "3. Validity (1-5): How well-defined, meaningful, the problem is."
            # The leading space keeps the sentence and its scale apart once
            # the adjacent literals are concatenated.
            " 1 = vague/flawed, 5 = precise and fully meaningful.\n"
            "Respond with a JSON object like: "
            "{ \"diversity\": ..., \"complexity\": ..., \"validity\": ... }"
        )
        # A caller-supplied agent is used as-is; `system_msg` is only passed
        # to the agent created here as the fallback.
        self.agent = agent or ChatAgent(self.system_msg)

    class GeneralScoreSchema(BaseModel):
        # Response schema passed to the agent as `response_format`.
        diversity: int = Field(
            ...,
            description=(
                "Score for the diversity of the problem "
                "compared to the reference."
            ),
        )
        complexity: int = Field(
            ...,
            description=("Score for the relative complexity of the problem."),
        )
        validity: int = Field(
            ...,
            description=(
                "Score estimating the likelihood that the problem is "
                "well-defined."
            ),
        )

    def score(
        self, reference_problem: str, new_problem: str
    ) -> Dict[str, int]:
        r"""Evaluates the new problem against the reference problem using
        structured scoring.

        Args:
            reference_problem (str): The original problem.
            new_problem (str): The evolved or new problem.

        Returns:
            Dict[str, int]: A dictionary with scores for diversity,
                complexity, and validity.

        Raises:
            json.JSONDecodeError: If the agent's reply content is not valid
                JSON.
        """
        query = (
            f"Reference problem:\n{reference_problem}\n\n"
            f"New problem:\n{new_problem}\n\n"
            "Provide scores in JSON format."
        )
        response = self.agent.step(
            query, response_format=self.GeneralScoreSchema
        )
        # The reply content is parsed as JSON and returned unchanged.
        score_data = json.loads(response.msg.content)
        return score_data
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from dataclasses import dataclass
|
|
17
|
+
from typing import Dict, List, Union
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# flake8: noqa
|
|
21
|
+
@dataclass(frozen=True)
class BaseEvolInstructTemplates(ABC):
    r"""Abstract interface for evolution instruction templates.

    Concrete template classes must provide two attributes:

    - `EVOL_METHODS`: A dictionary mapping method keys to their descriptions.
    - `STRATEGY`: A dictionary defining strategies and associated methods.

    Subclasses supply the concrete templates for a specific domain.
    """

    @property
    @abstractmethod
    def EVOL_METHODS(self) -> Dict[str, str]:
        r"""Mapping from evolution method key to its description."""
        ...

    @property
    @abstractmethod
    def STRATEGY(self) -> Dict[str, Dict[str, Union[str, List[str]]]]:
        r"""Mapping from strategy name to its definition and methods."""
        ...
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# flake8: noqa
|
|
46
|
+
@dataclass(frozen=True)
class EvolInstructTemplates(BaseEvolInstructTemplates):
    r"""Contains templates for EvolInstruct prompt transformations.

    Both meta-instructions are format strings with two placeholders:
    `{method}` (a description taken from `EVOL_METHODS`) and `{prompt}`
    (the prompt to evolve).

    References:
        - WizardLM: Empowering Large Language Models to Follow Complex
            Instructions
            https://arxiv.org/pdf/2304.12244
        - eva: Evolving Alignment via Asymmetric Self-Play
            https://arxiv.org/abs/2411.00062
    """

    # High-level instructions on in-depth/in-breadth evolving.
    # Each sentence ends with an explicit separator so that adjacent string
    # literals do not run together after concatenation.
    INST_IN_DEPTH = (
        "Please act as an expert Prompt Creator.\n"
        "Your objective is to rewrite a given prompt into a more complex "
        "version to make those large language models (e.g., gemini) a bit "
        "harder to handle.\n"
        "But the rewritten prompt must be reasonable and must be understood "
        "and responded by humans.\n"
        "Your rewriting cannot omit the non-text parts such as the table and "
        "code in #Given Prompt#, if there is any.\n"
        "You should try your best not to make the #Rewritten Prompt# become "
        "verbose, "
        "The #Rewritten Prompt# should be roughly the similar length or a "
        "little bit more than that of #Given Prompt#.\n"
        "The #Rewritten Prompt# must sound like a real human user's prompt; "
        "DON'T make it sound like machine-generated.\n"
        "Specifically, you SHOULD complicate the given prompt using the "
        "following method: "
        "\n{method}\n"
        "The rewritten prompt should reflect meaningful changes across its "
        "structure, ensuring the entire sentence feels sufficiently different "
        "from the original. "
        "Again, make sure the rewritten prompt is more CHALLENGING.\n"
        "Respond with your rewritten prompt directly.\n"
        "#Given Prompt#:\n{prompt}\n"
        "#Rewritten Prompt#:\n"
    ).lstrip()

    INST_IN_BREADTH = (
        "Please act as an expert Prompt Creator.\n"
        "Your objective is to generate a brand-new prompt based on the #Given "
        "Prompt#. "
        "The purpose of this task is to promote diversity and generality of "
        "training prompts for language models, helping it practice with "
        "varied challenges and perspectives.\n"
        "The LENGTH and complexity of the #Created Prompt# should be similar "
        "to that of the #Given Prompt#.\n"
        "The #Created Prompt# must be reasonable, interpretable, and solvable "
        "by humans.\n"
        "The #Created Prompt# must sound like a real human user's prompt; "
        "DON'T make it sound like machine-generated.\n"
        "Follow the method described below to guide your creation:\n"
        "{method}\n"
        "The created prompt should reflect meaningful changes across its "
        "structure, ensuring the entire sentence feels sufficiently different "
        "from the original. "
        "Respond with your created prompt directly.\n"
        "#Given Prompt#:\n{prompt}\n"
        "#Created Prompt#:\n"
    ).lstrip()

    # Sub-method instructions (following the eva paper setting)
    IN_BREADTH_KEYS = [
        'persona',
        'shift-in',
        'shift-out',
        'mix',
        'abstract',
    ]

    IN_DEPTH_KEYS = [
        'constraints',
        'deepening',
        'concretizing',
        'reasoning',
        'expansion',
    ]

    # Each strategy pairs a meta-instruction with the method keys it may use.
    STRATEGY = {
        "IN-DEPTH": {
            'meta_instruction': INST_IN_DEPTH,
            'methods': IN_DEPTH_KEYS,
        },
        "IN-BREADTH": {
            'meta_instruction': INST_IN_BREADTH,
            'methods': IN_BREADTH_KEYS,
        },
    }

    # Description text for every method key listed above.
    EVOL_METHODS = {
        "persona": (
            "Reframe the #Given Prompt# as if written by a user with a "
            "completely different persona, background, or expertise. Adjust "
            "the tone, style, phrasing, or anything you feel proper to "
            "reflect this change. The changes should make the prompt feel "
            "like it was authored by someone entirely new."
        ),
        "shift-in": (
            "Shift the high-level idea of the #Given Prompt# to explore a "
            "different subdomain or context within the same domain. Ensure "
            "the new topic still challenges the model to reason or provide "
            "knowledge relevant to the domain."
        ),
        "shift-out": (
            "Shift the high-level idea of the #Given Prompt# to a completely "
            "different topic in a different setting. The new topic may "
            "challenge the model with similar reasoning or contextual "
            "understanding but in a novel way."
        ),
        "mix": (
            "Combine the high-level concept of the #Given Prompt# with "
            "elements from a different domain. Introduce novel scenarios or "
            "contexts to create diversity while maintaining relevance to the "
            "original idea."
        ),
        "abstract": (
            "Turn the #Given Prompt# into a more abstract or generalized "
            "version, removing specific details while preserving its intent. "
            "Ensure the new prompt encourages broader, principle-driven "
            "reasoning."
        ),
        "constraints": (
            "Add one or more significant constraints or requirements into the "
            "'#Given Prompt#'. The added constraints must meaningfully alter "
            "how the model would respond. For example, specify additional "
            "rules, contexts, or limitations that demand creative adjustments."
        ),
        "deepening": (
            "If the #Given Prompt# contains inquiries about certain issues, "
            "increase the depth and breadth of the inquiry. Make the question "
            "require a more detailed, multi-layered, or comprehensive response"
            ". For instance, break the problem into sub-problems or require "
            "connections between unrelated concepts."
        ),
        "concretizing": (
            "Replace general concepts in the #Given Prompt# with more specific"
            " and detailed concepts. Ensure that the change makes the problem "
            "more defined and concrete, leaving less room for ambiguity. For "
            "example, replace 'a device' with 'a wearable fitness tracker "
            "with GPS'."
        ),
        "reasoning": (
            "Add one or more reasoning steps into the '#Given Prompt#'. "
            "Explicitly rewrite it to demand multi-step reasoning or justify "
            "intermediate steps in the solution. For instance, if the original"
            " prompt is a simple query, make the response require a "
            "step-by-step breakdown of logic or calculations."
        ),
        "expansion": (
            "Expand the #Given Prompt# by including additional perspectives, "
            "domains, or layers of complexity. For example, if the original "
            "prompt focuses on a single scenario, add related scenarios or ask"
            " the model to compare different situations."
        ),
    }
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
# flake8: noqa
|
|
206
|
+
@dataclass(frozen=True)
class MathEvolInstructTemplates(BaseEvolInstructTemplates):
    r"""Contains templates for MathEvolInstruct prompt transformations.

    The meta-instruction is a format string with two placeholders:
    `{method}` (a description taken from `EVOL_METHODS`) and `{prompt}`
    (the math problem to evolve).
    """

    # Meta-instructions for in-depth evolving.
    # The text tells the model it "will be given a method", so the template
    # carries a `{method}` placeholder like the general-purpose templates do.
    # NOTE(review): assumes callers format this template with both `method`
    # and `prompt` keyword arguments — confirm against the pipeline.
    INST_IN_DEPTH = (
        "Please act as a math expert. Your objective is to create a new math "
        "problem that is more challenging yet concise than the given math "
        "problem. Ensure that the mathematical content (including any "
        "equations or figures) is preserved, and rephrase the problem to "
        "increase its complexity and depth. The generated problem should be "
        "clearly stated, strictly mathematical, and suitable for solving with "
        "symbolic computation (e.g., using sympy). You will be given a method "
        "to guide your creation. Make sure to follow the method strictly. "
        "Consolidate any multiple parts into one integrated question that "
        "ask for one definitive answer. Respond with your generated problem "
        "directly.\n"
        "#Method#:\n{method}\n"
        "#Original Problem#:\n{prompt}\n"
        "#Generated Problem#:\n"
    ).lstrip()

    # NOTE(review): 'condense' is defined here but is not listed in
    # IN_DEPTH_KEYS, so no strategy below references it — confirm this is
    # intentional.
    EVOL_METHODS = {
        "constraints": (
            "Add one or more significant constraints or requirements into the "
            "'#Given Prompt#'. The added constraints must meaningfully alter "
            "how the model would respond. For example, specify additional "
            "rules, contexts, or limitations that demand creative adjustments."
        ),
        "deepening": (
            "Increase the difficulty of the #Given Prompt# by integrating "
            "additional layers of reasoning and rigor. Refine the problem so "
            "that all added difficulty is consolidated into a single coherent "
            "question requiring one final answer, avoiding fragmentation into "
            "multiple sub-problems."
        ),
        "expansion": (
            "Expand the #Given Prompt# by incorporating additional "
            "perspectives or layers of complexity into the problem statement. "
            "Ensure that the revised problem remains a single, unified "
            "question with one final answer, rather than a series of separate "
            "sub-questions."
        ),
        "condense": (
            "Reformulate the given math problem into a well-structured and "
            "formally stated mathematical question.\n"
            "- Present the problem in a structured and rigorous mathematical "
            "format.\n"
            "- Removing unnecessary instructions, explanations, or hints.\n"
            "- If the given problem contains several sub-questions, make "
            "necessary changes to let the problem could be answered with one "
            "number or expression by removing the sub-questions or combining "
            "them into one."
        ),
    }

    IN_DEPTH_KEYS = ['constraints', 'deepening', 'expansion']

    # Only the in-depth strategy is defined for math problems.
    STRATEGY = {
        "IN-DEPTH": {
            'meta_instruction': INST_IN_DEPTH,
            'methods': IN_DEPTH_KEYS,
        },
    }
|
|
@@ -518,7 +518,7 @@ class SelfImprovingCoTPipeline:
|
|
|
518
518
|
self.reason_agent.model_backend.model_config_dict['n'] = (
|
|
519
519
|
self.rejection_sampling_n
|
|
520
520
|
)
|
|
521
|
-
# Generate multiple
|
|
521
|
+
# Generate multiple candidate traces in one call using parameter n
|
|
522
522
|
responses = self.reason_agent.step(prompt)
|
|
523
523
|
# Extract candidate traces
|
|
524
524
|
candidate_traces = [choice.content for choice in responses.msgs]
|
camel/datasets/__init__.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
from .base_generator import BaseGenerator
|
|
15
15
|
from .few_shot_generator import FewShotGenerator
|
|
16
16
|
from .models import DataPoint
|
|
17
|
+
from .self_instruct_generator import SelfInstructGenerator
|
|
17
18
|
from .static_dataset import StaticDataset
|
|
18
19
|
|
|
19
20
|
__all__ = [
|
|
@@ -21,4 +22,5 @@ __all__ = [
|
|
|
21
22
|
"DataPoint",
|
|
22
23
|
"FewShotGenerator",
|
|
23
24
|
"StaticDataset",
|
|
25
|
+
"SelfInstructGenerator",
|
|
24
26
|
]
|
camel/datasets/base_generator.py
CHANGED
|
@@ -39,6 +39,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
39
39
|
def __init__(
|
|
40
40
|
self,
|
|
41
41
|
seed: int = 42,
|
|
42
|
+
buffer: int = 20,
|
|
42
43
|
cache: Union[str, Path, None] = None,
|
|
43
44
|
data_path: Union[str, Path, None] = None,
|
|
44
45
|
**kwargs,
|
|
@@ -47,6 +48,8 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
47
48
|
|
|
48
49
|
Args:
|
|
49
50
|
seed (int): Random seed for reproducibility. (default: :obj:`42`)
|
|
51
|
+
buffer (int): Amount of DataPoints to be generated when the
|
|
52
|
+
iterator runs out of DataPoints in data. (default: :obj:`20`)
|
|
50
53
|
cache (Union[str, Path, None]): Optional path to save generated
|
|
51
54
|
datapoints during iteration. If None is provided, datapoints
|
|
52
55
|
will be discarded every 100 generations.
|
|
@@ -56,7 +59,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
56
59
|
"""
|
|
57
60
|
self._rng = random.Random(seed)
|
|
58
61
|
self.cache = Path(cache) if cache else None
|
|
59
|
-
|
|
62
|
+
self._buffer = buffer
|
|
60
63
|
self._data: List[DataPoint] = []
|
|
61
64
|
self._batch_to_save: List[DataPoint] = []
|
|
62
65
|
|
|
@@ -72,15 +75,27 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
72
75
|
)
|
|
73
76
|
|
|
74
77
|
@abc.abstractmethod
|
|
75
|
-
async def generate_new(self, n: int, **kwargs) ->
|
|
76
|
-
r"""Generate n new datapoints.
|
|
78
|
+
async def generate_new(self, n: int, **kwargs) -> None:
|
|
79
|
+
r"""Generate n new datapoints and append them to self._data.
|
|
80
|
+
|
|
81
|
+
Subclass implementations must generate the specified number of
|
|
82
|
+
datapoints and append them directly to the `self._data` list.
|
|
83
|
+
This method should not return the datapoints; the iterator
|
|
84
|
+
relies on `self._data` being populated.
|
|
77
85
|
|
|
78
86
|
Args:
|
|
79
|
-
n (int): Number of datapoints to generate.
|
|
87
|
+
n (int): Number of datapoints to generate and append.
|
|
80
88
|
**kwargs: Additional generation parameters.
|
|
81
89
|
|
|
82
90
|
Returns:
|
|
83
|
-
|
|
91
|
+
None: This method should not return anything.
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
```python
|
|
95
|
+
async def generate_new(self, n: int, **kwargs) -> None:
|
|
96
|
+
new_points = [DataPoint(...) for _ in range(n)]
|
|
97
|
+
self._data.extend(new_points)
|
|
98
|
+
```
|
|
84
99
|
"""
|
|
85
100
|
pass
|
|
86
101
|
|
|
@@ -99,8 +114,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
99
114
|
async def generator():
|
|
100
115
|
while True:
|
|
101
116
|
if not self._data:
|
|
102
|
-
|
|
103
|
-
self._data.extend(new_datapoints)
|
|
117
|
+
await self.generate_new(self._buffer)
|
|
104
118
|
datapoint = self._data.pop(0)
|
|
105
119
|
yield datapoint
|
|
106
120
|
self._batch_to_save.append(datapoint)
|
|
@@ -137,8 +151,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
137
151
|
|
|
138
152
|
while True:
|
|
139
153
|
if not self._data:
|
|
140
|
-
|
|
141
|
-
self._data.extend(new_datapoints)
|
|
154
|
+
asyncio.run(self.generate_new(self._buffer))
|
|
142
155
|
datapoint = self._data.pop(0)
|
|
143
156
|
yield datapoint
|
|
144
157
|
self._batch_to_save.append(datapoint)
|
|
@@ -126,7 +126,7 @@ class FewShotGenerator(BaseGenerator):
|
|
|
126
126
|
max_retries: int = 10,
|
|
127
127
|
num_examples: int = 3,
|
|
128
128
|
**kwargs,
|
|
129
|
-
) ->
|
|
129
|
+
) -> None:
|
|
130
130
|
r"""Generates and validates `n` new datapoints through
|
|
131
131
|
few-shot prompting, with a retry limit.
|
|
132
132
|
|
|
@@ -203,7 +203,7 @@ class FewShotGenerator(BaseGenerator):
|
|
|
203
203
|
try:
|
|
204
204
|
verifier_response = await self.verifier.verify(
|
|
205
205
|
solution=rationale,
|
|
206
|
-
|
|
206
|
+
reference_answer=None,
|
|
207
207
|
)
|
|
208
208
|
if not verifier_response or not verifier_response.result:
|
|
209
209
|
raise ValueError(
|
|
@@ -255,4 +255,3 @@ class FewShotGenerator(BaseGenerator):
|
|
|
255
255
|
# Thread-safe way to extend the data list
|
|
256
256
|
async with asyncio.Lock():
|
|
257
257
|
self._data.extend(valid_data_points)
|
|
258
|
-
return valid_data_points
|