camel-ai 0.2.36__py3-none-any.whl → 0.2.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/__init__.py +2 -0
- camel/agents/repo_agent.py +579 -0
- camel/configs/aiml_config.py +20 -19
- camel/configs/anthropic_config.py +25 -27
- camel/configs/cohere_config.py +11 -10
- camel/configs/deepseek_config.py +16 -16
- camel/configs/gemini_config.py +8 -8
- camel/configs/groq_config.py +18 -19
- camel/configs/internlm_config.py +8 -8
- camel/configs/litellm_config.py +26 -24
- camel/configs/mistral_config.py +8 -8
- camel/configs/moonshot_config.py +11 -11
- camel/configs/nvidia_config.py +13 -13
- camel/configs/ollama_config.py +14 -15
- camel/configs/openai_config.py +3 -3
- camel/configs/openrouter_config.py +9 -9
- camel/configs/qwen_config.py +8 -8
- camel/configs/reka_config.py +12 -11
- camel/configs/samba_config.py +14 -14
- camel/configs/sglang_config.py +15 -16
- camel/configs/siliconflow_config.py +18 -17
- camel/configs/togetherai_config.py +18 -19
- camel/configs/vllm_config.py +18 -19
- camel/configs/yi_config.py +7 -8
- camel/configs/zhipuai_config.py +8 -9
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datasets/static_dataset.py +25 -23
- camel/environments/models.py +10 -1
- camel/environments/single_step.py +296 -136
- camel/extractors/__init__.py +16 -1
- camel/interpreters/docker_interpreter.py +1 -1
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +4 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +5 -3
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +9 -3
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +9 -3
- camel/models/vllm_model.py +9 -3
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +9 -3
- camel/retrievers/auto_retriever.py +14 -0
- camel/storages/__init__.py +2 -0
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/toolkits/__init__.py +7 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/thinking_toolkit.py +230 -0
- camel/types/enums.py +4 -0
- camel/utils/chunker/code_chunker.py +9 -15
- camel/verifiers/base.py +28 -5
- camel/verifiers/python_verifier.py +321 -68
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/METADATA +103 -8
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/RECORD +84 -75
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.36.dist-info → camel_ai-0.2.38.dist-info}/licenses/LICENSE +0 -0
camel/configs/yi_config.py
CHANGED
@@ -16,7 +16,6 @@ from __future__ import annotations
 from typing import Optional, Union
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class YiConfig(BaseConfig):
@@ -37,22 +36,22 @@ class YiConfig(BaseConfig):
         max_tokens (int, optional): Specifies the maximum number of tokens
             the model can generate. This sets an upper limit, but does not
             guarantee that this number will always be reached.
-            (default: :obj:`…
+            (default: :obj:`None`)
         top_p (float, optional): Controls the randomness of the generated
             results. Lower values lead to less randomness, while higher
-            values increase randomness. (default: :obj:`…
+            values increase randomness. (default: :obj:`None`)
         temperature (float, optional): Controls the diversity and focus of
             the generated results. Lower values make the output more focused,
             while higher values make it more diverse. (default: :obj:`0.3`)
         stream (bool, optional): If True, enables streaming output.
-            (default: :obj:`…
+            (default: :obj:`None`)
     """
 
     tool_choice: Optional[Union[dict[str, str], str]] = None
-    max_tokens: …
-    top_p: float = …
-    temperature: float = …
-    stream: bool = …
+    max_tokens: Optional[int] = None
+    top_p: Optional[float] = None
+    temperature: Optional[float] = None
+    stream: Optional[bool] = None
 
 
 YI_API_PARAMS = {param for param in YiConfig.model_fields.keys()}
camel/configs/zhipuai_config.py
CHANGED
@@ -16,7 +16,6 @@ from __future__ import annotations
 from typing import Optional, Sequence, Union
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class ZhipuAIConfig(BaseConfig):
@@ -29,15 +28,15 @@ class ZhipuAIConfig(BaseConfig):
         temperature (float, optional): Sampling temperature to use, between
             :obj:`0` and :obj:`2`. Higher values make the output more random,
             while lower values make it more focused and deterministic.
-            (default: :obj:`…
+            (default: :obj:`None`)
         top_p (float, optional): An alternative to sampling with temperature,
             called nucleus sampling, where the model considers the results of
             the tokens with top_p probability mass. So :obj:`0.1` means only
             the tokens comprising the top 10% probability mass are considered.
-            (default: :obj:`…
+            (default: :obj:`None`)
         stream (bool, optional): If True, partial message deltas will be sent
             as data-only server-sent events as they become available.
-            (default: :obj:`…
+            (default: :obj:`None`)
         stop (str or list, optional): Up to :obj:`4` sequences where the API
             will stop generating further tokens. (default: :obj:`None`)
         max_tokens (int, optional): The maximum number of tokens to generate
@@ -60,11 +59,11 @@ class ZhipuAIConfig(BaseConfig):
             are present.
     """
 
-    temperature: float = …
-    top_p: float = …
-    stream: bool = …
-    stop: Union[str, Sequence[str]…
-    max_tokens: …
+    temperature: Optional[float] = None
+    top_p: Optional[float] = None
+    stream: Optional[bool] = None
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
     tool_choice: Optional[Union[dict[str, str], str]] = None
 
 
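Both config diffs above make the same change: the NOT_GIVEN/NotGiven sentinel import is dropped and every sampling parameter becomes Optional[...] = None. A minimal sketch of the practical effect, assuming YiConfig is still re-exported from camel.configs and that the configs remain pydantic v2 models (the model_fields usage above implies pydantic v2); the exclude_none call is illustrative, not camel's own request-building code:

    from camel.configs import YiConfig

    # Leave max_tokens, top_p, and stream unset; they now default to None.
    config = YiConfig(temperature=0.3)

    # None-valued parameters can be dropped wholesale before the API call.
    print(config.model_dump(exclude_none=True))  # e.g. {'temperature': 0.3}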
camel/datagen/evol_instruct/__init__.py
ADDED

@@ -0,0 +1,20 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+from .evol_instruct import EvolInstructPipeline
+
+__all__ = [
+    'EvolInstructPipeline',
+    'MathEvolInstructTemplates',
+]
camel/datagen/evol_instruct/evol_instruct.py
ADDED

@@ -0,0 +1,424 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import random
+import time
+from concurrent.futures import ThreadPoolExecutor
+from math import ceil
+from typing import Any, Dict, List, Optional, Tuple, Type, Union, cast
+
+from tqdm import tqdm
+
+from camel.agents import ChatAgent
+from camel.datagen.evol_instruct.scorer import BaseScorer, GeneralScorer
+from camel.datagen.evol_instruct.templates import EvolInstructTemplates
+from camel.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class EvolInstructPipeline:
+    r"""Pipeline for evolving prompts using the Evol-Instruct methodology.
+
+    Supports custom templates defining evolution strategies and methods. The
+    pipeline leverages language models to iteratively refine prompts through
+    specified evolution strategies.
+
+    Args:
+        templates (Type[EvolInstructTemplates]): Template class containing
+            evolution strategy and method definitions. Must provide
+            `EVOL_METHODS` and `STRATEGY` attributes.
+            (default: :obj:`EvolInstructTemplates`)
+        agent (Optional[ChatAgent]): Chat agent instance for LLM interaction.
+            If :obj:`None`, initializes with a default ChatAgent.
+            (default: :obj:`None`)
+    """
+
+    def __init__(
+        self,
+        templates: Type = EvolInstructTemplates,
+        agent: Optional[ChatAgent] = None,
+    ) -> None:
+        r"""Initialize pipeline with templates and language model agent.
+
+        Args:
+            templates (Type[EvolInstructTemplates]): Template class containing
+                evolution strategy configurations.
+                (default: :obj:`EvolInstructTemplates`)
+            agent (Optional[ChatAgent]): Preconfigured chat agent instance.
+                Creates a default ChatAgent if not provided.
+                (default: :obj:`None`)
+        """
+        self.templates = templates
+        self.agent = agent or ChatAgent()
+
+    def _resolve_evolution_method(self, method_key: str) -> str:
+        r"""Resolve evolution method key to concrete implementation.
+
+        Args:
+            method_key (str): Input method identifier. Can be:
+                - Direct method key from templates.EVOL_METHODS
+                - Strategy name from templates.STRATEGY keys
+
+        Returns:
+            str: Resolved method key from EVOL_METHODS
+        """
+        if method_key in self.templates.EVOL_METHODS:
+            return method_key
+        if method_key.upper() in self.templates.STRATEGY:
+            strategy = self.templates.STRATEGY[method_key.upper()]
+            strategy_methods = strategy["methods"]
+            return random.choice(strategy_methods)
+
+        logger.warning(
+            f"Invalid evolution method: {method_key}. "
+            f"Using random selection."
+        )
+        return random.choice(list(self.templates.EVOL_METHODS))
+
+    def _get_evolution_methods(
+        self,
+        method: Union[str, List[str]],
+        num_generations: int = 2,
+    ) -> List[str]:
+        r"""Get list of evolution methods based on input specification.
+
+        Args:
+            method (Union[str, List[str]]): Specification for method selection.
+                Can be:
+                - Strategy name for methods from that strategy
+                - Specific method name
+                - List of method specifications
+            num_generations (int): Number of methods to return.
+
+        Returns:
+            List[str]: List of resolved method names
+        """
+        candidate_methods = []
+
+        if isinstance(method, list):
+            for method_spec in method:
+                candidate_methods.append(
+                    self._resolve_evolution_method(method_spec)
+                )
+        elif isinstance(method, str):
+            if method.upper() in self.templates.STRATEGY:
+                strategy = self.templates.STRATEGY[method.upper()]
+                candidate_methods = strategy["methods"]
+            else:
+                candidate_methods = [self._resolve_evolution_method(method)]
+
+        # Remove duplicates while preserving order
+        unique_candidates = []
+        for method_name in candidate_methods:
+            if method_name not in unique_candidates:
+                unique_candidates.append(method_name)
+
+        if len(unique_candidates) >= num_generations:
+            methods = random.sample(unique_candidates, num_generations)
+        else:
+            methods = unique_candidates.copy()
+            while len(methods) < num_generations:
+                methods.append(random.choice(unique_candidates))
+
+        return methods
+
+    def _generate_single_evolution(
+        self,
+        prompt: str,
+        method: str,
+        return_method: bool = False,
+    ) -> Tuple[str, str]:
+        r"""Generate a single evolved prompt from a seed prompt.
+
+        Args:
+            prompt (str): The seed prompt to evolve.
+            method (str): The evolution method key to use.
+            return_method (bool): If True, returns method along with prompt.
+
+        Returns:
+            Tuple[str, str]: Evolved prompt and method
+        """
+        resolved_method = self._resolve_evolution_method(method)
+
+        # Find strategy containing the resolved method
+        strategy_key = None
+        for strategy, group in self.templates.STRATEGY.items():
+            if resolved_method in group["methods"]:
+                strategy_key = strategy
+                break
+
+        if strategy_key is None:
+            strategy_key = random.choice(list(self.templates.STRATEGY.keys()))
+
+        strategy = self.templates.STRATEGY[strategy_key]
+        instruction_template = strategy["meta_instruction"]
+        instruction = instruction_template.format(
+            method=self.templates.EVOL_METHODS.get(
+                resolved_method,
+                random.choice(list(self.templates.EVOL_METHODS.values())),
+            ),
+            prompt=prompt,
+        )
+
+        self.agent.reset()
+        response = self.agent.step(instruction)
+        evolved_prompt = response.msgs[0].content.strip()
+
+        if return_method:
+            return (evolved_prompt, resolved_method)
+        else:
+            return (evolved_prompt, "")
+
+    def _generate_multiple_evolutions(
+        self,
+        prompt: str,
+        method: Union[str, List[str]],
+        num_generations: int = 2,
+        keep_original: bool = True,
+        num_threads: int = 10,
+    ) -> List[Tuple[str, str]]:
+        r"""Generate multiple evolved versions of a prompt.
+
+        Args:
+            prompt (str): Seed prompt to evolve.
+            method (Union[str, List[str]]): Evolution method specification.
+            num_generations (int): Candidates to generate per iteration.
+            keep_original (bool): Whether to keep the original prompt.
+            num_threads (int): Number of threads for parallel processing.
+
+        Returns:
+            List[Tuple[str, str]]: List of (evolved_prompt, method) pairs
+        """
+        results = [(prompt, "original")] if keep_original else []
+
+        if isinstance(method, list) and len(method) == num_generations:
+            candidate_methods = method
+        else:
+            candidate_methods = self._get_evolution_methods(
+                method=method, num_generations=num_generations
+            )
+
+        def _process_single_method(method_name: str) -> Tuple[str, str]:
+            return self._generate_single_evolution(
+                prompt, method_name, return_method=True
+            )
+
+        with ThreadPoolExecutor(max_workers=num_threads) as executor:
+            evolved_results = list(
+                executor.map(_process_single_method, candidate_methods)
+            )
+
+        results.extend(evolved_results)
+        return results
+
+    def _generate_iterative_evolutions(
+        self,
+        prompt: str,
+        evolution_spec: Union[str, List[Union[str, List[str]]]],
+        num_generations: int = 2,
+        num_iterations: Optional[int] = None,
+        keep_original: bool = True,
+        scorer: Optional[BaseScorer] = None,
+        num_threads: int = 10,
+    ) -> Dict[int, List[Dict[str, Any]]]:
+        r"""Generate iterative evolutions of a prompt with scoring.
+
+        Args:
+            prompt (str): Seed prompt to evolve.
+            evolution_spec (Union[str, List[Union[str, List[str]]]]):
+                Evolution method specification.
+                If a list is provided and num_iterations is None, then
+                num_iterations is set to the length of the list.
+            num_generations (int): Candidates to generate per iteration.
+            num_iterations (Optional[int]): Number of evolution iterations.
+                Defaults to the length of evolution_spec.
+            keep_original (bool): Include original prompt in results.
+            scorer (Optional[BaseScorer]): Scoring model for candidate.
+            num_threads (int): Number of threads for parallel processing.
+
+        Returns:
+            Dict[int, List[Dict[str, Any]]]: Evolution results per iteration,
+                where each candidate is represented as a dict with keys:
+                "instruction", "method", and "scores".
+        """
+        if num_iterations is None:
+            if isinstance(evolution_spec, list):
+                num_iterations = len(evolution_spec)
+            else:
+                num_iterations = 1
+
+        results = {}
+        current_prompt = prompt
+        scorer = scorer or GeneralScorer()
+
+        for iteration in range(num_iterations):
+            if isinstance(evolution_spec, list):
+                if iteration < len(evolution_spec):
+                    iteration_spec = evolution_spec[iteration]
+                else:
+                    iteration_spec = evolution_spec[-1]
+            else:
+                iteration_spec = evolution_spec
+
+            batch_results = self._generate_multiple_evolutions(
+                prompt=current_prompt,
+                method=iteration_spec,
+                num_generations=num_generations,
+                keep_original=False,
+                num_threads=num_threads,
+            )
+
+            scored_results = []
+            for candidate, method_used in batch_results:
+                scores = scorer.score(current_prompt, candidate)
+                scored_results.append(
+                    {
+                        "instruction": candidate,
+                        "method": method_used,
+                        "scores": scores,
+                    }
+                )
+
+            best_index = max(
+                range(len(scored_results)),
+                key=lambda i: sum(
+                    cast(Dict[str, int], scored_results[i]["scores"]).values()
+                ),
+            )
+
+            best_candidate = cast(
+                str, scored_results[best_index]["instruction"]
+            )
+
+            if keep_original:
+                results[iteration] = [
+                    {
+                        "instruction": current_prompt,
+                        "method": "original",
+                        "scores": {},
+                    },
+                    *scored_results,
+                ]
+            else:
+                results[iteration] = scored_results
+
+            current_prompt = best_candidate
+
+        return results
+
+    def generate(
+        self,
+        prompts: List[str],
+        evolution_spec: Union[str, List[Union[str, List[str]]]],
+        num_generations: int = 2,
+        num_iterations: Optional[int] = None,
+        keep_original: bool = True,
+        scorer: Optional[BaseScorer] = None,
+        num_chunks: int = 1,
+        retry_limit: int = 3,
+        retry_delay: float = 1.0,
+        num_threads: int = 10,
+    ) -> List[Dict[int, List[Dict[str, Any]]]]:
+        r"""Evolve a batch of prompts through iterative refinement.
+
+        Args:
+            prompts (List[str]): Seed prompts to evolve.
+            evolution_spec (Union[str, List[Union[str, List[str]]]]):
+                Evolution method specification.
+                If a list is provided and num_iterations is None, then
+                num_iterations is set to the length of the list.
+            num_generations (int): Candidates to generate per iteration.
+            num_iterations (Optional[int]): Number of evolution iterations.
+                Defaults to the length of evolution_spec.
+            keep_original (bool): Include original prompts in results.
+            scorer (Optional[BaseScorer]): Scoring model for candidate.
+            num_chunks (int): Number of parallel processing chunks.
+            retry_limit (int): Max retries for failed generations.
+            retry_delay (float): Delay between retries in seconds.
+            num_threads (int): Number of threads for parallel processing.
+
+        Returns:
+            List[Dict[int, List[Dict[str, Any]]]]: Evolution results.
+        """
+        if num_iterations is None:
+            if isinstance(evolution_spec, list):
+                num_iterations = len(evolution_spec)
+            else:
+                num_iterations = 1
+
+        evolution_plan: List[List[List[str]]] = []
+        for _ in prompts:
+            prompt_plan = []
+            for iteration in range(num_iterations):
+                if isinstance(evolution_spec, list):
+                    if iteration < len(evolution_spec):
+                        raw_spec = evolution_spec[iteration]
+                    else:
+                        raw_spec = evolution_spec[-1]
+                else:
+                    raw_spec = evolution_spec
+                prompt_plan.append(
+                    self._get_evolution_methods(raw_spec, num_generations)
+                )
+            evolution_plan.append(prompt_plan)
+
+        def _process_prompt(
+            args: Tuple[str, List[List[str]]],
+        ) -> Dict[int, List[Dict[str, Any]]]:
+            prompt, methods = args
+            retries = 0
+            while retries <= retry_limit:
+                try:
+                    return self._generate_iterative_evolutions(
+                        prompt=prompt,
+                        evolution_spec=evolution_spec,
+                        num_generations=num_generations,
+                        num_iterations=num_iterations,
+                        keep_original=keep_original,
+                        scorer=scorer,
+                        num_threads=num_threads,
+                    )
+                except Exception as e:
+                    retries += 1
+                    if retries <= retry_limit:
+                        logger.warning(
+                            f"Error processing prompt "
+                            f"(attempt {retries}/{retry_limit}): {e!s}"
+                        )
+                        time.sleep(retry_delay)
+                    else:
+                        logger.error("Failed to process prompt.")
+                        return {}
+
+            raise RuntimeError("_process_prompt() did not return.")
+
+        num_chunks = max(1, min(num_chunks, len(prompts)))
+        chunk_size = ceil(len(prompts) / num_chunks)
+        results = []
+
+        for chunk_idx in range(0, len(prompts), chunk_size):
+            chunk = prompts[chunk_idx : chunk_idx + chunk_size]
+            plan_chunk = evolution_plan[chunk_idx : chunk_idx + chunk_size]
+
+            with ThreadPoolExecutor(max_workers=num_threads) as executor:
+                chunk_results = list(
+                    tqdm(
+                        executor.map(_process_prompt, zip(chunk, plan_chunk)),
+                        total=len(chunk),
+                    )
+                )
+            results.extend(chunk_results)
+
+        return results
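For orientation, a minimal usage sketch of the new pipeline. The strategy key "IN-DEPTH" below is hypothetical: valid keys come from EvolInstructTemplates.STRATEGY in templates.py (+268 lines, not reproduced in this section), and the default ChatAgent needs model credentials configured.

    from camel.datagen.evol_instruct import EvolInstructPipeline
    from camel.datagen.evol_instruct.scorer import GeneralScorer

    pipeline = EvolInstructPipeline()  # falls back to a default ChatAgent
    results = pipeline.generate(
        prompts=["Write a function that reverses a string."],
        evolution_spec="IN-DEPTH",  # hypothetical strategy key
        num_generations=2,
        num_iterations=2,
        scorer=GeneralScorer(),
    )
    # results[0][i] is the list of candidates for iteration i of the first
    # prompt, each a dict with "instruction", "method", and "scores"; the
    # highest-scoring candidate seeds the next iteration.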
camel/datagen/evol_instruct/scorer.py
ADDED

@@ -0,0 +1,166 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import json
+from abc import ABC, abstractmethod
+from typing import Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from camel.agents import ChatAgent
+from camel.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class BaseScorer(ABC):
+    @abstractmethod
+    def score(
+        self, reference_prompt: str, candidate_prompt: str
+    ) -> Dict[str, int]:
+        r"""Compare a candidate prompt against a reference prompt and
+        return a tuple of scores. The higher the score, the better.
+        For example, (diversity, difficulty, feasibility).
+        """
+        pass
+
+
+class MathScorer(BaseScorer):
+    def __init__(self, agent: Optional[ChatAgent] = None):
+        self.system_msg = (
+            "You are an evaluator for math problems. Your task is to compare "
+            "a new math problem against a reference math problem, and rate it "
+            "in **four dimensions**, each scored from 1 to 5.\n\n"
+            "1. Diversity (1-5): How novel is the new problem compared to the "
+            "reference? 1 = very similar, 5 = completely different.\n"
+            "2. Difficulty (1-5): Rate the relative difficulty compared to the"
+            " reference problem. 1 = much less difficult, "
+            "3 = similar difficulty, 5 = much more difficult.\n"
+            "3. Validity (1-5): How well-defined and sound is the problem?"
+            "1 = very vague or flawed, 5 = very clear and rigorous.\n"
+            "4. Solvability (1-5): How likely is the problem solvable using "
+            "standard math techniques? 1 = very unsolvable or ambiguous, "
+            "5 = very clearly solvable.\n\n"
+            "Respond with a JSON object like: "
+            "{ \"diversity\": ..., \"difficulty\": ..., "
+            "\"validity\": ..., \"solvability\": ... }"
+        )
+        self.agent = agent or ChatAgent(self.system_msg)
+
+    class MathScoreSchema(BaseModel):
+        diversity: int = Field(
+            ...,
+            description=(
+                "Score for the diversity of the math problem "
+                "compared to the reference"
+            ),
+        )
+        difficulty: int = Field(
+            ..., description="Score for the relative difficulty"
+        )
+        validity: int = Field(
+            ...,
+            description="Score for how well-defined and sound the problem is",
+        )
+        solvability: int = Field(
+            ...,
+            description="Score for the solvability of the problem",
+        )
+
+    def score(
+        self, reference_problem: str, new_problem: str
+    ) -> Dict[str, int]:
+        r"""Evaluates the new math problem relative to the reference math
+        problem.
+
+        Args:
+            reference_problem (str): The reference math problem.
+            new_problem (str): The new or evolved math problem.
+
+        Returns:
+            Dict[str, int]: A dictionary with scores for diversity, difficulty,
+                validity, and solvability.
+        """
+        query = (
+            f"Reference problem:\n{reference_problem}\n\n"
+            f"New problem:\n{new_problem}\n\n"
+            "Provide scores in JSON format."
+        )
+        response = self.agent.step(query, response_format=self.MathScoreSchema)
+        score_data = json.loads(response.msg.content)
+        return score_data
+
+
+class GeneralScorer(BaseScorer):
+    def __init__(self, agent: Optional[ChatAgent] = None):
+        self.system_msg = (
+            "You are an evaluator for problems in various domains. Your task "
+            "is to compare a new problem against a reference problem, and rate"
+            " it in **three dimensions**, each scored from 1 to 5.\n\n"
+            "1. Diversity (1-5): How novel is the new problem compared to the "
+            "reference? 1 = very similar, 5 = completely different.\n"
+            "2. Complexity (1-5): Relative to the reference problem. "
+            "1 = much less complex, 3 = similar complexity, "
+            "5 = much more complex.\n"
+            "3. Validity (1-5): How well-defined, meaningful, the problem is."
+            "1 = vague/flawed, 5 = precise and fully meaningful.\n"
+            "Respond with a JSON object like: "
+            "{ \"diversity\": ..., \"complexity\": ..., \"validity\": ... }"
+        )
+        self.agent = agent or ChatAgent(self.system_msg)
+
+    class GeneralScoreSchema(BaseModel):
+        diversity: int = Field(
+            ...,
+            description=(
+                "Score for the diversity of the problem "
+                "compared to the reference."
+            ),
+        )
+        complexity: int = Field(
+            ...,
+            description=("Score for the relative complexity of the problem."),
+        )
+        validity: int = Field(
+            ...,
+            description=(
+                "Score estimating the likelihood that the problem is "
+                "well-defined."
+            ),
+        )
+
+    def score(
+        self, reference_problem: str, new_problem: str
+    ) -> Dict[str, int]:
+        r"""Evaluates the new problem against the reference problem using
+        structured scoring.
+
+        Args:
+            reference_problem (str): The original problem.
+            new_problem (str): The evolved or new problem.
+
+        Returns:
+            Dict[str, int]: A dictionary with scores for diversity, complexity,
+                and validity.
+        """
+        query = (
+            f"Reference problem:\n{reference_problem}\n\n"
+            f"New problem:\n{new_problem}\n\n"
+            "Provide scores in JSON format."
+        )
+        response = self.agent.step(
+            query, response_format=self.GeneralScoreSchema
+        )
+        score_data = json.loads(response.msg.content)
+        return score_data
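BaseScorer is the extension point: any object whose score() returns a Dict[str, int] can be passed as the scorer argument to EvolInstructPipeline.generate, and candidates are ranked by the sum of the returned values. A minimal sketch with a deliberately crude length heuristic (purely illustrative, not part of the release):

    from typing import Dict

    from camel.datagen.evol_instruct.scorer import BaseScorer


    class LengthScorer(BaseScorer):
        """Hypothetical scorer: favors candidates longer than the reference."""

        def score(
            self, reference_prompt: str, candidate_prompt: str
        ) -> Dict[str, int]:
            # Map the length ratio onto the 1-5 scale the built-in scorers use.
            ratio = len(candidate_prompt) / max(1, len(reference_prompt))
            return {"complexity": max(1, min(5, round(2 * ratio)))}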