camel-ai 0.2.37__py3-none-any.whl → 0.2.39__py3-none-any.whl
This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release: this version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +4 -0
- camel/agents/repo_agent.py +2 -2
- camel/benchmarks/apibank.py +1 -1
- camel/benchmarks/apibench.py +1 -1
- camel/configs/__init__.py +3 -0
- camel/configs/modelscope_config.py +59 -0
- camel/datagen/evol_instruct/__init__.py +20 -0
- camel/datagen/evol_instruct/evol_instruct.py +424 -0
- camel/datagen/evol_instruct/scorer.py +166 -0
- camel/datagen/evol_instruct/templates.py +268 -0
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/__init__.py +2 -0
- camel/datasets/base_generator.py +22 -9
- camel/datasets/few_shot_generator.py +2 -3
- camel/datasets/self_instruct_generator.py +415 -0
- camel/embeddings/openai_compatible_embedding.py +13 -5
- camel/environments/models.py +10 -4
- camel/environments/single_step.py +181 -41
- camel/interpreters/docker_interpreter.py +2 -2
- camel/interpreters/e2b_interpreter.py +1 -1
- camel/interpreters/internal_python_interpreter.py +1 -1
- camel/interpreters/subprocess_interpreter.py +1 -1
- camel/loaders/__init__.py +2 -2
- camel/loaders/{panda_reader.py → pandas_reader.py} +61 -30
- camel/loaders/unstructured_io.py +2 -1
- camel/memories/blocks/chat_history_block.py +1 -1
- camel/memories/context_creators/score_based.py +198 -67
- camel/models/__init__.py +2 -0
- camel/models/aiml_model.py +9 -3
- camel/models/anthropic_model.py +11 -3
- camel/models/azure_openai_model.py +9 -3
- camel/models/base_audio_model.py +6 -0
- camel/models/base_model.py +4 -0
- camel/models/deepseek_model.py +9 -3
- camel/models/gemini_model.py +9 -3
- camel/models/groq_model.py +9 -3
- camel/models/internlm_model.py +8 -2
- camel/models/model_factory.py +123 -0
- camel/models/modelscope_model.py +208 -0
- camel/models/moonshot_model.py +8 -2
- camel/models/nemotron_model.py +9 -3
- camel/models/nvidia_model.py +9 -3
- camel/models/ollama_model.py +9 -3
- camel/models/openai_audio_models.py +7 -5
- camel/models/openai_compatible_model.py +9 -3
- camel/models/openai_model.py +58 -5
- camel/models/openrouter_model.py +9 -3
- camel/models/qwen_model.py +9 -3
- camel/models/samba_model.py +9 -3
- camel/models/sglang_model.py +11 -4
- camel/models/siliconflow_model.py +8 -2
- camel/models/stub_model.py +2 -1
- camel/models/togetherai_model.py +11 -5
- camel/models/vllm_model.py +10 -4
- camel/models/yi_model.py +9 -3
- camel/models/zhipuai_model.py +11 -5
- camel/retrievers/auto_retriever.py +14 -0
- camel/retrievers/vector_retriever.py +1 -1
- camel/storages/__init__.py +2 -0
- camel/storages/graph_storages/neo4j_graph.py +1 -1
- camel/storages/vectordb_storages/__init__.py +2 -0
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/qdrant.py +2 -2
- camel/storages/vectordb_storages/tidb.py +332 -0
- camel/tasks/task.py +2 -2
- camel/toolkits/__init__.py +9 -1
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +11 -3
- camel/toolkits/audio_analysis_toolkit.py +2 -0
- camel/toolkits/base.py +3 -0
- camel/toolkits/browser_toolkit.py +84 -61
- camel/toolkits/code_execution.py +3 -1
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +2 -0
- camel/toolkits/excel_toolkit.py +2 -0
- camel/toolkits/file_write_toolkit.py +2 -0
- camel/toolkits/github_toolkit.py +6 -4
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +17 -1
- camel/toolkits/image_analysis_toolkit.py +2 -0
- camel/toolkits/linkedin_toolkit.py +2 -1
- camel/toolkits/math_toolkit.py +2 -0
- camel/toolkits/mcp_toolkit.py +42 -52
- camel/toolkits/meshy_toolkit.py +20 -2
- camel/toolkits/networkx_toolkit.py +2 -0
- camel/toolkits/notion_toolkit.py +7 -0
- camel/toolkits/openai_agent_toolkit.py +131 -0
- camel/toolkits/openbb_toolkit.py +2 -1
- camel/toolkits/pubmed_toolkit.py +2 -0
- camel/toolkits/reddit_toolkit.py +2 -1
- camel/toolkits/retrieval_toolkit.py +2 -1
- camel/toolkits/search_toolkit.py +2 -1
- camel/toolkits/searxng_toolkit.py +207 -0
- camel/toolkits/semantic_scholar_toolkit.py +2 -0
- camel/toolkits/slack_toolkit.py +2 -0
- camel/toolkits/stripe_toolkit.py +2 -1
- camel/toolkits/sympy_toolkit.py +2 -0
- camel/toolkits/terminal_toolkit.py +2 -0
- camel/toolkits/thinking_toolkit.py +168 -12
- camel/toolkits/twitter_toolkit.py +2 -1
- camel/toolkits/video_analysis_toolkit.py +2 -1
- camel/toolkits/video_download_toolkit.py +2 -1
- camel/toolkits/weather_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +2 -1
- camel/toolkits/zapier_toolkit.py +2 -1
- camel/types/enums.py +66 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/__init__.py +2 -0
- camel/utils/chunker/code_chunker.py +9 -9
- camel/utils/commons.py +50 -30
- camel/utils/constants.py +2 -2
- camel/utils/mcp.py +79 -0
- camel/verifiers/__init__.py +2 -0
- camel/verifiers/base.py +15 -15
- camel/verifiers/math_verifier.py +182 -0
- camel/verifiers/python_verifier.py +28 -28
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/METADATA +54 -4
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/RECORD +122 -110
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.37.dist-info → camel_ai-0.2.39.dist-info}/licenses/LICENSE +0 -0
camel/datasets/self_instruct_generator.py ADDED
@@ -0,0 +1,415 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+
+import asyncio
+import random
+from datetime import datetime
+from typing import Iterable, List, Optional, cast
+
+from pydantic import BaseModel, Field, ValidationError
+
+from camel.agents import ChatAgent
+from camel.logger import get_logger
+from camel.models import ModelFactory
+from camel.types import ModelPlatformType, ModelType
+from camel.verifiers import BaseVerifier
+
+from .base_generator import BaseGenerator
+from .models import DataPoint
+from .static_dataset import StaticDataset
+
+logger = get_logger(__name__)
+
+DEFAULT_INSTRUCTION_SYSTEM_PROMPT = """
+You are a high-capacity instruction generation assistant.
+
+Your task is to generate a **new, creative, and challenging question** based on
+several examples.
+These examples may cover different domains or styles, but your goal is to:
+- **Understand their specific patterns** in structure, and complexity;
+- **Combine and synthesize** ideas from multiple examples, rather than copying
+or lightly editing any single one;
+- **Intelligently integrate** multiple reasoning steps, constraints, or
+concepts into a single, coherent question;
+- Ensure the new question is **non-trivial** and requires deep thinking or
+multi-step reasoning.
+
+**Guidelines:**
+- Use the examples as inspiration for format, depth, and tone.
+- Your new question should be self-contained, logically sound, and answerable.
+- Do not repeat exact phrasings or create shallow combinations; instead,
+produce something meaningfully new.
+- Avoid open-ended or subjective questions that depend on personal opinions or
+discussion.
+- The generated question must have a **clear, objective, and verifiable
+answer**.
+- Aim for increased depth or novelty through subtle combination or
+transformation.
+- Keep the final output to a **single unified question** with one clear answer,
+not a multi-part task.
+
+**Output Format (strict):**
+```
+Question: [Generated question]
+```
+"""
+
+DEFAULT_RATIONALE_SYSTEM_PROMPT = """You are an advanced Python code assistant.
+
+Your task is to **solve the given question by writing Python code only**,
+without any explanation or natural language output.
+The code must compute the answer **programmatically**, not by hardcoding or
+guessing the result.
+
+**Rules:**
+- Use Python code to perform the actual computation.
+- Use {package_list} to solve the problem. Do not import any other libraries.
+- **Do not hardcode the final answer** (e.g., avoid writing `print(1/2)` unless
+that value is computed).
+- The result must be obtained through valid computation logic in code.
+- Do not include explanations. Output code only.
+- The entire code must be wrapped in triple backticks:
+```
+[Your Python code here]
+```
+
+Now, solve the following question using Python. Only output the code:
+"""
+
+
+class SelfInstructGenerator(BaseGenerator):
+    r"""A generator for creating synthetic datapoints using self-instruct.
+
+    It utilizes both a human-provided dataset (seed_dataset) and generated
+    machine instructions (machine_instructions) to produce new, synthetic
+    datapoints that include a question, a computed rationale (code), and a
+    final answer (from a verifier).
+    """
+
+    class QuestionSchema(BaseModel):
+        r"""Schema for the generated question.
+
+        Attributes:
+            question (str): The question generated by the model.
+        """
+
+        question: str = Field(description="The question generated")
+
+    class RationaleSchema(BaseModel):
+        r"""Schema for the generated rationale code.
+
+        Attributes:
+            code (str): The generated code without any formatting.
+        """
+
+        code: str = Field(
+            description="The generated code without any formatting"
+        )
+
+    def __init__(
+        self,
+        seed_dataset: StaticDataset,
+        verifier: BaseVerifier,
+        instruction_agent: Optional[ChatAgent] = None,
+        rationale_agent: Optional[ChatAgent] = None,
+        seed: int = 42,
+        **kwargs,
+    ):
+        r"""Initialize the self-instruct generator.
+
+        Args:
+            seed_dataset (StaticDataset): Dataset containing seed instructions.
+            verifier (BaseVerifier): Verifier instance to validate generated
+                solutions.
+            instruction_agent (Optional[ChatAgent]): Agent for generating
+                instructions. If not provided, a default agent will be created.
+            rationale_agent (Optional[ChatAgent]): Agent for generating
+                rationales. If not provided, a default agent will be created.
+            seed (int): Random seed for reproducibility. (default: :obj:`42`)
+            **kwargs: Additional keyword arguments passed to the BaseGenerator.
+        """
+        super().__init__(seed=seed, **kwargs)
+        self.seed_dataset = seed_dataset
+        self.verifier = verifier
+        # extract packages from verifier
+        self.packages: List[str] = getattr(
+            self.verifier, "required_packages", []
+        )
+        # create default agents if not provided
+        self.instruction_agent = (
+            instruction_agent or self.default_instruction_agent()
+        )
+        self.rationale_agent = (
+            rationale_agent or self.default_rationale_agent()
+        )
+
+        # Extract questions from the seed dataset as human_instructions
+        self.human_instructions: List[str] = [
+            dp.question
+            for dp in list(cast(Iterable[DataPoint], self.seed_dataset))
+        ]
+        self.machine_instructions: List[DataPoint] = []
+        # Create an instance-level lock for thread-safe updates to _data
+        self._lock = asyncio.Lock()
+        self._data = []  # Storage for generated DataPoint instances
+
+    def default_instruction_agent(self) -> ChatAgent:
+        r"""Create the default instruction generation agent.
+
+        This agent is configured with a moderate temperature setting to
+        encourage creative and diverse instruction generation behavior.
+
+        Returns:
+            ChatAgent: An agent with the default instruction prompt.
+        """
+        model = ModelFactory.create(
+            model_platform=ModelPlatformType.DEFAULT,
+            model_type=ModelType.DEFAULT,
+            model_config_dict={"temperature": 0.7},
+        )
+        return ChatAgent(
+            DEFAULT_INSTRUCTION_SYSTEM_PROMPT,
+            model=model,
+        )
+
+    def default_rationale_agent(self) -> ChatAgent:
+        r"""Create the default rationale generation agent.
+
+        This agent is configured with a deterministic (zero temperature)
+        setting to ensure consistent and precise rationale generation based on
+        a given instruction and package list.
+
+        Returns:
+            ChatAgent: An agent with the rationale prompt
+        """
+        model = ModelFactory.create(
+            model_platform=ModelPlatformType.DEFAULT,
+            model_type=ModelType.DEFAULT,
+            model_config_dict={"temperature": 0.0},
+        )
+        return ChatAgent(
+            DEFAULT_RATIONALE_SYSTEM_PROMPT.format(package_list=self.packages),
+            model=model,
+        )
+
+    @staticmethod
+    def format_support_block(dp: DataPoint) -> str:
+        r"""Format a DataPoint into a few-shot example block.
+
+        Args:
+            dp (DataPoint): A data point.
+
+        Returns:
+            str: A formatted string containing the question and its
+                corresponding code block in Markdown-style Python format.
+        """
+        support_q = dp.question.strip()
+        support_code = dp.rationale.strip() if dp.rationale else ""
+        return (
+            f"Question:\n{support_q}\n\n"
+            "Code:\n"
+            "```python\n"
+            f"{support_code}\n"
+            "```"
+        )
+
+    def generate_new_instruction(
+        self,
+        agent: ChatAgent,
+        support_human_dps: list[DataPoint],
+        support_machine_dps: list[DataPoint],
+    ) -> str:
+        r"""Generate a new instruction using self-instruct prompting.
+
+        Args:
+            agent (ChatAgent): The agent to use for generating the instruction.
+            support_human_dps (list[DataPoint]): List of human examples to
+                sample.
+            support_machine_dps (list[DataPoint]): List of machine examples to
+                sample.
+
+        Returns:
+            str: The newly generated question.
+        """
+        human_sample = [dp.question for dp in list(support_human_dps)]
+        machine_sample = [dp.question for dp in list(support_machine_dps)]
+
+        few_shot_examples = human_sample + machine_sample
+
+        # Build the prompt using the few-shot examples
+        prompt = "Below are some question examples:\n\n"
+        for idx, instr in enumerate(few_shot_examples, start=1):
+            prompt += f"Question {idx}: {instr}\n"
+        prompt += f"Question {len(few_shot_examples) + 1}:\n"
+        prompt += "Now generate a new question based on the given examples.\n"
+
+        question_template = f"Question: {prompt}"
+        response = cast(
+            SelfInstructGenerator.QuestionSchema,
+            agent.step(question_template, response_format=self.QuestionSchema)
+            .msgs[0]
+            .parsed,
+        )
+        return response.question
+
+    def generate_rationale(
+        self,
+        question: str,
+        agent: Optional[ChatAgent] = None,
+        support_human_dps: Optional[list[DataPoint]] = None,
+    ) -> str:
+        r"""Generate rationale code (solution) for the given question.
+
+        Args:
+            question (str): The question to be solved.
+            agent (Optional[ChatAgent]): The agent to use for generating the
+                rationale. If None is provided, the default rationale agent
+                will be used. (default: :obj:`None`)
+            support_human_dps (Optional[list[DataPoint]]): List of human
+                examples to sample. (default: :obj:`None`)
+
+        Returns:
+            str: The generated code solution as a string.
+        """
+
+        # Build few-shot example prompt
+        few_shot_prompt = ""
+        if support_human_dps:
+            few_shot_examples = [
+                self.format_support_block(dp) for dp in support_human_dps
+            ]
+            few_shot_prompt += "Below are example questions and solutions:\n\n"
+            few_shot_prompt += "\n\n".join(few_shot_examples)
+
+        few_shot_prompt += f"\n\nWrite code to solve the question:\n{question}"
+
+        response = cast(
+            SelfInstructGenerator.RationaleSchema,
+            (agent or self.default_rationale_agent())
+            .step(few_shot_prompt, response_format=self.RationaleSchema)
+            .msgs[0]
+            .parsed,
+        )
+        return response.code
+
+    async def generate_new(
+        self,
+        n: int,
+        max_retries: int = 10,
+        human_sample_count: int = 3,
+        machine_sample_count: int = 1,
+        **kwargs,
+    ) -> None:
+        r"""Generates and validates `n` new datapoints through
+        self-instruct prompting, with a retry limit.
+
+        Args:
+            n (int): The number of valid datapoints to generate.
+            max_retries (int): Maximum number of retries before stopping.
+                (default: :obj:`10`)
+            human_sample_count (int): Number of human examples to sample.
+                (default: :obj:`3`)
+            machine_sample_count (int): Number of machine examples to sample.
+                (default: :obj:`1`)
+            **kwargs: Additional keyword arguments.
+
+        Notes:
+            - Retries on validation failures until `n` valid datapoints exist
+              or `max_retries` is reached, whichever comes first.
+            - If retries are exhausted before reaching `n`, a `RuntimeError`
+              is raised.
+            - Metadata includes a timestamp for tracking datapoint creation.
+        """
+        valid_data_points: list[DataPoint] = []
+        retries = 0
+
+        while len(valid_data_points) < n and retries < max_retries:
+            try:
+                human_dps_list = list(cast(List[DataPoint], self.seed_dataset))
+                support_human_dps = random.sample(
+                    human_dps_list,
+                    min(human_sample_count, len(human_dps_list)),
+                )
+
+                machine_dps_list = list(self.machine_instructions)
+                support_machine_dps = []
+                if machine_dps_list and machine_sample_count > 0:
+                    support_machine_dps = random.sample(
+                        machine_dps_list,
+                        min(machine_sample_count, len(machine_dps_list)),
+                    )
+                question = self.generate_new_instruction(
+                    self.instruction_agent,
+                    support_human_dps,
+                    support_machine_dps,
+                )
+                rationale = self.generate_rationale(
+                    question, self.rationale_agent, support_human_dps
+                )
+                if not isinstance(rationale, str):
+                    raise TypeError(f"Rationale {rationale} is not a string.")
+
+                try:
+                    verifier_response = await self.verifier.verify(
+                        solution=rationale,
+                        reference_answer=None,
+                    )
+                    if not verifier_response or not verifier_response.result:
+                        raise ValueError(
+                            "Verifier unsuccessful, response: "
+                            f"{verifier_response}"
+                        )
+                except (ValueError, AttributeError) as e:
+                    logger.warning(
+                        f"Verifier issue: {e}, "
+                        f"retrying... ({retries + 1}/{max_retries})"
+                    )
+                    retries += 1
+                    continue
+                try:
+                    new_datapoint = DataPoint(
+                        question=question,
+                        rationale=rationale,
+                        final_answer=verifier_response.result,
+                        metadata={
+                            "synthetic": str(True),
+                            "created": datetime.now().isoformat(),
+                            "generator": "self_instruct",
+                        },
+                    )
+                except ValidationError as e:
+                    logger.warning(
+                        f"Datapoint validation failed: {e}, "
+                        f"retrying... ({retries + 1}/{max_retries})"
+                    )
+                    retries += 1
+                    continue
+
+                valid_data_points.append(new_datapoint)
+
+            except Exception as e:
+                logger.warning(
+                    f"Unexpected error: {e}, retrying..."
+                    f" ({retries + 1}/{max_retries})"
+                )
+                retries += 1
+
+        if len(valid_data_points) < n:
+            raise RuntimeError(
+                f"Failed to generate {n} valid datapoints "
+                f"after {max_retries} retries."
+            )
+
+        async with self._lock:
+            self._data.extend(valid_data_points)
camel/embeddings/openai_compatible_embedding.py CHANGED
@@ -30,6 +30,9 @@ class OpenAICompatibleEmbedding(BaseEmbedding[str]):
         model_type (str): The model type to be used for text embeddings.
         api_key (str): The API key for authenticating with the model service.
         url (str): The url to the model service.
+        output_dim (Optional[int]): The dimensionality of the embedding
+            vectors. If None, it will be determined during the first
+            embedding call.
     """
 
     @api_keys_required(
@@ -43,9 +46,10 @@ class OpenAICompatibleEmbedding(BaseEmbedding[str]):
         model_type: str,
         api_key: Optional[str] = None,
         url: Optional[str] = None,
+        output_dim: Optional[int] = None,
     ) -> None:
         self.model_type = model_type
-        self.output_dim: Optional[int] = None
+        self.output_dim: Optional[int] = output_dim
 
         self._api_key = api_key or os.environ.get(
             "OPENAI_COMPATIBILITY_API_KEY"
@@ -87,10 +91,14 @@ class OpenAICompatibleEmbedding(BaseEmbedding[str]):
 
         Returns:
             int: The dimensionality of the embedding for the current model.
+
+        Raises:
+            ValueError: If the embedding dimension cannot be determined.
         """
         if self.output_dim is None:
-            raise ValueError(
-                "Output dimension is not yet determined. Call "
-                "'embed_list' first."
-            )
+            self.embed_list(["test"])
+
+            if self.output_dim is None:
+                raise ValueError("Failed to determine embedding dimension")
+
         return self.output_dim
camel/environments/models.py CHANGED
@@ -13,7 +13,7 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 
 from datetime import datetime, timezone
-from typing import Any, Dict, Optional, Protocol
+from typing import Any, Dict, Optional, Protocol, Tuple
 
 from pydantic import BaseModel, Field
 
@@ -33,9 +33,8 @@ class Action(BaseModel):
             generated (UTC).
     """
 
-    index: int = Field(
-        description="..."
-    )
+    index: int = Field(default=0, description="...")
+
     llm_response: str = Field(description="Generated response from the LLM")
     metadata: Dict[str, Any] = Field(
         default_factory=dict,
@@ -87,6 +86,13 @@ class StepResult(BaseModel):
         description="Additional information about the step",
     )
 
+    def as_tuple(
+        self,
+    ) -> Tuple[Observation, float, bool, Dict[str, Any]]:
+        r"""Returns all fields of the model as a tuple, in declaration order"""
+        self.info["rewards_dict"] = self.rewards_dict
+        return (self.observation, self.reward, self.done, self.info)
+
 
 class Environment(Protocol):
     async def reset(self) -> Observation: