camel-ai 0.2.38__py3-none-any.whl → 0.2.39__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +4 -0
- camel/agents/repo_agent.py +2 -2
- camel/benchmarks/apibank.py +1 -1
- camel/benchmarks/apibench.py +1 -1
- camel/configs/__init__.py +3 -0
- camel/configs/modelscope_config.py +59 -0
- camel/datagen/self_improving_cot.py +1 -1
- camel/datasets/__init__.py +2 -0
- camel/datasets/base_generator.py +22 -9
- camel/datasets/few_shot_generator.py +2 -3
- camel/datasets/self_instruct_generator.py +415 -0
- camel/embeddings/openai_compatible_embedding.py +13 -5
- camel/environments/models.py +1 -1
- camel/environments/single_step.py +155 -89
- camel/interpreters/docker_interpreter.py +1 -1
- camel/interpreters/internal_python_interpreter.py +1 -1
- camel/loaders/unstructured_io.py +2 -1
- camel/memories/blocks/chat_history_block.py +1 -1
- camel/memories/context_creators/score_based.py +2 -2
- camel/models/__init__.py +2 -0
- camel/models/model_factory.py +119 -0
- camel/models/modelscope_model.py +208 -0
- camel/models/openai_audio_models.py +2 -2
- camel/models/openai_model.py +49 -2
- camel/models/togetherai_model.py +2 -2
- camel/models/vllm_model.py +1 -1
- camel/models/zhipuai_model.py +2 -2
- camel/retrievers/vector_retriever.py +1 -1
- camel/storages/graph_storages/neo4j_graph.py +1 -1
- camel/storages/vectordb_storages/base.py +2 -2
- camel/storages/vectordb_storages/milvus.py +2 -2
- camel/storages/vectordb_storages/qdrant.py +2 -2
- camel/tasks/task.py +2 -2
- camel/toolkits/__init__.py +4 -1
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +11 -3
- camel/toolkits/audio_analysis_toolkit.py +2 -0
- camel/toolkits/base.py +3 -0
- camel/toolkits/code_execution.py +3 -1
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +2 -0
- camel/toolkits/excel_toolkit.py +2 -0
- camel/toolkits/file_write_toolkit.py +2 -0
- camel/toolkits/github_toolkit.py +6 -4
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +17 -1
- camel/toolkits/image_analysis_toolkit.py +2 -0
- camel/toolkits/linkedin_toolkit.py +2 -1
- camel/toolkits/math_toolkit.py +2 -0
- camel/toolkits/mcp_toolkit.py +42 -52
- camel/toolkits/meshy_toolkit.py +20 -2
- camel/toolkits/networkx_toolkit.py +2 -0
- camel/toolkits/notion_toolkit.py +7 -0
- camel/toolkits/openbb_toolkit.py +2 -1
- camel/toolkits/pubmed_toolkit.py +2 -0
- camel/toolkits/reddit_toolkit.py +2 -1
- camel/toolkits/retrieval_toolkit.py +2 -1
- camel/toolkits/search_toolkit.py +2 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -0
- camel/toolkits/slack_toolkit.py +2 -0
- camel/toolkits/stripe_toolkit.py +2 -1
- camel/toolkits/sympy_toolkit.py +2 -0
- camel/toolkits/terminal_toolkit.py +2 -0
- camel/toolkits/twitter_toolkit.py +2 -1
- camel/toolkits/video_analysis_toolkit.py +2 -1
- camel/toolkits/video_download_toolkit.py +2 -1
- camel/toolkits/weather_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +2 -1
- camel/toolkits/zapier_toolkit.py +2 -1
- camel/types/enums.py +65 -0
- camel/types/unified_model_type.py +5 -0
- camel/utils/__init__.py +2 -0
- camel/utils/chunker/code_chunker.py +9 -9
- camel/utils/commons.py +50 -30
- camel/utils/constants.py +2 -2
- camel/utils/mcp.py +79 -0
- camel/verifiers/__init__.py +2 -0
- camel/verifiers/base.py +15 -15
- camel/verifiers/math_verifier.py +182 -0
- camel/verifiers/python_verifier.py +18 -26
- {camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/METADATA +3 -1
- {camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/RECORD +85 -80
- {camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.38.dist-info → camel_ai-0.2.39.dist-info}/licenses/LICENSE +0 -0
camel/__init__.py
CHANGED
camel/agents/chat_agent.py
CHANGED
|
@@ -526,6 +526,10 @@ class ChatAgent(BaseAgent):
|
|
|
526
526
|
message.content = response.output_messages[0].content
|
|
527
527
|
if not self._try_format_message(message, response_format):
|
|
528
528
|
logger.warning(f"Failed to parse response: {message.content}")
|
|
529
|
+
logger.warning(
|
|
530
|
+
"To improve reliability, consider using models "
|
|
531
|
+
"that are better equipped to handle structured output"
|
|
532
|
+
)
|
|
529
533
|
|
|
530
534
|
async def _aformat_response_if_needed(
|
|
531
535
|
self,
|
camel/agents/repo_agent.py
CHANGED
|
@@ -17,7 +17,7 @@ from string import Template
|
|
|
17
17
|
from typing import TYPE_CHECKING, List, Optional, Tuple, Union
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
20
|
-
from github import Github
|
|
20
|
+
from github.MainClass import Github
|
|
21
21
|
from pydantic import BaseModel
|
|
22
22
|
|
|
23
23
|
from camel.agents import ChatAgent
|
|
@@ -219,7 +219,7 @@ class RepoAgent(ChatAgent):
|
|
|
219
219
|
List[RepositoryInfo]: A list of objects containing information
|
|
220
220
|
about all the repositories, including their contents.
|
|
221
221
|
"""
|
|
222
|
-
from github import Github
|
|
222
|
+
from github.MainClass import Github
|
|
223
223
|
|
|
224
224
|
github_client = Github(self.github_auth_token)
|
|
225
225
|
res = []
|
camel/benchmarks/apibank.py
CHANGED
|
@@ -48,7 +48,7 @@ def process_messages(
|
|
|
48
48
|
Args:
|
|
49
49
|
chat_history (List[Dict[str, Any]]):
|
|
50
50
|
A list of dictionaries representing the chat history.
|
|
51
|
-
prompt (str): A
|
|
51
|
+
prompt (str): A prompt to be set as the system message.
|
|
52
52
|
|
|
53
53
|
Returns:
|
|
54
54
|
List[Dict[str, str]]: A list of dictionaries representing
|
camel/benchmarks/apibench.py
CHANGED
|
@@ -30,7 +30,7 @@ logger = logging.getLogger(__name__)
|
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
# Mapping of dataset names to file names
|
|
33
|
-
# 'Oracle'
|
|
33
|
+
# 'Oracle' retriever used here which means all the full
|
|
34
34
|
# API documentation will be included in the prompt
|
|
35
35
|
dataset_mapping = {
|
|
36
36
|
"huggingface": {
|
camel/configs/__init__.py
CHANGED
|
@@ -21,6 +21,7 @@ from .groq_config import GROQ_API_PARAMS, GroqConfig
|
|
|
21
21
|
from .internlm_config import INTERNLM_API_PARAMS, InternLMConfig
|
|
22
22
|
from .litellm_config import LITELLM_API_PARAMS, LiteLLMConfig
|
|
23
23
|
from .mistral_config import MISTRAL_API_PARAMS, MistralConfig
|
|
24
|
+
from .modelscope_config import MODELSCOPE_API_PARAMS, ModelScopeConfig
|
|
24
25
|
from .moonshot_config import MOONSHOT_API_PARAMS, MoonshotConfig
|
|
25
26
|
from .nvidia_config import NVIDIA_API_PARAMS, NvidiaConfig
|
|
26
27
|
from .ollama_config import OLLAMA_API_PARAMS, OllamaConfig
|
|
@@ -85,6 +86,8 @@ __all__ = [
|
|
|
85
86
|
'INTERNLM_API_PARAMS',
|
|
86
87
|
'MoonshotConfig',
|
|
87
88
|
"MOONSHOT_API_PARAMS",
|
|
89
|
+
'ModelScopeConfig',
|
|
90
|
+
'MODELSCOPE_API_PARAMS',
|
|
88
91
|
'SiliconFlowConfig',
|
|
89
92
|
'SILICONFLOW_API_PARAMS',
|
|
90
93
|
'AIMLConfig',
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from typing import Optional, Union
|
|
17
|
+
|
|
18
|
+
from camel.configs.base_config import BaseConfig
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ModelScopeConfig(BaseConfig):
    r"""Defines the parameters for generating chat completions using the
    ModelScope API. You can refer to the following link for more details:
    https://www.modelscope.cn/docs/model-service/API-Inference/intro

    Every field defaults to :obj:`None`, i.e. no value is set by this
    config unless the caller provides one.

    Args:
        tool_choice (Union[dict[str, str], str], optional): Controls which (if
            any) tool is called by the model. :obj:`"none"` means the model
            will not call any tool and instead generates a message.
            :obj:`"auto"` means the model can pick between generating a
            message or calling one or more tools. :obj:`"required"` or
            specifying a particular tool via
            {"type": "function", "function": {"name": "some_function"}}
            can be used to guide the model to use tools more strongly.
            (default: :obj:`None`)
        max_tokens (int, optional): Specifies the maximum number of tokens
            the model can generate. This sets an upper limit, but does not
            guarantee that this number will always be reached.
            (default: :obj:`None`)
        top_p (float, optional): Controls the randomness of the generated
            results. Lower values lead to less randomness, while higher
            values increase randomness. (default: :obj:`None`)
        temperature (float, optional): Controls the diversity and focus of
            the generated results. Lower values make the output more focused,
            while higher values make it more diverse. (default: :obj:`None`)
        stream (bool, optional): If True, enables streaming output.
            (default: :obj:`None`)
    """

    # Tool-selection policy: a mode string or an explicit tool spec.
    tool_choice: Optional[Union[dict[str, str], str]] = None
    # Upper bound on the number of generated tokens.
    max_tokens: Optional[int] = None
    # Nucleus-sampling parameter.
    top_p: Optional[float] = None
    # Sampling temperature; left unset by default.
    temperature: Optional[float] = None
    # Whether to stream the response.
    stream: Optional[bool] = None
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# Set of every parameter name declared on ModelScopeConfig.
MODELSCOPE_API_PARAMS = set(ModelScopeConfig.model_fields)
|
|
@@ -518,7 +518,7 @@ class SelfImprovingCoTPipeline:
|
|
|
518
518
|
self.reason_agent.model_backend.model_config_dict['n'] = (
|
|
519
519
|
self.rejection_sampling_n
|
|
520
520
|
)
|
|
521
|
-
# Generate multiple
|
|
521
|
+
# Generate multiple candidate traces in one call using parameter n
|
|
522
522
|
responses = self.reason_agent.step(prompt)
|
|
523
523
|
# Extract candidate traces
|
|
524
524
|
candidate_traces = [choice.content for choice in responses.msgs]
|
camel/datasets/__init__.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
from .base_generator import BaseGenerator
|
|
15
15
|
from .few_shot_generator import FewShotGenerator
|
|
16
16
|
from .models import DataPoint
|
|
17
|
+
from .self_instruct_generator import SelfInstructGenerator
|
|
17
18
|
from .static_dataset import StaticDataset
|
|
18
19
|
|
|
19
20
|
__all__ = [
|
|
@@ -21,4 +22,5 @@ __all__ = [
|
|
|
21
22
|
"DataPoint",
|
|
22
23
|
"FewShotGenerator",
|
|
23
24
|
"StaticDataset",
|
|
25
|
+
"SelfInstructGenerator",
|
|
24
26
|
]
|
camel/datasets/base_generator.py
CHANGED
|
@@ -39,6 +39,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
39
39
|
def __init__(
|
|
40
40
|
self,
|
|
41
41
|
seed: int = 42,
|
|
42
|
+
buffer: int = 20,
|
|
42
43
|
cache: Union[str, Path, None] = None,
|
|
43
44
|
data_path: Union[str, Path, None] = None,
|
|
44
45
|
**kwargs,
|
|
@@ -47,6 +48,8 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
47
48
|
|
|
48
49
|
Args:
|
|
49
50
|
seed (int): Random seed for reproducibility. (default: :obj:`42`)
|
|
51
|
+
buffer (int): Amount of DataPoints to be generated when the
|
|
52
|
+
iterator runs out of DataPoints in data. (default: :obj:`20`)
|
|
50
53
|
cache (Union[str, Path, None]): Optional path to save generated
|
|
51
54
|
datapoints during iteration. If None is provided, datapoints
|
|
52
55
|
will be discarded every 100 generations.
|
|
@@ -56,7 +59,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
56
59
|
"""
|
|
57
60
|
self._rng = random.Random(seed)
|
|
58
61
|
self.cache = Path(cache) if cache else None
|
|
59
|
-
|
|
62
|
+
self._buffer = buffer
|
|
60
63
|
self._data: List[DataPoint] = []
|
|
61
64
|
self._batch_to_save: List[DataPoint] = []
|
|
62
65
|
|
|
@@ -72,15 +75,27 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
72
75
|
)
|
|
73
76
|
|
|
74
77
|
@abc.abstractmethod
|
|
75
|
-
async def generate_new(self, n: int, **kwargs) ->
|
|
76
|
-
r"""Generate n new datapoints.
|
|
78
|
+
async def generate_new(self, n: int, **kwargs) -> None:
|
|
79
|
+
r"""Generate n new datapoints and append them to self._data.
|
|
80
|
+
|
|
81
|
+
Subclass implementations must generate the specified number of
|
|
82
|
+
datapoints and append them directly to the `self._data` list.
|
|
83
|
+
This method should not return the datapoints; the iterator
|
|
84
|
+
relies on `self._data` being populated.
|
|
77
85
|
|
|
78
86
|
Args:
|
|
79
|
-
n (int): Number of datapoints to generate.
|
|
87
|
+
n (int): Number of datapoints to generate and append.
|
|
80
88
|
**kwargs: Additional generation parameters.
|
|
81
89
|
|
|
82
90
|
Returns:
|
|
83
|
-
|
|
91
|
+
None: This method should not return anything.
|
|
92
|
+
|
|
93
|
+
Example:
|
|
94
|
+
```python
|
|
95
|
+
async def generate_new(self, n: int, **kwargs) -> None:
|
|
96
|
+
new_points = [DataPoint(...) for _ in range(n)]
|
|
97
|
+
self._data.extend(new_points)
|
|
98
|
+
```
|
|
84
99
|
"""
|
|
85
100
|
pass
|
|
86
101
|
|
|
@@ -99,8 +114,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
99
114
|
async def generator():
|
|
100
115
|
while True:
|
|
101
116
|
if not self._data:
|
|
102
|
-
|
|
103
|
-
self._data.extend(new_datapoints)
|
|
117
|
+
await self.generate_new(self._buffer)
|
|
104
118
|
datapoint = self._data.pop(0)
|
|
105
119
|
yield datapoint
|
|
106
120
|
self._batch_to_save.append(datapoint)
|
|
@@ -137,8 +151,7 @@ class BaseGenerator(abc.ABC, IterableDataset):
|
|
|
137
151
|
|
|
138
152
|
while True:
|
|
139
153
|
if not self._data:
|
|
140
|
-
|
|
141
|
-
self._data.extend(new_datapoints)
|
|
154
|
+
asyncio.run(self.generate_new(self._buffer))
|
|
142
155
|
datapoint = self._data.pop(0)
|
|
143
156
|
yield datapoint
|
|
144
157
|
self._batch_to_save.append(datapoint)
|
|
@@ -126,7 +126,7 @@ class FewShotGenerator(BaseGenerator):
|
|
|
126
126
|
max_retries: int = 10,
|
|
127
127
|
num_examples: int = 3,
|
|
128
128
|
**kwargs,
|
|
129
|
-
) ->
|
|
129
|
+
) -> None:
|
|
130
130
|
r"""Generates and validates `n` new datapoints through
|
|
131
131
|
few-shot prompting, with a retry limit.
|
|
132
132
|
|
|
@@ -203,7 +203,7 @@ class FewShotGenerator(BaseGenerator):
|
|
|
203
203
|
try:
|
|
204
204
|
verifier_response = await self.verifier.verify(
|
|
205
205
|
solution=rationale,
|
|
206
|
-
|
|
206
|
+
reference_answer=None,
|
|
207
207
|
)
|
|
208
208
|
if not verifier_response or not verifier_response.result:
|
|
209
209
|
raise ValueError(
|
|
@@ -255,4 +255,3 @@ class FewShotGenerator(BaseGenerator):
|
|
|
255
255
|
# Thread-safe way to extend the data list
|
|
256
256
|
async with asyncio.Lock():
|
|
257
257
|
self._data.extend(valid_data_points)
|
|
258
|
-
return valid_data_points
|
|
@@ -0,0 +1,415 @@
|
|
|
1
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
|
|
14
|
+
|
|
15
|
+
import asyncio
|
|
16
|
+
import random
|
|
17
|
+
from datetime import datetime
|
|
18
|
+
from typing import Iterable, List, Optional, cast
|
|
19
|
+
|
|
20
|
+
from pydantic import BaseModel, Field, ValidationError
|
|
21
|
+
|
|
22
|
+
from camel.agents import ChatAgent
|
|
23
|
+
from camel.logger import get_logger
|
|
24
|
+
from camel.models import ModelFactory
|
|
25
|
+
from camel.types import ModelPlatformType, ModelType
|
|
26
|
+
from camel.verifiers import BaseVerifier
|
|
27
|
+
|
|
28
|
+
from .base_generator import BaseGenerator
|
|
29
|
+
from .models import DataPoint
|
|
30
|
+
from .static_dataset import StaticDataset
|
|
31
|
+
|
|
32
|
+
logger = get_logger(__name__)
|
|
33
|
+
|
|
34
|
+
DEFAULT_INSTRUCTION_SYSTEM_PROMPT = """
|
|
35
|
+
You are a high-capacity instruction generation assistant.
|
|
36
|
+
|
|
37
|
+
Your task is to generate a **new, creative, and challenging question** based on
|
|
38
|
+
several examples.
|
|
39
|
+
These examples may cover different domains or styles, but your goal is to:
|
|
40
|
+
- **Understand their specific patterns** in structure, and complexity;
|
|
41
|
+
- **Combine and synthesize** ideas from multiple examples, rather than copying
|
|
42
|
+
or lightly editing any single one;
|
|
43
|
+
- **Intelligently integrate** multiple reasoning steps, constraints, or
|
|
44
|
+
concepts into a single, coherent question;
|
|
45
|
+
- Ensure the new question is **non-trivial** and requires deep thinking or
|
|
46
|
+
multi-step reasoning.
|
|
47
|
+
|
|
48
|
+
**Guidelines:**
|
|
49
|
+
- Use the examples as inspiration for format, depth, and tone.
|
|
50
|
+
- Your new question should be self-contained, logically sound, and answerable.
|
|
51
|
+
- Do not repeat exact phrasings or create shallow combinations; instead,
|
|
52
|
+
produce something meaningfully new.
|
|
53
|
+
- Avoid open-ended or subjective questions that depend on personal opinions or
|
|
54
|
+
discussion.
|
|
55
|
+
- The generated question must have a **clear, objective, and verifiable
|
|
56
|
+
answer**.
|
|
57
|
+
- Aim for increased depth or novelty through subtle combination or
|
|
58
|
+
transformation.
|
|
59
|
+
- Keep the final output to a **single unified question** with one clear answer,
|
|
60
|
+
not a multi-part task.
|
|
61
|
+
|
|
62
|
+
**Output Format (strict):**
|
|
63
|
+
```
|
|
64
|
+
Question: [Generated question]
|
|
65
|
+
```
|
|
66
|
+
"""
|
|
67
|
+
|
|
68
|
+
DEFAULT_RATIONALE_SYSTEM_PROMPT = """You are an advanced Python code assistant.
|
|
69
|
+
|
|
70
|
+
Your task is to **solve the given question by writing Python code only**,
|
|
71
|
+
without any explanation or natural language output.
|
|
72
|
+
The code must compute the answer **programmatically**, not by hardcoding or
|
|
73
|
+
guessing the result.
|
|
74
|
+
|
|
75
|
+
**Rules:**
|
|
76
|
+
- Use Python code to perform the actual computation.
|
|
77
|
+
- Use {package_list} to solve the problem. Do not import any other libraries.
|
|
78
|
+
- **Do not hardcode the final answer** (e.g., avoid writing `print(1/2)` unless
|
|
79
|
+
that value is computed).
|
|
80
|
+
- The result must be obtained through valid computation logic in code.
|
|
81
|
+
- Do not include explanations. Output code only.
|
|
82
|
+
- The entire code must be wrapped in triple backticks:
|
|
83
|
+
```
|
|
84
|
+
[Your Python code here]
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Now, solve the following question using Python. Only output the code:
|
|
88
|
+
"""
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class SelfInstructGenerator(BaseGenerator):
    r"""A generator for creating synthetic datapoints using self-instruct.

    It utilizes both a human-provided dataset (seed_dataset) and generated
    machine instructions (machine_instructions) to produce new, synthetic
    datapoints that include a question, a computed rationale (code), and a
    final answer (from a verifier).

    Few-shot examples are drawn with the seeded ``self._rng`` created by
    :class:`BaseGenerator`, so the ``seed`` argument controls which
    examples are sampled. Every datapoint that passes verification is also
    recorded in ``machine_instructions`` so that later calls can sample
    machine-generated examples.
    """

    class QuestionSchema(BaseModel):
        r"""Schema for the generated question.

        Attributes:
            question (str): The question generated by the model.
        """

        question: str = Field(description="The question generated")

    class RationaleSchema(BaseModel):
        r"""Schema for the generated rationale code.

        Attributes:
            code (str): The generated code without any formatting.
        """

        code: str = Field(
            description="The generated code without any formatting"
        )

    def __init__(
        self,
        seed_dataset: StaticDataset,
        verifier: BaseVerifier,
        instruction_agent: Optional[ChatAgent] = None,
        rationale_agent: Optional[ChatAgent] = None,
        seed: int = 42,
        **kwargs,
    ):
        r"""Initialize the self-instruct generator.

        Args:
            seed_dataset (StaticDataset): Dataset containing seed instructions.
            verifier (BaseVerifier): Verifier instance to validate generated
                solutions.
            instruction_agent (Optional[ChatAgent]): Agent for generating
                instructions. If not provided, a default agent will be created.
            rationale_agent (Optional[ChatAgent]): Agent for generating
                rationales. If not provided, a default agent will be created.
            seed (int): Random seed for reproducibility. (default: :obj:`42`)
            **kwargs: Additional keyword arguments passed to the BaseGenerator.
        """
        # The base initialiser creates `self._rng` and `self._data`;
        # `_data` is intentionally not reset here so that anything the base
        # class placed in it is kept.
        super().__init__(seed=seed, **kwargs)
        self.seed_dataset = seed_dataset
        self.verifier = verifier
        # Packages must be known before the default rationale agent is
        # built, because its system prompt embeds the package list.
        self.packages: List[str] = getattr(
            self.verifier, "required_packages", []
        )
        # Create default agents if not provided
        self.instruction_agent = (
            instruction_agent or self.default_instruction_agent()
        )
        self.rationale_agent = (
            rationale_agent or self.default_rationale_agent()
        )

        # Extract questions from the seed dataset as human_instructions
        self.human_instructions: List[str] = [
            dp.question
            for dp in cast(Iterable[DataPoint], self.seed_dataset)
        ]
        # Verified, machine-generated datapoints; filled by generate_new
        self.machine_instructions: List[DataPoint] = []
        # Instance-level lock guarding updates to the shared lists
        self._lock = asyncio.Lock()

    def default_instruction_agent(self) -> ChatAgent:
        r"""Create the default instruction generation agent.

        The agent uses the default model platform and type with a
        temperature of 0.7 and the default instruction system prompt.

        Returns:
            ChatAgent: An agent with the default instruction prompt.
        """
        model = ModelFactory.create(
            model_platform=ModelPlatformType.DEFAULT,
            model_type=ModelType.DEFAULT,
            model_config_dict={"temperature": 0.7},
        )
        return ChatAgent(
            DEFAULT_INSTRUCTION_SYSTEM_PROMPT,
            model=model,
        )

    def default_rationale_agent(self) -> ChatAgent:
        r"""Create the default rationale generation agent.

        The agent uses the default model platform and type with a
        temperature of 0.0. Its system prompt is the default rationale
        prompt formatted with ``self.packages``.

        Returns:
            ChatAgent: An agent with the rationale prompt
        """
        model = ModelFactory.create(
            model_platform=ModelPlatformType.DEFAULT,
            model_type=ModelType.DEFAULT,
            model_config_dict={"temperature": 0.0},
        )
        return ChatAgent(
            DEFAULT_RATIONALE_SYSTEM_PROMPT.format(package_list=self.packages),
            model=model,
        )

    @staticmethod
    def format_support_block(dp: DataPoint) -> str:
        r"""Format a DataPoint into a few-shot example block.

        Args:
            dp (DataPoint): A data point.

        Returns:
            str: A formatted string containing the question and its
                corresponding code block in Markdown-style Python format.
                A missing rationale is rendered as an empty code block.
        """
        support_q = dp.question.strip()
        support_code = dp.rationale.strip() if dp.rationale else ""
        return (
            f"Question:\n{support_q}\n\n"
            "Code:\n"
            "```python\n"
            f"{support_code}\n"
            "```"
        )

    def generate_new_instruction(
        self,
        agent: ChatAgent,
        support_human_dps: list[DataPoint],
        support_machine_dps: list[DataPoint],
    ) -> str:
        r"""Generate a new instruction using self-instruct prompting.

        Args:
            agent (ChatAgent): The agent to use for generating the instruction.
            support_human_dps (list[DataPoint]): List of human examples to
                sample.
            support_machine_dps (list[DataPoint]): List of machine examples to
                sample.

        Returns:
            str: The newly generated question.
        """
        # Human examples first, then machine examples
        few_shot_examples = [dp.question for dp in support_human_dps]
        few_shot_examples += [dp.question for dp in support_machine_dps]

        # Build the prompt using the few-shot examples
        prompt_parts = ["Below are some question examples:\n\n"]
        prompt_parts.extend(
            f"Question {idx}: {instr}\n"
            for idx, instr in enumerate(few_shot_examples, start=1)
        )
        prompt_parts.append(f"Question {len(few_shot_examples) + 1}:\n")
        prompt_parts.append(
            "Now generate a new question based on the given examples.\n"
        )
        prompt = "".join(prompt_parts)

        question_template = f"Question: {prompt}"
        # NOTE(review): `.parsed` is presumably None when the model output
        # cannot be parsed into the schema; the attribute access below then
        # raises AttributeError, which `generate_new` treats as a retry.
        response = cast(
            SelfInstructGenerator.QuestionSchema,
            agent.step(question_template, response_format=self.QuestionSchema)
            .msgs[0]
            .parsed,
        )
        return response.question

    def generate_rationale(
        self,
        question: str,
        agent: Optional[ChatAgent] = None,
        support_human_dps: Optional[list[DataPoint]] = None,
    ) -> str:
        r"""Generate rationale code (solution) for the given question.

        Args:
            question (str): The question to be solved.
            agent (Optional[ChatAgent]): The agent to use for generating the
                rationale. If None is provided, the default rationale agent
                will be used. (default: :obj:`None`)
            support_human_dps (Optional[list[DataPoint]]): List of human
                examples to sample. (default: :obj:`None`)

        Returns:
            str: The generated code solution as a string.
        """
        # Build few-shot example prompt
        few_shot_prompt = ""
        if support_human_dps:
            few_shot_examples = [
                self.format_support_block(dp) for dp in support_human_dps
            ]
            few_shot_prompt += "Below are example questions and solutions:\n\n"
            few_shot_prompt += "\n\n".join(few_shot_examples)

        few_shot_prompt += f"\n\nWrite code to solve the question:\n{question}"

        response = cast(
            SelfInstructGenerator.RationaleSchema,
            (agent or self.default_rationale_agent())
            .step(few_shot_prompt, response_format=self.RationaleSchema)
            .msgs[0]
            .parsed,
        )
        return response.code

    async def generate_new(
        self,
        n: int,
        max_retries: int = 10,
        human_sample_count: int = 3,
        machine_sample_count: int = 1,
        **kwargs,
    ) -> None:
        r"""Generates and validates `n` new datapoints through
        self-instruct prompting, with a retry limit.

        Args:
            n (int): The number of valid datapoints to generate.
            max_retries (int): Maximum number of retries before stopping.
                (default: :obj:`10`)
            human_sample_count (int): Number of human examples to sample.
                (default: :obj:`3`)
            machine_sample_count (int): Number of machine examples to sample.
                (default: :obj:`1`)
            **kwargs: Additional keyword arguments.

        Raises:
            RuntimeError: If fewer than `n` valid datapoints were produced
                before `max_retries` failures occurred.

        Notes:
            - Retries on validation failures until `n` valid datapoints exist
                or `max_retries` is reached, whichever comes first.
            - If retries are exhausted before reaching `n`, a `RuntimeError`
                is raised and nothing is appended.
            - On success the datapoints are appended to `self._data` and to
                `self.machine_instructions`.
            - Examples are sampled with the seeded `self._rng`.
            - Metadata includes a timestamp for tracking datapoint creation.
        """
        valid_data_points: list[DataPoint] = []
        retries = 0

        # The seed dataset is fixed for the duration of the call, so it is
        # materialised once instead of on every attempt.
        human_dps_list = list(cast(Iterable[DataPoint], self.seed_dataset))

        while len(valid_data_points) < n and retries < max_retries:
            try:
                support_human_dps = self._rng.sample(
                    human_dps_list,
                    min(human_sample_count, len(human_dps_list)),
                )

                # Snapshot so concurrent appends cannot change the
                # population while sampling.
                machine_dps_list = list(self.machine_instructions)
                support_machine_dps = []
                if machine_dps_list and machine_sample_count > 0:
                    support_machine_dps = self._rng.sample(
                        machine_dps_list,
                        min(machine_sample_count, len(machine_dps_list)),
                    )
                question = self.generate_new_instruction(
                    self.instruction_agent,
                    support_human_dps,
                    support_machine_dps,
                )
                rationale = self.generate_rationale(
                    question, self.rationale_agent, support_human_dps
                )
                if not isinstance(rationale, str):
                    raise TypeError(f"Rationale {rationale} is not a string.")

                try:
                    verifier_response = await self.verifier.verify(
                        solution=rationale,
                        reference_answer=None,
                    )
                    if not verifier_response or not verifier_response.result:
                        raise ValueError(
                            "Verifier unsuccessful, response: "
                            f"{verifier_response}"
                        )
                except (ValueError, AttributeError) as e:
                    logger.warning(
                        f"Verifier issue: {e}, "
                        f"retrying... ({retries + 1}/{max_retries})"
                    )
                    retries += 1
                    continue
                try:
                    new_datapoint = DataPoint(
                        question=question,
                        rationale=rationale,
                        final_answer=verifier_response.result,
                        metadata={
                            "synthetic": str(True),
                            "created": datetime.now().isoformat(),
                            "generator": "self_instruct",
                        },
                    )
                except ValidationError as e:
                    logger.warning(
                        f"Datapoint validation failed: {e}, "
                        f"retrying... ({retries + 1}/{max_retries})"
                    )
                    retries += 1
                    continue

                valid_data_points.append(new_datapoint)

            except Exception as e:
                # Deliberate best-effort: any failure in a single attempt
                # (model call, parsing, sampling) costs one retry.
                logger.warning(
                    f"Unexpected error: {e}, retrying..."
                    f" ({retries + 1}/{max_retries})"
                )
                retries += 1

        if len(valid_data_points) < n:
            raise RuntimeError(
                f"Failed to generate {n} valid datapoints "
                f"after {max_retries} retries."
            )

        async with self._lock:
            self._data.extend(valid_data_points)
            # Make the new datapoints available as machine examples for
            # subsequent calls.
            self.machine_instructions.extend(valid_data_points)
|