camel-ai 0.2.21__py3-none-any.whl → 0.2.23a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- camel/__init__.py +1 -1
- camel/agents/_types.py +41 -0
- camel/agents/_utils.py +188 -0
- camel/agents/chat_agent.py +556 -965
- camel/agents/knowledge_graph_agent.py +7 -1
- camel/agents/multi_hop_generator_agent.py +1 -1
- camel/configs/base_config.py +10 -13
- camel/configs/deepseek_config.py +4 -30
- camel/configs/gemini_config.py +5 -31
- camel/configs/openai_config.py +14 -32
- camel/configs/qwen_config.py +36 -36
- camel/datagen/self_improving_cot.py +79 -1
- camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
- camel/datagen/self_instruct/self_instruct.py +7 -2
- camel/datasets/__init__.py +28 -0
- camel/datasets/base.py +969 -0
- camel/embeddings/openai_embedding.py +10 -1
- camel/environments/__init__.py +16 -0
- camel/environments/base.py +503 -0
- camel/extractors/__init__.py +16 -0
- camel/extractors/base.py +263 -0
- camel/interpreters/docker/Dockerfile +12 -0
- camel/interpreters/docker_interpreter.py +19 -1
- camel/interpreters/subprocess_interpreter.py +42 -17
- camel/loaders/__init__.py +2 -0
- camel/loaders/mineru_extractor.py +250 -0
- camel/memories/agent_memories.py +16 -1
- camel/memories/blocks/chat_history_block.py +10 -2
- camel/memories/blocks/vectordb_block.py +1 -0
- camel/memories/context_creators/score_based.py +20 -3
- camel/memories/records.py +10 -0
- camel/messages/base.py +8 -8
- camel/models/_utils.py +57 -0
- camel/models/aiml_model.py +48 -17
- camel/models/anthropic_model.py +41 -3
- camel/models/azure_openai_model.py +39 -3
- camel/models/base_model.py +132 -4
- camel/models/cohere_model.py +88 -11
- camel/models/deepseek_model.py +107 -63
- camel/models/gemini_model.py +133 -15
- camel/models/groq_model.py +72 -10
- camel/models/internlm_model.py +14 -3
- camel/models/litellm_model.py +9 -2
- camel/models/mistral_model.py +42 -5
- camel/models/model_manager.py +48 -3
- camel/models/moonshot_model.py +33 -4
- camel/models/nemotron_model.py +32 -3
- camel/models/nvidia_model.py +43 -3
- camel/models/ollama_model.py +139 -17
- camel/models/openai_audio_models.py +7 -1
- camel/models/openai_compatible_model.py +37 -3
- camel/models/openai_model.py +158 -46
- camel/models/qwen_model.py +61 -4
- camel/models/reka_model.py +53 -3
- camel/models/samba_model.py +209 -4
- camel/models/sglang_model.py +153 -14
- camel/models/siliconflow_model.py +16 -3
- camel/models/stub_model.py +46 -4
- camel/models/togetherai_model.py +38 -3
- camel/models/vllm_model.py +37 -3
- camel/models/yi_model.py +36 -3
- camel/models/zhipuai_model.py +38 -3
- camel/retrievers/__init__.py +3 -0
- camel/retrievers/hybrid_retrival.py +237 -0
- camel/toolkits/__init__.py +9 -2
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +4 -2
- camel/toolkits/base.py +22 -3
- camel/toolkits/code_execution.py +2 -0
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +38 -12
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/github_toolkit.py +5 -1
- camel/toolkits/google_maps_toolkit.py +2 -1
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +0 -3
- camel/toolkits/linkedin_toolkit.py +3 -2
- camel/toolkits/meshy_toolkit.py +3 -2
- camel/toolkits/mineru_toolkit.py +178 -0
- camel/toolkits/networkx_toolkit.py +240 -0
- camel/toolkits/notion_toolkit.py +2 -0
- camel/toolkits/openbb_toolkit.py +3 -2
- camel/toolkits/reddit_toolkit.py +11 -3
- camel/toolkits/retrieval_toolkit.py +6 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -1
- camel/toolkits/stripe_toolkit.py +8 -2
- camel/toolkits/sympy_toolkit.py +44 -1
- camel/toolkits/video_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +3 -2
- camel/toolkits/zapier_toolkit.py +191 -0
- camel/types/__init__.py +2 -2
- camel/types/agents/__init__.py +16 -0
- camel/types/agents/tool_calling_record.py +52 -0
- camel/types/enums.py +3 -0
- camel/types/openai_types.py +16 -14
- camel/utils/__init__.py +2 -1
- camel/utils/async_func.py +2 -2
- camel/utils/commons.py +114 -1
- camel/verifiers/__init__.py +23 -0
- camel/verifiers/base.py +340 -0
- camel/verifiers/models.py +82 -0
- camel/verifiers/python_verifier.py +202 -0
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/METADATA +273 -256
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/RECORD +106 -85
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/LICENSE +0 -0
camel/agents/knowledge_graph_agent.py
CHANGED
@@ -144,6 +144,7 @@ class KnowledgeGraphAgent(ChatAgent):
         self,
         element: "Element",
         parse_graph_elements: bool = False,
+        prompt: Optional[str] = None,
     ) -> Union[str, GraphElement]:
         r"""Run the agent to extract node and relationship information.
 
@@ -151,6 +152,8 @@ class KnowledgeGraphAgent(ChatAgent):
             element (Element): The input element.
             parse_graph_elements (bool, optional): Whether to parse into
                 `GraphElement`. Defaults to `False`.
+            prompt (str, optional): The custom prompt to be used.
+                Defaults to `None`.
 
         Returns:
             Union[str, GraphElement]: The extracted node and relationship
@@ -160,7 +163,10 @@ class KnowledgeGraphAgent(ChatAgent):
         self.reset()
         self.element = element
 
-        knowledge_graph_prompt = TextPrompt(text_prompt)
+        # Use the provided prompt or fall back to the default text_prompt
+        final_prompt = prompt if prompt is not None else text_prompt
+
+        knowledge_graph_prompt = TextPrompt(final_prompt)
         knowledge_graph_generation = knowledge_graph_prompt.format(
             task=str(element)
         )
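The new `prompt` argument lets callers swap in a custom extraction prompt; since the method still calls `.format(task=str(element))`, a custom template should keep the `{task}` placeholder. A minimal usage sketch (the template wording and element construction here are illustrative, not from the package):

```python
from camel.agents import KnowledgeGraphAgent
from camel.loaders import UnstructuredIO

# Illustrative input element; any unstructured Element works.
uio = UnstructuredIO()
element = uio.create_element_from_text(
    text="CAMEL-AI maintains the camel-ai package."
)

agent = KnowledgeGraphAgent()
# Custom template (assumed wording); `{task}` is filled with str(element).
custom_prompt = (
    "Extract all nodes and relationships from the content below.\n"
    "Content: {task}"
)
result = agent.run(element, parse_graph_elements=False, prompt=custom_prompt)
# Omitting prompt (or passing None) falls back to the default text_prompt.
```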
camel/agents/multi_hop_generator_agent.py
CHANGED
@@ -75,7 +75,7 @@ class MultiHopGeneratorAgent(ProgrammableChatAgent):
         Supporting Facts: [List of relevant text segments used]
         """  # noqa: E501
         )
-        self.system_message = BaseMessage.make_assistant_message(
+        self._system_message = BaseMessage.make_assistant_message(
             role_name='Assistant', content=system_text
         )
 
camel/configs/base_config.py
CHANGED
@@ -66,6 +66,8 @@ class BaseConfig(ABC, BaseModel):
 
         This method converts the current configuration object to a dictionary
         representation, which can be used for serialization or other purposes.
+        The dictionary won't contain None values, as some API does not support
+        None values. (Like tool in OpenAI beta API)
 
         Returns:
             dict[str, Any]: A dictionary representation of the current
@@ -73,17 +75,12 @@ class BaseConfig(ABC, BaseModel):
         """
         config_dict = self.model_dump()
 
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = tools_schema
-        return config_dict
+        # Convert tools to OpenAI tool schema
+        config_dict["tools"] = (
+            [tool.get_openai_tool_schema() for tool in self.tools]
+            if self.tools
+            else None
+        )
 
+        # Remove None values
+        return {k: v for k, v in config_dict.items() if v is not None}
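In effect, optional parameters now default to `None` and are silently dropped instead of being serialized as sentinel values. A small sketch of the resulting behavior, using `ChatGPTConfig` as an example subclass:

```python
from camel.configs import ChatGPTConfig

config = ChatGPTConfig(temperature=0.2)
params = config.as_dict()

# Unset optional fields (max_tokens, stop, response_format, ...) are
# None by default, so they no longer appear in the serialized dict.
assert "max_tokens" not in params
# tools is None here as well, so it is stripped rather than sent to the API.
assert "tools" not in params
```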
camel/configs/deepseek_config.py
CHANGED
@@ -14,12 +14,11 @@
 
 from __future__ import annotations
 
-from typing import Any, Optional, Sequence, Type, Union
+from typing import Optional, Sequence, Type, Union
 
 from pydantic import BaseModel
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class DeepSeekConfig(BaseConfig):
@@ -89,10 +88,10 @@ class DeepSeekConfig(BaseConfig):
     temperature: float = 1.0  # deepseek default: 1.0
     top_p: float = 1.0
     stream: bool = False
-    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
     presence_penalty: float = 0.0
-    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
+    response_format: Optional[Union[Type[BaseModel], dict]] = None
     frequency_penalty: float = 0.0
     tool_choice: Optional[Union[dict[str, str], str]] = None
     logprobs: bool = False
@@ -105,30 +104,5 @@ class DeepSeekConfig(BaseConfig):
         if self.stream:
             self.stream_options = {"include_usage": include_usage}
 
-    def as_dict(self) -> dict[str, Any]:
-        r"""Convert the current configuration to a dictionary.
-
-        This method converts the current configuration object to a dictionary
-        representation, which can be used for serialization or other purposes.
-
-        Returns:
-            dict[str, Any]: A dictionary representation of the current
-            configuration.
-        """
-        config_dict = self.model_dump()
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = NOT_GIVEN
-        return config_dict
-
 
 DEEPSEEK_API_PARAMS = {param for param in DeepSeekConfig.model_fields.keys()}
camel/configs/gemini_config.py
CHANGED
@@ -14,12 +14,11 @@
 
 from __future__ import annotations
 
-from typing import Any, Optional, Sequence, Type, Union
+from typing import Optional, Sequence, Type, Union
 
 from pydantic import BaseModel
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class GeminiConfig(BaseConfig):
@@ -80,35 +79,10 @@ class GeminiConfig(BaseConfig):
     top_p: float = 1.0
     n: int = 1
     stream: bool = False
-    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
-    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
-    tool_choice: Optional[Union[dict[str, str], str]] = None
-
-    def as_dict(self) -> dict[str, Any]:
-        r"""Convert the current configuration to a dictionary.
-
-        This method converts the current configuration object to a dictionary
-        representation, which can be used for serialization or other purposes.
-
-        Returns:
-            dict[str, Any]: A dictionary representation of the current
-            configuration.
-        """
-        config_dict = self.model_dump()
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = NOT_GIVEN
-        return config_dict
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
+    response_format: Optional[Union[Type[BaseModel], dict]] = None
+    tool_choice: Optional[Union[dict[str, str], str]] = None
 
 
 Gemini_API_PARAMS = {param for param in GeminiConfig.model_fields.keys()}
camel/configs/openai_config.py
CHANGED
@@ -13,12 +13,11 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from __future__ import annotations
 
-from typing import Any, Optional, Sequence, Type, Union
+from typing import Dict, Optional, Sequence, Type, Union
 
 from pydantic import BaseModel, Field
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class ChatGPTConfig(BaseConfig):
@@ -95,45 +94,28 @@ class ChatGPTConfig(BaseConfig):
             forces the model to call that tool. :obj:`"none"` is the default
             when no tools are present. :obj:`"auto"` is the default if tools
             are present.
+        reasoning_effort(str, optional): A parameter specifying the level of
+            reasoning used by certain model types. Valid values are :obj:
+            `"low"`, :obj:`"medium"`, or :obj:`"high"`. If set, it is only
+            applied to the model types that support it (e.g., :obj:`o1`,
+            :obj:`o1mini`, :obj:`o1preview`, :obj:`o3mini`). If not provided
+            or if the model type does not support it, this parameter is
+            ignored. (default: :obj:`None`)
     """
 
     temperature: float = 0.2  # openai default: 1.0
     top_p: float = 1.0
     n: int = 1
     stream: bool = False
-    stop: Union[str, Sequence[str], NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
+    stop: Optional[Union[str, Sequence[str]]] = None
+    max_tokens: Optional[int] = None
     presence_penalty: float = 0.0
-    response_format: Union[Type[BaseModel], dict, NotGiven] = NOT_GIVEN
+    response_format: Optional[Union[Type[BaseModel], Dict]] = None
    frequency_penalty: float = 0.0
-    logit_bias: dict = Field(default_factory=dict)
+    logit_bias: Dict = Field(default_factory=dict)
     user: str = ""
-    tool_choice: Optional[Union[dict[str, str], str]] = None
-
-    def as_dict(self) -> dict[str, Any]:
-        r"""Convert the current configuration to a dictionary.
-
-        This method converts the current configuration object to a dictionary
-        representation, which can be used for serialization or other purposes.
-
-        Returns:
-            dict[str, Any]: A dictionary representation of the current
-            configuration.
-        """
-        config_dict = self.model_dump()
-        if self.tools:
-            from camel.toolkits import FunctionTool
-
-            tools_schema = []
-            for tool in self.tools:
-                if not isinstance(tool, FunctionTool):
-                    raise ValueError(
-                        f"The tool {tool} should "
-                        "be an instance of `FunctionTool`."
-                    )
-                tools_schema.append(tool.get_openai_tool_schema())
-            config_dict["tools"] = NOT_GIVEN
-        return config_dict
+    tool_choice: Optional[Union[Dict[str, str], str]] = None
+    reasoning_effort: Optional[str] = None
 
 
 OPENAI_API_PARAMS = {param for param in ChatGPTConfig.model_fields.keys()}
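A sketch of wiring the new `reasoning_effort` option into a model backend; the `O3_MINI` enum member is assumed to exist, per the docstring's mention of o3mini:

```python
from camel.configs import ChatGPTConfig
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType

config = ChatGPTConfig(reasoning_effort="high")  # "low" | "medium" | "high"
model = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI,
    model_type=ModelType.O3_MINI,  # assumed enum member for o3-mini
    model_config_dict=config.as_dict(),
)
# On model types without reasoning support, the parameter is ignored.
```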
camel/configs/qwen_config.py
CHANGED
@@ -13,10 +13,9 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 from __future__ import annotations
 
-from typing import List, Optional, Union
+from typing import Dict, List, Optional, Union
 
 from camel.configs.base_config import BaseConfig
-from camel.types import NOT_GIVEN, NotGiven
 
 
 class QwenConfig(BaseConfig):
@@ -27,58 +26,59 @@ class QwenConfig(BaseConfig):
     Args:
         stream (bool, optional): Whether to stream the response.
            (default: :obj:`False`)
-        temperature (float, optional): Controls the diversity and focus of
-            the generated results. Lower values make the output more focused,
-            while higher values make it more diverse.
-            (default: :obj:`0.3`)
-        top_p (float, optional): Controls the diversity and focus of
+        temperature (float, optional): Controls the diversity and
+            focus of the generated results. Lower values make the output more
+            focused, while higher values make it more diverse.
+            (default: :obj:`0.3`)
+        top_p (float, optional): Controls the diversity and focus of
+            the generated results. Higher values make the output more diverse,
             while lower values make it more focused. (default: :obj:`0.9`)
-        presence_penalty (float, optional): Controls the repetition
+        presence_penalty (float, optional): Controls the repetition of
             content in the generated results. Positive values reduce the
             repetition of content, while negative values increase it.
             (default: :obj:`0.0`)
-        response_format (object, optional): Specifies the format of the
-            returned content. The available values are `{"type": "text"}`
-            or `{"type": "json_object"}`. Setting it to `{"type": "json_object"}`
-            will output a standard JSON string.
-            (default: :obj:`NOT_GIVEN`)
-        max_tokens (int, optional): Allows the model to
+        response_format (Optional[Dict[str, str]], optional): Specifies the
+            format of the returned content. The available values are
+            `{"type": "text"}` or `{"type": "json_object"}`. Setting it to
+            `{"type": "json_object"}` will output a standard JSON string.
+            (default: :obj:`None`)
+        max_tokens (Optional[int], optional): Allows the model to
             generate the maximum number of tokens.
-            (default: :obj:`NOT_GIVEN`)
-        seed (int, optional): Sets the seed parameter to make the text
-            generation process more deterministic, typically used to ensure
-            that the results are consistent across model runs. By passing the
-            same seed value (specified by you) in each model call while
-            keeping other parameters unchanged, the model is likely to return
-            the same result.
+            (default: :obj:`None`)
+        seed (Optional[int], optional): Sets the seed parameter to make the
+            text generation process more deterministic, typically used to
+            ensure that the results are consistent across model runs. By
+            passing the same seed value (specified by you) in each model call
+            while keeping other parameters unchanged, the model is likely to
+            return the same result.
             (default: :obj:`None`)
+        stop (Optional[Union[str, List]], optional): Using the stop parameter,
+            the model will automatically stop generating text when it is about
+            to include the specified string or token_id. You can use the stop
+            parameter to control the output of the model by passing sensitive
+            words. (default: :obj:`None`)
-        tools (list, optional): Specifies an array of tools that the model can
+        tools (List, optional): Specifies an array of tools that the model can
             call. It can contain one or more tool objects. During a function
             call process, the model will select one tool from the array.
             (default: :obj:`None`)
-        extra_body (dict, optional): Additional parameters to be sent to the
-            Qwen API. If you want to enable internet search, you can set this
-            parameter to `{"enable_search": True}`.
-            (default: :obj:`NOT_GIVEN`)
+        extra_body (Optional[Dict[str, str]], optional): Additional parameters
+            to be sent to the Qwen API. If you want to enable internet search,
+            you can set this parameter to `{"enable_search": True}`.
+            (default: :obj:`None`)
         include_usage (bool, optional): When streaming, specifies whether to
-            include usage information in `stream_options`. (default:
-            :obj:`True`)
+            include usage information in `stream_options`.
+            (default: :obj:`True`)
     """
 
     stream: bool = False
     temperature: float = 0.3
     top_p: float = 0.9
     presence_penalty: float = 0.0
-    response_format: Union[dict, NotGiven] = NOT_GIVEN
-    max_tokens: Union[int, NotGiven] = NOT_GIVEN
+    response_format: Optional[Dict[str, str]] = None
+    max_tokens: Optional[int] = None
     seed: Optional[int] = None
-    stop: Optional[Union[str, list]] = None
-    extra_body: Union[dict, NotGiven] = NOT_GIVEN
+    stop: Optional[Union[str, List]] = None
+    extra_body: Optional[Dict[str, str]] = None
 
     def __init__(self, include_usage: bool = True, **kwargs):
         super().__init__(**kwargs)
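For instance, the documented `extra_body` switch for Qwen's internet search would be passed like this (a sketch; it only shows how the config fields compose):

```python
from camel.configs import QwenConfig

config = QwenConfig(
    temperature=0.3,
    extra_body={"enable_search": True},  # documented Qwen-specific option
)
# response_format, max_tokens, stop, ... default to None and are
# stripped by BaseConfig.as_dict() before reaching the API client.
params = config.as_dict()
```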
camel/datagen/self_improving_cot.py
CHANGED
@@ -85,6 +85,7 @@ class SelfImprovingCoTPipeline:
         problems: List[Dict],
         max_iterations: int = 3,
         score_threshold: Union[float, Dict[str, float]] = 0.7,
+        rejection_sampling_n: Optional[int] = None,
         evaluate_agent: Optional[ChatAgent] = None,
         reward_model: Optional[BaseRewardModel] = None,
         output_path: Optional[str] = None,
@@ -111,6 +112,11 @@ class SelfImprovingCoTPipeline:
                 "coherence": 0.7}. If using reward model and threshold for a
                 dimension is not specified, will use the default value 0.7.
                 (default: :obj:`0.7`)
+            rejection_sampling_n (int, optional): Specifies the number of
+                samples to be drawn using the rejection sampling
+                method, where samples are accepted or rejected based on
+                a predefined condition to achieve a desired distribution.
+                (default: :obj: `None`)
             evaluate_agent (Optional[ChatAgent]): The chat agent used for
                 evaluating reasoning traces. (default: :obj:`None`)
             reward_model (BaseRewardModel, optional): Model used to evaluate
@@ -139,6 +145,7 @@ class SelfImprovingCoTPipeline:
         self.output_path = output_path
         self.max_iterations = max_iterations
         self.score_threshold = score_threshold
+        self.rejection_sampling_n = rejection_sampling_n
         self.reward_model = reward_model
         self.evaluator = (
             Evaluator(reward_model=reward_model) if reward_model else None
@@ -486,6 +493,71 @@ class SelfImprovingCoTPipeline:
 
         return evaluation.model_dump()
 
+    @retry_on_error()
+    def generate_reasoning_trace_rejection(self, problem: str) -> str:
+        r"""Generate multiple candidate reasoning traces for a problem and
+        select the best one based on evaluation.
+
+        Args:
+            problem (str): The problem text for generating a reasoning trace.
+
+        Returns:
+            str: The best candidate trace that meets quality criteria, or the
+                first candidate if none qualify.
+        """
+        few_shot_examples = (
+            f"Examples: {self.few_shot_examples}"
+            if self.few_shot_examples
+            else ""
+        )
+        prompt = self.REASONING_TEMPLATE.format(
+            problem=problem, few_shot_examples=few_shot_examples
+        )
+        responses, candidate_traces = None, []
+        if 'n' in self.reason_agent.model_backend.model_config_dict:
+            self.reason_agent.model_backend.model_config_dict['n'] = (
+                self.rejection_sampling_n
+            )
+            # Generate multiple condidate traces in one call using parameter n
+            responses = self.reason_agent.step(prompt)
+            # Extract cancidate traces
+            candidate_traces = [choice.content for choice in responses.msgs]
+        else:
+            sampling_n = (
+                self.rejection_sampling_n
+                if self.rejection_sampling_n is not None
+                else 1
+            )
+            for _i in range(sampling_n):
+                trace = self.generate_reasoning_trace(problem)
+                candidate_traces.append(trace)
+
+        best_trace = None
+        best_avg_score = 0.01
+        candidate_avg_scores = []
+        for trace in candidate_traces:
+            eval_results = self.evaluate_trace(problem, trace)
+            # Remove feedback from scores
+            scores = {k: v for k, v in eval_results.items() if k != "feedback"}
+            # Compute average score (assuming at least one score exists)
+            if scores:
+                avg_score = sum(scores.values()) / len(scores)
+            else:
+                avg_score = 0.0
+            candidate_avg_scores.append(avg_score)
+            # If the candidate meets the threshold and is the best, select it
+            if (
+                self._check_score_threshold(scores)
+                and avg_score > best_avg_score
+            ):
+                best_trace = trace
+                best_avg_score = avg_score
+        if best_trace is None:
+            best_trace = candidate_traces[
+                candidate_avg_scores.index(max(candidate_avg_scores))
+            ]
+        return best_trace
+
     @retry_on_error()
     def improve_trace(
         self,
@@ -602,7 +674,13 @@ class SelfImprovingCoTPipeline:
 
         problem_text = problem["problem"]
         solution_text = problem.get("solution", "")
-        current_trace = self.generate_reasoning_trace(problem_text)
+        current_trace = None
+        if self.rejection_sampling_n:
+            current_trace = self.generate_reasoning_trace_rejection(
+                problem_text
+            )
+        else:
+            current_trace = self.generate_reasoning_trace(problem_text)
         improvement_history = []
         scores = {}
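A sketch of enabling the new rejection-sampling path; `reason_agent` is assumed to be the pipeline's existing generator-agent parameter, and both agents need a configured model backend:

```python
from camel.agents import ChatAgent
from camel.datagen import SelfImprovingCoTPipeline

reason_agent = ChatAgent("You produce step-by-step reasoning traces.")
evaluate_agent = ChatAgent("You score reasoning traces for quality.")

pipeline = SelfImprovingCoTPipeline(
    reason_agent=reason_agent,
    evaluate_agent=evaluate_agent,
    problems=[{"problem": "If x + 3 = 7, what is x?"}],
    max_iterations=2,
    rejection_sampling_n=4,  # draw 4 candidates, keep the best-scoring one
)
results = pipeline.generate()
```

Note that backends supporting the `n` parameter draw all candidates in a single call; others fall back to `rejection_sampling_n` sequential generations.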
camel/datagen/self_instruct/filter/instruction_filter.py
CHANGED
@@ -11,14 +11,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple, Union
+
+from camel.logger import get_logger
 
 from .filter_function import FilterFunction, RewardModelFilter
 from .filter_registry import FILTER_REGISTRY
 
+logger = get_logger(__name__)
+
 
 class InstructionFilter:
-    def __init__(self, filters_config: Dict[str, Dict[str, Any]]):
+    def __init__(
+        self,
+        filters_config: Dict[str, Dict[str, Any]],
+        stop_on_first_failure: bool = False,
+    ):
         r"""Initialize the InstructionFilter with a dictionary of filter
         configurations.
 
@@ -37,12 +45,15 @@ class InstructionFilter:
             Each key in filters_config corresponds to a filter name
             (registered in FILTER_REGISTRY).
             Each value is a dict of parameters for that filter.
+            stop_on_first_failure (bool): If True, stops checking filters after
+                the first failure.
         """
         self.filters: List[FilterFunction] = []
         for filter_name, params in filters_config.items():
             if filter_name not in FILTER_REGISTRY:
                 raise ValueError(f"Unknown filter function: {filter_name}")
             self.filters.append(FILTER_REGISTRY[filter_name](params))
+        self.stop_on_first_failure: bool = stop_on_first_failure
 
     def add_filter(self, filter_function: FilterFunction):
         r"""Add a custom filter function to the InstructionFilter.
@@ -55,7 +66,7 @@ class InstructionFilter:
 
     def filter(
         self, prompt: str, instruction: str, return_details: bool = False
-    ):
+    ) -> Union[bool, Tuple[bool, List[str]]]:
         r"""Check if the given instruction passes all filter functions.
 
         Args:
@@ -75,6 +86,11 @@ class InstructionFilter:
             f.prompt = prompt
             if not f.apply(instruction):
                 failed_filters.append(type(f).__name__)
+                logger.warning(
+                    f"{type(f).__name__} failed instruction: {instruction}"
+                )
+                if self.stop_on_first_failure:
+                    break
 
         if return_details:
             return len(failed_filters) == 0, failed_filters
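A short sketch of the fail-fast behavior; the filter names and parameters below are illustrative and must match entries in FILTER_REGISTRY:

```python
from camel.datagen.self_instruct.filter.instruction_filter import (
    InstructionFilter,
)

# Hypothetical registry entries used for illustration.
filters_config = {
    "length": {"min_len": 5, "max_len": 200},
    "keyword": {"keywords": ["image", "video"]},
}
instruction_filter = InstructionFilter(
    filters_config, stop_on_first_failure=True
)
passed, failed = instruction_filter.filter(
    prompt="", instruction="Sort a list of integers.", return_details=True
)
# With stop_on_first_failure=True, `failed` holds at most one filter name,
# and a warning is logged for the failing filter.
```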
camel/datagen/self_instruct/self_instruct.py
CHANGED
@@ -45,6 +45,8 @@ class SelfInstructPipeline:
         filter_config (Optional[Dict[str, Dict[str, Any]]]): configuration
             for the filter functions registered in FILE_REGISTRY.
             (default::obj:`None`)
+        stop_on_first_failure (bool): If True, stops checking filters after
+            the first failure.
     """
 
     def __init__(
@@ -56,6 +58,7 @@ class SelfInstructPipeline:
         human_to_machine_ratio: tuple = (6, 2),
         instruction_filter: Optional[InstructionFilter] = None,
         filter_config: Optional[Dict[str, Dict[str, Any]]] = None,
+        stop_on_first_failure: bool = False,
     ):
         self.agent = agent
         self.num_machine_instructions = num_machine_instructions
@@ -80,7 +83,9 @@ class SelfInstructPipeline:
         config_to_use = (
             filter_config if filter_config is not None else default_config
         )
-        self.instruction_filter = InstructionFilter(config_to_use)
+        self.instruction_filter = InstructionFilter(
+            config_to_use, stop_on_first_failure
+        )
 
     def load_seed(self, path: str):
         r"""Load seed tasks from a file. Defaults to a predefined seed file if
@@ -361,7 +366,7 @@ class SelfInstructPipeline:
             in JSON format.
         """
         with open(self.data_output_path, 'w') as f:
-            json.dump(self.machine_tasks, f, indent=4)
+            json.dump(self.machine_tasks, f, indent=4, ensure_ascii=False)
 
     def generate(self):
         r"""Execute the entire pipeline to generate machine instructions
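The flag threads through the pipeline constructor to the `InstructionFilter` it builds; a minimal sketch, assuming a configured agent and the default seed tasks:

```python
from camel.agents import ChatAgent
from camel.datagen.self_instruct import SelfInstructPipeline

agent = ChatAgent()
pipeline = SelfInstructPipeline(
    agent=agent,
    num_machine_instructions=5,
    stop_on_first_failure=True,  # forwarded to InstructionFilter
)
pipeline.generate()
# Output JSON is now written with ensure_ascii=False, so non-ASCII
# instructions are stored verbatim instead of as \uXXXX escapes.
```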
camel/datasets/__init__.py
ADDED
@@ -0,0 +1,28 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from .base import (
+    BaseDataset,
+    DataPoint,
+    GenerativeDataset,
+    SeedDataset,
+    SyntheticDataset,
+)
+
+__all__ = [
+    "DataPoint",
+    "BaseDataset",
+    "SeedDataset",
+    "GenerativeDataset",
+    "SyntheticDataset",
+]
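As a quick check of the new public surface, the re-exported names can be imported directly from the package (the class definitions themselves live in camel/datasets/base.py):

```python
from camel.datasets import (
    BaseDataset,
    DataPoint,
    GenerativeDataset,
    SeedDataset,
    SyntheticDataset,
)
```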