camel-ai 0.2.21__py3-none-any.whl → 0.2.23a0__py3-none-any.whl
This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.
- camel/__init__.py +1 -1
- camel/agents/_types.py +41 -0
- camel/agents/_utils.py +188 -0
- camel/agents/chat_agent.py +556 -965
- camel/agents/knowledge_graph_agent.py +7 -1
- camel/agents/multi_hop_generator_agent.py +1 -1
- camel/configs/base_config.py +10 -13
- camel/configs/deepseek_config.py +4 -30
- camel/configs/gemini_config.py +5 -31
- camel/configs/openai_config.py +14 -32
- camel/configs/qwen_config.py +36 -36
- camel/datagen/self_improving_cot.py +79 -1
- camel/datagen/self_instruct/filter/instruction_filter.py +19 -3
- camel/datagen/self_instruct/self_instruct.py +7 -2
- camel/datasets/__init__.py +28 -0
- camel/datasets/base.py +969 -0
- camel/embeddings/openai_embedding.py +10 -1
- camel/environments/__init__.py +16 -0
- camel/environments/base.py +503 -0
- camel/extractors/__init__.py +16 -0
- camel/extractors/base.py +263 -0
- camel/interpreters/docker/Dockerfile +12 -0
- camel/interpreters/docker_interpreter.py +19 -1
- camel/interpreters/subprocess_interpreter.py +42 -17
- camel/loaders/__init__.py +2 -0
- camel/loaders/mineru_extractor.py +250 -0
- camel/memories/agent_memories.py +16 -1
- camel/memories/blocks/chat_history_block.py +10 -2
- camel/memories/blocks/vectordb_block.py +1 -0
- camel/memories/context_creators/score_based.py +20 -3
- camel/memories/records.py +10 -0
- camel/messages/base.py +8 -8
- camel/models/_utils.py +57 -0
- camel/models/aiml_model.py +48 -17
- camel/models/anthropic_model.py +41 -3
- camel/models/azure_openai_model.py +39 -3
- camel/models/base_model.py +132 -4
- camel/models/cohere_model.py +88 -11
- camel/models/deepseek_model.py +107 -63
- camel/models/gemini_model.py +133 -15
- camel/models/groq_model.py +72 -10
- camel/models/internlm_model.py +14 -3
- camel/models/litellm_model.py +9 -2
- camel/models/mistral_model.py +42 -5
- camel/models/model_manager.py +48 -3
- camel/models/moonshot_model.py +33 -4
- camel/models/nemotron_model.py +32 -3
- camel/models/nvidia_model.py +43 -3
- camel/models/ollama_model.py +139 -17
- camel/models/openai_audio_models.py +7 -1
- camel/models/openai_compatible_model.py +37 -3
- camel/models/openai_model.py +158 -46
- camel/models/qwen_model.py +61 -4
- camel/models/reka_model.py +53 -3
- camel/models/samba_model.py +209 -4
- camel/models/sglang_model.py +153 -14
- camel/models/siliconflow_model.py +16 -3
- camel/models/stub_model.py +46 -4
- camel/models/togetherai_model.py +38 -3
- camel/models/vllm_model.py +37 -3
- camel/models/yi_model.py +36 -3
- camel/models/zhipuai_model.py +38 -3
- camel/retrievers/__init__.py +3 -0
- camel/retrievers/hybrid_retrival.py +237 -0
- camel/toolkits/__init__.py +9 -2
- camel/toolkits/arxiv_toolkit.py +2 -1
- camel/toolkits/ask_news_toolkit.py +4 -2
- camel/toolkits/base.py +22 -3
- camel/toolkits/code_execution.py +2 -0
- camel/toolkits/dappier_toolkit.py +2 -1
- camel/toolkits/data_commons_toolkit.py +38 -12
- camel/toolkits/function_tool.py +13 -0
- camel/toolkits/github_toolkit.py +5 -1
- camel/toolkits/google_maps_toolkit.py +2 -1
- camel/toolkits/google_scholar_toolkit.py +2 -0
- camel/toolkits/human_toolkit.py +0 -3
- camel/toolkits/linkedin_toolkit.py +3 -2
- camel/toolkits/meshy_toolkit.py +3 -2
- camel/toolkits/mineru_toolkit.py +178 -0
- camel/toolkits/networkx_toolkit.py +240 -0
- camel/toolkits/notion_toolkit.py +2 -0
- camel/toolkits/openbb_toolkit.py +3 -2
- camel/toolkits/reddit_toolkit.py +11 -3
- camel/toolkits/retrieval_toolkit.py +6 -1
- camel/toolkits/semantic_scholar_toolkit.py +2 -1
- camel/toolkits/stripe_toolkit.py +8 -2
- camel/toolkits/sympy_toolkit.py +44 -1
- camel/toolkits/video_toolkit.py +2 -0
- camel/toolkits/whatsapp_toolkit.py +3 -2
- camel/toolkits/zapier_toolkit.py +191 -0
- camel/types/__init__.py +2 -2
- camel/types/agents/__init__.py +16 -0
- camel/types/agents/tool_calling_record.py +52 -0
- camel/types/enums.py +3 -0
- camel/types/openai_types.py +16 -14
- camel/utils/__init__.py +2 -1
- camel/utils/async_func.py +2 -2
- camel/utils/commons.py +114 -1
- camel/verifiers/__init__.py +23 -0
- camel/verifiers/base.py +340 -0
- camel/verifiers/models.py +82 -0
- camel/verifiers/python_verifier.py +202 -0
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/METADATA +273 -256
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/RECORD +106 -85
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/WHEEL +1 -1
- {camel_ai-0.2.21.dist-info → camel_ai-0.2.23a0.dist-info}/LICENSE +0 -0
camel/models/stub_model.py
CHANGED

@@ -12,9 +12,10 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import time
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

-from openai import Stream
+from openai import AsyncStream, Stream
+from pydantic import BaseModel

 from camel.messages import OpenAIMessage
 from camel.models import BaseModelBackend

@@ -74,8 +75,49 @@ class StubModel(BaseModelBackend):
         self._token_counter = StubTokenCounter()
         return self._token_counter

-    def run(
-        self,
+    async def _arun(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+        r"""Run fake inference by returning a fixed string.
+        All arguments are unused for the dummy model.
+
+        Returns:
+            Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+                The response from the dummy model.
+        """
+        ARBITRARY_STRING = "Lorem Ipsum"
+        response: ChatCompletion = ChatCompletion(
+            id="stub_model_id",
+            model="stub",
+            object="chat.completion",
+            created=int(time.time()),
+            choices=[
+                Choice(
+                    finish_reason="stop",
+                    index=0,
+                    message=ChatCompletionMessage(
+                        content=ARBITRARY_STRING,
+                        role="assistant",
+                    ),
+                    logprobs=None,
+                )
+            ],
+            usage=CompletionUsage(
+                completion_tokens=10,
+                prompt_tokens=10,
+                total_tokens=20,
+            ),
+        )
+        return response
+
+    def _run(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
         r"""Run fake inference by returning a fixed string.
         All arguments are unused for the dummy model.
camel/models/togetherai_model.py
CHANGED

@@ -13,9 +13,10 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

 import os
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

-from openai import OpenAI, Stream
+from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+from pydantic import BaseModel

 from camel.configs import TOGETHERAI_API_PARAMS, TogetherAIConfig
 from camel.messages import OpenAIMessage

@@ -82,10 +83,44 @@ class TogetherAIModel(BaseModelBackend):
             api_key=self._api_key,
             base_url=self._url,
         )
+        self._async_client = AsyncOpenAI(
+            timeout=180,
+            max_retries=3,
+            api_key=self._api_key,
+            base_url=self._url,
+        )
+
+    async def _arun(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+        r"""Runs inference of OpenAI chat completion.
+
+        Args:
+            messages (List[OpenAIMessage]): Message list with the chat history
+                in OpenAI API format.
+
+        Returns:
+            Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+                `ChatCompletion` in the non-stream mode, or
+                `AsyncStream[ChatCompletionChunk]` in the stream mode.
+        """
+        # Use OpenAI cilent as interface call Together AI
+        # Reference: https://docs.together.ai/docs/openai-api-compatibility
+        response = await self._async_client.chat.completions.create(
+            messages=messages,
+            model=self.model_type,
+            **self.model_config_dict,
+        )
+        return response

-    def run(
+    def _run(
         self,
         messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
         r"""Runs inference of OpenAI chat completion.
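The change above is the template for every model backend in this release: alongside the existing synchronous OpenAI client and `_run`, each backend now builds an `AsyncOpenAI` client with identical settings and implements an `_arun` coroutine. A minimal sketch of the pattern outside CAMEL, assuming only the openai SDK; the class name, environment variable, and model id below are illustrative, not taken from the diff:

    import asyncio
    import os

    from openai import AsyncOpenAI, OpenAI


    class ToyBackend:
        """Sync and async clients side by side, as in the diffs above."""

        def __init__(self, api_key: str, base_url: str, model: str) -> None:
            # Mirrors the diffs: both clients share the same settings.
            self._client = OpenAI(
                timeout=180, max_retries=3, api_key=api_key, base_url=base_url
            )
            self._async_client = AsyncOpenAI(
                timeout=180, max_retries=3, api_key=api_key, base_url=base_url
            )
            self.model = model

        def _run(self, messages):
            return self._client.chat.completions.create(
                messages=messages, model=self.model
            )

        async def _arun(self, messages):
            # Same request, awaited on the async client.
            return await self._async_client.chat.completions.create(
                messages=messages, model=self.model
            )


    async def main() -> None:
        backend = ToyBackend(
            api_key=os.environ["TOGETHER_API_KEY"],  # illustrative env var
            base_url="https://api.together.xyz/v1",
            model="meta-llama/Llama-3-8b-chat-hf",  # illustrative model id
        )
        reply = await backend._arun([{"role": "user", "content": "Hello"}])
        print(reply.choices[0].message.content)


    if __name__ == "__main__":
        asyncio.run(main())

Note that in the backends shown here, `response_format` and `tools` are accepted but unused by `_arun`; the diffs thread them through the signature so that all backends share one interface.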
camel/models/vllm_model.py
CHANGED

@@ -13,9 +13,10 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
 import os
 import subprocess
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

-from openai import OpenAI, Stream
+from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+from pydantic import BaseModel

 from camel.configs import VLLM_API_PARAMS, VLLMConfig
 from camel.messages import OpenAIMessage

@@ -77,6 +78,12 @@ class VLLMModel(BaseModelBackend):
             api_key="EMPTY",  # required but ignored
             base_url=self._url,
         )
+        self._async_client = AsyncOpenAI(
+            timeout=180,
+            max_retries=3,
+            api_key="EMPTY",  # required but ignored
+            base_url=self._url,
+        )

     def _start_server(self) -> None:
         r"""Starts the vllm server in a subprocess."""

@@ -121,9 +128,36 @@
                 "input into vLLM model backend."
             )

-    def run(
+    async def _arun(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+        r"""Runs inference of OpenAI chat completion.
+
+        Args:
+            messages (List[OpenAIMessage]): Message list with the chat history
+                in OpenAI API format.
+
+        Returns:
+            Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+                `ChatCompletion` in the non-stream mode, or
+                `AsyncStream[ChatCompletionChunk]` in the stream mode.
+        """
+
+        response = await self._async_client.chat.completions.create(
+            messages=messages,
+            model=self.model_type,
+            **self.model_config_dict,
+        )
+        return response
+
+    def _run(
         self,
         messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
         r"""Runs inference of OpenAI chat completion.
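The vLLM variant of the pattern differs only in its credentials: the async client points at a locally served OpenAI-compatible endpoint, with the placeholder key "EMPTY" that the client requires but the server ignores. Roughly, the new `_arun` path amounts to the following sketch; the URL and model name are placeholders for whatever `vllm serve` is actually running (port 8000 is vLLM's default):

    import asyncio

    from openai import AsyncOpenAI


    async def main() -> None:
        client = AsyncOpenAI(
            api_key="EMPTY",  # required by the client, ignored by vLLM
            base_url="http://localhost:8000/v1",  # vLLM's default address
        )
        response = await client.chat.completions.create(
            model="Qwen/Qwen2.5-7B-Instruct",  # placeholder model id
            messages=[{"role": "user", "content": "ping"}],
        )
        print(response.choices[0].message.content)


    if __name__ == "__main__":
        asyncio.run(main())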
camel/models/yi_model.py
CHANGED

@@ -13,9 +13,10 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

 import os
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

-from openai import OpenAI, Stream
+from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+from pydantic import BaseModel

 from camel.configs import YI_API_PARAMS, YiConfig
 from camel.messages import OpenAIMessage

@@ -80,10 +81,42 @@ class YiModel(BaseModelBackend):
             api_key=self._api_key,
             base_url=self._url,
         )
+        self._async_client = AsyncOpenAI(
+            timeout=180,
+            max_retries=3,
+            api_key=self._api_key,
+            base_url=self._url,
+        )
+
+    async def _arun(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+        r"""Runs inference of Yi chat completion.
+
+        Args:
+            messages (List[OpenAIMessage]): Message list with the chat history
+                in OpenAI API format.
+
+        Returns:
+            Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+                `ChatCompletion` in the non-stream mode, or
+                `AsyncStream[ChatCompletionChunk]` in the stream mode.
+        """
+        response = await self._async_client.chat.completions.create(
+            messages=messages,
+            model=self.model_type,
+            **self.model_config_dict,
+        )
+        return response

-    def run(
+    def _run(
         self,
         messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
         r"""Runs inference of Yi chat completion.
camel/models/zhipuai_model.py
CHANGED

@@ -13,9 +13,10 @@
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

 import os
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Dict, List, Optional, Type, Union

-from openai import OpenAI, Stream
+from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+from pydantic import BaseModel

 from camel.configs import ZHIPUAI_API_PARAMS, ZhipuAIConfig
 from camel.messages import OpenAIMessage

@@ -80,10 +81,44 @@ class ZhipuAIModel(BaseModelBackend):
             api_key=self._api_key,
             base_url=self._url,
         )
+        self._async_client = AsyncOpenAI(
+            timeout=180,
+            max_retries=3,
+            api_key=self._api_key,
+            base_url=self._url,
+        )
+
+    async def _arun(
+        self,
+        messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
+    ) -> Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+        r"""Runs inference of OpenAI chat completion.
+
+        Args:
+            messages (List[OpenAIMessage]): Message list with the chat history
+                in OpenAI API format.
+
+        Returns:
+            Union[ChatCompletion, AsyncStream[ChatCompletionChunk]]:
+                `ChatCompletion` in the non-stream mode, or
+                `AsyncStream[ChatCompletionChunk]` in the stream mode.
+        """
+        # Use OpenAI cilent as interface call ZhipuAI
+        # Reference: https://open.bigmodel.cn/dev/api#openai_sdk
+        response = await self._async_client.chat.completions.create(
+            messages=messages,
+            model=self.model_type,
+            **self.model_config_dict,
+        )
+        return response

-    def run(
+    def _run(
         self,
         messages: List[OpenAIMessage],
+        response_format: Optional[Type[BaseModel]] = None,
+        tools: Optional[List[Dict[str, Any]]] = None,
     ) -> Union[ChatCompletion, Stream[ChatCompletionChunk]]:
         r"""Runs inference of OpenAI chat completion.
camel/retrievers/__init__.py
CHANGED

@@ -11,11 +11,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# ruff: noqa: I001
 from .auto_retriever import AutoRetriever
 from .base import BaseRetriever
 from .bm25_retriever import BM25Retriever
 from .cohere_rerank_retriever import CohereRerankRetriever
 from .vector_retriever import VectorRetriever
+from .hybrid_retrival import HybridRetriever

 __all__ = [
     'BaseRetriever',

@@ -23,4 +25,5 @@ __all__ = [
     'AutoRetriever',
     'BM25Retriever',
     'CohereRerankRetriever',
+    'HybridRetriever',
 ]
camel/retrievers/hybrid_retrival.py
ADDED

@@ -0,0 +1,237 @@
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
+from typing import Any, Collection, Dict, List, Optional, Sequence, Union
+
+import numpy as np
+
+from camel.embeddings import BaseEmbedding
+from camel.retrievers import BaseRetriever, BM25Retriever, VectorRetriever
+from camel.storages import BaseVectorStorage
+
+
+class HybridRetriever(BaseRetriever):
+    def __init__(
+        self,
+        embedding_model: Optional[BaseEmbedding] = None,
+        vector_storage: Optional[BaseVectorStorage] = None,
+    ) -> None:
+        r"""Initializes the HybridRetriever with optional embedding model and
+        vector storage.
+
+        Args:
+            embedding_model (Optional[BaseEmbedding]): An optional embedding
+                model used by the VectorRetriever. Defaults to None.
+            vector_storage (Optional[BaseVectorStorage]): An optional vector
+                storage used by the VectorRetriever. Defaults to None.
+        """
+        self.vr = VectorRetriever(embedding_model, vector_storage)
+        self.bm25 = BM25Retriever()
+
+    def process(self, content_input_path: str) -> None:
+        r"""Processes the content input path for both vector and BM25
+        retrievers.
+
+        Args:
+            content_input_path (str): File path or URL of the content to be
+                processed.
+
+        Raises:
+            ValueError: If the content_input_path is empty.
+        """
+        if not content_input_path:
+            raise ValueError("content_input_path cannot be empty.")
+
+        self.content_input_path = content_input_path
+        self.vr.process(content=self.content_input_path)
+        self.bm25.process(content_input_path=self.content_input_path)
+
+    def _sort_rrf_scores(
+        self,
+        vector_retriever_results: List[Dict[str, Any]],
+        bm25_retriever_results: List[Dict[str, Any]],
+        top_k: int,
+        vector_weight: float,
+        bm25_weight: float,
+        rank_smoothing_factor: float,
+    ) -> List[Dict[str, Union[str, float]]]:
+        r"""Sorts and combines results from vector and BM25 retrievers using
+        Reciprocal Rank Fusion (RRF).
+
+        Args:
+            vector_retriever_results: A list of dictionaries containing the
+                results from the vector retriever, where each dictionary
+                contains a 'text' entry.
+            bm25_retriever_results: A list of dictionaries containing the
+                results from the BM25 retriever, where each dictionary
+                contains a 'text' entry.
+            top_k: The number of top results to return after sorting by RRF
+                score.
+            vector_weight: The weight to assign to the vector retriever
+                results in the RRF calculation.
+            bm25_weight: The weight to assign to the BM25 retriever results in
+                the RRF calculation.
+            rank_smoothing_factor: A hyperparameter for the RRF calculation
+                that helps smooth the rank positions.
+
+        Returns:
+            List[Dict[str, Union[str, float]]]: A list of dictionaries
+                representing the sorted results. Each dictionary contains the
+                'text' from the retrieved items and their corresponding
+                'rrf_score'.
+
+        Raises:
+            ValueError: If any of the input weights are negative.
+
+        References:
+            https://medium.com/@devalshah1619/mathematical-intuition-behind-reciprocal-rank-fusion-rrf-explained-in-2-mins-002df0cc5e2a
+            https://colab.research.google.com/drive/1iwVJrN96fiyycxN1pBqWlEr_4EPiGdGy#scrollTo=0qh83qGV2dY8
+        """
+        text_to_id = {}
+        id_to_info = {}
+        current_id = 1
+
+        # Iterate over vector_retriever_results
+        for rank, result in enumerate(vector_retriever_results, start=1):
+            text = result.get('text', None)  # type: ignore[attr-defined]
+            if text is None:
+                raise KeyError("Each result must contain a 'text' key")
+
+            if text not in text_to_id:
+                text_to_id[text] = current_id
+                id_to_info[current_id] = {'text': text, 'vector_rank': rank}
+                current_id += 1
+            else:
+                id_to_info[text_to_id[text]]['vector_rank'] = rank
+
+        # Iterate over bm25_retriever_results
+        for rank, result in enumerate(bm25_retriever_results, start=1):
+            text = result['text']
+            if text not in text_to_id:
+                text_to_id[text] = current_id
+                id_to_info[current_id] = {'text': text, 'bm25_rank': rank}
+                current_id += 1
+            else:
+                id_to_info[text_to_id[text]].setdefault('bm25_rank', rank)
+
+        vector_ranks = np.array(
+            [
+                info.get('vector_rank', float('inf'))
+                for info in id_to_info.values()
+            ]
+        )
+        bm25_ranks = np.array(
+            [
+                info.get('bm25_rank', float('inf'))
+                for info in id_to_info.values()
+            ]
+        )
+
+        # Calculate RRF scores
+        vector_rrf_scores = vector_weight / (
+            rank_smoothing_factor + vector_ranks
+        )
+        bm25_rrf_scores = bm25_weight / (rank_smoothing_factor + bm25_ranks)
+        rrf_scores = vector_rrf_scores + bm25_rrf_scores
+
+        for idx, (_, info) in enumerate(id_to_info.items()):
+            info['rrf_score'] = rrf_scores[idx]
+        sorted_results = sorted(
+            id_to_info.values(), key=lambda x: x['rrf_score'], reverse=True
+        )
+        return sorted_results[:top_k]
+
+    def query(
+        self,
+        query: str,
+        top_k: int = 20,
+        vector_weight: float = 0.8,
+        bm25_weight: float = 0.2,
+        rank_smoothing_factor: int = 60,
+        vector_retriever_top_k: int = 50,
+        vector_retriever_similarity_threshold: float = 0.5,
+        bm25_retriever_top_k: int = 50,
+        return_detailed_info: bool = False,
+    ) -> Union[
+        dict[str, Sequence[Collection[str]]],
+        dict[str, Sequence[Union[str, float]]],
+    ]:
+        r"""Executes a hybrid retrieval query using both vector and BM25
+        retrievers.
+
+        Args:
+            query (str): The search query.
+            top_k (int): Number of top results to return (default 20).
+            vector_weight (float): Weight for vector retriever results in RRF.
+            bm25_weight (float): Weight for BM25 retriever results in RRF.
+            rank_smoothing_factor (int): RRF hyperparameter for rank smoothing.
+            vector_retriever_top_k (int): Top results from vector retriever.
+            vector_retriever_similarity_threshold (float): Similarity
+                threshold for vector retriever.
+            bm25_retriever_top_k (int): Top results from BM25 retriever.
+            return_detailed_info (bool): Return detailed info if True.
+
+        Returns:
+            Union[
+                dict[str, Sequence[Collection[str]]],
+                dict[str, Sequence[Union[str, float]]]
+            ]: By default, returns only the text information. If
+                `return_detailed_info` is `True`, return detailed information
+                including rrf scores.
+        """
+        if top_k > max(vector_retriever_top_k, bm25_retriever_top_k):
+            raise ValueError(
+                "top_k needs to be less than or equal to the "
+                "maximum value among vector_retriever_top_k and "
+                "bm25_retriever_top_k."
+            )
+        if vector_weight < 0 or bm25_weight < 0:
+            raise ValueError(
+                "Neither `vector_weight` nor `bm25_weight` can be negative."
+            )
+
+        vr_raw_results: List[Dict[str, Any]] = self.vr.query(
+            query=query,
+            top_k=vector_retriever_top_k,
+            similarity_threshold=vector_retriever_similarity_threshold,
+        )
+        # if the number of results is less than top_k, return all results
+        with_score = [
+            info for info in vr_raw_results if 'similarity score' in info
+        ]
+        vector_retriever_results = sorted(
+            with_score, key=lambda x: x['similarity score'], reverse=True
+        )
+
+        bm25_retriever_results = self.bm25.query(
+            query=query,
+            top_k=bm25_retriever_top_k,
+        )
+
+        all_retrieved_info = self._sort_rrf_scores(
+            vector_retriever_results,
+            bm25_retriever_results,
+            top_k,
+            vector_weight,
+            bm25_weight,
+            rank_smoothing_factor,
+        )
+
+        retrieved_info = {
+            "Original Query": query,
+            "Retrieved Context": (
+                all_retrieved_info
+                if return_detailed_info
+                else [item['text'] for item in all_retrieved_info]  # type: ignore[misc]
+            ),
+        }
+        return retrieved_info
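The fused score in `_sort_rrf_scores` is standard weighted Reciprocal Rank Fusion: each document scores `vector_weight / (k + vector_rank) + bm25_weight / (k + bm25_rank)`, where `k` is the `rank_smoothing_factor` and a missing rank is infinity, so that retriever's term contributes nothing. A toy calculation with the `query()` defaults (0.8 / 0.2 / 60); the document names are made up:

    # Toy RRF walk-through mirroring _sort_rrf_scores (names illustrative).
    vector_ranks = {"doc_a": 1, "doc_b": 2}  # ranks from the vector retriever
    bm25_ranks = {"doc_b": 1, "doc_c": 2}    # ranks from BM25


    def rrf(doc: str, w_vec: float = 0.8, w_bm25: float = 0.2, k: int = 60) -> float:
        v = vector_ranks.get(doc, float("inf"))  # missing -> inf -> term is 0
        b = bm25_ranks.get(doc, float("inf"))
        return w_vec / (k + v) + w_bm25 / (k + b)


    for doc in ("doc_a", "doc_b", "doc_c"):
        print(doc, round(rrf(doc), 5))
    # doc_a 0.01311, doc_b 0.01618, doc_c 0.00323:
    # doc_b wins because both retrievers rank it, so both terms contribute.

Usage follows the other retrievers: instantiate, `process()` a file path or URL, then `query()`; with `return_detailed_info=True` the per-document `rrf_score` values come back alongside the text.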
camel/toolkits/__init__.py
CHANGED

@@ -45,8 +45,12 @@ from .human_toolkit import HumanToolkit
 from .stripe_toolkit import StripeToolkit
 from .video_toolkit import VideoDownloaderToolkit
 from .dappier_toolkit import DappierToolkit
-from .
+from .networkx_toolkit import NetworkXToolkit
 from .semantic_scholar_toolkit import SemanticScholarToolkit
+from .zapier_toolkit import ZapierToolkit
+from .sympy_toolkit import SymPyToolkit
+from .mineru_toolkit import MinerUToolkit
+

 __all__ = [
     'BaseToolkit',

@@ -79,6 +83,9 @@ __all__ = [
     'MeshyToolkit',
     'OpenBBToolkit',
     'DappierToolkit',
-    '
+    'NetworkXToolkit',
     'SemanticScholarToolkit',
+    'ZapierToolkit',
+    'SymPyToolkit',
+    'MinerUToolkit',
 ]
camel/toolkits/arxiv_toolkit.py
CHANGED

@@ -28,8 +28,9 @@ class ArxivToolkit(BaseToolkit):
     """

     @dependencies_required('arxiv')
-    def __init__(self) -> None:
+    def __init__(self, timeout: Optional[float] = None) -> None:
         r"""Initializes the ArxivToolkit and sets up the arXiv client."""
+        super().__init__(timeout=timeout)
         import arxiv

         self.client = arxiv.Client()

camel/toolkits/ask_news_toolkit.py
CHANGED

@@ -62,11 +62,13 @@
     based on user queries using the AskNews API.
     """

-    def __init__(self):
+    def __init__(self, timeout: Optional[float] = None):
         r"""Initialize the AskNewsToolkit with API clients.The API keys and
         credentials are retrieved from environment variables.
         """
-
+        super().__init__(timeout=timeout)
+
+        from asknews_sdk import AskNewsSDK  # type: ignore[import-not-found]

         client_id = os.environ.get("ASKNEWS_CLIENT_ID")
         client_secret = os.environ.get("ASKNEWS_CLIENT_SECRET")
camel/toolkits/base.py
CHANGED

@@ -12,14 +12,33 @@
 # limitations under the License.
 # ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========

-from typing import List
+from typing import List, Optional

 from camel.toolkits import FunctionTool
-from camel.utils import AgentOpsMeta
+from camel.utils import AgentOpsMeta, with_timeout


 class BaseToolkit(metaclass=AgentOpsMeta):
-    r"""Base class for toolkits."""
+    r"""Base class for toolkits.
+
+    Args:
+        timeout (Optional[float]): The timeout for the toolkit.
+    """
+
+    timeout: Optional[float] = None
+
+    def __init__(self, timeout: Optional[float] = None):
+        # check if timeout is a positive number
+        if timeout is not None and timeout <= 0:
+            raise ValueError("Timeout must be a positive number.")
+        self.timeout = timeout
+
+    # Add timeout to all callable methods in the toolkit
+    def __init_subclass__(cls, **kwargs):
+        super().__init_subclass__(**kwargs)
+        for attr_name, attr_value in cls.__dict__.items():
+            if callable(attr_value) and not attr_name.startswith("__"):
+                setattr(cls, attr_name, with_timeout(attr_value))

     def get_tools(self) -> List[FunctionTool]:
         r"""Returns a list of FunctionTool objects representing the
camel/toolkits/code_execution.py
CHANGED

@@ -48,7 +48,9 @@ class CodeExecutionToolkit(BaseToolkit):
         unsafe_mode: bool = False,
         import_white_list: Optional[List[str]] = None,
         require_confirm: bool = False,
+        timeout: Optional[float] = None,
     ) -> None:
+        super().__init__(timeout=timeout)
         self.verbose = verbose
         self.unsafe_mode = unsafe_mode
         self.import_white_list = import_white_list or list()

camel/toolkits/dappier_toolkit.py
CHANGED

@@ -33,10 +33,11 @@
             (None, "DAPPIER_API_KEY"),
         ]
     )
-    def __init__(self):
+    def __init__(self, timeout: Optional[float] = None):
         r"""Initialize the DappierTookit with API clients.The API keys and
         credentials are retrieved from environment variables.
         """
+        super().__init__(timeout=timeout)
         from dappier import Dappier

         dappier_api_key = os.environ.get("DAPPIER_API_KEY")
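Taken together, the toolkit diffs in this release are mechanical: each `__init__` gains an optional `timeout` and forwards it to `BaseToolkit.__init__`. A hypothetical usage sketch; the 15-second value is arbitrary, and the constructor signatures are as shown in the diffs above:

    from camel.toolkits import ArxivToolkit, CodeExecutionToolkit

    # Every tool call on these instances is now bounded by the given timeout.
    arxiv = ArxivToolkit(timeout=15.0)
    code = CodeExecutionToolkit(timeout=15.0)

    tools = [*arxiv.get_tools(), *code.get_tools()]

    # A non-positive value is rejected up front:
    # ArxivToolkit(timeout=0)  -> ValueError("Timeout must be a positive number.")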
|