camel-ai 0.2.3a1__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of camel-ai might be problematic.
- camel/__init__.py +1 -1
- camel/agents/chat_agent.py +93 -69
- camel/agents/knowledge_graph_agent.py +4 -6
- camel/bots/__init__.py +16 -2
- camel/bots/discord_app.py +138 -0
- camel/bots/slack/__init__.py +30 -0
- camel/bots/slack/models.py +158 -0
- camel/bots/slack/slack_app.py +255 -0
- camel/configs/__init__.py +1 -2
- camel/configs/anthropic_config.py +2 -5
- camel/configs/base_config.py +6 -6
- camel/configs/groq_config.py +2 -3
- camel/configs/ollama_config.py +1 -2
- camel/configs/openai_config.py +2 -23
- camel/configs/samba_config.py +2 -2
- camel/configs/togetherai_config.py +1 -1
- camel/configs/vllm_config.py +1 -1
- camel/configs/zhipuai_config.py +2 -3
- camel/embeddings/openai_embedding.py +2 -2
- camel/loaders/__init__.py +2 -0
- camel/loaders/chunkr_reader.py +163 -0
- camel/loaders/firecrawl_reader.py +3 -3
- camel/loaders/unstructured_io.py +35 -33
- camel/messages/__init__.py +1 -0
- camel/models/__init__.py +2 -4
- camel/models/anthropic_model.py +32 -26
- camel/models/azure_openai_model.py +39 -36
- camel/models/base_model.py +31 -20
- camel/models/gemini_model.py +37 -29
- camel/models/groq_model.py +29 -23
- camel/models/litellm_model.py +44 -61
- camel/models/mistral_model.py +32 -29
- camel/models/model_factory.py +66 -76
- camel/models/nemotron_model.py +33 -23
- camel/models/ollama_model.py +42 -47
- camel/models/{openai_compatibility_model.py → openai_compatible_model.py} +31 -49
- camel/models/openai_model.py +48 -29
- camel/models/reka_model.py +30 -28
- camel/models/samba_model.py +82 -177
- camel/models/stub_model.py +2 -2
- camel/models/togetherai_model.py +37 -43
- camel/models/vllm_model.py +43 -50
- camel/models/zhipuai_model.py +33 -27
- camel/retrievers/auto_retriever.py +29 -97
- camel/retrievers/vector_retriever.py +58 -47
- camel/societies/babyagi_playing.py +6 -3
- camel/societies/role_playing.py +5 -3
- camel/storages/graph_storages/graph_element.py +2 -2
- camel/storages/key_value_storages/json.py +6 -1
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/arxiv_toolkit.py +155 -0
- camel/toolkits/ask_news_toolkit.py +653 -0
- camel/toolkits/base.py +2 -3
- camel/toolkits/code_execution.py +6 -7
- camel/toolkits/dalle_toolkit.py +6 -6
- camel/toolkits/{openai_function.py → function_tool.py} +34 -11
- camel/toolkits/github_toolkit.py +9 -10
- camel/toolkits/google_maps_toolkit.py +7 -7
- camel/toolkits/google_scholar_toolkit.py +146 -0
- camel/toolkits/linkedin_toolkit.py +7 -7
- camel/toolkits/math_toolkit.py +8 -8
- camel/toolkits/open_api_toolkit.py +5 -5
- camel/toolkits/reddit_toolkit.py +7 -7
- camel/toolkits/retrieval_toolkit.py +5 -5
- camel/toolkits/search_toolkit.py +9 -9
- camel/toolkits/slack_toolkit.py +11 -11
- camel/toolkits/twitter_toolkit.py +378 -452
- camel/toolkits/weather_toolkit.py +6 -6
- camel/toolkits/whatsapp_toolkit.py +177 -0
- camel/types/__init__.py +6 -1
- camel/types/enums.py +40 -85
- camel/types/openai_types.py +3 -0
- camel/types/unified_model_type.py +104 -0
- camel/utils/__init__.py +0 -2
- camel/utils/async_func.py +7 -7
- camel/utils/commons.py +32 -3
- camel/utils/token_counting.py +30 -212
- camel/workforce/role_playing_worker.py +1 -1
- camel/workforce/single_agent_worker.py +1 -1
- camel/workforce/task_channel.py +4 -3
- camel/workforce/workforce.py +4 -4
- camel_ai-0.2.4.dist-info/LICENSE +201 -0
- {camel_ai-0.2.3a1.dist-info → camel_ai-0.2.4.dist-info}/METADATA +27 -56
- {camel_ai-0.2.3a1.dist-info → camel_ai-0.2.4.dist-info}/RECORD +85 -76
- {camel_ai-0.2.3a1.dist-info → camel_ai-0.2.4.dist-info}/WHEEL +1 -1
- camel/bots/discord_bot.py +0 -206
- camel/models/open_source_model.py +0 -170
camel/models/vllm_model.py
CHANGED
@@ -17,58 +17,66 @@ from typing import Any, Dict, List, Optional, Union
 
 from openai import OpenAI, Stream
 
-from camel.configs import VLLM_API_PARAMS
+from camel.configs import VLLM_API_PARAMS, VLLMConfig
 from camel.messages import OpenAIMessage
-from camel.
+from camel.models import BaseModelBackend
+from camel.types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ModelType,
+)
 from camel.utils import BaseTokenCounter, OpenAITokenCounter
 
 
 # flake8: noqa: E501
-class VLLMModel:
-    r"""vLLM service interface.
+class VLLMModel(BaseModelBackend):
+    r"""vLLM service interface.
+
+    Args:
+        model_type (Union[ModelType, str]): Model for which a backend is
+            created.
+        model_config_dict (Optional[Dict[str, Any]], optional): A dictionary
+            that will be fed into:obj:`openai.ChatCompletion.create()`. If
+            :obj:`None`, :obj:`VLLMConfig().as_dict()` will be used.
+            (default: :obj:`None`)
+        api_key (Optional[str], optional): The API key for authenticating with
+            the model service. vLLM doesn't need API key, it would be ignored
+            if set. (default: :obj:`None`)
+        url (Optional[str], optional): The url to the model service. If not
+            provided, :obj:`"http://localhost:8000/v1"` will be used.
+            (default: :obj:`None`)
+        token_counter (Optional[BaseTokenCounter], optional): Token counter to
+            use for the model. If not provided, :obj:`OpenAITokenCounter(
+            ModelType.GPT_4O_MINI)` will be used.
+            (default: :obj:`None`)
+
+    References:
+        https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html
+    """
 
     def __init__(
         self,
-        model_type: str,
-        model_config_dict: Dict[str, Any],
-        url: Optional[str] = None,
+        model_type: Union[ModelType, str],
+        model_config_dict: Optional[Dict[str, Any]] = None,
         api_key: Optional[str] = None,
+        url: Optional[str] = None,
         token_counter: Optional[BaseTokenCounter] = None,
     ) -> None:
-
-
-
-
-
-            model_type (str): Model for which a backend is created.
-            model_config_dict (Dict[str, Any]): A dictionary that will
-                be fed into openai.ChatCompletion.create().
-            url (Optional[str]): The url to the model service. (default:
-                :obj:`"http://localhost:8000/v1"`)
-            api_key (Optional[str]): The API key for authenticating with the
-                model service.
-            token_counter (Optional[BaseTokenCounter]): Token counter to use
-                for the model. If not provided, `OpenAITokenCounter(ModelType.
-                GPT_4O_MINI)` will be used.
-        """
-        self.model_type = model_type
-        self.model_config_dict = model_config_dict
-        self._url = (
-            url
-            or os.environ.get("VLLM_BASE_URL")
-            or "http://localhost:8000/v1"
+        if model_config_dict is None:
+            model_config_dict = VLLMConfig().as_dict()
+        url = url or os.environ.get("VLLM_BASE_URL")
+        super().__init__(
+            model_type, model_config_dict, api_key, url, token_counter
         )
-        if not
+        if not self._url:
            self._start_server()
        # Use OpenAI cilent as interface call vLLM
        self._client = OpenAI(
            timeout=60,
            max_retries=3,
+            api_key="Set-but-ignored",  # required but ignored
            base_url=self._url,
-            api_key=api_key,
        )
-        self._token_counter = token_counter
-        self.check_model_config()
 
     def _start_server(self) -> None:
         r"""Starts the vllm server in a subprocess."""
@@ -78,8 +86,9 @@ class VLLMModel:
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE,
             )
+            self._url = "http://localhost:8000/v1"
             print(
-                f"vllm server started on
+                f"vllm server started on {self._url} "
                 f"for {self.model_type} model."
             )
         except Exception as e:
@@ -135,22 +144,6 @@ class VLLMModel:
         )
         return response
 
-    @property
-    def token_limit(self) -> int:
-        r"""Returns the maximum token limit for the given model.
-
-        Returns:
-            int: The maximum token limit for the given model.
-        """
-        max_tokens = self.model_config_dict.get("max_tokens")
-        if isinstance(max_tokens, int):
-            return max_tokens
-        print(
-            "Must set `max_tokens` as an integer in `model_config_dict` when"
-            " setting up the model. Using 4096 as default value."
-        )
-        return 4096
-
     @property
     def stream(self) -> bool:
         r"""Returns whether the model is in stream mode, which sends partial
camel/models/zhipuai_model.py
CHANGED
@@ -17,10 +17,14 @@ from typing import Any, Dict, List, Optional, Union
 
 from openai import OpenAI, Stream
 
-from camel.configs import ZHIPUAI_API_PARAMS
+from camel.configs import ZHIPUAI_API_PARAMS, ZhipuAIConfig
 from camel.messages import OpenAIMessage
 from camel.models import BaseModelBackend
-from camel.types import
+from camel.types import (
+    ChatCompletion,
+    ChatCompletionChunk,
+    ModelType,
+)
 from camel.utils import (
     BaseTokenCounter,
     OpenAITokenCounter,
@@ -29,40 +33,42 @@ from camel.utils import (
 
 
 class ZhipuAIModel(BaseModelBackend):
-    r"""ZhipuAI API in a unified BaseModelBackend interface.
+    r"""ZhipuAI API in a unified BaseModelBackend interface.
+
+    Args:
+        model_type (Union[ModelType, str]): Model for which a backend is
+            created, one of GLM_* series.
+        model_config_dict (Optional[Dict[str, Any]], optional): A dictionary
+            that will be fed into:obj:`openai.ChatCompletion.create()`. If
+            :obj:`None`, :obj:`ZhipuAIConfig().as_dict()` will be used.
+            (default: :obj:`None`)
+        api_key (Optional[str], optional): The API key for authenticating with
+            the ZhipuAI service. (default: :obj:`None`)
+        url (Optional[str], optional): The url to the ZhipuAI service.
+            (default: :obj:`https://open.bigmodel.cn/api/paas/v4/`)
+        token_counter (Optional[BaseTokenCounter], optional): Token counter to
+            use for the model. If not provided, :obj:`OpenAITokenCounter(
+            ModelType.GPT_4O_MINI)` will be used.
+            (default: :obj:`None`)
+    """
 
     def __init__(
         self,
-        model_type: ModelType,
-        model_config_dict: Dict[str, Any],
+        model_type: Union[ModelType, str],
+        model_config_dict: Optional[Dict[str, Any]] = None,
         api_key: Optional[str] = None,
         url: Optional[str] = None,
         token_counter: Optional[BaseTokenCounter] = None,
     ) -> None:
-
-
-
-
-
-
-                be fed into openai.ChatCompletion.create().
-            api_key (Optional[str]): The API key for authenticating with the
-                ZhipuAI service. (default: :obj:`None`)
-            url (Optional[str]): The url to the ZhipuAI service. (default:
-                :obj:`None`)
-            token_counter (Optional[BaseTokenCounter]): Token counter to use
-                for the model. If not provided, `OpenAITokenCounter(ModelType.
-                GPT_4O_MINI)` will be used.
-        """
+        if model_config_dict is None:
+            model_config_dict = ZhipuAIConfig().as_dict()
+        api_key = api_key or os.environ.get("ZHIPUAI_API_KEY")
+        url = url or os.environ.get(
+            "ZHIPUAI_API_BASE_URL", "https://open.bigmodel.cn/api/paas/v4/"
+        )
         super().__init__(
             model_type, model_config_dict, api_key, url, token_counter
         )
-        self._url = url or os.environ.get("ZHIPUAI_API_BASE_URL")
-        self._api_key = api_key or os.environ.get("ZHIPUAI_API_KEY")
-        if not self._url or not self._api_key:
-            raise ValueError(
-                "ZHIPUAI_API_BASE_URL and ZHIPUAI_API_KEY should be set."
-            )
         self._client = OpenAI(
             timeout=60,
             max_retries=3,
@@ -90,7 +96,7 @@ class ZhipuAIModel(BaseModelBackend):
         # Reference: https://open.bigmodel.cn/dev/api#openai_sdk
         response = self._client.chat.completions.create(
             messages=messages,
-            model=self.model_type
+            model=self.model_type,
             **self.model_config_dict,
         )
         return response
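
The constructor now resolves its own defaults instead of raising when the environment is unset: explicit arguments win, then environment variables, then the hardcoded endpoint. A sketch under that assumption (the key and model name are placeholders):

import os

from camel.models.zhipuai_model import ZhipuAIModel

os.environ["ZHIPUAI_API_KEY"] = "your-key-here"  # placeholder credential

# model_config_dict defaults to ZhipuAIConfig().as_dict(); url falls back to
# ZHIPUAI_API_BASE_URL, then to https://open.bigmodel.cn/api/paas/v4/.
model = ZhipuAIModel(model_type="glm-4")  # plain model strings now accepted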
camel/retrievers/auto_retriever.py
CHANGED
@@ -11,10 +11,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
-import datetime
-import os
 import re
-
+import uuid
+from typing import (
+    TYPE_CHECKING,
+    Collection,
+    List,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+)
 
 from camel.embeddings import BaseEmbedding, OpenAIEmbedding
 from camel.retrievers.vector_retriever import VectorRetriever
@@ -22,15 +29,12 @@ from camel.storages import (
     BaseVectorStorage,
     MilvusStorage,
     QdrantStorage,
-    VectorDBQuery,
 )
 from camel.types import StorageType
 from camel.utils import Constants
 
-try:
+if TYPE_CHECKING:
     from unstructured.documents.elements import Element
-except ImportError:
-    Element = None
 
 
 class AutoRetriever:
@@ -98,7 +102,9 @@ class AutoRetriever:
                 f"Unsupported vector storage type: {self.storage_type}"
             )
 
-    def _collection_name_generator(
+    def _collection_name_generator(
+        self, content: Union[str, "Element"]
+    ) -> str:
         r"""Generates a valid collection name from a given file path or URL.
 
         Args:
@@ -108,74 +114,19 @@ class AutoRetriever:
         Returns:
             str: A sanitized, valid collection name suitable for use.
         """
+        from unstructured.documents.elements import Element
 
         if isinstance(content, Element):
-            content = content.metadata.file_directory
+            content = content.metadata.file_directory or str(uuid.uuid4())
 
         collection_name = re.sub(r'[^a-zA-Z0-9]', '', content)[:20]
 
         return collection_name
 
-    def _get_file_modified_date_from_file(
-        self, content_input_path: str
-    ) -> str:
-        r"""Retrieves the last modified date and time of a given file. This
-        function takes a file path as input and returns the last modified date
-        and time of that file.
-
-        Args:
-            content_input_path (str): The file path of the content whose
-                modified date is to be retrieved.
-
-        Returns:
-            str: The last modified time from file.
-        """
-        mod_time = os.path.getmtime(content_input_path)
-        readable_mod_time = datetime.datetime.fromtimestamp(
-            mod_time
-        ).isoformat(timespec='seconds')
-        return readable_mod_time
-
-    def _get_file_modified_date_from_storage(
-        self, vector_storage_instance: BaseVectorStorage
-    ) -> str:
-        r"""Retrieves the last modified date and time of a given file. This
-        function takes vector storage instance as input and returns the last
-        modified date from the metadata.
-
-        Args:
-            vector_storage_instance (BaseVectorStorage): The vector storage
-                where modified date is to be retrieved from metadata.
-
-        Returns:
-            str: The last modified date from vector storage.
-        """
-
-        # Insert any query to get modified date from vector db
-        # NOTE: Can be optimized when CAMEL vector storage support
-        # direct chunk payload extraction
-        query_vector_any = self.embedding_model.embed(obj="any_query")
-        query_any = VectorDBQuery(query_vector_any, top_k=1)
-        result_any = vector_storage_instance.query(query_any)
-
-        # Extract the file's last modified date from the metadata
-        # in the query result
-        if result_any[0].record.payload is not None:
-            file_modified_date_from_meta = result_any[0].record.payload[
-                "metadata"
-            ]['last_modified']
-        else:
-            raise ValueError(
-                "The vector storage exits but the payload is None,"
-                "please check the collection"
-            )
-
-        return file_modified_date_from_meta
-
     def run_vector_retriever(
         self,
         query: str,
-        contents: Union[str, List[str], Element, List[Element]],
+        contents: Union[str, List[str], "Element", List["Element"]],
         top_k: int = Constants.DEFAULT_TOP_K_RESULTS,
         similarity_threshold: float = Constants.DEFAULT_SIMILARITY_THRESHOLD,
         return_detailed_info: bool = False,
@@ -212,12 +163,20 @@ class AutoRetriever:
                 `contents` is empty.
             RuntimeError: If any errors occur during the retrieve process.
         """
+        from unstructured.documents.elements import Element
+
         if not contents:
             raise ValueError("content cannot be empty.")
 
-        contents
-
-
+        # Normalize contents to a list
+        if isinstance(contents, str):
+            contents = [contents]
+        elif isinstance(contents, Element):
+            contents = [contents]
+        elif not isinstance(contents, list):
+            raise ValueError(
+                "contents must be a string, Element, or a list of them."
+            )
 
         all_retrieved_info = []
         for content in contents:
@@ -228,34 +187,7 @@ class AutoRetriever:
                 collection_name
            )
 
-
-            # for local path since no standard way for remote url
-            file_is_modified = False  # initialize with a default value
-            if (
-                vector_storage_instance.status().vector_count != 0
-                and isinstance(content, str)
-                and os.path.exists(content)
-            ):
-                # Get original modified date from file
-                modified_date_from_file = (
-                    self._get_file_modified_date_from_file(content)
-                )
-                # Get modified date from vector storage
-                modified_date_from_storage = (
-                    self._get_file_modified_date_from_storage(
-                        vector_storage_instance
-                    )
-                )
-                # Determine if the file has been modified since the last
-                # check
-                file_is_modified = (
-                    modified_date_from_file != modified_date_from_storage
-                )
-
-            if (
-                vector_storage_instance.status().vector_count == 0
-                or file_is_modified
-            ):
+            if vector_storage_instance.status().vector_count == 0:
                 # Clear the vector storage
                 vector_storage_instance.clear()
                 # Process and store the content to the vector storage
camel/retrievers/vector_retriever.py
CHANGED
@@ -14,7 +14,7 @@
 import os
 import warnings
 from io import IOBase
-from typing import Any, Dict, List, Optional, Union
+from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Union
 from urllib.parse import urlparse
 
 from camel.embeddings import BaseEmbedding, OpenAIEmbedding
@@ -28,10 +28,8 @@ from camel.storages import (
 )
 from camel.utils import Constants
 
-try:
+if TYPE_CHECKING:
     from unstructured.documents.elements import Element
-except ImportError:
-    Element = None
 
 
 class VectorRetriever(BaseRetriever):
@@ -73,7 +71,7 @@ class VectorRetriever(BaseRetriever):
 
     def process(
         self,
-        content: Union[str, Element,
+        content: Union[str, "Element", IO[bytes]],
         chunk_type: str = "chunk_by_title",
         max_characters: int = 500,
         embed_batch: int = 50,
@@ -86,7 +84,7 @@ class VectorRetriever(BaseRetriever):
         specified vector storage.
 
         Args:
-            content (Union[str, Element,
+            content (Union[str, Element, IO[bytes]]): Local file path, remote
                 URL, string content, Element object, or a binary file object.
             chunk_type (str): Type of chunking going to apply. Defaults to
                 "chunk_by_title".
@@ -97,67 +95,73 @@
                 otherwise skip chunking. Defaults to True.
             **kwargs (Any): Additional keyword arguments for content parsing.
         """
+        from unstructured.documents.elements import Element
+
         if isinstance(content, Element):
             elements = [content]
         elif isinstance(content, IOBase):
             elements = self.uio.parse_bytes(file=content, **kwargs) or []
-
+        elif isinstance(content, str):
             # Check if the content is URL
             parsed_url = urlparse(content)
             is_url = all([parsed_url.scheme, parsed_url.netloc])
             if is_url or os.path.exists(content):
-                elements =
+                elements = (
+                    self.uio.parse_file_or_url(input_path=content, **kwargs)
+                    or []
+                )
             else:
                 elements = [self.uio.create_element_from_text(text=content)]
+
         if not elements:
             warnings.warn(
                 f"No elements were extracted from the content: {content}"
             )
-
-
-
-
-
-
-
-
+        else:
+            # Chunk the content if required
+            chunks = (
+                self.uio.chunk_elements(
+                    chunk_type=chunk_type,
+                    elements=elements,
+                    max_characters=max_characters,
+                )
+                if should_chunk
+                else elements
             )
-            if should_chunk
-            else elements
-        )
 
-
-
-
-
-
-
+            # Process chunks in batches and store embeddings
+            for i in range(0, len(chunks), embed_batch):
+                batch_chunks = chunks[i : i + embed_batch]
+                batch_vectors = self.embedding_model.embed_list(
+                    objs=[str(chunk) for chunk in batch_chunks]
+                )
 
-
-
-
-
-
-
-
-
-
-
-
+                records = []
+                # Prepare the payload for each vector record, includes the
+                # content path, chunk metadata, and chunk text
+                for vector, chunk in zip(batch_vectors, batch_chunks):
+                    if isinstance(content, str):
+                        content_path_info = {"content path": content}
+                    elif isinstance(content, IOBase):
+                        content_path_info = {"content path": "From file bytes"}
+                    elif isinstance(content, Element):
+                        content_path_info = {
+                            "content path": content.metadata.file_directory
+                            or ""
+                        }
+                    chunk_metadata = {"metadata": chunk.metadata.to_dict()}
+                    chunk_text = {"text": str(chunk)}
+                    combined_dict = {
+                        **content_path_info,
+                        **chunk_metadata,
+                        **chunk_text,
                     }
-                chunk_metadata = {"metadata": chunk.metadata.to_dict()}
-                chunk_text = {"text": str(chunk)}
-                combined_dict = {
-                    **content_path_info,
-                    **chunk_metadata,
-                    **chunk_text,
-                }
 
-
-
-
+                    records.append(
+                        VectorRecord(vector=vector, payload=combined_dict)
+                    )
 
-
+                self.storage.add(records=records)
 
     def query(
         self,
@@ -195,6 +199,13 @@ class VectorRetriever(BaseRetriever):
         db_query = VectorDBQuery(query_vector=query_vector, top_k=top_k)
         query_results = self.storage.query(query=db_query)
 
+        # If no results found, raise an error
+        if not query_results:
+            raise ValueError(
+                "Query result is empty, please check if "
+                "the vector storage is empty."
+            )
+
         if query_results[0].record.payload is None:
             raise ValueError(
                 "Payload of vector storage is None, please check the "
camel/societies/babyagi_playing.py
CHANGED
@@ -106,7 +106,7 @@ class BabyAGI:
         )
 
         self.assistant_agent: ChatAgent
-        self.assistant_sys_msg: BaseMessage
+        self.assistant_sys_msg: Optional[BaseMessage]
         self.task_creation_agent: TaskCreationAgent
         self.task_prioritization_agent: TaskPrioritizationAgent
         self.init_agents(
@@ -202,7 +202,8 @@ class BabyAGI:
 
         self.task_creation_agent = TaskCreationAgent(
             objective=self.specified_task_prompt,
-            role_name=self.assistant_sys_msg
+            role_name=getattr(self.assistant_sys_msg, 'role_name', None)
+            or "assistant",
             output_language=output_language,
             message_window_size=message_window_size,
             **(task_creation_agent_kwargs or {}),
@@ -238,7 +239,9 @@ class BabyAGI:
 
         task_name = self.subtasks.popleft()
         assistant_msg_msg = BaseMessage.make_user_message(
-            role_name=self.assistant_sys_msg
+            role_name=getattr(self.assistant_sys_msg, 'role_name', None)
+            or "assistant",
+            content=f"{task_name}",
         )
 
         assistant_response = self.assistant_agent.step(assistant_msg_msg)
camel/societies/role_playing.py
CHANGED
@@ -149,8 +149,8 @@ class RolePlaying:
 
         self.assistant_agent: ChatAgent
         self.user_agent: ChatAgent
-        self.assistant_sys_msg: BaseMessage
-        self.user_sys_msg: BaseMessage
+        self.assistant_sys_msg: Optional[BaseMessage]
+        self.user_sys_msg: Optional[BaseMessage]
         self._init_agents(
             init_assistant_sys_msg,
             init_user_sys_msg,
@@ -454,9 +454,11 @@
         )
         if init_msg_content is None:
             init_msg_content = default_init_msg_content
+
         # Initialize a message sent by the assistant
         init_msg = BaseMessage.make_assistant_message(
-            role_name=self.assistant_sys_msg
+            role_name=getattr(self.assistant_sys_msg, 'role_name', None)
+            or "assistant",
             content=init_msg_content,
         )
camel/storages/graph_storages/graph_element.py
CHANGED
@@ -20,7 +20,7 @@ from pydantic import BaseModel, ConfigDict, Field
 try:
     from unstructured.documents.elements import Element
 except ImportError:
-    Element = None
+    Element = None  # type:ignore[misc,assignment]
 
 
 class Node(BaseModel):
@@ -73,6 +73,6 @@ class GraphElement(BaseModel):
     source: Element
 
     def __post_init__(self):
-        if Element
+        if "Element" not in globals():
             raise ImportError("""The 'unstructured' package is required to use
             the 'source' attribute.""")
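
graph_element.py keeps the try/except import with a None sentinel, now annotated for mypy, and fails lazily when source is actually used. A common variant of the same optional-dependency guard, checking the sentinel directly (require_element is an illustrative name):

try:
    from unstructured.documents.elements import Element
except ImportError:
    Element = None  # type: ignore[misc,assignment]

def require_element() -> None:
    # Fail only when the optional feature is exercised, not at import time.
    if Element is None:
        raise ImportError(
            "The 'unstructured' package is required to use this feature."
        )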
camel/storages/key_value_storages/json.py
CHANGED
@@ -18,7 +18,12 @@ from pathlib import Path
 from typing import Any, ClassVar, Dict, List, Optional
 
 from camel.storages.key_value_storages import BaseKeyValueStorage
-from camel.types import
+from camel.types import (
+    ModelType,
+    OpenAIBackendRole,
+    RoleType,
+    TaskType,
+)
 
 
 class _CamelJSONEncoder(json.JSONEncoder):