camel-ai 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of camel-ai might be problematic. Click here for more details.
- camel/__init__.py +1 -11
- camel/agents/__init__.py +5 -5
- camel/agents/chat_agent.py +124 -63
- camel/agents/critic_agent.py +28 -17
- camel/agents/deductive_reasoner_agent.py +235 -0
- camel/agents/embodied_agent.py +92 -40
- camel/agents/role_assignment_agent.py +27 -17
- camel/agents/task_agent.py +60 -34
- camel/agents/tool_agents/base.py +0 -1
- camel/agents/tool_agents/hugging_face_tool_agent.py +7 -4
- camel/configs.py +119 -7
- camel/embeddings/__init__.py +2 -0
- camel/embeddings/base.py +3 -2
- camel/embeddings/openai_embedding.py +3 -3
- camel/embeddings/sentence_transformers_embeddings.py +65 -0
- camel/functions/__init__.py +13 -3
- camel/functions/google_maps_function.py +335 -0
- camel/functions/math_functions.py +7 -7
- camel/functions/openai_function.py +344 -42
- camel/functions/search_functions.py +100 -35
- camel/functions/twitter_function.py +484 -0
- camel/functions/weather_functions.py +36 -23
- camel/generators.py +65 -46
- camel/human.py +17 -11
- camel/interpreters/__init__.py +25 -0
- camel/interpreters/base.py +49 -0
- camel/{utils/python_interpreter.py → interpreters/internal_python_interpreter.py} +129 -48
- camel/interpreters/interpreter_error.py +19 -0
- camel/interpreters/subprocess_interpreter.py +190 -0
- camel/loaders/__init__.py +22 -0
- camel/{functions/base_io_functions.py → loaders/base_io.py} +38 -35
- camel/{functions/unstructured_io_fuctions.py → loaders/unstructured_io.py} +199 -110
- camel/memories/__init__.py +17 -7
- camel/memories/agent_memories.py +156 -0
- camel/memories/base.py +97 -32
- camel/memories/blocks/__init__.py +21 -0
- camel/memories/{chat_history_memory.py → blocks/chat_history_block.py} +34 -34
- camel/memories/blocks/vectordb_block.py +101 -0
- camel/memories/context_creators/__init__.py +3 -2
- camel/memories/context_creators/score_based.py +32 -20
- camel/memories/records.py +6 -5
- camel/messages/__init__.py +2 -2
- camel/messages/base.py +99 -16
- camel/messages/func_message.py +7 -4
- camel/models/__init__.py +4 -2
- camel/models/anthropic_model.py +132 -0
- camel/models/base_model.py +3 -2
- camel/models/model_factory.py +10 -8
- camel/models/open_source_model.py +25 -13
- camel/models/openai_model.py +9 -10
- camel/models/stub_model.py +6 -5
- camel/prompts/__init__.py +7 -5
- camel/prompts/ai_society.py +21 -14
- camel/prompts/base.py +54 -47
- camel/prompts/code.py +22 -14
- camel/prompts/evaluation.py +8 -5
- camel/prompts/misalignment.py +26 -19
- camel/prompts/object_recognition.py +35 -0
- camel/prompts/prompt_templates.py +14 -8
- camel/prompts/role_description_prompt_template.py +16 -10
- camel/prompts/solution_extraction.py +9 -5
- camel/prompts/task_prompt_template.py +24 -21
- camel/prompts/translation.py +9 -5
- camel/responses/agent_responses.py +5 -2
- camel/retrievers/__init__.py +24 -0
- camel/retrievers/auto_retriever.py +319 -0
- camel/retrievers/base.py +64 -0
- camel/retrievers/bm25_retriever.py +149 -0
- camel/retrievers/vector_retriever.py +166 -0
- camel/societies/__init__.py +1 -1
- camel/societies/babyagi_playing.py +56 -32
- camel/societies/role_playing.py +188 -133
- camel/storages/__init__.py +18 -0
- camel/storages/graph_storages/__init__.py +23 -0
- camel/storages/graph_storages/base.py +82 -0
- camel/storages/graph_storages/graph_element.py +74 -0
- camel/storages/graph_storages/neo4j_graph.py +582 -0
- camel/storages/key_value_storages/base.py +1 -2
- camel/storages/key_value_storages/in_memory.py +1 -2
- camel/storages/key_value_storages/json.py +8 -13
- camel/storages/vectordb_storages/__init__.py +33 -0
- camel/storages/vectordb_storages/base.py +202 -0
- camel/storages/vectordb_storages/milvus.py +396 -0
- camel/storages/vectordb_storages/qdrant.py +371 -0
- camel/terminators/__init__.py +1 -1
- camel/terminators/base.py +2 -3
- camel/terminators/response_terminator.py +21 -12
- camel/terminators/token_limit_terminator.py +5 -3
- camel/types/__init__.py +12 -6
- camel/types/enums.py +86 -13
- camel/types/openai_types.py +10 -5
- camel/utils/__init__.py +18 -13
- camel/utils/commons.py +242 -81
- camel/utils/token_counting.py +135 -15
- {camel_ai-0.1.1.dist-info → camel_ai-0.1.3.dist-info}/METADATA +116 -74
- camel_ai-0.1.3.dist-info/RECORD +101 -0
- {camel_ai-0.1.1.dist-info → camel_ai-0.1.3.dist-info}/WHEEL +1 -1
- camel/memories/context_creators/base.py +0 -72
- camel_ai-0.1.1.dist-info/RECORD +0 -75
|
@@ -13,29 +13,24 @@
|
|
|
13
13
|
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
|
+
from enum import EnumMeta
|
|
16
17
|
from pathlib import Path
|
|
17
|
-
from typing import Any, Dict, List, Optional
|
|
18
|
+
from typing import Any, ClassVar, Dict, List, Optional
|
|
18
19
|
|
|
19
20
|
from camel.storages.key_value_storages import BaseKeyValueStorage
|
|
20
|
-
from camel.types import
|
|
21
|
-
ModelType,
|
|
22
|
-
OpenAIBackendRole,
|
|
23
|
-
RoleType,
|
|
24
|
-
TaskType,
|
|
25
|
-
VectorDistance,
|
|
26
|
-
)
|
|
21
|
+
from camel.types import ModelType, OpenAIBackendRole, RoleType, TaskType
|
|
27
22
|
|
|
28
23
|
|
|
29
24
|
class _CamelJSONEncoder(json.JSONEncoder):
|
|
30
25
|
r"""A custom JSON encoder for serializing specifically enumerated types.
|
|
31
26
|
Ensures enumerated types can be stored in and retrieved from JSON format.
|
|
32
27
|
"""
|
|
33
|
-
|
|
28
|
+
|
|
29
|
+
CAMEL_ENUMS: ClassVar[Dict[str, EnumMeta]] = {
|
|
34
30
|
"RoleType": RoleType,
|
|
35
31
|
"TaskType": TaskType,
|
|
36
32
|
"ModelType": ModelType,
|
|
37
33
|
"OpenAIBackendRole": OpenAIBackendRole,
|
|
38
|
-
"VectorDistance": VectorDistance,
|
|
39
34
|
}
|
|
40
35
|
|
|
41
36
|
def default(self, obj) -> Any:
|
|
@@ -75,7 +70,8 @@ class JsonStorage(BaseKeyValueStorage):
|
|
|
75
70
|
"""
|
|
76
71
|
with self.json_path.open("a") as f:
|
|
77
72
|
f.writelines(
|
|
78
|
-
[json.dumps(r, cls=_CamelJSONEncoder) + "\n" for r in records]
|
|
73
|
+
[json.dumps(r, cls=_CamelJSONEncoder) + "\n" for r in records]
|
|
74
|
+
)
|
|
79
75
|
|
|
80
76
|
def load(self) -> List[Dict[str, Any]]:
|
|
81
77
|
r"""Loads all stored records from the key-value storage system.
|
|
@@ -91,7 +87,6 @@ class JsonStorage(BaseKeyValueStorage):
|
|
|
91
87
|
]
|
|
92
88
|
|
|
93
89
|
def clear(self) -> None:
|
|
94
|
-
r"""Removes all records from the key-value storage system.
|
|
95
|
-
"""
|
|
90
|
+
r"""Removes all records from the key-value storage system."""
|
|
96
91
|
with self.json_path.open("w"):
|
|
97
92
|
pass
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from .base import (
|
|
16
|
+
BaseVectorStorage,
|
|
17
|
+
VectorDBQuery,
|
|
18
|
+
VectorDBQueryResult,
|
|
19
|
+
VectorDBStatus,
|
|
20
|
+
VectorRecord,
|
|
21
|
+
)
|
|
22
|
+
from .milvus import MilvusStorage
|
|
23
|
+
from .qdrant import QdrantStorage
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
'BaseVectorStorage',
|
|
27
|
+
'VectorDBQuery',
|
|
28
|
+
'VectorDBQueryResult',
|
|
29
|
+
'QdrantStorage',
|
|
30
|
+
'MilvusStorage',
|
|
31
|
+
'VectorRecord',
|
|
32
|
+
'VectorDBStatus',
|
|
33
|
+
]
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
from typing import Any, Dict, List, Optional
|
|
18
|
+
from uuid import uuid4
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _new_record_id() -> str:
    r"""Returns a freshly generated random UUID rendered as a string."""
    return str(uuid4())


@dataclass
class VectorRecord:
    r"""A data transfer object that pairs a vector with its unique
    identifier and an optional payload. It is mainly used when saving to a
    vector storage.

    Attributes:
        vector (List[float]): The numerical representation of the vector.
        id (str, optional): A unique identifier for the vector. When it is
            omitted, a random uuid is generated for the record.
        payload (Optional[Dict[str, Any]], optional): Any additional metadata
            or information related to the vector. (default: :obj:`None`)
    """

    vector: List[float]
    id: str = field(default_factory=_new_record_id)
    payload: Optional[Dict[str, Any]] = None
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass
class VectorDBQuery:
    r"""Describes a similarity search request sent to a vector database.

    Attributes:
        query_vector (List[float]): The numerical representation of the
            vector to search with.
        top_k (int, optional): How many of the most similar vectors should
            be retrieved from the database. (default: :obj:`1`)
    """

    query_vector: List[float]
    top_k: int = 1
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass
class VectorDBQueryResult:
    r"""A single hit produced by a query against a vector database.

    Attributes:
        record (VectorRecord): The target vector record.
        similarity (float): The similarity score between the query vector and
            the record.
    """

    record: VectorRecord
    similarity: float

    @classmethod
    def construct(
        cls,
        similarity: float,
        vector: List[float],
        id: str,
        payload: Optional[Dict[str, Any]] = None,
    ) -> "VectorDBQueryResult":
        r"""Builds a `VectorDBQueryResult` from the individual parts of a
        record and its similarity score.

        Args:
            similarity (float): The similarity score of the record.
            vector (List[float]): The numerical representation of the vector.
            id (str): The unique identifier of the vector.
            payload (Optional[Dict[str, Any]], optional): Additional metadata
                related to the vector. (default: :obj:`None`)

        Returns:
            VectorDBQueryResult: The result wrapping a new `VectorRecord`.
        """
        matched_record = VectorRecord(vector=vector, id=id, payload=payload)
        return cls(record=matched_record, similarity=similarity)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class VectorDBStatus:
    r"""Summary of the state of a vector database.

    Attributes:
        vector_dim (int): The dimension of stored vectors.
        vector_count (int): The number of stored vectors.
    """

    vector_dim: int
    vector_count: int
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
class BaseVectorStorage(ABC):
    r"""Abstract interface that every vector storage backend implements."""

    @abstractmethod
    def add(
        self,
        records: List[VectorRecord],
        **kwargs: Any,
    ) -> None:
        r"""Saves a list of vector records to the storage.

        Args:
            records (List[VectorRecord]): The vector records to store.
            **kwargs (Any): Additional keyword arguments.

        Raises:
            RuntimeError: If there is an error during the saving process.
        """

    @abstractmethod
    def delete(
        self,
        ids: List[str],
        **kwargs: Any,
    ) -> None:
        r"""Deletes the vectors identified by the given IDs from the storage.

        Args:
            ids (List[str]): Unique identifiers of the vectors to be
                deleted.
            **kwargs (Any): Additional keyword arguments.

        Raises:
            RuntimeError: If there is an error during the deletion process.
        """

    @abstractmethod
    def status(self) -> VectorDBStatus:
        r"""Returns status of the vector database.

        Returns:
            VectorDBStatus: The vector database status.
        """

    @abstractmethod
    def query(
        self,
        query: VectorDBQuery,
        **kwargs: Any,
    ) -> List[VectorDBQueryResult]:
        r"""Searches the storage for vectors similar to the provided query.

        Args:
            query (VectorDBQuery): The query object containing the search
                vector and the number of top similar vectors to retrieve.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            List[VectorDBQueryResult]: A list of vectors retrieved from the
                storage based on similarity to the query vector.
        """

    @abstractmethod
    def clear(self) -> None:
        r"""Remove all vectors from the storage."""

    @abstractmethod
    def load(self) -> None:
        r"""Load the collection hosted on cloud service."""

    @property
    @abstractmethod
    def client(self) -> Any:
        r"""Provides access to the underlying vector database client."""

    def get_payloads_by_vector(
        self,
        vector: List[float],
        top_k: int,
    ) -> List[Dict[str, Any]]:
        r"""Returns the payloads of the top k vector records that are closest
        to the given vector.

        This function is a wrapper of `BaseVectorStorage.query`.

        Args:
            vector (List[float]): The search vector.
            top_k (int): The number of top similar vectors.

        Returns:
            List[Dict[str, Any]]: The payloads of the records returned by
                the query. Records whose payload is `None` are skipped.
        """
        hits = self.query(VectorDBQuery(query_vector=vector, top_k=top_k))
        payloads: List[Dict[str, Any]] = []
        for hit in hits:
            payload = hit.record.payload
            if payload is not None:
                payloads.append(payload)
        return payloads
|
|
@@ -0,0 +1,396 @@
|
|
|
1
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
2
|
+
# Licensed under the Apache License, Version 2.0 (the “License”);
|
|
3
|
+
# you may not use this file except in compliance with the License.
|
|
4
|
+
# You may obtain a copy of the License at
|
|
5
|
+
#
|
|
6
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
+
#
|
|
8
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
9
|
+
# distributed under the License is distributed on an “AS IS” BASIS,
|
|
10
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
11
|
+
# See the License for the specific language governing permissions and
|
|
12
|
+
# limitations under the License.
|
|
13
|
+
# =========== Copyright 2023 @ CAMEL-AI.org. All Rights Reserved. ===========
|
|
14
|
+
import logging
|
|
15
|
+
import re
|
|
16
|
+
from datetime import datetime
|
|
17
|
+
from typing import Any, Dict, List, Optional, Tuple
|
|
18
|
+
|
|
19
|
+
from camel.storages.vectordb_storages import (
|
|
20
|
+
BaseVectorStorage,
|
|
21
|
+
VectorDBQuery,
|
|
22
|
+
VectorDBQueryResult,
|
|
23
|
+
VectorDBStatus,
|
|
24
|
+
VectorRecord,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class MilvusStorage(BaseVectorStorage):
    r"""An implementation of the `BaseVectorStorage` for interacting with
    Milvus, a cloud-native vector search engine.

    The detailed information about Milvus is available at:
    `Milvus <https://milvus.io/docs/overview.md/>`_

    Args:
        vector_dim (int): The dimension of storing vectors.
        url_and_api_key (Tuple[str, str]): Tuple containing
            the URL and API key for connecting to a remote Milvus instance.
            URL maps to Milvus uri concept, typically "endpoint:port".
            API key maps to Milvus token concept, for self-hosted it's
            "username:pwd", for Zilliz Cloud (fully-managed Milvus) it's API
            Key.
        collection_name (Optional[str], optional): Name for the collection in
            the Milvus. If not provided, a name derived from the current
            time in iso format is generated. (default: :obj:`None`)
        **kwargs (Any): Additional keyword arguments for initializing
            `MilvusClient`.

    Raises:
        ImportError: If `pymilvus` package is not installed.
        ValueError: If the collection already exists with a vector dimension
            different from `vector_dim`.
    """

    def __init__(
        self,
        vector_dim: int,
        url_and_api_key: Tuple[str, str],
        collection_name: Optional[str] = None,
        **kwargs: Any,
    ) -> None:
        # Fail early with an actionable message when the optional dependency
        # is missing.
        try:
            from pymilvus import MilvusClient
        except ImportError as exc:
            raise ImportError(
                "Please install `pymilvus` first. You can install it by "
                "running `pip install pymilvus`."
            ) from exc

        self._client: MilvusClient
        self._create_client(url_and_api_key, **kwargs)
        self.vector_dim = vector_dim
        self.collection_name = (
            collection_name or self._generate_collection_name()
        )
        self._check_and_create_collection()

    def _create_client(
        self,
        url_and_api_key: Tuple[str, str],
        **kwargs: Any,
    ) -> None:
        r"""Initializes the Milvus client with the provided connection details.

        Args:
            url_and_api_key (Tuple[str, str]): The URL and API key for the
                Milvus server.
            **kwargs: Additional keyword arguments passed to the Milvus client.
        """
        from pymilvus import MilvusClient

        self._client = MilvusClient(
            uri=url_and_api_key[0],
            token=url_and_api_key[1],
            **kwargs,
        )

    def _check_and_create_collection(self) -> None:
        r"""Checks if the specified collection exists in Milvus and creates it
        if it doesn't, ensuring it matches the specified vector dimensionality.

        Raises:
            ValueError: If the collection exists but its vector dimension
                differs from `self.vector_dim`.
        """
        if self._collection_exists(self.collection_name):
            in_dim = self._get_collection_info(self.collection_name)[
                "vector_dim"
            ]
            if in_dim != self.vector_dim:
                # The name of collection has to be confirmed by the user
                raise ValueError(
                    "Vector dimension of the existing collection "
                    f'"{self.collection_name}" ({in_dim}) is different from '
                    f"the given embedding dim ({self.vector_dim})."
                )
        else:
            self._create_collection(
                collection_name=self.collection_name,
            )

    def _create_collection(
        self,
        collection_name: str,
        **kwargs: Any,
    ) -> None:
        r"""Creates a new collection in the database.

        The collection has an `id` primary key, a `vector` field of
        `self.vector_dim` dimensions and a JSON `payload` field. A cosine
        index is created on the `vector` field.

        Args:
            collection_name (str): Name of the collection to be created.
            **kwargs (Any): Additional keyword arguments pass to create
                collection.
        """

        from pymilvus import DataType

        # Set the schema
        schema = self._client.create_schema(
            auto_id=False,
            enable_dynamic_field=True,
            description='collection schema',
        )

        # max_length reference: https://milvus.io/docs/limitations.md
        schema.add_field(
            field_name="id",
            datatype=DataType.VARCHAR,
            description='A unique identifier for the vector',
            is_primary=True,
            max_length=65535,
        )
        # NOTE: `_get_collection_info` locates the vector field through this
        # exact description string, so keep the two in sync.
        schema.add_field(
            field_name="vector",
            datatype=DataType.FLOAT_VECTOR,
            description='The numerical representation of the vector',
            dim=self.vector_dim,
        )
        schema.add_field(
            field_name="payload",
            datatype=DataType.JSON,
            description=(
                'Any additional metadata or information related '
                'to the vector'
            ),
        )

        # Create the collection
        self._client.create_collection(
            collection_name=collection_name,
            schema=schema,
            **kwargs,
        )

        # Set the index of the parameters
        index_params = self._client.prepare_index_params()

        index_params.add_index(
            field_name="vector",
            metric_type="COSINE",
            index_type="AUTOINDEX",
            index_name="vector_index",
        )

        self._client.create_index(
            collection_name=collection_name, index_params=index_params
        )

    def _delete_collection(
        self,
        collection_name: str,
    ) -> None:
        r"""Deletes an existing collection from the database.

        Args:
            collection_name (str): Name of the collection to be deleted.
        """
        self._client.drop_collection(collection_name=collection_name)

    def _collection_exists(self, collection_name: str) -> bool:
        r"""Checks whether a collection with the specified name exists in the
        database.

        Args:
            collection_name (str): The name of the collection to check.

        Returns:
            bool: True if the collection exists, False otherwise.
        """
        return self._client.has_collection(collection_name)

    def _generate_collection_name(self) -> str:
        r"""Generates a unique name for a new collection based on the current
        timestamp. Milvus collection names can only contain alphanumeric
        characters and underscores.

        Returns:
            str: A unique, valid collection name.
        """
        timestamp = datetime.now().isoformat()
        # Replace every character Milvus does not accept with an underscore.
        transformed_name = re.sub(r'[^a-zA-Z0-9_]', '_', timestamp)
        # Prefix with letters so the name does not start with a digit.
        valid_name = "Time" + transformed_name
        return valid_name

    def _get_collection_info(self, collection_name: str) -> Dict[str, Any]:
        r"""Retrieves details of an existing collection.

        Args:
            collection_name (str): Name of the collection to be checked.

        Returns:
            Dict[str, Any]: A dictionary with the keys `id`, `vector_count`
                and `vector_dim`. `vector_dim` is `None` when no field
                carries the vector description set by `_create_collection`.
        """
        vector_count = self._client.get_collection_stats(collection_name)[
            'row_count'
        ]
        collection_info = self._client.describe_collection(collection_name)
        collection_id = collection_info['collection_id']

        # The vector field is recognised by the description it was given in
        # `_create_collection`.
        dim_value = next(
            (
                field['params']['dim']
                for field in collection_info['fields']
                if field['description']
                == 'The numerical representation of the vector'
            ),
            None,
        )

        return {
            "id": collection_id,  # the id of the collection
            "vector_count": vector_count,  # the number of the vector
            "vector_dim": dim_value,  # the dimension of the vector
        }

    def _validate_and_convert_vectors(
        self, records: List[VectorRecord]
    ) -> List[dict]:
        r"""Converts VectorRecord instances to the format expected by Milvus.

        Args:
            records (List[VectorRecord]): List of vector records to convert.

        Returns:
            List[dict]: A list of dictionaries formatted for Milvus insertion.
                A missing payload is stored as an empty string.
        """
        return [
            {
                "id": record.id,
                "payload": record.payload
                if record.payload is not None
                else '',
                "vector": record.vector,
            }
            for record in records
        ]

    def add(
        self,
        records: List[VectorRecord],
        **kwargs: Any,
    ) -> None:
        r"""Adds a list of vectors to the specified collection.

        Args:
            records (List[VectorRecord]): List of vectors to be added.
            **kwargs (Any): Additional keyword arguments pass to insert.
        """
        validated_records = self._validate_and_convert_vectors(records)

        op_info = self._client.insert(
            collection_name=self.collection_name,
            data=validated_records,
            **kwargs,
        )
        logger.debug("Successfully added vectors in Milvus: %s", op_info)

    def delete(
        self,
        ids: List[str],
        **kwargs: Any,
    ) -> None:
        r"""Deletes a list of vectors identified by their IDs from the
        storage. If unsure of ids you can first query the collection to grab
        the corresponding data.

        Args:
            ids (List[str]): List of unique identifiers for the vectors to be
                deleted.
            **kwargs (Any): Additional keyword arguments passed to delete.
        """

        op_info = self._client.delete(
            collection_name=self.collection_name, pks=ids, **kwargs
        )
        logger.debug("Successfully deleted vectors in Milvus: %s", op_info)

    def status(self) -> VectorDBStatus:
        r"""Retrieves the current status of the Milvus collection. This method
        provides information about the collection, including its vector
        dimensionality and the total number of vectors stored.

        Returns:
            VectorDBStatus: An object containing information about the
                collection's status.
        """
        status = self._get_collection_info(self.collection_name)
        return VectorDBStatus(
            vector_dim=status["vector_dim"],
            vector_count=status["vector_count"],
        )

    def query(
        self,
        query: VectorDBQuery,
        **kwargs: Any,
    ) -> List[VectorDBQueryResult]:
        r"""Searches for similar vectors in the storage based on the provided
        query.

        Args:
            query (VectorDBQuery): The query object containing the search
                vector and the number of top similar vectors to retrieve.
            **kwargs (Any): Additional keyword arguments passed to search.

        Returns:
            List[VectorDBQueryResult]: A list of vectors retrieved from the
                storage based on similarity to the query vector. The list
                holds up to `query.top_k` results and is empty when nothing
                matched.
        """
        search_result = self._client.search(
            collection_name=self.collection_name,
            data=[query.query_vector],
            limit=query.top_k,
            output_fields=['vector', 'payload'],
            **kwargs,
        )
        # The search result holds one list of hits per query vector. Every
        # hit has to be converted; reading only the first hit of each list
        # would cap the result at one entry and fail on an empty hit list.
        query_results = []
        for hits in search_result:
            for hit in hits:
                query_results.append(
                    VectorDBQueryResult.construct(
                        similarity=hit['distance'],
                        id=str(hit['id']),
                        payload=hit['entity'].get('payload'),
                        vector=hit['entity'].get('vector'),
                    )
                )

        return query_results

    def clear(self) -> None:
        r"""Removes all vectors from the Milvus collection. This method
        deletes the existing collection and then recreates it with the same
        schema to effectively remove all stored vectors.
        """
        self._delete_collection(self.collection_name)
        self._create_collection(collection_name=self.collection_name)

    def load(self) -> None:
        r"""Load the collection hosted on cloud service."""
        self._client.load_collection(self.collection_name)

    @property
    def client(self) -> Any:
        r"""Provides direct access to the Milvus client. This property allows
        for direct interactions with the Milvus client for operations that are
        not covered by the `MilvusStorage` class.

        Returns:
            Any: The Milvus client instance.
        """
        return self._client
|