crewplus 0.2.47__tar.gz → 0.2.51__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crewplus might be problematic. Click here for more details.
- {crewplus-0.2.47 → crewplus-0.2.51}/PKG-INFO +11 -6
- {crewplus-0.2.47 → crewplus-0.2.51}/README.md +10 -5
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/vectorstores/milvus/milvus_schema_manager.py +77 -31
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/vectorstores/milvus/schema_milvus.py +24 -1
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/vectorstores/milvus/vdb_service.py +191 -8
- {crewplus-0.2.47 → crewplus-0.2.51}/pyproject.toml +1 -1
- {crewplus-0.2.47 → crewplus-0.2.51}/LICENSE +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/__init__.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/callbacks/__init__.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/callbacks/async_langfuse_handler.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/services/__init__.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/services/azure_chat_model.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/services/gemini_chat_model.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/services/init_services.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/services/model_load_balancer.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/services/tracing_manager.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/utils/__init__.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/utils/schema_action.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/utils/schema_document_updater.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/crewplus/vectorstores/milvus/__init__.py +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/docs/GeminiChatModel.md +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/docs/ModelLoadBalancer.md +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/docs/VDBService.md +0 -0
- {crewplus-0.2.47 → crewplus-0.2.51}/docs/index.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: crewplus
|
|
3
|
-
Version: 0.2.47
|
|
3
|
+
Version: 0.2.51
|
|
4
4
|
Summary: Base services for CrewPlus AI applications
|
|
5
5
|
Author-Email: Tim Liu <tim@opsmateai.com>
|
|
6
6
|
License: MIT
|
|
@@ -116,6 +116,11 @@ crewplus-base/ # GitHub repo name
|
|
|
116
116
|
|
|
117
117
|
```
|
|
118
118
|
|
|
119
|
+
## Version Update
|
|
120
|
+
|
|
121
|
+
0.2.50
|
|
122
|
+
Add async aget_vector_store to enable async vector search
|
|
123
|
+
|
|
119
124
|
## Deploy to PyPI
|
|
120
125
|
|
|
121
126
|
Clean Previous Build Artifacts:
|
|
@@ -123,17 +128,17 @@ Remove the dist/, build/, and *.egg-info/ directories to ensure that no old file
|
|
|
123
128
|
|
|
124
129
|
rm -rf dist build *.egg-info
|
|
125
130
|
|
|
126
|
-
|
|
131
|
+
### install deployment tool
|
|
127
132
|
pip install twine
|
|
128
133
|
|
|
129
|
-
|
|
134
|
+
### build package
|
|
130
135
|
python -m build
|
|
131
136
|
|
|
132
|
-
|
|
137
|
+
### deploy to TestPyPI (Test first)
|
|
133
138
|
python -m twine upload --repository testpypi dist/*
|
|
134
139
|
|
|
135
|
-
|
|
140
|
+
### install from TestPyPI
|
|
136
141
|
pip install -i https://test.pypi.org/simple/ crewplus
|
|
137
142
|
|
|
138
|
-
|
|
143
|
+
### Deploy to official PyPI
|
|
139
144
|
python -m twine upload dist/*
|
|
@@ -96,6 +96,11 @@ crewplus-base/ # GitHub repo name
|
|
|
96
96
|
|
|
97
97
|
```
|
|
98
98
|
|
|
99
|
+
## Version Update
|
|
100
|
+
|
|
101
|
+
0.2.50
|
|
102
|
+
Add async aget_vector_store to enable async vector search
|
|
103
|
+
|
|
99
104
|
## Deploy to PyPI
|
|
100
105
|
|
|
101
106
|
Clean Previous Build Artifacts:
|
|
@@ -103,17 +108,17 @@ Remove the dist/, build/, and *.egg-info/ directories to ensure that no old file
|
|
|
103
108
|
|
|
104
109
|
rm -rf dist build *.egg-info
|
|
105
110
|
|
|
106
|
-
|
|
111
|
+
### install deployment tool
|
|
107
112
|
pip install twine
|
|
108
113
|
|
|
109
|
-
|
|
114
|
+
### build package
|
|
110
115
|
python -m build
|
|
111
116
|
|
|
112
|
-
|
|
117
|
+
### deploy to TestPyPI (Test first)
|
|
113
118
|
python -m twine upload --repository testpypi dist/*
|
|
114
119
|
|
|
115
|
-
|
|
120
|
+
### install from TestPyPI
|
|
116
121
|
pip install -i https://test.pypi.org/simple/ crewplus
|
|
117
122
|
|
|
118
|
-
|
|
123
|
+
### Deploy to official PyPI
|
|
119
124
|
python -m twine upload dist/*
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from pymilvus import DataType, MilvusClient
|
|
1
|
+
from pymilvus import DataType, MilvusClient, AsyncMilvusClient
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
from typing import Any
|
|
@@ -11,17 +11,19 @@ class MilvusSchemaManager:
|
|
|
11
11
|
and index parameters based on a JSON definition. It interacts with a
|
|
12
12
|
MilvusClient instance to perform these operations.
|
|
13
13
|
"""
|
|
14
|
-
def __init__(self, client: MilvusClient, logger=None):
|
|
14
|
+
def __init__(self, client: MilvusClient = None, async_client: AsyncMilvusClient = None, logger=None):
|
|
15
15
|
"""
|
|
16
16
|
Initializes the MilvusSchemaManager.
|
|
17
17
|
|
|
18
18
|
Args:
|
|
19
19
|
client (MilvusClient): An instance of the Milvus client.
|
|
20
|
+
async_client (AsyncMilvusClient): An instance of the async Milvus client.
|
|
20
21
|
logger (logging.Logger, optional): A logger instance. If not provided,
|
|
21
22
|
a default logger will be created.
|
|
22
23
|
Defaults to None.
|
|
23
24
|
"""
|
|
24
25
|
self.client = client
|
|
26
|
+
self.async_client = async_client
|
|
25
27
|
self.logger = logger or logging.getLogger(__name__)
|
|
26
28
|
|
|
27
29
|
def bind_client(self, client: MilvusClient):
|
|
@@ -33,6 +35,15 @@ class MilvusSchemaManager:
|
|
|
33
35
|
"""
|
|
34
36
|
self.client = client
|
|
35
37
|
|
|
38
|
+
def bind_async_client(self, async_client: AsyncMilvusClient):
|
|
39
|
+
"""
|
|
40
|
+
Binds a new AsyncMilvusClient instance to the manager.
|
|
41
|
+
|
|
42
|
+
Args:
|
|
43
|
+
async_client (AsyncMilvusClient): The Milvus async client instance to use.
|
|
44
|
+
"""
|
|
45
|
+
self.async_client = async_client
|
|
46
|
+
|
|
36
47
|
def _add_array_field(self, schema, field_name, field_info):
|
|
37
48
|
"""
|
|
38
49
|
Adds an ARRAY field to the schema based on field information.
|
|
@@ -82,23 +93,8 @@ class MilvusSchemaManager:
|
|
|
82
93
|
|
|
83
94
|
schema.add_field(**field_args)
|
|
84
95
|
|
|
85
|
-
def create_collection_schema(self, json_schema: str):
|
|
86
|
-
"""
|
|
87
|
-
Creates a Milvus collection schema from a JSON string.
|
|
88
|
-
|
|
89
|
-
Args:
|
|
90
|
-
json_schema (str): A JSON string defining the schema.
|
|
91
|
-
|
|
92
|
-
Returns:
|
|
93
|
-
A Milvus schema object.
|
|
94
|
-
|
|
95
|
-
Raises:
|
|
96
|
-
ValueError: If an unknown field type is encountered in the schema.
|
|
97
|
-
"""
|
|
98
|
-
schema_data = json.loads(json_schema)
|
|
96
|
+
def _build_collection_schema_from_dict(self, schema, schema_data):
|
|
99
97
|
fields = schema_data['node_types']['Document']['properties']
|
|
100
|
-
|
|
101
|
-
schema = self.client.create_schema(auto_id=False, enable_dynamic_fields=True)
|
|
102
98
|
for field_name, field_info in fields.items():
|
|
103
99
|
field_type = field_info['type']
|
|
104
100
|
if field_type == "STRING" or field_type == "VARCHAR" or field_type == "TEXT":
|
|
@@ -123,28 +119,36 @@ class MilvusSchemaManager:
|
|
|
123
119
|
schema.add_field(field_name=field_name, datatype=DataType.FLOAT_VECTOR, dim=dim)
|
|
124
120
|
else:
|
|
125
121
|
raise ValueError(f"Unknown field type: {field_type}")
|
|
126
|
-
|
|
127
122
|
return schema
|
|
128
123
|
|
|
129
|
-
def create_index_params(self, json_schema: str):
|
|
124
|
+
def create_collection_schema(self, json_schema: str):
|
|
130
125
|
"""
|
|
131
|
-
Creates index parameters from a JSON schema string.
|
|
132
|
-
|
|
133
|
-
This method defines indexes based on the 'indexes' section of the schema
|
|
134
|
-
and automatically creates an 'AUTOINDEX' for any FLOAT_VECTOR fields.
|
|
126
|
+
Creates a Milvus collection schema from a JSON string.
|
|
135
127
|
|
|
136
128
|
Args:
|
|
137
|
-
json_schema (str): A JSON string defining the schema and indexes.
|
|
129
|
+
json_schema (str): A JSON string defining the schema.
|
|
138
130
|
|
|
139
131
|
Returns:
|
|
140
|
-
Milvus index parameters object.
|
|
132
|
+
A Milvus schema object.
|
|
133
|
+
|
|
134
|
+
Raises:
|
|
135
|
+
ValueError: If an unknown field type is encountered in the schema.
|
|
141
136
|
"""
|
|
142
137
|
schema_data = json.loads(json_schema)
|
|
138
|
+
schema = self.client.create_schema(auto_id=False, enable_dynamic_fields=True)
|
|
139
|
+
return self._build_collection_schema_from_dict(schema, schema_data)
|
|
140
|
+
|
|
141
|
+
async def acreate_collection_schema(self, json_schema: str):
|
|
142
|
+
"""
|
|
143
|
+
Asynchronously creates a Milvus collection schema from a JSON string.
|
|
144
|
+
"""
|
|
145
|
+
schema_data = json.loads(json_schema)
|
|
146
|
+
schema = AsyncMilvusClient.create_schema(auto_id=False, enable_dynamic_fields=True)
|
|
147
|
+
return self._build_collection_schema_from_dict(schema, schema_data)
|
|
148
|
+
|
|
149
|
+
def _build_index_params_from_dict(self, index_params, schema_data):
|
|
143
150
|
fields = schema_data['node_types']['Document']['properties']
|
|
144
|
-
|
|
145
|
-
index_params = self.client.prepare_index_params()
|
|
146
151
|
|
|
147
|
-
# Check if 'indexes' key exists
|
|
148
152
|
if 'indexes' in schema_data['node_types']['Document']:
|
|
149
153
|
indexes = schema_data['node_types']['Document']['indexes']
|
|
150
154
|
for index_name, index_details in indexes.items():
|
|
@@ -158,7 +162,6 @@ class MilvusSchemaManager:
|
|
|
158
162
|
params=params
|
|
159
163
|
)
|
|
160
164
|
|
|
161
|
-
# Automatic indexing for FLOAT_VECTOR fields
|
|
162
165
|
for field_name, field_info in fields.items():
|
|
163
166
|
if field_info['type'] == "FLOAT_VECTOR":
|
|
164
167
|
index_params.add_index(
|
|
@@ -167,9 +170,33 @@ class MilvusSchemaManager:
|
|
|
167
170
|
index_type="AUTOINDEX",
|
|
168
171
|
metric_type="L2"
|
|
169
172
|
)
|
|
170
|
-
|
|
171
173
|
return index_params
|
|
172
174
|
|
|
175
|
+
def create_index_params(self, json_schema: str):
|
|
176
|
+
"""
|
|
177
|
+
Creates index parameters from a JSON schema string.
|
|
178
|
+
|
|
179
|
+
This method defines indexes based on the 'indexes' section of the schema
|
|
180
|
+
and automatically creates an 'AUTOINDEX' for any FLOAT_VECTOR fields.
|
|
181
|
+
|
|
182
|
+
Args:
|
|
183
|
+
json_schema (str): A JSON string defining the schema and indexes.
|
|
184
|
+
|
|
185
|
+
Returns:
|
|
186
|
+
Milvus index parameters object.
|
|
187
|
+
"""
|
|
188
|
+
schema_data = json.loads(json_schema)
|
|
189
|
+
index_params = self.client.prepare_index_params()
|
|
190
|
+
return self._build_index_params_from_dict(index_params, schema_data)
|
|
191
|
+
|
|
192
|
+
async def acreate_index_params(self, json_schema: str):
|
|
193
|
+
"""
|
|
194
|
+
Asynchronously creates index parameters from a JSON schema string.
|
|
195
|
+
"""
|
|
196
|
+
schema_data = json.loads(json_schema)
|
|
197
|
+
index_params = AsyncMilvusClient.prepare_index_params()
|
|
198
|
+
return self._build_index_params_from_dict(index_params, schema_data)
|
|
199
|
+
|
|
173
200
|
def create_collection(self, collection_name: str, json_schema: str):
|
|
174
201
|
"""
|
|
175
202
|
Creates a new collection in Milvus.
|
|
@@ -182,6 +209,8 @@ class MilvusSchemaManager:
|
|
|
182
209
|
json_schema (str): The JSON string defining the collection's schema
|
|
183
210
|
and indexes.
|
|
184
211
|
"""
|
|
212
|
+
if not self.client:
|
|
213
|
+
raise ValueError("Synchronous client not provided for create_collection.")
|
|
185
214
|
schema = self.create_collection_schema(json_schema)
|
|
186
215
|
index_params = self.create_index_params(json_schema)
|
|
187
216
|
|
|
@@ -192,6 +221,23 @@ class MilvusSchemaManager:
|
|
|
192
221
|
enable_dynamic_fields=True # we need to enable dynamic fields for schema updates
|
|
193
222
|
)
|
|
194
223
|
|
|
224
|
+
async def acreate_collection(self, collection_name: str, json_schema: str):
|
|
225
|
+
"""
|
|
226
|
+
Asynchronously creates a new collection in Milvus.
|
|
227
|
+
"""
|
|
228
|
+
if not self.async_client:
|
|
229
|
+
raise ValueError("Asynchronous client not provided for acreate_collection.")
|
|
230
|
+
|
|
231
|
+
schema = await self.acreate_collection_schema(json_schema)
|
|
232
|
+
index_params = await self.acreate_index_params(json_schema)
|
|
233
|
+
|
|
234
|
+
await self.async_client.create_collection(
|
|
235
|
+
collection_name=collection_name,
|
|
236
|
+
schema=schema,
|
|
237
|
+
index_params=index_params,
|
|
238
|
+
enable_dynamic_fields=True
|
|
239
|
+
)
|
|
240
|
+
|
|
195
241
|
def validate_schema(self, json_schema: str) -> bool:
|
|
196
242
|
"""
|
|
197
243
|
Validates the given schema by attempting to create a collection schema and index params.
|
|
@@ -91,7 +91,7 @@ class SchemaMilvus(Milvus):
|
|
|
91
91
|
)
|
|
92
92
|
self.logger = logger or logging.getLogger(__name__)
|
|
93
93
|
self.collection_schema = None
|
|
94
|
-
self.schema_manager = MilvusSchemaManager(client=self.client)
|
|
94
|
+
self.schema_manager = MilvusSchemaManager(client=self.client, async_client=self.aclient)
|
|
95
95
|
|
|
96
96
|
def set_schema(self, schema: str):
|
|
97
97
|
"""
|
|
@@ -149,6 +149,29 @@ class SchemaMilvus(Milvus):
|
|
|
149
149
|
self.logger.error(f"Failed to create collection: {e}")
|
|
150
150
|
return False
|
|
151
151
|
|
|
152
|
+
async def acreate_collection(self) -> bool:
|
|
153
|
+
"""
|
|
154
|
+
Asynchronously validates the schema and creates the collection using the MilvusSchemaManager.
|
|
155
|
+
|
|
156
|
+
Returns:
|
|
157
|
+
bool: True if the collection is successfully created, False otherwise.
|
|
158
|
+
"""
|
|
159
|
+
if self.collection_schema is None:
|
|
160
|
+
self.logger.error("Collection schema is not set. Please set a schema using set_schema().")
|
|
161
|
+
return False
|
|
162
|
+
|
|
163
|
+
self.schema_manager.bind_async_client(self.aclient)
|
|
164
|
+
if not self.schema_manager.validate_schema(self.collection_schema):
|
|
165
|
+
self.logger.error("Failed to validate schema")
|
|
166
|
+
return False
|
|
167
|
+
try:
|
|
168
|
+
await self.schema_manager.acreate_collection(self.collection_name, self.collection_schema)
|
|
169
|
+
self.logger.info(f"Collection {self.collection_name} created successfully")
|
|
170
|
+
return True
|
|
171
|
+
except Exception as e:
|
|
172
|
+
self.logger.error(f"Failed to create collection asynchronously: {e}")
|
|
173
|
+
return False
|
|
174
|
+
|
|
152
175
|
def drop_collection(self, collection_name: Optional[str] = None) -> bool:
|
|
153
176
|
"""
|
|
154
177
|
Drops the collection using the Milvus client.
|
|
@@ -2,18 +2,20 @@
|
|
|
2
2
|
# @Author: Cursor
|
|
3
3
|
# @Date: 2025-02-12
|
|
4
4
|
# @Last Modified by: Gemini
|
|
5
|
-
# @Last Modified time: 2025-
|
|
5
|
+
# @Last Modified time: 2025-10-09
|
|
6
6
|
|
|
7
7
|
import logging
|
|
8
8
|
from typing import List, Dict, Union, Optional
|
|
9
9
|
from langchain_milvus import Milvus
|
|
10
10
|
from langchain_core.embeddings import Embeddings
|
|
11
11
|
from langchain_openai import AzureOpenAIEmbeddings
|
|
12
|
-
from pymilvus import MilvusClient
|
|
12
|
+
from pymilvus import MilvusClient, AsyncMilvusClient
|
|
13
13
|
import time
|
|
14
|
+
import asyncio
|
|
14
15
|
|
|
15
16
|
from ...services.init_services import get_model_balancer
|
|
16
17
|
from .schema_milvus import SchemaMilvus, DEFAULT_SCHEMA
|
|
18
|
+
from .milvus_schema_manager import MilvusSchemaManager
|
|
17
19
|
|
|
18
20
|
class VDBService(object):
|
|
19
21
|
"""
|
|
@@ -84,6 +86,7 @@ class VDBService(object):
|
|
|
84
86
|
>>> assert vector_store is same_vector_store
|
|
85
87
|
"""
|
|
86
88
|
_client: MilvusClient
|
|
89
|
+
_async_client: AsyncMilvusClient
|
|
87
90
|
_instances: Dict[str, Milvus] = {}
|
|
88
91
|
|
|
89
92
|
schema: str
|
|
@@ -111,6 +114,7 @@ class VDBService(object):
|
|
|
111
114
|
logger (logging.Logger, optional): Logger instance. Defaults to None.
|
|
112
115
|
"""
|
|
113
116
|
self.logger = logger or logging.getLogger(__name__)
|
|
117
|
+
self.collection_schema = None
|
|
114
118
|
|
|
115
119
|
if settings:
|
|
116
120
|
self.settings = settings
|
|
@@ -143,17 +147,19 @@ class VDBService(object):
|
|
|
143
147
|
raise ValueError(msg)
|
|
144
148
|
|
|
145
149
|
self._client = self._initialize_milvus_client(provider)
|
|
150
|
+
self._async_client = self._initialize_async_milvus_client(provider)
|
|
146
151
|
|
|
147
152
|
self.schema = schema
|
|
148
153
|
self.index_params = self.settings.get("index_params")
|
|
149
154
|
|
|
155
|
+
self.schema_manager = MilvusSchemaManager(client=self._client, async_client=self._async_client)
|
|
156
|
+
|
|
150
157
|
self.logger.info("VDBService initialized successfully")
|
|
151
158
|
|
|
152
|
-
def _initialize_milvus_client(self, provider: str) -> MilvusClient:
|
|
159
|
+
def _get_milvus_client_args(self, provider: str) -> dict:
|
|
153
160
|
"""
|
|
154
|
-
|
|
161
|
+
Constructs the arguments for Milvus/AsyncMilvus client initialization based on the provider.
|
|
155
162
|
"""
|
|
156
|
-
client_args = {}
|
|
157
163
|
if provider == "milvus":
|
|
158
164
|
host = self.connection_args.get("host", "localhost")
|
|
159
165
|
port = self.connection_args.get("port", 19530)
|
|
@@ -168,15 +174,20 @@ class VDBService(object):
|
|
|
168
174
|
"password": self.connection_args.get("password"),
|
|
169
175
|
"db_name": self.connection_args.get("db_name")
|
|
170
176
|
}
|
|
171
|
-
|
|
172
|
-
client_args = {k: v for k, v in client_args.items() if v is not None}
|
|
177
|
+
return {k: v for k, v in client_args.items() if v is not None}
|
|
173
178
|
|
|
174
179
|
elif provider == "zilliz":
|
|
175
|
-
|
|
180
|
+
return self.connection_args
|
|
176
181
|
else:
|
|
177
182
|
self.logger.error(f"Unsupported vector store provider: {provider}")
|
|
178
183
|
raise NotImplementedError(f"Vector store provider '{provider}' is not supported.")
|
|
179
184
|
|
|
185
|
+
def _initialize_milvus_client(self, provider: str) -> MilvusClient:
|
|
186
|
+
"""
|
|
187
|
+
Initializes and returns a MilvusClient with a retry mechanism.
|
|
188
|
+
"""
|
|
189
|
+
client_args = self._get_milvus_client_args(provider)
|
|
190
|
+
|
|
180
191
|
try:
|
|
181
192
|
# First attempt to connect
|
|
182
193
|
return MilvusClient(**client_args)
|
|
@@ -189,6 +200,22 @@ class VDBService(object):
|
|
|
189
200
|
self.logger.error(f"Failed to initialize MilvusClient on retry. Final error: {e_retry}")
|
|
190
201
|
raise RuntimeError(f"Could not initialize MilvusClient after retry: {e_retry}")
|
|
191
202
|
|
|
203
|
+
def _initialize_async_milvus_client(self, provider: str) -> AsyncMilvusClient:
|
|
204
|
+
"""
|
|
205
|
+
Initializes and returns an AsyncMilvusClient with a retry mechanism.
|
|
206
|
+
"""
|
|
207
|
+
client_args = self._get_milvus_client_args(provider)
|
|
208
|
+
try:
|
|
209
|
+
return AsyncMilvusClient(**client_args)
|
|
210
|
+
except Exception as e:
|
|
211
|
+
self.logger.error(f"Failed to initialize AsyncMilvusClient, trying again. Error: {e}")
|
|
212
|
+
# Second attempt after failure
|
|
213
|
+
try:
|
|
214
|
+
return AsyncMilvusClient(**client_args)
|
|
215
|
+
except Exception as e_retry:
|
|
216
|
+
self.logger.error(f"Failed to initialize AsyncMilvusClient on retry. Final error: {e_retry}")
|
|
217
|
+
raise RuntimeError(f"Could not initialize AsyncMilvusClient after retry: {e_retry}") from e_retry
|
|
218
|
+
|
|
192
219
|
def get_vector_client(self) -> MilvusClient:
|
|
193
220
|
"""
|
|
194
221
|
Returns the active MilvusClient instance.
|
|
@@ -198,6 +225,15 @@ class VDBService(object):
|
|
|
198
225
|
"""
|
|
199
226
|
return self._client
|
|
200
227
|
|
|
228
|
+
def get_async_vector_client(self) -> AsyncMilvusClient:
|
|
229
|
+
"""
|
|
230
|
+
Returns the active AsyncMilvusClient instance.
|
|
231
|
+
|
|
232
|
+
Returns:
|
|
233
|
+
AsyncMilvusClient: The initialized async client for interacting with the vector database.
|
|
234
|
+
"""
|
|
235
|
+
return self._async_client
|
|
236
|
+
|
|
201
237
|
def get_embeddings(self, from_model_balancer: bool = False, provider: Optional[str] = "azure-openai", model_type: Optional[str] = "embedding-large") -> Embeddings:
|
|
202
238
|
"""
|
|
203
239
|
Gets an embedding function, either from the model balancer or directly from settings.
|
|
@@ -276,6 +312,34 @@ class VDBService(object):
|
|
|
276
312
|
self.logger.error(f"An error occurred while ensuring collection '{collection_name}' : {e}")
|
|
277
313
|
raise RuntimeError(f"Failed to ensure collection '{collection_name}' .") from e
|
|
278
314
|
|
|
315
|
+
async def _aensure_collection_exists(self, collection_name: str, embeddings: Embeddings, check_existence: bool = True):
|
|
316
|
+
"""
|
|
317
|
+
Asynchronously checks if a collection exists and creates it if it doesn't.
|
|
318
|
+
"""
|
|
319
|
+
try:
|
|
320
|
+
client = self.get_async_vector_client()
|
|
321
|
+
if check_existence and not await client.has_collection(collection_name):
|
|
322
|
+
self.logger.info(f"Collection '{collection_name}' does not exist. Creating it.")
|
|
323
|
+
|
|
324
|
+
schema_milvus = SchemaMilvus(
|
|
325
|
+
embedding_function=embeddings,
|
|
326
|
+
collection_name=collection_name,
|
|
327
|
+
connection_args=self.connection_args,
|
|
328
|
+
index_params=self.index_params
|
|
329
|
+
)
|
|
330
|
+
|
|
331
|
+
schema_to_use = self.schema or DEFAULT_SCHEMA
|
|
332
|
+
if not self.schema:
|
|
333
|
+
self.logger.warning(f"No schema provided for VDBService. Using DEFAULT_SCHEMA for collection '{collection_name}'.")
|
|
334
|
+
|
|
335
|
+
schema_milvus.set_schema(schema_to_use)
|
|
336
|
+
|
|
337
|
+
if not await schema_milvus.acreate_collection():
|
|
338
|
+
raise RuntimeError(f"SchemaMilvus failed to create collection '{collection_name}'.")
|
|
339
|
+
except Exception as e:
|
|
340
|
+
self.logger.error(f"An error occurred while ensuring collection '{collection_name}' : {e}")
|
|
341
|
+
raise RuntimeError(f"Failed to ensure collection '{collection_name}' .") from e
|
|
342
|
+
|
|
279
343
|
def _is_good_connection(self, vdb_instance: Milvus, collection_name: str) -> tuple[bool, bool | None]:
|
|
280
344
|
"""
|
|
281
345
|
Checks if the Milvus instance has a good connection by verifying collection existence.
|
|
@@ -301,6 +365,21 @@ class VDBService(object):
|
|
|
301
365
|
self.logger.warning(f"Connection check failed for cached instance of '{collection_name}': {e}")
|
|
302
366
|
return False, None
|
|
303
367
|
|
|
368
|
+
async def _ais_good_connection(self, vdb_instance: Milvus, collection_name: str) -> tuple[bool, bool | None]:
|
|
369
|
+
"""
|
|
370
|
+
Asynchronously checks if the Milvus instance has a good connection.
|
|
371
|
+
"""
|
|
372
|
+
try:
|
|
373
|
+
collection_exists = await vdb_instance.aclient.has_collection(collection_name)
|
|
374
|
+
if collection_exists:
|
|
375
|
+
self.logger.debug(f"Connection for cached instance of '{collection_name}' is alive.")
|
|
376
|
+
else:
|
|
377
|
+
self.logger.warning(f"Collection '{collection_name}' not found for cached instance. It may have been dropped.")
|
|
378
|
+
return True, collection_exists
|
|
379
|
+
except Exception as e:
|
|
380
|
+
self.logger.warning(f"Connection check failed for cached instance of '{collection_name}': {e}")
|
|
381
|
+
return False, None
|
|
382
|
+
|
|
304
383
|
def get_vector_store(self, collection_name: str, embeddings: Embeddings = None, metric_type: str = "IP") -> Milvus:
|
|
305
384
|
"""
|
|
306
385
|
Gets a vector store instance, creating it if it doesn't exist for the collection.
|
|
@@ -373,6 +452,63 @@ class VDBService(object):
|
|
|
373
452
|
|
|
374
453
|
return vdb
|
|
375
454
|
|
|
455
|
+
async def aget_vector_store(self, collection_name: str, embeddings: Embeddings = None, metric_type: str = "IP") -> Milvus:
|
|
456
|
+
"""
|
|
457
|
+
Asynchronously gets a vector store instance, creating it if it doesn't exist.
|
|
458
|
+
"""
|
|
459
|
+
if not collection_name:
|
|
460
|
+
self.logger.error("aget_vector_store called with no collection_name.")
|
|
461
|
+
raise ValueError("collection_name must be provided.")
|
|
462
|
+
|
|
463
|
+
check_existence = True
|
|
464
|
+
if collection_name in self._instances:
|
|
465
|
+
instance = self._instances[collection_name]
|
|
466
|
+
is_connected, collection_exists = await self._ais_good_connection(instance, collection_name)
|
|
467
|
+
|
|
468
|
+
if is_connected and collection_exists:
|
|
469
|
+
self.logger.info(f"Returning existing vector store instance for collection: {collection_name}")
|
|
470
|
+
return instance
|
|
471
|
+
|
|
472
|
+
self.logger.warning(f"Cached instance for '{collection_name}' is invalid. Removing it from cache.")
|
|
473
|
+
del self._instances[collection_name]
|
|
474
|
+
|
|
475
|
+
if is_connected and not collection_exists:
|
|
476
|
+
check_existence = False
|
|
477
|
+
|
|
478
|
+
self.logger.info(f"Creating new vector store instance for collection: {collection_name}")
|
|
479
|
+
if embeddings is None:
|
|
480
|
+
embeddings = self.get_embeddings()
|
|
481
|
+
|
|
482
|
+
await self._aensure_collection_exists(collection_name, embeddings, check_existence=check_existence)
|
|
483
|
+
|
|
484
|
+
try:
|
|
485
|
+
self.logger.debug(f"Testing embedding function for collection '{collection_name}'...")
|
|
486
|
+
await embeddings.aembed_query("validation_test_string")
|
|
487
|
+
self.logger.debug("Embedding function is valid.")
|
|
488
|
+
except Exception as e:
|
|
489
|
+
self.logger.error(
|
|
490
|
+
f"The provided embedding function is invalid and failed with error: {e}. "
|
|
491
|
+
f"Cannot create a vector store for collection '{collection_name}'."
|
|
492
|
+
)
|
|
493
|
+
raise RuntimeError(f"Invalid embedding function provided.") from e
|
|
494
|
+
|
|
495
|
+
index_params = self.index_params or {
|
|
496
|
+
"metric_type": metric_type,
|
|
497
|
+
"index_type": "AUTOINDEX",
|
|
498
|
+
"params": {}
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
vdb = await asyncio.to_thread(
|
|
502
|
+
self._create_milvus_instance_with_retry,
|
|
503
|
+
collection_name=collection_name,
|
|
504
|
+
embeddings=embeddings,
|
|
505
|
+
index_params=index_params
|
|
506
|
+
)
|
|
507
|
+
|
|
508
|
+
self._instances[collection_name] = vdb
|
|
509
|
+
|
|
510
|
+
return vdb
|
|
511
|
+
|
|
376
512
|
def _create_milvus_instance_with_retry(self, collection_name: str, embeddings: Embeddings, index_params: dict) -> Milvus:
|
|
377
513
|
"""
|
|
378
514
|
Creates a Milvus instance with a retry mechanism for connection failures.
|
|
@@ -429,6 +565,36 @@ class VDBService(object):
|
|
|
429
565
|
del self._instances[collection_name]
|
|
430
566
|
self.logger.info(f"Removed '{collection_name}' from instance cache.")
|
|
431
567
|
|
|
568
|
+
async def adrop_collection(self, collection_name: str) -> None:
|
|
569
|
+
"""
|
|
570
|
+
Asynchronously deletes a collection from the vector database and removes it from the cache.
|
|
571
|
+
|
|
572
|
+
Args:
|
|
573
|
+
collection_name (str): The name of the collection to drop.
|
|
574
|
+
|
|
575
|
+
Raises:
|
|
576
|
+
ValueError: If collection_name is not provided.
|
|
577
|
+
RuntimeError: If the operation fails on the database side.
|
|
578
|
+
"""
|
|
579
|
+
if not collection_name:
|
|
580
|
+
self.logger.error("adrop_collection called without a collection_name.")
|
|
581
|
+
raise ValueError("collection_name must be provided.")
|
|
582
|
+
|
|
583
|
+
self.logger.info(f"Attempting to drop collection asynchronously: {collection_name}")
|
|
584
|
+
|
|
585
|
+
try:
|
|
586
|
+
client = self.get_async_vector_client()
|
|
587
|
+
await client.drop_collection(collection_name=collection_name)
|
|
588
|
+
self.logger.info(f"Successfully dropped collection asynchronously: {collection_name}")
|
|
589
|
+
except Exception as e:
|
|
590
|
+
self.logger.error(f"Failed to drop collection '{collection_name}' asynchronously: {e}")
|
|
591
|
+
raise RuntimeError(f"An error occurred while dropping collection '{collection_name}' asynchronously.") from e
|
|
592
|
+
finally:
|
|
593
|
+
# Whether successful or not, remove the stale instance from the cache.
|
|
594
|
+
if collection_name in self._instances:
|
|
595
|
+
del self._instances[collection_name]
|
|
596
|
+
self.logger.info(f"Removed '{collection_name}' from instance cache.")
|
|
597
|
+
|
|
432
598
|
def delete_data_by_filter(self, collection_name: str = None, filter: str = None) -> None:
|
|
433
599
|
""" Delete data by filter
|
|
434
600
|
|
|
@@ -446,6 +612,23 @@ class VDBService(object):
|
|
|
446
612
|
except Exception as e:
|
|
447
613
|
raise RuntimeError(f"delete collection data failed: {str(e)}")
|
|
448
614
|
|
|
615
|
+
async def adelete_data_by_filter(self, collection_name: str = None, filter: str = None) -> None:
|
|
616
|
+
""" Asynchronously delete data by filter
|
|
617
|
+
|
|
618
|
+
Args:
|
|
619
|
+
collection_name (str): collection_name
|
|
620
|
+
filter (str): filter
|
|
621
|
+
"""
|
|
622
|
+
self.logger.info(f"Delete data by filter asynchronously:{filter}")
|
|
623
|
+
|
|
624
|
+
try:
|
|
625
|
+
client=self.get_async_vector_client()
|
|
626
|
+
if collection_name is None or client is None or filter is None:
|
|
627
|
+
return RuntimeError(f"collection_name must be not null or check out your client to link milvus")
|
|
628
|
+
await client.delete(collection_name=collection_name, filter=filter)
|
|
629
|
+
except Exception as e:
|
|
630
|
+
raise RuntimeError(f"delete collection data failed: {str(e)}")
|
|
631
|
+
|
|
449
632
|
@staticmethod
|
|
450
633
|
def delete_old_indexes(url: str = None, vdb: Milvus = None) -> (bool | None):
|
|
451
634
|
""" Delete old indexes of the same source_url
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|