crewplus 0.2.9__py3-none-any.whl → 0.2.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of crewplus might be problematic. Click here for more details.
- crewplus/__init__.py +10 -0
- crewplus/services/__init__.py +5 -0
- crewplus/services/init_services.py +3 -7
- crewplus/services/model_load_balancer.py +0 -1
- crewplus/utils/__init__.py +4 -0
- crewplus/vectorstores/milvus/__init__.py +5 -0
- crewplus/vectorstores/milvus/schema_milvus.py +1 -26
- crewplus/vectorstores/milvus/vdb_service.py +341 -426
- {crewplus-0.2.9.dist-info → crewplus-0.2.10.dist-info}/METADATA +2 -2
- crewplus-0.2.10.dist-info/RECORD +17 -0
- crewplus-0.2.9.dist-info/RECORD +0 -20
- docs/GeminiChatModel.md +0 -226
- docs/ModelLoadBalancer.md +0 -134
- docs/VDBService.md +0 -238
- docs/index.md +0 -23
- {crewplus-0.2.9.dist-info → crewplus-0.2.10.dist-info}/WHEEL +0 -0
- {crewplus-0.2.9.dist-info → crewplus-0.2.10.dist-info}/entry_points.txt +0 -0
- {crewplus-0.2.9.dist-info → crewplus-0.2.10.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,427 +1,342 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
# @Author: Cursor
|
|
3
|
-
# @Date: 2025-02-12
|
|
4
|
-
# @Last Modified by: Gemini
|
|
5
|
-
# @Last Modified time: 2025-07-
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
from typing import List, Dict, Union, Optional
|
|
9
|
-
from langchain_milvus import Zilliz
|
|
10
|
-
from langchain_core.embeddings import Embeddings
|
|
11
|
-
from langchain_openai import AzureOpenAIEmbeddings
|
|
12
|
-
from pymilvus import MilvusClient
|
|
13
|
-
|
|
14
|
-
from
|
|
15
|
-
from .schema_milvus import SchemaMilvus
|
|
16
|
-
|
|
17
|
-
class VDBService(object):
|
|
18
|
-
"""
|
|
19
|
-
A service to manage connections to Milvus/Zilliz vector databases and embedding models.
|
|
20
|
-
|
|
21
|
-
This service centralizes the configuration and instantiation of the Milvus client
|
|
22
|
-
and provides helper methods to get embedding functions and vector store instances.
|
|
23
|
-
|
|
24
|
-
Args:
|
|
25
|
-
settings (dict
|
|
26
|
-
and embedding models.
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
...
|
|
43
|
-
...
|
|
44
|
-
...
|
|
45
|
-
...
|
|
46
|
-
...
|
|
47
|
-
...
|
|
48
|
-
...
|
|
49
|
-
...
|
|
50
|
-
...
|
|
51
|
-
...
|
|
52
|
-
...
|
|
53
|
-
...
|
|
54
|
-
...
|
|
55
|
-
...
|
|
56
|
-
...
|
|
57
|
-
...
|
|
58
|
-
...
|
|
59
|
-
...
|
|
60
|
-
... }
|
|
61
|
-
... }
|
|
62
|
-
...
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
>>>
|
|
69
|
-
>>>
|
|
70
|
-
>>> #
|
|
71
|
-
>>>
|
|
72
|
-
>>>
|
|
73
|
-
>>>
|
|
74
|
-
>>>
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
self.
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
return
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
if
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
def
|
|
279
|
-
"""
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
self.logger.
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
)
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
Args:
|
|
343
|
-
url (str): source url
|
|
344
|
-
vdb (Zilliz): Zilliz instance
|
|
345
|
-
"""
|
|
346
|
-
self.logger.info(f"Delete old indexes of the same source_url:{url}")
|
|
347
|
-
|
|
348
|
-
if url is None or vdb is None:
|
|
349
|
-
return None
|
|
350
|
-
|
|
351
|
-
# Delete indexes of the same source_url
|
|
352
|
-
expr = f'source_url == "{url}" or source == "{url}"'
|
|
353
|
-
pks = vdb.get_pks(expr)
|
|
354
|
-
|
|
355
|
-
# Delete entities by pks
|
|
356
|
-
if pks is not None and len(pks) > 0 :
|
|
357
|
-
res = vdb.delete(pks)
|
|
358
|
-
self.logger.info("Deleted old indexes result: " + str(res))
|
|
359
|
-
return res
|
|
360
|
-
|
|
361
|
-
def delete_old_indexes_by_id(self, source_id: str = None, vdb: Zilliz = None) -> (bool | None):
|
|
362
|
-
""" Delete old indexes of the same source_id
|
|
363
|
-
|
|
364
|
-
Args:
|
|
365
|
-
source_id (str): source id
|
|
366
|
-
"""
|
|
367
|
-
self.logger.info(f"Delete old indexes of the same source_id:{source_id}")
|
|
368
|
-
|
|
369
|
-
if source_id is None or vdb is None:
|
|
370
|
-
return None
|
|
371
|
-
|
|
372
|
-
# Delete indexes of the same source_id
|
|
373
|
-
expr = f'source_id == "{source_id}"'
|
|
374
|
-
pks = vdb.get_pks(expr)
|
|
375
|
-
|
|
376
|
-
# Delete entities by pks
|
|
377
|
-
if pks is not None and len(pks) > 0 :
|
|
378
|
-
res = vdb.delete(pks)
|
|
379
|
-
self.logger.info("Deleted old indexes result: " + str(res))
|
|
380
|
-
return res
|
|
381
|
-
|
|
382
|
-
def drop_collection(self, collection_name: str) -> None:
|
|
383
|
-
"""
|
|
384
|
-
Deletes a collection from the vector database and removes it from the cache.
|
|
385
|
-
|
|
386
|
-
Args:
|
|
387
|
-
collection_name (str): The name of the collection to drop.
|
|
388
|
-
|
|
389
|
-
Raises:
|
|
390
|
-
ValueError: If collection_name is not provided.
|
|
391
|
-
RuntimeError: If the operation fails on the database side.
|
|
392
|
-
"""
|
|
393
|
-
if not collection_name:
|
|
394
|
-
self.logger.error("drop_collection called without a collection_name.")
|
|
395
|
-
raise ValueError("collection_name must be provided.")
|
|
396
|
-
|
|
397
|
-
self.logger.info(f"Attempting to drop collection: {collection_name}")
|
|
398
|
-
|
|
399
|
-
try:
|
|
400
|
-
client = self.get_vector_client()
|
|
401
|
-
client.drop_collection(collection_name=collection_name)
|
|
402
|
-
self.logger.info(f"Successfully dropped collection: {collection_name}")
|
|
403
|
-
except Exception as e:
|
|
404
|
-
self.logger.error(f"Failed to drop collection '{collection_name}': {e}")
|
|
405
|
-
raise RuntimeError(f"An error occurred while dropping collection '{collection_name}'.") from e
|
|
406
|
-
finally:
|
|
407
|
-
# Whether successful or not, remove the stale instance from the cache.
|
|
408
|
-
if collection_name in self._instances:
|
|
409
|
-
del self._instances[collection_name]
|
|
410
|
-
self.logger.info(f"Removed '{collection_name}' from instance cache.")
|
|
411
|
-
|
|
412
|
-
def delete_data_by_filter(self, collection_name: str = None, filter: str = None) -> None:
|
|
413
|
-
""" Delete data by filter
|
|
414
|
-
|
|
415
|
-
Args:
|
|
416
|
-
collection_name (str): collection_name
|
|
417
|
-
filter (str): filter
|
|
418
|
-
"""
|
|
419
|
-
self.logger.info(f"Delete data by filter:{filter}")
|
|
420
|
-
|
|
421
|
-
try:
|
|
422
|
-
client=self.get_vector_client()
|
|
423
|
-
if collection_name is None or client is None or filter is None:
|
|
424
|
-
return RuntimeError(f"collection_name must be not null or check out your client to link milvus")
|
|
425
|
-
client.delete(collection_name=collection_name, filter=filter)
|
|
426
|
-
except Exception as e:
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# @Author: Cursor
|
|
3
|
+
# @Date: 2025-02-12
|
|
4
|
+
# @Last Modified by: Gemini
|
|
5
|
+
# @Last Modified time: 2025-07-01
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from typing import List, Dict, Union, Optional
|
|
9
|
+
from langchain_milvus import Zilliz
|
|
10
|
+
from langchain_core.embeddings import Embeddings
|
|
11
|
+
from langchain_openai import AzureOpenAIEmbeddings
|
|
12
|
+
from pymilvus import MilvusClient
|
|
13
|
+
|
|
14
|
+
from crewplus.services.init_services import get_model_balancer
|
|
15
|
+
from crewplus.vectorstores.milvus.schema_milvus import SchemaMilvus
|
|
16
|
+
|
|
17
|
+
class VDBService(object):
    """
    A service to manage connections to Milvus/Zilliz vector databases and embedding models.

    This service centralizes the configuration and instantiation of the Milvus client
    and provides helper methods to get embedding functions and vector store instances.

    Args:
        settings (dict): A dictionary containing configuration for the vector store
                         and embedding models.
        schema (str, optional): The schema definition for a collection. Defaults to None.
        logger (logging.Logger, optional): An optional logger instance. Defaults to None.

    Raises:
        ValueError: If required configurations are missing from the settings dictionary.
        NotImplementedError: If an unsupported provider is specified.
        RuntimeError: If the MilvusClient fails to initialize after a retry.

    Example:
        >>> settings = {
        ...     "embedder": {
        ...         "provider": "azure-openai",
        ...         "config": {
        ...             "model": "text-embedding-3-small",
        ...             "api_version": "2023-05-15",
        ...             "api_key": "YOUR_AZURE_OPENAI_KEY",
        ...             "openai_base_url": "YOUR_AZURE_OPENAI_ENDPOINT",
        ...             "embedding_dims": 1536
        ...         }
        ...     },
        ...     "vector_store": {
        ...         "provider": "milvus",
        ...         "config": {
        ...             "host": "localhost",
        ...             "port": 19530,
        ...             "user": "root",
        ...             "password": "password",
        ...             "db_name": "default"
        ...         }
        ...     },
        ...     "index_params": {
        ...         "metric_type": "L2",
        ...         "index_type": "AUTOINDEX",
        ...         "params": {}
        ...     }
        ... }
        >>> vdb_service = VDBService(settings=settings)
        >>> # Get the raw Milvus client
        >>> client = vdb_service.get_vector_client()
        >>> print(client.list_collections())
        >>> # Get an embedding function
        >>> embeddings = vdb_service.get_embeddings()
        >>> print(embeddings)
        >>> # Get a LangChain vector store instance (will be cached)
        >>> vector_store = vdb_service.get_vector_store(collection_name="my_collection")
        >>> print(vector_store)
        >>> same_vector_store = vdb_service.get_vector_store(collection_name="my_collection")
        >>> assert vector_store is same_vector_store
    """
    _client: "MilvusClient"
    # NOTE: this dict lives on the class, so the cache (keyed by collection
    # name only) is shared by every VDBService instance in the process.
    _instances: Dict[str, "Zilliz"] = {}

    schema: Optional[str]
    embedding_function: "Embeddings"
    index_params: dict
    connection_args: dict
    settings: dict

    def __init__(self, settings: dict, schema: Optional[str] = None, logger: Optional[logging.Logger] = None):
        """
        Initializes the VDBService.

        Args:
            settings (dict): Configuration dictionary for the service. Must contain a
                'vector_store' entry with non-empty 'provider' and 'config' values.
            schema (str, optional): Default schema for new collections. Defaults to None.
            logger (logging.Logger, optional): Logger instance. Defaults to a logger
                named after this module.

        Raises:
            ValueError: If 'vector_store', or its 'provider' / 'config', is missing.
            NotImplementedError: If the provider is neither 'milvus' nor 'zilliz'.
            RuntimeError: If the MilvusClient cannot be created after one retry.
        """
        self.logger = logger or logging.getLogger(__name__)
        self.settings = settings

        vector_store_settings = self.settings.get("vector_store")
        if not vector_store_settings:
            msg = "'vector_store' not found in settings"
            self.logger.error(msg)
            raise ValueError(msg)

        provider = vector_store_settings.get("provider")
        self.connection_args = vector_store_settings.get("config")

        if not provider or not self.connection_args:
            msg = "'provider' or 'config' not found in 'vector_store' settings"
            self.logger.error(msg)
            raise ValueError(msg)

        self._client = self._initialize_milvus_client(provider)

        self.schema = schema
        self.index_params = self.settings.get("index_params")

        self.logger.info("VDBService initialized successfully")

    def _initialize_milvus_client(self, provider: str) -> "MilvusClient":
        """
        Initializes and returns a MilvusClient, retrying once on failure.

        Args:
            provider (str): Either "milvus" (client arguments are built from
                host/port/user/password/db_name in ``self.connection_args``) or
                "zilliz" (``self.connection_args`` is passed through unchanged).

        Returns:
            MilvusClient: The connected client.

        Raises:
            NotImplementedError: If the provider is not supported.
            RuntimeError: If both connection attempts fail.
        """
        if provider == "milvus":
            host = self.connection_args.get("host", "localhost")
            port = self.connection_args.get("port", 19530)

            # Use https for remote hosts, and http for local connections.
            scheme = "http" if host in ("localhost", "127.0.0.1") else "https"

            candidate_args = {
                "uri": f"{scheme}://{host}:{port}",
                "user": self.connection_args.get("user"),
                "password": self.connection_args.get("password"),
                "db_name": self.connection_args.get("db_name"),
            }
            # Filter out None values to use client defaults.
            client_args = {k: v for k, v in candidate_args.items() if v is not None}
        elif provider == "zilliz":
            client_args = self.connection_args
        else:
            self.logger.error(f"Unsupported vector store provider: {provider}")
            raise NotImplementedError(f"Vector store provider '{provider}' is not supported.")

        # First attempt to connect.
        try:
            return MilvusClient(**client_args)
        except Exception as e:
            self.logger.error(f"Failed to initialize MilvusClient, trying again. Error: {e}")

        # Second (final) attempt after failure.
        try:
            return MilvusClient(**client_args)
        except Exception as e_retry:
            self.logger.error(f"Failed to initialize MilvusClient on retry. Final error: {e_retry}")
            raise RuntimeError(f"Could not initialize MilvusClient after retry: {e_retry}") from e_retry

    def get_vector_client(self) -> "MilvusClient":
        """
        Returns the active MilvusClient instance.

        Returns:
            MilvusClient: The initialized client for interacting with the vector database.
        """
        return self._client

    def get_embeddings(self, from_model_balancer: bool = False, model_type: Optional[str] = "embedding-large") -> "Embeddings":
        """
        Gets an embedding function, either from the model balancer or directly from settings.

        Args:
            from_model_balancer (bool): If True, uses the central model balancer service.
                                        If False, creates a new instance based on 'embedder' settings.
            model_type (str, optional): The type of model to get from the balancer. Defaults to "embedding-large".

        Returns:
            Embeddings: An instance of a LangChain embedding model.

        Raises:
            ValueError: If the 'embedder' settings, or their 'provider' / 'config', are missing.
            NotImplementedError: If the embedder provider is not "azure-openai".
        """
        if from_model_balancer:
            model_balancer = get_model_balancer()
            return model_balancer.get_model(model_type=model_type)

        embedder_config = self.settings.get("embedder")
        if not embedder_config:
            self.logger.error("'embedder' configuration not found in settings.")
            raise ValueError("'embedder' configuration not found in settings.")

        provider = embedder_config.get("provider")
        config = embedder_config.get("config")

        if not provider or not config:
            self.logger.error("Embedder 'provider' or 'config' not found in settings.")
            raise ValueError("Embedder 'provider' or 'config' not found in settings.")

        if provider != "azure-openai":
            self.logger.error(f"Unsupported embedding provider: {provider}")
            raise NotImplementedError(f"Embedding provider '{provider}' is not supported yet.")

        # Map the settings config to AzureOpenAIEmbeddings parameters.
        azure_config = {
            "azure_deployment": config.get("model"),
            "openai_api_version": config.get("api_version"),
            "api_key": config.get("api_key"),
            "azure_endpoint": config.get("openai_base_url"),
            "dimensions": config.get("embedding_dims"),
            "chunk_size": config.get("chunk_size", 16),
            "request_timeout": config.get("request_timeout", 60),
            "max_retries": config.get("max_retries", 2),
        }
        # Filter out None values to use client defaults.
        azure_config = {k: v for k, v in azure_config.items() if v is not None}

        return AzureOpenAIEmbeddings(**azure_config)

    def get_vector_store(self, collection_name: str, embeddings: Optional["Embeddings"] = None, metric_type: str = "L2") -> "Zilliz":
        """
        Gets a vector store instance, creating it if it doesn't exist for the collection.

        This method caches instances by collection name to avoid re-instantiation.
        On a cache hit the ``embeddings`` and ``metric_type`` arguments are ignored.

        Args:
            collection_name (str): The name of the collection in the vector database.
            embeddings (Embeddings, optional): An embedding model instance. If None, one is
                created via ``get_embeddings()``.
            metric_type (str): The distance metric for the index. Defaults to "L2". Only
                used when the settings provide no 'index_params'.

        Returns:
            Zilliz: LangChain Zilliz instance, which is compatible with both Zilliz and Milvus.

        Raises:
            ValueError: If collection_name is empty.
        """
        if not collection_name:
            self.logger.error("get_vector_store called with no collection_name.")
            raise ValueError("collection_name must be provided.")

        # Return the cached instance if it already exists.
        if collection_name in self._instances:
            self.logger.info(f"Returning existing vector store instance for collection: {collection_name}")
            return self._instances[collection_name]

        self.logger.info(f"Creating new vector store instance for collection: {collection_name}")
        if embeddings is None:
            embeddings = self.get_embeddings()

        # Index parameters from settings take precedence over metric_type.
        index_params = self.index_params or {
            "metric_type": metric_type,
            "index_type": "AUTOINDEX",
            "params": {},
        }

        vdb = Zilliz(
            embedding_function=embeddings,
            collection_name=collection_name,
            connection_args=self.connection_args,
            index_params=index_params,
        )

        # Cache the newly created instance.
        self._instances[collection_name] = vdb

        return vdb

    def delete_old_indexes(self, url: Optional[str] = None, vdb: Optional["Zilliz"] = None) -> None:
        """ Delete old indexes of the same source_url

        Looks up the primary keys of all entities whose ``source`` field equals
        ``url`` and deletes them. Does nothing when either argument is None.

        Args:
            url (str): source url
            vdb (Zilliz): vector store to delete from
        """
        if url is None or vdb is None:
            return

        # Delete indexes of the same source_url.
        # NOTE(review): url is interpolated unescaped; a value containing a
        # double quote would produce a malformed filter expression.
        expr = f'source in ["{url}"]'
        pks = vdb.get_pks(expr)

        # Delete entities by pks
        if pks is not None and len(pks) > 0:
            old_items = vdb.delete(pks)
            self.logger.info("ingesting document -- delete old indexes -- " + str(old_items))

    def delete_old_indexes_by_id(self, id: Optional[str] = None, vdb: Optional["Zilliz"] = None) -> None:
        """ Delete old indexes of the same source_id

        Looks up the primary keys of all entities whose ``source_id`` field equals
        ``id`` and deletes them. Does nothing when either argument is None.

        Args:
            id (str): source id
            vdb (Zilliz): vector store to delete from
        """
        self.logger.info(f"Delete old indexes of the same source_id:{id}")

        if id is None or vdb is None:
            return

        # Delete indexes of the same source_id.
        # NOTE(review): id is interpolated unescaped; a value containing a
        # double quote would produce a malformed filter expression.
        expr = f'source_id in ["{id}"]'
        pks = vdb.get_pks(expr)

        # Delete entities by pks
        if pks is not None and len(pks) > 0:
            old_items = vdb.delete(pks)
            self.logger.info("ingesting document -- delete old indexes -- " + str(old_items))

    def drop_collection(self, collection_name: str) -> None:
        """
        Deletes a collection from the vector database and removes it from the cache.

        Args:
            collection_name (str): The name of the collection to drop.

        Raises:
            ValueError: If collection_name is not provided.
            RuntimeError: If the operation fails on the database side.
        """
        if not collection_name:
            self.logger.error("drop_collection called without a collection_name.")
            raise ValueError("collection_name must be provided.")

        self.logger.info(f"Attempting to drop collection: {collection_name}")

        try:
            client = self.get_vector_client()
            client.drop_collection(collection_name=collection_name)
            self.logger.info(f"Successfully dropped collection: {collection_name}")
        except Exception as e:
            self.logger.error(f"Failed to drop collection '{collection_name}': {e}")
            raise RuntimeError(f"An error occurred while dropping collection '{collection_name}'.") from e
        finally:
            # Whether successful or not, remove the stale instance from the cache.
            if collection_name in self._instances:
                del self._instances[collection_name]
                self.logger.info(f"Removed '{collection_name}' from instance cache.")

    def delete_data_by_filter(self, collection_name: Optional[str] = None, filter: Optional[str] = None) -> None:
        """ Delete data by filter

        Deletes every entity in ``collection_name`` that matches the ``filter``
        expression, using the service's MilvusClient.

        Args:
            collection_name (str): name of the collection to delete from
            filter (str): filter expression selecting the entities to delete

        Raises:
            RuntimeError: If collection_name, filter or the client is missing,
                or if the delete call fails.
        """
        self.logger.info(f"Delete data by filter:{filter}")

        client = self.get_vector_client()
        if collection_name is None or client is None or filter is None:
            # Raise (rather than return) the error so a bad call cannot go unnoticed.
            raise RuntimeError("collection_name must be not null or check out your client to link milvus")

        try:
            client.delete(collection_name=collection_name, filter=filter)
        except Exception as e:
            raise RuntimeError(f"delete collection data failed: {str(e)}") from e
|