crewplus 0.2.13__tar.gz → 0.2.15__tar.gz

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Potentially problematic release.


This version of crewplus might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: crewplus
3
- Version: 0.2.13
3
+ Version: 0.2.15
4
4
  Summary: Base services for CrewPlus AI applications
5
5
  Author-Email: Tim Liu <tim@opsmateai.com>
6
6
  License: MIT
@@ -6,7 +6,7 @@
6
6
 
7
7
  import logging
8
8
  from typing import List, Dict, Union, Optional
9
- from langchain_milvus import Zilliz
9
+ from langchain_milvus import Milvus
10
10
  from langchain_core.embeddings import Embeddings
11
11
  from langchain_openai import AzureOpenAIEmbeddings
12
12
  from pymilvus import MilvusClient
@@ -60,8 +60,8 @@ class VDBService(object):
60
60
  ... }
61
61
  ... },
62
62
  ... "index_params": {
63
- ... "metric_type": "L2",
64
- ... "index_type": "AUTOINDEX",
63
+ ... "metric_type": "IP",
64
+ ... "index_type": "HNSW",
65
65
  ... "params": {}
66
66
  ... }
67
67
  ... }
@@ -83,7 +83,7 @@ class VDBService(object):
83
83
  >>> assert vector_store is same_vector_store
84
84
  """
85
85
  _client: MilvusClient
86
- _instances: Dict[str, Zilliz] = {}
86
+ _instances: Dict[str, Milvus] = {}
87
87
 
88
88
  schema: str
89
89
  embedding_function: Embeddings
@@ -245,7 +245,7 @@ class VDBService(object):
245
245
  self.logger.error(f"Unsupported embedding provider: {provider}")
246
246
  raise NotImplementedError(f"Embedding provider '{provider}' is not supported yet.")
247
247
 
248
- def _ensure_collection_exists(self, collection_name: str, embeddings: Embeddings):
248
+ def _ensure_collection_exists(self, collection_name: str, embeddings: Embeddings, check_existence: bool = True):
249
249
  """
250
250
  Checks if a collection exists and creates it if it doesn't.
251
251
  This operation is wrapped in a try-except block to handle potential failures
@@ -253,7 +253,7 @@ class VDBService(object):
253
253
  """
254
254
  try:
255
255
  client = self.get_vector_client()
256
- if not client.has_collection(collection_name):
256
+ if check_existence and not client.has_collection(collection_name):
257
257
  self.logger.info(f"Collection '{collection_name}' does not exist. Creating it.")
258
258
 
259
259
  schema_milvus = SchemaMilvus(
@@ -275,7 +275,32 @@ class VDBService(object):
275
275
  self.logger.error(f"An error occurred while ensuring collection '{collection_name}' : {e}")
276
276
  raise RuntimeError(f"Failed to ensure collection '{collection_name}' .") from e
277
277
 
278
- def get_vector_store(self, collection_name: str, embeddings: Embeddings = None, metric_type: str = "L2") -> Zilliz:
278
+ def _is_good_connection(self, vdb_instance: Milvus, collection_name: str) -> tuple[bool, bool | None]:
279
+ """
280
+ Checks if the Milvus instance has a good connection by verifying collection existence.
281
+
282
+ Args:
283
+ vdb_instance (Milvus): The cached vector store instance.
284
+ collection_name (str): The name of the collection to check.
285
+
286
+ Returns:
287
+ tuple[bool, bool | None]: A tuple of (is_connected, collection_exists).
288
+ collection_exists is None if the connection failed.
289
+ """
290
+ try:
291
+ # Use has_collection as a lightweight way to verify the connection and collection status.
292
+ # If the server is unreachable, this will raise an exception.
293
+ collection_exists = vdb_instance.client.has_collection(collection_name)
294
+ if collection_exists:
295
+ self.logger.debug(f"Connection for cached instance of '{collection_name}' is alive.")
296
+ else:
297
+ self.logger.warning(f"Collection '{collection_name}' not found for cached instance. It may have been dropped.")
298
+ return True, collection_exists
299
+ except Exception as e:
300
+ self.logger.warning(f"Connection check failed for cached instance of '{collection_name}': {e}")
301
+ return False, None
302
+
303
+ def get_vector_store(self, collection_name: str, embeddings: Embeddings = None, metric_type: str = "IP") -> Milvus:
279
304
  """
280
305
  Gets a vector store instance, creating it if it doesn't exist for the collection.
281
306
  This method validates both the embedding function and the vector store connection
@@ -284,26 +309,38 @@ class VDBService(object):
284
309
  Args:
285
310
  collection_name (str): The name of the collection in the vector database.
286
311
  embeddings (Embeddings, optional): An embedding model instance. If None, one is created.
287
- metric_type (str): The distance metric for the index. Defaults to "L2".
312
+ metric_type (str): The distance metric for the index. Defaults to "IP".
288
313
 
289
314
  Returns:
290
- Zilliz: LangChain Zilliz instance, which is compatible with both Zilliz and Milvus.
315
+ Milvus: LangChain Milvus instance, which is compatible with both Zilliz and Milvus.
291
316
  """
292
317
  if not collection_name:
293
318
  self.logger.error("get_vector_store called with no collection_name.")
294
319
  raise ValueError("collection_name must be provided.")
295
320
 
296
- # Return the cached instance if it already exists.
321
+ check_existence = True
322
+ # Check for a cached instance and validate its connection before returning.
297
323
  if collection_name in self._instances:
298
- self.logger.info(f"Returning existing vector store instance for collection: {collection_name}")
299
- return self._instances[collection_name]
324
+ instance = self._instances[collection_name]
325
+ is_connected, collection_exists = self._is_good_connection(instance, collection_name)
326
+
327
+ if is_connected and collection_exists:
328
+ self.logger.info(f"Returning existing vector store instance for collection: {collection_name}")
329
+ return instance
330
+
331
+ self.logger.warning(f"Cached instance for '{collection_name}' is invalid. Removing it from cache.")
332
+ del self._instances[collection_name]
333
+
334
+ if is_connected and not collection_exists:
335
+ # We know the collection doesn't exist, so no need to check again.
336
+ check_existence = False
300
337
 
301
338
  self.logger.info(f"Creating new vector store instance for collection: {collection_name}")
302
339
  if embeddings is None:
303
340
  embeddings = self.get_embeddings()
304
341
 
305
342
  # Ensure the collection exists before proceeding.
306
- self._ensure_collection_exists(collection_name, embeddings)
343
+ self._ensure_collection_exists(collection_name, embeddings, check_existence=check_existence)
307
344
 
308
345
  # 1. Validate the embedding function before proceeding.
309
346
  try:
@@ -317,14 +354,14 @@ class VDBService(object):
317
354
  )
318
355
  raise RuntimeError(f"Invalid embedding function provided.") from e
319
356
 
320
- # If embeddings are valid, proceed to create the Zilliz instance.
357
+ # If embeddings are valid, proceed to create the Milvus instance.
321
358
  index_params = self.index_params or {
322
359
  "metric_type": metric_type,
323
360
  "index_type": "AUTOINDEX",
324
361
  "params": {}
325
362
  }
326
363
 
327
- vdb = Zilliz(
364
+ vdb = Milvus(
328
365
  embedding_function=embeddings,
329
366
  collection_name=collection_name,
330
367
  connection_args=self.connection_args,
@@ -336,12 +373,12 @@ class VDBService(object):
336
373
 
337
374
  return vdb
338
375
 
339
- def delete_old_indexes(self, url: str = None, vdb: Zilliz = None) -> (bool | None):
376
+ def delete_old_indexes(self, url: str = None, vdb: Milvus = None) -> (bool | None):
340
377
  """ Delete old indexes of the same source_url
341
378
 
342
379
  Args:
343
380
  url (str): source url
344
- vdb (Zilliz): Zilliz instance
381
+ vdb (Milvus): Milvus/Zilliz instance
345
382
  """
346
383
  self.logger.info(f"Delete old indexes of the same source_url:{url}")
347
384
 
@@ -358,7 +395,7 @@ class VDBService(object):
358
395
  self.logger.info("Deleted old indexes result: " + str(res))
359
396
  return res
360
397
 
361
- def delete_old_indexes_by_id(self, source_id: str = None, vdb: Zilliz = None) -> (bool | None):
398
+ def delete_old_indexes_by_id(self, source_id: str = None, vdb: Milvus = None) -> (bool | None):
362
399
  """ Delete old indexes of the same source_id
363
400
 
364
401
  Args:
@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
6
6
 
7
7
  [project]
8
8
  name = "crewplus"
9
- version = "0.2.13"
9
+ version = "0.2.15"
10
10
  description = "Base services for CrewPlus AI applications"
11
11
  authors = [
12
12
  { name = "Tim Liu", email = "tim@opsmateai.com" },
File without changes
File without changes
File without changes
File without changes