PyPI - crewplus - Versions diffs - 0.2.70__tar.gz → 0.2.71__tar.gz - Mend

crewplus 0.2.70.tar.gz → 0.2.71.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of crewplus might be problematic. Click here for more details.

Files changed (24) hide show

{crewplus-0.2.70 → crewplus-0.2.71}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: crewplus
-Version: 0.2.70
+Version: 0.2.71
 Summary: Base services for CrewPlus AI applications
 Author-Email: Tim Liu <tim@opsmateai.com>
 License: MIT

{crewplus-0.2.70 → crewplus-0.2.71}/crewplus/vectorstores/milvus/vdb_service.py RENAMED Viewed

@@ -5,14 +5,15 @@
 # @Last Modified time: 2025-10-09
 import logging
-from typing import List, Dict, Union, Optional
+from typing import List, Dict, Union, Optional, Any
 from langchain_milvus import Milvus
 from langchain_core.embeddings import Embeddings
 from langchain_openai import AzureOpenAIEmbeddings
-from pymilvus import MilvusClient, AsyncMilvusClient
+from pymilvus import MilvusClient, AsyncMilvusClient, connections
 import time
 import asyncio
 import uuid
+from collections import defaultdict
 from ...services.init_services import get_model_balancer
 from .schema_milvus import SchemaMilvus, DEFAULT_SCHEMA
@@ -96,6 +97,7 @@ class VDBService(object):
     _async_client: Optional[AsyncMilvusClient] = None
     _instances: Dict[str, Milvus] = {}
     _async_instances: Dict[str, Milvus] = {}
+    _async_instance_locks: Dict[str, asyncio.Lock] = defaultdict(asyncio.Lock)
     schema: str
     embedding_function: Embeddings
@@ -168,6 +170,10 @@ class VDBService(object):
         # lazy-initialize async milvus
         # self._async_client = self._initialize_async_milvus_client(provider)
+        # Do not initialize the async client here.
+        # It must be lazily initialized within an async context.
+        self._async_client: Optional[AsyncMilvusClient] = None
         self.schema = schema
         self.index_params = self.settings.get("index_params")
@@ -256,12 +262,16 @@ class VDBService(object):
     async def aget_async_vector_client(self) -> AsyncMilvusClient:
         """
-        Asynchronously returns the active AsyncMilvusClient instance, initializing it if necessary.
-        Returns:
-            AsyncMilvusClient: The initialized async client for interacting with the vector database.
+        Lazily initializes and returns the AsyncMilvusClient.
+        This ensures the client is created within the running event loop.
         """
-        return await self._get_or_create_async_client()
+        if self._async_client is None:
+            self.logger.info("Lazily initializing AsyncMilvusClient...")
+            client_args = self._get_milvus_client_args(self._provider)
+            # Use the dedicated async alias
+            client_args['alias'] = self.async_alias
+            self._async_client = AsyncMilvusClient(**client_args)
+        return self._async_client
     def get_vector_field(self, collection_name: str) -> str:
         """
@@ -370,6 +380,7 @@ class VDBService(object):
         Asynchronously checks if a collection exists and creates it if it doesn't.
         """
         try:
+            # Call the new lazy initializer for the async client
             client = await self.aget_async_vector_client()
             if check_existence and not await client.has_collection(collection_name):
                 self.logger.info(f"Collection '{collection_name}' does not exist. Creating it.")
@@ -498,130 +509,117 @@ class VDBService(object):
     async def _get_or_create_async_client(self) -> AsyncMilvusClient:
         """
-        Lazily initializes and returns the AsyncMilvusClient.
-        This runs the blocking constructor in a separate thread, but also creates
-        a temporary event loop inside that thread to satisfy the client's
-        initialization requirements.
+        Lazily initializes the AsyncMilvusClient.
+        Based on grpcio source, the client MUST be initialized in a thread
+        with a running event loop. Therefore, we initialize it directly in the
+        main async context. The synchronous __init__ is fast enough not to
+        block the event loop meaningfully.
         """
         if self._async_client is None:
-            self.logger.info("Lazily initializing AsyncMilvusClient...")
-            def _create_with_loop():
-                # This function runs in a separate thread via asyncio.to_thread
-                try:
-                    # Check if an event loop exists in this new thread
-                    asyncio.get_running_loop()
-                except RuntimeError:  # 'RuntimeError: There is no current event loop...'
-                    # If not, create and set a new one
-                    loop = asyncio.new_event_loop()
-                    asyncio.set_event_loop(loop)
-                # Now, with an event loop present in this thread, initialize the client.
-                # This is still a blocking call, but it's contained in the thread.
-                provider = self.settings.get("vector_store", {}).get("provider")
-                return self._initialize_async_milvus_client(provider)
-            self._async_client = await asyncio.to_thread(_create_with_loop)
+            self.logger.info("Lazily initializing AsyncMilvusClient directly in the main event loop...")
+            provider = self.settings.get("vector_store", {}).get("provider")
+            # This is a synchronous call, but it's lightweight and must run here.
+            self._async_client = self._initialize_async_milvus_client(provider)
         return self._async_client
     async def aget_vector_store(self, collection_name: str, embeddings: Embeddings = None, metric_type: str = "IP") -> Milvus:
         """
         Asynchronously gets a vector store instance, creating it if it doesn't exist.
+        This version is optimized to handle high concurrency using a lock.
         """
         if not collection_name:
             self.logger.error("aget_vector_store called with no collection_name.")
             raise ValueError("collection_name must be provided.")
-        # Check for a cached instance. If found, return it immediately.
-        if collection_name in self._async_instances:
-            self.logger.info(f"Returning existing async vector store instance for collection: {collection_name}")
-            return self._async_instances[collection_name]
-        self.logger.info(f"Creating new async vector store instance for collection: {collection_name}")
-        if embeddings is None:
-            embeddings = self.get_embeddings()
+        lock = self._async_instance_locks[collection_name]
+        async with lock:
+            if collection_name in self._async_instances:
+                self.logger.info(f"Returning existing async vector store instance for collection: {collection_name} (post-lock)")
+                return self._async_instances[collection_name]
-        await self._aensure_collection_exists(collection_name, embeddings, check_existence=True)
+            self.logger.info(f"Creating new async vector store instance for collection: {collection_name}")
+            if embeddings is None:
+                embeddings = self.get_embeddings()
-        # try:
-        #     self.logger.info(f"Testing embedding function for collection '{collection_name}'...")
-        #     await embeddings.aembed_query("validation_test_string")
-        #     self.logger.info("Embedding function is valid.")
-        # except Exception as e:
-        #     self.logger.error(
-        #         f"The provided embedding function is invalid and failed with error: {e}. "
-        #         f"Cannot create a vector store for collection '{collection_name}'."
-        #     )
-        #     raise RuntimeError(f"Invalid embedding function provided.") from e
+            # CRITICAL: Ensure the shared async client is initialized *under the lock*
+            # before any operation that might use it.
+            await self._get_or_create_async_client()
-        index_params = self.index_params or {
-            "metric_type": metric_type,
-            "index_type": "AUTOINDEX",
-            "params": {}
-        }
-        # Create a dedicated connection_args for the async path with the correct alias
-        async_conn_args = self.connection_args.copy()
-        async_conn_args['alias'] = self.async_alias
-        # For async operations, we MUST instantiate the Milvus object using the SYNCHRONOUS alias
-        # because its __init__ method is synchronous. This is now done in a separate thread.
-        vdb = await self._acreate_milvus_instance_with_retry(
-            collection_name=collection_name,
-            embeddings=embeddings,
-            index_params=index_params,
-            connection_args=async_conn_args  # Pass the async-specific connection args
-        )
+            await self._aensure_collection_exists(collection_name, embeddings, check_existence=True)
-        # After successful synchronous initialization, we hot-swap the alias on the
-        # ASYNCHRONOUS client to ensure future async operations use the correct connection.
-        self.logger.info(f"Swapping to async alias for instance of collection {collection_name}")
-        # DO NOT get the async client here, get it outside this function
-        #await self._get_or_create_async_client()
-        vdb.aclient._using = self.async_alias
+            vdb = await self._acreate_milvus_instance_with_retry(
+                collection_name=collection_name,
+                embeddings=embeddings,
+                metric_type=metric_type
+            )
-        self._async_instances[collection_name] = vdb
+            self.logger.info(f"Swapping to async alias for instance of collection {collection_name}")
+            vdb.aclient._using = self.async_alias
-        return vdb
+            self._async_instances[collection_name] = vdb
+            return vdb
-    async def _acreate_milvus_instance_with_retry(self, collection_name: str, embeddings: Embeddings, index_params: dict, connection_args: Optional[dict] = None) -> Milvus:
+    async def _acreate_milvus_instance_with_retry(
+        self,
+        embeddings: Embeddings,
+        collection_name: str,
+        metric_type: str = "IP",
+    ) -> Milvus:
         """
-        Asynchronously creates a Milvus instance with a retry mechanism, running the synchronous
-        constructor in a separate thread to avoid blocking the event loop.
+        Asynchronously creates a Milvus instance with retry logic, ensuring the connection
+        is established in the target thread.
         """
-        retries = 2
-        conn_args = connection_args if connection_args is not None else self.connection_args
-        def _create_instance():
-            # This synchronous function will be run in a thread
-            return Milvus(
-                embedding_function=embeddings,
-                collection_name=collection_name,
-                connection_args=conn_args,
-                index_params=index_params
-            )
-        self.logger.info(f"Creating Milvus instance for collection '{collection_name}' in a separate thread...")
-        self.logger.info(f"Connection args: {conn_args}")
+        retries = 3
+        last_exception = None
-        for attempt in range(retries + 1):
+        for attempt in range(retries):
             try:
-                # Run the blocking constructor in a separate thread
-                vdb = await asyncio.to_thread(_create_instance)
-                self.logger.info(f"Successfully connected to Milvus for collection '{collection_name}' on attempt {attempt + 1}.")
-                return vdb  # Return on success
+                conn_args = self.connection_args.copy()
+                # Langchain's Milvus class will use the alias to find the connection.
+                conn_args["alias"] = self.sync_alias
+                def _create_instance_in_thread():
+                    # --- START: CRITICAL FIX ---
+                    # Manually connect within the thread before creating the Milvus instance.
+                    # This ensures pymilvus registers the connection details for the current thread.
+                    try:
+                        connections.connect(**conn_args)
+                        self.logger.info(f"Successfully connected to Milvus with alias '{self.sync_alias}' in thread.")
+                    except Exception as e:
+                        self.logger.error(f"Failed to manually connect in thread: {e}")
+                        raise
+                    # Now, creating the Milvus instance will find the existing connection via the alias.
+                    instance = Milvus(
+                        embedding_function=embeddings,
+                        collection_name=collection_name,
+                        connection_args=conn_args, # Pass args for completeness
+                        # metric_type=metric_type,  # <-- CRITICAL FIX: REMOVE THIS LINE
+                        consistency_level="Strong",
+                        # --- START: CRITICAL FIX ---
+                        # Pass self.index_params to the Milvus constructor here
+                        index_params=self.index_params,
+                        # --- END: CRITICAL FIX ---
+                    )
+                    return instance
+                    # --- END: CRITICAL FIX ---
+                self.logger.info(f"Attempt {attempt + 1}/{retries}: Creating Milvus instance for collection '{collection_name}' in a separate thread...")
+                vdb = await asyncio.to_thread(_create_instance_in_thread)
+                self.logger.info("Successfully created Milvus instance.")
+                return vdb
             except Exception as e:
+                last_exception = e
                 self.logger.warning(
-                    f"Attempt {attempt + 1}/{retries + 1} to connect to Milvus for collection '{collection_name}' failed: {e}"
+                    f"Attempt {attempt + 1}/{retries} failed to create Milvus instance: {e}. Retrying in {2 ** attempt}s..."
                 )
-                if attempt < retries:
-                    self.logger.info("Retrying in 3 seconds...")
-                    await asyncio.sleep(3) # Use async sleep
-                else:
-                    self.logger.error(f"Failed to connect to Milvus for collection '{collection_name}' after {retries + 1} attempts.")
-                    raise RuntimeError(f"Could not connect to Milvus after {retries + 1} attempts.") from e
+                await asyncio.sleep(2 ** attempt)
+        raise RuntimeError(
+            f"Failed to create Milvus instance after {retries} retries."
+        ) from last_exception
     def _create_milvus_instance_with_retry(self, collection_name: str, embeddings: Embeddings, index_params: dict, connection_args: Optional[dict] = None) -> Milvus:
         """

{crewplus-0.2.70 → crewplus-0.2.71}/pyproject.toml RENAMED Viewed

@@ -6,7 +6,7 @@ build-backend = "pdm.backend"
 [project]
 name = "crewplus"
-version = "0.2.70"
+version = "0.2.71"
 description = "Base services for CrewPlus AI applications"
 authors = [
     { name = "Tim Liu", email = "tim@opsmateai.com" },