MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +254 -253
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +55 -52
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +9 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +1 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +30 -12
- mindsdb/integrations/utilities/rag/settings.py +6 -2
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +6 -1
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/context.py +2 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/utilities/rag/pipelines/rag.py:

```diff
@@ -1,8 +1,9 @@
 from copy import copy
-from typing import Optional, Any
+from typing import Optional, Any, List
 
 from langchain_core.output_parsers import StrOutputParser
 from langchain.retrievers import ContextualCompressionRetriever
+from langchain_core.documents import Document
 
 from langchain_core.prompts import ChatPromptTemplate
 from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableSerializable
```
```diff
@@ -28,6 +29,23 @@ from mindsdb.interfaces.agents.langchain_agent import create_chat_model
 class LangChainRAGPipeline:
     """
     Builds a RAG pipeline using langchain LCEL components
+
+    Args:
+        retriever_runnable: Base retriever component
+        prompt_template: Template for generating responses
+        llm: Language model for generating responses
+        reranker (bool): Whether to use reranking (default: False)
+        reranker_config (RerankerConfig): Configuration for the reranker, including:
+            - model: Model to use for reranking
+            - filtering_threshold: Minimum score to keep a document
+            - num_docs_to_keep: Maximum number of documents to keep
+            - max_concurrent_requests: Maximum concurrent API requests
+            - max_retries: Number of retry attempts for failed requests
+            - retry_delay: Delay between retries
+            - early_stop (bool): Whether to enable early stopping
+            - early_stop_threshold: Confidence threshold for early stopping
+        vector_store_config (VectorStoreConfig): Vector store configuration
+        summarization_config (SummarizationConfig): Summarization configuration
     """
 
     def __init__(
```
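Not part of the diff: a minimal usage sketch of the reranker knobs documented above, assuming `RerankerConfig` is importable from `mindsdb.integrations.utilities.rag.settings` (where this release defines it); all values below are illustrative only.

```python
# Illustrative values only; the import path is an assumption based on this release's settings.py.
from mindsdb.integrations.utilities.rag.settings import RerankerConfig

reranker_config = RerankerConfig(
    filtering_threshold=0.5,     # drop documents scoring below 0.5
    num_docs_to_keep=5,          # stop once 5 relevant documents are kept
    max_concurrent_requests=20,  # cap on in-flight reranking calls
    max_retries=3,               # per-document retry attempts
    retry_delay=1.0,             # base delay between retries, in seconds
    early_stop=True,
    early_stop_threshold=0.8,    # confidence needed to stop early
)
```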
```diff
@@ -40,19 +58,15 @@ class LangChainRAGPipeline:
             vector_store_config: Optional[VectorStoreConfig] = None,
             summarization_config: Optional[SummarizationConfig] = None
     ):
-
         self.retriever_runnable = retriever_runnable
         self.prompt_template = prompt_template
         self.llm = llm
         if reranker:
             if reranker_config is None:
                 reranker_config = RerankerConfig()
-
-
-
-                filtering_threshold=reranker_config.filtering_threshold,
-                num_docs_to_keep=reranker_config.num_docs_to_keep
-            )
+            # Convert config to dict and initialize reranker
+            reranker_kwargs = reranker_config.model_dump(exclude_none=True)
+            self.reranker = LLMReranker(**reranker_kwargs)
         else:
             self.reranker = None
         self.summarizer = None
```
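The config-to-kwargs handoff relies on pydantic v2's `model_dump(exclude_none=True)`, which omits unset optional fields so `LLMReranker` keeps its own defaults for them. A standalone sketch of that behaviour (the `DemoConfig` model below is hypothetical, not MindsDB code):

```python
# Standalone illustration of model_dump(exclude_none=True); DemoConfig is hypothetical.
from typing import Optional
from pydantic import BaseModel

class DemoConfig(BaseModel):
    filtering_threshold: float = 0.5
    num_docs_to_keep: Optional[int] = None  # left unset here

print(DemoConfig().model_dump(exclude_none=True))
# {'filtering_threshold': 0.5} -- num_docs_to_keep is dropped, so a consumer
# constructed via SomeClass(**kwargs) falls back to its own default for it.
```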
```diff
@@ -102,17 +116,45 @@ class LangChainRAGPipeline:
             raise ValueError("One of the required components (llm) is None")
 
         if self.reranker:
-
-
-
-
+            # Create a custom retriever that handles async operations properly
+            class AsyncRerankerRetriever(ContextualCompressionRetriever):
+                """Async-aware retriever that properly handles concurrent reranking operations."""
+
+                def __init__(self, base_retriever, reranker):
+                    super().__init__(
+                        base_compressor=reranker,
+                        base_retriever=base_retriever
+                    )
+
+                async def ainvoke(self, query: str) -> List[Document]:
+                    """Async retrieval with proper concurrency handling."""
+                    # Get initial documents
+                    if hasattr(self.base_retriever, 'ainvoke'):
+                        docs = await self.base_retriever.ainvoke(query)
+                    else:
+                        docs = await RunnablePassthrough(self.base_retriever.get_relevant_documents)(query)
+
+                    # Rerank documents
+                    if docs:
+                        docs = await self.base_compressor.acompress_documents(docs, query)
+                    return docs
+
+                def get_relevant_documents(self, query: str) -> List[Document]:
+                    """Sync wrapper for async retrieval."""
+                    import asyncio
+                    return asyncio.run(self.ainvoke(query))
+
+            # Use our custom async-aware retriever
+            self.retriever_runnable = AsyncRerankerRetriever(
+                base_retriever=copy(self.retriever_runnable),
+                reranker=self.reranker
             )
 
         rag_chain_from_docs = (
-
-
-
-
+            RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
+            | prompt
+            | self.llm
+            | StrOutputParser()
         )
 
         retrieval_chain = RunnableParallel(
```
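Both `get_relevant_documents` above and the new `compress_documents` / `invoke` wrappers further down use the same sync-over-async pattern: run the async path with `asyncio.run`. A minimal standalone sketch of that pattern (hypothetical class, not MindsDB code; it assumes no event loop is already running in the calling thread, since `asyncio.run` raises otherwise):

```python
# Generic sync-over-async wrapper sketch; Fetcher is hypothetical.
import asyncio

class Fetcher:
    async def afetch(self, query: str) -> list:
        await asyncio.sleep(0)              # stand-in for awaiting real I/O (e.g. an API call)
        return [f"doc for {query!r}"]

    def fetch(self, query: str) -> list:
        # asyncio.run creates a fresh event loop; it fails if one is already running.
        return asyncio.run(self.afetch(query))

print(Fetcher().fetch("what is mindsdb"))
```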
```diff
@@ -125,6 +167,16 @@ class LangChainRAGPipeline:
         rag_chain_with_source = retrieval_chain.assign(answer=rag_chain_from_docs)
         return rag_chain_with_source
 
+    async def ainvoke(self, input_dict: dict) -> dict:
+        """Async invocation of the RAG pipeline."""
+        chain = self.with_returned_sources()
+        return await chain.ainvoke(input_dict)
+
+    def invoke(self, input_dict: dict) -> dict:
+        """Sync invocation of the RAG pipeline."""
+        import asyncio
+        return asyncio.run(self.ainvoke(input_dict))
+
     @classmethod
     def _apply_search_kwargs(cls, retriever: Any, search_kwargs: Optional[SearchKwargs] = None, search_type: Optional[SearchType] = None) -> Any:
         """Apply search kwargs and search type to the retriever if they exist"""
```
```diff
@@ -235,6 +287,10 @@ class LangChainRAGPipeline:
         )
         vector_store_retriever = vector_store_operator.vector_store.as_retriever()
         vector_store_retriever = cls._apply_search_kwargs(vector_store_retriever, config.search_kwargs, config.search_type)
+        distance_function = DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE
+        if config.vector_store_config.is_sparse and config.vector_store_config.vector_size is not None:
+            # Use negative dot product for sparse retrieval.
+            distance_function = DistanceFunction.NEGATIVE_DOT_PRODUCT
         retriever = SQLRetriever(
             fallback_retriever=vector_store_retriever,
             vector_store_handler=knowledge_base_table.get_vector_db(),
```
```diff
@@ -248,8 +304,7 @@
             query_checker_template=retriever_config.query_checker_template,
             embeddings_table=knowledge_base_table._kb.vector_database_table,
             source_table=retriever_config.source_table,
-
-            distance_function=DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE,
+            distance_function=distance_function,
             search_kwargs=config.search_kwargs,
             llm=sql_llm
         )
```
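A small sketch of the selection rule the two hunks above introduce: sparse vectors with a known size rank by negative dot product, everything else keeps squared Euclidean distance. The `DistanceFunction` enum below is a stand-in for illustration; the real one lives in MindsDB's vector-store handler code.

```python
# Stand-in enum and helper for illustration only; the branch mirrors the new code in rag.py.
from enum import Enum
from typing import Optional

class DistanceFunction(Enum):
    SQUARED_EUCLIDEAN_DISTANCE = "squared_euclidean"
    NEGATIVE_DOT_PRODUCT = "negative_dot_product"

def pick_distance_function(is_sparse: bool, vector_size: Optional[int]) -> DistanceFunction:
    if is_sparse and vector_size is not None:
        return DistanceFunction.NEGATIVE_DOT_PRODUCT
    return DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE

print(pick_distance_function(True, 30_000))   # NEGATIVE_DOT_PRODUCT
print(pick_distance_function(False, None))    # SQUARED_EUCLIDEAN_DISTANCE
```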
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py:

```diff
@@ -4,16 +4,15 @@ import asyncio
 import logging
 import math
 import os
+import random
 from typing import Any, Dict, List, Optional, Sequence, Tuple
-
+
 from langchain.retrievers.document_compressors.base import BaseDocumentCompressor
 from langchain_core.callbacks import Callbacks
-
-from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKING_MODEL, DEFAULT_LLM_ENDPOINT
 from langchain_core.documents import Document
-from 
-from langchain_openai import ChatOpenAI
+from openai import AsyncOpenAI
 
+from mindsdb.integrations.utilities.rag.settings import DEFAULT_RERANKING_MODEL, DEFAULT_LLM_ENDPOINT
 
 log = logging.getLogger(__name__)
 
```
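The new `import random` feeds the retry logic added further down: each failed reranking call backs off exponentially with a small random jitter before retrying. A standalone sketch of that delay schedule (values only illustrate the formula used in the diff):

```python
# Illustrative only: reproduces the delay formula from search_relevancy's retry loop.
import random

retry_delay = 1.0  # base delay, same default as the new retry_delay field
for attempt in range(3):
    delay = retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
    print(f"attempt {attempt}: sleep ~{delay:.2f}s")  # ~1s, ~2s, ~4s plus jitter
```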
```diff
@@ -23,128 +22,187 @@ class LLMReranker(BaseDocumentCompressor):
     model: str = DEFAULT_RERANKING_MODEL  # Model to use for reranking
     temperature: float = 0.0  # Temperature for the model
     openai_api_key: Optional[str] = None
-    remove_irrelevant: bool = True  # New flag to control removal of irrelevant documents
+    remove_irrelevant: bool = True  # New flag to control removal of irrelevant documents
     base_url: str = DEFAULT_LLM_ENDPOINT
     num_docs_to_keep: Optional[int] = None  # How many of the top documents to keep after reranking & compressing.
-
     _api_key_var: str = "OPENAI_API_KEY"
-    client: Optional[
+    client: Optional[AsyncOpenAI] = None
+    _semaphore: Optional[asyncio.Semaphore] = None
+    max_concurrent_requests: int = 20
+    max_retries: int = 3
+    retry_delay: float = 1.0
+    request_timeout: float = 20.0  # Timeout for API requests
+    early_stop: bool = True  # Whether to enable early stopping
+    early_stop_threshold: float = 0.8  # Confidence threshold for early stopping
 
     class Config:
         arbitrary_types_allowed = True
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        response = await client.agenerate(
-            messages=[message_history],
-            max_tokens=1
-        )
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._semaphore = asyncio.Semaphore(self.max_concurrent_requests)
+
+    async def _init_client(self):
+        if self.client is None:
+            openai_api_key = self.openai_api_key or os.getenv(self._api_key_var)
+            if not openai_api_key:
+                raise ValueError(f"OpenAI API key not found in environment variable {self._api_key_var}")
+            self.client = AsyncOpenAI(
+                api_key=openai_api_key,
+                base_url=self.base_url,
+                timeout=self.request_timeout,
+                max_retries=2  # Client-level retries
+            )
 
-
-
+    async def search_relevancy(self, query: str, document: str) -> Any:
+        await self._init_client()
+
+        async with self._semaphore:
+            for attempt in range(self.max_retries):
+                try:
+                    response = await self.client.chat.completions.create(
+                        model=self.model,
+                        messages=[
+                            {"role": "system", "content": "Rate the relevance of the document to the query. Respond with 'yes' or 'no'."},
+                            {"role": "user", "content": f"Query: {query}\nDocument: {document}\nIs this document relevant?"}
+                        ],
+                        temperature=self.temperature,
+                        n=1,
+                        logprobs=True,
+                        max_tokens=1
+                    )
+
+                    # Extract response and logprobs
+                    answer = response.choices[0].message.content
+                    logprob = response.choices[0].logprobs.content[0].logprob
+
+                    return {"answer": answer, "logprob": logprob}
+
+                except Exception as e:
+                    if attempt == self.max_retries - 1:
+                        log.error(f"Failed after {self.max_retries} attempts: {str(e)}")
+                        raise
+                    # Exponential backoff with jitter
+                    retry_delay = self.retry_delay * (2 ** attempt) + random.uniform(0, 0.1)
+                    await asyncio.sleep(retry_delay)
 
     async def _rank(self, query_document_pairs: List[Tuple[str, str]]) -> List[Tuple[str, float]]:
-        # Gather results asynchronously for all query-document pairs
-        results = await asyncio.gather(
-            *[self.search_relevancy(query=query, document=document) for (query, document) in query_document_pairs]
-        )
-
         ranked_results = []
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        # Process in larger batches for better throughput
+        batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
+        for i in range(0, len(query_document_pairs), batch_size):
+            batch = query_document_pairs[i:i + batch_size]
+            try:
+                results = await asyncio.gather(
+                    *[self.search_relevancy(query=query, document=document) for (query, document) in batch],
+                    return_exceptions=True
+                )
+
+                for idx, result in enumerate(results):
+                    if isinstance(result, Exception):
+                        log.error(f"Error processing document {i+idx}: {str(result)}")
+                        ranked_results.append((batch[idx][1], 0.0))
+                        continue
+
+                    answer = result["answer"]
+                    logprob = result["logprob"]
+                    prob = math.exp(logprob)
+
+                    # Convert answer to score using the model's confidence
+                    if answer.lower().strip() == "yes":
+                        score = prob  # If yes, use the model's confidence
+                    elif answer.lower().strip() == "no":
+                        score = 1 - prob  # If no, invert the confidence
+                    else:
+                        score = 0.5 * prob  # For unclear answers, reduce confidence
+
+                    ranked_results.append((batch[idx][1], score))
+
+                    # Check if we should stop early
+                    high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
+                    can_stop_early = (
+                        self.early_stop  # Early stopping is enabled
+                        and self.num_docs_to_keep  # We have a target number of docs
+                        and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
+                        and score >= self.early_stop_threshold  # Current doc is good enough
+                    )
+
+                    if can_stop_early:
+                        log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
+                        return ranked_results
+
+            except Exception as e:
+                log.error(f"Batch processing error: {str(e)}")
+                continue
 
         return ranked_results
 
-    def 
-
-
-
-
+    async def acompress_documents(
+        self,
+        documents: Sequence[Document],
+        query: str,
+        callbacks: Optional[Callbacks] = None,
     ) -> Sequence[Document]:
-        """
-
-
-
+        """Async compress documents using reranking with proper error handling."""
+        if callbacks:
+            await callbacks.on_retriever_start({"query": query}, "Reranking documents")
+
+        log.info(f"Async compressing documents. Initial count: {len(documents)}")
+        if not documents:
+            if callbacks:
+                await callbacks.on_retriever_end({"documents": []})
             return []
 
-        doc_contents = [doc.page_content for doc in documents]
-        query_documents_pairs = [(query, doc) for doc in doc_contents]
-
-        # Create event loop and run async code
-        import asyncio
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            # Prepare query-document pairs
+            query_document_pairs = [(query, doc.page_content) for doc in documents]
+
+            if callbacks:
+                await callbacks.on_text("Starting document reranking...")
+
+            # Get ranked results
+            ranked_results = await self._rank(query_document_pairs)
+
+            # Sort by score in descending order
+            ranked_results.sort(key=lambda x: x[1], reverse=True)
+
+            # Filter based on threshold and num_docs_to_keep
+            filtered_docs = []
+            for doc, score in ranked_results:
+                if score >= self.filtering_threshold:
+                    matching_doc = next(d for d in documents if d.page_content == doc)
+                    matching_doc.metadata = {**(matching_doc.metadata or {}), "relevance_score": score}
+                    filtered_docs.append(matching_doc)
+
+                    if callbacks:
+                        await callbacks.on_text(f"Document scored {score:.2f}")
+
+                if self.num_docs_to_keep and len(filtered_docs) >= self.num_docs_to_keep:
+                    break
+
+            log.info(f"Async compression complete. Final count: {len(filtered_docs)}")
+
+            if callbacks:
+                await callbacks.on_retriever_end({"documents": filtered_docs})
+
+            return filtered_docs
+
+        except Exception as e:
+            error_msg = f"Error during async document compression: {str(e)}"
+            log.error(error_msg)
+            if callbacks:
+                await callbacks.on_retriever_error(error_msg)
+            return documents  # Return original documents on error
+
+    def compress_documents(
+        self,
+        documents: Sequence[Document],
+        query: str,
+        callbacks: Optional[Callbacks] = None,
+    ) -> Sequence[Document]:
+        """Sync wrapper for async compression."""
+        return asyncio.run(self.acompress_documents(documents, query, callbacks))
 
     @property
     def _identifying_params(self) -> Dict[str, Any]:
```
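The reranker now derives a relevance score from a single yes/no token plus its log-probability instead of a separate scoring call. A standalone sketch of that mapping (mirroring the logic in `_rank`; the logprob values are made up):

```python
# Illustrative re-implementation of the yes/no + logprob -> score mapping from _rank.
import math

def relevance_score(answer: str, logprob: float) -> float:
    prob = math.exp(logprob)          # token probability recovered from the logprob
    answer = answer.lower().strip()
    if answer == "yes":
        return prob                   # confident "yes" -> high score
    if answer == "no":
        return 1 - prob               # confident "no" -> low score
    return 0.5 * prob                 # anything else -> damped score

print(relevance_score("yes", -0.05))   # ~0.95
print(relevance_score("no", -0.05))    # ~0.05
print(relevance_score("maybe", -0.7))  # ~0.25
```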
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py:

```diff
@@ -136,7 +136,7 @@ Output:
         )
         retry_chain = LLMChain(llm=self.llm, prompt=retry_prompt)
         # Generate rewritten query.
-
+        sql_query = retry_chain.predict(
             query=query,
             dialect='postgres',
             error=error,
```
```diff
@@ -144,6 +144,16 @@ Output:
             schema=schema,
             callbacks=run_manager.get_child() if run_manager else None
         )
+        query_checker_prompt = PromptTemplate(
+            input_variables=['dialect', 'query'],
+            template=self.query_checker_template
+        )
+        query_checker_chain = LLMChain(llm=self.llm, prompt=query_checker_prompt)
+        # Check the query & return the final result to be executed.
+        return query_checker_chain.predict(
+            dialect='postgres',
+            query=sql_query
+        )
 
     def _get_relevant_documents(
         self, query: str, *, run_manager: CallbackManagerForRetrieverRun
```
```diff
@@ -163,20 +173,28 @@ Output:
         # Actually execute the similarity search with metadata filters.
         document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
         num_retries = 0
-        while 
-
-
-
+        while num_retries < self.num_retries:
+            if document_response.resp_type == RESPONSE_TYPE.ERROR:
+                error_msg = document_response.error_message
+                # LLMs won't always generate a working SQL query so we should have a fallback after retrying.
+                logger.info(f'SQL Retriever query {checked_sql_query} failed with error {error_msg}')
+                checked_sql_query = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
+            elif len(document_response.data_frame) == 0:
+                error_msg = "No documents retrieved from query."
+                checked_sql_query = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
+            else:
+                break
+
+            checked_sql_query_with_embeddings = checked_sql_query.format(embeddings=str(embedded_query))
+            # Handle LLM output that has the ```sql delimiter possibly.
+            checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```sql', '')
+            checked_sql_query_with_embeddings = checked_sql_query_with_embeddings.replace('```', '')
+            document_response = self.vector_store_handler.native_query(checked_sql_query_with_embeddings)
+
+            num_retries += 1
         if num_retries >= self.num_retries:
             logger.info('Using fallback retriever in SQL retriever.')
             return self.fallback_retriever._get_relevant_documents(retrieval_query, run_manager=run_manager)
-            query_to_retry = self._prepare_retry_query(checked_sql_query, error_msg, run_manager)
-            query_to_retry_with_embeddings = query_to_retry.format(embeddings=str(embedded_query))
-            # Handle LLM output that has the ```sql delimiter possibly.
-            query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```sql', '')
-            query_to_retry_with_embeddings = query_to_retry_with_embeddings.replace('```', '')
-            document_response = self.vector_store_handler.native_query(query_to_retry_with_embeddings)
-            num_retries += 1
 
         document_df = document_response.data_frame
         retrieved_documents = []
```
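The reworked loop retries the generated SQL up to `num_retries` times, rewriting it on errors or empty results, and only then falls back to the plain vector-store retriever. A generic sketch of that control flow (the helpers below are hypothetical, not the MindsDB implementation):

```python
# Generic retry-then-fallback sketch; run_query, rewrite_query and fallback are hypothetical callables.
def retrieve_with_retries(query, run_query, rewrite_query, fallback, num_retries=3):
    result = run_query(query)
    attempts = 0
    while attempts < num_retries:
        if result.get("error"):
            query = rewrite_query(query, result["error"])    # ask the LLM to fix the SQL
        elif not result.get("rows"):
            query = rewrite_query(query, "no rows returned")  # retry on empty result
        else:
            break                                             # usable result, stop retrying
        result = run_query(query)
        attempts += 1
    if attempts >= num_retries:
        return fallback(query)                                # give up on SQL, use vector search
    return result["rows"]
```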
mindsdb/integrations/utilities/rag/settings.py:

```diff
@@ -136,7 +136,6 @@ Columns:
         "description": "Metadata for the document chunk. Always select metadata and always join with the {source_table} table on the string metadata field 'original_row_id'"
     }}
 }}
-```
 
 {schema}
 
```
```diff
@@ -451,8 +450,13 @@ class SummarizationConfig(BaseModel):
 class RerankerConfig(BaseModel):
     model: str = DEFAULT_RERANKING_MODEL
     base_url: str = DEFAULT_LLM_ENDPOINT
-    filtering_threshold: float = 0.
+    filtering_threshold: float = 0.5
     num_docs_to_keep: Optional[int] = None
+    max_concurrent_requests: int = 20
+    max_retries: int = 3
+    retry_delay: float = 1.0
+    early_stop: bool = True  # Whether to enable early stopping
+    early_stop_threshold: float = 0.8  # Confidence threshold for early stopping
 
 
 class MultiHopRetrieverConfig(BaseModel):
```
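With `early_stop` and `early_stop_threshold` now part of the config, reranking can stop as soon as enough confidently relevant documents have been found. A small sketch of that stopping rule, mirroring the condition added to `LLMReranker._rank` (the sample values are made up):

```python
# Illustrative early-stop check, mirroring the condition in LLMReranker._rank.
def can_stop_early(scores, num_docs_to_keep, filtering_threshold=0.5,
                   early_stop=True, early_stop_threshold=0.8):
    high_scoring = [s for s in scores if s >= filtering_threshold]
    return bool(
        early_stop                                   # feature enabled
        and num_docs_to_keep                         # a target count is set
        and len(high_scoring) >= num_docs_to_keep    # enough good documents already
        and scores[-1] >= early_stop_threshold       # the latest document is confident
    )

print(can_stop_early([0.91, 0.88, 0.95], num_docs_to_keep=3))  # True
print(can_stop_early([0.91, 0.42, 0.95], num_docs_to_keep=3))  # False (only 2 above 0.5)
```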
mindsdb/interfaces/agents/agents_controller.py:

```diff
@@ -362,24 +362,22 @@ class AgentsController:
                        project_name: str = 'mindsdb',
                        tools: List[BaseTool] = None,
                        stream: bool = False) -> Union[Iterator[object], pd.DataFrame]:
-
+        """
         Queries an agent to get a completion.
 
         Parameters:
             agent (db.Agents): Existing agent to get completion from
             messages (List[Dict[str, str]]): Chat history to send to the agent
-            trace_id (str): ID of Langfuse trace to use
-            observation_id (str): ID of parent Langfuse observation to use
             project_name (str): Project the agent belongs to (default mindsdb)
             tools (List[BaseTool]): Tools to use while getting the completion
-            stream (bool): Whether 
+            stream (bool): Whether to stream the response
 
         Returns:
             response (Union[Iterator[object], pd.DataFrame]): Completion as a DataFrame or iterator of completion chunks
 
         Raises:
             ValueError: Agent's model does not exist.
-
+        """
         if stream:
             return self._get_completion_stream(
                 agent,
```