MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (61)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +53 -94
  3. mindsdb/api/a2a/agent.py +30 -206
  4. mindsdb/api/a2a/common/server/server.py +26 -27
  5. mindsdb/api/a2a/task_manager.py +93 -227
  6. mindsdb/api/a2a/utils.py +21 -0
  7. mindsdb/api/executor/command_executor.py +8 -6
  8. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  9. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
  10. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  11. mindsdb/api/executor/planner/query_prepare.py +68 -87
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  13. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  14. mindsdb/api/executor/utilities/sql.py +97 -21
  15. mindsdb/api/http/namespaces/agents.py +126 -201
  16. mindsdb/api/http/namespaces/config.py +12 -1
  17. mindsdb/api/http/namespaces/file.py +49 -24
  18. mindsdb/api/mcp/start.py +45 -31
  19. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  20. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  21. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  22. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  23. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  24. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  25. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  27. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +244 -141
  28. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  29. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
  31. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  32. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  33. mindsdb/integrations/libs/keyword_search_base.py +41 -0
  34. mindsdb/integrations/libs/vectordatabase_handler.py +114 -84
  35. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  36. mindsdb/integrations/utilities/sql_utils.py +11 -0
  37. mindsdb/interfaces/agents/agents_controller.py +29 -9
  38. mindsdb/interfaces/agents/langchain_agent.py +7 -5
  39. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  40. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  41. mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
  42. mindsdb/interfaces/database/projects.py +1 -3
  43. mindsdb/interfaces/functions/controller.py +54 -64
  44. mindsdb/interfaces/functions/to_markdown.py +47 -14
  45. mindsdb/interfaces/knowledge_base/controller.py +228 -110
  46. mindsdb/interfaces/knowledge_base/evaluate.py +18 -6
  47. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  48. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  49. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  50. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  51. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
  52. mindsdb/interfaces/skills/sql_agent.py +181 -130
  53. mindsdb/interfaces/storage/db.py +9 -7
  54. mindsdb/utilities/config.py +58 -40
  55. mindsdb/utilities/exception.py +58 -7
  56. mindsdb/utilities/security.py +54 -11
  57. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +245 -259
  58. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +61 -58
  59. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
  60. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
  61. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py
@@ -476,7 +476,7 @@ class PostgresHandler(MetaDatabaseHandler):
         config = self._make_connection_args()
         config["autocommit"] = True
 
-        conn = psycopg.connect(connect_timeout=10, **config)
+        conn = psycopg.connect(**config)
 
         # create db trigger
         trigger_name = f"mdb_notify_{table_name}"
mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py
@@ -271,10 +271,11 @@ class SalesforceHandler(MetaAPIHandler):
 
         # Retrieve the metadata for all Salesforce resources.
         main_metadata = connection.sobjects.describe()
-
         if table_names:
             # Filter the metadata for the specified tables.
-            main_metadata = [resource for resource in main_metadata["sobjects"] if resource["name"] in table_names]
+            main_metadata = [
+                resource for resource in main_metadata["sobjects"] if resource["name"].lower() in table_names
+            ]
         else:
             main_metadata = main_metadata["sobjects"]
 
mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py
@@ -165,7 +165,7 @@ def create_table_class(resource_name: Text) -> MetaAPIResource:
            client = self.handler.connect()
 
            resource_metadata = next(
-                (resource for resource in main_metadata if resource["name"] == resource_name),
+                (resource for resource in main_metadata if resource["name"].lower() == resource_name),
            )
 
            # Get row count if Id column is aggregatable.
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt
@@ -1 +1,2 @@
 statsforecast==1.6.0
+scipy==1.15.3
mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt
@@ -1 +1,2 @@
 statsforecast==1.6.0
+scipy==1.15.3
mindsdb/integrations/libs/keyword_search_base.py
@@ -0,0 +1,41 @@
+from mindsdb_sql_parser.ast import Select
+from typing import List
+import pandas as pd
+
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, KeywordSearchArgs
+
+
+class KeywordSearchBase:
+    """
+    Base class for keyword search integrations.
+    This class provides a common interface for keyword search functionality.
+    """
+
+    def __init__(self, *args, **kwargs):
+        pass
+
+    def dispatch_keyword_select(
+        self, query: Select, conditions: List[FilterCondition] = None, keyword_search_args: KeywordSearchArgs = None
+    ):
+        """Dispatches a keyword search select query to the appropriate method."""
+        raise NotImplementedError()
+
+    def keyword_select(
+        self,
+        table_name: str,
+        columns: List[str] = None,
+        conditions: List[FilterCondition] = None,
+        offset: int = None,
+        limit: int = None,
+    ) -> pd.DataFrame:
+        """Select data from table
+
+        Args:
+            table_name (str): table name
+            columns (List[str]): columns to select
+            conditions (List[FilterCondition]): conditions to select
+
+        Returns:
+            HandlerResponse
+        """
+        raise NotImplementedError()
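
To make the new interface concrete, here is a minimal sketch of how a handler might implement it; the handler name, connection object, and SQL are illustrative assumptions, not code from this release.

# Hypothetical subclass of the new KeywordSearchBase interface (names are made up).
from typing import List

import pandas as pd

from mindsdb.integrations.libs.keyword_search_base import KeywordSearchBase
from mindsdb.integrations.utilities.sql_utils import FilterCondition


class ExampleKeywordSearchHandler(KeywordSearchBase):
    def __init__(self, connection, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.connection = connection  # assumed DB-API style connection

    def keyword_select(
        self,
        table_name: str,
        columns: List[str] = None,
        conditions: List[FilterCondition] = None,
        offset: int = None,
        limit: int = None,
    ) -> pd.DataFrame:
        # A real implementation would translate `conditions`, `offset` and `limit`
        # into the engine's full-text search syntax; this sketch only selects columns.
        cols = ", ".join(columns) if columns else "*"
        sql = f"SELECT {cols} FROM {table_name}"
        if limit is not None:
            sql += f" LIMIT {limit}"
        return pd.read_sql(sql, self.connection)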
mindsdb/integrations/libs/vectordatabase_handler.py
@@ -2,6 +2,7 @@ import ast
 import hashlib
 from enum import Enum
 from typing import Dict, List, Optional
+import datetime as dt
 
 import pandas as pd
 from mindsdb_sql_parser.ast import (
@@ -20,7 +21,7 @@ from mindsdb_sql_parser.ast.base import ASTNode
 
 from mindsdb.integrations.libs.response import RESPONSE_TYPE, HandlerResponse
 from mindsdb.utilities import log
-from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
+from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator, KeywordSearchArgs
 
 from mindsdb.integrations.utilities.query_traversal import query_traversal
 from .base import BaseHandler
@@ -28,6 +29,9 @@ from .base import BaseHandler
 LOG = log.getLogger(__name__)
 
 
+class VectorHandlerException(Exception): ...
+
+
 class TableField(Enum):
     """
     Enum for table fields.
@@ -43,9 +47,9 @@ class TableField(Enum):
 
 
 class DistanceFunction(Enum):
-    SQUARED_EUCLIDEAN_DISTANCE = '<->',
-    NEGATIVE_DOT_PRODUCT = '<#>',
-    COSINE_DISTANCE = '<=>'
+    SQUARED_EUCLIDEAN_DISTANCE = ("<->",)
+    NEGATIVE_DOT_PRODUCT = ("<#>",)
+    COSINE_DISTANCE = "<=>"
 
 
 class VectorStoreHandler(BaseHandler):
@@ -118,9 +122,7 @@ class VectorStoreHandler(BaseHandler):
                right_hand = [item.value for item in node.args[1].items]
            else:
                raise Exception(f"Unsupported right hand side: {node.args[1]}")
-            conditions.append(
-                FilterCondition(column=left_hand, op=op, value=right_hand)
-            )
+            conditions.append(FilterCondition(column=left_hand, op=op, value=right_hand))
 
        query_traversal(where_statement, _extract_comparison_conditions)
 
@@ -129,15 +131,23 @@ class VectorStoreHandler(BaseHandler):
 
        return conditions
 
-    def _convert_metadata_filters(self, conditions):
+    def _convert_metadata_filters(self, conditions, allowed_metadata_columns=None):
        if conditions is None:
            return
        # try to treat conditions that are not in TableField as metadata conditions
        for condition in conditions:
-            if not self._is_condition_allowed(condition):
-                condition.column = (
-                    TableField.METADATA.value + "." + condition.column
-                )
+            if self._is_metadata_condition(condition):
+                # check restriction
+                if allowed_metadata_columns is not None:
+                    # system columns are underscored, skip them
+                    if condition.column.lower() not in allowed_metadata_columns and not condition.column.startswith(
+                        "_"
+                    ):
+                        raise ValueError(f"Column is not found: {condition.column}")
+
+                # convert if required
+                if not condition.column.startswith(TableField.METADATA.value):
+                    condition.column = TableField.METADATA.value + "." + condition.column
 
    def _is_columns_allowed(self, columns: List[str]) -> bool:
        """
@@ -146,16 +156,11 @@ class VectorStoreHandler(BaseHandler):
        allowed_columns = set([col["name"] for col in self.SCHEMA])
        return set(columns).issubset(allowed_columns)
 
-    def _is_condition_allowed(self, condition: FilterCondition) -> bool:
+    def _is_metadata_condition(self, condition: FilterCondition) -> bool:
        allowed_field_values = set([field.value for field in TableField])
        if condition.column in allowed_field_values:
-            return True
-        else:
-            # check if column is a metadata column
-            if condition.column.startswith(TableField.METADATA.value):
-                return True
-            else:
-                return False
+            return False
+        return True
 
    def _dispatch_create_table(self, query: CreateTable):
        """
@@ -184,17 +189,12 @@ class VectorStoreHandler(BaseHandler):
        columns = [column.name for column in query.columns]
 
        if not self._is_columns_allowed(columns):
-            raise Exception(
-                f"Columns {columns} not allowed."
-                f"Allowed columns are {[col['name'] for col in self.SCHEMA]}"
-            )
+            raise Exception(f"Columns {columns} not allowed.Allowed columns are {[col['name'] for col in self.SCHEMA]}")
 
        # get content column if it is present
        if TableField.CONTENT.value in columns:
            content_col_index = columns.index("content")
-            content = [
-                self._value_or_self(row[content_col_index]) for row in query.values
-            ]
+            content = [self._value_or_self(row[content_col_index]) for row in query.values]
        else:
            content = None
 
@@ -209,19 +209,13 @@ class VectorStoreHandler(BaseHandler):
        # get embeddings column if it is present
        if TableField.EMBEDDINGS.value in columns:
            embeddings_col_index = columns.index("embeddings")
-            embeddings = [
-                ast.literal_eval(self._value_or_self(row[embeddings_col_index]))
-                for row in query.values
-            ]
+            embeddings = [ast.literal_eval(self._value_or_self(row[embeddings_col_index])) for row in query.values]
        else:
            raise Exception("Embeddings column is required!")
 
        if TableField.METADATA.value in columns:
            metadata_col_index = columns.index("metadata")
-            metadata = [
-                ast.literal_eval(self._value_or_self(row[metadata_col_index]))
-                for row in query.values
-            ]
+            metadata = [ast.literal_eval(self._value_or_self(row[metadata_col_index])) for row in query.values]
        else:
            metadata = None
 
@@ -277,6 +271,15 @@ class VectorStoreHandler(BaseHandler):
 
        return self.do_upsert(table_name, df)
 
+    def set_metadata_cur_time(self, df, col_name):
+        metadata_col = TableField.METADATA.value
+        cur_date = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        def set_time(meta):
+            meta[col_name] = cur_date
+
+        df[metadata_col].apply(set_time)
+
    def do_upsert(self, table_name, df):
        """Upsert data into table, handling document updates and deletions.
 
@@ -289,6 +292,7 @@ class VectorStoreHandler(BaseHandler):
        2. Updated documents: Delete old chunks and insert new ones
        """
        id_col = TableField.ID.value
+        metadata_col = TableField.METADATA.value
        content_col = TableField.CONTENT.value
 
        def gen_hash(v):
@@ -309,37 +313,48 @@ class VectorStoreHandler(BaseHandler):
        # id is string TODO is it ok?
        df[id_col] = df[id_col].apply(str)
 
-        if hasattr(self, 'upsert'):
+        # set updated_at
+        self.set_metadata_cur_time(df, "_updated_at")
+
+        if hasattr(self, "upsert"):
            self.upsert(table_name, df)
            return
 
        # find existing ids
-        res = self.select(
+        df_existed = self.select(
            table_name,
-            columns=[id_col],
-            conditions=[
-                FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df[id_col]))
-            ]
+            columns=[id_col, metadata_col],
+            conditions=[FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df[id_col]))],
        )
-        existed_ids = list(res[id_col])
+        existed_ids = list(df_existed[id_col])
 
        # update existed
        df_update = df[df[id_col].isin(existed_ids)]
        df_insert = df[~df[id_col].isin(existed_ids)]
 
        if not df_update.empty:
+            # get values of existed `created_at` and return them to metadata
+            created_dates = {row[id_col]: row[metadata_col].get("_created_at") for _, row in df_existed.iterrows()}
+
+            def keep_created_at(row):
+                val = created_dates.get(row[id_col])
+                if val:
+                    row[metadata_col]["_created_at"] = val
+                return row
+
+            df_update.apply(keep_created_at, axis=1)
+
            try:
                self.update(table_name, df_update, [id_col])
            except NotImplementedError:
                # not implemented? do it with delete and insert
-                conditions = [FilterCondition(
-                    column=id_col,
-                    op=FilterOperator.IN,
-                    value=list(df[id_col])
-                )]
+                conditions = [FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df[id_col]))]
                self.delete(table_name, conditions)
                self.insert(table_name, df_update)
        if not df_insert.empty:
+            # set created_at
+            self.set_metadata_cur_time(df_insert, "_created_at")
+
            self.insert(table_name, df_insert)
 
    def dispatch_delete(self, query: Delete, conditions: List[FilterCondition] = None):
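
The net effect of the new timestamp bookkeeping is easiest to see on a toy DataFrame; the rows below are made up, and the snippet only imitates what set_metadata_cur_time and the _created_at handling do to the metadata column, not the handler code itself.

# Toy illustration of the upsert timestamp handling (made-up rows, not handler code).
import datetime as dt

import pandas as pd

df = pd.DataFrame(
    {
        "id": ["doc1:chunk1", "doc1:chunk2"],
        "content": ["first chunk", "second chunk"],
        "metadata": [{"source": "a.txt"}, {"source": "a.txt"}],
    }
)

now = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
# every upserted row gets "_updated_at"; brand-new rows also get "_created_at",
# while rows that already exist keep the "_created_at" stored in the vector store
df["metadata"].apply(lambda meta: meta.update({"_updated_at": now, "_created_at": now}))
print(df["metadata"].tolist())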
@@ -356,42 +371,66 @@ class VectorStoreHandler(BaseHandler):
        # dispatch delete
        return self.delete(table_name, conditions=conditions)
 
-    def dispatch_select(self, query: Select, conditions: List[FilterCondition] = None):
+    def dispatch_select(
+        self,
+        query: Select,
+        conditions: Optional[List[FilterCondition]] = None,
+        allowed_metadata_columns: List[str] = None,
+        keyword_search_args: Optional[KeywordSearchArgs] = None,
+    ):
        """
-        Dispatch select query to the appropriate method.
+        Dispatches a select query to the appropriate method, handling both
+        standard selections and keyword searches based on the provided arguments.
        """
-        # parse key arguments
+        # 1. Parse common query arguments
        table_name = query.from_table.parts[-1]
-        # if targets are star, select all columns
+
+        # If targets are a star (*), select all schema columns
        if isinstance(query.targets[0], Star):
            columns = [col["name"] for col in self.SCHEMA]
        else:
            columns = [col.parts[-1] for col in query.targets]
 
+        # 2. Validate columns
        if not self._is_columns_allowed(columns):
-            raise Exception(
-                f"Columns {columns} not allowed."
-                f"Allowed columns are {[col['name'] for col in self.SCHEMA]}"
-            )
+            allowed_cols = [col["name"] for col in self.SCHEMA]
+            raise Exception(f"Columns {columns} not allowed. Allowed columns are {allowed_cols}")
 
-        # check if columns are allowed
+        # 3. Extract and process conditions
        if conditions is None:
            where_statement = query.where
            conditions = self.extract_conditions(where_statement)
-        self._convert_metadata_filters(conditions)
+        self._convert_metadata_filters(conditions, allowed_metadata_columns=allowed_metadata_columns)
 
-        # get offset and limit
+        # 4. Get offset and limit
        offset = query.offset.value if query.offset is not None else None
        limit = query.limit.value if query.limit is not None else None
 
-        # dispatch select
-        return self.select(
-            table_name,
-            columns=columns,
-            conditions=conditions,
-            offset=offset,
-            limit=limit,
-        )
+        # 5. Conditionally dispatch to the correct select method
+        if keyword_search_args:
+            # It's a keyword search
+            return self.keyword_select(
+                table_name,
+                columns=columns,
+                conditions=conditions,
+                offset=offset,
+                limit=limit,
+                keyword_search_args=keyword_search_args,
+            )
+        else:
+            # It's a standard select
+            try:
+                return self.select(
+                    table_name,
+                    columns=columns,
+                    conditions=conditions,
+                    offset=offset,
+                    limit=limit,
+                )
+
+            except Exception as e:
+                handler_engine = self.__class__.name
+                raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
 
    def _dispatch(self, query: ASTNode) -> HandlerResponse:
        """
@@ -408,10 +447,7 @@ class VectorStoreHandler(BaseHandler):
        if type(query) in dispatch_router:
            resp = dispatch_router[type(query)](query)
            if resp is not None:
-                return HandlerResponse(
-                    resp_type=RESPONSE_TYPE.TABLE,
-                    data_frame=resp
-                )
+                return HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=resp)
            else:
                return HandlerResponse(resp_type=RESPONSE_TYPE.OK)
 
@@ -455,9 +491,7 @@ class VectorStoreHandler(BaseHandler):
        """
        raise NotImplementedError()
 
-    def insert(
-        self, table_name: str, data: pd.DataFrame
-    ) -> HandlerResponse:
+    def insert(self, table_name: str, data: pd.DataFrame) -> HandlerResponse:
        """Insert data into table
 
        Args:
@@ -470,9 +504,7 @@ class VectorStoreHandler(BaseHandler):
        """
        raise NotImplementedError()
 
-    def update(
-        self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None
-    ):
+    def update(self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None):
        """Update data in table
 
        Args:
@@ -485,9 +517,7 @@ class VectorStoreHandler(BaseHandler):
        """
        raise NotImplementedError()
 
-    def delete(
-        self, table_name: str, conditions: List[FilterCondition] = None
-    ) -> HandlerResponse:
+    def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> HandlerResponse:
        """Delete data from table
 
        Args:
@@ -535,9 +565,9 @@ class VectorStoreHandler(BaseHandler):
        query: str = None,
        metadata: Dict[str, str] = None,
        distance_function=DistanceFunction.COSINE_DISTANCE,
-        **kwargs
+        **kwargs,
    ) -> pd.DataFrame:
-        '''
+        """
        Executes a hybrid search, combining semantic search and one or both of keyword/metadata search.
 
        For insight on the query construction, see: https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search.
@@ -551,11 +581,11 @@ class VectorStoreHandler(BaseHandler):
 
        Returns:
            df(pd.DataFrame): Hybrid search result, sorted by hybrid search rank
-        '''
-        raise NotImplementedError(f'Hybrid search not supported for VectorStoreHandler {self.name}')
+        """
+        raise NotImplementedError(f"Hybrid search not supported for VectorStoreHandler {self.name}")
 
    def create_index(self, *args, **kwargs):
        """
        Create an index on the specified table.
        """
-        raise NotImplementedError(f'create_index not supported for VectorStoreHandler {self.name}')
+        raise NotImplementedError(f"create_index not supported for VectorStoreHandler {self.name}")
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py
@@ -33,7 +33,7 @@ class BaseLLMReranker(BaseModel, ABC):
    client: Optional[AsyncOpenAI | BaseMLEngine] = None
    _semaphore: Optional[asyncio.Semaphore] = None
    max_concurrent_requests: int = 20
-    max_retries: int = 3
+    max_retries: int = 2
    retry_delay: float = 1.0
    request_timeout: float = 20.0  # Timeout for API requests
    early_stop: bool = True  # Whether to enable early stopping
@@ -100,7 +100,7 @@ class BaseLLMReranker(BaseModel, ABC):
        if self.api_key is not None:
            kwargs["api_key"] = self.api_key
 
-        return await self.client.acompletion(model=f"{self.provider}/{self.model}", messages=messages, args=kwargs)
+        return await self.client.acompletion(self.provider, model=self.model, messages=messages, args=kwargs)
 
    async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]:
        ranked_results = []
@@ -109,47 +109,41 @@ class BaseLLMReranker(BaseModel, ABC):
        batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
        for i in range(0, len(query_document_pairs), batch_size):
            batch = query_document_pairs[i : i + batch_size]
-            try:
-                results = await asyncio.gather(
-                    *[
-                        self._backoff_wrapper(query=query, document=document, rerank_callback=rerank_callback)
-                        for (query, document) in batch
-                    ],
-                    return_exceptions=True,
-                )
 
-                for idx, result in enumerate(results):
-                    if isinstance(result, Exception):
-                        log.error(f"Error processing document {i + idx}: {str(result)}")
-                        ranked_results.append((batch[idx][1], 0.0))
-                        continue
-
-                    score = result["relevance_score"]
-
-                    ranked_results.append((batch[idx][1], score))
-
-                    # Check if we should stop early
-                    try:
-                        high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
-                        can_stop_early = (
-                            self.early_stop  # Early stopping is enabled
-                            and self.num_docs_to_keep  # We have a target number of docs
-                            and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
-                            and score >= self.early_stop_threshold  # Current doc is good enough
-                        )
-
-                        if can_stop_early:
-                            log.info(
-                                f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence"
-                            )
-                            return ranked_results
-                    except Exception as e:
-                        # Don't let early stopping errors stop the whole process
-                        log.warning(f"Error in early stopping check: {str(e)}")
-
-            except Exception as e:
-                log.error(f"Batch processing error: {str(e)}")
-                continue
+            results = await asyncio.gather(
+                *[
+                    self._backoff_wrapper(query=query, document=document, rerank_callback=rerank_callback)
+                    for (query, document) in batch
+                ],
+                return_exceptions=True,
+            )
+
+            for idx, result in enumerate(results):
+                if isinstance(result, Exception):
+                    log.error(f"Error processing document {i + idx}: {str(result)}")
+                    raise RuntimeError(f"Error during reranking: {result}")
+
+                score = result["relevance_score"]
+
+                ranked_results.append((batch[idx][1], score))
+
+                # Check if we should stop early
+                try:
+                    high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
+                    can_stop_early = (
+                        self.early_stop  # Early stopping is enabled
+                        and self.num_docs_to_keep  # We have a target number of docs
+                        and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
+                        and score >= self.early_stop_threshold  # Current doc is good enough
+                    )
+
+                    if can_stop_early:
+                        log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
+                        return ranked_results
+                except Exception as e:
+                    # Don't let early stopping errors stop the whole process
+                    log.warning(f"Error in early stopping check: {str(e)}")
+
        return ranked_results
 
    async def _backoff_wrapper(self, query: str, document: str, rerank_callback=None) -> Any:
mindsdb/integrations/utilities/sql_utils.py
@@ -60,6 +60,17 @@ class FilterCondition:
    """
 
 
+class KeywordSearchArgs:
+    def __init__(self, column: str, query: str):
+        """
+        Args:
+            column: The column to search in.
+            query: The search query string.
+        """
+        self.column = column
+        self.query = query
+
+
 class SortColumn:
    def __init__(self, column: str, ascending: bool = True):
        self.column = column