MindsDB 25.6.4.0__py3-none-any.whl → 25.7.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/command_executor.py +8 -6
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/planner/query_prepare.py +68 -87
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
- mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
- mindsdb/api/http/namespaces/file.py +49 -24
- mindsdb/api/mcp/start.py +45 -31
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
- mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
- mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
- mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
- mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
- mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
- mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +150 -140
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/libs/vectordatabase_handler.py +86 -77
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
- mindsdb/interfaces/agents/agents_controller.py +29 -9
- mindsdb/interfaces/agents/langchain_agent.py +7 -5
- mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
- mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
- mindsdb/interfaces/knowledge_base/controller.py +115 -89
- mindsdb/interfaces/knowledge_base/evaluate.py +16 -4
- mindsdb/interfaces/knowledge_base/executor.py +346 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
- mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
- mindsdb/interfaces/skills/sql_agent.py +181 -130
- mindsdb/interfaces/storage/db.py +9 -7
- mindsdb/utilities/config.py +12 -1
- mindsdb/utilities/exception.py +47 -7
- mindsdb/utilities/security.py +54 -11
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/METADATA +248 -262
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/RECORD +46 -45
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.1.0.dist-info}/top_level.txt +0 -0
mindsdb/integrations/libs/vectordatabase_handler.py

@@ -2,6 +2,7 @@ import ast
 import hashlib
 from enum import Enum
 from typing import Dict, List, Optional
+import datetime as dt
 
 import pandas as pd
 from mindsdb_sql_parser.ast import (

@@ -28,6 +29,9 @@ from .base import BaseHandler
 LOG = log.getLogger(__name__)
 
 
+class VectorHandlerException(Exception): ...
+
+
 class TableField(Enum):
     """
     Enum for table fields.

@@ -43,9 +47,9 @@ class TableField(Enum):
 
 
 class DistanceFunction(Enum):
-    SQUARED_EUCLIDEAN_DISTANCE = '<->',
-    NEGATIVE_DOT_PRODUCT = '<#>',
-    COSINE_DISTANCE = '<=>'
+    SQUARED_EUCLIDEAN_DISTANCE = ("<->",)
+    NEGATIVE_DOT_PRODUCT = ("<#>",)
+    COSINE_DISTANCE = "<=>"
 
 
 class VectorStoreHandler(BaseHandler):

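Note the trailing commas: two of the three members now carry one-element tuple values, while COSINE_DISTANCE stays a plain string. A standalone reproduction (not an import from mindsdb) of what callers observe:

    from enum import Enum

    class DistanceFunction(Enum):  # reproduced from the hunk above
        SQUARED_EUCLIDEAN_DISTANCE = ("<->",)
        NEGATIVE_DOT_PRODUCT = ("<#>",)
        COSINE_DISTANCE = "<=>"

    # Code that interpolates .value into SQL has to account for the mix.
    assert DistanceFunction.SQUARED_EUCLIDEAN_DISTANCE.value == ("<->",)
    assert DistanceFunction.COSINE_DISTANCE.value == "<=>"
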
@@ -118,9 +122,7 @@ class VectorStoreHandler(BaseHandler):
                 right_hand = [item.value for item in node.args[1].items]
             else:
                 raise Exception(f"Unsupported right hand side: {node.args[1]}")
-            conditions.append(
-                FilterCondition(column=left_hand, op=op, value=right_hand)
-            )
+            conditions.append(FilterCondition(column=left_hand, op=op, value=right_hand))
 
         query_traversal(where_statement, _extract_comparison_conditions)
 
@@ -129,15 +131,23 @@ class VectorStoreHandler(BaseHandler):
 
         return conditions
 
-    def _convert_metadata_filters(self, conditions):
+    def _convert_metadata_filters(self, conditions, allowed_metadata_columns=None):
         if conditions is None:
             return
         # try to treat conditions that are not in TableField as metadata conditions
         for condition in conditions:
-            if self._is_metadata_condition(condition) and not condition.column.startswith(
-                TableField.METADATA.value
-            ):
-                condition.column = TableField.METADATA.value + "." + condition.column
+            if self._is_metadata_condition(condition):
+                # check restriction
+                if allowed_metadata_columns is not None:
+                    # system columns are underscored, skip them
+                    if condition.column.lower() not in allowed_metadata_columns and not condition.column.startswith(
+                        "_"
+                    ):
+                        raise ValueError(f"Column is not found: {condition.column}")
+
+                # convert if required
+                if not condition.column.startswith(TableField.METADATA.value):
+                    condition.column = TableField.METADATA.value + "." + condition.column
 
     def _is_columns_allowed(self, columns: List[str]) -> bool:
         """

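A minimal standalone sketch of the new restriction logic (the FilterCondition class and metadata prefix below are stand-ins, not the mindsdb imports):

    from dataclasses import dataclass

    METADATA_PREFIX = "metadata"  # stand-in for TableField.METADATA.value

    @dataclass
    class FilterCondition:  # stand-in for mindsdb's FilterCondition
        column: str
        op: str
        value: object

    def convert_metadata_filters(conditions, allowed_metadata_columns=None):
        # Mirror of the new checks: unknown columns raise, system columns
        # (underscore-prefixed) bypass the restriction, and bare metadata
        # columns get the "metadata." prefix.
        for condition in conditions:
            if allowed_metadata_columns is not None:
                if condition.column.lower() not in allowed_metadata_columns and not condition.column.startswith("_"):
                    raise ValueError(f"Column is not found: {condition.column}")
            if not condition.column.startswith(METADATA_PREFIX):
                condition.column = METADATA_PREFIX + "." + condition.column

    conds = [FilterCondition("category", "=", "docs"), FilterCondition("_updated_at", ">", "2025-07-01")]
    convert_metadata_filters(conds, allowed_metadata_columns={"category"})
    assert conds[0].column == "metadata.category"
    assert conds[1].column == "metadata._updated_at"  # system column: exempt from the check, still prefixed
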
@@ -146,16 +156,11 @@ class VectorStoreHandler(BaseHandler):
         allowed_columns = set([col["name"] for col in self.SCHEMA])
         return set(columns).issubset(allowed_columns)
 
-    def
+    def _is_metadata_condition(self, condition: FilterCondition) -> bool:
         allowed_field_values = set([field.value for field in TableField])
         if condition.column in allowed_field_values:
-            return
-
-        # check if column is a metadata column
-        if condition.column.startswith(TableField.METADATA.value):
-            return True
-        else:
-            return False
+            return False
+        return True
 
     def _dispatch_create_table(self, query: CreateTable):
         """

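Stated differently, the rewritten predicate treats every column outside the reserved TableField names as metadata; a tiny sketch (the reserved set below is illustrative, not the full TableField enum):

    RESERVED_FIELDS = {"id", "content", "embeddings", "metadata", "distance"}  # illustrative subset

    def is_metadata_condition(column: str) -> bool:
        # New behavior: anything that is not a reserved table field is metadata.
        return column not in RESERVED_FIELDS

    assert is_metadata_condition("category")
    assert not is_metadata_condition("content")
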
@@ -184,17 +189,12 @@ class VectorStoreHandler(BaseHandler):
         columns = [column.name for column in query.columns]
 
         if not self._is_columns_allowed(columns):
-            raise Exception(
-                f"Columns {columns} not allowed."
-                f"Allowed columns are {[col['name'] for col in self.SCHEMA]}"
-            )
+            raise Exception(f"Columns {columns} not allowed.Allowed columns are {[col['name'] for col in self.SCHEMA]}")
 
         # get content column if it is present
         if TableField.CONTENT.value in columns:
             content_col_index = columns.index("content")
-            content = [
-                self._value_or_self(row[content_col_index]) for row in query.values
-            ]
+            content = [self._value_or_self(row[content_col_index]) for row in query.values]
         else:
             content = None
 
@@ -209,19 +209,13 @@ class VectorStoreHandler(BaseHandler):
         # get embeddings column if it is present
         if TableField.EMBEDDINGS.value in columns:
             embeddings_col_index = columns.index("embeddings")
-            embeddings = [
-                ast.literal_eval(self._value_or_self(row[embeddings_col_index]))
-                for row in query.values
-            ]
+            embeddings = [ast.literal_eval(self._value_or_self(row[embeddings_col_index])) for row in query.values]
         else:
             raise Exception("Embeddings column is required!")
 
         if TableField.METADATA.value in columns:
             metadata_col_index = columns.index("metadata")
-            metadata = [
-                ast.literal_eval(self._value_or_self(row[metadata_col_index]))
-                for row in query.values
-            ]
+            metadata = [ast.literal_eval(self._value_or_self(row[metadata_col_index])) for row in query.values]
         else:
             metadata = None
 
@@ -277,6 +271,15 @@ class VectorStoreHandler(BaseHandler):
 
         return self.do_upsert(table_name, df)
 
+    def set_metadata_cur_time(self, df, col_name):
+        metadata_col = TableField.METADATA.value
+        cur_date = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        def set_time(meta):
+            meta[col_name] = cur_date
+
+        df[metadata_col].apply(set_time)
+
     def do_upsert(self, table_name, df):
         """Upsert data into table, handling document updates and deletions.
 
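The helper stamps one timestamp string into every row's metadata dict in place via Series.apply; a self-contained sketch of the same pattern (plain column names instead of TableField):

    import datetime as dt

    import pandas as pd

    def set_metadata_cur_time(df: pd.DataFrame, col_name: str) -> None:
        # One timestamp is computed once, then written into each row's
        # metadata dict; the dicts are mutated in place.
        cur_date = dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        def set_time(meta):
            meta[col_name] = cur_date

        df["metadata"].apply(set_time)

    df = pd.DataFrame({"id": ["a", "b"], "metadata": [{}, {}]})
    set_metadata_cur_time(df, "_updated_at")
    assert all("_updated_at" in meta for meta in df["metadata"])
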
@@ -289,6 +292,7 @@ class VectorStoreHandler(BaseHandler):
         2. Updated documents: Delete old chunks and insert new ones
         """
         id_col = TableField.ID.value
+        metadata_col = TableField.METADATA.value
         content_col = TableField.CONTENT.value
 
         def gen_hash(v):

@@ -309,37 +313,48 @@ class VectorStoreHandler(BaseHandler):
         # id is string TODO is it ok?
         df[id_col] = df[id_col].apply(str)
 
-        if hasattr(self, 'upsert'):
+        # set updated_at
+        self.set_metadata_cur_time(df, "_updated_at")
+
+        if hasattr(self, "upsert"):
             self.upsert(table_name, df)
             return
 
         # find existing ids
-
+        df_existed = self.select(
             table_name,
-            columns=[id_col],
-            conditions=[
-                FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df[id_col]))
-            ]
+            columns=[id_col, metadata_col],
+            conditions=[FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df[id_col]))],
         )
-        existed_ids = list(
+        existed_ids = list(df_existed[id_col])
 
         # update existed
         df_update = df[df[id_col].isin(existed_ids)]
         df_insert = df[~df[id_col].isin(existed_ids)]
 
         if not df_update.empty:
+            # get values of existed `created_at` and return them to metadata
+            created_dates = {row[id_col]: row[metadata_col].get("_created_at") for _, row in df_existed.iterrows()}
+
+            def keep_created_at(row):
+                val = created_dates.get(row[id_col])
+                if val:
+                    row[metadata_col]["_created_at"] = val
+                return row
+
+            df_update.apply(keep_created_at, axis=1)
+
             try:
                 self.update(table_name, df_update, [id_col])
             except NotImplementedError:
                 # not implemented? do it with delete and insert
-                conditions = [FilterCondition(
-                    column=id_col,
-                    op=FilterOperator.IN,
-                    value=list(df[id_col])
-                )]
+                conditions = [FilterCondition(column=id_col, op=FilterOperator.IN, value=list(df[id_col]))]
                 self.delete(table_name, conditions)
                 self.insert(table_name, df_update)
         if not df_insert.empty:
+            # set created_at
+            self.set_metadata_cur_time(df_insert, "_created_at")
+
             self.insert(table_name, df_insert)
 
     def dispatch_delete(self, query: Delete, conditions: List[FilterCondition] = None):

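The `_created_at` handling can be exercised in isolation; a sketch with plain pandas (the column names and timestamps are illustrative):

    import pandas as pd

    # Rows already in the store, as select(columns=[id, metadata]) would return them.
    df_existed = pd.DataFrame({"id": ["a"], "metadata": [{"_created_at": "2025-06-01 09:30:00"}]})
    # Incoming upsert batch: "a" is an update, "b" is an insert.
    df = pd.DataFrame({"id": ["a", "b"], "metadata": [{}, {}]})

    created_dates = {row["id"]: row["metadata"].get("_created_at") for _, row in df_existed.iterrows()}

    def keep_created_at(row):
        # Restore the original creation timestamp on updated rows.
        val = created_dates.get(row["id"])
        if val:
            row["metadata"]["_created_at"] = val
        return row

    df_update = df[df["id"].isin(created_dates)]
    df_update.apply(keep_created_at, axis=1)
    assert df.loc[0, "metadata"]["_created_at"] == "2025-06-01 09:30:00"
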
@@ -356,7 +371,9 @@ class VectorStoreHandler(BaseHandler):
         # dispatch delete
         return self.delete(table_name, conditions=conditions)
 
-    def dispatch_select(self, query: Select, conditions: List[FilterCondition] = None):
+    def dispatch_select(
+        self, query: Select, conditions: List[FilterCondition] = None, allowed_metadata_columns: List[str] = None
+    ):
         """
         Dispatch select query to the appropriate method.
         """

@@ -369,29 +386,30 @@ class VectorStoreHandler(BaseHandler):
         columns = [col.parts[-1] for col in query.targets]
 
         if not self._is_columns_allowed(columns):
-            raise Exception(
-                f"Columns {columns} not allowed."
-                f"Allowed columns are {[col['name'] for col in self.SCHEMA]}"
-            )
+            raise Exception(f"Columns {columns} not allowed.Allowed columns are {[col['name'] for col in self.SCHEMA]}")
 
         # check if columns are allowed
         if conditions is None:
             where_statement = query.where
             conditions = self.extract_conditions(where_statement)
-        self._convert_metadata_filters(conditions)
+        self._convert_metadata_filters(conditions, allowed_metadata_columns=allowed_metadata_columns)
 
         # get offset and limit
         offset = query.offset.value if query.offset is not None else None
         limit = query.limit.value if query.limit is not None else None
 
         # dispatch select
-        return self.select(
-            table_name,
-            columns=columns,
-            conditions=conditions,
-            offset=offset,
-            limit=limit,
-        )
+        try:
+            return self.select(
+                table_name,
+                columns=columns,
+                conditions=conditions,
+                offset=offset,
+                limit=limit,
+            )
+        except Exception as e:
+            handler_engine = self.__class__.name
+            raise VectorHandlerException(f"Error in {handler_engine} database: {e}")
 
     def _dispatch(self, query: ASTNode) -> HandlerResponse:
         """

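With the new try/except, every backend failure surfaces as one exception type tagged with the handler's engine name; a minimal sketch of the pattern:

    class VectorHandlerException(Exception): ...

    class FakeVectorHandler:
        name = "chromadb"  # handlers expose an engine name as a class attribute

        def select(self, table_name, **kwargs):
            raise RuntimeError("collection does not exist")

        def dispatch_select(self, table_name):
            try:
                return self.select(table_name)
            except Exception as e:
                raise VectorHandlerException(f"Error in {self.__class__.name} database: {e}")

    try:
        FakeVectorHandler().dispatch_select("my_table")
    except VectorHandlerException as e:
        print(e)  # Error in chromadb database: collection does not exist
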
@@ -408,10 +426,7 @@ class VectorStoreHandler(BaseHandler):
         if type(query) in dispatch_router:
             resp = dispatch_router[type(query)](query)
             if resp is not None:
-                return HandlerResponse(
-                    resp_type=RESPONSE_TYPE.TABLE,
-                    data_frame=resp
-                )
+                return HandlerResponse(resp_type=RESPONSE_TYPE.TABLE, data_frame=resp)
             else:
                 return HandlerResponse(resp_type=RESPONSE_TYPE.OK)
 
@@ -455,9 +470,7 @@ class VectorStoreHandler(BaseHandler):
         """
         raise NotImplementedError()
 
-    def insert(
-        self, table_name: str, data: pd.DataFrame
-    ) -> HandlerResponse:
+    def insert(self, table_name: str, data: pd.DataFrame) -> HandlerResponse:
         """Insert data into table
 
         Args:

@@ -470,9 +483,7 @@ class VectorStoreHandler(BaseHandler):
         """
         raise NotImplementedError()
 
-    def update(
-        self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None
-    ):
+    def update(self, table_name: str, data: pd.DataFrame, key_columns: List[str] = None):
         """Update data in table
 
         Args:

@@ -485,9 +496,7 @@ class VectorStoreHandler(BaseHandler):
         """
         raise NotImplementedError()
 
-    def delete(
-        self, table_name: str, conditions: List[FilterCondition] = None
-    ) -> HandlerResponse:
+    def delete(self, table_name: str, conditions: List[FilterCondition] = None) -> HandlerResponse:
         """Delete data from table
 
         Args:

@@ -535,9 +544,9 @@ class VectorStoreHandler(BaseHandler):
         query: str = None,
         metadata: Dict[str, str] = None,
         distance_function=DistanceFunction.COSINE_DISTANCE,
-        **kwargs
+        **kwargs,
     ) -> pd.DataFrame:
-        '''
+        """
         Executes a hybrid search, combining semantic search and one or both of keyword/metadata search.
 
         For insight on the query construction, see: https://docs.pgvecto.rs/use-case/hybrid-search.html#advanced-search-merge-the-results-of-full-text-search-and-vector-search.

@@ -551,11 +560,11 @@ class VectorStoreHandler(BaseHandler):
 
         Returns:
             df(pd.DataFrame): Hybrid search result, sorted by hybrid search rank
-        '''
-        raise NotImplementedError(f'Hybrid search not supported for VectorStoreHandler {self.name}')
+        """
+        raise NotImplementedError(f"Hybrid search not supported for VectorStoreHandler {self.name}")
 
     def create_index(self, *args, **kwargs):
         """
         Create an index on the specified table.
         """
-        raise NotImplementedError(f'create_index not supported for VectorStoreHandler {self.name}')
+        raise NotImplementedError(f"create_index not supported for VectorStoreHandler {self.name}")

mindsdb/integrations/utilities/rag/rerankers/base_reranker.py

@@ -33,7 +33,7 @@ class BaseLLMReranker(BaseModel, ABC):
     client: Optional[AsyncOpenAI | BaseMLEngine] = None
     _semaphore: Optional[asyncio.Semaphore] = None
     max_concurrent_requests: int = 20
-    max_retries: int =
+    max_retries: int = 2
     retry_delay: float = 1.0
     request_timeout: float = 20.0  # Timeout for API requests
     early_stop: bool = True  # Whether to enable early stopping

@@ -100,7 +100,7 @@ class BaseLLMReranker(BaseModel, ABC):
         if self.api_key is not None:
             kwargs["api_key"] = self.api_key
 
-        return await self.client.acompletion(
+        return await self.client.acompletion(self.provider, model=self.model, messages=messages, args=kwargs)
 
     async def _rank(self, query_document_pairs: List[Tuple[str, str]], rerank_callback=None) -> List[Tuple[str, float]]:
         ranked_results = []

@@ -109,47 +109,41 @@ class BaseLLMReranker(BaseModel, ABC):
         batch_size = min(self.max_concurrent_requests * 2, len(query_document_pairs))
         for i in range(0, len(query_document_pairs), batch_size):
             batch = query_document_pairs[i : i + batch_size]
-            try:
-                results = await asyncio.gather(
-                    *[
-                        self._backoff_wrapper(query=query, document=document, rerank_callback=rerank_callback)
-                        for (query, document) in batch
-                    ],
-                    return_exceptions=True,
-                )
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            results = await asyncio.gather(
+                *[
+                    self._backoff_wrapper(query=query, document=document, rerank_callback=rerank_callback)
+                    for (query, document) in batch
+                ],
+                return_exceptions=True,
+            )
+
+            for idx, result in enumerate(results):
+                if isinstance(result, Exception):
+                    log.error(f"Error processing document {i + idx}: {str(result)}")
+                    raise RuntimeError(f"Error during reranking: {result}")
+
+                score = result["relevance_score"]
+
+                ranked_results.append((batch[idx][1], score))
+
+                # Check if we should stop early
+                try:
+                    high_scoring_docs = [r for r in ranked_results if r[1] >= self.filtering_threshold]
+                    can_stop_early = (
+                        self.early_stop  # Early stopping is enabled
+                        and self.num_docs_to_keep  # We have a target number of docs
+                        and len(high_scoring_docs) >= self.num_docs_to_keep  # Found enough good docs
+                        and score >= self.early_stop_threshold  # Current doc is good enough
+                    )
+
+                    if can_stop_early:
+                        log.info(f"Early stopping after finding {self.num_docs_to_keep} documents with high confidence")
+                        return ranked_results
+                except Exception as e:
+                    # Don't let early stopping errors stop the whole process
+                    log.warning(f"Error in early stopping check: {str(e)}")
+
         return ranked_results
 
     async def _backoff_wrapper(self, query: str, document: str, rerank_callback=None) -> Any:

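The restructured loop fails fast on any gathered exception and can return before scoring every batch; a runnable sketch of that control flow (the thresholds and the scorer are stand-ins for the reranker's fields and its LLM call):

    import asyncio

    FILTERING_THRESHOLD = 0.8  # stand-in for self.filtering_threshold
    NUM_DOCS_TO_KEEP = 2       # stand-in for self.num_docs_to_keep

    async def score(doc: str) -> dict:
        # Stand-in for the per-document LLM relevance call.
        return {"relevance_score": 0.9 if "relevant" in doc else 0.1}

    async def rank(docs):
        ranked = []
        results = await asyncio.gather(*[score(d) for d in docs], return_exceptions=True)
        for idx, result in enumerate(results):
            if isinstance(result, Exception):
                # A single failed request now aborts the rerank instead of being skipped.
                raise RuntimeError(f"Error during reranking: {result}")
            ranked.append((docs[idx], result["relevance_score"]))
            # Early stop once enough documents clear the threshold.
            if len([r for r in ranked if r[1] >= FILTERING_THRESHOLD]) >= NUM_DOCS_TO_KEEP:
                return ranked
        return ranked

    print(asyncio.run(rank(["relevant a", "relevant b", "noise c"])))
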
mindsdb/interfaces/agents/agents_controller.py

@@ -160,7 +160,7 @@ class AgentsController:
         Parameters:
             name (str): The name of the new agent
             project_name (str): The containing project
-            model_name (str): The name of the existing ML model the agent will use
+            model_name (str | dict): The name of the existing ML model the agent will use
             skills (List[Union[str, dict]]): List of existing skill names to add to the new agent, or list of dicts
                 with one of keys is "name", and other is additional parameters for relationship agent<>skill
             provider (str): The provider of the model

@@ -172,6 +172,9 @@ class AgentsController:
             include_knowledge_bases: List of knowledge bases to include for text2sql skills
             ignore_knowledge_bases: List of knowledge bases to ignore for text2sql skills
             <provider>_api_key: API key for the provider (e.g., openai_api_key)
+            data: Dict, data sources for an agent, keys:
+                - knowledge_bases: List of KBs to use (alternative to `include_knowledge_bases`)
+                - tables: list of tables to use (alternative to `include_tables`)
 
         Returns:
             agent (db.Agents): The created agent

@@ -188,12 +191,17 @@ class AgentsController:
         if agent is not None:
             raise ValueError(f"Agent with name already exists: {name}")
 
-        if model_name is not None:
-            _, provider = self.check_model_provider(model_name, provider)
-
         # No need to copy params since we're not preserving the original reference
         params = params or {}
 
+        if isinstance(model_name, dict):
+            # move into params
+            params["model"] = model_name
+            model_name = None
+
+        if model_name is not None:
+            _, provider = self.check_model_provider(model_name, provider)
+
         if model_name is None:
             logger.warning("'model_name' param is not provided. Using default global llm model at runtime.")

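Accepting a dict for model_name means a full model spec can be passed where a name used to go; a sketch of the normalization step (the spec keys shown are illustrative):

    def normalize_model_arg(model_name, params: dict):
        # A dict model spec moves into params["model"]; a plain string
        # keeps following the old named-model path.
        if isinstance(model_name, dict):
            params["model"] = model_name
            model_name = None
        return model_name, params

    model_name, params = normalize_model_arg({"model_name": "gpt-4o", "provider": "openai"}, {})
    assert model_name is None and params["model"]["provider"] == "openai"
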
@@ -230,6 +238,12 @@ class AgentsController:
         if "database" in params or need_params:
             params["database"] = database
 
+        if "data" in params:
+            if include_knowledge_bases is None:
+                include_knowledge_bases = params["data"].get("knowledge_bases")
+            if include_tables is None:
+                include_tables = params["data"].get("tables")
+
         if "knowledge_base_database" in params or include_knowledge_bases or ignore_knowledge_bases:
             params["knowledge_base_database"] = knowledge_base_database
 
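The new `data` key is an alternative spelling for the include_* arguments; a sketch of the fallback (the KB and table names are made up):

    params = {"data": {"knowledge_bases": ["my_project.my_kb"], "tables": ["example_db.sales"]}}
    include_knowledge_bases = None
    include_tables = None

    # Explicit arguments win; the "data" dict only fills in what is unset.
    if "data" in params:
        if include_knowledge_bases is None:
            include_knowledge_bases = params["data"].get("knowledge_bases")
        if include_tables is None:
            include_tables = params["data"].get("tables")

    assert include_knowledge_bases == ["my_project.my_kb"]
    assert include_tables == ["example_db.sales"]
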
@@ -549,13 +563,19 @@ class AgentsController:
         agent.deleted_at = datetime.datetime.now()
         db.session.commit()
 
-    def get_agent_llm_params(self,
+    def get_agent_llm_params(self, agent_params: dict):
         """
         Get agent LLM parameters by combining default config with user provided parameters.
         Similar to how knowledge bases handle default parameters.
         """
         combined_model_params = copy.deepcopy(config.get("default_llm", {}))
 
+        if "model" in agent_params:
+            model_params = agent_params["model"]
+        else:
+            # params for LLM can be arbitrary
+            model_params = agent_params
+
         if model_params:
             combined_model_params.update(model_params)
 
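Merging proceeds defaults-first, with a nested "model" dict taking precedence over treating the whole params blob as LLM arguments; a sketch (the default_llm values below are assumed):

    import copy

    DEFAULT_LLM = {"provider": "openai", "model_name": "gpt-4o"}  # assumed default_llm config

    def get_agent_llm_params(agent_params: dict) -> dict:
        combined = copy.deepcopy(DEFAULT_LLM)
        # Prefer an explicit "model" dict; otherwise the agent params
        # themselves are treated as LLM parameters.
        model_params = agent_params["model"] if "model" in agent_params else agent_params
        if model_params:
            combined.update(model_params)
        return combined

    merged = get_agent_llm_params({"model": {"provider": "anthropic", "model_name": "claude-3-5-sonnet"}})
    assert merged["provider"] == "anthropic"
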
@@ -596,9 +616,9 @@ class AgentsController:
         db.session.commit()
 
         # Get agent parameters and combine with default LLM parameters at runtime
-
+        llm_params = self.get_agent_llm_params(agent.params)
 
-        lang_agent = LangchainAgent(agent, model,
+        lang_agent = LangchainAgent(agent, model, llm_params=llm_params)
         return lang_agent.get_completion(messages)
 
     def _get_completion_stream(

@@ -636,7 +656,7 @@ class AgentsController:
         db.session.commit()
 
         # Get agent parameters and combine with default LLM parameters at runtime
-
+        llm_params = self.get_agent_llm_params(agent.params)
 
-        lang_agent = LangchainAgent(agent, model=model,
+        lang_agent = LangchainAgent(agent, model=model, llm_params=llm_params)
         return lang_agent.get_completion(messages, stream=True)

mindsdb/interfaces/agents/langchain_agent.py

@@ -228,7 +228,7 @@ def process_chunk(chunk):
 
 
 class LangchainAgent:
-    def __init__(self, agent: db.Agents, model: dict = None,
+    def __init__(self, agent: db.Agents, model: dict = None, llm_params: dict = None):
         self.agent = agent
         self.model = model
 
@@ -241,12 +241,12 @@ class LangchainAgent:
         self.mdb_langfuse_callback_handler: Optional[object] = None  # custom (see langfuse_callback_handler.py)
 
         self.langfuse_client_wrapper = LangfuseClientWrapper()
-        self.args = self._initialize_args(
+        self.args = self._initialize_args(llm_params)
 
         # Back compatibility for old models
         self.provider = self.args.get("provider", get_llm_provider(self.args))
 
-    def _initialize_args(self,
+    def _initialize_args(self, llm_params: dict = None) -> dict:
         """
         Initialize the arguments for agent execution.
 
@@ -254,14 +254,16 @@ class LangchainAgent:
         The params are already merged with defaults by AgentsController.get_agent_llm_params.
 
         Args:
-
+            llm_params: Parameters for agent execution (already merged with defaults)
 
         Returns:
             dict: Final parameters for agent execution
         """
         # Use the parameters passed to the method (already merged with defaults by AgentsController)
         # No fallback needed as AgentsController.get_agent_llm_params already handles this
-        args = params.copy()
+        args = self.agent.params.copy()
+        if llm_params:
+            args.update(llm_params)
 
         # Set model name and provider if given in create agent otherwise use global llm defaults
         # AgentsController.get_agent_llm_params

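The precedence in _initialize_args is therefore: stored agent params as the base, merged llm_params on top; sketched with plain dicts (the values are illustrative):

    agent_params = {"prompt_template": "You are helpful.", "temperature": 0.7}
    llm_params = {"temperature": 0.0, "model_name": "gpt-4o"}  # merged defaults from the controller

    args = agent_params.copy()
    if llm_params:
        args.update(llm_params)  # runtime LLM params override stored agent params

    assert args["temperature"] == 0.0
    assert args["prompt_template"] == "You are helpful."
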
mindsdb/interfaces/agents/mcp_client_agent.py

@@ -71,11 +71,11 @@ class MCPLangchainAgent(LangchainAgent):
         self,
         agent: db.Agents,
         model: dict = None,
-
+        llm_params: dict = None,
         mcp_host: str = "127.0.0.1",
         mcp_port: int = 47337,
     ):
-        super().__init__(agent, model,
+        super().__init__(agent, model, llm_params)
         self.mcp_host = mcp_host
         self.mcp_port = mcp_port
         self.exit_stack = AsyncExitStack()

@@ -251,10 +251,10 @@ def create_mcp_agent(
         raise ValueError(f"Agent {agent_name} not found in project {project_name}")
 
     # Get merged parameters (defaults + agent params)
-
+    llm_params = agent_controller.get_agent_llm_params(agent_db.params)
 
     # Create MCP agent with merged parameters
-    mcp_agent = MCPLangchainAgent(agent_db,
+    mcp_agent = MCPLangchainAgent(agent_db, llm_params=llm_params, mcp_host=mcp_host, mcp_port=mcp_port)
 
     # Wrap for LiteLLM compatibility
     return LiteLLMAgentWrapper(mcp_agent)