MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff shows the changes between two publicly released package versions as they appear in their public registries, and is provided for informational purposes only.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +50 -23
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +7 -6
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/skills/retrieval_tool.py

@@ -1,32 +1,32 @@
-import
+from langchain_core.documents import Document
+from langchain_core.tools import Tool
 
 from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
 from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
 from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
 from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
-
+from mindsdb.integrations.libs.response import RESPONSE_TYPE
+from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
+    construct_model_from_args,
+)
 from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
 from mindsdb.interfaces.skills.skill_tool import skill_tool
 from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.storage.db import KnowledgeBase
 from mindsdb.utilities import log
-from langchain_core.documents import Document
-from langchain_core.tools import Tool
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
 
 logger = log.getLogger(__name__)
 
 
 def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
-    tools_config = tool[
+    tools_config = tool["config"]
     tools_config.update(pred_args)
 
     kb_params = {}
     embeddings_model = None
 
-    if
-    kb_name = tool[
+    if "source" in tool:
+        kb_name = tool["source"]
         executor = skill_tool.get_command_executor()
         kb = _get_knowledge_base(kb_name, skill.project_id, executor)
 
@@ -34,30 +34,26 @@ def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipeli
             raise ValueError(f"Knowledge base not found: {kb_name}")
 
         kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
-        vector_store_config = {
-
-
-        is_sparse = tools_config.pop('is_sparse', None)
-        vector_size = tools_config.pop('vector_size', None)
+        vector_store_config = {"kb_table": kb_table}
+        is_sparse = tools_config.pop("is_sparse", None)
+        vector_size = tools_config.pop("vector_size", None)
         if is_sparse is not None:
-            vector_store_config[
+            vector_store_config["is_sparse"] = is_sparse
         if vector_size is not None:
-            vector_store_config[
-        kb_params = {
-            'vector_store_config': vector_store_config
-        }
+            vector_store_config["vector_size"] = vector_size
+        kb_params = {"vector_store_config": vector_store_config}
 
         # Get embedding model from knowledge base table
         if kb_table._kb.embedding_model:
             # Extract embedding model args from knowledge base table
-            embedding_args = kb_table._kb.embedding_model.learn_args.get(
+            embedding_args = kb_table._kb.embedding_model.learn_args.get("using", {})
             # Construct the embedding model directly
             embeddings_model = construct_model_from_args(embedding_args)
             logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
         else:
             embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
             logger.debug("Using default embedding model as knowledge base has no embedding model")
-    elif
+    elif "embedding_model" not in tools_config:
         embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
         logger.debug("Using default embedding model as no knowledge base provided")
 
@@ -75,29 +71,28 @@ def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
         try:
             result = rag_pipeline(query)
             logger.debug(f"RAG pipeline result: {result}")
-            return result[
+            return result["answer"]
         except Exception as e:
-            logger.
-            logger.error(traceback.format_exc())
+            logger.exception("Error in RAG pipeline:")
             return f"Error in retrieval: {str(e)}"
 
     # Create RAG tool
-    tools_config = tool[
+    tools_config = tool["config"]
     tools_config.update(pred_args)
     return Tool(
         func=rag_wrapper,
-        name=tool[
-        description=tool[
-        response_format=
+        name=tool["name"],
+        description=tool["description"],
+        response_format="content",
         # Return directly by default since we already use an LLM against retrieved context to generate a response.
-        return_direct=tools_config.get(
+        return_direct=tools_config.get("return_direct", True),
     )
 
 
 def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
-    if
+    if "source" not in tool:
         raise ValueError("Knowledge base for tool not found")
-    kb_name = tool[
+    kb_name = tool["source"]
     executor = skill_tool.get_command_executor()
     kb = _get_knowledge_base(kb_name, skill.project_id, executor)
     if not kb:
@@ -110,16 +105,16 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
 
     def _get_document_by_name(name: str):
         if metadata_config.name_column_index is not None:
-            tsquery_str =
+            tsquery_str = " & ".join(name.split(" "))
             documents_response = vector_db_handler.native_query(
-                f
+                f"SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery('{tsquery_str}') LIMIT 1;"
             )
         else:
             documents_response = vector_db_handler.native_query(
-                f
+                f"SELECT * FROM {metadata_config.table} WHERE \"{metadata_config.name_column}\" ILIKE '%{name}%' LIMIT 1;"
             )
         if documents_response.resp_type == RESPONSE_TYPE.ERROR:
-            raise RuntimeError(f
+            raise RuntimeError(f"There was an error looking up documents: {documents_response.error_message}")
         if documents_response.data_frame.empty:
             return None
         document_row = documents_response.data_frame.head(1)
@@ -127,38 +122,36 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
         id_filter_condition = FilterCondition(
             f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
             FilterOperator.EQUAL,
-            str(document_row.get(metadata_config.id_column).item())
+            str(document_row.get(metadata_config.id_column).item()),
         )
         document_chunks_df = vector_db_handler.select(
-            metadata_config.embeddings_table,
-            conditions=[id_filter_condition]
+            metadata_config.embeddings_table, conditions=[id_filter_condition]
         )
         if document_chunks_df.empty:
             return None
-        sort_col =
+        sort_col = "chunk_id" if "chunk_id" in document_chunks_df.columns else "id"
         document_chunks_df.sort_values(by=sort_col)
-        content =
+        content = ""
         for _, chunk in document_chunks_df.iterrows():
             if len(content) > metadata_config.max_document_context:
                 break
-            content += chunk.get(metadata_config.content_column,
+            content += chunk.get(metadata_config.content_column, "")
 
-        return Document(
-            page_content=content,
-            metadata=document_row.to_dict(orient='records')[0]
-        )
+        return Document(page_content=content, metadata=document_row.to_dict(orient="records")[0])
 
     def _lookup_document_by_name(name: str):
         found_document = _get_document_by_name(name)
         if found_document is None:
-            return
+            return (
+                f"I could not find any document with name {name}. Please make sure the document name matches exactly."
+            )
         return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
 
     return Tool(
         func=_lookup_document_by_name,
-        name=tool.get(
-        description=
-        return_direct=False
+        name=tool.get("name", "") + "_name_lookup",
+        description="You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.",
+        return_direct=False,
    )
 
 
@@ -181,7 +174,7 @@ def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
     try:
         rag_config = _load_rag_config(tool, pred_args, skill)
     except Exception as e:
-        logger.
+        logger.exception("Error building RAG pipeline:")
         raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
     tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
     if rag_config.metadata_config is None:
mindsdb/interfaces/skills/skill_tool.py

@@ -274,8 +274,8 @@ class SkillToolController:
                 else:
                     for table_name in response.data_frame.iloc[:, name_idx]:
                         tables_list.append(f"{database}.{escape_table_name(table_name)}")
-            except Exception
-                logger.warning(f"Could not get tables from database {database}:
+            except Exception:
+                logger.warning(f"Could not get tables from database {database}:", exc_info=True)
                 continue
 
         # Handle table restrictions
mindsdb/interfaces/skills/sql_agent.py

@@ -1,7 +1,6 @@
 import re
 import csv
 import inspect
-import traceback
 from io import StringIO
 from typing import Iterable, List, Optional, Any, Tuple
 from collections import defaultdict
@@ -254,8 +253,16 @@ class SQLAgent:
             self.check_table_permission(node)
         except ValueError as origin_exc:
             # was it badly quoted by llm?
-
-
+            #
+            if "." in node.parts[0]:
+                # extract quoted parts (with dots) to sub-parts
+                parts = []
+                for i, item in enumerate(node.parts):
+                    if node.is_quoted[i] and "." in item:
+                        parts.extend(Identifier(item).parts)
+                    else:
+                        parts.append(item)
+                node2 = Identifier(parts=parts)
                 try:
                     _check_f(node2, is_table=True)
                     return node2
@@ -382,9 +389,9 @@ class SQLAgent:
             # self._cache.set(cache_key, set(kb_names))
 
             return kb_names
-        except Exception
+        except Exception:
             # If there's an error, log it and return an empty list
-            logger.
+            logger.exception("Error in get_usable_knowledge_base_names")
             return []
 
     def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
@@ -483,9 +490,9 @@ class SQLAgent:
             # remove backticks
             name = name.replace("`", "")
 
-
-            if len(
-                all_tables.append(Identifier(parts=
+            parts = name.split(".")
+            if len(parts) > 1:
+                all_tables.append(Identifier(parts=parts))
             else:
                 all_tables.append(Identifier(name))
 
@@ -526,8 +533,8 @@ class SQLAgent:
 
             sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
             sample_rows_str = "\n" + f"{kb_name}:" + list_to_csv_str(sample_rows)
-        except Exception
-            logger.info(
+        except Exception:
+            logger.info("_get_sample_rows error:", exc_info=True)
             sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
 
         return sample_rows_str
@@ -560,7 +567,7 @@ class SQLAgent:
                 )
             ]
         except Exception as e:
-            logger.
+            logger.exception(f"Failed processing column info for {table_str}:")
             raise ValueError(f"Failed to process column info for {table_str}") from e
 
         if not fields:
@@ -569,8 +576,8 @@ class SQLAgent:
 
         try:
             sample_rows_info = self._get_sample_rows(table_str, fields)
-        except Exception
-            logger.warning(f"Could not get sample rows for {table_str}:
+        except Exception:
+            logger.warning(f"Could not get sample rows for {table_str}:", exc_info=True)
            sample_rows_info = "\n\t [error] Couldn't retrieve sample rows!"
 
         info = f"Table named `{table_str}`:\n"
@@ -585,7 +592,7 @@ class SQLAgent:
 
     def _get_sample_rows(self, table: str, fields: List[str]) -> str:
         logger.info(f"_get_sample_rows: table={table} fields={fields}")
-        command = f"select
+        command = f"select * from {table} limit {self._sample_rows_in_table_info};"
         try:
             ret = self._call_engine(command)
             sample_rows = ret.data.to_lists()
@@ -596,8 +603,8 @@ class SQLAgent:
 
             sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
             sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
-        except Exception
-            logger.info(
+        except Exception:
+            logger.info("_get_sample_rows error:", exc_info=True)
             sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
 
         return sample_rows_str
@@ -647,7 +654,7 @@ class SQLAgent:
             logger.info(f"get_table_info_safe: {table_names}")
             return self.get_table_info(table_names)
         except Exception as e:
-            logger.info(
+            logger.info("get_table_info_safe error:", exc_info=True)
             return f"Error: {e}"
 
     def query_safe(self, command: str, fetch: str = "all") -> str:
@@ -655,8 +662,7 @@ class SQLAgent:
             logger.info(f"query_safe (fetch={fetch}): {command}")
             return self.query(command, fetch)
         except Exception as e:
-            logger.
-            logger.info(f"query_safe error: {e}")
+            logger.exception("Error in query_safe:")
             msg = f"Error: {e}"
             if "does not exist" in msg and " relation " in msg:
                 msg += "\nAvailable tables: " + ", ".join(self.get_usable_table_names())