PyPI - MindsDB - Versions diffs - 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl - Mend

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (163) hide show

mindsdb/__about__.py +1 -1
mindsdb/__main__.py +40 -29
mindsdb/api/a2a/__init__.py +1 -1
mindsdb/api/a2a/agent.py +16 -10
mindsdb/api/a2a/common/server/server.py +7 -3
mindsdb/api/a2a/common/server/task_manager.py +12 -5
mindsdb/api/a2a/common/types.py +66 -0
mindsdb/api/a2a/task_manager.py +65 -17
mindsdb/api/common/middleware.py +10 -12
mindsdb/api/executor/command_executor.py +51 -40
mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
mindsdb/api/executor/exceptions.py +29 -10
mindsdb/api/executor/planner/plan_join.py +17 -3
mindsdb/api/executor/planner/query_prepare.py +2 -20
mindsdb/api/executor/sql_query/sql_query.py +74 -74
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
mindsdb/api/executor/utilities/functions.py +6 -6
mindsdb/api/executor/utilities/sql.py +37 -20
mindsdb/api/http/gui.py +5 -11
mindsdb/api/http/initialize.py +75 -61
mindsdb/api/http/namespaces/agents.py +10 -15
mindsdb/api/http/namespaces/analysis.py +13 -20
mindsdb/api/http/namespaces/auth.py +1 -1
mindsdb/api/http/namespaces/chatbots.py +0 -5
mindsdb/api/http/namespaces/config.py +15 -11
mindsdb/api/http/namespaces/databases.py +140 -201
mindsdb/api/http/namespaces/file.py +17 -4
mindsdb/api/http/namespaces/handlers.py +17 -7
mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
mindsdb/api/http/namespaces/models.py +94 -126
mindsdb/api/http/namespaces/projects.py +13 -22
mindsdb/api/http/namespaces/sql.py +33 -25
mindsdb/api/http/namespaces/tab.py +27 -37
mindsdb/api/http/namespaces/views.py +1 -1
mindsdb/api/http/start.py +16 -10
mindsdb/api/mcp/__init__.py +2 -1
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
mindsdb/integrations/libs/api_handler.py +10 -10
mindsdb/integrations/libs/base.py +4 -4
mindsdb/integrations/libs/llm/utils.py +2 -2
mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
mindsdb/integrations/libs/process_cache.py +132 -140
mindsdb/integrations/libs/response.py +18 -12
mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
mindsdb/integrations/utilities/files/file_reader.py +6 -7
mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
mindsdb/integrations/utilities/rag/config_loader.py +37 -26
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
mindsdb/integrations/utilities/rag/settings.py +58 -133
mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
mindsdb/interfaces/agents/agents_controller.py +2 -3
mindsdb/interfaces/agents/constants.py +0 -2
mindsdb/interfaces/agents/litellm_server.py +34 -58
mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
mindsdb/interfaces/chatbot/polling.py +30 -18
mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
mindsdb/interfaces/database/data_handlers_cache.py +190 -0
mindsdb/interfaces/database/database.py +3 -3
mindsdb/interfaces/database/integrations.py +7 -110
mindsdb/interfaces/database/projects.py +2 -6
mindsdb/interfaces/database/views.py +1 -4
mindsdb/interfaces/file/file_controller.py +6 -6
mindsdb/interfaces/functions/controller.py +1 -1
mindsdb/interfaces/functions/to_markdown.py +2 -2
mindsdb/interfaces/jobs/jobs_controller.py +5 -9
mindsdb/interfaces/jobs/scheduler.py +3 -9
mindsdb/interfaces/knowledge_base/controller.py +244 -128
mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
mindsdb/interfaces/knowledge_base/executor.py +11 -0
mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
mindsdb/interfaces/model/model_controller.py +172 -168
mindsdb/interfaces/query_context/context_controller.py +14 -2
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
mindsdb/interfaces/skills/retrieval_tool.py +43 -50
mindsdb/interfaces/skills/skill_tool.py +2 -2
mindsdb/interfaces/skills/skills_controller.py +1 -4
mindsdb/interfaces/skills/sql_agent.py +25 -19
mindsdb/interfaces/storage/db.py +16 -6
mindsdb/interfaces/storage/fs.py +114 -169
mindsdb/interfaces/storage/json.py +19 -18
mindsdb/interfaces/tabs/tabs_controller.py +49 -72
mindsdb/interfaces/tasks/task_monitor.py +3 -9
mindsdb/interfaces/tasks/task_thread.py +7 -9
mindsdb/interfaces/triggers/trigger_task.py +7 -13
mindsdb/interfaces/triggers/triggers_controller.py +47 -52
mindsdb/migrations/migrate.py +16 -16
mindsdb/utilities/api_status.py +58 -0
mindsdb/utilities/config.py +68 -2
mindsdb/utilities/exception.py +40 -1
mindsdb/utilities/fs.py +0 -1
mindsdb/utilities/hooks/profiling.py +17 -14
mindsdb/utilities/json_encoder.py +24 -10
mindsdb/utilities/langfuse.py +40 -45
mindsdb/utilities/log.py +272 -0
mindsdb/utilities/ml_task_queue/consumer.py +52 -58
mindsdb/utilities/ml_task_queue/producer.py +26 -30
mindsdb/utilities/render/sqlalchemy_render.py +22 -20
mindsdb/utilities/starters.py +0 -10
mindsdb/utilities/utils.py +2 -2
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +293 -276
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +144 -158
mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
mindsdb/api/postgres/__init__.py +0 -0
mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
mindsdb/api/postgres/start.py +0 -11
mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0

mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py CHANGED Viewed

@@ -55,7 +55,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             f"""\
             Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
             Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
-            This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
+            This system is a highly intelligent and reliable SQL skill designed to work with databases.
             Follow these instructions with utmost precision:
             1. Final Response Format:
                - Assume the frontend fully supports Markdown unless the user specifies otherwise.
@@ -73,7 +73,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
                - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
             5. Date Handling:
                - **System current date and time: {current_date_time} (UTC or local timezone based on server settings).**
-               - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
+               - **Always** use `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
                - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
                - Do not compare date values without casting columns to date.
                - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
@@ -95,6 +95,8 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             8. Identity and Purpose:
                - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
                - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
+            9. Important: you can use only mysql quoting rules to compose queries: backticks (`) for identifiers, and single quotes (') for constants
             Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
         """
         )
@@ -110,7 +112,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             "If the query is correct, it will be parsed and returned. "
             f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
         )
-        mindsdb_sql_parser_tool = MindsDBSQLParserTool(
+        mindsdb_sql_parser_tool = MindsDBSQLParserTool(  # noqa: F841
             name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
         )
@@ -118,7 +120,7 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             query_sql_database_tool,
             info_sql_database_tool,
             list_sql_database_tool,
-            mindsdb_sql_parser_tool,
+            # mindsdb_sql_parser_tool,
         ]
         if not self.include_knowledge_base_tools:
             return sql_tools
@@ -175,29 +177,23 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
                 Query Types and Examples:
                 1. Basic semantic search:
-                   kb_query_tool("SELECT * FROM kb_name WHERE content = 'your search query';")
+                   kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'your search query';")
                 2. Metadata filtering:
                    kb_query_tool("SELECT * FROM kb_name WHERE metadata_field = 'value';")
                 3. Combined search:
-                   kb_query_tool("SELECT * FROM kb_name WHERE content = 'query' AND metadata_field = 'value';")
+                   kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND metadata_field = 'value';")
                 4. Setting relevance threshold:
-                   kb_query_tool("SELECT * FROM kb_name WHERE content = 'query' AND relevance_threshold = 0.7;")
+                   kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' AND relevance_threshold = 0.7;")
                 5. Limiting results:
-                   kb_query_tool("SELECT * FROM kb_name WHERE content = 'query' LIMIT 5;")
+                   kb_query_tool("SELECT * FROM kb_name WHERE chunk_content = 'query' LIMIT 5;")
                 6. Getting sample data:
                    kb_query_tool("SELECT * FROM kb_name LIMIT 3;")
-                7. Don't use LIKE operator on content filter ie semantic search:
-                SELECT * FROM `test_kb` WHERE content LIKE '%population of New York%' $STOP$
-                Like is not supported, use the following instead:
-                SELECT * FROM `test_kb` WHERE content = 'population of New York'
                 Result Format:
                 - Results include: id, chunk_id, chunk_content, metadata, distance, and relevance columns
                 - The metadata column contains a JSON object with all metadata fields

mindsdb/interfaces/skills/retrieval_tool.py CHANGED Viewed

@@ -1,32 +1,32 @@
-import traceback
+from langchain_core.documents import Document
+from langchain_core.tools import Tool
 from mindsdb.integrations.utilities.rag.rag_pipeline_builder import RAG
 from mindsdb.integrations.utilities.rag.config_loader import load_rag_config
 from mindsdb.integrations.utilities.rag.settings import RAGPipelineModel
 from mindsdb.integrations.utilities.sql_utils import FilterCondition, FilterOperator
+from mindsdb.integrations.libs.response import RESPONSE_TYPE
+from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import (
+    construct_model_from_args,
+)
 from mindsdb.interfaces.agents.constants import DEFAULT_EMBEDDINGS_MODEL_CLASS
 from mindsdb.interfaces.skills.skill_tool import skill_tool
 from mindsdb.interfaces.storage import db
 from mindsdb.interfaces.storage.db import KnowledgeBase
 from mindsdb.utilities import log
-from langchain_core.documents import Document
-from langchain_core.tools import Tool
-from mindsdb.integrations.libs.response import RESPONSE_TYPE
-from mindsdb.integrations.handlers.langchain_embedding_handler.langchain_embedding_handler import construct_model_from_args
 logger = log.getLogger(__name__)
 def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipelineModel:
-    tools_config = tool['config']
+    tools_config = tool["config"]
     tools_config.update(pred_args)
     kb_params = {}
     embeddings_model = None
-    if 'source' in tool:
-        kb_name = tool['source']
+    if "source" in tool:
+        kb_name = tool["source"]
         executor = skill_tool.get_command_executor()
         kb = _get_knowledge_base(kb_name, skill.project_id, executor)
@@ -34,30 +34,26 @@ def _load_rag_config(tool: dict, pred_args: dict, skill: db.Skills) -> RAGPipeli
             raise ValueError(f"Knowledge base not found: {kb_name}")
         kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
-        vector_store_config = {
-            'kb_table': kb_table
-        }
-        is_sparse = tools_config.pop('is_sparse', None)
-        vector_size = tools_config.pop('vector_size', None)
+        vector_store_config = {"kb_table": kb_table}
+        is_sparse = tools_config.pop("is_sparse", None)
+        vector_size = tools_config.pop("vector_size", None)
         if is_sparse is not None:
-            vector_store_config['is_sparse'] = is_sparse
+            vector_store_config["is_sparse"] = is_sparse
         if vector_size is not None:
-            vector_store_config['vector_size'] = vector_size
-        kb_params = {
-            'vector_store_config': vector_store_config
-        }
+            vector_store_config["vector_size"] = vector_size
+        kb_params = {"vector_store_config": vector_store_config}
         # Get embedding model from knowledge base table
         if kb_table._kb.embedding_model:
             # Extract embedding model args from knowledge base table
-            embedding_args = kb_table._kb.embedding_model.learn_args.get('using', {})
+            embedding_args = kb_table._kb.embedding_model.learn_args.get("using", {})
             # Construct the embedding model directly
             embeddings_model = construct_model_from_args(embedding_args)
             logger.debug(f"Using knowledge base embedding model with args: {embedding_args}")
         else:
             embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
             logger.debug("Using default embedding model as knowledge base has no embedding model")
-    elif 'embedding_model' not in tools_config:
+    elif "embedding_model" not in tools_config:
         embeddings_model = DEFAULT_EMBEDDINGS_MODEL_CLASS()
         logger.debug("Using default embedding model as no knowledge base provided")
@@ -75,29 +71,28 @@ def _build_rag_pipeline_tool(tool: dict, pred_args: dict, skill: db.Skills):
         try:
             result = rag_pipeline(query)
             logger.debug(f"RAG pipeline result: {result}")
-            return result['answer']
+            return result["answer"]
         except Exception as e:
-            logger.error(f"Error in RAG pipeline: {str(e)}")
-            logger.error(traceback.format_exc())
+            logger.exception("Error in RAG pipeline:")
             return f"Error in retrieval: {str(e)}"
     # Create RAG tool
-    tools_config = tool['config']
+    tools_config = tool["config"]
     tools_config.update(pred_args)
     return Tool(
         func=rag_wrapper,
-        name=tool['name'],
-        description=tool['description'],
-        response_format='content',
+        name=tool["name"],
+        description=tool["description"],
+        response_format="content",
         # Return directly by default since we already use an LLM against retrieved context to generate a response.
-        return_direct=tools_config.get('return_direct', True)
+        return_direct=tools_config.get("return_direct", True),
     )
 def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
-    if 'source' not in tool:
+    if "source" not in tool:
         raise ValueError("Knowledge base for tool not found")
-    kb_name = tool['source']
+    kb_name = tool["source"]
     executor = skill_tool.get_command_executor()
     kb = _get_knowledge_base(kb_name, skill.project_id, executor)
     if not kb:
@@ -110,16 +105,16 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
     def _get_document_by_name(name: str):
         if metadata_config.name_column_index is not None:
-            tsquery_str = ' & '.join(name.split(' '))
+            tsquery_str = " & ".join(name.split(" "))
             documents_response = vector_db_handler.native_query(
-                f'SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery(\'{tsquery_str}\') LIMIT 1;'
+                f"SELECT * FROM {metadata_config.table} WHERE {metadata_config.name_column_index} @@ to_tsquery('{tsquery_str}') LIMIT 1;"
             )
         else:
             documents_response = vector_db_handler.native_query(
-                f'SELECT * FROM {metadata_config.table} WHERE "{metadata_config.name_column}" ILIKE \'%{name}%\' LIMIT 1;'
+                f"SELECT * FROM {metadata_config.table} WHERE \"{metadata_config.name_column}\" ILIKE '%{name}%' LIMIT 1;"
             )
         if documents_response.resp_type == RESPONSE_TYPE.ERROR:
-            raise RuntimeError(f'There was an error looking up documents: {documents_response.error_message}')
+            raise RuntimeError(f"There was an error looking up documents: {documents_response.error_message}")
         if documents_response.data_frame.empty:
             return None
         document_row = documents_response.data_frame.head(1)
@@ -127,38 +122,36 @@ def _build_name_lookup_tool(tool: dict, pred_args: dict, skill: db.Skills):
         id_filter_condition = FilterCondition(
             f"{metadata_config.embeddings_metadata_column}->>'{metadata_config.doc_id_key}'",
             FilterOperator.EQUAL,
-            str(document_row.get(metadata_config.id_column).item())
+            str(document_row.get(metadata_config.id_column).item()),
         )
         document_chunks_df = vector_db_handler.select(
-            metadata_config.embeddings_table,
-            conditions=[id_filter_condition]
+            metadata_config.embeddings_table, conditions=[id_filter_condition]
         )
         if document_chunks_df.empty:
             return None
-        sort_col = 'chunk_id' if 'chunk_id' in document_chunks_df.columns else 'id'
+        sort_col = "chunk_id" if "chunk_id" in document_chunks_df.columns else "id"
         document_chunks_df.sort_values(by=sort_col)
-        content = ''
+        content = ""
         for _, chunk in document_chunks_df.iterrows():
             if len(content) > metadata_config.max_document_context:
                 break
-            content += chunk.get(metadata_config.content_column, '')
+            content += chunk.get(metadata_config.content_column, "")
-        return Document(
-            page_content=content,
-            metadata=document_row.to_dict(orient='records')[0]
-        )
+        return Document(page_content=content, metadata=document_row.to_dict(orient="records")[0])
     def _lookup_document_by_name(name: str):
         found_document = _get_document_by_name(name)
         if found_document is None:
-            return f'I could not find any document with name {name}. Please make sure the document name matches exactly.'
+            return (
+                f"I could not find any document with name {name}. Please make sure the document name matches exactly."
+            )
         return f"I found document {found_document.metadata.get(metadata_config.id_column)} with name {found_document.metadata.get(metadata_config.name_column)}. Here is the full document to use as context:\n\n{found_document.page_content}"
     return Tool(
         func=_lookup_document_by_name,
-        name=tool.get('name', '') + '_name_lookup',
-        description='You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.',
-        return_direct=False
+        name=tool.get("name", "") + "_name_lookup",
+        description="You must use this tool ONLY when the user is asking about a specific document by name or title. The input should be the exact name of the document the user is looking for.",
+        return_direct=False,
     )
@@ -181,7 +174,7 @@ def build_retrieval_tools(tool: dict, pred_args: dict, skill: db.Skills):
     try:
         rag_config = _load_rag_config(tool, pred_args, skill)
     except Exception as e:
-        logger.error(f"Error building RAG pipeline: {str(e)}")
+        logger.exception("Error building RAG pipeline:")
         raise ValueError(f"Failed to build RAG pipeline: {str(e)}")
     tools = [_build_rag_pipeline_tool(tool, pred_args, skill)]
     if rag_config.metadata_config is None:

mindsdb/interfaces/skills/skill_tool.py CHANGED Viewed

@@ -274,8 +274,8 @@ class SkillToolController:
                     else:
                         for table_name in response.data_frame.iloc[:, name_idx]:
                             tables_list.append(f"{database}.{escape_table_name(table_name)}")
-                except Exception as e:
-                    logger.warning(f"Could not get tables from database {database}: {str(e)}")
+                except Exception:
+                    logger.warning(f"Could not get tables from database {database}:", exc_info=True)
                 continue
             # Handle table restrictions

mindsdb/interfaces/skills/skills_controller.py CHANGED Viewed

@@ -100,10 +100,7 @@ class SkillsController:
             project_name = default_project
         project = self.project_controller.get(name=project_name)
-        if not name.islower():
-            raise ValueError(f"The name must be in lower case: {name}")
-        skill = self.get_skill(name, project_name)
+        skill = self.get_skill(name, project_name, strict_case=True)
         if skill is not None:
             raise ValueError(f"Skill with name already exists: {name}")

mindsdb/interfaces/skills/sql_agent.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import re
 import csv
 import inspect
-import traceback
 from io import StringIO
 from typing import Iterable, List, Optional, Any, Tuple
 from collections import defaultdict
@@ -254,8 +253,16 @@ class SQLAgent:
                             self.check_table_permission(node)
                         except ValueError as origin_exc:
                             # was it badly quoted by llm?
-                            if len(node.parts) == 1 and node.is_quoted[0] and "." in node.parts[0]:
-                                node2 = Identifier(node.parts[0])
+                            #
+                            if "." in node.parts[0]:
+                                # extract quoted parts (with dots) to sub-parts
+                                parts = []
+                                for i, item in enumerate(node.parts):
+                                    if node.is_quoted[i] and "." in item:
+                                        parts.extend(Identifier(item).parts)
+                                    else:
+                                        parts.append(item)
+                                node2 = Identifier(parts=parts)
                                 try:
                                     _check_f(node2, is_table=True)
                                     return node2
@@ -382,9 +389,9 @@ class SQLAgent:
             #     self._cache.set(cache_key, set(kb_names))
             return kb_names
-        except Exception as e:
+        except Exception:
             # If there's an error, log it and return an empty list
-            logger.error(f"Error in get_usable_knowledge_base_names: {str(e)}")
+            logger.exception("Error in get_usable_knowledge_base_names")
             return []
     def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
@@ -483,9 +490,9 @@ class SQLAgent:
                 # remove backticks
                 name = name.replace("`", "")
-                split = name.split(".")
-                if len(split) > 1:
-                    all_tables.append(Identifier(parts=[split[0], split[-1]]))
+                parts = name.split(".")
+                if len(parts) > 1:
+                    all_tables.append(Identifier(parts=parts))
                 else:
                     all_tables.append(Identifier(name))
@@ -526,8 +533,8 @@ class SQLAgent:
             sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
             sample_rows_str = "\n" + f"{kb_name}:" + list_to_csv_str(sample_rows)
-        except Exception as e:
-            logger.info(f"_get_sample_rows error: {e}")
+        except Exception:
+            logger.info("_get_sample_rows error:", exc_info=True)
             sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
         return sample_rows_str
@@ -560,7 +567,7 @@ class SQLAgent:
                 )
             ]
         except Exception as e:
-            logger.error(f"Failed processing column info for {table_str}: {e}", exc_info=True)
+            logger.exception(f"Failed processing column info for {table_str}:")
             raise ValueError(f"Failed to process column info for {table_str}") from e
         if not fields:
@@ -569,8 +576,8 @@ class SQLAgent:
         try:
             sample_rows_info = self._get_sample_rows(table_str, fields)
-        except Exception as e:
-            logger.warning(f"Could not get sample rows for {table_str}: {e}")
+        except Exception:
+            logger.warning(f"Could not get sample rows for {table_str}:", exc_info=True)
             sample_rows_info = "\n\t [error] Couldn't retrieve sample rows!"
         info = f"Table named `{table_str}`:\n"
@@ -585,7 +592,7 @@ class SQLAgent:
     def _get_sample_rows(self, table: str, fields: List[str]) -> str:
         logger.info(f"_get_sample_rows: table={table} fields={fields}")
-        command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
+        command = f"select * from {table} limit {self._sample_rows_in_table_info};"
         try:
             ret = self._call_engine(command)
             sample_rows = ret.data.to_lists()
@@ -596,8 +603,8 @@ class SQLAgent:
             sample_rows = list(map(lambda row: [truncate_value(value) for value in row], sample_rows))
             sample_rows_str = "\n" + list_to_csv_str([fields] + sample_rows)
-        except Exception as e:
-            logger.info(f"_get_sample_rows error: {e}")
+        except Exception:
+            logger.info("_get_sample_rows error:", exc_info=True)
             sample_rows_str = "\n" + "\t [error] Couldn't retrieve sample rows!"
         return sample_rows_str
@@ -647,7 +654,7 @@ class SQLAgent:
             logger.info(f"get_table_info_safe: {table_names}")
             return self.get_table_info(table_names)
         except Exception as e:
-            logger.info(f"get_table_info_safe error: {e}")
+            logger.info("get_table_info_safe error:", exc_info=True)
             return f"Error: {e}"
     def query_safe(self, command: str, fetch: str = "all") -> str:
@@ -655,8 +662,7 @@ class SQLAgent:
             logger.info(f"query_safe (fetch={fetch}): {command}")
             return self.query(command, fetch)
         except Exception as e:
-            logger.error(f"Error in query_safe: {str(e)}\n{traceback.format_exc()}")
-            logger.info(f"query_safe error: {e}")
+            logger.exception("Error in query_safe:")
             msg = f"Error: {e}"
             if "does not exist" in msg and " relation " in msg:
                 msg += "\nAvailable tables: " + ", ".join(self.get_usable_table_names())

mindsdb/interfaces/storage/db.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import json
+import orjson
 import datetime
 from typing import Dict, List, Optional
@@ -47,10 +48,20 @@ def init(connection_str: str = None):
     global Base, session, engine
     if connection_str is None:
         connection_str = config["storage_db"]
+    # Use orjson with our CustomJSONEncoder.default for JSON serialization
+    _default_json = CustomJSONEncoder().default
+    def _json_serializer(value):
+        return orjson.dumps(
+            value,
+            default=_default_json,
+            option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME,
+        ).decode("utf-8")
     base_args = {
         "pool_size": 30,
         "max_overflow": 200,
-        "json_serializer": CustomJSONEncoder().encode,
+        "json_serializer": _json_serializer,
     }
     engine = create_engine(connection_str, echo=False, **base_args)
     session = scoped_session(sessionmaker(bind=engine, autoflush=True))
@@ -534,11 +545,10 @@ class KnowledgeBase(Base):
         reranking_model = params.pop("reranking_model", None)
         if not with_secrets:
-            if embedding_model and "api_key" in embedding_model:
-                embedding_model["api_key"] = "******"
-            if reranking_model and "api_key" in reranking_model:
-                reranking_model["api_key"] = "******"
+            for key in ("api_key", "private_key"):
+                for el in (embedding_model, reranking_model):
+                    if el and key in el:
+                        el[key] = "******"
         return {
             "id": self.id,

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

Potentially problematic release.

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl