PyPI - MindsDB - Versions diffs - 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl - Mend

MindsDB 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (28)

mindsdb/integrations/libs/api_handler.py CHANGED Viewed

@@ -433,16 +433,15 @@ class APIHandler(BaseHandler):
         Args:
             name (str): the handler name
         """
         self._tables = {}
     def _register_table(self, table_name: str, table_class: Any):
         """
         Register the data resource. For e.g if you are using Twitter API it registers the `tweets` resource from `/api/v2/tweets`.
         """
-        if table_name in self._tables:
+        if table_name.lower() in self._tables:
             raise TableAlreadyExists(f"Table with name {table_name} already exists for this handler")
-        self._tables[table_name] = table_class
+        self._tables[table_name.lower()] = table_class
     def _get_table(self, name: Identifier):
         """
@@ -450,10 +449,10 @@ class APIHandler(BaseHandler):
         Args:
             name (Identifier): the table name
         """
-        name = name.parts[-1]
-        if name not in self._tables:
-            raise TableNotFound(f"Table not found: {name}")
-        return self._tables[name]
+        name = name.parts[-1].lower()
+        if name in self._tables:
+            return self._tables[name]
+        raise TableNotFound(f"Table not found: {name}")
     def query(self, query: ASTNode):
         if isinstance(query, Select):

mindsdb/interfaces/agents/constants.py CHANGED Viewed

@@ -171,6 +171,8 @@ NVIDIA_NIM_CHAT_MODELS = (
 )
 GOOGLE_GEMINI_CHAT_MODELS = (
+    "gemini-2.5-pro",
+    "gemini-2.5-flash",
     "gemini-2.5-pro-preview-03-25",
     "gemini-2.0-flash",
     "gemini-2.0-flash-lite",
@@ -228,3 +230,45 @@ You are an AI assistant powered by MindsDB. When answering questions, follow the
 For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
 """
+MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. When answering questions, follow these guidelines:
+1. For questions about database tables and their contents:
+   - Use the sql_db_query to query the tables directly
+   - You can join tables if needed to get comprehensive information
+   - You are running on a federated query engine, so joins across multiple databases are allowed and supported
+   - **Important Rule for SQL Queries:** If you formulate an SQL query as part of answering a user's question, you *must* then use the `sql_db_query` tool to execute that query and get its results. The SQL query string itself is NOT the final answer to the user unless the user has specifically asked for the query. Your final AI response should be based on the *results* obtained from executing the query.
+2. For factual questions about specific topics, use the knowledge base tools, if available, in this sequence:
+- First use kb_list_tool to see available knowledge bases
+- Then use kb_info_tool to understand the structure of relevant knowledge bases
+- Finally use kb_query_tool to query the knowledge base for specific information
+For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
+Here is the user's question: {{question}}
+TOOLS:
+------
+Assistant has access to the following tools:"""
+EXPLICIT_FORMAT_INSTRUCTIONS = """
+<< TOOL CALLING INSTRUCTIONS >>
+**It is critical you use the following format to call a tool**
+```
+Thought: Do I need to use a tool? Yes
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+```
+When you have a response to say to the Human, or if you do not need to use a tool, you MUST use the format:
+```
+Thought: Do I need to use a tool? No
+{ai_prefix}: [your response here]
+```
+"""

mindsdb/interfaces/agents/langchain_agent.py CHANGED Viewed

@@ -58,6 +58,8 @@ from mindsdb.interfaces.agents.constants import (
     TRACE_ID_COLUMN,
     DEFAULT_AGENT_SYSTEM_PROMPT,
     WRITER_CHAT_MODELS,
+    MINDSDB_PREFIX,
+    EXPLICIT_FORMAT_INSTRUCTIONS,
 )
 from mindsdb.interfaces.skills.skill_tool import skill_tool, SkillData
 from langchain_anthropic import ChatAnthropic
@@ -426,7 +428,12 @@ class LangchainAgent:
             llm,
             agent=agent_type,
             # Use custom output parser to handle flaky LLMs that don't ALWAYS conform to output format.
-            agent_kwargs={"output_parser": SafeOutputParser()},
+            agent_kwargs={
+                "output_parser": SafeOutputParser(),
+                "prefix": MINDSDB_PREFIX,  # Override default "Assistant is a large language model..." text
+                "format_instructions": EXPLICIT_FORMAT_INSTRUCTIONS,  # More explicit tool calling instructions
+                "ai_prefix": "AI",
+            },
             # Calls the agent's LLM Chain one final time to generate a final answer based on the previous steps
             early_stopping_method="generate",
             handle_parsing_errors=self._handle_parsing_errors,

mindsdb/interfaces/agents/mindsdb_database_agent.py CHANGED Viewed

@@ -111,24 +111,12 @@ class MindsDBSQL(SQLDatabase):
             )
             # Convert ExecuteAnswer to a DataFrame for easier manipulation
-            df = None
-            if hasattr(result, "data") and hasattr(result.data, "data_frame"):
-                df = result.data.data_frame
+            if result.data is not None:
+                df = result.data.to_df()
+                return df.to_string(index=False)
             else:
-                # Fallback to to_df when data_frame attr not available
-                try:
-                    df = result.data.to_df()
-                except Exception:
-                    df = None
-            # Default behaviour (string)
-            if df is not None:
-                if not df.empty:
-                    return df.to_string(index=False)
-                else:
-                    return "Query executed successfully, but returned no data."
-            return str(result)
+                return "Query executed successfully, but returned no data."
         except Exception as e:
             logger.error(f"Error executing SQL command: {str(e)}\n{traceback.format_exc()}")

mindsdb/interfaces/data_catalog/data_catalog_reader.py CHANGED Viewed

@@ -11,8 +11,6 @@ class DataCatalogReader(BaseDataCatalog):
         """
         Read the metadata from the data catalog and return it as a string.
         """
-        if not self.is_data_catalog_supported():
-            return f"Data catalog is not supported for database '{self.database_name}'."
         tables = self._read_metadata()
         if not tables:
             self.logger.warning(f"No metadata found for database '{self.database_name}'")
@@ -26,10 +24,29 @@ class DataCatalogReader(BaseDataCatalog):
             metadata_str += table.as_string() + "\n\n"
         return metadata_str
+    def read_metadata_as_records(self) -> list:
+        """
+        Read the metadata from the data catalog and return it as a list of database records.
+        """
+        tables = self._read_metadata()
+        if not tables:
+            self.logger.warning(f"No metadata found for database '{self.database_name}'")
+            return []
+        return tables
+    def get_handler_info(self) -> str:
+        """
+        Get the handler info for the database.
+        """
+        return self.data_handler.meta_get_handler_info()
     def _read_metadata(self) -> list:
         """
         Read the metadata from the data catalog and return it in a structured format.
         """
+        if not self.is_data_catalog_supported():
+            return f"Data catalog is not supported for database '{self.database_name}'."
         query = db.session.query(db.MetaTables).filter_by(integration_id=self.integration_id)
         if self.table_names:
             cleaned_table_names = [name.strip("`").split(".")[-1] for name in self.table_names]

mindsdb/interfaces/knowledge_base/controller.py CHANGED Viewed

@@ -9,6 +9,7 @@ import numpy as np
 from mindsdb_sql_parser.ast import BinaryOperation, Constant, Identifier, Select, Update, Delete, Star
 from mindsdb_sql_parser.ast.mindsdb import CreatePredictor
+from mindsdb_sql_parser import parse_sql
 from mindsdb.integrations.utilities.query_traversal import query_traversal
@@ -55,8 +56,13 @@ def get_model_params(model_params: dict, default_config_key: str):
     combined_model_params = copy.deepcopy(config.get(default_config_key, {}))
     if model_params:
+        if not isinstance(model_params, dict):
+            raise ValueError("Model parameters must be passed as a JSON object")
         combined_model_params.update(model_params)
+    combined_model_params.pop("use_default_llm", None)
     return combined_model_params
@@ -359,23 +365,30 @@ class KnowledgeBaseTable:
     def insert_query_result(self, query: str, project_name: str):
         """Process and insert SQL query results"""
-        if not self.document_loader:
-            raise ValueError("Document loader not configured")
+        ast_query = parse_sql(query)
-        documents = list(self.document_loader.load_query_result(query, project_name))
-        if documents:
-            self.insert_documents(documents)
+        command_executor = ExecuteCommands(self.session)
+        response = command_executor.execute_command(ast_query, project_name)
+        if response.error_code is not None:
+            raise ValueError(f"Error executing query: {response.error_message}")
+        if response.data is None:
+            raise ValueError("Query returned no data")
+        records = response.data.records
+        df = pd.DataFrame(records)
+        self.insert(df)
     def insert_rows(self, rows: List[Dict]):
         """Process and insert raw data rows"""
         if not rows:
             return
-        documents = [
-            Document(content=row.get("content", ""), id=row.get("id"), metadata=row.get("metadata", {})) for row in rows
-        ]
+        df = pd.DataFrame(rows)
-        self.insert_documents(documents)
+        self.insert(df)
     def insert_documents(self, documents: List[Document]):
         """Process and insert documents with preprocessing if configured"""
@@ -944,10 +957,7 @@ class KnowledgeBaseController:
         #         # it is params for model
         #         embedding_params.update(params["embedding_model"])
-        if "embedding_model" in params:
-            if not isinstance(params["embedding_model"], dict):
-                raise ValueError("embedding_model should be JSON object with model parameters.")
-            embedding_params.update(params["embedding_model"])
+        embedding_params = get_model_params(params.get("embedding_model", {}), "default_embedding_model")
         # if model_name is None:  # Legacy
         model_name = self._create_embedding_model(

mindsdb/interfaces/knowledge_base/evaluate.py CHANGED Viewed

@@ -168,13 +168,13 @@ class EvaluateBase:
             test_data = self.generate_test_data(gen_params)
             self.save_to_table(test_table, test_data, is_replace=True)
-        else:
-            test_data = self.read_from_table(test_table)
         if params.get("evaluate", True) is False:
             # no evaluate is required
             return pd.DataFrame()
+        test_data = self.read_from_table(test_table)
         scores = self.evaluate(test_data)
         scores["name"] = self.name
         scores["created_at"] = dt.datetime.now()
@@ -511,6 +511,6 @@ class EvaluateDocID(EvaluateBase):
             "total": total_questions,
             "total_found": total_found,
             "retrieved_in_top_10": accurate_in_top_10,
-            "cumulative_recall": cumulative_recall,
+            "cumulative_recall": json.dumps(cumulative_recall),
             "avg_query_time": avg_query_time,
         }

mindsdb/interfaces/knowledge_base/preprocessing/document_loader.py CHANGED Viewed

@@ -2,7 +2,6 @@ import os
 from typing import List, Iterator
 from langchain_core.documents import Document as LangchainDocument
 from langchain_text_splitters import MarkdownHeaderTextSplitter
-import pandas as pd
 from mindsdb.interfaces.file.file_controller import FileController
 from mindsdb.integrations.utilities.rag.loaders.file_loader import FileLoader
@@ -20,12 +19,12 @@ class DocumentLoader:
     """Handles loading documents from various sources including SQL queries"""
     def __init__(
-            self,
-            file_controller: FileController,
-            file_splitter: FileSplitter,
-            markdown_splitter: MarkdownHeaderTextSplitter,
-            file_loader_class=FileLoader,
-            mysql_proxy=None
+        self,
+        file_controller: FileController,
+        file_splitter: FileSplitter,
+        markdown_splitter: MarkdownHeaderTextSplitter,
+        file_loader_class=FileLoader,
+        mysql_proxy=None,
     ):
         """
         Initialize with required dependencies
@@ -52,8 +51,8 @@ class DocumentLoader:
             for doc in loader.lazy_load():
                 # Add file extension to metadata for proper splitting
                 extension = os.path.splitext(file_path)[1].lower()
-                doc.metadata['extension'] = extension
-                doc.metadata['source'] = file_name
+                doc.metadata["extension"] = extension
+                doc.metadata["source"] = file_name
                 # Use FileSplitter to handle the document based on its type
                 split_docs = self.file_splitter.split_documents([doc])
@@ -62,34 +61,22 @@ class DocumentLoader:
                     metadata = doc.metadata.copy()
                     metadata.update(split_doc.metadata or {})
-                    yield Document(
-                        content=split_doc.page_content,
-                        metadata=metadata
-                    )
+                    yield Document(content=split_doc.page_content, metadata=metadata)
     def load_web_pages(
-            self,
-            urls: List[str],
-            crawl_depth: int,
-            limit: int,
-            filters: List[str] = None,
+        self,
+        urls: List[str],
+        crawl_depth: int,
+        limit: int,
+        filters: List[str] = None,
     ) -> Iterator[Document]:
         """Load and split documents from web pages"""
-        websites_df = get_all_websites(
-            urls,
-            crawl_depth=crawl_depth,
-            limit=limit,
-            filters=filters
-        )
+        websites_df = get_all_websites(urls, crawl_depth=crawl_depth, limit=limit, filters=filters)
         for _, row in websites_df.iterrows():
             # Create a document with HTML extension for proper splitting
             doc = LangchainDocument(
-                page_content=row['text_content'],
-                metadata={
-                    'extension': '.html',
-                    'url': row['url']
-                }
+                page_content=row["text_content"], metadata={"extension": ".html", "url": row["url"]}
             )
             # Use FileSplitter to handle HTML content
@@ -98,60 +85,4 @@ class DocumentLoader:
                 metadata = doc.metadata.copy()
                 metadata.update(split_doc.metadata or {})
-                yield Document(
-                    content=split_doc.page_content,
-                    metadata=metadata
-                )
-    def load_query_result(self, query: str, project_name: str) -> Iterator[Document]:
-        """
-        Load documents from SQL query results
-        Args:
-            query: SQL query to execute
-            project_name: Name of the project context
-        Returns:
-            Iterator of Document objects
-        Raises:
-            ValueError: If mysql_proxy is not configured or query returns no data
-        """
-        if not self.mysql_proxy:
-            raise ValueError("MySQL proxy not configured")
-        if not query:
-            return
-        # Set project context and execute query
-        self.mysql_proxy.set_context({'db': project_name})
-        query_result = self.mysql_proxy.process_query(query)
-        if query_result.type != 'table':
-            raise ValueError('Query returned no data')
-        # Convert query result to DataFrame
-        df = query_result.data.to_df()
-        # Process each row into a Document
-        for _, row in df.iterrows():
-            # Extract id, content  and metadata
-            content = str(row.get('content', ''))
-            id = row.get('id', None)
-            # Convert remaining columns to metadata
-            metadata = {
-                col: str(row[col])
-                for col in df.columns
-                if col != 'content' and not pd.isna(row[col])
-            }
-            metadata['source'] = 'query'
-            # Split content using recursive splitter
-            if content:
-                yield Document(
-                    id=id,
-                    content=content,
-                    metadata=metadata
-                )
+                yield Document(content=split_doc.page_content, metadata=metadata)

mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py CHANGED Viewed

@@ -3,6 +3,28 @@ import re
 import json
 from pydantic import BaseModel, Field
 from langchain_core.tools import BaseTool
+from mindsdb_sql_parser.ast import Describe, Select, Identifier, Constant, Star
+def llm_str_strip(s):
+    length = -1
+    while length != len(s):
+        length = len(s)
+        # remove ```
+        if s.startswith("```"):
+            s = s[3:]
+        if s.endswith("```"):
+            s = s[:-3]
+        # remove trailing new lines
+        s = s.strip("\n")
+        # remove extra quotes
+        for q in ('"', "'", "`"):
+            if s.count(q) == 1:
+                s = s.strip(q)
+    return s
 class KnowledgeBaseListToolInput(BaseModel):
@@ -63,12 +85,14 @@ class KnowledgeBaseInfoTool(BaseTool):
                 return [kb.strip() for kb in tool_input.split(",")]
             # If it's just a single string without formatting, return it as a single item
             if tool_input.strip():
-                return [tool_input.strip()]
+                return [llm_str_strip(tool_input)]
             return []
         # Extract and clean the knowledge base names
         kb_names_str = match.group(1).strip()
         kb_names = re.findall(r"`([^`]+)`", kb_names_str)
+        kb_names = [llm_str_strip(n) for n in kb_names]
         return kb_names
     def _run(self, tool_input: str) -> str:
@@ -83,7 +107,7 @@ class KnowledgeBaseInfoTool(BaseTool):
         for kb_name in kb_names:
             try:
                 # Get knowledge base schema
-                schema_result = self.db.run_no_throw(f"DESCRIBE KNOWLEDGE_BASE `{kb_name}`;")
+                schema_result = self.db.run_no_throw(str(Describe(kb_name, type="knowledge_base")))
                 if not schema_result:
                     results.append(f"Knowledge base `{kb_name}` not found or has no schema information.")
@@ -111,7 +135,9 @@ class KnowledgeBaseInfoTool(BaseTool):
                 kb_info += "```\n\n"
                 # Get sample data
-                sample_data = self.db.run_no_throw(f"SELECT * FROM `{kb_name}` LIMIT 10;")
+                sample_data = self.db.run_no_throw(
+                    str(Select(targets=[Star()], from_table=Identifier(kb_name), limit=Constant(20)))
+                )
                 # Sample data
                 kb_info += "### Sample Data:\n"
@@ -196,6 +222,7 @@ class KnowledgeBaseQueryTool(BaseTool):
         try:
             # Execute the query
+            query = llm_str_strip(query)
             result = self.db.run_no_throw(query)
             if not result:

mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py CHANGED Viewed

@@ -10,25 +10,27 @@ from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_tool import MindsDBSQ
 from mindsdb.interfaces.skills.custom.text2sql.mindsdb_kb_tools import (
     KnowledgeBaseListTool,
     KnowledgeBaseInfoTool,
-    KnowledgeBaseQueryTool
+    KnowledgeBaseQueryTool,
 )
 class MindsDBSQLToolkit(SQLDatabaseToolkit):
+    include_knowledge_base_tools: bool = True
-    def get_tools(self, prefix='') -> List[BaseTool]:
+    def get_tools(self, prefix="") -> List[BaseTool]:
         current_date_time = datetime.now().strftime("%Y-%m-%d %H:%M")
         """Get the tools in the toolkit."""
         list_sql_database_tool = ListSQLDatabaseTool(
-            name=f'sql_db_list_tables{prefix}',
+            name=f"sql_db_list_tables{prefix}",
             db=self.db,
-            description=dedent("""\n
+            description=dedent(
+                """\n
                 Input is an empty string, output is a comma-separated list of tables in the database. Each table name is escaped using backticks.
                 Each table name in the list may be in one of two formats: database_name.`table_name` or database_name.schema_name.`table_name`.
                 Table names in response to the user must be escaped using backticks.
-            """)
+            """
+            ),
         )
         info_sql_database_tool_description = (
@@ -45,11 +47,11 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             "    $START$ table1 table2 table3 $STOP$\n"
         )
         info_sql_database_tool = InfoSQLDatabaseTool(
-            name=f'sql_db_schema{prefix}',
-            db=self.db, description=info_sql_database_tool_description
+            name=f"sql_db_schema{prefix}", db=self.db, description=info_sql_database_tool_description
         )
-        query_sql_database_tool_description = dedent(f"""\
+        query_sql_database_tool_description = dedent(
+            f"""\
             Input: A detailed and well-structured SQL query. The query must be enclosed between the symbols $START$ and $STOP$.
             Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
             This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
@@ -93,11 +95,11 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
                - When asked about yourself or your maker, state that you are a Data-Mind, created by MindsDB to help answer data questions.
                - When asked about your purpose or how you can help, explore the available data sources and then explain that you can answer questions based on the connected data. Provide a few relevant example questions that you could answer for the user about their data.
             Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
-        """)
+        """
+        )
         query_sql_database_tool = QuerySQLDataBaseTool(
-            name=f'sql_db_query{prefix}',
-            db=self.db, description=query_sql_database_tool_description
+            name=f"sql_db_query{prefix}", db=self.db, description=query_sql_database_tool_description
         )
         mindsdb_sql_parser_tool_description = (
@@ -108,15 +110,24 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
             f"ALWAYS run this tool before executing a query with {query_sql_database_tool.name}. "
         )
         mindsdb_sql_parser_tool = MindsDBSQLParserTool(
-            name=f'mindsdb_sql_parser_tool{prefix}',
-            description=mindsdb_sql_parser_tool_description
+            name=f"mindsdb_sql_parser_tool{prefix}", description=mindsdb_sql_parser_tool_description
         )
+        sql_tools = [
+            query_sql_database_tool,
+            info_sql_database_tool,
+            list_sql_database_tool,
+            mindsdb_sql_parser_tool,
+        ]
+        if not self.include_knowledge_base_tools:
+            return sql_tools
         # Knowledge base tools
         kb_list_tool = KnowledgeBaseListTool(
-            name=f'kb_list_tool{prefix}',
+            name=f"kb_list_tool{prefix}",
             db=self.db,
-            description=dedent("""\
+            description=dedent(
+                """\
                 Lists all available knowledge bases that can be queried.
                 Input: No input required, just call the tool directly.
                 Output: A table of all available knowledge bases with their names and creation dates.
@@ -125,13 +136,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
                 Each knowledge base name is escaped using backticks.
                 Example usage: kb_list_tool()
-            """)
+            """
+            ),
         )
         kb_info_tool = KnowledgeBaseInfoTool(
-            name=f'kb_info_tool{prefix}',
+            name=f"kb_info_tool{prefix}",
             db=self.db,
-            description=dedent(f"""\
+            description=dedent(
+                f"""\
                 Gets detailed information about specific knowledge bases including their structure and metadata fields.
                 Input: A knowledge base name as a simple string.
@@ -143,13 +156,15 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
                 Example usage: kb_info_tool("kb_name")
                 Make sure the knowledge base exists by calling {kb_list_tool.name} first.
-            """)
+            """
+            ),
         )
         kb_query_tool = KnowledgeBaseQueryTool(
-            name=f'kb_query_tool{prefix}',
+            name=f"kb_query_tool{prefix}",
             db=self.db,
-            description=dedent(f"""\
+            description=dedent(
+                f"""\
                 Queries knowledge bases using SQL syntax to retrieve relevant information.
                 Input: A SQL query string that targets a knowledge base.
@@ -192,15 +207,12 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
                 - Always include a semicolon at the end of your SQL query
                 For factual questions, use this tool to retrieve information rather than relying on the model's knowledge.
-            """)
+            """
+            ),
         )
         # Return standard SQL tools and knowledge base tools
-        return [
-            query_sql_database_tool,
-            info_sql_database_tool,
-            list_sql_database_tool,
-            mindsdb_sql_parser_tool,
+        return sql_tools + [
             kb_list_tool,
             kb_info_tool,
             kb_query_tool,

MindsDB 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl