PyPI - MindsDB - Versions diffs - 25.1.2.0__py3-none-any.whl → 25.1.5.0__py3-none-any.whl - Mend

MindsDB 25.1.2.0__py3-none-any.whl → 25.1.5.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (99)

mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py CHANGED Viewed

@@ -1,4 +1,5 @@
 from typing import List
+from textwrap import dedent
 from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
 from langchain_community.tools import ListSQLDatabaseTool, InfoSQLDatabaseTool, QuerySQLDataBaseTool
@@ -11,57 +12,67 @@ class MindsDBSQLToolkit(SQLDatabaseToolkit):
     def get_tools(self, prefix='') -> List[BaseTool]:
         """Get the tools in the toolkit."""
-        list_sql_database_tool = ListSQLDatabaseTool(name=f'sql_db_list_tables{prefix}', db=self.db)
+        list_sql_database_tool = ListSQLDatabaseTool(
+            name=f'sql_db_list_tables{prefix}',
+            db=self.db,
+            description=(
+                "Input is an empty string, output is a comma-separated list of tables in the database. "
+                "Each table name in the list may be in one of two formats: database_name.table_name or "
+                "database_name.schema_name.table_name."
+                "If the table name is enclosed in backticks marks, then always use the table name with backticks marks in subsequent queries."
+            )
+        )
         info_sql_database_tool_description = (
-            "Input: A comma-separated list of tables. Output: Schema and sample rows for those tables. "
+            "Input: A comma-separated list of tables enclosed between the symbols $START$ and $END$. Output: Schema and sample rows for those tables. "
             f"Ensure tables exist by calling {list_sql_database_tool.name} first. "
             "Use this tool to investigate table schemas for needed columns. "
             "Get sample data with 'SELECT * FROM table LIMIT 3' before answering questions. "
-            "Example Input: table1, table2, table3"
+            "Example Input: $START$ table1, table2, table3 $END$"
         )
         info_sql_database_tool = InfoSQLDatabaseTool(
             name=f'sql_db_schema{prefix}',
             db=self.db, description=info_sql_database_tool_description
         )
-        query_sql_database_tool_description = (
-            "Input: A detailed SQL query. Output: Database result or error message. "
-            "For errors, rewrite and retry the query. For 'Unknown column' errors, use "
-            f"{info_sql_database_tool.name} to check table fields. "
-            "This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases. "
-            "Follow these instructions with utmost precision: "
-            "1. Query Output Format: "
-            "   - Always return results in well-formatted **Markdown tables**. "
-            "   - Ensure clarity and proper structure for easy readability. "
-            "2. Sample Data: "
-            "   - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers. "
-            "3. Categorical Data: "
-            "   - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first. "
-            "   - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses. "
-            "4. Result Limiting and Counting: "
-            "   - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`. "
-            "   - If the count is greater than 10, limit the query to return only 10 results initially. "
-            "   - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results. "
-            "   - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped. "
-            "5. Date Handling: "
-            "   - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date. "
-            "   - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..` "
-            "   - Do not compare date values without casting columns to date. "
-            "   - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples: "
-            "     SELECT NOW() + INTERVAL 5 DAY; "
-            "     SELECT NOW() - INTERVAL 3 HOUR; "
-            "     SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY; "
-            "     SELECT NOW() - INTERVAL 1 YEAR; "
-            "6. Query Best Practices: "
-            "   - Query only necessary columns, not all. "
-            "   - Use only existing column names from correct tables. "
-            "   - Use database-specific syntax for date operations. "
-            "7. Error Handling: "
-            "   - For errors, rewrite and retry the query. "
-            "   - For 'Unknown column' errors, check table fields using info_sql_database_tool. "
-            "Adhere to these guidelines for all queries and responses. Ask for clarification if needed."
-        )
+        query_sql_database_tool_description = dedent(f"""\
+            Input: A detailed SQL query.
+            Output: Database result or error message. For errors, rewrite and retry the query. For 'Unknown column' errors, use '{info_sql_database_tool.name}' to check table fields.
+            This system is a highly intelligent and reliable PostgreSQL SQL skill designed to work with databases.
+            Follow these instructions with utmost precision:
+            1. Query Output Format:
+               - Always return results in well-formatted **Markdown tables**.
+               - Ensure clarity and proper structure for easy readability.
+            2. Sample Data:
+               - Before answering a question, if you don't have sample data about a table, **always** get sample data using `SELECT * FROM table LIMIT 3` from the tables you believe are relevant to formulating your answers.
+            3. Categorical Data:
+               - Whenever working with a column where values seem categorical, especially when filtering with `WHERE col = 'value'`, `WHERE col IN (list of values)`, or `WHERE col NOT IN (list of values)`, **always** retrieve the distinct values first.
+               - Before writing your main query, always run `SELECT DISTINCT col` to fetch a list of unique values from that column. This step is mandatory to ensure accurate queries and responses.
+            4. Result Limiting and Counting:
+               - Unless instructed otherwise by the user, always run a count on the final query first using `SELECT COUNT(*)`.
+               - If the count is greater than 10, limit the query to return only 10 results initially.
+               - **Always** inform the user of the total number of results available and specify that you are providing the first 10 results.
+               - Let the user know they can request additional results and/or specify how they would like the results ordered or grouped.
+            5. Date Handling:
+               - **Always** use PostgreSQL-compatible `CURRENT_DATE` or `NOW()` functions when working with dates—never assume or guess the current date.
+               - For any date-related comparisons in the query, *always* ensure that your query casts the column being compared using `column_name::DATE [operator] ..`
+               - Do not compare date values without casting columns to date.
+               - For date interval operations, use Interval units as keywords. You can use keywords to specify units like days, hours, months, years, etc., directly without quotes. Examples:
+                 SELECT NOW() + INTERVAL 5 DAY;
+                 SELECT NOW() - INTERVAL 3 HOUR;
+                 SELECT NOW() + INTERVAL 2 MONTH + INTERVAL 3 DAY;
+                 SELECT NOW() - INTERVAL 1 YEAR;
+            6. Query Best Practices:
+               - Always send only one query at a time.
+               - The input SQL query must end with a semicolon.
+               - Query only necessary columns, not all.
+               - Use only existing column names from correct tables.
+               - Use database-specific syntax for date operations.
+            7. Error Handling:
+               - For errors, rewrite and retry the query.
+               - For 'Unknown column' errors, check table fields using info_sql_database_tool.
+            Adhere to these guidelines for all queries and responses. Ask for clarification if needed.
+        """)
         query_sql_database_tool = QuerySQLDataBaseTool(
             name=f'sql_db_query{prefix}',

mindsdb/interfaces/skills/retrieval_tool.py CHANGED Viewed

@@ -43,10 +43,17 @@ def build_retrieval_tool(tool: dict, pred_args: dict, skill: db.Skills):
             raise ValueError(f"Knowledge base not found: {kb_name}")
         kb_table = executor.session.kb_controller.get_table(kb.name, kb.project_id)
+        vector_store_config = {
+            'kb_table': kb_table
+        }
+        is_sparse = tools_config.pop('is_sparse', None)
+        vector_size = tools_config.pop('vector_size', None)
+        if is_sparse is not None:
+            vector_store_config['is_sparse'] = is_sparse
+        if vector_size is not None:
+            vector_store_config['vector_size'] = vector_size
         kb_params = {
-            'vector_store_config': {
-                'kb_table': kb_table
-            }
+            'vector_store_config': vector_store_config
         }
         # Get embedding model from knowledge base table

mindsdb/interfaces/skills/skill_tool.py CHANGED Viewed

@@ -1,17 +1,18 @@
 import enum
-from collections import defaultdict
-from typing import List, Optional
+import inspect
 from dataclasses import dataclass
+from collections import defaultdict
+from typing import List, Dict, Optional
 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models import BaseChatModel
 from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant, Star
-from mindsdb.integrations.libs.vectordatabase_handler import TableField
-from mindsdb.interfaces.skills.sql_agent import SQLAgent
-from mindsdb.interfaces.storage import db
 from mindsdb.utilities import log
 from mindsdb.utilities.cache import get_cache
+from mindsdb.interfaces.storage import db
+from mindsdb.interfaces.skills.sql_agent import SQLAgent
+from mindsdb.integrations.libs.vectordatabase_handler import TableField
 _DEFAULT_TOP_K_SIMILARITY_SEARCH = 5
@@ -45,27 +46,54 @@ class SkillData:
     agent_tables_list: Optional[List[str]]
     @property
-    def tables_list(self) -> List[str]:
-        """List of tables which may use this skill. If the list is empty, there are no restrictions.
-        The result list is a combination of skill's and agent's tables lists.
+    def restriction_on_tables(self) -> Optional[Dict[str, set]]:
+        """Schemas and tables which agent+skill may use. The result is intersections of skill's and agent's tables lists.
         Returns:
-            List[str]: List of tables.
+            Optional[Dict[str, set]]: allowed schemas and tables. Schemas - are keys in dict, tables - are values.
+                if result is None, then there are no restrictions
         Raises:
             ValueError: if there is no intersection between skill's and agent's list.
                 This means that all tables restricted for use.
         """
-        agent_tables_list = self.agent_tables_list or []
-        skill_tables_list = self.params.get('tables', [])
-        if len(skill_tables_list) > 0 and len(agent_tables_list) > 0:
-            diff = set(skill_tables_list) & set(agent_tables_list)
-            if len(diff) == 0:
-                raise ValueError("There are no tables allowed for use.")
-            return list(diff)
-        if len(skill_tables_list) > 0:
-            return skill_tables_list
-        return agent_tables_list
+        def list_to_map(input: List) -> Dict:
+            agent_tables_map = defaultdict(set)
+            for x in input:
+                if isinstance(x, str):
+                    table_name = x
+                    schema_name = None
+                elif isinstance(x, dict):
+                    table_name = x['table']
+                    schema_name = x.get('schema')
+                else:
+                    raise ValueError(f'Unexpected value in tables list: {x}')
+                agent_tables_map[schema_name].add(table_name)
+            return agent_tables_map
+        agent_tables_map = list_to_map(self.agent_tables_list or [])
+        skill_tables_map = list_to_map(self.params.get('tables', []))
+        if len(agent_tables_map) > 0 and len(skill_tables_map) > 0:
+            if len(set(agent_tables_map) & set(skill_tables_map)) == 0:
+                raise ValueError("Skill's and agent's allowed tables list have no shared schemas.")
+            intersection_tables_map = defaultdict(set)
+            has_intersection = False
+            for schema_name in agent_tables_map:
+                if schema_name not in skill_tables_map:
+                    continue
+                intersection_tables_map[schema_name] = agent_tables_map[schema_name] & skill_tables_map[schema_name]
+                if len(intersection_tables_map[schema_name]) > 0:
+                    has_intersection = True
+            if has_intersection is False:
+                raise ValueError("Skill's and agent's allowed tables list have no shared tables.")
+            return intersection_tables_map
+        if len(skill_tables_map) > 0:
+            return skill_tables_map
+        if len(agent_tables_map) > 0:
+            return agent_tables_map
+        return None
 class SkillToolController:
@@ -83,22 +111,6 @@ class SkillToolController:
             self.command_executor = ExecuteCommands(sql_session)
         return self.command_executor
-    def get_sql_agent(
-            self,
-            database: str,
-            include_tables: Optional[List[str]] = None,
-            ignore_tables: Optional[List[str]] = None,
-            sample_rows_in_table_info: int = 3,
-    ):
-        return SQLAgent(
-            self.get_command_executor(),
-            database,
-            include_tables,
-            ignore_tables,
-            sample_rows_in_table_info,
-            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
-        )
     def _make_text_to_sql_tools(self, skills: List[db.Skills], llm) -> List:
         '''
            Uses SQLAgent to execute tool
@@ -112,19 +124,47 @@ class SkillToolController:
             raise ImportError(
                 'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')
+        command_executor = self.get_command_executor()
         tables_list = []
         for skill in skills:
             database = skill.params['database']
-            for table in skill.tables_list:
-                tables_list.append(f'{database}.{table}')
-        # use list databases
-        database = ','.join(set(s.params['database'] for s in skills))
-        db = MindsDBSQL(
-            engine=self.get_command_executor(),
-            database=database,
-            metadata=self.get_command_executor().session.integration_controller,
-            include_tables=tables_list
+            restriction_on_tables = skill.restriction_on_tables
+            if restriction_on_tables is None:
+                handler = command_executor.session.integration_controller.get_data_handler(database)
+                if 'all' in inspect.signature(handler.get_tables).parameters:
+                    response = handler.get_tables(all=True)
+                else:
+                    response = handler.get_tables()
+                # no restrictions
+                if 'table_schema' in response.data_frame.columns:
+                    for _, row in response.data_frame.iterrows():
+                        tables_list.append(f"{database}.{row['table_schema']}.{row['table_name']}")
+                else:
+                    for _, row in response.data_frame.iterrows():
+                        tables_list.append(f"{database}.{row['table_name']}")
+                continue
+            for schema_name, tables in restriction_on_tables.items():
+                for table in tables:
+                    if schema_name is None:
+                        tables_list.append(f'{database}.{table}')
+                    else:
+                        tables_list.append(f'{database}.{schema_name}.{table}')
+        sql_agent = SQLAgent(
+            command_executor=command_executor,
+            databases=list(set(s.params['database'] for s in skills)),
+            databases_struct={
+                skill.params['database']: skill.restriction_on_tables
+                for skill in skills
+            },
+            include_tables=tables_list,
+            ignore_tables=None,
+            sample_rows_in_table_info=3,
+            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
+        )
+        db = MindsDBSQL.custom_init(
+            sql_agent=sql_agent
         )
         # Users probably don't need to configure this for now.
@@ -138,14 +178,18 @@ class SkillToolController:
         for i, tool in enumerate(sql_database_tools):
             if isinstance(tool, QuerySQLDataBaseTool):
                 # Add our own custom description so our agent knows when to query this table.
-                tool.description = (
-                    f'Use this tool if you need data about {" OR ".join(descriptions)}. '
-                    'Use the conversation context to decide which table to query. '
-                    f'These are the available tables: {",".join(tables_list)}.\n' if len(tables_list) > 0 else '\n'
-                    f'ALWAYS consider these special cases:\n'
-                    f'- For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")'
-                    f'Here are the rest of the instructions:\n'
-                    f'{tool.description}'
+                original_description = tool.description
+                tool.description = ''
+                if len(descriptions) > 0:
+                    tool.description += f'Use this tool if you need data about {" OR ".join(descriptions)}.\n'
+                tool.description += 'Use the conversation context to decide which table to query.\n'
+                if len(tables_list) > 0:
+                    f'These are the available tables: {",".join(tables_list)}.\n'
+                tool.description += (
+                    'ALWAYS consider these special cases:\n'
+                    ' - For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")\n'
+                    'Here are the rest of the instructions:\n'
+                    f'{original_description}'
                 )
                 sql_database_tools[i] = tool
         return sql_database_tools
@@ -175,7 +219,6 @@ class SkillToolController:
         return build_retrieval_tool(tool, pred_args, skill)
     def _get_rag_query_function(self, skill: db.Skills):
         session_controller = self.get_command_executor().session
         def _answer_question(question: str) -> str:

mindsdb/interfaces/skills/skills_controller.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import datetime
-from typing import Dict, List
+from typing import Dict, List, Optional
 from sqlalchemy import null
 from sqlalchemy.orm.attributes import flag_modified
@@ -16,7 +16,7 @@ class SkillsController:
             project_controller = ProjectController()
         self.project_controller = project_controller
-    def get_skill(self, skill_name: str, project_name: str = 'mindsdb') -> db.Skills:
+    def get_skill(self, skill_name: str, project_name: str = 'mindsdb') -> Optional[db.Skills]:
         '''
         Gets a skill by name. Skills are expected to have unique names.
@@ -25,7 +25,7 @@ class SkillsController:
             project_name (str): The name of the containing project
         Returns:
-            skill (db.Skills): The database skill object
+            skill (Optional[db.Skills]): The database skill object
         Raises:
             ValueError: If `project_name` does not exist
@@ -136,6 +136,8 @@ class SkillsController:
         existing_skill = self.get_skill(skill_name, project_name)
         if existing_skill is None:
             raise ValueError(f'Skill with name not found: {skill_name}')
+        if isinstance(existing_skill.params, dict) and existing_skill.params.get('is_demo') is True:
+            raise ValueError("It is forbidden to change properties of the demo object")
         if new_name is not None:
             existing_skill.name = new_name
@@ -171,5 +173,7 @@ class SkillsController:
         skill = self.get_skill(skill_name, project_name)
         if skill is None:
             raise ValueError(f"Skill with name doesn't exist: {skill_name}")
+        if isinstance(skill.params, dict) and skill.params.get('is_demo') is True:
+            raise ValueError("Unable to delete demo object")
         skill.deleted_at = datetime.datetime.now()
         db.session.commit()

MindsDB 25.1.2.0__py3-none-any.whl → 25.1.5.0__py3-none-any.whl

Potentially problematic release.

MindsDB 25.1.2.0__py3-none-any.whl → 25.1.5.0__py3-none-any.whl