MindsDB 25.1.3.0__py3-none-any.whl → 25.1.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MindsDB might be problematic.
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/METADATA +254 -253
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/RECORD +55 -52
- mindsdb/__about__.py +1 -1
- mindsdb/api/executor/__init__.py +0 -1
- mindsdb/api/executor/command_executor.py +2 -1
- mindsdb/api/executor/data_types/answer.py +1 -1
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +7 -2
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -1
- mindsdb/api/executor/sql_query/__init__.py +1 -0
- mindsdb/api/executor/sql_query/steps/map_reduce_step.py +6 -39
- mindsdb/api/http/namespaces/knowledge_bases.py +3 -3
- mindsdb/api/http/namespaces/sql.py +3 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +2 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +2 -1
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +2 -2
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/file_handler/tests/test_file_handler.py +17 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +15 -1
- mindsdb/integrations/handlers/jira_handler/jira_table.py +52 -31
- mindsdb/integrations/handlers/langchain_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/pinecone_handler/pinecone_handler.py +123 -72
- mindsdb/integrations/handlers/pinecone_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +9 -3
- mindsdb/integrations/handlers/slack_handler/slack_handler.py +13 -2
- mindsdb/integrations/handlers/slack_handler/slack_tables.py +21 -1
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +1 -1
- mindsdb/integrations/utilities/rag/loaders/vector_store_loader/pgvector.py +1 -1
- mindsdb/integrations/utilities/rag/pipelines/rag.py +73 -18
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +166 -108
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +30 -12
- mindsdb/integrations/utilities/rag/settings.py +6 -2
- mindsdb/interfaces/agents/agents_controller.py +3 -5
- mindsdb/interfaces/agents/langchain_agent.py +112 -150
- mindsdb/interfaces/agents/langfuse_callback_handler.py +0 -37
- mindsdb/interfaces/agents/mindsdb_database_agent.py +15 -13
- mindsdb/interfaces/chatbot/chatbot_controller.py +7 -11
- mindsdb/interfaces/chatbot/chatbot_task.py +16 -5
- mindsdb/interfaces/chatbot/memory.py +58 -13
- mindsdb/interfaces/database/projects.py +17 -15
- mindsdb/interfaces/database/views.py +12 -25
- mindsdb/interfaces/knowledge_base/controller.py +6 -1
- mindsdb/interfaces/model/functions.py +15 -4
- mindsdb/interfaces/model/model_controller.py +4 -7
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +47 -38
- mindsdb/interfaces/skills/skill_tool.py +97 -53
- mindsdb/interfaces/skills/sql_agent.py +77 -36
- mindsdb/interfaces/storage/db.py +1 -1
- mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py +88 -0
- mindsdb/utilities/context.py +2 -1
- mindsdb/utilities/langfuse.py +264 -0
- mindsdb/utilities/partitioning.py +52 -0
- mindsdb/utilities/render/sqlalchemy_render.py +7 -1
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/LICENSE +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/WHEEL +0 -0
- {MindsDB-25.1.3.0.dist-info → MindsDB-25.1.4.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/skills/skill_tool.py
CHANGED

@@ -1,17 +1,18 @@
 import enum
-
-from typing import List, Optional
+import inspect
 from dataclasses import dataclass
+from collections import defaultdict
+from typing import List, Dict, Optional

 from langchain_core.embeddings import Embeddings
 from langchain_core.language_models import BaseChatModel
 from mindsdb_sql_parser.ast import Select, BinaryOperation, Identifier, Constant, Star

-from mindsdb.integrations.libs.vectordatabase_handler import TableField
-from mindsdb.interfaces.skills.sql_agent import SQLAgent
-from mindsdb.interfaces.storage import db
 from mindsdb.utilities import log
 from mindsdb.utilities.cache import get_cache
+from mindsdb.interfaces.storage import db
+from mindsdb.interfaces.skills.sql_agent import SQLAgent
+from mindsdb.integrations.libs.vectordatabase_handler import TableField


 _DEFAULT_TOP_K_SIMILARITY_SEARCH = 5
@@ -45,27 +46,54 @@ class SkillData:
     agent_tables_list: Optional[List[str]]

     @property
-    def
-        """
-        The result list is a combination of skill's and agent's tables lists.
+    def restriction_on_tables(self) -> Optional[Dict[str, set]]:
+        """Schemas and tables which agent+skill may use. The result is intersections of skill's and agent's tables lists.

         Returns:
-
+            Optional[Dict[str, set]]: allowed schemas and tables. Schemas - are keys in dict, tables - are values.
+            if result is None, then there are no restrictions

         Raises:
             ValueError: if there is no intersection between skill's and agent's list.
                 This means that all tables restricted for use.
         """
-
-
-
-
-
-
-
-
-
-
+        def list_to_map(input: List) -> Dict:
+            agent_tables_map = defaultdict(set)
+            for x in input:
+                if isinstance(x, str):
+                    table_name = x
+                    schema_name = None
+                elif isinstance(x, dict):
+                    table_name = x['table']
+                    schema_name = x.get('schema')
+                else:
+                    raise ValueError(f'Unexpected value in tables list: {x}')
+                agent_tables_map[schema_name].add(table_name)
+            return agent_tables_map
+
+        agent_tables_map = list_to_map(self.agent_tables_list or [])
+        skill_tables_map = list_to_map(self.params.get('tables', []))
+
+        if len(agent_tables_map) > 0 and len(skill_tables_map) > 0:
+            if len(set(agent_tables_map) & set(skill_tables_map)) == 0:
+                raise ValueError("Skill's and agent's allowed tables list have no shared schemas.")
+
+            intersection_tables_map = defaultdict(set)
+            has_intersection = False
+            for schema_name in agent_tables_map:
+                if schema_name not in skill_tables_map:
+                    continue
+                intersection_tables_map[schema_name] = agent_tables_map[schema_name] & skill_tables_map[schema_name]
+                if len(intersection_tables_map[schema_name]) > 0:
+                    has_intersection = True
+            if has_intersection is False:
+                raise ValueError("Skill's and agent's allowed tables list have no shared tables.")
+            return intersection_tables_map
+        if len(skill_tables_map) > 0:
+            return skill_tables_map
+        if len(agent_tables_map) > 0:
+            return agent_tables_map
+        return None


 class SkillToolController:
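For orientation, a minimal standalone sketch of the intersection rule the new restriction_on_tables property applies; this is not the MindsDB class itself, and the example table lists are invented. Bare table names fall under the default schema (None); when both the agent and the skill restrict tables, only the per-schema intersection survives, otherwise whichever side is non-empty wins.

    from collections import defaultdict
    from typing import Dict, List, Optional


    def intersect_tables(agent_tables: List, skill_tables: List) -> Optional[Dict]:
        def list_to_map(items: List) -> Dict:
            tables_map = defaultdict(set)
            for x in items:
                if isinstance(x, str):
                    tables_map[None].add(x)                  # bare name -> default schema
                else:
                    tables_map[x.get('schema')].add(x['table'])
            return tables_map

        agent_map, skill_map = list_to_map(agent_tables), list_to_map(skill_tables)
        if agent_map and skill_map:
            result = {s: agent_map[s] & skill_map[s] for s in agent_map if s in skill_map}
            if not any(result.values()):
                raise ValueError('no shared tables between skill and agent')
            return result
        return skill_map or agent_map or None                # None -> no restriction at all


    # Hypothetical lists: the agent allows two tables, the skill allows one of them.
    print(intersect_tables(
        ['orders', {'schema': 'sales', 'table': 'invoices'}],
        ['orders', 'customers'],
    ))  # -> {None: {'orders'}}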
@@ -83,22 +111,6 @@ class SkillToolController:
         self.command_executor = ExecuteCommands(sql_session)
         return self.command_executor

-    def get_sql_agent(
-            self,
-            database: str,
-            include_tables: Optional[List[str]] = None,
-            ignore_tables: Optional[List[str]] = None,
-            sample_rows_in_table_info: int = 3,
-    ):
-        return SQLAgent(
-            self.get_command_executor(),
-            database,
-            include_tables,
-            ignore_tables,
-            sample_rows_in_table_info,
-            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
-        )
-
     def _make_text_to_sql_tools(self, skills: List[db.Skills], llm) -> List:
         '''
         Uses SQLAgent to execute tool
@@ -112,19 +124,47 @@ class SkillToolController:
             raise ImportError(
                 'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')

+        command_executor = self.get_command_executor()
+
         tables_list = []
         for skill in skills:
             database = skill.params['database']
-
-
-
-
-
-
-
-
-
-
+            restriction_on_tables = skill.restriction_on_tables
+            if restriction_on_tables is None:
+                handler = command_executor.session.integration_controller.get_data_handler(database)
+                if 'all' in inspect.signature(handler.get_tables).parameters:
+                    response = handler.get_tables(all=True)
+                else:
+                    response = handler.get_tables()
+                # no restrictions
+                if 'table_schema' in response.data_frame.columns:
+                    for _, row in response.data_frame.iterrows():
+                        tables_list.append(f"{database}.{row['table_schema']}.{row['table_name']}")
+                else:
+                    for _, row in response.data_frame.iterrows():
+                        tables_list.append(f"{database}.{row['table_name']}")
+                continue
+            for schema_name, tables in restriction_on_tables.items():
+                for table in tables:
+                    if schema_name is None:
+                        tables_list.append(f'{database}.{table}')
+                    else:
+                        tables_list.append(f'{database}.{schema_name}.{table}')
+
+        sql_agent = SQLAgent(
+            command_executor=command_executor,
+            databases=list(set(s.params['database'] for s in skills)),
+            databases_struct={
+                skill.params['database']: skill.restriction_on_tables
+                for skill in skills
+            },
+            include_tables=tables_list,
+            ignore_tables=None,
+            sample_rows_in_table_info=3,
+            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
+        )
+        db = MindsDBSQL.custom_init(
+            sql_agent=sql_agent
         )

         # Users probably don't need to configure this for now.
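The databases_struct argument assembled above maps each data source to its per-schema table restrictions. A hypothetical illustration of that shape (data source, schema and table names are invented; in the real code a None restriction is expanded by asking the handler for the schema's tables, while the sketch simply skips it), together with the flattened database.schema.table strings that end up in include_tables:

    # Hypothetical databases_struct: data source -> (schema or None for the default
    # schema) -> set of allowed tables, or None meaning "whole schema allowed".
    databases_struct = {
        'example_postgres': {
            'public': {'orders', 'customers'},
            'sales': None,                 # no table-level restriction in this schema
        },
        'example_mysql': {
            None: {'events'},              # default schema, one table
        },
    }

    # Flattened names in the same style _make_text_to_sql_tools builds for include_tables.
    tables_list = [
        f'{db_name}.{schema}.{table}' if schema is not None else f'{db_name}.{table}'
        for db_name, schemas in databases_struct.items()
        for schema, tables in schemas.items()
        for table in (tables or [])
    ]
    print(sorted(tables_list))
    # ['example_mysql.events', 'example_postgres.public.customers', 'example_postgres.public.orders']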
@@ -138,14 +178,18 @@ class SkillToolController:
         for i, tool in enumerate(sql_database_tools):
             if isinstance(tool, QuerySQLDataBaseTool):
                 # Add our own custom description so our agent knows when to query this table.
-                tool.description
-
-
-                    f'
-
-
-                    f'
-
+                original_description = tool.description
+                tool.description = ''
+                if len(descriptions) > 0:
+                    tool.description += f'Use this tool if you need data about {" OR ".join(descriptions)}.\n'
+                tool.description += 'Use the conversation context to decide which table to query.\n'
+                if len(tables_list) > 0:
+                    f'These are the available tables: {",".join(tables_list)}.\n'
+                tool.description += (
+                    'ALWAYS consider these special cases:\n'
+                    ' - For TIMESTAMP type columns, make sure you include the time portion in your query (e.g. WHERE date_column = "2020-01-01 12:00:00")\n'
+                    'Here are the rest of the instructions:\n'
+                    f'{original_description}'
                 )
                 sql_database_tools[i] = tool
         return sql_database_tools
mindsdb/interfaces/skills/sql_agent.py
CHANGED

@@ -1,37 +1,38 @@
-from typing import Iterable, List, Optional

 import re
-
+import inspect
+from typing import Iterable, List, Optional

 import pandas as pd
 from mindsdb_sql_parser import parse_sql
-from mindsdb_sql_parser.ast import Identifier
-from mindsdb.integrations.utilities.query_traversal import query_traversal
+from mindsdb_sql_parser.ast import Select, Show, Describe, Explain, Identifier

 from mindsdb.utilities import log
 from mindsdb.utilities.context import context as ctx
+from mindsdb.integrations.utilities.query_traversal import query_traversal

 logger = log.getLogger(__name__)


 class SQLAgent:
-
     def __init__(
             self,
             command_executor,
-
+            databases: List[str],
+            databases_struct: dict,
             include_tables: Optional[List[str]] = None,
             ignore_tables: Optional[List[str]] = None,
             sample_rows_in_table_info: int = 3,
             cache: Optional[dict] = None
     ):
         self._command_executor = command_executor
+        self._mindsdb_db_struct = databases_struct

         self._sample_rows_in_table_info = int(sample_rows_in_table_info)

         self._tables_to_include = include_tables
         self._tables_to_ignore = []
-        self._databases =
+        self._databases = databases
         if not self._tables_to_include:
             # ignore_tables and include_tables should not be used together.
             # include_tables takes priority if it's set.
@@ -40,7 +41,6 @@ class SQLAgent:

     def _call_engine(self, query: str, database=None):
         # switch database
-
         ast_query = parse_sql(query.strip('`'))
         self._check_permissions(ast_query)

@@ -55,7 +55,6 @@ class SQLAgent:
         return ret

     def _check_permissions(self, ast_query):
-
         # check type of query
         if not isinstance(ast_query, (Select, Show, Describe, Explain)):
             raise ValueError(f"Query is not allowed: {ast_query.to_string()}")
@@ -66,14 +65,21 @@ class SQLAgent:
             if is_table and isinstance(node, Identifier):
                 name1 = node.to_string()
                 name2 = '.'.join(node.parts)
-
+                if len(node.parts) == 3:
+                    name3 = '.'.join(node.parts[1:])
+                else:
+                    name3 = node.parts[-1]
                 if not {name1, name2, name3}.intersection(self._tables_to_include):
                     raise ValueError(f"Table {name1} not found. Available tables: {', '.join(self._tables_to_include)}")

         query_traversal(ast_query, _check_f)

     def get_usable_table_names(self) -> Iterable[str]:
+        """Get a list of tables that the agent has access to.

+        Returns:
+            Iterable[str]: list with table names
+        """
         cache_key = f'{ctx.company_id}_{",".join(self._databases)}_tables'

         # first check cache and return if found
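A rough illustration of the candidate names the permission check above derives for a three-part identifier; the parts list and allow-list are made up, and node.to_string() is approximated by joining the parts, ignoring any quoting the real AST node would apply.

    # Hypothetical identifier parts and allow-list.
    parts = ['my_db', 'public', 'orders']
    name1 = '.'.join(parts)                   # stand-in for node.to_string()
    name2 = '.'.join(parts)
    name3 = '.'.join(parts[1:]) if len(parts) == 3 else parts[-1]

    tables_to_include = ['my_db.public.orders', 'my_db.public.customers']
    print({name1, name2, name3})                                        # {'my_db.public.orders', 'public.orders'}
    print(bool({name1, name2, name3}.intersection(tables_to_include)))  # True -> query is allowed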
@@ -85,25 +91,52 @@ class SQLAgent:
         if self._tables_to_include:
             return self._tables_to_include

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        result_tables = []
+
+        for db_name in self._mindsdb_db_struct:
+            handler = self._command_executor.session.integration_controller.get_data_handler(db_name)
+
+            schemas_names = list(self._mindsdb_db_struct[db_name].keys())
+            if len(schemas_names) > 1 and None in schemas_names:
+                raise Exception('default schema and named schemas can not be used in same filter')
+
+            if None in schemas_names:
+                # get tables only from default schema
+                response = handler.get_tables()
+                tables_in_default_schema = list(response.data_frame.table_name)
+                schema_tables_restrictions = self._mindsdb_db_struct[db_name][None]  # None - is default schema
+                if schema_tables_restrictions is None:
+                    for table_name in tables_in_default_schema:
+                        result_tables.append([db_name, table_name])
+                else:
+                    for table_name in schema_tables_restrictions:
+                        if table_name in tables_in_default_schema:
+                            result_tables.append([db_name, table_name])
+            else:
+                if 'all' in inspect.signature(handler.get_tables).parameters:
+                    response = handler.get_tables(all=True)
+                else:
+                    response = handler.get_tables()
+                response_schema_names = list(response.data_frame.table_schema.unique())
+                schemas_intersection = set(schemas_names) & set(response_schema_names)
+                if len(schemas_intersection) == 0:
+                    raise Exception('There are no allowed schemas in ds')
+
+                for schema_name in schemas_intersection:
+                    schema_sub_df = response.data_frame[response.data_frame['table_schema'] == schema_name]
+                    if self._mindsdb_db_struct[db_name][schema_name] is None:
+                        # all tables from schema allowed
+                        for row in schema_sub_df:
+                            result_tables.append([db_name, schema_name, row['table_name']])
+                    else:
+                        for table_name in self._mindsdb_db_struct[db_name][schema_name]:
+                            if table_name in schema_sub_df['table_name'].values:
+                                result_tables.append([db_name, schema_name, table_name])
+
+        result_tables = ['.'.join(x) for x in result_tables]
         if self._cache:
-            self._cache.set(cache_key, set(
-
-        return usable_tables
+            self._cache.set(cache_key, set(result_tables))
+        return result_tables

     def _resolve_table_names(self, table_names: List[str], all_tables: List[Identifier]) -> List[Identifier]:
         """
@@ -115,7 +148,10 @@ class SQLAgent:
         tables_idx = {}
         for table in all_tables:
             # by name
-
+            if len(table.parts) == 3:
+                tables_idx[tuple(table.parts[1:])] = table
+            else:
+                tables_idx[(table.parts[-1],)] = table
             # by path
             tables_idx[tuple(table.parts)] = table

@@ -165,26 +201,31 @@ class SQLAgent:
     def _get_single_table_info(self, table: Identifier) -> str:
         if len(table.parts) < 2:
             raise ValueError(f"Database is required for table: {table}")
-
+        if len(table.parts) == 3:
+            integration, schema_name, table_name = table.parts[-3:]
+        else:
+            schema_name = None
+            integration, table_name = table.parts[-2:]
+
         table_str = str(table)

         dn = self._command_executor.session.datahub.get(integration)

         fields, dtypes = [], []
-        for column in dn.get_table_columns(table_name):
+        for column in dn.get_table_columns(table_name, schema_name):
             fields.append(column['name'])
             dtypes.append(column.get('type', ''))

-        info = f'Table named `{
-        info += f"\
+        info = f'Table named `{table_str}`:\n'
+        info += f"\nSample with first {self._sample_rows_in_table_info} rows from table {table_str}:\n"
         info += "\t".join([field for field in fields])
-        info += self._get_sample_rows(table_str, fields) + "\n
+        info += self._get_sample_rows(table_str, fields) + "\n"
         info += '\nColumn data types: ' + ",\t".join(
-            [f'`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
+            [f'\n`{field}` : `{dtype}`' for field, dtype in zip(fields, dtypes)]) + '\n' # noqa
         return info

     def _get_sample_rows(self, table: str, fields: List[str]) -> str:
-        command = f"select {','.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
+        command = f"select {', '.join(fields)} from {table} limit {self._sample_rows_in_table_info};"
         try:
             ret = self._call_engine(command)
             sample_rows = ret.data.to_lists()
mindsdb/interfaces/storage/db.py
CHANGED
@@ -212,7 +212,7 @@ class Project(Base):
     )
     deleted_at = Column(DateTime)
     name = Column(String, nullable=False)
-    company_id = Column(Integer)
+    company_id = Column(Integer, default=0)
     __table_args__ = (
         UniqueConstraint("name", "company_id", name="unique_project_name_company_id"),
     )
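The default of 0 matters together with the migration below because NULL values defeat the (name, company_id) unique constraint: SQL treats every NULL as distinct, so duplicate project names slip through while company_id is NULL. A small self-contained sketch of that behaviour against an in-memory SQLite database (the row values are invented):

    import sqlalchemy as sa

    engine = sa.create_engine('sqlite://')
    meta = sa.MetaData()
    project = sa.Table(
        'project', meta,
        sa.Column('id', sa.Integer, primary_key=True),
        sa.Column('name', sa.String, nullable=False),
        sa.Column('company_id', sa.Integer),
        sa.UniqueConstraint('name', 'company_id', name='unique_project_name_company_id'),
    )
    meta.create_all(engine)

    with engine.begin() as conn:
        # Two projects with the same name both insert successfully while company_id is NULL.
        conn.execute(project.insert(), [
            {'name': 'mindsdb', 'company_id': None},
            {'name': 'mindsdb', 'company_id': None},
        ])
        print(conn.execute(sa.select(sa.func.count()).select_from(project)).scalar())  # 2
        # With company_id = 0 the second insert would violate the unique constraint.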
mindsdb/migrations/versions/2025-01-15_c06c35f7e8e1_project_company.py
ADDED

@@ -0,0 +1,88 @@
+"""project-company
+
+Revision ID: c06c35f7e8e1
+Revises: f6dc924079fa
+Create Date: 2025-01-15 14:14:29.295834
+
+"""
+from collections import defaultdict
+
+from alembic import op
+import sqlalchemy as sa
+import mindsdb.interfaces.storage.db # noqa
+from mindsdb.utilities import log
+
+# revision identifiers, used by Alembic.
+revision = 'c06c35f7e8e1'
+down_revision = 'f6dc924079fa'
+branch_labels = None
+depends_on = None
+
+
+logger = log.getLogger(__name__)
+
+
+def upgrade():
+
+    """
+    convert company_id from null to 0 to make constrain works
+    duplicated names are renamed
+    """
+
+    conn = op.get_bind()
+    table = sa.Table(
+        'project',
+        sa.MetaData(),
+        sa.Column('id', sa.Integer()),
+        sa.Column('name', sa.String()),
+        sa.Column('company_id', sa.Integer()),
+    )
+
+    data = conn.execute(
+        table
+        .select()
+        .where(table.c.company_id == sa.null())
+    ).fetchall()
+
+    names = defaultdict(list)
+    for id, name, _ in data:
+        names[name].append(id)
+
+    # get duplicated
+    for name, ids in names.items():
+        if len(ids) == 1:
+            continue
+
+        # rename all except first
+        for id in ids[1:]:
+            new_name = f'{name}__{id}'
+
+            op.execute(
+                table
+                .update()
+                .where(table.c.id == id)
+                .values({'name': new_name})
+            )
+            logger.warning(f'Found duplicated project name: {name}, renamed to: {new_name}')
+
+    op.execute(
+        table
+        .update()
+        .where(table.c.company_id == sa.null())
+        .values({'company_id': 0})
+    )
+
+
+def downgrade():
+    table = sa.Table(
+        'project',
+        sa.MetaData(),
+        sa.Column('company_id', sa.Integer())
+    )
+
+    op.execute(
+        table
+        .update()
+        .where(table.c.company_id == 0)
+        .values({'company_id': sa.null()})
+    )