PyPI - MindsDB - Versions diffs - 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl - Mend - Supply Chain Defender

MindsDB 25.6.3.0__py3-none-any.whl → 25.6.4.0__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (28)

mindsdb/interfaces/skills/skill_tool.py CHANGED Viewed

@@ -24,10 +24,10 @@ logger = log.getLogger(__name__)
 class SkillType(enum.Enum):
-    TEXT2SQL_LEGACY = 'text2sql'
-    TEXT2SQL = 'sql'
-    KNOWLEDGE_BASE = 'knowledge_base'
-    RETRIEVAL = 'retrieval'
+    TEXT2SQL_LEGACY = "text2sql"
+    TEXT2SQL = "sql"
+    KNOWLEDGE_BASE = "knowledge_base"
+    RETRIEVAL = "retrieval"
 @dataclass
@@ -41,6 +41,7 @@ class SkillData:
         project_id (int): id of the project
         agent_tables_list (Optional[List[str]]): the restriction on available tables for an agent using the skill
     """
     name: str
     type: str
     params: dict
@@ -59,6 +60,7 @@ class SkillData:
             ValueError: if there is no intersection between skill's and agent's list.
                 This means that all tables restricted for use.
         """
         def list_to_map(input: List) -> Dict:
             agent_tables_map = defaultdict(set)
             for x in input:
@@ -66,15 +68,15 @@ class SkillData:
                     table_name = x
                     schema_name = None
                 elif isinstance(x, dict):
-                    table_name = x['table']
-                    schema_name = x.get('schema')
+                    table_name = x["table"]
+                    schema_name = x.get("schema")
                 else:
-                    raise ValueError(f'Unexpected value in tables list: {x}')
+                    raise ValueError(f"Unexpected value in tables list: {x}")
                 agent_tables_map[schema_name].add(table_name)
             return agent_tables_map
         agent_tables_map = list_to_map(self.agent_tables_list or [])
-        skill_tables_map = list_to_map(self.params.get('tables', []))
+        skill_tables_map = list_to_map(self.params.get("tables", []))
         if len(agent_tables_map) > 0 and len(skill_tables_map) > 0:
             if len(set(agent_tables_map) & set(skill_tables_map)) == 0:
@@ -105,31 +107,34 @@ class SkillToolController:
     def get_command_executor(self):
         if self.command_executor is None:
             from mindsdb.api.executor.command_executor import ExecuteCommands
-            from mindsdb.api.executor.controllers import SessionController  # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304)  # noqa
+            from mindsdb.api.executor.controllers import (
+                SessionController,
+            )  # Top-level import produces circular import in some cases TODO: figure out a fix without losing runtime improvements (context: see #9304)  # noqa
             sql_session = SessionController()
-            sql_session.database = config.get('default_project')
+            sql_session.database = config.get("default_project")
             self.command_executor = ExecuteCommands(sql_session)
         return self.command_executor
     def _make_text_to_sql_tools(self, skills: List[db.Skills], llm) -> List:
-        '''
-           Uses SQLAgent to execute tool
-        '''
+        """
+        Uses SQLAgent to execute tool
+        """
         # To prevent dependency on Langchain unless an actual tool uses it.
         try:
             from mindsdb.interfaces.agents.mindsdb_database_agent import MindsDBSQL
             from mindsdb.interfaces.skills.custom.text2sql.mindsdb_sql_toolkit import MindsDBSQLToolkit
         except ImportError:
             raise ImportError(
-                'To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`')
+                "To use the text-to-SQL skill, please install langchain with `pip install mindsdb[langchain]`"
+            )
         command_executor = self.get_command_executor()
         def escape_table_name(name: str) -> str:
-            name = name.strip(' `')
-            return f'`{name}`'
+            name = name.strip(" `")
+            return f"`{name}`"
         tables_list = []
         knowledge_bases_list = []
@@ -144,31 +149,31 @@ class SkillToolController:
         # First pass: collect all database and knowledge base parameters
         for skill in skills:
             # Update knowledge_base_database if specified in any skill
-            if skill.params.get('knowledge_base_database'):
-                knowledge_base_database = skill.params.get('knowledge_base_database')
+            if skill.params.get("knowledge_base_database"):
+                knowledge_base_database = skill.params.get("knowledge_base_database")
             # Extract databases from include_tables with dot notation
-            if skill.params.get('include_tables'):
-                include_tables = skill.params.get('include_tables')
+            if skill.params.get("include_tables"):
+                include_tables = skill.params.get("include_tables")
                 if isinstance(include_tables, str):
-                    include_tables = [t.strip() for t in include_tables.split(',')]
+                    include_tables = [t.strip() for t in include_tables.split(",")]
                 # Extract database names from dot notation
                 for table in include_tables:
-                    if '.' in table:
-                        db_name = table.split('.')[0]
+                    if "." in table:
+                        db_name = table.split(".")[0]
                         extracted_databases.add(db_name)
             # Extract databases from include_knowledge_bases with dot notation
-            if skill.params.get('include_knowledge_bases'):
-                include_kbs = skill.params.get('include_knowledge_bases')
+            if skill.params.get("include_knowledge_bases"):
+                include_kbs = skill.params.get("include_knowledge_bases")
                 if isinstance(include_kbs, str):
-                    include_kbs = [kb.strip() for kb in include_kbs.split(',')]
+                    include_kbs = [kb.strip() for kb in include_kbs.split(",")]
                 # Extract database names from dot notation
                 for kb in include_kbs:
-                    if '.' in kb:
-                        db_name = kb.split('.')[0]
+                    if "." in kb:
+                        db_name = kb.split(".")[0]
                         if db_name != knowledge_base_database:
                             # Only update if it's different from the default
                             knowledge_base_database = db_name
@@ -176,41 +181,41 @@ class SkillToolController:
         # Second pass: collect all tables and knowledge base restrictions
         for skill in skills:
             # Get database for tables (this is an actual database connection)
-            database = skill.params.get('database', DEFAULT_TEXT2SQL_DATABASE)
+            database = skill.params.get("database", DEFAULT_TEXT2SQL_DATABASE)
             # Add databases extracted from dot notation if no explicit database is provided
             if not database and extracted_databases:
                 # Use the first extracted database if no explicit database is provided
                 database = next(iter(extracted_databases))
                 # Update the skill params with the extracted database
-                skill.params['database'] = database
+                skill.params["database"] = database
             # Extract knowledge base restrictions if they exist in the skill params
-            if skill.params.get('include_knowledge_bases'):
+            if skill.params.get("include_knowledge_bases"):
                 # Convert to list if it's a string
-                include_kbs = skill.params.get('include_knowledge_bases')
+                include_kbs = skill.params.get("include_knowledge_bases")
                 if isinstance(include_kbs, str):
-                    include_kbs = [kb.strip() for kb in include_kbs.split(',')]
+                    include_kbs = [kb.strip() for kb in include_kbs.split(",")]
                 # Process each knowledge base name
                 for kb in include_kbs:
                     # If it doesn't have a dot, prefix it with the knowledge_base_database
-                    if '.' not in kb:
+                    if "." not in kb:
                         knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
                     else:
                         knowledge_bases_list.append(kb)
             # Collect ignore_knowledge_bases
-            if skill.params.get('ignore_knowledge_bases'):
+            if skill.params.get("ignore_knowledge_bases"):
                 # Convert to list if it's a string
-                ignore_kbs = skill.params.get('ignore_knowledge_bases')
+                ignore_kbs = skill.params.get("ignore_knowledge_bases")
                 if isinstance(ignore_kbs, str):
-                    ignore_kbs = [kb.strip() for kb in ignore_kbs.split(',')]
+                    ignore_kbs = [kb.strip() for kb in ignore_kbs.split(",")]
                 # Process each knowledge base name to ignore
                 for kb in ignore_kbs:
                     # If it doesn't have a dot, prefix it with the knowledge_base_database
-                    if '.' not in kb:
+                    if "." not in kb:
                         ignore_knowledge_bases_list.append(f"{knowledge_base_database}.{kb}")
                     else:
                         ignore_knowledge_bases_list.append(kb)
@@ -220,20 +225,20 @@ class SkillToolController:
                 continue
             # Process include_tables with dot notation
-            if skill.params.get('include_tables'):
-                include_tables = skill.params.get('include_tables')
+            if skill.params.get("include_tables"):
+                include_tables = skill.params.get("include_tables")
                 if isinstance(include_tables, str):
-                    include_tables = [t.strip() for t in include_tables.split(',')]
+                    include_tables = [t.strip() for t in include_tables.split(",")]
                 for table in include_tables:
                     # If table already has a database prefix, use it as is
-                    if '.' in table:
+                    if "." in table:
                         # Check if the table already has backticks
-                        if '`' in table:
+                        if "`" in table:
                             tables_list.append(table)
                         else:
                             # Apply escape_table_name only to the table part
-                            parts = table.split('.')
+                            parts = table.split(".")
                             if len(parts) == 2:
                                 # Format: database.table
                                 tables_list.append(f"{parts[0]}.{escape_table_name(parts[1])}")
@@ -255,15 +260,15 @@ class SkillToolController:
             if restriction_on_tables is None and database:
                 try:
                     handler = command_executor.session.integration_controller.get_data_handler(database)
-                    if 'all' in inspect.signature(handler.get_tables).parameters:
+                    if "all" in inspect.signature(handler.get_tables).parameters:
                         response = handler.get_tables(all=True)
                     else:
                         response = handler.get_tables()
                     # no restrictions
                     columns = [c.lower() for c in response.data_frame.columns]
-                    name_idx = columns.index('table_name') if 'table_name' in columns else 0
+                    name_idx = columns.index("table_name") if "table_name" in columns else 0
-                    if 'table_schema' in response.data_frame.columns:
+                    if "table_schema" in response.data_frame.columns:
                         for _, row in response.data_frame.iterrows():
                             tables_list.append(f"{database}.{row['table_schema']}.{escape_table_name(row[name_idx])}")
                     else:
@@ -278,15 +283,15 @@ class SkillToolController:
                 for schema_name, tables in restriction_on_tables.items():
                     for table in tables:
                         # Check if the table already has dot notation (e.g., 'postgresql_conn.home_rentals')
-                        if '.' in table:
+                        if "." in table:
                             # Table already has database prefix, add it directly
                             tables_list.append(escape_table_name(table))
                         else:
                             # No dot notation, apply schema and database as needed
                             if schema_name is None:
-                                tables_list.append(f'{database}.{escape_table_name(table)}')
+                                tables_list.append(f"{database}.{escape_table_name(table)}")
                             else:
-                                tables_list.append(f'{database}.{schema_name}.{escape_table_name(table)}')
+                                tables_list.append(f"{database}.{schema_name}.{escape_table_name(table)}")
                 continue
         # Remove duplicates from lists
@@ -320,8 +325,8 @@ class SkillToolController:
         # First, add databases from skills with explicit database parameters
         for skill in skills:
-            if skill.params.get('database'):
-                databases_struct[skill.params['database']] = skill.restriction_on_tables
+            if skill.params.get("database"):
+                databases_struct[skill.params["database"]] = skill.restriction_on_tables
         # Then, add all extracted databases with no restrictions
         for db_name in extracted_databases:
@@ -338,13 +343,11 @@ class SkillToolController:
             ignore_knowledge_bases=ignore_knowledge_bases,
             knowledge_base_database=knowledge_base_database,
             sample_rows_in_table_info=3,
-            cache=get_cache('agent', max_size=_MAX_CACHE_SIZE)
-        )
-        db = MindsDBSQL.custom_init(
-            sql_agent=sql_agent
+            cache=get_cache("agent", max_size=_MAX_CACHE_SIZE),
         )
-        toolkit = MindsDBSQLToolkit(db=db, llm=llm)
+        db = MindsDBSQL.custom_init(sql_agent=sql_agent)
+        should_include_kb_tools = include_knowledge_bases is not None and len(include_knowledge_bases) > 0
+        toolkit = MindsDBSQLToolkit(db=db, llm=llm, include_knowledge_base_tools=should_include_kb_tools)
         return toolkit.get_tools()
     def _make_retrieval_tools(self, skill: db.Skills, llm, embedding_model):
@@ -352,47 +355,46 @@ class SkillToolController:
         creates advanced retrieval tool i.e. RAG
         """
         params = skill.params
-        config = params.get('config', {})
-        if 'llm' not in config:
+        config = params.get("config", {})
+        if "llm" not in config:
             # Set LLM if not explicitly provided in configs.
-            config['llm'] = llm
+            config["llm"] = llm
         tool = dict(
-            name=params.get('name', skill.name),
-            source=params.get('source', None),
+            name=params.get("name", skill.name),
+            source=params.get("source", None),
             config=config,
-            description=f'You must use this tool to get more context or information '
-                        f'to answer a question about {params["description"]}. '
-                        f'The input should be the exact question the user is asking.',
-            type=skill.type
+            description=f"You must use this tool to get more context or information "
+            f"to answer a question about {params['description']}. "
+            f"The input should be the exact question the user is asking.",
+            type=skill.type,
         )
         pred_args = {}
-        pred_args['llm'] = llm
+        pred_args["llm"] = llm
         from .retrieval_tool import build_retrieval_tools
         return build_retrieval_tools(tool, pred_args, skill)
     def _get_rag_query_function(self, skill: db.Skills):
         session_controller = self.get_command_executor().session
         def _answer_question(question: str) -> str:
-            knowledge_base_name = skill.params['source']
+            knowledge_base_name = skill.params["source"]
             # make select in KB table
             query = Select(
                 targets=[Star()],
-                where=BinaryOperation(op='=', args=[
-                    Identifier(TableField.CONTENT.value), Constant(question)
-                ]),
+                where=BinaryOperation(op="=", args=[Identifier(TableField.CONTENT.value), Constant(question)]),
                 limit=Constant(_DEFAULT_TOP_K_SIMILARITY_SEARCH),
             )
             kb_table = session_controller.kb_controller.get_table(knowledge_base_name, skill.project_id)
             res = kb_table.select_query(query)
             # Handle both chunk_content and content column names
-            if hasattr(res, 'chunk_content'):
-                return '\n'.join(res.chunk_content)
-            elif hasattr(res, 'content'):
-                return '\n'.join(res.content)
+            if hasattr(res, "chunk_content"):
+                return "\n".join(res.chunk_content)
+            elif hasattr(res, "content"):
+                return "\n".join(res.content)
             else:
                 return "No content or chunk_content found in knowledge base response"
@@ -400,19 +402,22 @@ class SkillToolController:
     def _make_knowledge_base_tools(self, skill: db.Skills) -> dict:
         # To prevent dependency on Langchain unless an actual tool uses it.
-        description = skill.params.get('description', '')
+        description = skill.params.get("description", "")
-        logger.warning("This skill is deprecated and will be removed in the future. "
-                       "Please use `retrieval` skill instead ")
+        logger.warning(
+            "This skill is deprecated and will be removed in the future. Please use `retrieval` skill instead "
+        )
         return dict(
-            name='Knowledge Base Retrieval',
+            name="Knowledge Base Retrieval",
             func=self._get_rag_query_function(skill),
-            description=f'Use this tool to get more context or information to answer a question about {description}. The input should be the exact question the user is asking.',
-            type=skill.type
+            description=f"Use this tool to get more context or information to answer a question about {description}. The input should be the exact question the user is asking.",
+            type=skill.type,
         )
-    def get_tools_from_skills(self, skills_data: List[SkillData], llm: BaseChatModel, embedding_model: Embeddings) -> dict:
+    def get_tools_from_skills(
+        self, skills_data: List[SkillData], llm: BaseChatModel, embedding_model: Embeddings
+    ) -> dict:
         """Creates function for skill and metadata (name, description)
         Args:
@@ -431,7 +436,8 @@ class SkillToolController:
                 skill_type = SkillType(skill.type)
             except ValueError:
                 raise NotImplementedError(
-                    f'skill of type {skill.type} is not supported as a tool, supported types are: {list(SkillType._member_names_)}')
+                    f"skill of type {skill.type} is not supported as a tool, supported types are: {list(SkillType._member_names_)}"
+                )
             if skill_type == SkillType.TEXT2SQL_LEGACY:
                 skill_type = SkillType.TEXT2SQL
@@ -442,10 +448,7 @@ class SkillToolController:
             if skill_type == SkillType.TEXT2SQL:
                 tools[skill_type] = self._make_text_to_sql_tools(skills, llm)
             elif skill_type == SkillType.KNOWLEDGE_BASE:
-                tools[skill_type] = [
-                    self._make_knowledge_base_tools(skill)
-                    for skill in skills
-                ]
+                tools[skill_type] = [self._make_knowledge_base_tools(skill) for skill in skills]
             elif skill_type == SkillType.RETRIEVAL:
                 tools[skill_type] = []
                 for skill in skills:

mindsdb/interfaces/skills/sql_agent.py CHANGED Viewed

@@ -76,7 +76,7 @@ def split_table_name(table_name: str) -> List[str]:
         result.append(current.strip("`"))
     # ensure we split the table name
-    result = [r.split(".") for r in result][0]
+    # result = [r.split(".") for r in result][0]
     return result