MindsDB 25.6.4.0__py3-none-any.whl → 25.7.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (61)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +53 -94
  3. mindsdb/api/a2a/agent.py +30 -206
  4. mindsdb/api/a2a/common/server/server.py +26 -27
  5. mindsdb/api/a2a/task_manager.py +93 -227
  6. mindsdb/api/a2a/utils.py +21 -0
  7. mindsdb/api/executor/command_executor.py +8 -6
  8. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +1 -1
  9. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +9 -11
  10. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  11. mindsdb/api/executor/planner/query_prepare.py +68 -87
  12. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +6 -1
  13. mindsdb/api/executor/sql_query/steps/union_step.py +11 -9
  14. mindsdb/api/executor/utilities/sql.py +97 -21
  15. mindsdb/api/http/namespaces/agents.py +126 -201
  16. mindsdb/api/http/namespaces/config.py +12 -1
  17. mindsdb/api/http/namespaces/file.py +49 -24
  18. mindsdb/api/mcp/start.py +45 -31
  19. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +45 -52
  20. mindsdb/integrations/handlers/huggingface_handler/__init__.py +17 -12
  21. mindsdb/integrations/handlers/huggingface_handler/finetune.py +223 -223
  22. mindsdb/integrations/handlers/huggingface_handler/huggingface_handler.py +383 -383
  23. mindsdb/integrations/handlers/huggingface_handler/requirements.txt +7 -6
  24. mindsdb/integrations/handlers/huggingface_handler/requirements_cpu.txt +7 -6
  25. mindsdb/integrations/handlers/huggingface_handler/settings.py +25 -25
  26. mindsdb/integrations/handlers/litellm_handler/litellm_handler.py +22 -15
  27. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +244 -141
  28. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +1 -1
  29. mindsdb/integrations/handlers/salesforce_handler/salesforce_handler.py +3 -2
  30. mindsdb/integrations/handlers/salesforce_handler/salesforce_tables.py +1 -1
  31. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  32. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  33. mindsdb/integrations/libs/keyword_search_base.py +41 -0
  34. mindsdb/integrations/libs/vectordatabase_handler.py +114 -84
  35. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +36 -42
  36. mindsdb/integrations/utilities/sql_utils.py +11 -0
  37. mindsdb/interfaces/agents/agents_controller.py +29 -9
  38. mindsdb/interfaces/agents/langchain_agent.py +7 -5
  39. mindsdb/interfaces/agents/mcp_client_agent.py +4 -4
  40. mindsdb/interfaces/agents/mindsdb_database_agent.py +10 -43
  41. mindsdb/interfaces/data_catalog/data_catalog_reader.py +3 -1
  42. mindsdb/interfaces/database/projects.py +1 -3
  43. mindsdb/interfaces/functions/controller.py +54 -64
  44. mindsdb/interfaces/functions/to_markdown.py +47 -14
  45. mindsdb/interfaces/knowledge_base/controller.py +228 -110
  46. mindsdb/interfaces/knowledge_base/evaluate.py +18 -6
  47. mindsdb/interfaces/knowledge_base/executor.py +346 -0
  48. mindsdb/interfaces/knowledge_base/llm_client.py +5 -6
  49. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +20 -45
  50. mindsdb/interfaces/knowledge_base/preprocessing/models.py +36 -69
  51. mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py +2 -0
  52. mindsdb/interfaces/skills/sql_agent.py +181 -130
  53. mindsdb/interfaces/storage/db.py +9 -7
  54. mindsdb/utilities/config.py +58 -40
  55. mindsdb/utilities/exception.py +58 -7
  56. mindsdb/utilities/security.py +54 -11
  57. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/METADATA +245 -259
  58. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/RECORD +61 -58
  59. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/WHEEL +0 -0
  60. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/licenses/LICENSE +0 -0
  61. {mindsdb-25.6.4.0.dist-info → mindsdb-25.7.2.0.dist-info}/top_level.txt +0 -0
mindsdb/interfaces/knowledge_base/preprocessing/models.py

@@ -18,45 +18,31 @@ class PreprocessorType(Enum):
 
 class BasePreprocessingConfig(BaseModel):
     """Base configuration for preprocessing"""
+
     chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Size of document chunks")
     chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP, description="Overlap between chunks")
+    doc_id_column_name: str = Field(default="_original_doc_id", description="Name of doc_id columns in metadata")
 
 
 class ContextualConfig(BasePreprocessingConfig):
     """Configuration specific to contextual preprocessing"""
+
     llm_config: LLMConfig = Field(
-        default_factory=LLMConfig,
-        description="LLM configuration to use for context generation"
-    )
-    context_template: Optional[str] = Field(
-        default=None,
-        description="Custom template for context generation"
-    )
-    summarize: Optional[bool] = Field(
-        default=False,
-        description="Whether to return chunks as summarizations"
+        default_factory=LLMConfig, description="LLM configuration to use for context generation"
     )
+    context_template: Optional[str] = Field(default=None, description="Custom template for context generation")
+    summarize: Optional[bool] = Field(default=False, description="Whether to return chunks as summarizations")
 
 
-class TextChunkingConfig(BaseModel):
+class TextChunkingConfig(BasePreprocessingConfig):
     """Configuration for text chunking preprocessor using Pydantic"""
-    chunk_size: int = Field(
-        default=1000,
-        description="The target size of each text chunk",
-        gt=0
-    )
-    chunk_overlap: int = Field(
-        default=200,
-        description="The number of characters to overlap between chunks",
-        ge=0
-    )
-    length_function: Callable = Field(
-        default=len,
-        description="Function to measure text length"
-    )
+
+    chunk_size: int = Field(default=1000, description="The target size of each text chunk", gt=0)
+    chunk_overlap: int = Field(default=200, description="The number of characters to overlap between chunks", ge=0)
+    length_function: Callable = Field(default=len, description="Function to measure text length")
     separators: List[str] = Field(
         default=["\n\n", "\n", " ", ""],
-        description="List of separators to use for splitting text, in order of priority"
+        description="List of separators to use for splitting text, in order of priority",
     )
 
     class Config:
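Because TextChunkingConfig now subclasses BasePreprocessingConfig, text chunking inherits the new doc_id_column_name field while keeping its own stricter defaults. A minimal sketch of that effect (pydantic v2 assumed; the DEFAULT_* values are illustrative stand-ins, not the package constants):

from pydantic import BaseModel, Field

DEFAULT_CHUNK_SIZE = 2000     # illustrative stand-in
DEFAULT_CHUNK_OVERLAP = 200   # illustrative stand-in


class BasePreprocessingConfig(BaseModel):
    chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE)
    chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP)
    doc_id_column_name: str = Field(default="_original_doc_id")


class TextChunkingConfig(BasePreprocessingConfig):
    # overrides the inherited defaults, adds validation constraints
    chunk_size: int = Field(default=1000, gt=0)
    chunk_overlap: int = Field(default=200, ge=0)


cfg = TextChunkingConfig()
assert cfg.chunk_size == 1000
assert cfg.doc_id_column_name == "_original_doc_id"  # inherited from the new base field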
@@ -65,44 +51,28 @@ class TextChunkingConfig(BaseModel):
 
 class JSONChunkingConfig(BasePreprocessingConfig):
     """Configuration for JSON chunking preprocessor"""
-    flatten_nested: bool = Field(
-        default=True,
-        description="Whether to flatten nested JSON structures"
-    )
-    include_metadata: bool = Field(
-        default=True,
-        description="Whether to include original metadata in chunks"
-    )
+
+    flatten_nested: bool = Field(default=True, description="Whether to flatten nested JSON structures")
+    include_metadata: bool = Field(default=True, description="Whether to include original metadata in chunks")
     chunk_by_object: bool = Field(
-        default=True,
-        description="Whether to chunk by top-level objects (True) or create a single document (False)"
-    )
-    exclude_fields: List[str] = Field(
-        default_factory=list,
-        description="List of fields to exclude from chunking"
+        default=True, description="Whether to chunk by top-level objects (True) or create a single document (False)"
     )
+    exclude_fields: List[str] = Field(default_factory=list, description="List of fields to exclude from chunking")
     include_fields: List[str] = Field(
         default_factory=list,
-        description="List of fields to include in chunking (if empty, all fields except excluded ones are included)"
+        description="List of fields to include in chunking (if empty, all fields except excluded ones are included)",
     )
     metadata_fields: List[str] = Field(
         default_factory=list,
         description="List of fields to extract into metadata for filtering "
-        "(can include nested fields using dot notation). "
-        "If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure)."
+        "(can include nested fields using dot notation). "
+        "If empty, all primitive fields will be extracted (top-level fields if available, otherwise all primitive fields in the flattened structure).",
     )
     extract_all_primitives: bool = Field(
-        default=False,
-        description="Whether to extract all primitive values (strings, numbers, booleans) into metadata"
-    )
-    nested_delimiter: str = Field(
-        default=".",
-        description="Delimiter for flattened nested field names"
-    )
-    content_column: str = Field(
-        default="content",
-        description="Name of the content column for chunk ID generation"
+        default=False, description="Whether to extract all primitive values (strings, numbers, booleans) into metadata"
     )
+    nested_delimiter: str = Field(default=".", description="Delimiter for flattened nested field names")
+    content_column: str = Field(default="content", description="Name of the content column for chunk ID generation")
 
     class Config:
         arbitrary_types_allowed = True
@@ -110,25 +80,20 @@ class JSONChunkingConfig(BasePreprocessingConfig):
 
 class PreprocessingConfig(BaseModel):
     """Complete preprocessing configuration"""
-    type: PreprocessorType = Field(
-        default=PreprocessorType.TEXT_CHUNKING,
-        description="Type of preprocessing to apply"
-    )
+
+    type: PreprocessorType = Field(default=PreprocessorType.TEXT_CHUNKING, description="Type of preprocessing to apply")
     contextual_config: Optional[ContextualConfig] = Field(
-        default=None,
-        description="Configuration for contextual preprocessing"
+        default=None, description="Configuration for contextual preprocessing"
     )
     text_chunking_config: Optional[TextChunkingConfig] = Field(
-        default=None,
-        description="Configuration for text chunking preprocessing"
+        default=None, description="Configuration for text chunking preprocessing"
     )
     json_chunking_config: Optional[JSONChunkingConfig] = Field(
-        default=None,
-        description="Configuration for JSON chunking preprocessing"
+        default=None, description="Configuration for JSON chunking preprocessing"
     )
 
-    @model_validator(mode='after')
-    def validate_config_presence(self) -> 'PreprocessingConfig':
+    @model_validator(mode="after")
+    def validate_config_presence(self) -> "PreprocessingConfig":
         """Ensure the appropriate config is present for the chosen type"""
         if self.type == PreprocessorType.CONTEXTUAL and not self.contextual_config:
             self.contextual_config = ContextualConfig()
@@ -137,26 +102,28 @@ class PreprocessingConfig(BaseModel):
         if self.type == PreprocessorType.JSON_CHUNKING and not self.json_chunking_config:
             # Import here to avoid circular imports
             from mindsdb.interfaces.knowledge_base.preprocessing.json_chunker import JSONChunkingConfig
+
             self.json_chunking_config = JSONChunkingConfig()
         return self
 
 
 class Document(BaseModel):
-
     """Document model with default metadata handling"""
+
     id: Optional[Union[int, str]] = Field(default=None, description="Unique identifier for the document")
     content: str = Field(description="The document content")
     embeddings: Optional[List[float]] = Field(default=None, description="Vector embeddings of the content")
     metadata: Optional[Dict[str, Any]] = Field(default=None, description="Additional document metadata")
 
-    @model_validator(mode='after')
-    def validate_metadata(self) -> 'Document':
+    @model_validator(mode="after")
+    def validate_metadata(self) -> "Document":
         """Ensure metadata is present and valid"""
         if not self.metadata:
-            self.metadata = {'source': 'default'}
+            self.metadata = {"source": "default"}
         return self
 
 
 class ProcessedChunk(Document):
     """Processed chunk that aligns with VectorStoreHandler schema"""
+
     pass
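The validate_config_presence validator above back-fills a default config object when only the preprocessor type is given. A brief usage sketch of the branch shown in this hunk:

# Selecting a preprocessor type without its config auto-creates the default.
config = PreprocessingConfig(type=PreprocessorType.CONTEXTUAL)
assert config.contextual_config is not None  # filled in by validate_config_presence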
mindsdb/interfaces/skills/custom/text2sql/mindsdb_kb_tools.py

@@ -106,6 +106,8 @@ class KnowledgeBaseInfoTool(BaseTool):
 
         for kb_name in kb_names:
             try:
+                self.db.check_knowledge_base_permission(Identifier(kb_name))
+
                 # Get knowledge base schema
                 schema_result = self.db.run_no_throw(str(Describe(kb_name, type="knowledge_base")))
 
mindsdb/interfaces/skills/sql_agent.py

@@ -3,7 +3,9 @@ import csv
 import inspect
 import traceback
 from io import StringIO
-from typing import Iterable, List, Optional, Any
+from typing import Iterable, List, Optional, Any, Tuple
+from collections import defaultdict
+import fnmatch
 
 import pandas as pd
 from mindsdb_sql_parser import parse_sql
@@ -75,12 +77,84 @@ def split_table_name(table_name: str) -> List[str]:
     if current:
         result.append(current.strip("`"))
 
-    # ensure we split the table name
-    # result = [r.split(".") for r in result][0]
-
     return result
 
 
+class TablesCollection:
+    """
+    Collection of identifiers.
+    Supports wildcard in tables name.
+    """
+
+    def __init__(self, items: List[Identifier | str] = None, default_db=None):
+        if items is None:
+            items = []
+
+        self.items = items
+        self._dbs = defaultdict(set)
+        self._schemas = defaultdict(dict)
+        self._no_db_tables = set()
+        self.has_wildcard = False
+        self.databases = set()
+        self._default_db = default_db
+
+        for name in items:
+            if not isinstance(name, Identifier):
+                name = Identifier(name)
+            db, schema, tbl = self._get_paths(name)
+            if db is None:
+                self._no_db_tables.add(tbl)
+            elif schema is None:
+                self._dbs[db].add(tbl)
+            else:
+                if schema not in self._schemas[db]:
+                    self._schemas[db][schema] = set()
+                self._schemas[db][schema].add(tbl)
+
+            if "*" in tbl:
+                self.has_wildcard = True
+            self.databases.add(db)
+
+    def _get_paths(self, table: Identifier) -> Tuple:
+        # split identifier to db, schema, table name
+        schema = None
+        db = None
+
+        match [x.lower() for x in table.parts]:
+            case [tbl]:
+                pass
+            case [db, tbl]:
+                pass
+            case [db, schema, tbl]:
+                pass
+            case _:
+                raise NotImplementedError
+        return db, schema, tbl.lower()
+
+    def match(self, table: Identifier) -> bool:
+        # Check if input table matches to tables in collection
+
+        db, schema, tbl = self._get_paths(table)
+        if db is None:
+            if tbl in self._no_db_tables:
+                return True
+            if self._default_db is not None:
+                return self.match(Identifier(parts=[self._default_db, tbl]))
+
+        if schema is not None:
+            if any([fnmatch.fnmatch(tbl, pattern) for pattern in self._schemas[db].get(schema, [])]):
+                return True
+
+        # table might be specified without schema
+        return any([fnmatch.fnmatch(tbl, pattern) for pattern in self._dbs[db]])
+
+    def __bool__(self):
+        return len(self.items) > 0
+
+    def __repr__(self):
+        return f"Tables({self.items})"
+
+
 class SQLAgent:
     """
     SQLAgent is a class that handles SQL queries for agents.
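The new TablesCollection is the core of this release's table/knowledge-base permission rework: identifiers are bucketed by database and schema, and table names are matched with fnmatch so an allow list can contain wildcards. A hedged usage sketch (assuming Identifier is the mindsdb_sql_parser AST class used throughout sql_agent.py, which splits dotted names into parts):

from mindsdb_sql_parser.ast import Identifier

tables = TablesCollection(["mydb.sales_*", "other_db.public.orders"])

assert tables.match(Identifier("mydb.sales_2024"))         # fnmatch against the sales_* pattern
assert not tables.match(Identifier("mydb.customers"))      # not in the collection
assert tables.match(Identifier("other_db.public.orders"))  # exact db.schema.table entry
assert tables.has_wildcard                                 # a "*" was seen in a table name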
@@ -117,21 +191,23 @@ class SQLAgent:
         self._command_executor = command_executor
         self._mindsdb_db_struct = databases_struct
         self.knowledge_base_database = knowledge_base_database  # This is a project name, not a database connection
+        self._databases = databases
         self._sample_rows_in_table_info = int(sample_rows_in_table_info)
 
-        self._tables_to_include = include_tables
-        self._tables_to_ignore = []
-        self._knowledge_bases_to_include = include_knowledge_bases
-        self._knowledge_bases_to_ignore = []
-        self._databases = databases
-        if not self._tables_to_include:
+        self._tables_to_include = TablesCollection(include_tables)
+        if self._tables_to_include:
             # ignore_tables and include_tables should not be used together.
             # include_tables takes priority if it's set.
-            self._tables_to_ignore = ignore_tables or []
-        if not self._knowledge_bases_to_include:
+            ignore_tables = []
+        self._tables_to_ignore = TablesCollection(ignore_tables)
+
+        self._knowledge_bases_to_include = TablesCollection(include_knowledge_bases, default_db=knowledge_base_database)
+        if self._knowledge_bases_to_include:
             # ignore_knowledge_bases and include_knowledge_bases should not be used together.
             # include_knowledge_bases takes priority if it's set.
-            self._knowledge_bases_to_ignore = ignore_knowledge_bases or []
+            ignore_knowledge_bases = []
+        self._knowledge_bases_to_ignore = TablesCollection(ignore_knowledge_bases, default_db=knowledge_base_database)
+
         self._cache = cache
 
         from mindsdb.interfaces.skills.skill_tool import SkillToolController
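The constructor change above keeps the old precedence rule but expresses it through TablesCollection truthiness: when an include list is present, the corresponding ignore list is discarded before being wrapped. A standalone sketch of the pattern:

include_tables = ["db1.t1"]
ignore_tables = ["db1.t2"]

tables_to_include = TablesCollection(include_tables)
if tables_to_include:       # truthy: the collection holds at least one item
    ignore_tables = []      # include list takes priority, ignore list is dropped
tables_to_ignore = TablesCollection(ignore_tables)

assert not tables_to_ignore  # __bool__ is False for an empty collection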
@@ -159,46 +235,54 @@ class SQLAgent:
         if not isinstance(ast_query, (Select, Show, Describe, Explain)):
             raise ValueError(f"Query is not allowed: {ast_query.to_string()}")
 
+        kb_names = self.get_all_knowledge_base_names()
+
         # Check tables
         if self._tables_to_include:
-            tables_parts = [split_table_name(x) for x in self._tables_to_include]
-            no_schema_parts = []
-            for t in tables_parts:
-                if len(t) == 3:
-                    no_schema_parts.append([t[0], t[2]])
-            tables_parts += no_schema_parts
 
             def _check_f(node, is_table=None, **kwargs):
                 if is_table and isinstance(node, Identifier):
                     table_name = ".".join(node.parts)
 
-                    # Get the list of available knowledge bases
-                    kb_names = self.get_usable_knowledge_base_names()
-
                     # Check if this table is a knowledge base
-                    is_kb = table_name in kb_names
-
-                    # If it's a knowledge base and we have knowledge base restrictions
-                    if is_kb and self._knowledge_bases_to_include:
-                        kb_parts = [split_table_name(x) for x in self._knowledge_bases_to_include]
-                        if node.parts not in kb_parts:
-                            raise ValueError(
-                                f"Knowledge base {table_name} not found. Available knowledge bases: {', '.join(self._knowledge_bases_to_include)}"
-                            )
-                    # Regular table check
-                    elif not is_kb and self._tables_to_include and node.parts not in tables_parts:
-                        raise ValueError(
-                            f"Table {table_name} not found. Available tables: {', '.join(self._tables_to_include)}"
-                        )
-                    # Check if it's a restricted knowledge base
-                    elif is_kb and table_name in self._knowledge_bases_to_ignore:
-                        raise ValueError(f"Knowledge base {table_name} is not allowed.")
-                    # Check if it's a restricted table
-                    elif not is_kb and table_name in self._tables_to_ignore:
-                        raise ValueError(f"Table {table_name} is not allowed.")
+                    if table_name in kb_names or node.parts[-1] in kb_names:
+                        # If it's a knowledge base and we have knowledge base restrictions
+                        self.check_knowledge_base_permission(node)
+                    else:
+                        try:
+                            # Regular table check
+                            self.check_table_permission(node)
+                        except ValueError as origin_exc:
+                            # was it badly quoted by llm?
+                            if len(node.parts) == 1 and node.is_quoted[0] and "." in node.parts[0]:
+                                node2 = Identifier(node.parts[0])
+                                try:
+                                    _check_f(node2, is_table=True)
+                                    return node2
+                                except ValueError:
+                                    ...
+                            raise origin_exc
 
             query_traversal(ast_query, _check_f)
 
+    def check_knowledge_base_permission(self, node):
+        if self._knowledge_bases_to_include and not self._knowledge_bases_to_include.match(node):
+            raise ValueError(
+                f"Knowledge base {str(node)} not found. Available knowledge bases: {', '.join(self._knowledge_bases_to_include.items)}"
+            )
+        # Check if it's a restricted knowledge base
+        if self._knowledge_bases_to_ignore and self._knowledge_bases_to_ignore.match(node):
+            raise ValueError(f"Knowledge base {str(node)} is not allowed.")
+
+    def check_table_permission(self, node):
+        if self._tables_to_include and not self._tables_to_include.match(node):
+            raise ValueError(
+                f"Table {str(node)} not found. Available tables: {', '.join(self._tables_to_include.items)}"
+            )
+        # Check if it's a restricted table
+        if self._tables_to_ignore and self._tables_to_ignore.match(node):
+            raise ValueError(f"Table {str(node)} is not allowed.")
+
     def get_usable_table_names(self) -> Iterable[str]:
         """Get a list of tables that the agent has access to.
 
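The except-branch above handles identifiers the LLM quoted as a single token: a backtick-quoted db1.t1 parses as one part containing a dot, so the check re-parses it and retries. A sketch of the re-parse step (same Identifier assumption as above):

quoted = Identifier(parts=["db1.t1"])   # as parsed from a quoted `db1.t1`
assert len(quoted.parts) == 1 and "." in quoted.parts[0]

reparsed = Identifier(quoted.parts[0])  # node2 in _check_f
assert reparsed.parts == ["db1", "t1"]  # now eligible for a db.table permission check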
213
297
  if cached_tables:
214
298
  return cached_tables
215
299
 
216
- if self._tables_to_include:
217
- return self._tables_to_include
300
+ if not self._tables_to_include:
301
+ # no tables allowed
302
+ return []
303
+ if not self._tables_to_include.has_wildcard:
304
+ return self._tables_to_include.items
218
305
 
219
306
  result_tables = []
220
307
 
221
- for db_name in self._mindsdb_db_struct:
308
+ for db_name in self._tables_to_include.databases:
222
309
  handler = self._command_executor.session.integration_controller.get_data_handler(db_name)
223
310
 
224
- schemas_names = list(self._mindsdb_db_struct[db_name].keys())
225
- if len(schemas_names) > 1 and None in schemas_names:
226
- raise Exception("default schema and named schemas can not be used in same filter")
227
-
228
- if None in schemas_names:
229
- # get tables only from default schema
230
- response = handler.get_tables()
231
- tables_in_default_schema = list(response.data_frame.table_name)
232
- schema_tables_restrictions = self._mindsdb_db_struct[db_name][None] # None - is default schema
233
- if schema_tables_restrictions is None:
234
- for table_name in tables_in_default_schema:
235
- result_tables.append([db_name, table_name])
236
- else:
237
- for table_name in schema_tables_restrictions:
238
- if table_name in tables_in_default_schema:
239
- result_tables.append([db_name, table_name])
311
+ if "all" in inspect.signature(handler.get_tables).parameters:
312
+ response = handler.get_tables(all=True)
240
313
  else:
241
- if "all" in inspect.signature(handler.get_tables).parameters:
242
- response = handler.get_tables(all=True)
314
+ response = handler.get_tables()
315
+ df = response.data_frame
316
+ col_name = "table_name"
317
+ if col_name not in df.columns:
318
+ # get first column if not found
319
+ col_name = df.columns[0]
320
+
321
+ for _, row in df.iterrows():
322
+ if "table_schema" in row:
323
+ parts = [db_name, row["table_schema"], row[col_name]]
243
324
  else:
244
- response = handler.get_tables()
245
- response_schema_names = list(response.data_frame.table_schema.unique())
246
- schemas_intersection = set(schemas_names) & set(response_schema_names)
247
- if len(schemas_intersection) == 0:
248
- raise Exception("There are no allowed schemas in ds")
249
-
250
- for schema_name in schemas_intersection:
251
- schema_sub_df = response.data_frame[response.data_frame["table_schema"] == schema_name]
252
- if self._mindsdb_db_struct[db_name][schema_name] is None:
253
- # all tables from schema allowed
254
- for row in schema_sub_df:
255
- result_tables.append([db_name, schema_name, row["table_name"]])
256
- else:
257
- for table_name in self._mindsdb_db_struct[db_name][schema_name]:
258
- if table_name in schema_sub_df["table_name"].values:
259
- result_tables.append([db_name, schema_name, table_name])
325
+ parts = [db_name, row[col_name]]
326
+ if self._tables_to_include.match(Identifier(parts=parts)):
327
+ if not self._tables_to_ignore.match(Identifier(parts=parts)):
328
+ result_tables.append(parts)
260
329
 
261
330
  result_tables = [".".join(x) for x in result_tables]
262
331
  if self._cache:
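The rewritten get_usable_table_names enumerates only the databases named in the include list, probing each handler for an optional all parameter before requesting every schema. A minimal, self-contained sketch of that signature probe:

import inspect


def get_tables(all=False):  # stand-in for a handler method
    return "all schemas" if all else "default schema only"


if "all" in inspect.signature(get_tables).parameters:
    response = get_tables(all=True)
else:
    response = get_tables()

assert response == "all schemas"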
@@ -269,7 +338,28 @@
         Returns:
             Iterable[str]: list with knowledge base names
         """
-        cache_key = f"{ctx.company_id}_{self.knowledge_base_database}_knowledge_bases"
+
+        if not self._knowledge_bases_to_include and not self._knowledge_bases_to_ignore:
+            # white or black list have to be set
+            return []
+
+        # Filter knowledge bases based on ignore list
+        kb_names = []
+        for kb_name in self.get_all_knowledge_base_names():
+            kb = Identifier(parts=[self.knowledge_base_database, kb_name])
+            if self._knowledge_bases_to_include and not self._knowledge_bases_to_include.match(kb):
+                continue
+            if not self._knowledge_bases_to_ignore.match(kb):
+                kb_names.append(kb_name)
+        return kb_names
+
+    def get_all_knowledge_base_names(self) -> Iterable[str]:
+        """Get a list of all knowledge bases
+
+        Returns:
+            Iterable[str]: list with knowledge base names
+        """
+        # cache_key = f"{ctx.company_id}_{self.knowledge_base_database}_knowledge_bases"
 
         # todo we need to fix the cache, file cache can potentially store out of data information
         # # first check cache and return if found
@@ -278,58 +368,18 @@
         # if cached_kbs:
         #     return cached_kbs
 
-        if self._knowledge_bases_to_include:
-            return self._knowledge_bases_to_include
-
         try:
             # Query to get all knowledge bases
-            query = f"SHOW KNOWLEDGE_BASES FROM {self.knowledge_base_database};"
-            try:
-                result = self._call_engine(query, database=self.knowledge_base_database)
-            except Exception as e:
-                # If the direct query fails, try a different approach
-                # This handles the case where knowledge_base_database is not a valid integration
-                logger.warning(f"Error querying knowledge bases from {self.knowledge_base_database}: {str(e)}")
-                # Try to get knowledge bases directly from the project database
-                try:
-                    # Get knowledge bases from the project database
-                    kb_controller = self._command_executor.session.kb_controller
-                    kb_names = [kb["name"] for kb in kb_controller.list()]
-
-                    # Filter knowledge bases based on include list
-                    if self._knowledge_bases_to_include:
-                        kb_names = [kb_name for kb_name in kb_names if kb_name in self._knowledge_bases_to_include]
-                        if not kb_names:
-                            logger.warning(
-                                f"No knowledge bases found in the include list: {self._knowledge_bases_to_include}"
-                            )
-                            return []
-
-                        return kb_names
-
-                    # Filter knowledge bases based on ignore list
-                    kb_names = [kb_name for kb_name in kb_names if kb_name not in self._knowledge_bases_to_ignore]
-
-                    if self._cache:
-                        self._cache.set(cache_key, set(kb_names))
-
-                    return kb_names
-                except Exception as inner_e:
-                    logger.error(f"Error getting knowledge bases from kb_controller: {str(inner_e)}")
-                    return []
-
-            if not result:
-                return []
+            ast_query = Show(category="Knowledge Bases")
+            result = self._command_executor.execute_command(ast_query, database_name=self.knowledge_base_database)
 
             # Filter knowledge bases based on ignore list
             kb_names = []
-            for row in result:
-                kb_name = row["name"]
-                if kb_name not in self._knowledge_bases_to_ignore:
-                    kb_names.append(kb_name)
+            for row in result.data.records:
+                kb_names.append(row["NAME"])
 
-            if self._cache:
-                self._cache.set(cache_key, set(kb_names))
+            # if self._cache:
+            #     self._cache.set(cache_key, set(kb_names))
 
             return kb_names
         except Exception as e:
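The fallback-laden string query is gone: the new code builds the SHOW statement as an AST node and runs it through the command executor. A sketch of the AST construction (assuming Show is importable from mindsdb_sql_parser.ast, as the isinstance checks in this file suggest; the rendered SQL is approximate):

from mindsdb_sql_parser.ast import Show

ast_query = Show(category="Knowledge Bases")
print(ast_query.to_string())  # renders roughly as: SHOW KNOWLEDGE BASES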
@@ -369,7 +419,7 @@
             table_identifier = tables_idx.get(tuple(table_parts))
 
             if table_identifier is None:
-                raise ValueError(f"Table {table} not found in the database")
+                raise ValueError(f"Table {table_name} not found in the database")
             tables.append(table_identifier)
 
         return tables
@@ -411,13 +461,14 @@
         if len(parts) == 1:
             raise ValueError(f"Invalid table name: {name}. Expected format is 'database.table'.")
 
-        database_table_map[parts[0]] = database_table_map.get(parts[0], []) + [parts[1]]
+        database_table_map.setdefault(parts[0], []).append(parts[1])
 
         data_catalog_str = ""
         for database_name, table_names in database_table_map.items():
             data_catalog_reader = DataCatalogReader(database_name=database_name, table_names=table_names)
 
-            data_catalog_str += data_catalog_reader.read_metadata_as_string()
+            result = data_catalog_reader.read_metadata_as_string()
+            data_catalog_str += str(result or "")
 
         return data_catalog_str
 
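The setdefault change is behavior-preserving but appends in place instead of rebuilding the list on every insert, and the str(result or "") guard tolerates a None return from the catalog reader. A tiny sketch of the mapping pattern:

database_table_map = {}
for db, table in [("db1", "t1"), ("db1", "t2"), ("db2", "t3")]:
    database_table_map.setdefault(db, []).append(table)

assert database_table_map == {"db1": ["t1", "t2"], "db2": ["t3"]}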
@@ -430,7 +481,7 @@
 
         split = name.split(".")
         if len(split) > 1:
-            all_tables.append(Identifier(parts=[split[0], split[1]]))
+            all_tables.append(Identifier(parts=[split[0], split[-1]]))
         else:
             all_tables.append(Identifier(name))
 
mindsdb/interfaces/storage/db.py

@@ -684,10 +684,10 @@ class MetaColumns(Base):
         if self.default_value:
             column_info += f"\n{pad}- Default Value: {self.default_value}"
 
-        if self.meta_column_statistics:
+        stats = self.meta_column_statistics or []
+        if stats and callable(getattr(stats[0], "as_string", None)):
             column_info += f"\n\n{pad}- Column Statistics:"
-            column_info += f"\n{self.meta_column_statistics[0].as_string(indent + 4)}"
-
+            column_info += f"\n{stats[0].as_string(indent + 4)}"
         return column_info
 
 
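The guard above avoids an AttributeError when the relationship is empty or its first entry lacks a usable as_string method. A standalone sketch of the pattern:

stats = []  # stand-in for self.meta_column_statistics or []
if stats and callable(getattr(stats[0], "as_string", None)):
    rendered = stats[0].as_string(4)
else:
    rendered = ""  # nothing to render; the old code could raise here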
@@ -708,18 +708,20 @@ class MetaColumnStatistics(Base):
         inner_pad = " " * (indent + 4)
 
         column_statistics = ""
+        most_common_values = self.most_common_values or []
+        most_common_frequencies = self.most_common_frequencies or []
 
-        if any(self.most_common_values) and any(self.most_common_frequencies):
+        if most_common_values and most_common_frequencies:
             column_statistics += f"{pad}- Top 10 Most Common Values and Frequencies:"
-            for i in range(min(10, len(self.most_common_values))):
-                freq = self.most_common_frequencies[i]
+            for i in range(min(10, len(most_common_values))):
+                freq = most_common_frequencies[i]
                 try:
                     percent = float(freq) * 100
                     freq_str = f"{percent:.2f}%"
                 except (ValueError, TypeError):
                     freq_str = str(freq)
 
-                column_statistics += f"\n{inner_pad}- {self.most_common_values[i]}: {freq_str}"
+                column_statistics += f"\n{inner_pad}- {most_common_values[i]}: {freq_str}"
             column_statistics += "\n"
 
         if self.null_percentage:
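The loop above formats each stored frequency as a percentage, falling back to the raw string when the value is not numeric. A compact sketch of that conversion:

for freq in ("0.4215", "n/a"):
    try:
        freq_str = f"{float(freq) * 100:.2f}%"
    except (ValueError, TypeError):
        freq_str = str(freq)
    print(freq_str)  # prints 42.15% then n/a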