PyPI - MindsDB - Versions diffs - 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl - Mend

MindsDB 25.9.2.0a1 (py3-none-any.whl) → 25.9.3rc1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (116)

mindsdb/__about__.py +1 -1
mindsdb/__main__.py +39 -20
mindsdb/api/a2a/agent.py +7 -9
mindsdb/api/a2a/common/server/server.py +3 -3
mindsdb/api/a2a/common/server/task_manager.py +4 -4
mindsdb/api/a2a/task_manager.py +15 -17
mindsdb/api/common/middleware.py +9 -11
mindsdb/api/executor/command_executor.py +2 -4
mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
mindsdb/api/executor/exceptions.py +29 -10
mindsdb/api/executor/planner/plan_join.py +17 -3
mindsdb/api/executor/sql_query/sql_query.py +74 -74
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
mindsdb/api/executor/utilities/functions.py +6 -6
mindsdb/api/executor/utilities/sql.py +32 -16
mindsdb/api/http/gui.py +5 -11
mindsdb/api/http/initialize.py +8 -10
mindsdb/api/http/namespaces/agents.py +10 -12
mindsdb/api/http/namespaces/analysis.py +13 -20
mindsdb/api/http/namespaces/auth.py +1 -1
mindsdb/api/http/namespaces/config.py +15 -11
mindsdb/api/http/namespaces/databases.py +140 -201
mindsdb/api/http/namespaces/file.py +15 -4
mindsdb/api/http/namespaces/handlers.py +7 -2
mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
mindsdb/api/http/namespaces/models.py +94 -126
mindsdb/api/http/namespaces/projects.py +13 -22
mindsdb/api/http/namespaces/sql.py +33 -25
mindsdb/api/http/namespaces/tab.py +27 -37
mindsdb/api/http/namespaces/views.py +1 -1
mindsdb/api/http/start.py +14 -8
mindsdb/api/mcp/__init__.py +2 -1
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
mindsdb/integrations/libs/api_handler.py +10 -10
mindsdb/integrations/libs/base.py +4 -4
mindsdb/integrations/libs/llm/utils.py +2 -2
mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
mindsdb/integrations/libs/process_cache.py +132 -140
mindsdb/integrations/libs/response.py +18 -12
mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
mindsdb/integrations/utilities/files/file_reader.py +6 -7
mindsdb/integrations/utilities/rag/config_loader.py +37 -26
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
mindsdb/integrations/utilities/rag/settings.py +58 -133
mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
mindsdb/interfaces/agents/agents_controller.py +2 -1
mindsdb/interfaces/agents/constants.py +0 -2
mindsdb/interfaces/agents/litellm_server.py +34 -58
mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
mindsdb/interfaces/chatbot/polling.py +30 -18
mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
mindsdb/interfaces/database/integrations.py +19 -2
mindsdb/interfaces/file/file_controller.py +6 -6
mindsdb/interfaces/functions/controller.py +1 -1
mindsdb/interfaces/functions/to_markdown.py +2 -2
mindsdb/interfaces/jobs/jobs_controller.py +5 -5
mindsdb/interfaces/jobs/scheduler.py +3 -8
mindsdb/interfaces/knowledge_base/controller.py +50 -23
mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
mindsdb/interfaces/model/model_controller.py +170 -166
mindsdb/interfaces/query_context/context_controller.py +14 -2
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
mindsdb/interfaces/skills/retrieval_tool.py +43 -50
mindsdb/interfaces/skills/skill_tool.py +2 -2
mindsdb/interfaces/skills/sql_agent.py +25 -19
mindsdb/interfaces/storage/fs.py +114 -169
mindsdb/interfaces/storage/json.py +19 -18
mindsdb/interfaces/tabs/tabs_controller.py +49 -72
mindsdb/interfaces/tasks/task_monitor.py +3 -9
mindsdb/interfaces/tasks/task_thread.py +7 -9
mindsdb/interfaces/triggers/trigger_task.py +7 -13
mindsdb/interfaces/triggers/triggers_controller.py +47 -50
mindsdb/migrations/migrate.py +16 -16
mindsdb/utilities/api_status.py +58 -0
mindsdb/utilities/config.py +49 -0
mindsdb/utilities/exception.py +40 -1
mindsdb/utilities/fs.py +0 -1
mindsdb/utilities/hooks/profiling.py +17 -14
mindsdb/utilities/langfuse.py +40 -45
mindsdb/utilities/log.py +272 -0
mindsdb/utilities/ml_task_queue/consumer.py +52 -58
mindsdb/utilities/ml_task_queue/producer.py +26 -30
mindsdb/utilities/render/sqlalchemy_render.py +7 -6
mindsdb/utilities/utils.py +2 -2
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0

mindsdb/integrations/utilities/rag/settings.py CHANGED Viewed

@@ -8,7 +8,7 @@ from langchain_core.embeddings import Embeddings
 from langchain_core.language_models import BaseChatModel
 from langchain_core.vectorstores import VectorStore
 from langchain_core.stores import BaseStore
-from pydantic import BaseModel, Field, field_validator
+from pydantic import BaseModel, Field, field_validator, ConfigDict
 from langchain_text_splitters import TextSplitter
 DEFAULT_COLLECTION_NAME = "default_collection"
@@ -32,6 +32,11 @@ DEFAULT_VECTOR_STORE = Chroma
 DEFAULT_RERANKER_FLAG = False
 DEFAULT_RERANKING_MODEL = "gpt-4o"
 DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1"
+DEFAULT_RERANKER_N = 1
+DEFAULT_RERANKER_LOGPROBS = True
+DEFAULT_RERANKER_TOP_LOGPROBS = 4
+DEFAULT_RERANKER_MAX_TOKENS = 100
+DEFAULT_VALID_CLASS_TOKENS = ["1", "2", "3", "4"]
 DEFAULT_AUTO_META_PROMPT_TEMPLATE = """
 Below is a json representation of a table with information about {description}.
 Return a JSON list with an entry for each column. Each entry should have
@@ -366,14 +371,13 @@ DEFAULT_NUM_QUERY_RETRIES = 2
 class LLMConfig(BaseModel):
-    model_name: str = Field(
-        default=DEFAULT_LLM_MODEL, description="LLM model to use for generation"
-    )
+    model_name: str = Field(default=DEFAULT_LLM_MODEL, description="LLM model to use for generation")
     provider: str = Field(
         default=DEFAULT_LLM_MODEL_PROVIDER,
         description="LLM model provider to use for generation",
     )
     params: Dict[str, Any] = Field(default_factory=dict)
+    model_config = ConfigDict(protected_namespaces=())
 class MultiVectorRetrieverMode(Enum):
@@ -430,9 +434,7 @@ class SearchType(Enum):
 class SearchKwargs(BaseModel):
     k: int = Field(default=DEFAULT_K, description="Amount of documents to return", ge=1)
-    filter: Optional[Dict[str, Any]] = Field(
-        default=None, description="Filter by document metadata"
-    )
+    filter: Optional[Dict[str, Any]] = Field(default=None, description="Filter by document metadata")
     # For similarity_score_threshold search type
     score_threshold: Optional[float] = Field(
         default=None,
@@ -441,9 +443,7 @@ class SearchKwargs(BaseModel):
         le=1.0,
     )
     # For MMR search type
-    fetch_k: Optional[int] = Field(
-        default=None, description="Amount of documents to pass to MMR algorithm", ge=1
-    )
+    fetch_k: Optional[int] = Field(default=None, description="Amount of documents to pass to MMR algorithm", ge=1)
     lambda_mult: Optional[float] = Field(
         default=None,
         description="Diversity of results returned by MMR (1=min diversity, 0=max)",
@@ -459,9 +459,7 @@ class SearchKwargs(BaseModel):
 class LLMExample(BaseModel):
     input: str = Field(description="User input for the example")
-    output: str = Field(
-        description="What the LLM should generate for this example's input"
-    )
+    output: str = Field(description="What the LLM should generate for this example's input")
 class ValueSchema(BaseModel):
@@ -502,41 +500,25 @@ class ValueSchema(BaseModel):
 class MetadataConfig(BaseModel):
     """Class to configure metadata for retrieval. Only supports very basic document name lookup at the moment."""
-    table: str = Field(
-        description="Source table for metadata."
-    )
+    table: str = Field(description="Source table for metadata.")
     max_document_context: int = Field(
         # To work well with models with context window of 32768.
         default=16384,
-        description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens"
-    )
-    embeddings_table: str = Field(
-        default="embeddings",
-        description="Source table for embeddings"
-    )
-    id_column: str = Field(
-        default="Id",
-        description="Name of ID column in metadata table"
-    )
-    name_column: str = Field(
-        default="Title",
-        description="Name of column containing name or title of document"
-    )
-    name_column_index: Optional[str] = Field(
-        default=None,
-        description="Name of GIN index to use when looking up name."
+        description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens",
     )
+    embeddings_table: str = Field(default="embeddings", description="Source table for embeddings")
+    id_column: str = Field(default="Id", description="Name of ID column in metadata table")
+    name_column: str = Field(default="Title", description="Name of column containing name or title of document")
+    name_column_index: Optional[str] = Field(default=None, description="Name of GIN index to use when looking up name.")
     content_column: str = Field(
-        default="content",
-        description="Name of column in embeddings table containing chunk content"
+        default="content", description="Name of column in embeddings table containing chunk content"
     )
     embeddings_metadata_column: str = Field(
-        default="metadata",
-        description="Name of column in embeddings table containing chunk metadata"
+        default="metadata", description="Name of column in embeddings table containing chunk metadata"
     )
     doc_id_key: str = Field(
-        default="original_row_id",
-        description="Metadata field that links an embedded chunk back to source document ID"
+        default="original_row_id", description="Metadata field that links an embedded chunk back to source document ID"
     )
@@ -552,14 +534,12 @@ class ColumnSchema(BaseModel):
         ]
     ] = Field(
         default=None,
-        description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema."
+        description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema.",
     )
     example_questions: Optional[List[LLMExample]] = Field(
         default=None, description="Example questions where this table is useful."
     )
-    max_filters: Optional[int] = Field(
-        default=1, description="Maximum number of filters to generate for this column."
-    )
+    max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this column.")
     filter_threshold: Optional[float] = Field(
         default=0.0,
         description="Minimum relevance threshold to include metadata filters from this column.",
@@ -578,9 +558,7 @@ class TableSchema(BaseModel):
     table: str = Field(description="Name of table in the database")
     description: str = Field(description="Description of what the table represents")
     usage: str = Field(description="How and when to use this Table for search.")
-    columns: Optional[
-        Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]
-    ] = Field(
+    columns: Optional[Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]] = Field(
         description="Dict or Ordered Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
     )
     example_questions: Optional[List[LLMExample]] = Field(
@@ -590,9 +568,7 @@ class TableSchema(BaseModel):
         description="SQL join string to join this table with source documents table",
         default="",
     )
-    max_filters: Optional[int] = Field(
-        default=1, description="Maximum number of filters to generate for this table."
-    )
+    max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this table.")
     filter_threshold: Optional[float] = Field(
         default=0.0,
         description="Minimum relevance required to use this table to generate filters.",
@@ -675,12 +651,8 @@ class SQLRetrieverConfig(BaseModel):
     source_table: str = Field(
         description="Name of the source table containing the original documents that were embedded"
     )
-    source_id_column: str = Field(
-        description="Name of the column containing the UUID.", default="Id"
-    )
-    max_filters: Optional[int] = Field(
-        description="Maximum number of filters to generate for sql queries.", default=10
-    )
+    source_id_column: str = Field(description="Name of the column containing the UUID.", default="Id")
+    max_filters: Optional[int] = Field(description="Maximum number of filters to generate for sql queries.", default=10)
     filter_threshold: Optional[float] = Field(
         description="Minimum relevance required to use this Database to generate filters.",
         default=0.0,
@@ -728,6 +700,11 @@ class RerankerConfig(BaseModel):
     retry_delay: float = 1.0
     early_stop: bool = True  # Whether to enable early stopping
     early_stop_threshold: float = 0.8  # Confidence threshold for early stopping
+    n: int = DEFAULT_RERANKER_N  # Number of completions to generate
+    logprobs: bool = DEFAULT_RERANKER_LOGPROBS  # Whether to include log probabilities
+    top_logprobs: int = DEFAULT_RERANKER_TOP_LOGPROBS  # Number of top log probabilities to include
+    max_tokens: int = DEFAULT_RERANKER_MAX_TOKENS  # Maximum tokens to generate
+    valid_class_tokens: List[str] = DEFAULT_VALID_CLASS_TOKENS  # Valid class tokens to look for in the response
 class MultiHopRetrieverConfig(BaseModel):
@@ -737,9 +714,7 @@ class MultiHopRetrieverConfig(BaseModel):
         default=RetrieverType.VECTOR_STORE,
         description="Type of base retriever to use for multi-hop retrieval",
     )
-    max_hops: int = Field(
-        default=3, description="Maximum number of follow-up questions to generate", ge=1
-    )
+    max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate", ge=1)
     reformulation_template: str = Field(
         default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE,
         description="Template for reformulating questions",
@@ -751,48 +726,29 @@ class MultiHopRetrieverConfig(BaseModel):
 class RAGPipelineModel(BaseModel):
-    documents: Optional[List[Document]] = Field(
-        default=None, description="List of documents"
-    )
+    documents: Optional[List[Document]] = Field(default=None, description="List of documents")
     vector_store_config: VectorStoreConfig = Field(
         default_factory=VectorStoreConfig, description="Vector store configuration"
     )
     llm: Optional[BaseChatModel] = Field(default=None, description="Language model")
-    llm_model_name: str = Field(
-        default=DEFAULT_LLM_MODEL, description="Language model name"
-    )
-    llm_provider: Optional[str] = Field(
-        default=None, description="Language model provider"
-    )
+    llm_model_name: str = Field(default=DEFAULT_LLM_MODEL, description="Language model name")
+    llm_provider: Optional[str] = Field(default=None, description="Language model provider")
     vector_store: VectorStore = Field(
         default_factory=lambda: vector_store_map[VectorStoreConfig().vector_store_type],
         description="Vector store",
     )
-    db_connection_string: Optional[str] = Field(
-        default=None, description="Database connection string"
-    )
+    db_connection_string: Optional[str] = Field(default=None, description="Database connection string")
     metadata_config: Optional[MetadataConfig] = Field(
-        default=None,
-        description="Configuration for metadata to be used for retrieval"
+        default=None, description="Configuration for metadata to be used for retrieval"
     )
     table_name: str = Field(default=DEFAULT_TEST_TABLE_NAME, description="Table name")
-    embedding_model: Optional[Embeddings] = Field(
-        default=None, description="Embedding model"
-    )
-    rag_prompt_template: str = Field(
-        default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template"
-    )
-    retriever_prompt_template: Optional[Union[str, dict]] = Field(
-        default=None, description="Retriever prompt template"
-    )
-    retriever_type: RetrieverType = Field(
-        default=RetrieverType.VECTOR_STORE, description="Retriever type"
-    )
-    search_type: SearchType = Field(
-        default=SearchType.SIMILARITY, description="Type of search to perform"
-    )
+    embedding_model: Optional[Embeddings] = Field(default=None, description="Embedding model")
+    rag_prompt_template: str = Field(default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template")
+    retriever_prompt_template: Optional[Union[str, dict]] = Field(default=None, description="Retriever prompt template")
+    retriever_type: RetrieverType = Field(default=RetrieverType.VECTOR_STORE, description="Retriever type")
+    search_type: SearchType = Field(default=SearchType.SIMILARITY, description="Type of search to perform")
     search_kwargs: SearchKwargs = Field(
         default_factory=SearchKwargs,
         description="Search configuration for the retriever",
@@ -811,39 +767,23 @@ class RAGPipelineModel(BaseModel):
     multi_retriever_mode: MultiVectorRetrieverMode = Field(
         default=MultiVectorRetrieverMode.BOTH, description="Multi retriever mode"
     )
-    max_concurrency: int = Field(
-        default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency"
-    )
+    max_concurrency: int = Field(default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency")
     id_key: int = Field(default=DEFAULT_ID_KEY, description="ID key")
     parent_store: Optional[BaseStore] = Field(default=None, description="Parent store")
-    text_splitter: Optional[TextSplitter] = Field(
-        default=None, description="Text splitter"
-    )
+    text_splitter: Optional[TextSplitter] = Field(default=None, description="Text splitter")
     chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Chunk size")
-    chunk_overlap: int = Field(
-        default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap"
-    )
+    chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap")
     # Auto retriever specific
-    auto_retriever_filter_columns: Optional[List[str]] = Field(
-        default=None, description="Filter columns"
-    )
-    cardinality_threshold: int = Field(
-        default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold"
-    )
+    auto_retriever_filter_columns: Optional[List[str]] = Field(default=None, description="Filter columns")
+    cardinality_threshold: int = Field(default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold")
     content_column_name: str = Field(
         default=DEFAULT_CONTENT_COLUMN_NAME,
         description="Content column name (the column we will get embeddings)",
     )
-    dataset_description: str = Field(
-        default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset"
-    )
-    reranker: bool = Field(
-        default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker"
-    )
-    reranker_config: RerankerConfig = Field(
-        default_factory=RerankerConfig, description="Reranker configuration"
-    )
+    dataset_description: str = Field(default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset")
+    reranker: bool = Field(default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker")
+    reranker_config: RerankerConfig = Field(default_factory=RerankerConfig, description="Reranker configuration")
     multi_hop_config: Optional[MultiHopRetrieverConfig] = Field(
         default=None,
@@ -856,9 +796,7 @@ class RAGPipelineModel(BaseModel):
         """Validate that multi_hop_config is set when using multi-hop retrieval."""
         values = info.data
         if values.get("retriever_type") == RetrieverType.MULTI_HOP and v is None:
-            raise ValueError(
-                "multi_hop_config must be set when using multi-hop retrieval"
-            )
+            raise ValueError("multi_hop_config must be set when using multi-hop retrieval")
         return v
     class Config:
@@ -889,13 +827,9 @@ class RAGPipelineModel(BaseModel):
             if v.lambda_mult is not None and (v.lambda_mult < 0 or v.lambda_mult > 1):
                 raise ValueError("lambda_mult must be between 0 and 1")
             if v.fetch_k is None and v.lambda_mult is not None:
-                raise ValueError(
-                    "fetch_k is required when using lambda_mult with MMR search type"
-                )
+                raise ValueError("fetch_k is required when using lambda_mult with MMR search type")
             if v.lambda_mult is None and v.fetch_k is not None:
-                raise ValueError(
-                    "lambda_mult is required when using fetch_k with MMR search type"
-                )
+                raise ValueError("lambda_mult is required when using fetch_k with MMR search type")
         elif search_type != SearchType.MMR:
             if v.fetch_k is not None:
                 raise ValueError("fetch_k is only valid for MMR search type")
@@ -904,20 +838,11 @@ class RAGPipelineModel(BaseModel):
         # Validate similarity_score_threshold parameters
         if search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
-            if v.score_threshold is not None and (
-                v.score_threshold < 0 or v.score_threshold > 1
-            ):
+            if v.score_threshold is not None and (v.score_threshold < 0 or v.score_threshold > 1):
                 raise ValueError("score_threshold must be between 0 and 1")
             if v.score_threshold is None:
-                raise ValueError(
-                    "score_threshold is required for similarity_score_threshold search type"
-                )
-        elif (
-            search_type != SearchType.SIMILARITY_SCORE_THRESHOLD
-            and v.score_threshold is not None
-        ):
-            raise ValueError(
-                "score_threshold is only valid for similarity_score_threshold search type"
-            )
+                raise ValueError("score_threshold is required for similarity_score_threshold search type")
+        elif search_type != SearchType.SIMILARITY_SCORE_THRESHOLD and v.score_threshold is not None:
+            raise ValueError("score_threshold is only valid for similarity_score_threshold search type")
         return v

mindsdb/integrations/utilities/rag/splitters/file_splitter.py CHANGED Viewed

@@ -45,15 +45,11 @@ class FileSplitterConfig:
         headers_to_split_on=DEFAULT_MARKDOWN_HEADERS_TO_SPLIT_ON
     )
     # Splitter to use for HTML splitting
-    html_splitter: HTMLHeaderTextSplitter = HTMLHeaderTextSplitter(
-        headers_to_split_on=DEFAULT_HTML_HEADERS_TO_SPLIT_ON
-    )
+    html_splitter: HTMLHeaderTextSplitter = HTMLHeaderTextSplitter(headers_to_split_on=DEFAULT_HTML_HEADERS_TO_SPLIT_ON)
     def __post_init__(self):
         if self.text_chunking_config is None:
-            self.text_chunking_config = TextChunkingConfig(
-                chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
-            )
+            self.text_chunking_config = TextChunkingConfig(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)
         if self.recursive_splitter is None:
             self.recursive_splitter = RecursiveCharacterTextSplitter(
@@ -85,9 +81,7 @@ class FileSplitter:
     ) -> Union[Callable, HTMLHeaderTextSplitter, MarkdownHeaderTextSplitter]:
         return self._extension_map.get(extension, self.default_splitter)()
-    def split_documents(
-        self, documents: List[Document], default_failover: bool = True
-    ) -> List[Document]:
+    def split_documents(self, documents: List[Document], default_failover: bool = True) -> List[Document]:
         """Splits a list of documents representing files using the appropriate splitting & chunking strategies
         Args:
@@ -105,13 +99,9 @@ class FileSplitter:
             try:
                 split_documents += split_func(document.page_content)
             except Exception as e:
-                logger.error(
-                    f"Error splitting document with extension {extension}: {str(e)}"
-                )
+                logger.exception(f"Error splitting document with extension {extension}:")
                 if not default_failover:
-                    raise ValueError(
-                        f"Error splitting document with extension {extension}"
-                    ) from e
+                    raise ValueError(f"Error splitting document with extension {extension}") from e
                 # Try default splitter as a failover, if enabled.
                 split_func = self._split_func_by_extension(extension=None)
                 split_documents += split_func(document.page_content)

mindsdb/interfaces/agents/agents_controller.py CHANGED Viewed

@@ -190,7 +190,8 @@ class AgentsController:
                     db.session.commit()
         except Exception as e:
-            raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
+            logger.exception("Failed to auto-create or update SQL skill:")
+            raise ValueError(f"Failed to auto-create or update SQL skill: {e}") from e
         return skill_name

mindsdb/interfaces/agents/constants.py CHANGED Viewed

@@ -260,8 +260,6 @@ MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. You have access
 For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
-Here is the user's question: {{question}}
 TOOLS:
 ------

mindsdb/interfaces/agents/litellm_server.py CHANGED Viewed

@@ -62,7 +62,9 @@ class ChatCompletionResponse(BaseModel):
     created: int = 0
     model: str
     choices: List[ChatCompletionChoice]
-    usage: Dict[str, int] = Field(default_factory=lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
+    usage: Dict[str, int] = Field(
+        default_factory=lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+    )
 class DirectSQLRequest(BaseModel):
@@ -74,14 +76,14 @@ async def chat_completions(request: ChatCompletionRequest):
     global agent_wrapper
     if agent_wrapper is None:
-        raise HTTPException(status_code=500, detail="Agent not initialized. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
+        raise HTTPException(
+            status_code=500,
+            detail="Agent not initialized. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http",
+        )
     try:
         # Convert request to messages format
-        messages = [
-            {"role": msg.role, "content": msg.content}
-            for msg in request.messages
-        ]
+        messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
         if request.stream:
             # Return a streaming response
@@ -90,9 +92,10 @@ async def chat_completions(request: ChatCompletionRequest):
                     async for chunk in agent_wrapper.acompletion_stream(messages, model=request.model):
                         yield f"data: {json.dumps(chunk)}\n\n"
                     yield "data: [DONE]\n\n"
-                except Exception as e:
-                    logger.error(f"Streaming error: {str(e)}")
+                except Exception:
+                    logger.exception("Streaming error:")
                     yield "data: {{'error': 'Streaming failed due to an internal error.'}}\n\n"
             return StreamingResponse(generate(), media_type="text/event-stream")
         else:
             # Return a regular response
@@ -105,16 +108,11 @@ async def chat_completions(request: ChatCompletionRequest):
             # Transform to proper OpenAI format
             return ChatCompletionResponse(
-                model=request.model,
-                choices=[
-                    ChatCompletionChoice(
-                        message={"role": "assistant", "content": content}
-                    )
-                ]
+                model=request.model, choices=[ChatCompletionChoice(message={"role": "assistant", "content": content})]
             )
     except Exception as e:
-        logger.error(f"Error in chat completion: {str(e)}")
+        logger.exception("Error in chat completion:")
         raise HTTPException(status_code=500, detail=str(e))
@@ -124,7 +122,9 @@ async def direct_sql(request: DirectSQLRequest, background_tasks: BackgroundTask
     global agent_wrapper, mcp_session
     if agent_wrapper is None and mcp_session is None:
-        raise HTTPException(status_code=500, detail="No MCP session available. Make sure MindsDB server is running with MCP enabled.")
+        raise HTTPException(
+            status_code=500, detail="No MCP session available. Make sure MindsDB server is running with MCP enabled."
+        )
     try:
         # First try to use the agent's session if available
@@ -140,7 +140,7 @@ async def direct_sql(request: DirectSQLRequest, background_tasks: BackgroundTask
             raise HTTPException(status_code=500, detail="No MCP session available")
     except Exception as e:
-        logger.error(f"Error executing direct SQL: {str(e)}")
+        logger.exception("Error executing direct SQL:")
         raise HTTPException(status_code=500, detail=str(e))
@@ -150,32 +150,12 @@ async def list_models():
     global agent_wrapper
     if agent_wrapper is None:
-        return {
-            "object": "list",
-            "data": [
-                {
-                    "id": "mcp-agent",
-                    "object": "model",
-                    "created": 0,
-                    "owned_by": "mindsdb"
-                }
-            ]
-        }
+        return {"object": "list", "data": [{"id": "mcp-agent", "object": "model", "created": 0, "owned_by": "mindsdb"}]}
     # Return the actual model name if available
     model_name = agent_wrapper.agent.args.get("model_name", "mcp-agent")
-    return {
-        "object": "list",
-        "data": [
-            {
-                "id": model_name,
-                "object": "model",
-                "created": 0,
-                "owned_by": "mindsdb"
-            }
-        ]
-    }
+    return {"object": "list", "data": [{"id": model_name, "object": "model", "created": 0, "owned_by": "mindsdb"}]}
 @app.get("/health")
@@ -189,7 +169,9 @@ async def health_check():
     }
     if agent_wrapper is not None:
-        health_status["mcp_connected"] = hasattr(agent_wrapper.agent, "session") and agent_wrapper.agent.session is not None
+        health_status["mcp_connected"] = (
+            hasattr(agent_wrapper.agent, "session") and agent_wrapper.agent.session is not None
+        )
         health_status["agent_name"] = agent_wrapper.agent.agent.name
         health_status["model_name"] = agent_wrapper.agent.args.get("model_name", "unknown")
@@ -209,7 +191,7 @@ async def test_mcp_connection():
                 return {
                     "status": "ok",
                     "message": "Successfully connected to MCP server",
-                    "tools": [tool.name for tool in tools_response.tools]
+                    "tools": [tool.name for tool in tools_response.tools],
                 }
             except Exception:
                 # If error, close existing session and create a new one
@@ -217,11 +199,7 @@ async def test_mcp_connection():
                 mcp_session = None
         # Create a new MCP session - connect to running server
-        server_params = StdioServerParameters(
-            command="python",
-            args=["-m", "mindsdb", "--api=mcp"],
-            env=None
-        )
+        server_params = StdioServerParameters(command="python", args=["-m", "mindsdb", "--api=mcp"], env=None)
         stdio_transport = await exit_stack.enter_async_context(stdio_client(server_params))
         stdio, write = stdio_transport
@@ -238,11 +216,11 @@ async def test_mcp_connection():
         return {
             "status": "ok",
             "message": "Successfully connected to MCP server",
-            "tools": [tool.name for tool in tools_response.tools]
+            "tools": [tool.name for tool in tools_response.tools],
         }
     except Exception as e:
-        logger.error(f"Error connecting to MCP server: {str(e)}")
-        error_detail = f"Error connecting to MCP server: {str(e)}. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http"
+        logger.exception("Error connecting to MCP server:")
+        error_detail = f"Error connecting to MCP server: {str(e)}. Make sure MindsDB server is running with HTTP enabled: python -m mindsdb --api=http"
         raise HTTPException(status_code=500, detail=error_detail)
@@ -256,16 +234,13 @@ async def init_agent(agent_name: str, project_name: str, mcp_host: str, mcp_port
         logger.info("Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
         agent_wrapper = create_mcp_agent(
-            agent_name=agent_name,
-            project_name=project_name,
-            mcp_host=mcp_host,
-            mcp_port=mcp_port
+            agent_name=agent_name, project_name=project_name, mcp_host=mcp_host, mcp_port=mcp_port
         )
         logger.info("Agent initialized successfully")
         return True
-    except Exception as e:
-        logger.error(f"Failed to initialize agent: {str(e)}")
+    except Exception:
+        logger.exception("Failed to initialize agent:")
         return False
@@ -286,7 +261,7 @@ async def run_server_async(
     mcp_host: str = "127.0.0.1",
     mcp_port: int = 47337,
     host: str = "0.0.0.0",
-    port: int = 8000
+    port: int = 8000,
 ):
     """Run the FastAPI server"""
     # Initialize the agent
@@ -304,12 +279,13 @@ def run_server(
     mcp_host: str = "127.0.0.1",
     mcp_port: int = 47337,
     host: str = "0.0.0.0",
-    port: int = 8000
+    port: int = 8000,
 ):
     """Run the FastAPI server"""
     logger.info("Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
     # Initialize database
     from mindsdb.interfaces.storage import db
     db.init()
     # Run initialization in the event loop
@@ -341,5 +317,5 @@ if __name__ == "__main__":
         mcp_host=args.mcp_host,
         mcp_port=args.mcp_port,
         host=args.host,
-        port=args.port
+        port=args.port,
     )

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

Potentially problematic release.

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl