MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (116)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +39 -20
  3. mindsdb/api/a2a/agent.py +7 -9
  4. mindsdb/api/a2a/common/server/server.py +3 -3
  5. mindsdb/api/a2a/common/server/task_manager.py +4 -4
  6. mindsdb/api/a2a/task_manager.py +15 -17
  7. mindsdb/api/common/middleware.py +9 -11
  8. mindsdb/api/executor/command_executor.py +2 -4
  9. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  12. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  13. mindsdb/api/executor/exceptions.py +29 -10
  14. mindsdb/api/executor/planner/plan_join.py +17 -3
  15. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  16. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  18. mindsdb/api/executor/utilities/functions.py +6 -6
  19. mindsdb/api/executor/utilities/sql.py +32 -16
  20. mindsdb/api/http/gui.py +5 -11
  21. mindsdb/api/http/initialize.py +8 -10
  22. mindsdb/api/http/namespaces/agents.py +10 -12
  23. mindsdb/api/http/namespaces/analysis.py +13 -20
  24. mindsdb/api/http/namespaces/auth.py +1 -1
  25. mindsdb/api/http/namespaces/config.py +15 -11
  26. mindsdb/api/http/namespaces/databases.py +140 -201
  27. mindsdb/api/http/namespaces/file.py +15 -4
  28. mindsdb/api/http/namespaces/handlers.py +7 -2
  29. mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
  30. mindsdb/api/http/namespaces/models.py +94 -126
  31. mindsdb/api/http/namespaces/projects.py +13 -22
  32. mindsdb/api/http/namespaces/sql.py +33 -25
  33. mindsdb/api/http/namespaces/tab.py +27 -37
  34. mindsdb/api/http/namespaces/views.py +1 -1
  35. mindsdb/api/http/start.py +14 -8
  36. mindsdb/api/mcp/__init__.py +2 -1
  37. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  38. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  39. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  40. mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
  41. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
  42. mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
  43. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  44. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  45. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
  46. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  47. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  48. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  49. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  50. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  51. mindsdb/integrations/libs/api_handler.py +10 -10
  52. mindsdb/integrations/libs/base.py +4 -4
  53. mindsdb/integrations/libs/llm/utils.py +2 -2
  54. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  55. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  56. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  57. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  58. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  59. mindsdb/integrations/libs/process_cache.py +132 -140
  60. mindsdb/integrations/libs/response.py +18 -12
  61. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  62. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  63. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  64. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
  65. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  66. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  67. mindsdb/integrations/utilities/rag/settings.py +58 -133
  68. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  69. mindsdb/interfaces/agents/agents_controller.py +2 -1
  70. mindsdb/interfaces/agents/constants.py +0 -2
  71. mindsdb/interfaces/agents/litellm_server.py +34 -58
  72. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  73. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  74. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  75. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  76. mindsdb/interfaces/chatbot/polling.py +30 -18
  77. mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
  78. mindsdb/interfaces/database/integrations.py +19 -2
  79. mindsdb/interfaces/file/file_controller.py +6 -6
  80. mindsdb/interfaces/functions/controller.py +1 -1
  81. mindsdb/interfaces/functions/to_markdown.py +2 -2
  82. mindsdb/interfaces/jobs/jobs_controller.py +5 -5
  83. mindsdb/interfaces/jobs/scheduler.py +3 -8
  84. mindsdb/interfaces/knowledge_base/controller.py +50 -23
  85. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  86. mindsdb/interfaces/model/model_controller.py +170 -166
  87. mindsdb/interfaces/query_context/context_controller.py +14 -2
  88. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
  89. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  90. mindsdb/interfaces/skills/skill_tool.py +2 -2
  91. mindsdb/interfaces/skills/sql_agent.py +25 -19
  92. mindsdb/interfaces/storage/fs.py +114 -169
  93. mindsdb/interfaces/storage/json.py +19 -18
  94. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  95. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  96. mindsdb/interfaces/tasks/task_thread.py +7 -9
  97. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  98. mindsdb/interfaces/triggers/triggers_controller.py +47 -50
  99. mindsdb/migrations/migrate.py +16 -16
  100. mindsdb/utilities/api_status.py +58 -0
  101. mindsdb/utilities/config.py +49 -0
  102. mindsdb/utilities/exception.py +40 -1
  103. mindsdb/utilities/fs.py +0 -1
  104. mindsdb/utilities/hooks/profiling.py +17 -14
  105. mindsdb/utilities/langfuse.py +40 -45
  106. mindsdb/utilities/log.py +272 -0
  107. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  108. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  109. mindsdb/utilities/render/sqlalchemy_render.py +7 -6
  110. mindsdb/utilities/utils.py +2 -2
  111. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
  112. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
  113. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  114. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
  115. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
  116. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
@@ -8,7 +8,7 @@ from langchain_core.embeddings import Embeddings
8
8
  from langchain_core.language_models import BaseChatModel
9
9
  from langchain_core.vectorstores import VectorStore
10
10
  from langchain_core.stores import BaseStore
11
- from pydantic import BaseModel, Field, field_validator
11
+ from pydantic import BaseModel, Field, field_validator, ConfigDict
12
12
  from langchain_text_splitters import TextSplitter
13
13
 
14
14
  DEFAULT_COLLECTION_NAME = "default_collection"
@@ -32,6 +32,11 @@ DEFAULT_VECTOR_STORE = Chroma
32
32
  DEFAULT_RERANKER_FLAG = False
33
33
  DEFAULT_RERANKING_MODEL = "gpt-4o"
34
34
  DEFAULT_LLM_ENDPOINT = "https://api.openai.com/v1"
35
+ DEFAULT_RERANKER_N = 1
36
+ DEFAULT_RERANKER_LOGPROBS = True
37
+ DEFAULT_RERANKER_TOP_LOGPROBS = 4
38
+ DEFAULT_RERANKER_MAX_TOKENS = 100
39
+ DEFAULT_VALID_CLASS_TOKENS = ["1", "2", "3", "4"]
35
40
  DEFAULT_AUTO_META_PROMPT_TEMPLATE = """
36
41
  Below is a json representation of a table with information about {description}.
37
42
  Return a JSON list with an entry for each column. Each entry should have
@@ -366,14 +371,13 @@ DEFAULT_NUM_QUERY_RETRIES = 2
366
371
 
367
372
 
368
373
  class LLMConfig(BaseModel):
369
- model_name: str = Field(
370
- default=DEFAULT_LLM_MODEL, description="LLM model to use for generation"
371
- )
374
+ model_name: str = Field(default=DEFAULT_LLM_MODEL, description="LLM model to use for generation")
372
375
  provider: str = Field(
373
376
  default=DEFAULT_LLM_MODEL_PROVIDER,
374
377
  description="LLM model provider to use for generation",
375
378
  )
376
379
  params: Dict[str, Any] = Field(default_factory=dict)
380
+ model_config = ConfigDict(protected_namespaces=())
377
381
 
378
382
 
379
383
  class MultiVectorRetrieverMode(Enum):
@@ -430,9 +434,7 @@ class SearchType(Enum):
430
434
 
431
435
  class SearchKwargs(BaseModel):
432
436
  k: int = Field(default=DEFAULT_K, description="Amount of documents to return", ge=1)
433
- filter: Optional[Dict[str, Any]] = Field(
434
- default=None, description="Filter by document metadata"
435
- )
437
+ filter: Optional[Dict[str, Any]] = Field(default=None, description="Filter by document metadata")
436
438
  # For similarity_score_threshold search type
437
439
  score_threshold: Optional[float] = Field(
438
440
  default=None,
@@ -441,9 +443,7 @@ class SearchKwargs(BaseModel):
441
443
  le=1.0,
442
444
  )
443
445
  # For MMR search type
444
- fetch_k: Optional[int] = Field(
445
- default=None, description="Amount of documents to pass to MMR algorithm", ge=1
446
- )
446
+ fetch_k: Optional[int] = Field(default=None, description="Amount of documents to pass to MMR algorithm", ge=1)
447
447
  lambda_mult: Optional[float] = Field(
448
448
  default=None,
449
449
  description="Diversity of results returned by MMR (1=min diversity, 0=max)",
@@ -459,9 +459,7 @@ class SearchKwargs(BaseModel):
459
459
 
460
460
  class LLMExample(BaseModel):
461
461
  input: str = Field(description="User input for the example")
462
- output: str = Field(
463
- description="What the LLM should generate for this example's input"
464
- )
462
+ output: str = Field(description="What the LLM should generate for this example's input")
465
463
 
466
464
 
467
465
  class ValueSchema(BaseModel):
@@ -502,41 +500,25 @@ class ValueSchema(BaseModel):
502
500
 
503
501
  class MetadataConfig(BaseModel):
504
502
  """Class to configure metadata for retrieval. Only supports very basic document name lookup at the moment."""
505
- table: str = Field(
506
- description="Source table for metadata."
507
- )
503
+
504
+ table: str = Field(description="Source table for metadata.")
508
505
  max_document_context: int = Field(
509
506
  # To work well with models with context window of 32768.
510
507
  default=16384,
511
- description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens"
512
- )
513
- embeddings_table: str = Field(
514
- default="embeddings",
515
- description="Source table for embeddings"
516
- )
517
- id_column: str = Field(
518
- default="Id",
519
- description="Name of ID column in metadata table"
520
- )
521
- name_column: str = Field(
522
- default="Title",
523
- description="Name of column containing name or title of document"
524
- )
525
- name_column_index: Optional[str] = Field(
526
- default=None,
527
- description="Name of GIN index to use when looking up name."
508
+ description="Truncate a document before using as context with an LLM if it exceeds this amount of tokens",
528
509
  )
510
+ embeddings_table: str = Field(default="embeddings", description="Source table for embeddings")
511
+ id_column: str = Field(default="Id", description="Name of ID column in metadata table")
512
+ name_column: str = Field(default="Title", description="Name of column containing name or title of document")
513
+ name_column_index: Optional[str] = Field(default=None, description="Name of GIN index to use when looking up name.")
529
514
  content_column: str = Field(
530
- default="content",
531
- description="Name of column in embeddings table containing chunk content"
515
+ default="content", description="Name of column in embeddings table containing chunk content"
532
516
  )
533
517
  embeddings_metadata_column: str = Field(
534
- default="metadata",
535
- description="Name of column in embeddings table containing chunk metadata"
518
+ default="metadata", description="Name of column in embeddings table containing chunk metadata"
536
519
  )
537
520
  doc_id_key: str = Field(
538
- default="original_row_id",
539
- description="Metadata field that links an embedded chunk back to source document ID"
521
+ default="original_row_id", description="Metadata field that links an embedded chunk back to source document ID"
540
522
  )
541
523
 
542
524
 
@@ -552,14 +534,12 @@ class ColumnSchema(BaseModel):
552
534
  ]
553
535
  ] = Field(
554
536
  default=None,
555
- description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema."
537
+ description="One of the following. A dict or ordered dict of {schema_value: ValueSchema, ...}, where schema value is the name given for this value description in the schema.",
556
538
  )
557
539
  example_questions: Optional[List[LLMExample]] = Field(
558
540
  default=None, description="Example questions where this table is useful."
559
541
  )
560
- max_filters: Optional[int] = Field(
561
- default=1, description="Maximum number of filters to generate for this column."
562
- )
542
+ max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this column.")
563
543
  filter_threshold: Optional[float] = Field(
564
544
  default=0.0,
565
545
  description="Minimum relevance threshold to include metadata filters from this column.",
@@ -578,9 +558,7 @@ class TableSchema(BaseModel):
578
558
  table: str = Field(description="Name of table in the database")
579
559
  description: str = Field(description="Description of what the table represents")
580
560
  usage: str = Field(description="How and when to use this Table for search.")
581
- columns: Optional[
582
- Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]
583
- ] = Field(
561
+ columns: Optional[Union[OrderedDict[str, ColumnSchema], Dict[str, ColumnSchema]]] = Field(
584
562
  description="Dict or Ordered Dict of {column_name: ColumnSchemas} describing the metadata columns available for the table"
585
563
  )
586
564
  example_questions: Optional[List[LLMExample]] = Field(
@@ -590,9 +568,7 @@ class TableSchema(BaseModel):
590
568
  description="SQL join string to join this table with source documents table",
591
569
  default="",
592
570
  )
593
- max_filters: Optional[int] = Field(
594
- default=1, description="Maximum number of filters to generate for this table."
595
- )
571
+ max_filters: Optional[int] = Field(default=1, description="Maximum number of filters to generate for this table.")
596
572
  filter_threshold: Optional[float] = Field(
597
573
  default=0.0,
598
574
  description="Minimum relevance required to use this table to generate filters.",
@@ -675,12 +651,8 @@ class SQLRetrieverConfig(BaseModel):
675
651
  source_table: str = Field(
676
652
  description="Name of the source table containing the original documents that were embedded"
677
653
  )
678
- source_id_column: str = Field(
679
- description="Name of the column containing the UUID.", default="Id"
680
- )
681
- max_filters: Optional[int] = Field(
682
- description="Maximum number of filters to generate for sql queries.", default=10
683
- )
654
+ source_id_column: str = Field(description="Name of the column containing the UUID.", default="Id")
655
+ max_filters: Optional[int] = Field(description="Maximum number of filters to generate for sql queries.", default=10)
684
656
  filter_threshold: Optional[float] = Field(
685
657
  description="Minimum relevance required to use this Database to generate filters.",
686
658
  default=0.0,
@@ -728,6 +700,11 @@ class RerankerConfig(BaseModel):
728
700
  retry_delay: float = 1.0
729
701
  early_stop: bool = True # Whether to enable early stopping
730
702
  early_stop_threshold: float = 0.8 # Confidence threshold for early stopping
703
+ n: int = DEFAULT_RERANKER_N # Number of completions to generate
704
+ logprobs: bool = DEFAULT_RERANKER_LOGPROBS # Whether to include log probabilities
705
+ top_logprobs: int = DEFAULT_RERANKER_TOP_LOGPROBS # Number of top log probabilities to include
706
+ max_tokens: int = DEFAULT_RERANKER_MAX_TOKENS # Maximum tokens to generate
707
+ valid_class_tokens: List[str] = DEFAULT_VALID_CLASS_TOKENS # Valid class tokens to look for in the response
731
708
 
732
709
 
733
710
  class MultiHopRetrieverConfig(BaseModel):
@@ -737,9 +714,7 @@ class MultiHopRetrieverConfig(BaseModel):
737
714
  default=RetrieverType.VECTOR_STORE,
738
715
  description="Type of base retriever to use for multi-hop retrieval",
739
716
  )
740
- max_hops: int = Field(
741
- default=3, description="Maximum number of follow-up questions to generate", ge=1
742
- )
717
+ max_hops: int = Field(default=3, description="Maximum number of follow-up questions to generate", ge=1)
743
718
  reformulation_template: str = Field(
744
719
  default=DEFAULT_QUESTION_REFORMULATION_TEMPLATE,
745
720
  description="Template for reformulating questions",
@@ -751,48 +726,29 @@ class MultiHopRetrieverConfig(BaseModel):
751
726
 
752
727
 
753
728
  class RAGPipelineModel(BaseModel):
754
- documents: Optional[List[Document]] = Field(
755
- default=None, description="List of documents"
756
- )
729
+ documents: Optional[List[Document]] = Field(default=None, description="List of documents")
757
730
 
758
731
  vector_store_config: VectorStoreConfig = Field(
759
732
  default_factory=VectorStoreConfig, description="Vector store configuration"
760
733
  )
761
734
 
762
735
  llm: Optional[BaseChatModel] = Field(default=None, description="Language model")
763
- llm_model_name: str = Field(
764
- default=DEFAULT_LLM_MODEL, description="Language model name"
765
- )
766
- llm_provider: Optional[str] = Field(
767
- default=None, description="Language model provider"
768
- )
736
+ llm_model_name: str = Field(default=DEFAULT_LLM_MODEL, description="Language model name")
737
+ llm_provider: Optional[str] = Field(default=None, description="Language model provider")
769
738
  vector_store: VectorStore = Field(
770
739
  default_factory=lambda: vector_store_map[VectorStoreConfig().vector_store_type],
771
740
  description="Vector store",
772
741
  )
773
- db_connection_string: Optional[str] = Field(
774
- default=None, description="Database connection string"
775
- )
742
+ db_connection_string: Optional[str] = Field(default=None, description="Database connection string")
776
743
  metadata_config: Optional[MetadataConfig] = Field(
777
- default=None,
778
- description="Configuration for metadata to be used for retrieval"
744
+ default=None, description="Configuration for metadata to be used for retrieval"
779
745
  )
780
746
  table_name: str = Field(default=DEFAULT_TEST_TABLE_NAME, description="Table name")
781
- embedding_model: Optional[Embeddings] = Field(
782
- default=None, description="Embedding model"
783
- )
784
- rag_prompt_template: str = Field(
785
- default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template"
786
- )
787
- retriever_prompt_template: Optional[Union[str, dict]] = Field(
788
- default=None, description="Retriever prompt template"
789
- )
790
- retriever_type: RetrieverType = Field(
791
- default=RetrieverType.VECTOR_STORE, description="Retriever type"
792
- )
793
- search_type: SearchType = Field(
794
- default=SearchType.SIMILARITY, description="Type of search to perform"
795
- )
747
+ embedding_model: Optional[Embeddings] = Field(default=None, description="Embedding model")
748
+ rag_prompt_template: str = Field(default=DEFAULT_RAG_PROMPT_TEMPLATE, description="RAG prompt template")
749
+ retriever_prompt_template: Optional[Union[str, dict]] = Field(default=None, description="Retriever prompt template")
750
+ retriever_type: RetrieverType = Field(default=RetrieverType.VECTOR_STORE, description="Retriever type")
751
+ search_type: SearchType = Field(default=SearchType.SIMILARITY, description="Type of search to perform")
796
752
  search_kwargs: SearchKwargs = Field(
797
753
  default_factory=SearchKwargs,
798
754
  description="Search configuration for the retriever",
@@ -811,39 +767,23 @@ class RAGPipelineModel(BaseModel):
811
767
  multi_retriever_mode: MultiVectorRetrieverMode = Field(
812
768
  default=MultiVectorRetrieverMode.BOTH, description="Multi retriever mode"
813
769
  )
814
- max_concurrency: int = Field(
815
- default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency"
816
- )
770
+ max_concurrency: int = Field(default=DEFAULT_MAX_CONCURRENCY, description="Maximum concurrency")
817
771
  id_key: int = Field(default=DEFAULT_ID_KEY, description="ID key")
818
772
  parent_store: Optional[BaseStore] = Field(default=None, description="Parent store")
819
- text_splitter: Optional[TextSplitter] = Field(
820
- default=None, description="Text splitter"
821
- )
773
+ text_splitter: Optional[TextSplitter] = Field(default=None, description="Text splitter")
822
774
  chunk_size: int = Field(default=DEFAULT_CHUNK_SIZE, description="Chunk size")
823
- chunk_overlap: int = Field(
824
- default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap"
825
- )
775
+ chunk_overlap: int = Field(default=DEFAULT_CHUNK_OVERLAP, description="Chunk overlap")
826
776
 
827
777
  # Auto retriever specific
828
- auto_retriever_filter_columns: Optional[List[str]] = Field(
829
- default=None, description="Filter columns"
830
- )
831
- cardinality_threshold: int = Field(
832
- default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold"
833
- )
778
+ auto_retriever_filter_columns: Optional[List[str]] = Field(default=None, description="Filter columns")
779
+ cardinality_threshold: int = Field(default=DEFAULT_CARDINALITY_THRESHOLD, description="Cardinality threshold")
834
780
  content_column_name: str = Field(
835
781
  default=DEFAULT_CONTENT_COLUMN_NAME,
836
782
  description="Content column name (the column we will get embeddings)",
837
783
  )
838
- dataset_description: str = Field(
839
- default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset"
840
- )
841
- reranker: bool = Field(
842
- default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker"
843
- )
844
- reranker_config: RerankerConfig = Field(
845
- default_factory=RerankerConfig, description="Reranker configuration"
846
- )
784
+ dataset_description: str = Field(default=DEFAULT_DATASET_DESCRIPTION, description="Description of the dataset")
785
+ reranker: bool = Field(default=DEFAULT_RERANKER_FLAG, description="Whether to use reranker")
786
+ reranker_config: RerankerConfig = Field(default_factory=RerankerConfig, description="Reranker configuration")
847
787
 
848
788
  multi_hop_config: Optional[MultiHopRetrieverConfig] = Field(
849
789
  default=None,
@@ -856,9 +796,7 @@ class RAGPipelineModel(BaseModel):
856
796
  """Validate that multi_hop_config is set when using multi-hop retrieval."""
857
797
  values = info.data
858
798
  if values.get("retriever_type") == RetrieverType.MULTI_HOP and v is None:
859
- raise ValueError(
860
- "multi_hop_config must be set when using multi-hop retrieval"
861
- )
799
+ raise ValueError("multi_hop_config must be set when using multi-hop retrieval")
862
800
  return v
863
801
 
864
802
  class Config:
@@ -889,13 +827,9 @@ class RAGPipelineModel(BaseModel):
889
827
  if v.lambda_mult is not None and (v.lambda_mult < 0 or v.lambda_mult > 1):
890
828
  raise ValueError("lambda_mult must be between 0 and 1")
891
829
  if v.fetch_k is None and v.lambda_mult is not None:
892
- raise ValueError(
893
- "fetch_k is required when using lambda_mult with MMR search type"
894
- )
830
+ raise ValueError("fetch_k is required when using lambda_mult with MMR search type")
895
831
  if v.lambda_mult is None and v.fetch_k is not None:
896
- raise ValueError(
897
- "lambda_mult is required when using fetch_k with MMR search type"
898
- )
832
+ raise ValueError("lambda_mult is required when using fetch_k with MMR search type")
899
833
  elif search_type != SearchType.MMR:
900
834
  if v.fetch_k is not None:
901
835
  raise ValueError("fetch_k is only valid for MMR search type")
@@ -904,20 +838,11 @@ class RAGPipelineModel(BaseModel):
904
838
 
905
839
  # Validate similarity_score_threshold parameters
906
840
  if search_type == SearchType.SIMILARITY_SCORE_THRESHOLD:
907
- if v.score_threshold is not None and (
908
- v.score_threshold < 0 or v.score_threshold > 1
909
- ):
841
+ if v.score_threshold is not None and (v.score_threshold < 0 or v.score_threshold > 1):
910
842
  raise ValueError("score_threshold must be between 0 and 1")
911
843
  if v.score_threshold is None:
912
- raise ValueError(
913
- "score_threshold is required for similarity_score_threshold search type"
914
- )
915
- elif (
916
- search_type != SearchType.SIMILARITY_SCORE_THRESHOLD
917
- and v.score_threshold is not None
918
- ):
919
- raise ValueError(
920
- "score_threshold is only valid for similarity_score_threshold search type"
921
- )
844
+ raise ValueError("score_threshold is required for similarity_score_threshold search type")
845
+ elif search_type != SearchType.SIMILARITY_SCORE_THRESHOLD and v.score_threshold is not None:
846
+ raise ValueError("score_threshold is only valid for similarity_score_threshold search type")
922
847
 
923
848
  return v
@@ -45,15 +45,11 @@ class FileSplitterConfig:
45
45
  headers_to_split_on=DEFAULT_MARKDOWN_HEADERS_TO_SPLIT_ON
46
46
  )
47
47
  # Splitter to use for HTML splitting
48
- html_splitter: HTMLHeaderTextSplitter = HTMLHeaderTextSplitter(
49
- headers_to_split_on=DEFAULT_HTML_HEADERS_TO_SPLIT_ON
50
- )
48
+ html_splitter: HTMLHeaderTextSplitter = HTMLHeaderTextSplitter(headers_to_split_on=DEFAULT_HTML_HEADERS_TO_SPLIT_ON)
51
49
 
52
50
  def __post_init__(self):
53
51
  if self.text_chunking_config is None:
54
- self.text_chunking_config = TextChunkingConfig(
55
- chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap
56
- )
52
+ self.text_chunking_config = TextChunkingConfig(chunk_size=self.chunk_size, chunk_overlap=self.chunk_overlap)
57
53
 
58
54
  if self.recursive_splitter is None:
59
55
  self.recursive_splitter = RecursiveCharacterTextSplitter(
@@ -85,9 +81,7 @@ class FileSplitter:
85
81
  ) -> Union[Callable, HTMLHeaderTextSplitter, MarkdownHeaderTextSplitter]:
86
82
  return self._extension_map.get(extension, self.default_splitter)()
87
83
 
88
- def split_documents(
89
- self, documents: List[Document], default_failover: bool = True
90
- ) -> List[Document]:
84
+ def split_documents(self, documents: List[Document], default_failover: bool = True) -> List[Document]:
91
85
  """Splits a list of documents representing files using the appropriate splitting & chunking strategies
92
86
 
93
87
  Args:
@@ -105,13 +99,9 @@ class FileSplitter:
105
99
  try:
106
100
  split_documents += split_func(document.page_content)
107
101
  except Exception as e:
108
- logger.error(
109
- f"Error splitting document with extension {extension}: {str(e)}"
110
- )
102
+ logger.exception(f"Error splitting document with extension {extension}:")
111
103
  if not default_failover:
112
- raise ValueError(
113
- f"Error splitting document with extension {extension}"
114
- ) from e
104
+ raise ValueError(f"Error splitting document with extension {extension}") from e
115
105
  # Try default splitter as a failover, if enabled.
116
106
  split_func = self._split_func_by_extension(extension=None)
117
107
  split_documents += split_func(document.page_content)
@@ -190,7 +190,8 @@ class AgentsController:
190
190
  db.session.commit()
191
191
 
192
192
  except Exception as e:
193
- raise ValueError(f"Failed to auto-create or update SQL skill: {str(e)}")
193
+ logger.exception("Failed to auto-create or update SQL skill:")
194
+ raise ValueError(f"Failed to auto-create or update SQL skill: {e}") from e
194
195
 
195
196
  return skill_name
196
197
 
@@ -260,8 +260,6 @@ MINDSDB_PREFIX = """You are an AI assistant powered by MindsDB. You have access
260
260
 
261
261
  For factual questions, ALWAYS use the available tools to look up information rather than relying on your internal knowledge.
262
262
 
263
- Here is the user's question: {{question}}
264
-
265
263
  TOOLS:
266
264
  ------
267
265
 
@@ -62,7 +62,9 @@ class ChatCompletionResponse(BaseModel):
62
62
  created: int = 0
63
63
  model: str
64
64
  choices: List[ChatCompletionChoice]
65
- usage: Dict[str, int] = Field(default_factory=lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})
65
+ usage: Dict[str, int] = Field(
66
+ default_factory=lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
67
+ )
66
68
 
67
69
 
68
70
  class DirectSQLRequest(BaseModel):
@@ -74,14 +76,14 @@ async def chat_completions(request: ChatCompletionRequest):
74
76
  global agent_wrapper
75
77
 
76
78
  if agent_wrapper is None:
77
- raise HTTPException(status_code=500, detail="Agent not initialized. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
79
+ raise HTTPException(
80
+ status_code=500,
81
+ detail="Agent not initialized. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http",
82
+ )
78
83
 
79
84
  try:
80
85
  # Convert request to messages format
81
- messages = [
82
- {"role": msg.role, "content": msg.content}
83
- for msg in request.messages
84
- ]
86
+ messages = [{"role": msg.role, "content": msg.content} for msg in request.messages]
85
87
 
86
88
  if request.stream:
87
89
  # Return a streaming response
@@ -90,9 +92,10 @@ async def chat_completions(request: ChatCompletionRequest):
90
92
  async for chunk in agent_wrapper.acompletion_stream(messages, model=request.model):
91
93
  yield f"data: {json.dumps(chunk)}\n\n"
92
94
  yield "data: [DONE]\n\n"
93
- except Exception as e:
94
- logger.error(f"Streaming error: {str(e)}")
95
+ except Exception:
96
+ logger.exception("Streaming error:")
95
97
  yield "data: {{'error': 'Streaming failed due to an internal error.'}}\n\n"
98
+
96
99
  return StreamingResponse(generate(), media_type="text/event-stream")
97
100
  else:
98
101
  # Return a regular response
@@ -105,16 +108,11 @@ async def chat_completions(request: ChatCompletionRequest):
105
108
 
106
109
  # Transform to proper OpenAI format
107
110
  return ChatCompletionResponse(
108
- model=request.model,
109
- choices=[
110
- ChatCompletionChoice(
111
- message={"role": "assistant", "content": content}
112
- )
113
- ]
111
+ model=request.model, choices=[ChatCompletionChoice(message={"role": "assistant", "content": content})]
114
112
  )
115
113
 
116
114
  except Exception as e:
117
- logger.error(f"Error in chat completion: {str(e)}")
115
+ logger.exception("Error in chat completion:")
118
116
  raise HTTPException(status_code=500, detail=str(e))
119
117
 
120
118
 
@@ -124,7 +122,9 @@ async def direct_sql(request: DirectSQLRequest, background_tasks: BackgroundTask
124
122
  global agent_wrapper, mcp_session
125
123
 
126
124
  if agent_wrapper is None and mcp_session is None:
127
- raise HTTPException(status_code=500, detail="No MCP session available. Make sure MindsDB server is running with MCP enabled.")
125
+ raise HTTPException(
126
+ status_code=500, detail="No MCP session available. Make sure MindsDB server is running with MCP enabled."
127
+ )
128
128
 
129
129
  try:
130
130
  # First try to use the agent's session if available
@@ -140,7 +140,7 @@ async def direct_sql(request: DirectSQLRequest, background_tasks: BackgroundTask
140
140
  raise HTTPException(status_code=500, detail="No MCP session available")
141
141
 
142
142
  except Exception as e:
143
- logger.error(f"Error executing direct SQL: {str(e)}")
143
+ logger.exception("Error executing direct SQL:")
144
144
  raise HTTPException(status_code=500, detail=str(e))
145
145
 
146
146
 
@@ -150,32 +150,12 @@ async def list_models():
150
150
  global agent_wrapper
151
151
 
152
152
  if agent_wrapper is None:
153
- return {
154
- "object": "list",
155
- "data": [
156
- {
157
- "id": "mcp-agent",
158
- "object": "model",
159
- "created": 0,
160
- "owned_by": "mindsdb"
161
- }
162
- ]
163
- }
153
+ return {"object": "list", "data": [{"id": "mcp-agent", "object": "model", "created": 0, "owned_by": "mindsdb"}]}
164
154
 
165
155
  # Return the actual model name if available
166
156
  model_name = agent_wrapper.agent.args.get("model_name", "mcp-agent")
167
157
 
168
- return {
169
- "object": "list",
170
- "data": [
171
- {
172
- "id": model_name,
173
- "object": "model",
174
- "created": 0,
175
- "owned_by": "mindsdb"
176
- }
177
- ]
178
- }
158
+ return {"object": "list", "data": [{"id": model_name, "object": "model", "created": 0, "owned_by": "mindsdb"}]}
179
159
 
180
160
 
181
161
  @app.get("/health")
@@ -189,7 +169,9 @@ async def health_check():
189
169
  }
190
170
 
191
171
  if agent_wrapper is not None:
192
- health_status["mcp_connected"] = hasattr(agent_wrapper.agent, "session") and agent_wrapper.agent.session is not None
172
+ health_status["mcp_connected"] = (
173
+ hasattr(agent_wrapper.agent, "session") and agent_wrapper.agent.session is not None
174
+ )
193
175
  health_status["agent_name"] = agent_wrapper.agent.agent.name
194
176
  health_status["model_name"] = agent_wrapper.agent.args.get("model_name", "unknown")
195
177
 
@@ -209,7 +191,7 @@ async def test_mcp_connection():
209
191
  return {
210
192
  "status": "ok",
211
193
  "message": "Successfully connected to MCP server",
212
- "tools": [tool.name for tool in tools_response.tools]
194
+ "tools": [tool.name for tool in tools_response.tools],
213
195
  }
214
196
  except Exception:
215
197
  # If error, close existing session and create a new one
@@ -217,11 +199,7 @@ async def test_mcp_connection():
217
199
  mcp_session = None
218
200
 
219
201
  # Create a new MCP session - connect to running server
220
- server_params = StdioServerParameters(
221
- command="python",
222
- args=["-m", "mindsdb", "--api=mcp"],
223
- env=None
224
- )
202
+ server_params = StdioServerParameters(command="python", args=["-m", "mindsdb", "--api=mcp"], env=None)
225
203
 
226
204
  stdio_transport = await exit_stack.enter_async_context(stdio_client(server_params))
227
205
  stdio, write = stdio_transport
@@ -238,11 +216,11 @@ async def test_mcp_connection():
238
216
  return {
239
217
  "status": "ok",
240
218
  "message": "Successfully connected to MCP server",
241
- "tools": [tool.name for tool in tools_response.tools]
219
+ "tools": [tool.name for tool in tools_response.tools],
242
220
  }
243
221
  except Exception as e:
244
- logger.error(f"Error connecting to MCP server: {str(e)}")
245
- error_detail = f"Error connecting to MCP server: {str(e)}. Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http"
222
+ logger.exception("Error connecting to MCP server:")
223
+ error_detail = f"Error connecting to MCP server: {str(e)}. Make sure MindsDB server is running with HTTP enabled: python -m mindsdb --api=http"
246
224
  raise HTTPException(status_code=500, detail=error_detail)
247
225
 
248
226
 
@@ -256,16 +234,13 @@ async def init_agent(agent_name: str, project_name: str, mcp_host: str, mcp_port
256
234
  logger.info("Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
257
235
 
258
236
  agent_wrapper = create_mcp_agent(
259
- agent_name=agent_name,
260
- project_name=project_name,
261
- mcp_host=mcp_host,
262
- mcp_port=mcp_port
237
+ agent_name=agent_name, project_name=project_name, mcp_host=mcp_host, mcp_port=mcp_port
263
238
  )
264
239
 
265
240
  logger.info("Agent initialized successfully")
266
241
  return True
267
- except Exception as e:
268
- logger.error(f"Failed to initialize agent: {str(e)}")
242
+ except Exception:
243
+ logger.exception("Failed to initialize agent:")
269
244
  return False
270
245
 
271
246
 
@@ -286,7 +261,7 @@ async def run_server_async(
286
261
  mcp_host: str = "127.0.0.1",
287
262
  mcp_port: int = 47337,
288
263
  host: str = "0.0.0.0",
289
- port: int = 8000
264
+ port: int = 8000,
290
265
  ):
291
266
  """Run the FastAPI server"""
292
267
  # Initialize the agent
@@ -304,12 +279,13 @@ def run_server(
304
279
  mcp_host: str = "127.0.0.1",
305
280
  mcp_port: int = 47337,
306
281
  host: str = "0.0.0.0",
307
- port: int = 8000
282
+ port: int = 8000,
308
283
  ):
309
284
  """Run the FastAPI server"""
310
285
  logger.info("Make sure MindsDB server is running with MCP enabled: python -m mindsdb --api=mysql,mcp,http")
311
286
  # Initialize database
312
287
  from mindsdb.interfaces.storage import db
288
+
313
289
  db.init()
314
290
 
315
291
  # Run initialization in the event loop
@@ -341,5 +317,5 @@ if __name__ == "__main__":
341
317
  mcp_host=args.mcp_host,
342
318
  mcp_port=args.mcp_port,
343
319
  host=args.host,
344
- port=args.port
320
+ port=args.port,
345
321
  )