MindsDB 25.4.3.1__py3-none-any.whl → 25.4.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic; see the package registry's advisory page for more details.

Files changed (43):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +18 -4
  3. mindsdb/api/executor/data_types/response_type.py +1 -0
  4. mindsdb/api/executor/datahub/classes/tables_row.py +3 -10
  5. mindsdb/api/executor/datahub/datanodes/datanode.py +7 -2
  6. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +44 -10
  7. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +57 -38
  8. mindsdb/api/executor/datahub/datanodes/project_datanode.py +39 -7
  9. mindsdb/api/executor/datahub/datanodes/system_tables.py +116 -109
  10. mindsdb/api/executor/planner/query_planner.py +10 -1
  11. mindsdb/api/executor/planner/steps.py +8 -2
  12. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +5 -5
  13. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +1 -1
  14. mindsdb/api/executor/sql_query/steps/insert_step.py +2 -1
  15. mindsdb/api/executor/sql_query/steps/prepare_steps.py +2 -3
  16. mindsdb/api/litellm/start.py +82 -0
  17. mindsdb/api/mysql/mysql_proxy/libs/constants/mysql.py +133 -0
  18. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +7 -2
  19. mindsdb/integrations/handlers/chromadb_handler/settings.py +1 -0
  20. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +13 -4
  21. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +14 -5
  22. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +14 -4
  23. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +34 -19
  24. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +21 -18
  25. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +14 -4
  26. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +1 -1
  27. mindsdb/integrations/libs/response.py +80 -32
  28. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +208 -13
  29. mindsdb/interfaces/agents/litellm_server.py +345 -0
  30. mindsdb/interfaces/agents/mcp_client_agent.py +252 -0
  31. mindsdb/interfaces/agents/run_mcp_agent.py +205 -0
  32. mindsdb/interfaces/knowledge_base/controller.py +17 -7
  33. mindsdb/interfaces/skills/skill_tool.py +7 -1
  34. mindsdb/interfaces/skills/sql_agent.py +8 -3
  35. mindsdb/utilities/config.py +8 -1
  36. mindsdb/utilities/starters.py +7 -0
  37. {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/METADATA +232 -230
  38. {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/RECORD +42 -39
  39. {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/WHEEL +1 -1
  40. mindsdb/integrations/handlers/snowflake_handler/tests/test_snowflake_handler.py +0 -230
  41. /mindsdb/{integrations/handlers/snowflake_handler/tests → api/litellm}/__init__.py +0 -0
  42. {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/licenses/LICENSE +0 -0
  43. {mindsdb-25.4.3.1.dist-info → mindsdb-25.4.4.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  __title__ = 'MindsDB'
2
2
  __package_name__ = 'mindsdb'
3
- __version__ = '25.4.3.1'
3
+ __version__ = '25.4.4.0'
4
4
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
5
5
  __email__ = "jorge@mindsdb.com"
6
6
  __author__ = 'MindsDB Inc'
mindsdb/__main__.py CHANGED
@@ -25,7 +25,7 @@ from mindsdb.utilities.config import config
25
25
  from mindsdb.utilities.exception import EntityNotExistsError
26
26
  from mindsdb.utilities.starters import (
27
27
  start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue, start_scheduler, start_tasks,
28
- start_mcp
28
+ start_mcp, start_litellm
29
29
  )
30
30
  from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids
31
31
  from mindsdb.utilities.functions import get_versions_where_predictors_become_obsolete
@@ -59,6 +59,7 @@ class TrunkProcessEnum(Enum):
59
59
  TASKS = 'tasks'
60
60
  ML_TASK_QUEUE = 'ml_task_queue'
61
61
  MCP = 'mcp'
62
+ LITELLM = 'litellm'
62
63
 
63
64
  @classmethod
64
65
  def _missing_(cls, value):
@@ -408,9 +409,11 @@ if __name__ == '__main__':
408
409
 
409
410
  clean_process_marks()
410
411
 
411
- http_api_config = config['api']['http']
412
- mysql_api_config = config['api']['mysql']
413
- mcp_api_config = config['api']['mcp']
412
+ # Get config values for APIs
413
+ http_api_config = config.get('api', {}).get('http', {})
414
+ mysql_api_config = config.get('api', {}).get('mysql', {})
415
+ mcp_api_config = config.get('api', {}).get('mcp', {})
416
+ litellm_api_config = config.get('api', {}).get('litellm', {})
414
417
  trunc_processes_struct = {
415
418
  TrunkProcessEnum.HTTP: TrunkProcessData(
416
419
  name=TrunkProcessEnum.HTTP.value,
@@ -471,6 +474,17 @@ if __name__ == '__main__':
471
474
  max_restart_interval_seconds=mcp_api_config.get(
472
475
  'max_restart_interval_seconds', TrunkProcessData.max_restart_interval_seconds
473
476
  )
477
+ ),
478
+ TrunkProcessEnum.LITELLM: TrunkProcessData(
479
+ name=TrunkProcessEnum.LITELLM.value,
480
+ entrypoint=start_litellm,
481
+ port=litellm_api_config.get('port', 8000),
482
+ args=(config.cmd_args.verbose,),
483
+ restart_on_failure=litellm_api_config.get('restart_on_failure', False),
484
+ max_restart_count=litellm_api_config.get('max_restart_count', TrunkProcessData.max_restart_count),
485
+ max_restart_interval_seconds=litellm_api_config.get(
486
+ 'max_restart_interval_seconds', TrunkProcessData.max_restart_interval_seconds
487
+ )
474
488
  )
475
489
  }
476
490
 
@@ -3,6 +3,7 @@ class RESPONSE_TYPE:
3
3
  OK = 'ok'
4
4
  TABLE = 'table'
5
5
  ERROR = 'error'
6
+ COLUMNS_TABLE = 'columns_table' # for queries to information_schema.columns
6
7
 
7
8
 
8
9
  RESPONSE_TYPE = RESPONSE_TYPE()
@@ -1,4 +1,4 @@
1
- from dataclasses import dataclass
1
+ from dataclasses import dataclass, astuple
2
2
  from datetime import datetime
3
3
 
4
4
 
@@ -12,7 +12,7 @@ class TABLES_ROW_TYPE:
12
12
  TABLES_ROW_TYPE = TABLES_ROW_TYPE()
13
13
 
14
14
 
15
- @dataclass
15
+ @dataclass(slots=True)
16
16
  class TablesRow:
17
17
  TABLE_CATALOG: str = 'def'
18
18
  TABLE_SCHEMA: str = 'information_schema'
@@ -37,17 +37,10 @@ class TablesRow:
37
37
  TABLE_COMMENT: str = ''
38
38
 
39
39
  def to_list(self) -> list:
40
- return [self.TABLE_CATALOG, self.TABLE_SCHEMA, self.TABLE_NAME,
41
- self.TABLE_TYPE, self.ENGINE, self.VERSION, self.ROW_FORMAT,
42
- self.TABLE_ROWS, self.AVG_ROW_LENGTH, self.DATA_LENGTH,
43
- self.MAX_DATA_LENGTH, self.INDEX_LENGTH, self.DATA_FREE,
44
- self.AUTO_INCREMENT, self.CREATE_TIME, self.UPDATE_TIME,
45
- self.CHECK_TIME, self.TABLE_COLLATION, self.CHECKSUM,
46
- self.CREATE_OPTIONS, self.TABLE_COMMENT]
40
+ return list(astuple(self))
47
41
 
48
42
  @staticmethod
49
43
  def from_dict(data: dict):
50
-
51
44
  del_keys = []
52
45
  data = {k.upper(): v for k, v in data.items()}
53
46
 
@@ -1,3 +1,5 @@
1
+ from pandas import DataFrame
2
+
1
3
  from mindsdb.api.executor.datahub.classes.response import DataHubResponse
2
4
 
3
5
 
@@ -13,8 +15,11 @@ class DataNode:
13
15
  def get_tables(self):
14
16
  pass
15
17
 
16
- def get_table_columns(self, tableName, schema_name=None):
18
+ def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> DataFrame:
19
+ pass
20
+
21
+ def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]:
17
22
  pass
18
23
 
19
24
  def query(self, query=None, native_query=None, session=None) -> DataHubResponse:
20
- return []
25
+ pass
@@ -1,3 +1,4 @@
1
+ from dataclasses import astuple
1
2
 
2
3
  import pandas as pd
3
4
  from mindsdb_sql_parser.ast.base import ASTNode
@@ -10,6 +11,7 @@ from mindsdb.api.executor.utilities.sql import query_df
10
11
  from mindsdb.api.executor.utilities.sql import get_query_tables
11
12
  from mindsdb.interfaces.database.projects import ProjectController
12
13
  from mindsdb.api.executor.datahub.classes.response import DataHubResponse
14
+ from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
13
15
  from mindsdb.utilities import log
14
16
 
15
17
  from .system_tables import (
@@ -111,16 +113,48 @@ class InformationSchemaDataNode(DataNode):
111
113
 
112
114
  return None
113
115
 
114
- def get_table_columns(self, tableName, schema_name=None):
115
- tn = tableName.upper()
116
- if tn in self.tables:
117
- return [
118
- {'name': name}
119
- for name in self.tables[tn].columns
120
- ]
121
- raise exc.TableNotExistError(
122
- f"Table information_schema.{tableName} does not exists"
123
- )
116
+ def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame:
117
+ """Get a DataFrame containing representation of information_schema.columns for the specified table.
118
+
119
+ Args:
120
+ table_name (str): The name of the table to get columns from.
121
+ schema_name (str | None): Not in use. The name of the schema to get columns from.
122
+
123
+ Returns:
124
+ pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table.
125
+ The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES
126
+ but only 'COLUMN_NAME' column is filled with the actual column names.
127
+ Other columns are filled with None.
128
+ """
129
+ table_name = table_name.upper()
130
+ if table_name not in self.tables:
131
+ raise exc.TableNotExistError(
132
+ f"Table information_schema.{table_name} does not exists"
133
+ )
134
+ table_columns_names = self.tables[table_name].columns
135
+ df = pd.DataFrame([[table_columns_names]], columns=[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME])
136
+ for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
137
+ if column_name == INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME:
138
+ continue
139
+ df[column_name] = None
140
+ return df
141
+
142
+ def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]:
143
+ """Get a list of column names for the specified table.
144
+
145
+ Args:
146
+ table_name (str): The name of the table to get columns from.
147
+ schema_name (str | None): Not in use. The name of the schema to get columns from.
148
+
149
+ Returns:
150
+ list[str]: A list of column names for the specified table.
151
+ """
152
+ table_name = table_name.upper()
153
+ if table_name not in self.tables:
154
+ raise exc.TableNotExistError(
155
+ f"Table information_schema.{table_name} does not exists"
156
+ )
157
+ return self.tables[table_name].columns
124
158
 
125
159
  def get_integrations_names(self):
126
160
  integration_names = self.integration_controller.get_all().keys()
@@ -1,6 +1,6 @@
1
1
  import time
2
2
  import inspect
3
- from typing import Optional
3
+ from dataclasses import astuple
4
4
 
5
5
  import numpy as np
6
6
  from numpy import dtype as np_dtype
@@ -18,11 +18,12 @@ from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
18
18
  from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
19
19
  from mindsdb.api.executor.data_types.response_type import RESPONSE_TYPE
20
20
  from mindsdb.api.executor.sql_query.result_set import ResultSet
21
+ from mindsdb.integrations.libs.response import HandlerResponse, INF_SCHEMA_COLUMNS_NAMES
21
22
  from mindsdb.integrations.utilities.utils import get_class_name
22
23
  from mindsdb.metrics import metrics
23
24
  from mindsdb.utilities import log
24
25
  from mindsdb.utilities.profiler import profiler
25
- from mindsdb.integrations.libs.response import HandlerResponse
26
+ from mindsdb.api.executor.datahub.datanodes.system_tables import infer_mysql_type
26
27
 
27
28
  logger = log.getLogger(__name__)
28
29
 
@@ -55,47 +56,65 @@ class IntegrationDataNode(DataNode):
55
56
  else:
56
57
  raise Exception(f"Can't get tables: {response.error_message}")
57
58
 
58
- def get_table_columns(self, table_name: str, schema_name: Optional[str] = None):
59
+ def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame:
60
+ """Get a DataFrame containing representation of information_schema.columns for the specified table.
61
+
62
+ Args:
63
+ table_name (str): The name of the table to get columns from.
64
+ schema_name (str | None): The name of the schema to get columns from.
65
+
66
+ Returns:
67
+ pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table.
68
+ The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES.
69
+ """
59
70
  if 'schema_name' in inspect.signature(self.integration_handler.get_columns).parameters:
60
71
  response = self.integration_handler.get_columns(table_name, schema_name)
61
72
  else:
62
73
  response = self.integration_handler.get_columns(table_name)
63
- if response.type == RESPONSE_TYPE.TABLE:
64
- df = response.data_frame
65
- # case independent
66
- columns = [str(c).lower() for c in df.columns]
67
- df.columns = columns
68
-
69
- col_name = None
70
- # looking for specific column names
71
- for col in ('field', 'column_name', 'column', 'name'):
72
- if col in columns:
73
- col_name = columns.index(col)
74
- break
75
- # if not found - pick first one
76
- if col_name is None:
77
- col_name = 0
78
-
79
- names = df[df.columns[col_name]]
80
-
81
- # type
82
- if 'mysql_data_type' in columns:
83
- types = df['mysql_data_type']
84
- elif 'type' in columns:
85
- types = df['type']
86
- else:
87
- types = [None] * len(names)
88
74
 
89
- ret = []
90
- for i, name in enumerate(names):
91
- ret.append({
92
- 'name': name,
93
- 'type': types[i]
94
- })
75
+ if response.type == RESPONSE_TYPE.COLUMNS_TABLE:
76
+ return response.data_frame
77
+
78
+ if response.type != RESPONSE_TYPE.TABLE:
79
+ logger.warning(f"Wrong response type for handler's `get_columns` call: {response.type}")
80
+ return pd.DataFrame([], columns=astuple(INF_SCHEMA_COLUMNS_NAMES))
81
+
82
+ # region fallback for old handlers
83
+ df = response.data_frame
84
+ df.columns = [name.upper() for name in df.columns]
85
+ if 'FIELD' not in df.columns or 'TYPE' not in df.columns:
86
+ logger.warning(
87
+ f"Response from the handler's `get_columns` call does not contain required columns: f{df.columns}"
88
+ )
89
+ return pd.DataFrame([], columns=astuple(INF_SCHEMA_COLUMNS_NAMES))
90
+
91
+ new_df = df[['FIELD', 'TYPE']]
92
+ new_df.columns = ['COLUMN_NAME', 'DATA_TYPE']
93
+
94
+ new_df[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = new_df[
95
+ INF_SCHEMA_COLUMNS_NAMES.DATA_TYPE
96
+ ].apply(lambda x: infer_mysql_type(x).value)
97
+
98
+ for column_name in astuple(INF_SCHEMA_COLUMNS_NAMES):
99
+ if column_name in new_df.columns:
100
+ continue
101
+ new_df[column_name] = None
102
+ # endregion
103
+
104
+ return new_df
105
+
106
+ def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]:
107
+ """Get a list of column names for the specified table.
95
108
 
96
- return ret
109
+ Args:
110
+ table_name (str): The name of the table to get columns from.
111
+ schema_name (str | None): The name of the schema to get columns from.
97
112
 
98
- return []
113
+ Returns:
114
+ list[str]: A list of column names for the specified table.
115
+ """
116
+ df = self.get_table_columns_df(table_name, schema_name)
117
+ return df[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME].to_list()
99
118
 
100
119
  def drop_table(self, name: Identifier, if_exists=False):
101
120
  drop_ast = DropTables(
@@ -107,7 +126,7 @@ class IntegrationDataNode(DataNode):
107
126
  raise Exception(result.error_message)
108
127
 
109
128
  def create_table(self, table_name: Identifier, result_set: ResultSet = None, columns=None,
110
- is_replace=False, is_create=False) -> DataHubResponse:
129
+ is_replace=False, is_create=False, **kwargs) -> DataHubResponse:
111
130
  # is_create - create table
112
131
  # is_replace - drop table if exists
113
132
  # is_create==False and is_replace==False: just insert
@@ -248,7 +267,7 @@ class IntegrationDataNode(DataNode):
248
267
  return result
249
268
 
250
269
  @profiler.profile()
251
- def query(self, query: Optional[ASTNode] = None, native_query: Optional[str] = None, session=None) -> DataHubResponse:
270
+ def query(self, query: ASTNode | None = None, native_query: str | None = None, session=None) -> DataHubResponse:
252
271
  try:
253
272
  if query is not None:
254
273
  result: HandlerResponse = self._query(query)
@@ -1,4 +1,5 @@
1
1
  from copy import deepcopy
2
+ from dataclasses import astuple
2
3
 
3
4
  import pandas as pd
4
5
  from mindsdb_sql_parser import parse_sql
@@ -16,6 +17,7 @@ from mindsdb.api.executor.datahub.datanodes.datanode import DataNode
16
17
  from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
17
18
  from mindsdb.api.executor.datahub.classes.response import DataHubResponse
18
19
  from mindsdb.utilities.partitioning import process_dataframe_in_partitions
20
+ from mindsdb.integrations.libs.response import INF_SCHEMA_COLUMNS_NAMES
19
21
 
20
22
 
21
23
  class ProjectDataNode(DataNode):
@@ -46,11 +48,41 @@ class ProjectDataNode(DataNode):
46
48
  result = [TablesRow.from_dict(row) for row in tables]
47
49
  return result
48
50
 
49
- def get_table_columns(self, table_name, schema_name=None):
50
- return [
51
- {'name': name}
52
- for name in self.project.get_columns(table_name)
53
- ]
51
+ def get_table_columns_df(self, table_name: str, schema_name: str | None = None) -> pd.DataFrame:
52
+ """Get a DataFrame containing representation of information_schema.columns for the specified table.
53
+
54
+ Args:
55
+ table_name (str): The name of the table to get columns from.
56
+ schema_name (str | None): Not in use. The name of the schema to get columns from.
57
+
58
+ Returns:
59
+ pd.DataFrame: A DataFrame containing representation of information_schema.columns for the specified table.
60
+ The DataFrame has list of columns as in the integrations.libs.response.INF_SCHEMA_COLUMNS_NAMES
61
+ but only 'COLUMN_NAME' column is filled with the actual column names.
62
+ Other columns are filled with None.
63
+ """
64
+ columns = self.project.get_columns(table_name)
65
+
66
+ data = []
67
+ row = {name: None for name in astuple(INF_SCHEMA_COLUMNS_NAMES)}
68
+ for column_name in columns:
69
+ r = row.copy()
70
+ r[INF_SCHEMA_COLUMNS_NAMES.COLUMN_NAME] = column_name
71
+ data.append(r)
72
+
73
+ return pd.DataFrame(data, columns=astuple(INF_SCHEMA_COLUMNS_NAMES))
74
+
75
+ def get_table_columns_names(self, table_name: str, schema_name: str | None = None) -> list[str]:
76
+ """Get a list of column names for the specified table.
77
+
78
+ Args:
79
+ table_name (str): The name of the table to get columns from.
80
+ schema_name (str | None): Not in use. The name of the schema to get columns from.
81
+
82
+ Returns:
83
+ list[str]: A list of column names for the specified table.
84
+ """
85
+ return self.project.get_columns(table_name)
54
86
 
55
87
  def predict(self, model_name: str, df, version=None, params=None):
56
88
  model_metadata = self.project.get_model(model_name)
@@ -148,7 +180,7 @@ class ProjectDataNode(DataNode):
148
180
  else:
149
181
  raise NotImplementedError(f"Query not supported {query}")
150
182
 
151
- def create_table(self, table_name: Identifier, result_set=None, is_replace=False, **kwargs) -> DataHubResponse:
183
+ def create_table(self, table_name: Identifier, result_set=None, is_replace=False, params=None, **kwargs) -> DataHubResponse:
152
184
  # is_create - create table
153
185
  # is_replace - drop table if exists
154
186
  # is_create==False and is_replace==False: just insert
@@ -164,6 +196,6 @@ class ProjectDataNode(DataNode):
164
196
  kb_table.clear()
165
197
 
166
198
  df = result_set.to_df()
167
- kb_table.insert(df)
199
+ kb_table.insert(df, params=params)
168
200
  return DataHubResponse()
169
201
  raise NotImplementedError(f"Can't create table {table_name}")