MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (163)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +40 -29
  3. mindsdb/api/a2a/__init__.py +1 -1
  4. mindsdb/api/a2a/agent.py +16 -10
  5. mindsdb/api/a2a/common/server/server.py +7 -3
  6. mindsdb/api/a2a/common/server/task_manager.py +12 -5
  7. mindsdb/api/a2a/common/types.py +66 -0
  8. mindsdb/api/a2a/task_manager.py +65 -17
  9. mindsdb/api/common/middleware.py +10 -12
  10. mindsdb/api/executor/command_executor.py +51 -40
  11. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  12. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
  13. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
  14. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  15. mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
  16. mindsdb/api/executor/exceptions.py +29 -10
  17. mindsdb/api/executor/planner/plan_join.py +17 -3
  18. mindsdb/api/executor/planner/query_prepare.py +2 -20
  19. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  20. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  21. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  22. mindsdb/api/executor/utilities/functions.py +6 -6
  23. mindsdb/api/executor/utilities/sql.py +37 -20
  24. mindsdb/api/http/gui.py +5 -11
  25. mindsdb/api/http/initialize.py +75 -61
  26. mindsdb/api/http/namespaces/agents.py +10 -15
  27. mindsdb/api/http/namespaces/analysis.py +13 -20
  28. mindsdb/api/http/namespaces/auth.py +1 -1
  29. mindsdb/api/http/namespaces/chatbots.py +0 -5
  30. mindsdb/api/http/namespaces/config.py +15 -11
  31. mindsdb/api/http/namespaces/databases.py +140 -201
  32. mindsdb/api/http/namespaces/file.py +17 -4
  33. mindsdb/api/http/namespaces/handlers.py +17 -7
  34. mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
  35. mindsdb/api/http/namespaces/models.py +94 -126
  36. mindsdb/api/http/namespaces/projects.py +13 -22
  37. mindsdb/api/http/namespaces/sql.py +33 -25
  38. mindsdb/api/http/namespaces/tab.py +27 -37
  39. mindsdb/api/http/namespaces/views.py +1 -1
  40. mindsdb/api/http/start.py +16 -10
  41. mindsdb/api/mcp/__init__.py +2 -1
  42. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  43. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  44. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  45. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
  46. mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
  47. mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
  48. mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
  49. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  50. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
  51. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  52. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
  53. mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
  54. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
  55. mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
  56. mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
  57. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
  58. mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
  59. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
  60. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  61. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  62. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  63. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  64. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  65. mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
  66. mindsdb/integrations/libs/api_handler.py +10 -10
  67. mindsdb/integrations/libs/base.py +4 -4
  68. mindsdb/integrations/libs/llm/utils.py +2 -2
  69. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  70. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  71. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  72. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  73. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  74. mindsdb/integrations/libs/process_cache.py +132 -140
  75. mindsdb/integrations/libs/response.py +18 -12
  76. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  77. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  78. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
  79. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
  80. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  81. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
  82. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  83. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  84. mindsdb/integrations/utilities/rag/settings.py +58 -133
  85. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  86. mindsdb/interfaces/agents/agents_controller.py +2 -3
  87. mindsdb/interfaces/agents/constants.py +0 -2
  88. mindsdb/interfaces/agents/litellm_server.py +34 -58
  89. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  90. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  91. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  92. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  93. mindsdb/interfaces/chatbot/polling.py +30 -18
  94. mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
  95. mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
  96. mindsdb/interfaces/database/data_handlers_cache.py +190 -0
  97. mindsdb/interfaces/database/database.py +3 -3
  98. mindsdb/interfaces/database/integrations.py +7 -110
  99. mindsdb/interfaces/database/projects.py +2 -6
  100. mindsdb/interfaces/database/views.py +1 -4
  101. mindsdb/interfaces/file/file_controller.py +6 -6
  102. mindsdb/interfaces/functions/controller.py +1 -1
  103. mindsdb/interfaces/functions/to_markdown.py +2 -2
  104. mindsdb/interfaces/jobs/jobs_controller.py +5 -9
  105. mindsdb/interfaces/jobs/scheduler.py +3 -9
  106. mindsdb/interfaces/knowledge_base/controller.py +244 -128
  107. mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
  108. mindsdb/interfaces/knowledge_base/executor.py +11 -0
  109. mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
  110. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  111. mindsdb/interfaces/model/model_controller.py +172 -168
  112. mindsdb/interfaces/query_context/context_controller.py +14 -2
  113. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
  114. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  115. mindsdb/interfaces/skills/skill_tool.py +2 -2
  116. mindsdb/interfaces/skills/skills_controller.py +1 -4
  117. mindsdb/interfaces/skills/sql_agent.py +25 -19
  118. mindsdb/interfaces/storage/db.py +16 -6
  119. mindsdb/interfaces/storage/fs.py +114 -169
  120. mindsdb/interfaces/storage/json.py +19 -18
  121. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  122. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  123. mindsdb/interfaces/tasks/task_thread.py +7 -9
  124. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  125. mindsdb/interfaces/triggers/triggers_controller.py +47 -52
  126. mindsdb/migrations/migrate.py +16 -16
  127. mindsdb/utilities/api_status.py +58 -0
  128. mindsdb/utilities/config.py +68 -2
  129. mindsdb/utilities/exception.py +40 -1
  130. mindsdb/utilities/fs.py +0 -1
  131. mindsdb/utilities/hooks/profiling.py +17 -14
  132. mindsdb/utilities/json_encoder.py +24 -10
  133. mindsdb/utilities/langfuse.py +40 -45
  134. mindsdb/utilities/log.py +272 -0
  135. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  136. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  137. mindsdb/utilities/render/sqlalchemy_render.py +22 -20
  138. mindsdb/utilities/starters.py +0 -10
  139. mindsdb/utilities/utils.py +2 -2
  140. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +293 -276
  141. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +144 -158
  142. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  143. mindsdb/api/postgres/__init__.py +0 -0
  144. mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
  145. mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
  146. mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
  147. mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
  148. mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
  149. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
  150. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
  151. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
  152. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
  153. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
  154. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
  155. mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
  156. mindsdb/api/postgres/start.py +0 -11
  157. mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
  158. mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
  159. mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
  160. mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
  161. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
  162. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  163. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- from typing import Any
1
+ from typing import Any, Union, TYPE_CHECKING
2
2
  import datetime
3
3
 
4
4
  import pymssql
@@ -9,22 +9,24 @@ from pandas.api import types as pd_types
9
9
  from mindsdb_sql_parser import parse_sql
10
10
  from mindsdb_sql_parser.ast.base import ASTNode
11
11
 
12
- from mindsdb.integrations.libs.base import DatabaseHandler
12
+ from mindsdb.integrations.libs.base import MetaDatabaseHandler
13
13
  from mindsdb.utilities import log
14
14
  from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
15
15
  from mindsdb.integrations.libs.response import (
16
16
  HandlerStatusResponse as StatusResponse,
17
17
  HandlerResponse as Response,
18
- RESPONSE_TYPE
18
+ RESPONSE_TYPE,
19
19
  )
20
20
  from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
21
21
 
22
+ if TYPE_CHECKING:
23
+ import pyodbc
22
24
 
23
25
  logger = log.getLogger(__name__)
24
26
 
25
27
 
26
28
  def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE:
27
- """ Map MSSQL text types names to MySQL types as enum.
29
+ """Map MSSQL text types names to MySQL types as enum.
28
30
 
29
31
  Args:
30
32
  mssql_type_text (str): The name of the MSSQL type to map.
@@ -34,16 +36,16 @@ def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE:
34
36
  """
35
37
  internal_type_name = mssql_type_text.lower()
36
38
  types_map = {
37
- ('tinyint', 'smallint', 'int', 'bigint'): MYSQL_DATA_TYPE.INT,
38
- ('bit',): MYSQL_DATA_TYPE.BOOL,
39
- ('money', 'smallmoney', 'float', 'real'): MYSQL_DATA_TYPE.FLOAT,
40
- ('decimal', 'numeric'): MYSQL_DATA_TYPE.DECIMAL,
41
- ('date',): MYSQL_DATA_TYPE.DATE,
42
- ('time',): MYSQL_DATA_TYPE.TIME,
43
- ('datetime2', 'datetimeoffset', 'datetime', 'smalldatetime'): MYSQL_DATA_TYPE.DATETIME,
44
- ('varchar', 'nvarchar'): MYSQL_DATA_TYPE.VARCHAR,
45
- ('char', 'text', 'nchar', 'ntext'): MYSQL_DATA_TYPE.TEXT,
46
- ('binary', 'varbinary', 'image'): MYSQL_DATA_TYPE.BINARY
39
+ ("tinyint", "smallint", "int", "bigint"): MYSQL_DATA_TYPE.INT,
40
+ ("bit",): MYSQL_DATA_TYPE.BOOL,
41
+ ("money", "smallmoney", "float", "real"): MYSQL_DATA_TYPE.FLOAT,
42
+ ("decimal", "numeric"): MYSQL_DATA_TYPE.DECIMAL,
43
+ ("date",): MYSQL_DATA_TYPE.DATE,
44
+ ("time",): MYSQL_DATA_TYPE.TIME,
45
+ ("datetime2", "datetimeoffset", "datetime", "smalldatetime"): MYSQL_DATA_TYPE.DATETIME,
46
+ ("varchar", "nvarchar"): MYSQL_DATA_TYPE.VARCHAR,
47
+ ("char", "text", "nchar", "ntext"): MYSQL_DATA_TYPE.TEXT,
48
+ ("binary", "varbinary", "image"): MYSQL_DATA_TYPE.BINARY,
47
49
  }
48
50
 
49
51
  for db_types_list, mysql_data_type in types_map.items():
@@ -54,86 +56,123 @@ def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE:
54
56
  return MYSQL_DATA_TYPE.VARCHAR
55
57
 
56
58
 
57
- def _make_table_response(result: list[dict[str, Any]], cursor: pymssql.Cursor) -> Response:
59
+ def _make_table_response(
60
+ result: list[Union[dict[str, Any], tuple]], cursor: Union[pymssql.Cursor, "pyodbc.Cursor"], use_odbc: bool = False
61
+ ) -> Response:
58
62
  """Build response from result and cursor.
59
63
 
60
64
  Args:
61
- result (list[dict[str, Any]]): result of the query.
62
- cursor (pymssql.Cursor): cursor object.
65
+ result (list[Union[dict[str, Any], tuple]]): result of the query.
66
+ cursor (Union[pymssql.Cursor, pyodbc.Cursor]): cursor object.
67
+ use_odbc (bool): whether ODBC connection is being used.
63
68
 
64
69
  Returns:
65
70
  Response: response object.
66
71
  """
67
72
  description: list[tuple[Any]] = cursor.description
68
73
  mysql_types: list[MYSQL_DATA_TYPE] = []
74
+ columns = [x[0] for x in cursor.description]
69
75
 
70
- data_frame = pd.DataFrame(
71
- result,
72
- columns=[x[0] for x in cursor.description]
73
- )
76
+ if not result:
77
+ data_frame = pd.DataFrame(columns=columns)
78
+ elif use_odbc:
79
+ # For pyodbc with large datasets, convert Row objects efficiently
80
+ # Using iterator with pd.DataFrame avoids intermediate list creation
81
+ try:
82
+ data_frame = pd.DataFrame(result, columns=columns)
83
+ except (ValueError, TypeError):
84
+ # Fallback: convert Row objects to tuples
85
+ data_frame = pd.DataFrame.from_records((tuple(row) for row in result), columns=columns)
86
+ else:
87
+ data_frame = pd.DataFrame(result, columns=columns)
74
88
 
75
89
  for column in description:
76
90
  column_name = column[0]
77
91
  column_type = column[1]
78
92
  column_dtype = data_frame[column_name].dtype
79
- match column_type:
80
- case pymssql.NUMBER:
81
- if pd_types.is_integer_dtype(column_dtype):
82
- mysql_types.append(MYSQL_DATA_TYPE.INT)
83
- elif pd_types.is_float_dtype(column_dtype):
84
- mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
85
- elif pd_types.is_bool_dtype(column_dtype):
86
- # it is 'bit' type
87
- mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
88
- else:
89
- mysql_types.append(MYSQL_DATA_TYPE.DOUBLE)
90
- case pymssql.DECIMAL:
91
- mysql_types.append(MYSQL_DATA_TYPE.DECIMAL)
92
- case pymssql.STRING:
93
- mysql_types.append(MYSQL_DATA_TYPE.TEXT)
94
- case pymssql.DATETIME:
93
+
94
+ if use_odbc:
95
+ # For pyodbc, use type inference based on pandas dtype
96
+ if pd_types.is_integer_dtype(column_dtype):
97
+ mysql_types.append(MYSQL_DATA_TYPE.INT)
98
+ elif pd_types.is_float_dtype(column_dtype):
99
+ mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
100
+ elif pd_types.is_bool_dtype(column_dtype):
101
+ mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
102
+ elif pd_types.is_datetime64_any_dtype(column_dtype):
95
103
  mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
96
- case pymssql.BINARY:
97
- # DATE and TIME types returned as 'BINARY' type, and dataframe type is 'object', so it is not possible
98
- # to infer correct mysql type for them
99
- if pd_types.is_datetime64_any_dtype(column_dtype):
100
- # pymssql return datetimes as 'binary' type
101
- # if timezone is present, then it is datetime.timezone
102
- series = data_frame[column_name]
103
- if (
104
- series.dt.tz is not None
105
- and isinstance(series.dt.tz, datetime.timezone)
106
- and series.dt.tz != datetime.timezone.utc
107
- ):
108
- series = series.dt.tz_convert('UTC')
109
- data_frame[column_name] = series.dt.tz_localize(None)
104
+ elif pd_types.is_object_dtype(column_dtype):
105
+ if len(data_frame) > 0 and isinstance(
106
+ data_frame[column_name].iloc[0], (datetime.datetime, datetime.date, datetime.time)
107
+ ):
110
108
  mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
111
109
  else:
112
- mysql_types.append(MYSQL_DATA_TYPE.BINARY)
113
- case _:
114
- logger.warning(f"Unknown type: {column_type}, use TEXT as fallback.")
110
+ mysql_types.append(MYSQL_DATA_TYPE.TEXT)
111
+ else:
115
112
  mysql_types.append(MYSQL_DATA_TYPE.TEXT)
116
-
117
- return Response(
118
- RESPONSE_TYPE.TABLE,
119
- data_frame=data_frame,
120
- mysql_types=mysql_types
121
- )
122
-
123
-
124
- class SqlServerHandler(DatabaseHandler):
113
+ else:
114
+ match column_type:
115
+ case pymssql.NUMBER:
116
+ if pd_types.is_integer_dtype(column_dtype):
117
+ mysql_types.append(MYSQL_DATA_TYPE.INT)
118
+ elif pd_types.is_float_dtype(column_dtype):
119
+ mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
120
+ elif pd_types.is_bool_dtype(column_dtype):
121
+ mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
122
+ else:
123
+ mysql_types.append(MYSQL_DATA_TYPE.DOUBLE)
124
+ case pymssql.DECIMAL:
125
+ mysql_types.append(MYSQL_DATA_TYPE.DECIMAL)
126
+ case pymssql.STRING:
127
+ mysql_types.append(MYSQL_DATA_TYPE.TEXT)
128
+ case pymssql.DATETIME:
129
+ mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
130
+ case pymssql.BINARY:
131
+ # DATE and TIME types returned as 'BINARY' type, and dataframe type is 'object', so it is not possible
132
+ # to infer correct mysql type for them
133
+ if pd_types.is_datetime64_any_dtype(column_dtype):
134
+ # pymssql return datetimes as 'binary' type
135
+ # if timezone is present, then it is datetime.timezone
136
+ series = data_frame[column_name]
137
+ if (
138
+ series.dt.tz is not None
139
+ and isinstance(series.dt.tz, datetime.timezone)
140
+ and series.dt.tz != datetime.timezone.utc
141
+ ):
142
+ series = series.dt.tz_convert("UTC")
143
+ data_frame[column_name] = series.dt.tz_localize(None)
144
+ mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
145
+ else:
146
+ mysql_types.append(MYSQL_DATA_TYPE.BINARY)
147
+ case _:
148
+ logger.warning(f"Unknown type: {column_type}, use TEXT as fallback.")
149
+ mysql_types.append(MYSQL_DATA_TYPE.TEXT)
150
+
151
+ return Response(RESPONSE_TYPE.TABLE, data_frame=data_frame, mysql_types=mysql_types)
152
+
153
+
154
+ class SqlServerHandler(MetaDatabaseHandler):
125
155
  """
126
156
  This handler handles connection and execution of the Microsoft SQL Server statements.
157
+ Supports both native pymssql connections and ODBC connections via pyodbc.
158
+
159
+ To use ODBC connection, specify either:
160
+ - 'use_odbc': True in connection parameters, or
161
+ - 'driver': '<ODBC driver name>' in connection parameters
127
162
  """
128
- name = 'mssql'
163
+
164
+ name = "mssql"
129
165
 
130
166
  def __init__(self, name, **kwargs):
131
167
  super().__init__(name)
132
168
  self.parser = parse_sql
133
- self.connection_args = kwargs.get('connection_data')
134
- self.dialect = 'mssql'
135
- self.database = self.connection_args.get('database')
136
- self.renderer = SqlalchemyRender('mssql')
169
+ self.connection_args = kwargs.get("connection_data")
170
+ self.dialect = "mssql"
171
+ self.database = self.connection_args.get("database")
172
+ self.renderer = SqlalchemyRender("mssql")
173
+
174
+ # Determine if ODBC should be used
175
+ self.use_odbc = self.connection_args.get("use_odbc", False) or "driver" in self.connection_args
137
176
 
138
177
  self.connection = None
139
178
  self.is_connected = False
@@ -145,41 +184,113 @@ class SqlServerHandler(DatabaseHandler):
145
184
  def connect(self):
146
185
  """
147
186
  Establishes a connection to a Microsoft SQL Server database.
187
+ Uses either pymssql (native) or pyodbc based on configuration.
148
188
 
149
189
  Raises:
150
- pymssql._mssql.OperationalError: If an error occurs while connecting to the Microsoft SQL Server database.
190
+ pymssql._mssql.OperationalError or pyodbc.Error: If an error occurs while connecting to the database.
151
191
 
152
192
  Returns:
153
- pymssql.Connection: A connection object to the Microsoft SQL Server database.
193
+ Union[pymssql.Connection, pyodbc.Connection]: A connection object to the Microsoft SQL Server database.
154
194
  """
155
195
 
156
196
  if self.is_connected is True:
157
197
  return self.connection
158
198
 
199
+ if self.use_odbc:
200
+ return self._connect_odbc()
201
+ else:
202
+ return self._connect_pymssql()
203
+
204
+ def _connect_pymssql(self):
205
+ """Connect using pymssql (native FreeTDS-based connection)."""
159
206
  # Mandatory connection parameters
160
- if not all(key in self.connection_args for key in ['host', 'user', 'password', 'database']):
161
- raise ValueError('Required parameters (host, user, password, database) must be provided.')
207
+ if not all(key in self.connection_args for key in ["host", "user", "password", "database"]):
208
+ raise ValueError("Required parameters (host, user, password, database) must be provided.")
162
209
 
163
210
  config = {
164
- 'host': self.connection_args.get('host'),
165
- 'user': self.connection_args.get('user'),
166
- 'password': self.connection_args.get('password'),
167
- 'database': self.connection_args.get('database')
211
+ "host": self.connection_args.get("host"),
212
+ "user": self.connection_args.get("user"),
213
+ "password": self.connection_args.get("password"),
214
+ "database": self.connection_args.get("database"),
168
215
  }
169
216
 
170
217
  # Optional connection parameters
171
- if 'port' in self.connection_args:
172
- config['port'] = self.connection_args.get('port')
218
+ if "port" in self.connection_args:
219
+ config["port"] = self.connection_args.get("port")
173
220
 
174
- if 'server' in self.connection_args:
175
- config['server'] = self.connection_args.get('server')
221
+ if "server" in self.connection_args:
222
+ config["server"] = self.connection_args.get("server")
176
223
 
177
224
  try:
178
225
  self.connection = pymssql.connect(**config)
179
226
  self.is_connected = True
180
227
  return self.connection
181
228
  except OperationalError as e:
182
- logger.error(f'Error connecting to Microsoft SQL Server {self.database}, {e}!')
229
+ logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!")
230
+ self.is_connected = False
231
+ raise
232
+
233
+ def _connect_odbc(self):
234
+ """Connect using pyodbc (ODBC connection)."""
235
+ try:
236
+ import pyodbc
237
+ except ImportError as e:
238
+ raise ImportError(
239
+ "pyodbc is not installed. Install it with 'pip install pyodbc' or "
240
+ "'pip install mindsdb[mssql-odbc]' to use ODBC connections."
241
+ ) from e
242
+
243
+ # Mandatory connection parameters
244
+ if not all(key in self.connection_args for key in ["host", "user", "password", "database"]):
245
+ raise ValueError("Required parameters (host, user, password, database) must be provided.")
246
+
247
+ driver = self.connection_args.get("driver", "ODBC Driver 17 for SQL Server")
248
+ host = self.connection_args.get("host")
249
+ port = self.connection_args.get("port", 1433)
250
+ database = self.connection_args.get("database")
251
+ user = self.connection_args.get("user")
252
+ password = self.connection_args.get("password")
253
+
254
+ conn_str_parts = [
255
+ f"DRIVER={{{driver}}}",
256
+ f"SERVER={host},{port}",
257
+ f"DATABASE={database}",
258
+ f"UID={user}",
259
+ f"PWD={password}",
260
+ ]
261
+
262
+ # Add optional parameters
263
+ if "encrypt" in self.connection_args:
264
+ conn_str_parts.append(f"Encrypt={self.connection_args.get('encrypt', 'yes')}")
265
+ if "trust_server_certificate" in self.connection_args:
266
+ conn_str_parts.append(
267
+ f"TrustServerCertificate={self.connection_args.get('trust_server_certificate', 'yes')}"
268
+ )
269
+
270
+ if "connection_string_args" in self.connection_args:
271
+ conn_str_parts.append(self.connection_args["connection_string_args"])
272
+
273
+ conn_str = ";".join(conn_str_parts)
274
+
275
+ try:
276
+ self.connection = pyodbc.connect(conn_str, timeout=10)
277
+ self.is_connected = True
278
+ return self.connection
279
+ except pyodbc.Error as e:
280
+ logger.error(f"Error connecting to Microsoft SQL Server {self.database} via ODBC, {e}!")
281
+ self.is_connected = False
282
+
283
+ # Check if it's a driver not found error
284
+ error_msg = str(e)
285
+ if "Driver" in error_msg and ("not found" in error_msg or "specified" in error_msg):
286
+ raise ConnectionError(
287
+ f"ODBC Driver not found: {driver}. "
288
+ f"Please install the Microsoft ODBC Driver for SQL Server. "
289
+ f"Error: {e}"
290
+ ) from e
291
+ raise
292
+ except Exception as e:
293
+ logger.error(f"Error connecting to Microsoft SQL Server {self.database} via ODBC, {e}!")
183
294
  self.is_connected = False
184
295
  raise
185
296
 
@@ -208,10 +319,10 @@ class SqlServerHandler(DatabaseHandler):
208
319
  connection = self.connect()
209
320
  with connection.cursor() as cur:
210
321
  # Execute a simple query to test the connection
211
- cur.execute('select 1;')
322
+ cur.execute("select 1;")
212
323
  response.success = True
213
324
  except OperationalError as e:
214
- logger.error(f'Error connecting to Microsoft SQL Server {self.database}, {e}!')
325
+ logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!")
215
326
  response.error_message = str(e)
216
327
 
217
328
  if response.success and need_to_close:
@@ -235,23 +346,35 @@ class SqlServerHandler(DatabaseHandler):
235
346
  need_to_close = self.is_connected is False
236
347
 
237
348
  connection = self.connect()
238
- with connection.cursor(as_dict=True) as cur:
239
- try:
240
- cur.execute(query)
241
- if cur.description:
242
- result = cur.fetchall()
243
- response = _make_table_response(result, cur)
244
- else:
245
- response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount)
246
- connection.commit()
247
- except Exception as e:
248
- logger.error(f'Error running query: {query} on {self.database}, {e}!')
249
- response = Response(
250
- RESPONSE_TYPE.ERROR,
251
- error_code=0,
252
- error_message=str(e)
253
- )
254
- connection.rollback()
349
+
350
+ if self.use_odbc:
351
+ with connection.cursor() as cur:
352
+ try:
353
+ cur.execute(query)
354
+ if cur.description:
355
+ result = cur.fetchall()
356
+ response = _make_table_response(result, cur, use_odbc=True)
357
+ else:
358
+ response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount)
359
+ connection.commit()
360
+ except Exception as e:
361
+ logger.exception(f"Error running query: {query} on {self.database}, {e}!")
362
+ response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
363
+ connection.rollback()
364
+ else:
365
+ with connection.cursor(as_dict=True) as cur:
366
+ try:
367
+ cur.execute(query)
368
+ if cur.description:
369
+ result = cur.fetchall()
370
+ response = _make_table_response(result, cur, use_odbc=False)
371
+ else:
372
+ response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount)
373
+ connection.commit()
374
+ except Exception as e:
375
+ logger.exception(f"Error running query: {query} on {self.database}, {e}!")
376
+ response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
377
+ connection.rollback()
255
378
 
256
379
  if need_to_close is True:
257
380
  self.disconnect()
@@ -326,3 +449,218 @@ class SqlServerHandler(DatabaseHandler):
326
449
  result = self.native_query(query)
327
450
  result.to_columns_table_response(map_type_fn=_map_type)
328
451
  return result
452
+
453
+ def meta_get_tables(self, table_names: list[str] | None = None) -> Response:
454
+ """
455
+ Retrieves metadata information about the tables in the Microsoft SQL Server database
456
+ to be stored in the data catalog.
457
+
458
+ Args:
459
+ table_names (list): A list of table names for which to retrieve metadata information.
460
+
461
+ Returns:
462
+ Response: A response object containing the metadata information, formatted as per the `Response` class.
463
+ """
464
+ query = f"""
465
+ SELECT
466
+ t.TABLE_NAME as table_name,
467
+ t.TABLE_SCHEMA as table_schema,
468
+ t.TABLE_TYPE as table_type,
469
+ CAST(ep.value AS NVARCHAR(MAX)) as table_description,
470
+ SUM(p.rows) as row_count
471
+ FROM {self.database}.INFORMATION_SCHEMA.TABLES t
472
+ LEFT JOIN {self.database}.sys.tables st
473
+ ON t.TABLE_NAME = st.name
474
+ LEFT JOIN {self.database}.sys.schemas s
475
+ ON st.schema_id = s.schema_id AND t.TABLE_SCHEMA = s.name
476
+ LEFT JOIN {self.database}.sys.extended_properties ep
477
+ ON st.object_id = ep.major_id
478
+ AND ep.minor_id = 0
479
+ AND ep.class = 1
480
+ AND ep.name = 'MS_Description'
481
+ LEFT JOIN {self.database}.sys.partitions p
482
+ ON st.object_id = p.object_id
483
+ AND p.index_id IN (0, 1)
484
+ WHERE t.TABLE_TYPE IN ('BASE TABLE', 'VIEW')
485
+ AND t.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA')
486
+ GROUP BY t.TABLE_NAME, t.TABLE_SCHEMA, t.TABLE_TYPE, ep.value
487
+ """
488
+
489
+ if table_names is not None and len(table_names) > 0:
490
+ quoted_names = [f"'{t}'" for t in table_names]
491
+ query += f" HAVING t.TABLE_NAME IN ({','.join(quoted_names)})"
492
+
493
+ result = self.native_query(query)
494
+ return result
495
+
496
+ def meta_get_columns(self, table_names: list[str] | None = None) -> Response:
497
+ """
498
+ Retrieves column metadata for the specified tables (or all tables if no list is provided).
499
+
500
+ Args:
501
+ table_names (list): A list of table names for which to retrieve column metadata.
502
+
503
+ Returns:
504
+ Response: A response object containing the column metadata.
505
+ """
506
+ query = f"""
507
+ SELECT
508
+ c.TABLE_NAME as table_name,
509
+ c.COLUMN_NAME as column_name,
510
+ c.DATA_TYPE as data_type,
511
+ CAST(ep.value AS NVARCHAR(MAX)) as column_description,
512
+ c.COLUMN_DEFAULT as column_default,
513
+ CASE WHEN c.IS_NULLABLE = 'YES' THEN 1 ELSE 0 END as is_nullable
514
+ FROM {self.database}.INFORMATION_SCHEMA.COLUMNS c
515
+ LEFT JOIN {self.database}.sys.tables st
516
+ ON c.TABLE_NAME = st.name
517
+ LEFT JOIN {self.database}.sys.schemas s
518
+ ON st.schema_id = s.schema_id AND c.TABLE_SCHEMA = s.name
519
+ LEFT JOIN {self.database}.sys.columns sc
520
+ ON st.object_id = sc.object_id AND c.COLUMN_NAME = sc.name
521
+ LEFT JOIN {self.database}.sys.extended_properties ep
522
+ ON st.object_id = ep.major_id
523
+ AND sc.column_id = ep.minor_id
524
+ AND ep.name = 'MS_Description'
525
+ WHERE c.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA')
526
+ """
527
+
528
+ if table_names is not None and len(table_names) > 0:
529
+ quoted_names = [f"'{t}'" for t in table_names]
530
+ query += f" AND c.TABLE_NAME IN ({','.join(quoted_names)})"
531
+
532
+ result = self.native_query(query)
533
+ return result
534
+
535
+ def meta_get_column_statistics(self, table_names: list[str] | None = None) -> Response:
536
+ """
537
+ Retrieves column statistics (e.g., null percentage, distinct value count, min/max values)
538
+ for the specified tables or all tables if no list is provided.
539
+
540
+ Note: Uses SQL Server's sys.dm_db_stats_properties and sys.dm_db_stats_histogram
541
+ (similar to PostgreSQL's pg_stats). Statistics are only available for columns that
542
+ have statistics objects created by SQL Server (typically indexed columns or columns
543
+ used in queries after AUTO_CREATE_STATISTICS).
544
+
545
+ Args:
546
+ table_names (list): A list of table names for which to retrieve column statistics.
547
+
548
+ Returns:
549
+ Response: A response object containing the column statistics.
550
+ """
551
+ table_filter = ""
552
+ if table_names is not None and len(table_names) > 0:
553
+ quoted_names = [f"'{t}'" for t in table_names]
554
+ table_filter = f" AND t.name IN ({','.join(quoted_names)})"
555
+
556
+ # Using OUTER APPLY to handle table-valued functions properly
557
+ # This is equivalent to PostgreSQL's pg_stats view approach
558
+ # Includes all statistics: auto-created, user-created, and index-based
559
+ # dm_db_stats_histogram columns: range_high_key, range_rows, equal_rows,
560
+ # distinct_range_rows, average_range_rows
561
+ query = f"""
562
+ SELECT DISTINCT
563
+ t.name AS TABLE_NAME,
564
+ c.name AS COLUMN_NAME,
565
+ CAST(NULL AS DECIMAL(10,2)) AS NULL_PERCENTAGE,
566
+ CAST(h.distinct_count AS BIGINT) AS DISTINCT_VALUES_COUNT,
567
+ NULL AS MOST_COMMON_VALUES,
568
+ NULL AS MOST_COMMON_FREQUENCIES,
569
+ CAST(h.min_value AS NVARCHAR(MAX)) AS MINIMUM_VALUE,
570
+ CAST(h.max_value AS NVARCHAR(MAX)) AS MAXIMUM_VALUE
571
+ FROM {self.database}.sys.tables t
572
+ INNER JOIN {self.database}.sys.schemas s
573
+ ON t.schema_id = s.schema_id
574
+ INNER JOIN {self.database}.sys.columns c
575
+ ON t.object_id = c.object_id
576
+ LEFT JOIN {self.database}.sys.stats st
577
+ ON st.object_id = t.object_id
578
+ LEFT JOIN {self.database}.sys.stats_columns sc
579
+ ON sc.object_id = st.object_id
580
+ AND sc.stats_id = st.stats_id
581
+ AND sc.column_id = c.column_id
582
+ AND sc.stats_column_id = 1 -- Only leading column in multi-column stats
583
+ OUTER APPLY (
584
+ SELECT
585
+ MIN(CAST(range_high_key AS NVARCHAR(MAX))) AS min_value,
586
+ MAX(CAST(range_high_key AS NVARCHAR(MAX))) AS max_value,
587
+ SUM(CAST(distinct_range_rows AS BIGINT)) + COUNT(*) AS distinct_count
588
+ FROM {self.database}.sys.dm_db_stats_histogram(st.object_id, st.stats_id)
589
+ WHERE st.object_id IS NOT NULL
590
+ ) h
591
+ WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
592
+ {table_filter}
593
+ ORDER BY t.name, c.name
594
+ """
595
+
596
+ result = self.native_query(query)
597
+ return result
598
+
599
+ def meta_get_primary_keys(self, table_names: list[str] | None = None) -> Response:
600
+ """
601
+ Retrieves primary key information for the specified tables (or all tables if no list is provided).
602
+
603
+ Args:
604
+ table_names (list): A list of table names for which to retrieve primary key information.
605
+
606
+ Returns:
607
+ Response: A response object containing the primary key information.
608
+ """
609
+ query = f"""
610
+ SELECT
611
+ tc.TABLE_NAME as table_name,
612
+ kcu.COLUMN_NAME as column_name,
613
+ kcu.ORDINAL_POSITION as ordinal_position,
614
+ tc.CONSTRAINT_NAME as constraint_name
615
+ FROM {self.database}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
616
+ INNER JOIN {self.database}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
617
+ ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
618
+ AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
619
+ AND tc.TABLE_NAME = kcu.TABLE_NAME
620
+ WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
621
+ """
622
+
623
+ if table_names is not None and len(table_names) > 0:
624
+ quoted_names = [f"'{t}'" for t in table_names]
625
+ query += f" AND tc.TABLE_NAME IN ({','.join(quoted_names)})"
626
+
627
+ query += " ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION"
628
+
629
+ result = self.native_query(query)
630
+ return result
631
+
632
+ def meta_get_foreign_keys(self, table_names: list[str] | None = None) -> Response:
633
+ """
634
+ Retrieves foreign key information for the specified tables (or all tables if no list is provided).
635
+
636
+ Args:
637
+ table_names (list): A list of table names for which to retrieve foreign key information.
638
+
639
+ Returns:
640
+ Response: A response object containing the foreign key information.
641
+ """
642
+ query = f"""
643
+ SELECT
644
+ OBJECT_NAME(fk.referenced_object_id) as parent_table_name,
645
+ COL_NAME(fkc.referenced_object_id, fkc.referenced_column_id) as parent_column_name,
646
+ OBJECT_NAME(fk.parent_object_id) as child_table_name,
647
+ COL_NAME(fkc.parent_object_id, fkc.parent_column_id) as child_column_name,
648
+ fk.name as constraint_name
649
+ FROM {self.database}.sys.foreign_keys fk
650
+ INNER JOIN {self.database}.sys.foreign_key_columns fkc
651
+ ON fk.object_id = fkc.constraint_object_id
652
+ INNER JOIN {self.database}.sys.tables t
653
+ ON fk.parent_object_id = t.object_id
654
+ INNER JOIN {self.database}.sys.schemas s
655
+ ON t.schema_id = s.schema_id
656
+ WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
657
+ """
658
+
659
+ if table_names is not None and len(table_names) > 0:
660
+ quoted_names = [f"'{t}'" for t in table_names]
661
+ query += f" AND OBJECT_NAME(fk.parent_object_id) IN ({','.join(quoted_names)})"
662
+
663
+ query += " ORDER BY child_table_name, constraint_name"
664
+
665
+ result = self.native_query(query)
666
+ return result
@@ -0,0 +1,3 @@
1
+ pymssql >= 2.1.4
2
+ pyodbc >= 5.2.0
3
+