MindsDB: mindsdb-25.9.3rc1-py3-none-any.whl → mindsdb-25.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +1 -9
- mindsdb/api/a2a/__init__.py +1 -1
- mindsdb/api/a2a/agent.py +9 -1
- mindsdb/api/a2a/common/server/server.py +4 -0
- mindsdb/api/a2a/common/server/task_manager.py +8 -1
- mindsdb/api/a2a/common/types.py +66 -0
- mindsdb/api/a2a/task_manager.py +50 -0
- mindsdb/api/common/middleware.py +1 -1
- mindsdb/api/executor/command_executor.py +49 -36
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/system_tables.py +2 -1
- mindsdb/api/executor/planner/query_prepare.py +2 -20
- mindsdb/api/executor/utilities/sql.py +5 -4
- mindsdb/api/http/initialize.py +76 -60
- mindsdb/api/http/namespaces/agents.py +0 -3
- mindsdb/api/http/namespaces/chatbots.py +0 -5
- mindsdb/api/http/namespaces/file.py +2 -0
- mindsdb/api/http/namespaces/handlers.py +10 -5
- mindsdb/api/http/namespaces/knowledge_bases.py +20 -0
- mindsdb/api/http/namespaces/sql.py +2 -2
- mindsdb/api/http/start.py +2 -2
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +2 -10
- mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
- mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
- mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
- mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +2 -2
- mindsdb/integrations/handlers/shopify_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +57 -3
- mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
- mindsdb/integrations/libs/response.py +2 -2
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +24 -21
- mindsdb/interfaces/agents/agents_controller.py +0 -2
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +6 -7
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
- mindsdb/interfaces/database/data_handlers_cache.py +190 -0
- mindsdb/interfaces/database/database.py +3 -3
- mindsdb/interfaces/database/integrations.py +1 -121
- mindsdb/interfaces/database/projects.py +2 -6
- mindsdb/interfaces/database/views.py +1 -4
- mindsdb/interfaces/jobs/jobs_controller.py +0 -4
- mindsdb/interfaces/jobs/scheduler.py +0 -1
- mindsdb/interfaces/knowledge_base/controller.py +197 -108
- mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
- mindsdb/interfaces/knowledge_base/executor.py +11 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
- mindsdb/interfaces/model/model_controller.py +4 -4
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +4 -10
- mindsdb/interfaces/skills/skills_controller.py +1 -4
- mindsdb/interfaces/storage/db.py +16 -6
- mindsdb/interfaces/triggers/triggers_controller.py +1 -3
- mindsdb/utilities/config.py +19 -2
- mindsdb/utilities/exception.py +2 -2
- mindsdb/utilities/json_encoder.py +24 -10
- mindsdb/utilities/render/sqlalchemy_render.py +15 -14
- mindsdb/utilities/starters.py +0 -10
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0.dist-info}/METADATA +278 -264
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0.dist-info}/RECORD +72 -86
- mindsdb/api/postgres/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -182
- mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -265
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
- mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
- mindsdb/api/postgres/start.py +0 -11
- mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
- mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0.dist-info}/top_level.txt +0 -0
Diff shown below: mindsdb/integrations/handlers/mssql_handler/mssql_handler.py (+438 -100)
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any
|
|
1
|
+
from typing import Any, Union, TYPE_CHECKING
|
|
2
2
|
import datetime
|
|
3
3
|
|
|
4
4
|
import pymssql
|
|
@@ -9,22 +9,24 @@ from pandas.api import types as pd_types
|
|
|
9
9
|
from mindsdb_sql_parser import parse_sql
|
|
10
10
|
from mindsdb_sql_parser.ast.base import ASTNode
|
|
11
11
|
|
|
12
|
-
from mindsdb.integrations.libs.base import
|
|
12
|
+
from mindsdb.integrations.libs.base import MetaDatabaseHandler
|
|
13
13
|
from mindsdb.utilities import log
|
|
14
14
|
from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
|
|
15
15
|
from mindsdb.integrations.libs.response import (
|
|
16
16
|
HandlerStatusResponse as StatusResponse,
|
|
17
17
|
HandlerResponse as Response,
|
|
18
|
-
RESPONSE_TYPE
|
|
18
|
+
RESPONSE_TYPE,
|
|
19
19
|
)
|
|
20
20
|
from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
|
|
21
21
|
|
|
22
|
+
if TYPE_CHECKING:
|
|
23
|
+
import pyodbc
|
|
22
24
|
|
|
23
25
|
logger = log.getLogger(__name__)
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE:
|
|
27
|
-
"""
|
|
29
|
+
"""Map MSSQL text types names to MySQL types as enum.
|
|
28
30
|
|
|
29
31
|
Args:
|
|
30
32
|
mssql_type_text (str): The name of the MSSQL type to map.
|
|
@@ -34,16 +36,16 @@ def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE:
|
|
|
34
36
|
"""
|
|
35
37
|
internal_type_name = mssql_type_text.lower()
|
|
36
38
|
types_map = {
|
|
37
|
-
(
|
|
38
|
-
(
|
|
39
|
-
(
|
|
40
|
-
(
|
|
41
|
-
(
|
|
42
|
-
(
|
|
43
|
-
(
|
|
44
|
-
(
|
|
45
|
-
(
|
|
46
|
-
(
|
|
39
|
+
("tinyint", "smallint", "int", "bigint"): MYSQL_DATA_TYPE.INT,
|
|
40
|
+
("bit",): MYSQL_DATA_TYPE.BOOL,
|
|
41
|
+
("money", "smallmoney", "float", "real"): MYSQL_DATA_TYPE.FLOAT,
|
|
42
|
+
("decimal", "numeric"): MYSQL_DATA_TYPE.DECIMAL,
|
|
43
|
+
("date",): MYSQL_DATA_TYPE.DATE,
|
|
44
|
+
("time",): MYSQL_DATA_TYPE.TIME,
|
|
45
|
+
("datetime2", "datetimeoffset", "datetime", "smalldatetime"): MYSQL_DATA_TYPE.DATETIME,
|
|
46
|
+
("varchar", "nvarchar"): MYSQL_DATA_TYPE.VARCHAR,
|
|
47
|
+
("char", "text", "nchar", "ntext"): MYSQL_DATA_TYPE.TEXT,
|
|
48
|
+
("binary", "varbinary", "image"): MYSQL_DATA_TYPE.BINARY,
|
|
47
49
|
}
|
|
48
50
|
|
|
49
51
|
for db_types_list, mysql_data_type in types_map.items():
|
|
@@ -54,86 +56,123 @@ def _map_type(mssql_type_text: str) -> MYSQL_DATA_TYPE:
|
|
|
54
56
|
return MYSQL_DATA_TYPE.VARCHAR
|
|
55
57
|
|
|
56
58
|
|
|
57
|
-
def _make_table_response(
|
|
59
|
+
def _make_table_response(
|
|
60
|
+
result: list[Union[dict[str, Any], tuple]], cursor: Union[pymssql.Cursor, "pyodbc.Cursor"], use_odbc: bool = False
|
|
61
|
+
) -> Response:
|
|
58
62
|
"""Build response from result and cursor.
|
|
59
63
|
|
|
60
64
|
Args:
|
|
61
|
-
result (list[dict[str, Any]]): result of the query.
|
|
62
|
-
cursor (pymssql.Cursor): cursor object.
|
|
65
|
+
result (list[Union[dict[str, Any], tuple]]): result of the query.
|
|
66
|
+
cursor (Union[pymssql.Cursor, pyodbc.Cursor]): cursor object.
|
|
67
|
+
use_odbc (bool): whether ODBC connection is being used.
|
|
63
68
|
|
|
64
69
|
Returns:
|
|
65
70
|
Response: response object.
|
|
66
71
|
"""
|
|
67
72
|
description: list[tuple[Any]] = cursor.description
|
|
68
73
|
mysql_types: list[MYSQL_DATA_TYPE] = []
|
|
74
|
+
columns = [x[0] for x in cursor.description]
|
|
69
75
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
76
|
+
if not result:
|
|
77
|
+
data_frame = pd.DataFrame(columns=columns)
|
|
78
|
+
elif use_odbc:
|
|
79
|
+
# For pyodbc with large datasets, convert Row objects efficiently
|
|
80
|
+
# Using iterator with pd.DataFrame avoids intermediate list creation
|
|
81
|
+
try:
|
|
82
|
+
data_frame = pd.DataFrame(result, columns=columns)
|
|
83
|
+
except (ValueError, TypeError):
|
|
84
|
+
# Fallback: convert Row objects to tuples
|
|
85
|
+
data_frame = pd.DataFrame.from_records((tuple(row) for row in result), columns=columns)
|
|
86
|
+
else:
|
|
87
|
+
data_frame = pd.DataFrame(result, columns=columns)
|
|
74
88
|
|
|
75
89
|
for column in description:
|
|
76
90
|
column_name = column[0]
|
|
77
91
|
column_type = column[1]
|
|
78
92
|
column_dtype = data_frame[column_name].dtype
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
mysql_types.append(MYSQL_DATA_TYPE.DOUBLE)
|
|
90
|
-
case pymssql.DECIMAL:
|
|
91
|
-
mysql_types.append(MYSQL_DATA_TYPE.DECIMAL)
|
|
92
|
-
case pymssql.STRING:
|
|
93
|
-
mysql_types.append(MYSQL_DATA_TYPE.TEXT)
|
|
94
|
-
case pymssql.DATETIME:
|
|
93
|
+
|
|
94
|
+
if use_odbc:
|
|
95
|
+
# For pyodbc, use type inference based on pandas dtype
|
|
96
|
+
if pd_types.is_integer_dtype(column_dtype):
|
|
97
|
+
mysql_types.append(MYSQL_DATA_TYPE.INT)
|
|
98
|
+
elif pd_types.is_float_dtype(column_dtype):
|
|
99
|
+
mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
|
|
100
|
+
elif pd_types.is_bool_dtype(column_dtype):
|
|
101
|
+
mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
|
|
102
|
+
elif pd_types.is_datetime64_any_dtype(column_dtype):
|
|
95
103
|
mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
# pymssql return datetimes as 'binary' type
|
|
101
|
-
# if timezone is present, then it is datetime.timezone
|
|
102
|
-
series = data_frame[column_name]
|
|
103
|
-
if (
|
|
104
|
-
series.dt.tz is not None
|
|
105
|
-
and isinstance(series.dt.tz, datetime.timezone)
|
|
106
|
-
and series.dt.tz != datetime.timezone.utc
|
|
107
|
-
):
|
|
108
|
-
series = series.dt.tz_convert('UTC')
|
|
109
|
-
data_frame[column_name] = series.dt.tz_localize(None)
|
|
104
|
+
elif pd_types.is_object_dtype(column_dtype):
|
|
105
|
+
if len(data_frame) > 0 and isinstance(
|
|
106
|
+
data_frame[column_name].iloc[0], (datetime.datetime, datetime.date, datetime.time)
|
|
107
|
+
):
|
|
110
108
|
mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
|
|
111
109
|
else:
|
|
112
|
-
mysql_types.append(MYSQL_DATA_TYPE.
|
|
113
|
-
|
|
114
|
-
logger.warning(f"Unknown type: {column_type}, use TEXT as fallback.")
|
|
110
|
+
mysql_types.append(MYSQL_DATA_TYPE.TEXT)
|
|
111
|
+
else:
|
|
115
112
|
mysql_types.append(MYSQL_DATA_TYPE.TEXT)
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
113
|
+
else:
|
|
114
|
+
match column_type:
|
|
115
|
+
case pymssql.NUMBER:
|
|
116
|
+
if pd_types.is_integer_dtype(column_dtype):
|
|
117
|
+
mysql_types.append(MYSQL_DATA_TYPE.INT)
|
|
118
|
+
elif pd_types.is_float_dtype(column_dtype):
|
|
119
|
+
mysql_types.append(MYSQL_DATA_TYPE.FLOAT)
|
|
120
|
+
elif pd_types.is_bool_dtype(column_dtype):
|
|
121
|
+
mysql_types.append(MYSQL_DATA_TYPE.TINYINT)
|
|
122
|
+
else:
|
|
123
|
+
mysql_types.append(MYSQL_DATA_TYPE.DOUBLE)
|
|
124
|
+
case pymssql.DECIMAL:
|
|
125
|
+
mysql_types.append(MYSQL_DATA_TYPE.DECIMAL)
|
|
126
|
+
case pymssql.STRING:
|
|
127
|
+
mysql_types.append(MYSQL_DATA_TYPE.TEXT)
|
|
128
|
+
case pymssql.DATETIME:
|
|
129
|
+
mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
|
|
130
|
+
case pymssql.BINARY:
|
|
131
|
+
# DATE and TIME types returned as 'BINARY' type, and dataframe type is 'object', so it is not possible
|
|
132
|
+
# to infer correct mysql type for them
|
|
133
|
+
if pd_types.is_datetime64_any_dtype(column_dtype):
|
|
134
|
+
# pymssql return datetimes as 'binary' type
|
|
135
|
+
# if timezone is present, then it is datetime.timezone
|
|
136
|
+
series = data_frame[column_name]
|
|
137
|
+
if (
|
|
138
|
+
series.dt.tz is not None
|
|
139
|
+
and isinstance(series.dt.tz, datetime.timezone)
|
|
140
|
+
and series.dt.tz != datetime.timezone.utc
|
|
141
|
+
):
|
|
142
|
+
series = series.dt.tz_convert("UTC")
|
|
143
|
+
data_frame[column_name] = series.dt.tz_localize(None)
|
|
144
|
+
mysql_types.append(MYSQL_DATA_TYPE.DATETIME)
|
|
145
|
+
else:
|
|
146
|
+
mysql_types.append(MYSQL_DATA_TYPE.BINARY)
|
|
147
|
+
case _:
|
|
148
|
+
logger.warning(f"Unknown type: {column_type}, use TEXT as fallback.")
|
|
149
|
+
mysql_types.append(MYSQL_DATA_TYPE.TEXT)
|
|
150
|
+
|
|
151
|
+
return Response(RESPONSE_TYPE.TABLE, data_frame=data_frame, mysql_types=mysql_types)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
class SqlServerHandler(MetaDatabaseHandler):
|
|
125
155
|
"""
|
|
126
156
|
This handler handles connection and execution of the Microsoft SQL Server statements.
|
|
157
|
+
Supports both native pymssql connections and ODBC connections via pyodbc.
|
|
158
|
+
|
|
159
|
+
To use ODBC connection, specify either:
|
|
160
|
+
- 'use_odbc': True in connection parameters, or
|
|
161
|
+
- 'driver': '<ODBC driver name>' in connection parameters
|
|
127
162
|
"""
|
|
128
|
-
|
|
163
|
+
|
|
164
|
+
name = "mssql"
|
|
129
165
|
|
|
130
166
|
def __init__(self, name, **kwargs):
|
|
131
167
|
super().__init__(name)
|
|
132
168
|
self.parser = parse_sql
|
|
133
|
-
self.connection_args = kwargs.get(
|
|
134
|
-
self.dialect =
|
|
135
|
-
self.database = self.connection_args.get(
|
|
136
|
-
self.renderer = SqlalchemyRender(
|
|
169
|
+
self.connection_args = kwargs.get("connection_data")
|
|
170
|
+
self.dialect = "mssql"
|
|
171
|
+
self.database = self.connection_args.get("database")
|
|
172
|
+
self.renderer = SqlalchemyRender("mssql")
|
|
173
|
+
|
|
174
|
+
# Determine if ODBC should be used
|
|
175
|
+
self.use_odbc = self.connection_args.get("use_odbc", False) or "driver" in self.connection_args
|
|
137
176
|
|
|
138
177
|
self.connection = None
|
|
139
178
|
self.is_connected = False
|
|
@@ -145,41 +184,113 @@ class SqlServerHandler(DatabaseHandler):
|
|
|
145
184
|
def connect(self):
|
|
146
185
|
"""
|
|
147
186
|
Establishes a connection to a Microsoft SQL Server database.
|
|
187
|
+
Uses either pymssql (native) or pyodbc based on configuration.
|
|
148
188
|
|
|
149
189
|
Raises:
|
|
150
|
-
pymssql._mssql.OperationalError: If an error occurs while connecting to the
|
|
190
|
+
pymssql._mssql.OperationalError or pyodbc.Error: If an error occurs while connecting to the database.
|
|
151
191
|
|
|
152
192
|
Returns:
|
|
153
|
-
pymssql.Connection: A connection object to the Microsoft SQL Server database.
|
|
193
|
+
Union[pymssql.Connection, pyodbc.Connection]: A connection object to the Microsoft SQL Server database.
|
|
154
194
|
"""
|
|
155
195
|
|
|
156
196
|
if self.is_connected is True:
|
|
157
197
|
return self.connection
|
|
158
198
|
|
|
199
|
+
if self.use_odbc:
|
|
200
|
+
return self._connect_odbc()
|
|
201
|
+
else:
|
|
202
|
+
return self._connect_pymssql()
|
|
203
|
+
|
|
204
|
+
def _connect_pymssql(self):
|
|
205
|
+
"""Connect using pymssql (native FreeTDS-based connection)."""
|
|
159
206
|
# Mandatory connection parameters
|
|
160
|
-
if not all(key in self.connection_args for key in [
|
|
161
|
-
raise ValueError(
|
|
207
|
+
if not all(key in self.connection_args for key in ["host", "user", "password", "database"]):
|
|
208
|
+
raise ValueError("Required parameters (host, user, password, database) must be provided.")
|
|
162
209
|
|
|
163
210
|
config = {
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
211
|
+
"host": self.connection_args.get("host"),
|
|
212
|
+
"user": self.connection_args.get("user"),
|
|
213
|
+
"password": self.connection_args.get("password"),
|
|
214
|
+
"database": self.connection_args.get("database"),
|
|
168
215
|
}
|
|
169
216
|
|
|
170
217
|
# Optional connection parameters
|
|
171
|
-
if
|
|
172
|
-
config[
|
|
218
|
+
if "port" in self.connection_args:
|
|
219
|
+
config["port"] = self.connection_args.get("port")
|
|
173
220
|
|
|
174
|
-
if
|
|
175
|
-
config[
|
|
221
|
+
if "server" in self.connection_args:
|
|
222
|
+
config["server"] = self.connection_args.get("server")
|
|
176
223
|
|
|
177
224
|
try:
|
|
178
225
|
self.connection = pymssql.connect(**config)
|
|
179
226
|
self.is_connected = True
|
|
180
227
|
return self.connection
|
|
181
228
|
except OperationalError as e:
|
|
182
|
-
logger.error(f
|
|
229
|
+
logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!")
|
|
230
|
+
self.is_connected = False
|
|
231
|
+
raise
|
|
232
|
+
|
|
233
|
+
def _connect_odbc(self):
|
|
234
|
+
"""Connect using pyodbc (ODBC connection)."""
|
|
235
|
+
try:
|
|
236
|
+
import pyodbc
|
|
237
|
+
except ImportError as e:
|
|
238
|
+
raise ImportError(
|
|
239
|
+
"pyodbc is not installed. Install it with 'pip install pyodbc' or "
|
|
240
|
+
"'pip install mindsdb[mssql-odbc]' to use ODBC connections."
|
|
241
|
+
) from e
|
|
242
|
+
|
|
243
|
+
# Mandatory connection parameters
|
|
244
|
+
if not all(key in self.connection_args for key in ["host", "user", "password", "database"]):
|
|
245
|
+
raise ValueError("Required parameters (host, user, password, database) must be provided.")
|
|
246
|
+
|
|
247
|
+
driver = self.connection_args.get("driver", "ODBC Driver 17 for SQL Server")
|
|
248
|
+
host = self.connection_args.get("host")
|
|
249
|
+
port = self.connection_args.get("port", 1433)
|
|
250
|
+
database = self.connection_args.get("database")
|
|
251
|
+
user = self.connection_args.get("user")
|
|
252
|
+
password = self.connection_args.get("password")
|
|
253
|
+
|
|
254
|
+
conn_str_parts = [
|
|
255
|
+
f"DRIVER={{{driver}}}",
|
|
256
|
+
f"SERVER={host},{port}",
|
|
257
|
+
f"DATABASE={database}",
|
|
258
|
+
f"UID={user}",
|
|
259
|
+
f"PWD={password}",
|
|
260
|
+
]
|
|
261
|
+
|
|
262
|
+
# Add optional parameters
|
|
263
|
+
if "encrypt" in self.connection_args:
|
|
264
|
+
conn_str_parts.append(f"Encrypt={self.connection_args.get('encrypt', 'yes')}")
|
|
265
|
+
if "trust_server_certificate" in self.connection_args:
|
|
266
|
+
conn_str_parts.append(
|
|
267
|
+
f"TrustServerCertificate={self.connection_args.get('trust_server_certificate', 'yes')}"
|
|
268
|
+
)
|
|
269
|
+
|
|
270
|
+
if "connection_string_args" in self.connection_args:
|
|
271
|
+
conn_str_parts.append(self.connection_args["connection_string_args"])
|
|
272
|
+
|
|
273
|
+
conn_str = ";".join(conn_str_parts)
|
|
274
|
+
|
|
275
|
+
try:
|
|
276
|
+
self.connection = pyodbc.connect(conn_str, timeout=10)
|
|
277
|
+
self.is_connected = True
|
|
278
|
+
return self.connection
|
|
279
|
+
except pyodbc.Error as e:
|
|
280
|
+
logger.error(f"Error connecting to Microsoft SQL Server {self.database} via ODBC, {e}!")
|
|
281
|
+
self.is_connected = False
|
|
282
|
+
|
|
283
|
+
# Check if it's a driver not found error
|
|
284
|
+
error_msg = str(e)
|
|
285
|
+
if "Driver" in error_msg and ("not found" in error_msg or "specified" in error_msg):
|
|
286
|
+
raise ConnectionError(
|
|
287
|
+
f"ODBC Driver not found: {driver}. "
|
|
288
|
+
f"Please install the Microsoft ODBC Driver for SQL Server. "
|
|
289
|
+
f"Error: {e}"
|
|
290
|
+
) from e
|
|
291
|
+
raise
|
|
292
|
+
except Exception as e:
|
|
293
|
+
logger.error(f"Error connecting to Microsoft SQL Server {self.database} via ODBC, {e}!")
|
|
183
294
|
self.is_connected = False
|
|
184
295
|
raise
|
|
185
296
|
|
|
@@ -208,10 +319,10 @@ class SqlServerHandler(DatabaseHandler):
|
|
|
208
319
|
connection = self.connect()
|
|
209
320
|
with connection.cursor() as cur:
|
|
210
321
|
# Execute a simple query to test the connection
|
|
211
|
-
cur.execute(
|
|
322
|
+
cur.execute("select 1;")
|
|
212
323
|
response.success = True
|
|
213
324
|
except OperationalError as e:
|
|
214
|
-
logger.error(f
|
|
325
|
+
logger.error(f"Error connecting to Microsoft SQL Server {self.database}, {e}!")
|
|
215
326
|
response.error_message = str(e)
|
|
216
327
|
|
|
217
328
|
if response.success and need_to_close:
|
|
@@ -235,23 +346,35 @@ class SqlServerHandler(DatabaseHandler):
|
|
|
235
346
|
need_to_close = self.is_connected is False
|
|
236
347
|
|
|
237
348
|
connection = self.connect()
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
error_code=0,
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
349
|
+
|
|
350
|
+
if self.use_odbc:
|
|
351
|
+
with connection.cursor() as cur:
|
|
352
|
+
try:
|
|
353
|
+
cur.execute(query)
|
|
354
|
+
if cur.description:
|
|
355
|
+
result = cur.fetchall()
|
|
356
|
+
response = _make_table_response(result, cur, use_odbc=True)
|
|
357
|
+
else:
|
|
358
|
+
response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount)
|
|
359
|
+
connection.commit()
|
|
360
|
+
except Exception as e:
|
|
361
|
+
logger.exception(f"Error running query: {query} on {self.database}, {e}!")
|
|
362
|
+
response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
|
|
363
|
+
connection.rollback()
|
|
364
|
+
else:
|
|
365
|
+
with connection.cursor(as_dict=True) as cur:
|
|
366
|
+
try:
|
|
367
|
+
cur.execute(query)
|
|
368
|
+
if cur.description:
|
|
369
|
+
result = cur.fetchall()
|
|
370
|
+
response = _make_table_response(result, cur, use_odbc=False)
|
|
371
|
+
else:
|
|
372
|
+
response = Response(RESPONSE_TYPE.OK, affected_rows=cur.rowcount)
|
|
373
|
+
connection.commit()
|
|
374
|
+
except Exception as e:
|
|
375
|
+
logger.exception(f"Error running query: {query} on {self.database}, {e}!")
|
|
376
|
+
response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
|
|
377
|
+
connection.rollback()
|
|
255
378
|
|
|
256
379
|
if need_to_close is True:
|
|
257
380
|
self.disconnect()
|
|
@@ -326,3 +449,218 @@ class SqlServerHandler(DatabaseHandler):
|
|
|
326
449
|
result = self.native_query(query)
|
|
327
450
|
result.to_columns_table_response(map_type_fn=_map_type)
|
|
328
451
|
return result
|
|
452
|
+
|
|
453
|
+
def meta_get_tables(self, table_names: list[str] | None = None) -> Response:
|
|
454
|
+
"""
|
|
455
|
+
Retrieves metadata information about the tables in the Microsoft SQL Server database
|
|
456
|
+
to be stored in the data catalog.
|
|
457
|
+
|
|
458
|
+
Args:
|
|
459
|
+
table_names (list): A list of table names for which to retrieve metadata information.
|
|
460
|
+
|
|
461
|
+
Returns:
|
|
462
|
+
Response: A response object containing the metadata information, formatted as per the `Response` class.
|
|
463
|
+
"""
|
|
464
|
+
query = f"""
|
|
465
|
+
SELECT
|
|
466
|
+
t.TABLE_NAME as table_name,
|
|
467
|
+
t.TABLE_SCHEMA as table_schema,
|
|
468
|
+
t.TABLE_TYPE as table_type,
|
|
469
|
+
CAST(ep.value AS NVARCHAR(MAX)) as table_description,
|
|
470
|
+
SUM(p.rows) as row_count
|
|
471
|
+
FROM {self.database}.INFORMATION_SCHEMA.TABLES t
|
|
472
|
+
LEFT JOIN {self.database}.sys.tables st
|
|
473
|
+
ON t.TABLE_NAME = st.name
|
|
474
|
+
LEFT JOIN {self.database}.sys.schemas s
|
|
475
|
+
ON st.schema_id = s.schema_id AND t.TABLE_SCHEMA = s.name
|
|
476
|
+
LEFT JOIN {self.database}.sys.extended_properties ep
|
|
477
|
+
ON st.object_id = ep.major_id
|
|
478
|
+
AND ep.minor_id = 0
|
|
479
|
+
AND ep.class = 1
|
|
480
|
+
AND ep.name = 'MS_Description'
|
|
481
|
+
LEFT JOIN {self.database}.sys.partitions p
|
|
482
|
+
ON st.object_id = p.object_id
|
|
483
|
+
AND p.index_id IN (0, 1)
|
|
484
|
+
WHERE t.TABLE_TYPE IN ('BASE TABLE', 'VIEW')
|
|
485
|
+
AND t.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA')
|
|
486
|
+
GROUP BY t.TABLE_NAME, t.TABLE_SCHEMA, t.TABLE_TYPE, ep.value
|
|
487
|
+
"""
|
|
488
|
+
|
|
489
|
+
if table_names is not None and len(table_names) > 0:
|
|
490
|
+
quoted_names = [f"'{t}'" for t in table_names]
|
|
491
|
+
query += f" HAVING t.TABLE_NAME IN ({','.join(quoted_names)})"
|
|
492
|
+
|
|
493
|
+
result = self.native_query(query)
|
|
494
|
+
return result
|
|
495
|
+
|
|
496
|
+
def meta_get_columns(self, table_names: list[str] | None = None) -> Response:
|
|
497
|
+
"""
|
|
498
|
+
Retrieves column metadata for the specified tables (or all tables if no list is provided).
|
|
499
|
+
|
|
500
|
+
Args:
|
|
501
|
+
table_names (list): A list of table names for which to retrieve column metadata.
|
|
502
|
+
|
|
503
|
+
Returns:
|
|
504
|
+
Response: A response object containing the column metadata.
|
|
505
|
+
"""
|
|
506
|
+
query = f"""
|
|
507
|
+
SELECT
|
|
508
|
+
c.TABLE_NAME as table_name,
|
|
509
|
+
c.COLUMN_NAME as column_name,
|
|
510
|
+
c.DATA_TYPE as data_type,
|
|
511
|
+
CAST(ep.value AS NVARCHAR(MAX)) as column_description,
|
|
512
|
+
c.COLUMN_DEFAULT as column_default,
|
|
513
|
+
CASE WHEN c.IS_NULLABLE = 'YES' THEN 1 ELSE 0 END as is_nullable
|
|
514
|
+
FROM {self.database}.INFORMATION_SCHEMA.COLUMNS c
|
|
515
|
+
LEFT JOIN {self.database}.sys.tables st
|
|
516
|
+
ON c.TABLE_NAME = st.name
|
|
517
|
+
LEFT JOIN {self.database}.sys.schemas s
|
|
518
|
+
ON st.schema_id = s.schema_id AND c.TABLE_SCHEMA = s.name
|
|
519
|
+
LEFT JOIN {self.database}.sys.columns sc
|
|
520
|
+
ON st.object_id = sc.object_id AND c.COLUMN_NAME = sc.name
|
|
521
|
+
LEFT JOIN {self.database}.sys.extended_properties ep
|
|
522
|
+
ON st.object_id = ep.major_id
|
|
523
|
+
AND sc.column_id = ep.minor_id
|
|
524
|
+
AND ep.name = 'MS_Description'
|
|
525
|
+
WHERE c.TABLE_SCHEMA NOT IN ('sys', 'INFORMATION_SCHEMA')
|
|
526
|
+
"""
|
|
527
|
+
|
|
528
|
+
if table_names is not None and len(table_names) > 0:
|
|
529
|
+
quoted_names = [f"'{t}'" for t in table_names]
|
|
530
|
+
query += f" AND c.TABLE_NAME IN ({','.join(quoted_names)})"
|
|
531
|
+
|
|
532
|
+
result = self.native_query(query)
|
|
533
|
+
return result
|
|
534
|
+
|
|
535
|
+
def meta_get_column_statistics(self, table_names: list[str] | None = None) -> Response:
|
|
536
|
+
"""
|
|
537
|
+
Retrieves column statistics (e.g., null percentage, distinct value count, min/max values)
|
|
538
|
+
for the specified tables or all tables if no list is provided.
|
|
539
|
+
|
|
540
|
+
Note: Uses SQL Server's sys.dm_db_stats_properties and sys.dm_db_stats_histogram
|
|
541
|
+
(similar to PostgreSQL's pg_stats). Statistics are only available for columns that
|
|
542
|
+
have statistics objects created by SQL Server (typically indexed columns or columns
|
|
543
|
+
used in queries after AUTO_CREATE_STATISTICS).
|
|
544
|
+
|
|
545
|
+
Args:
|
|
546
|
+
table_names (list): A list of table names for which to retrieve column statistics.
|
|
547
|
+
|
|
548
|
+
Returns:
|
|
549
|
+
Response: A response object containing the column statistics.
|
|
550
|
+
"""
|
|
551
|
+
table_filter = ""
|
|
552
|
+
if table_names is not None and len(table_names) > 0:
|
|
553
|
+
quoted_names = [f"'{t}'" for t in table_names]
|
|
554
|
+
table_filter = f" AND t.name IN ({','.join(quoted_names)})"
|
|
555
|
+
|
|
556
|
+
# Using OUTER APPLY to handle table-valued functions properly
|
|
557
|
+
# This is equivalent to PostgreSQL's pg_stats view approach
|
|
558
|
+
# Includes all statistics: auto-created, user-created, and index-based
|
|
559
|
+
# dm_db_stats_histogram columns: range_high_key, range_rows, equal_rows,
|
|
560
|
+
# distinct_range_rows, average_range_rows
|
|
561
|
+
query = f"""
|
|
562
|
+
SELECT DISTINCT
|
|
563
|
+
t.name AS TABLE_NAME,
|
|
564
|
+
c.name AS COLUMN_NAME,
|
|
565
|
+
CAST(NULL AS DECIMAL(10,2)) AS NULL_PERCENTAGE,
|
|
566
|
+
CAST(h.distinct_count AS BIGINT) AS DISTINCT_VALUES_COUNT,
|
|
567
|
+
NULL AS MOST_COMMON_VALUES,
|
|
568
|
+
NULL AS MOST_COMMON_FREQUENCIES,
|
|
569
|
+
CAST(h.min_value AS NVARCHAR(MAX)) AS MINIMUM_VALUE,
|
|
570
|
+
CAST(h.max_value AS NVARCHAR(MAX)) AS MAXIMUM_VALUE
|
|
571
|
+
FROM {self.database}.sys.tables t
|
|
572
|
+
INNER JOIN {self.database}.sys.schemas s
|
|
573
|
+
ON t.schema_id = s.schema_id
|
|
574
|
+
INNER JOIN {self.database}.sys.columns c
|
|
575
|
+
ON t.object_id = c.object_id
|
|
576
|
+
LEFT JOIN {self.database}.sys.stats st
|
|
577
|
+
ON st.object_id = t.object_id
|
|
578
|
+
LEFT JOIN {self.database}.sys.stats_columns sc
|
|
579
|
+
ON sc.object_id = st.object_id
|
|
580
|
+
AND sc.stats_id = st.stats_id
|
|
581
|
+
AND sc.column_id = c.column_id
|
|
582
|
+
AND sc.stats_column_id = 1 -- Only leading column in multi-column stats
|
|
583
|
+
OUTER APPLY (
|
|
584
|
+
SELECT
|
|
585
|
+
MIN(CAST(range_high_key AS NVARCHAR(MAX))) AS min_value,
|
|
586
|
+
MAX(CAST(range_high_key AS NVARCHAR(MAX))) AS max_value,
|
|
587
|
+
SUM(CAST(distinct_range_rows AS BIGINT)) + COUNT(*) AS distinct_count
|
|
588
|
+
FROM {self.database}.sys.dm_db_stats_histogram(st.object_id, st.stats_id)
|
|
589
|
+
WHERE st.object_id IS NOT NULL
|
|
590
|
+
) h
|
|
591
|
+
WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
|
|
592
|
+
{table_filter}
|
|
593
|
+
ORDER BY t.name, c.name
|
|
594
|
+
"""
|
|
595
|
+
|
|
596
|
+
result = self.native_query(query)
|
|
597
|
+
return result
|
|
598
|
+
|
|
599
|
+
def meta_get_primary_keys(self, table_names: list[str] | None = None) -> Response:
|
|
600
|
+
"""
|
|
601
|
+
Retrieves primary key information for the specified tables (or all tables if no list is provided).
|
|
602
|
+
|
|
603
|
+
Args:
|
|
604
|
+
table_names (list): A list of table names for which to retrieve primary key information.
|
|
605
|
+
|
|
606
|
+
Returns:
|
|
607
|
+
Response: A response object containing the primary key information.
|
|
608
|
+
"""
|
|
609
|
+
query = f"""
|
|
610
|
+
SELECT
|
|
611
|
+
tc.TABLE_NAME as table_name,
|
|
612
|
+
kcu.COLUMN_NAME as column_name,
|
|
613
|
+
kcu.ORDINAL_POSITION as ordinal_position,
|
|
614
|
+
tc.CONSTRAINT_NAME as constraint_name
|
|
615
|
+
FROM {self.database}.INFORMATION_SCHEMA.TABLE_CONSTRAINTS tc
|
|
616
|
+
INNER JOIN {self.database}.INFORMATION_SCHEMA.KEY_COLUMN_USAGE kcu
|
|
617
|
+
ON tc.CONSTRAINT_NAME = kcu.CONSTRAINT_NAME
|
|
618
|
+
AND tc.TABLE_SCHEMA = kcu.TABLE_SCHEMA
|
|
619
|
+
AND tc.TABLE_NAME = kcu.TABLE_NAME
|
|
620
|
+
WHERE tc.CONSTRAINT_TYPE = 'PRIMARY KEY'
|
|
621
|
+
"""
|
|
622
|
+
|
|
623
|
+
if table_names is not None and len(table_names) > 0:
|
|
624
|
+
quoted_names = [f"'{t}'" for t in table_names]
|
|
625
|
+
query += f" AND tc.TABLE_NAME IN ({','.join(quoted_names)})"
|
|
626
|
+
|
|
627
|
+
query += " ORDER BY tc.TABLE_NAME, kcu.ORDINAL_POSITION"
|
|
628
|
+
|
|
629
|
+
result = self.native_query(query)
|
|
630
|
+
return result
|
|
631
|
+
|
|
632
|
+
def meta_get_foreign_keys(self, table_names: list[str] | None = None) -> Response:
|
|
633
|
+
"""
|
|
634
|
+
Retrieves foreign key information for the specified tables (or all tables if no list is provided).
|
|
635
|
+
|
|
636
|
+
Args:
|
|
637
|
+
table_names (list): A list of table names for which to retrieve foreign key information.
|
|
638
|
+
|
|
639
|
+
Returns:
|
|
640
|
+
Response: A response object containing the foreign key information.
|
|
641
|
+
"""
|
|
642
|
+
query = f"""
|
|
643
|
+
SELECT
|
|
644
|
+
OBJECT_NAME(fk.referenced_object_id) as parent_table_name,
|
|
645
|
+
COL_NAME(fkc.referenced_object_id, fkc.referenced_column_id) as parent_column_name,
|
|
646
|
+
OBJECT_NAME(fk.parent_object_id) as child_table_name,
|
|
647
|
+
COL_NAME(fkc.parent_object_id, fkc.parent_column_id) as child_column_name,
|
|
648
|
+
fk.name as constraint_name
|
|
649
|
+
FROM {self.database}.sys.foreign_keys fk
|
|
650
|
+
INNER JOIN {self.database}.sys.foreign_key_columns fkc
|
|
651
|
+
ON fk.object_id = fkc.constraint_object_id
|
|
652
|
+
INNER JOIN {self.database}.sys.tables t
|
|
653
|
+
ON fk.parent_object_id = t.object_id
|
|
654
|
+
INNER JOIN {self.database}.sys.schemas s
|
|
655
|
+
ON t.schema_id = s.schema_id
|
|
656
|
+
WHERE s.name NOT IN ('sys', 'INFORMATION_SCHEMA')
|
|
657
|
+
"""
|
|
658
|
+
|
|
659
|
+
if table_names is not None and len(table_names) > 0:
|
|
660
|
+
quoted_names = [f"'{t}'" for t in table_names]
|
|
661
|
+
query += f" AND OBJECT_NAME(fk.parent_object_id) IN ({','.join(quoted_names)})"
|
|
662
|
+
|
|
663
|
+
query += " ORDER BY child_table_name, constraint_name"
|
|
664
|
+
|
|
665
|
+
result = self.native_query(query)
|
|
666
|
+
return result
|