MindsDB 25.9.3rc1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (88)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +1 -9
  3. mindsdb/api/a2a/__init__.py +1 -1
  4. mindsdb/api/a2a/agent.py +9 -1
  5. mindsdb/api/a2a/common/server/server.py +4 -0
  6. mindsdb/api/a2a/common/server/task_manager.py +8 -1
  7. mindsdb/api/a2a/common/types.py +66 -0
  8. mindsdb/api/a2a/task_manager.py +50 -0
  9. mindsdb/api/common/middleware.py +1 -1
  10. mindsdb/api/executor/command_executor.py +49 -36
  11. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
  12. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +2 -2
  13. mindsdb/api/executor/datahub/datanodes/system_tables.py +2 -1
  14. mindsdb/api/executor/planner/query_prepare.py +2 -20
  15. mindsdb/api/executor/utilities/sql.py +5 -4
  16. mindsdb/api/http/initialize.py +76 -60
  17. mindsdb/api/http/namespaces/agents.py +0 -3
  18. mindsdb/api/http/namespaces/chatbots.py +0 -5
  19. mindsdb/api/http/namespaces/file.py +2 -0
  20. mindsdb/api/http/namespaces/handlers.py +10 -5
  21. mindsdb/api/http/namespaces/knowledge_bases.py +20 -0
  22. mindsdb/api/http/namespaces/sql.py +2 -2
  23. mindsdb/api/http/start.py +2 -2
  24. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
  25. mindsdb/integrations/handlers/byom_handler/byom_handler.py +2 -10
  26. mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
  27. mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
  28. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
  29. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
  30. mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
  31. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
  32. mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
  33. mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
  34. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
  35. mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
  36. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +2 -2
  37. mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
  38. mindsdb/integrations/libs/response.py +2 -2
  39. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
  40. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
  41. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +24 -21
  42. mindsdb/interfaces/agents/agents_controller.py +0 -2
  43. mindsdb/interfaces/data_catalog/data_catalog_loader.py +6 -7
  44. mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
  45. mindsdb/interfaces/database/data_handlers_cache.py +190 -0
  46. mindsdb/interfaces/database/database.py +3 -3
  47. mindsdb/interfaces/database/integrations.py +1 -121
  48. mindsdb/interfaces/database/projects.py +2 -6
  49. mindsdb/interfaces/database/views.py +1 -4
  50. mindsdb/interfaces/jobs/jobs_controller.py +0 -4
  51. mindsdb/interfaces/jobs/scheduler.py +0 -1
  52. mindsdb/interfaces/knowledge_base/controller.py +197 -108
  53. mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
  54. mindsdb/interfaces/knowledge_base/executor.py +11 -0
  55. mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
  56. mindsdb/interfaces/model/model_controller.py +4 -4
  57. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +4 -10
  58. mindsdb/interfaces/skills/skills_controller.py +1 -4
  59. mindsdb/interfaces/storage/db.py +16 -6
  60. mindsdb/interfaces/triggers/triggers_controller.py +1 -3
  61. mindsdb/utilities/config.py +19 -2
  62. mindsdb/utilities/exception.py +2 -2
  63. mindsdb/utilities/json_encoder.py +24 -10
  64. mindsdb/utilities/render/sqlalchemy_render.py +15 -14
  65. mindsdb/utilities/starters.py +0 -10
  66. {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +276 -264
  67. {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +70 -84
  68. mindsdb/api/postgres/__init__.py +0 -0
  69. mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
  70. mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
  71. mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -182
  72. mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
  73. mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
  74. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
  75. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
  76. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
  77. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
  78. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -265
  79. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
  80. mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
  81. mindsdb/api/postgres/start.py +0 -11
  82. mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
  83. mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
  84. mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
  85. mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
  86. {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
  87. {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  88. {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -8,8 +8,7 @@ from http import HTTPStatus
8
8
 
9
9
 
10
10
  import requests
11
- from flask import Flask, url_for, make_response, request, send_from_directory
12
- from flask.json import dumps
11
+ from flask import Flask, url_for, request, send_from_directory
13
12
  from flask_compress import Compress
14
13
  from flask_restx import Api
15
14
  from werkzeug.exceptions import HTTPException
@@ -47,7 +46,7 @@ from mindsdb.metrics.server import init_metrics
47
46
  from mindsdb.utilities import log
48
47
  from mindsdb.utilities.config import config
49
48
  from mindsdb.utilities.context import context as ctx
50
- from mindsdb.utilities.json_encoder import CustomJSONProvider
49
+ from mindsdb.utilities.json_encoder import ORJSONProvider
51
50
  from mindsdb.utilities.ps import is_pid_listen_port, wait_func_is_true
52
51
  from mindsdb.utilities.sentry import sentry_sdk # noqa: F401
53
52
  from mindsdb.utilities.otel import trace # noqa: F401
@@ -88,14 +87,8 @@ class Swagger_Api(Api):
88
87
  return url_for(self.endpoint("specs"), _external=False)
89
88
 
90
89
 
91
- def custom_output_json(data, code, headers=None):
92
- resp = make_response(dumps(data, cls=CustomJSONProvider), code)
93
- resp.headers.extend(headers or {})
94
- return resp
95
-
96
-
97
90
  def get_last_compatible_gui_version() -> Version | bool:
98
- logger.debug("Getting last compatible frontend..")
91
+ logger.debug("Getting last compatible frontend...")
99
92
  try:
100
93
  res = requests.get(
101
94
  "https://mindsdb-web-builds.s3.amazonaws.com/compatible-config.json",
@@ -154,7 +147,7 @@ def get_last_compatible_gui_version() -> Version | bool:
154
147
  all_lower_versions = [parse_version(x) for x in lower_versions.keys()]
155
148
  gui_version_lv = gui_versions[all_lower_versions[-1].base_version]
156
149
  except Exception:
157
- logger.exception("Error in compatible-config.json structure:")
150
+ logger.exception("Error in compatible-config.json structure")
158
151
  return False
159
152
 
160
153
  logger.debug(f"Last compatible frontend version: {gui_version_lv}.")
@@ -178,7 +171,6 @@ def get_current_gui_version() -> Version:
178
171
 
179
172
 
180
173
  def initialize_static():
181
- logger.debug("Initializing static..")
182
174
  last_gui_version_lv = get_last_compatible_gui_version()
183
175
  current_gui_version_lv = get_current_gui_version()
184
176
  required_gui_version = config["gui"].get("version")
@@ -206,22 +198,28 @@ def initialize_static():
206
198
  return success
207
199
 
208
200
 
209
- def initialize_app():
201
+ def initialize_app(is_restart: bool = False):
210
202
  static_root = config["paths"]["static"]
211
203
  logger.debug(f"Static route: {static_root}")
212
- gui_exists = Path(static_root).joinpath("index.html").is_file()
213
- logger.debug(f"Does GUI already exist.. {'YES' if gui_exists else 'NO'}")
214
204
  init_static_thread = None
205
+ if not is_restart:
206
+ gui_exists = Path(static_root).joinpath("index.html").is_file()
207
+ logger.debug(f"Does GUI already exist.. {'YES' if gui_exists else 'NO'}")
208
+
209
+ if config["gui"]["autoupdate"] is True or (config["gui"]["open_on_start"] is True and gui_exists is False):
210
+ logger.debug("Initializing static...")
211
+ init_static_thread = threading.Thread(target=initialize_static, name="initialize_static")
212
+ init_static_thread.start()
213
+ else:
214
+ logger.debug(f"Skip initializing static: config['gui']={config['gui']}, gui_exists={gui_exists}")
215
215
 
216
- if config["gui"]["autoupdate"] is True or (config["gui"]["open_on_start"] is True and gui_exists is False):
217
- init_static_thread = threading.Thread(target=initialize_static, name="initialize_static")
218
- init_static_thread.start()
216
+ app, api = initialize_flask()
219
217
 
220
- # Wait for static initialization.
221
- if config["gui"]["open_on_start"] is True and init_static_thread is not None:
222
- init_static_thread.join()
218
+ if not is_restart and config["gui"]["open_on_start"]:
219
+ if init_static_thread is not None:
220
+ init_static_thread.join()
221
+ open_gui(init_static_thread)
223
222
 
224
- app, api = initialize_flask(config, init_static_thread)
225
223
  Compress(app)
226
224
 
227
225
  initialize_interfaces(app)
@@ -240,22 +238,30 @@ def initialize_app():
240
238
  "The endpoint you are trying to access does not exist on the server.",
241
239
  )
242
240
 
243
- # Normalize the path.
244
- full_path = os.path.normpath(os.path.join(static_root, path))
241
+ try:
242
+ # Ensure the requested path is within the static directory
243
+ # https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to
244
+ requested_path = (static_root / path).resolve()
245
+
246
+ if not requested_path.is_relative_to(static_root.resolve()):
247
+ return http_error(
248
+ HTTPStatus.FORBIDDEN,
249
+ "Forbidden",
250
+ "You are not allowed to access the requested resource.",
251
+ )
252
+
253
+ if requested_path.is_file():
254
+ return send_from_directory(static_root, path)
255
+ else:
256
+ return send_from_directory(static_root, "index.html")
245
257
 
246
- # Check for directory traversal attacks.
247
- if not full_path.startswith(str(static_root)):
258
+ except (ValueError, OSError):
248
259
  return http_error(
249
- HTTPStatus.FORBIDDEN,
250
- "Forbidden",
251
- "You are not allowed to access the requested resource.",
260
+ HTTPStatus.BAD_REQUEST,
261
+ "Bad Request",
262
+ "Invalid path requested.",
252
263
  )
253
264
 
254
- if os.path.isfile(full_path):
255
- return send_from_directory(static_root, path)
256
- else:
257
- return send_from_directory(static_root, "index.html")
258
-
259
265
  protected_namespaces = [
260
266
  tab_ns,
261
267
  utils_ns,
@@ -309,7 +315,6 @@ def initialize_app():
309
315
 
310
316
  @app.before_request
311
317
  def before_request():
312
- logger.debug(f"HTTP {request.method}: {request.path}")
313
318
  ctx.set_default()
314
319
 
315
320
  h = request.headers.get("Authorization")
@@ -348,15 +353,15 @@ def initialize_app():
348
353
  if company_id is not None:
349
354
  try:
350
355
  company_id = int(company_id)
351
- except Exception:
352
- logger.exception(f"Could not parse company id: {company_id} | exception:")
356
+ except Exception as e:
357
+ logger.error(f"Could not parse company id: {company_id} | exception: {e}")
353
358
  company_id = None
354
359
 
355
360
  if user_class is not None:
356
361
  try:
357
362
  user_class = int(user_class)
358
- except Exception:
359
- logger.exception(f"Could not parse user_class: {user_class} | exception:")
363
+ except Exception as e:
364
+ logger.error(f"Could not parse user_class: {user_class} | exception: {e}")
360
365
  user_class = 0
361
366
  else:
362
367
  user_class = 0
@@ -370,8 +375,8 @@ def initialize_app():
370
375
  return app
371
376
 
372
377
 
373
- def initialize_flask(config, init_static_thread):
374
- logger.debug("Initializing flask..")
378
+ def initialize_flask():
379
+ logger.debug("Initializing flask...")
375
380
  # region required for windows https://github.com/mindsdb/mindsdb/issues/2526
376
381
  mimetypes.add_type("text/css", ".css")
377
382
  mimetypes.add_type("text/javascript", ".js")
@@ -392,7 +397,7 @@ def initialize_flask(config, init_static_thread):
392
397
 
393
398
  app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 60
394
399
  app.config["SWAGGER_HOST"] = "http://localhost:8000/mindsdb"
395
- app.json = CustomJSONProvider()
400
+ app.json = ORJSONProvider(app)
396
401
 
397
402
  authorizations = {"apikey": {"type": "apiKey", "in": "header", "name": "Authorization"}}
398
403
 
@@ -406,30 +411,41 @@ def initialize_flask(config, init_static_thread):
406
411
  doc="/doc/",
407
412
  )
408
413
 
409
- api.representations["application/json"] = custom_output_json
414
+ def __output_json_orjson(data, code, headers=None):
415
+ from flask import current_app, make_response
410
416
 
411
- port = config["api"]["http"]["port"]
412
- host = config["api"]["http"]["host"]
417
+ dumped = current_app.json.dumps(data)
418
+ resp = make_response(dumped, code)
419
+ if headers:
420
+ resp.headers.extend(headers)
421
+ resp.mimetype = "application/json"
422
+ return resp
413
423
 
414
- if config["gui"]["open_on_start"]:
415
- if host in ("", "0.0.0.0"):
416
- url = f"http://127.0.0.1:{port}/"
417
- else:
418
- url = f"http://{host}:{port}/"
419
- logger.info(f" - GUI available at {url}")
420
-
421
- pid = os.getpid()
422
- thread = threading.Thread(
423
- target=_open_webbrowser,
424
- args=(url, pid, port, init_static_thread, config["paths"]["static"]),
425
- daemon=True,
426
- name="open_webbrowser",
427
- )
428
- thread.start()
424
+ api.representations["application/json"] = __output_json_orjson
429
425
 
430
426
  return app, api
431
427
 
432
428
 
429
+ def open_gui(init_static_thread):
430
+ port = config["api"]["http"]["port"]
431
+ host = config["api"]["http"]["host"]
432
+
433
+ if host in ("", "0.0.0.0"):
434
+ url = f"http://127.0.0.1:{port}/"
435
+ else:
436
+ url = f"http://{host}:{port}/"
437
+ logger.info(f" - GUI available at {url}")
438
+
439
+ pid = os.getpid()
440
+ thread = threading.Thread(
441
+ target=_open_webbrowser,
442
+ args=(url, pid, port, init_static_thread, config["paths"]["static"]),
443
+ daemon=True,
444
+ name="open_webbrowser",
445
+ )
446
+ thread.start()
447
+
448
+
433
449
  def initialize_interfaces(app):
434
450
  app.integration_controller = integration_controller
435
451
  app.database_controller = DatabaseController()
@@ -26,9 +26,6 @@ def create_agent(project_name, name, agent):
26
26
  if name is None:
27
27
  return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for agent')
28
28
 
29
- if not name.islower():
30
- return http_error(HTTPStatus.BAD_REQUEST, "Wrong name", f"The name must be in lower case: {name}")
31
-
32
29
  model_name = agent.get("model_name")
33
30
  provider = agent.get("provider")
34
31
  skills = agent.get("skills", [])
@@ -18,9 +18,6 @@ def create_chatbot(project_name, name, chatbot):
18
18
  if name is None:
19
19
  return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for chatbot')
20
20
 
21
- if not name.islower():
22
- return http_error(HTTPStatus.BAD_REQUEST, "Wrong name", f"The name must be in lower case: {name}")
23
-
24
21
  model_name = chatbot.get("model_name", None)
25
22
  agent_name = chatbot.get("agent_name", None)
26
23
  if model_name is None and agent_name is None:
@@ -241,8 +238,6 @@ class ChatBotResource(Resource):
241
238
  "Chatbot already exists",
242
239
  f"Chatbot with name {name} already exists. Please choose a different one.",
243
240
  )
244
- if not name.islower():
245
- return http_error(HTTPStatus.BAD_REQUEST, "Wrong name", f"The name must be in lower case: {name}")
246
241
 
247
242
  if existing_chatbot is None:
248
243
  # Create
@@ -196,6 +196,8 @@ class File(Resource):
196
196
  return http_error(400, "Wrong content.", "Archive must contain data file in root.")
197
197
 
198
198
  try:
199
+ if not Path(mindsdb_file_name).suffix == "":
200
+ return http_error(400, "Error", "File name cannot contain extension.")
199
201
  ca.file_controller.save_file(mindsdb_file_name, file_path, file_name=original_file_name)
200
202
  except FileProcessingError as e:
201
203
  return http_error(400, "Error", str(e))
@@ -125,11 +125,16 @@ def prepare_formdata():
125
125
  params[name] = value
126
126
 
127
127
  def on_file(file):
128
- file_name = file.field_name.decode()
129
- if file_name not in ("code", "modules"):
130
- raise ValueError(f"Wrong field name: {file_name}")
131
- params[file_name] = file.file_object
132
- file_names.append(file_name)
128
+ file_name = file.file_name.decode()
129
+ if Path(file_name).name != file_name:
130
+ raise ValueError(f"Wrong file name: {file_name}")
131
+
132
+ field_name = file.field_name.decode()
133
+ if field_name not in ("code", "modules"):
134
+ raise ValueError(f"Wrong field name: {field_name}")
135
+
136
+ params[field_name] = file.file_object
137
+ file_names.append(field_name)
133
138
 
134
139
  temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_")
135
140
 
@@ -241,6 +241,26 @@ class KnowledgeBaseResource(Resource):
241
241
  if kb_data.get("query"):
242
242
  table.insert_query_result(kb_data["query"], project_name)
243
243
 
244
+ # update KB
245
+ update_kb_data = {}
246
+ if "params" in kb_data:
247
+ allowed_keys = [
248
+ "id_column",
249
+ "metadata_columns",
250
+ "content_columns",
251
+ "preprocessing",
252
+ "reranking_model",
253
+ "embedding_model",
254
+ ]
255
+ update_kb_data = {k: v for k, v in kb_data["params"].items() if k in allowed_keys}
256
+ if update_kb_data or "preprocessing" in kb_data:
257
+ session.kb_controller.update(
258
+ knowledge_base_name,
259
+ project.name,
260
+ params=update_kb_data,
261
+ preprocessing_config=kb_data.get("preprocessing"),
262
+ )
263
+
244
264
  except ExecutorException as e:
245
265
  logger.exception("Error during preprocessing and insertion:")
246
266
  return http_error(
@@ -63,13 +63,13 @@ class Query(Resource):
63
63
  }
64
64
  logger.warning(f"Error query processing: {e}")
65
65
  except QueryError as e:
66
- error_type = "expected" if e.is_acceptable else "unexpected"
66
+ error_type = "expected" if e.is_expected else "unexpected"
67
67
  query_response = {
68
68
  "type": SQL_RESPONSE_TYPE.ERROR,
69
69
  "error_code": 0,
70
70
  "error_message": str(e),
71
71
  }
72
- if e.is_acceptable:
72
+ if e.is_expected:
73
73
  logger.warning(f"Query failed due to expected reason: {e}")
74
74
  else:
75
75
  logger.exception("Error query processing:")
mindsdb/api/http/start.py CHANGED
@@ -23,12 +23,12 @@ gc.enable()
23
23
  logger = log.getLogger(__name__)
24
24
 
25
25
 
26
- def start(verbose, app: Flask = None):
26
+ def start(verbose, app: Flask = None, is_restart: bool = False):
27
27
  db.init()
28
28
  init_lexer_parsers()
29
29
 
30
30
  if app is None:
31
- app = initialize_app()
31
+ app = initialize_app(is_restart)
32
32
 
33
33
  port = config["api"]["http"]["port"]
34
34
  host = config["api"]["http"]["host"]
@@ -4,6 +4,7 @@ from typing import Any
4
4
  from array import array
5
5
 
6
6
  import numpy as np
7
+ import orjson
7
8
  from numpy import dtype as np_dtype
8
9
  import pandas as pd
9
10
  from pandas.api import types as pd_types
@@ -21,7 +22,8 @@ from mindsdb.utilities.json_encoder import CustomJSONEncoder
21
22
 
22
23
  logger = log.getLogger(__name__)
23
24
 
24
- json_encoder = CustomJSONEncoder()
25
+ # Pre-bind default encoder for custom types so we can serialize JSON consistently
26
+ _default_json = CustomJSONEncoder().default
25
27
 
26
28
 
27
29
  def column_to_mysql_column_dict(column: Column, database_name: str | None = None) -> dict[str, str | int]:
@@ -115,7 +117,11 @@ def _dump_str(var: Any) -> str | None:
115
117
  return str(var)[2:-1]
116
118
  if isinstance(var, (dict, list)):
117
119
  try:
118
- return json_encoder.encode(var)
120
+ return orjson.dumps(
121
+ var,
122
+ default=_default_json,
123
+ option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME,
124
+ ).decode("utf-8")
119
125
  except Exception:
120
126
  return str(var)
121
127
  # pd.isna returns array of bools for list
@@ -1,10 +1,9 @@
1
1
  """BYOM: Bring Your Own Model
2
2
 
3
3
  env vars to contloll BYOM:
4
- - MINDSDB_BYOM_ENABLED - can BYOM be uysed or not. Locally enabled by default.
4
+ - MINDSDB_BYOM_ENABLED - can BYOM be used or not. Locally enabled by default.
5
5
  - MINDSDB_BYOM_INHOUSE_ENABLED - enable or disable 'inhouse' BYOM usage. Locally enabled by default.
6
6
  - MINDSDB_BYOM_DEFAULT_TYPE - [inhouse|venv] default byom type. Locally it is 'venv' by default.
7
- - MINDSDB_BYOM_TYPE - [safe|unsafe] - obsolete, same as above.
8
7
  """
9
8
 
10
9
  import os
@@ -73,15 +72,8 @@ class BYOMHandler(BaseMLEngine):
73
72
  self._default_byom_type = BYOM_TYPE.VENV
74
73
  if os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE") is not None:
75
74
  self._default_byom_type = BYOM_TYPE[os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE").upper()]
76
- else:
77
- env_var = os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE")
78
- if env_var == "SAVE":
79
- self._default_byom_type = BYOM_TYPE["VENV"]
80
- elif env_var == "UNSAVE":
81
- self._default_byom_type = BYOM_TYPE["INHOUSE"]
82
- else:
83
- raise KeyError
84
75
  except KeyError:
76
+ logger.warning(f"Wrong value of env var MINDSDB_BYOM_DEFAULT_TYPE, {BYOM_TYPE.VENV} will be used")
85
77
  self._default_byom_type = BYOM_TYPE.VENV
86
78
  # endregion
87
79
 
@@ -1,24 +1,50 @@
1
1
  from typing import Text, Dict, Any, Optional
2
2
 
3
+ import pandas as pd
3
4
  from databricks.sql import connect, RequestError, ServerOperationError
4
5
  from databricks.sql.client import Connection
5
6
  from databricks.sqlalchemy import DatabricksDialect
6
7
  from mindsdb_sql_parser.ast.base import ASTNode
7
- from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
8
- import pandas as pd
9
8
 
9
+ from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
10
10
  from mindsdb.integrations.libs.base import DatabaseHandler
11
11
  from mindsdb.integrations.libs.response import (
12
12
  HandlerStatusResponse as StatusResponse,
13
13
  HandlerResponse as Response,
14
14
  RESPONSE_TYPE,
15
+ INF_SCHEMA_COLUMNS_NAMES_SET,
15
16
  )
16
17
  from mindsdb.utilities import log
18
+ from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE
17
19
 
18
20
 
19
21
  logger = log.getLogger(__name__)
20
22
 
21
23
 
24
+ def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE:
25
+ """Map MyDatabricks SQL text types names to MySQL types as enum.
26
+
27
+ Args:
28
+ internal_type_name (str): The name of the Databricks type to map.
29
+
30
+ Returns:
31
+ MYSQL_DATA_TYPE: The MySQL type enum that corresponds to the MySQL text type name.
32
+ """
33
+ if not isinstance(internal_type_name, str):
34
+ return MYSQL_DATA_TYPE.TEXT
35
+ if internal_type_name.upper() == "STRING":
36
+ return MYSQL_DATA_TYPE.TEXT
37
+ if internal_type_name.upper() == "LONG":
38
+ return MYSQL_DATA_TYPE.BIGINT
39
+ if internal_type_name.upper() == "SHORT":
40
+ return MYSQL_DATA_TYPE.SMALLINT
41
+ try:
42
+ return MYSQL_DATA_TYPE(internal_type_name.upper())
43
+ except Exception:
44
+ logger.info(f"Databricks handler: unknown type: {internal_type_name}, use TEXT as fallback.")
45
+ return MYSQL_DATA_TYPE.TEXT
46
+
47
+
22
48
  class DatabricksHandler(DatabaseHandler):
23
49
  """
24
50
  This handler handles the connection and execution of SQL statements on Databricks.
@@ -64,11 +90,8 @@ class DatabricksHandler(DatabaseHandler):
64
90
  return self.connection
65
91
 
66
92
  # Mandatory connection parameters.
67
- if not all(
68
- key in self.connection_data
69
- for key in ["server_hostname", "http_path", "access_token"]
70
- ):
71
- raise ValueError('Required parameters (server_hostname, http_path, access_token) must be provided.')
93
+ if not all(key in self.connection_data for key in ["server_hostname", "http_path", "access_token"]):
94
+ raise ValueError("Required parameters (server_hostname, http_path, access_token) must be provided.")
72
95
 
73
96
  config = {
74
97
  "server_hostname": self.connection_data["server_hostname"],
@@ -88,19 +111,17 @@ class DatabricksHandler(DatabaseHandler):
88
111
  config[parameter] = self.connection_data[parameter]
89
112
 
90
113
  try:
91
- self.connection = connect(
92
- **config
93
- )
114
+ self.connection = connect(**config)
94
115
  self.is_connected = True
95
116
  return self.connection
96
117
  except RequestError as request_error:
97
- logger.error(f'Request error when connecting to Databricks: {request_error}')
118
+ logger.error(f"Request error when connecting to Databricks: {request_error}")
98
119
  raise
99
120
  except RuntimeError as runtime_error:
100
- logger.error(f'Runtime error when connecting to Databricks: {runtime_error}')
121
+ logger.error(f"Runtime error when connecting to Databricks: {runtime_error}")
101
122
  raise
102
123
  except Exception as unknown_error:
103
- logger.error(f'Unknown error when connecting to Databricks: {unknown_error}')
124
+ logger.error(f"Unknown error when connecting to Databricks: {unknown_error}")
104
125
  raise
105
126
 
106
127
  def disconnect(self):
@@ -129,7 +150,7 @@ class DatabricksHandler(DatabaseHandler):
129
150
 
130
151
  # Execute a simple query to check the connection.
131
152
  query = "SELECT 1 FROM information_schema.schemata"
132
- if 'schema' in self.connection_data:
153
+ if "schema" in self.connection_data:
133
154
  query += f" WHERE schema_name = '{self.connection_data['schema']}'"
134
155
 
135
156
  with connection.cursor() as cursor:
@@ -138,14 +159,14 @@ class DatabricksHandler(DatabaseHandler):
138
159
 
139
160
  # If the query does not return a result, the schema does not exist.
140
161
  if not result:
141
- raise ValueError(f'The schema {self.connection_data["schema"]} does not exist!')
162
+ raise ValueError(f"The schema {self.connection_data['schema']} does not exist!")
142
163
 
143
164
  response.success = True
144
165
  except (ValueError, RequestError, RuntimeError, ServerOperationError) as known_error:
145
- logger.error(f'Connection check to Databricks failed, {known_error}!')
166
+ logger.error(f"Connection check to Databricks failed, {known_error}!")
146
167
  response.error_message = str(known_error)
147
168
  except Exception as unknown_error:
148
- logger.error(f'Connection check to Databricks failed due to an unknown error, {unknown_error}!')
169
+ logger.error(f"Connection check to Databricks failed due to an unknown error, {unknown_error}!")
149
170
  response.error_message = str(unknown_error)
150
171
 
151
172
  if response.success and need_to_close:
@@ -176,30 +197,18 @@ class DatabricksHandler(DatabaseHandler):
176
197
  if result:
177
198
  response = Response(
178
199
  RESPONSE_TYPE.TABLE,
179
- data_frame=pd.DataFrame(
180
- result, columns=[x[0] for x in cursor.description]
181
- ),
200
+ data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]),
182
201
  )
183
202
 
184
203
  else:
185
204
  response = Response(RESPONSE_TYPE.OK)
186
205
  connection.commit()
187
206
  except ServerOperationError as server_error:
188
- logger.error(
189
- f'Server error running query: {query} on Databricks, {server_error}!'
190
- )
191
- response = Response(
192
- RESPONSE_TYPE.ERROR,
193
- error_message=str(server_error)
194
- )
207
+ logger.error(f"Server error running query: {query} on Databricks, {server_error}!")
208
+ response = Response(RESPONSE_TYPE.ERROR, error_message=str(server_error))
195
209
  except Exception as unknown_error:
196
- logger.error(
197
- f'Unknown error running query: {query} on Databricks, {unknown_error}!'
198
- )
199
- response = Response(
200
- RESPONSE_TYPE.ERROR,
201
- error_message=str(unknown_error)
202
- )
210
+ logger.error(f"Unknown error running query: {query} on Databricks, {unknown_error}!")
211
+ response = Response(RESPONSE_TYPE.ERROR, error_message=str(unknown_error))
203
212
 
204
213
  if need_to_close is True:
205
214
  self.disconnect()
@@ -220,29 +229,44 @@ class DatabricksHandler(DatabaseHandler):
220
229
  query_str = renderer.get_string(query, with_failback=True)
221
230
  return self.native_query(query_str)
222
231
 
223
- def get_tables(self) -> Response:
232
+ def get_tables(self, all: bool = False) -> Response:
224
233
  """
225
234
  Retrieves a list of all non-system tables in the connected schema of the Databricks workspace.
226
235
 
236
+ Args:
237
+ all (bool): If True - return tables from all schemas.
238
+
227
239
  Returns:
228
240
  Response: A response object containing a list of tables in the connected schema.
229
241
  """
230
- query = """
231
- SHOW TABLES;
242
+ all_filter = "and table_schema = current_schema()"
243
+ if all is True:
244
+ all_filter = ""
245
+ query = f"""
246
+ SELECT
247
+ table_schema,
248
+ table_name,
249
+ table_type
250
+ FROM
251
+ information_schema.tables
252
+ WHERE
253
+ table_schema != 'information_schema'
254
+ {all_filter}
232
255
  """
233
256
  result = self.native_query(query)
234
-
235
- df = result.data_frame
236
- if df is not None:
237
- result.data_frame = df.rename(columns={"tableName": "table_name", "database": "schema_name"})
257
+ if result.resp_type == RESPONSE_TYPE.OK:
258
+ result = Response(
259
+ RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
260
+ )
238
261
  return result
239
262
 
240
- def get_columns(self, table_name: Text) -> Response:
263
+ def get_columns(self, table_name: str, schema_name: str | None = None) -> Response:
241
264
  """
242
265
  Retrieves column details for a specified table in the Databricks workspace.
243
266
 
244
267
  Args:
245
- table_name (Text): The name of the table for which to retrieve column information.
268
+ table_name (str): The name of the table for which to retrieve column information.
269
+ schema_name (str|None): The name of the schema in which the table is located.
246
270
 
247
271
  Raises:
248
272
  ValueError: If the 'table_name' is not a valid string.
@@ -253,9 +277,37 @@ class DatabricksHandler(DatabaseHandler):
253
277
  if not table_name or not isinstance(table_name, str):
254
278
  raise ValueError("Invalid table name provided.")
255
279
 
256
- query = f"DESCRIBE TABLE {table_name};"
280
+ if isinstance(schema_name, str):
281
+ schema_name = f"'{schema_name}'"
282
+ else:
283
+ schema_name = "current_schema()"
284
+ query = f"""
285
+ SELECT
286
+ COLUMN_NAME,
287
+ DATA_TYPE,
288
+ ORDINAL_POSITION,
289
+ COLUMN_DEFAULT,
290
+ IS_NULLABLE,
291
+ CHARACTER_MAXIMUM_LENGTH,
292
+ CHARACTER_OCTET_LENGTH,
293
+ NUMERIC_PRECISION,
294
+ NUMERIC_SCALE,
295
+ DATETIME_PRECISION,
296
+ null as CHARACTER_SET_NAME,
297
+ null as COLLATION_NAME
298
+ FROM
299
+ information_schema.columns
300
+ WHERE
301
+ table_name = '{table_name}'
302
+ AND
303
+ table_schema = {schema_name}
304
+ """
305
+
257
306
  result = self.native_query(query)
307
+ if result.resp_type == RESPONSE_TYPE.OK:
308
+ result = Response(
309
+ RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
310
+ )
311
+ result.to_columns_table_response(map_type_fn=_map_type)
258
312
 
259
- df = result.data_frame
260
- result.data_frame = df.rename(columns={"col_name": "column_name"})
261
313
  return result