MindsDB 25.9.3rc1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +1 -9
- mindsdb/api/a2a/__init__.py +1 -1
- mindsdb/api/a2a/agent.py +9 -1
- mindsdb/api/a2a/common/server/server.py +4 -0
- mindsdb/api/a2a/common/server/task_manager.py +8 -1
- mindsdb/api/a2a/common/types.py +66 -0
- mindsdb/api/a2a/task_manager.py +50 -0
- mindsdb/api/common/middleware.py +1 -1
- mindsdb/api/executor/command_executor.py +49 -36
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/system_tables.py +2 -1
- mindsdb/api/executor/planner/query_prepare.py +2 -20
- mindsdb/api/executor/utilities/sql.py +5 -4
- mindsdb/api/http/initialize.py +76 -60
- mindsdb/api/http/namespaces/agents.py +0 -3
- mindsdb/api/http/namespaces/chatbots.py +0 -5
- mindsdb/api/http/namespaces/file.py +2 -0
- mindsdb/api/http/namespaces/handlers.py +10 -5
- mindsdb/api/http/namespaces/knowledge_bases.py +20 -0
- mindsdb/api/http/namespaces/sql.py +2 -2
- mindsdb/api/http/start.py +2 -2
- mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +2 -10
- mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
- mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
- mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
- mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
- mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
- mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
- mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
- mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
- mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
- mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +2 -2
- mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
- mindsdb/integrations/libs/response.py +2 -2
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
- mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +24 -21
- mindsdb/interfaces/agents/agents_controller.py +0 -2
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +6 -7
- mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
- mindsdb/interfaces/database/data_handlers_cache.py +190 -0
- mindsdb/interfaces/database/database.py +3 -3
- mindsdb/interfaces/database/integrations.py +1 -121
- mindsdb/interfaces/database/projects.py +2 -6
- mindsdb/interfaces/database/views.py +1 -4
- mindsdb/interfaces/jobs/jobs_controller.py +0 -4
- mindsdb/interfaces/jobs/scheduler.py +0 -1
- mindsdb/interfaces/knowledge_base/controller.py +197 -108
- mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
- mindsdb/interfaces/knowledge_base/executor.py +11 -0
- mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
- mindsdb/interfaces/model/model_controller.py +4 -4
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +4 -10
- mindsdb/interfaces/skills/skills_controller.py +1 -4
- mindsdb/interfaces/storage/db.py +16 -6
- mindsdb/interfaces/triggers/triggers_controller.py +1 -3
- mindsdb/utilities/config.py +19 -2
- mindsdb/utilities/exception.py +2 -2
- mindsdb/utilities/json_encoder.py +24 -10
- mindsdb/utilities/render/sqlalchemy_render.py +15 -14
- mindsdb/utilities/starters.py +0 -10
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +276 -264
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +70 -84
- mindsdb/api/postgres/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -182
- mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
- mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -265
- mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
- mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
- mindsdb/api/postgres/start.py +0 -11
- mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
- mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
- mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.3rc1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0
mindsdb/api/http/initialize.py
CHANGED
@@ -8,8 +8,7 @@ from http import HTTPStatus
 import requests
-from flask import Flask, url_for,
-from flask.json import dumps
+from flask import Flask, url_for, request, send_from_directory
 from flask_compress import Compress
 from flask_restx import Api
 from werkzeug.exceptions import HTTPException
@@ -47,7 +46,7 @@ from mindsdb.metrics.server import init_metrics
 from mindsdb.utilities import log
 from mindsdb.utilities.config import config
 from mindsdb.utilities.context import context as ctx
-from mindsdb.utilities.json_encoder import
+from mindsdb.utilities.json_encoder import ORJSONProvider
 from mindsdb.utilities.ps import is_pid_listen_port, wait_func_is_true
 from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401
 from mindsdb.utilities.otel import trace  # noqa: F401
@@ -88,14 +87,8 @@ class Swagger_Api(Api):
        return url_for(self.endpoint("specs"), _external=False)


-def custom_output_json(data, code, headers=None):
-    resp = make_response(dumps(data, cls=CustomJSONProvider), code)
-    resp.headers.extend(headers or {})
-    return resp
-
-
 def get_last_compatible_gui_version() -> Version | bool:
-    logger.debug("Getting last compatible frontend
+    logger.debug("Getting last compatible frontend...")
     try:
         res = requests.get(
             "https://mindsdb-web-builds.s3.amazonaws.com/compatible-config.json",
@@ -154,7 +147,7 @@ def get_last_compatible_gui_version() -> Version | bool:
        all_lower_versions = [parse_version(x) for x in lower_versions.keys()]
        gui_version_lv = gui_versions[all_lower_versions[-1].base_version]
    except Exception:
-        logger.exception("Error in compatible-config.json structure
+        logger.exception("Error in compatible-config.json structure")
        return False

    logger.debug(f"Last compatible frontend version: {gui_version_lv}.")
@@ -178,7 +171,6 @@ def get_current_gui_version() -> Version:


 def initialize_static():
-    logger.debug("Initializing static..")
     last_gui_version_lv = get_last_compatible_gui_version()
     current_gui_version_lv = get_current_gui_version()
     required_gui_version = config["gui"].get("version")
@@ -206,22 +198,28 @@ def initialize_static():
     return success


-def initialize_app():
+def initialize_app(is_restart: bool = False):
     static_root = config["paths"]["static"]
     logger.debug(f"Static route: {static_root}")
-    gui_exists = Path(static_root).joinpath("index.html").is_file()
-    logger.debug(f"Does GUI already exist.. {'YES' if gui_exists else 'NO'}")
     init_static_thread = None
+    if not is_restart:
+        gui_exists = Path(static_root).joinpath("index.html").is_file()
+        logger.debug(f"Does GUI already exist.. {'YES' if gui_exists else 'NO'}")
+
+        if config["gui"]["autoupdate"] is True or (config["gui"]["open_on_start"] is True and gui_exists is False):
+            logger.debug("Initializing static...")
+            init_static_thread = threading.Thread(target=initialize_static, name="initialize_static")
+            init_static_thread.start()
+        else:
+            logger.debug(f"Skip initializing static: config['gui']={config['gui']}, gui_exists={gui_exists}")

-
-    init_static_thread = threading.Thread(target=initialize_static, name="initialize_static")
-    init_static_thread.start()
+    app, api = initialize_flask()

-
-
-
+    if not is_restart and config["gui"]["open_on_start"]:
+        if init_static_thread is not None:
+            init_static_thread.join()
+        open_gui(init_static_thread)

-    app, api = initialize_flask(config, init_static_thread)
     Compress(app)

     initialize_interfaces(app)
@@ -240,22 +238,30 @@ def initialize_app():
                "The endpoint you are trying to access does not exist on the server.",
            )

-
-
+        try:
+            # Ensure the requested path is within the static directory
+            # https://docs.python.org/3/library/pathlib.html#pathlib.PurePath.is_relative_to
+            requested_path = (static_root / path).resolve()
+
+            if not requested_path.is_relative_to(static_root.resolve()):
+                return http_error(
+                    HTTPStatus.FORBIDDEN,
+                    "Forbidden",
+                    "You are not allowed to access the requested resource.",
+                )
+
+            if requested_path.is_file():
+                return send_from_directory(static_root, path)
+            else:
+                return send_from_directory(static_root, "index.html")

-
-        if not full_path.startswith(str(static_root)):
+        except (ValueError, OSError):
            return http_error(
-                HTTPStatus.
-                "
-                "
+                HTTPStatus.BAD_REQUEST,
+                "Bad Request",
+                "Invalid path requested.",
            )

-        if os.path.isfile(full_path):
-            return send_from_directory(static_root, path)
-        else:
-            return send_from_directory(static_root, "index.html")
-
    protected_namespaces = [
        tab_ns,
        utils_ns,
@@ -309,7 +315,6 @@ def initialize_app():

    @app.before_request
    def before_request():
-        logger.debug(f"HTTP {request.method}: {request.path}")
        ctx.set_default()

        h = request.headers.get("Authorization")
@@ -348,15 +353,15 @@ def initialize_app():
        if company_id is not None:
            try:
                company_id = int(company_id)
-            except Exception:
-                logger.
+            except Exception as e:
+                logger.error(f"Could not parse company id: {company_id} | exception: {e}")
                company_id = None

        if user_class is not None:
            try:
                user_class = int(user_class)
-            except Exception:
-                logger.
+            except Exception as e:
+                logger.error(f"Could not parse user_class: {user_class} | exception: {e}")
                user_class = 0
        else:
            user_class = 0
@@ -370,8 +375,8 @@ def initialize_app():
    return app


-def initialize_flask(
-    logger.debug("Initializing flask
+def initialize_flask():
+    logger.debug("Initializing flask...")
    # region required for windows https://github.com/mindsdb/mindsdb/issues/2526
    mimetypes.add_type("text/css", ".css")
    mimetypes.add_type("text/javascript", ".js")
@@ -392,7 +397,7 @@ def initialize_flask(config, init_static_thread):

    app.config["SEND_FILE_MAX_AGE_DEFAULT"] = 60
    app.config["SWAGGER_HOST"] = "http://localhost:8000/mindsdb"
-    app.json =
+    app.json = ORJSONProvider(app)

    authorizations = {"apikey": {"type": "apiKey", "in": "header", "name": "Authorization"}}

@@ -406,30 +411,41 @@ def initialize_flask(config, init_static_thread):
        doc="/doc/",
    )

-
+    def __output_json_orjson(data, code, headers=None):
+        from flask import current_app, make_response

-
-
+        dumped = current_app.json.dumps(data)
+        resp = make_response(dumped, code)
+        if headers:
+            resp.headers.extend(headers)
+        resp.mimetype = "application/json"
+        return resp

-
-    if host in ("", "0.0.0.0"):
-        url = f"http://127.0.0.1:{port}/"
-    else:
-        url = f"http://{host}:{port}/"
-    logger.info(f" - GUI available at {url}")
-
-    pid = os.getpid()
-    thread = threading.Thread(
-        target=_open_webbrowser,
-        args=(url, pid, port, init_static_thread, config["paths"]["static"]),
-        daemon=True,
-        name="open_webbrowser",
-    )
-    thread.start()
+    api.representations["application/json"] = __output_json_orjson

    return app, api


+def open_gui(init_static_thread):
+    port = config["api"]["http"]["port"]
+    host = config["api"]["http"]["host"]
+
+    if host in ("", "0.0.0.0"):
+        url = f"http://127.0.0.1:{port}/"
+    else:
+        url = f"http://{host}:{port}/"
+    logger.info(f" - GUI available at {url}")
+
+    pid = os.getpid()
+    thread = threading.Thread(
+        target=_open_webbrowser,
+        args=(url, pid, port, init_static_thread, config["paths"]["static"]),
+        daemon=True,
+        name="open_webbrowser",
+    )
+    thread.start()
+
+
 def initialize_interfaces(app):
    app.integration_controller = integration_controller
    app.database_controller = DatabaseController()
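Note: the rewritten static route above keeps requests inside the static directory with pathlib (resolve() plus is_relative_to(), Python 3.9+) instead of a string-prefix check. A minimal, self-contained sketch of the same guard, assuming a hypothetical Flask app and static_root rather than MindsDB's actual route:

# Sketch only: hypothetical app and static_root, illustrating the containment check.
from http import HTTPStatus
from pathlib import Path

from flask import Flask, abort, send_from_directory

app = Flask(__name__)
static_root = Path("/var/lib/mindsdb/static")  # hypothetical static directory


@app.route("/static/<path:path>")
def serve_static(path: str):
    try:
        # Resolve to an absolute path with ".." segments collapsed.
        requested = (static_root / path).resolve()
    except (ValueError, OSError):
        abort(HTTPStatus.BAD_REQUEST)
    # Reject anything that escapes the static directory, e.g. "../../etc/passwd".
    if not requested.is_relative_to(static_root.resolve()):
        abort(HTTPStatus.FORBIDDEN)
    # Serve real files; fall back to index.html for client-side routes.
    if requested.is_file():
        return send_from_directory(static_root, path)
    return send_from_directory(static_root, "index.html")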
mindsdb/api/http/namespaces/agents.py
CHANGED

@@ -26,9 +26,6 @@ def create_agent(project_name, name, agent):
    if name is None:
        return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for agent')

-    if not name.islower():
-        return http_error(HTTPStatus.BAD_REQUEST, "Wrong name", f"The name must be in lower case: {name}")
-
    model_name = agent.get("model_name")
    provider = agent.get("provider")
    skills = agent.get("skills", [])
mindsdb/api/http/namespaces/chatbots.py
CHANGED

@@ -18,9 +18,6 @@ def create_chatbot(project_name, name, chatbot):
    if name is None:
        return http_error(HTTPStatus.BAD_REQUEST, "Missing field", 'Missing "name" field for chatbot')

-    if not name.islower():
-        return http_error(HTTPStatus.BAD_REQUEST, "Wrong name", f"The name must be in lower case: {name}")
-
    model_name = chatbot.get("model_name", None)
    agent_name = chatbot.get("agent_name", None)
    if model_name is None and agent_name is None:
@@ -241,8 +238,6 @@ class ChatBotResource(Resource):
                "Chatbot already exists",
                f"Chatbot with name {name} already exists. Please choose a different one.",
            )
-        if not name.islower():
-            return http_error(HTTPStatus.BAD_REQUEST, "Wrong name", f"The name must be in lower case: {name}")

        if existing_chatbot is None:
            # Create
mindsdb/api/http/namespaces/file.py
CHANGED

@@ -196,6 +196,8 @@ class File(Resource):
            return http_error(400, "Wrong content.", "Archive must contain data file in root.")

        try:
+            if not Path(mindsdb_file_name).suffix == "":
+                return http_error(400, "Error", "File name cannot contain extension.")
            ca.file_controller.save_file(mindsdb_file_name, file_path, file_name=original_file_name)
        except FileProcessingError as e:
            return http_error(400, "Error", str(e))
mindsdb/api/http/namespaces/handlers.py
CHANGED

@@ -125,11 +125,16 @@ def prepare_formdata():
            params[name] = value

    def on_file(file):
-        file_name = file.
-        if file_name
-            raise ValueError(f"Wrong
-
-
+        file_name = file.file_name.decode()
+        if Path(file_name).name != file_name:
+            raise ValueError(f"Wrong file name: {file_name}")
+
+        field_name = file.field_name.decode()
+        if field_name not in ("code", "modules"):
+            raise ValueError(f"Wrong field name: {field_name}")
+
+        params[field_name] = file.file_object
+        file_names.append(field_name)

    temp_dir_path = tempfile.mkdtemp(prefix="mindsdb_file_")
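Note: prepare_formdata() now rejects upload parts whose file name contains directory components and whose field name is not one of the expected form fields. A standalone sketch of that whitelist idea (illustrative helper, not MindsDB's on_file callback; the allowed set mirrors the diff):

# Sketch only: generic validation of a multipart upload part.
from pathlib import Path

ALLOWED_FIELDS = {"code", "modules"}  # assumed whitelist, mirroring the diff


def validate_upload_part(field_name: str, file_name: str) -> None:
    # A name with directory parts ("a/b.py", "../x.py") is not equal to its basename.
    if Path(file_name).name != file_name:
        raise ValueError(f"Wrong file name: {file_name}")
    if field_name not in ALLOWED_FIELDS:
        raise ValueError(f"Wrong field name: {field_name}")


validate_upload_part("code", "model.py")        # passes
# validate_upload_part("code", "../model.py")   # raises ValueError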
mindsdb/api/http/namespaces/knowledge_bases.py
CHANGED

@@ -241,6 +241,26 @@ class KnowledgeBaseResource(Resource):
            if kb_data.get("query"):
                table.insert_query_result(kb_data["query"], project_name)

+            # update KB
+            update_kb_data = {}
+            if "params" in kb_data:
+                allowed_keys = [
+                    "id_column",
+                    "metadata_columns",
+                    "content_columns",
+                    "preprocessing",
+                    "reranking_model",
+                    "embedding_model",
+                ]
+                update_kb_data = {k: v for k, v in kb_data["params"].items() if k in allowed_keys}
+            if update_kb_data or "preprocessing" in kb_data:
+                session.kb_controller.update(
+                    knowledge_base_name,
+                    project.name,
+                    params=update_kb_data,
+                    preprocessing_config=kb_data.get("preprocessing"),
+                )
+
        except ExecutorException as e:
            logger.exception("Error during preprocessing and insertion:")
            return http_error(
mindsdb/api/http/namespaces/sql.py
CHANGED

@@ -63,13 +63,13 @@ class Query(Resource):
            }
            logger.warning(f"Error query processing: {e}")
        except QueryError as e:
-            error_type = "expected" if e.
+            error_type = "expected" if e.is_expected else "unexpected"
            query_response = {
                "type": SQL_RESPONSE_TYPE.ERROR,
                "error_code": 0,
                "error_message": str(e),
            }
-            if e.
+            if e.is_expected:
                logger.warning(f"Query failed due to expected reason: {e}")
            else:
                logger.exception("Error query processing:")
mindsdb/api/http/start.py
CHANGED
@@ -23,12 +23,12 @@ gc.enable()
 logger = log.getLogger(__name__)


-def start(verbose, app: Flask = None):
+def start(verbose, app: Flask = None, is_restart: bool = False):
    db.init()
    init_lexer_parsers()

    if app is None:
-        app = initialize_app()
+        app = initialize_app(is_restart)

    port = config["api"]["http"]["port"]
    host = config["api"]["http"]["host"]
mindsdb/api/mysql/mysql_proxy/utilities/dump.py
CHANGED

@@ -4,6 +4,7 @@ from typing import Any
 from array import array

 import numpy as np
+import orjson
 from numpy import dtype as np_dtype
 import pandas as pd
 from pandas.api import types as pd_types
@@ -21,7 +22,8 @@ from mindsdb.utilities.json_encoder import CustomJSONEncoder

 logger = log.getLogger(__name__)

-
+# Pre-bind default encoder for custom types so we can serialize JSON consistently
+_default_json = CustomJSONEncoder().default


 def column_to_mysql_column_dict(column: Column, database_name: str | None = None) -> dict[str, str | int]:
@@ -115,7 +117,11 @@ def _dump_str(var: Any) -> str | None:
        return str(var)[2:-1]
    if isinstance(var, (dict, list)):
        try:
-            return
+            return orjson.dumps(
+                var,
+                default=_default_json,
+                option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME,
+            ).decode("utf-8")
        except Exception:
            return str(var)
    # pd.isna returns array of bools for list
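Note: _dump_str() now serializes dicts and lists with orjson, falling back to a pre-bound CustomJSONEncoder().default for types orjson cannot handle natively. A small standalone sketch of the same pattern, with a hypothetical payload and default handler (not MindsDB code):

# Sketch only: orjson with a reusable `default` fallback for custom types.
import datetime as dt
from decimal import Decimal

import numpy as np
import orjson


def _default(obj):
    # Called for unsupported types, and for datetimes because
    # OPT_PASSTHROUGH_DATETIME routes them here instead of the built-in handling.
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, (dt.date, dt.datetime)):
        return obj.isoformat()
    raise TypeError(f"Cannot serialize {type(obj)}")


payload = {"vec": np.array([1, 2, 3]), "price": Decimal("9.99"), "ts": dt.datetime(2025, 1, 1)}
dumped = orjson.dumps(
    payload,
    default=_default,
    option=orjson.OPT_SERIALIZE_NUMPY | orjson.OPT_PASSTHROUGH_DATETIME,
).decode("utf-8")
print(dumped)  # {"vec":[1,2,3],"price":9.99,"ts":"2025-01-01T00:00:00"}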
mindsdb/integrations/handlers/byom_handler/byom_handler.py
CHANGED

@@ -1,10 +1,9 @@
 """BYOM: Bring Your Own Model

 env vars to contloll BYOM:
-  - MINDSDB_BYOM_ENABLED - can BYOM be
+  - MINDSDB_BYOM_ENABLED - can BYOM be used or not. Locally enabled by default.
   - MINDSDB_BYOM_INHOUSE_ENABLED - enable or disable 'inhouse' BYOM usage. Locally enabled by default.
   - MINDSDB_BYOM_DEFAULT_TYPE - [inhouse|venv] default byom type. Locally it is 'venv' by default.
-  - MINDSDB_BYOM_TYPE - [safe|unsafe] - obsolete, same as above.
 """

 import os
@@ -73,15 +72,8 @@ class BYOMHandler(BaseMLEngine):
            self._default_byom_type = BYOM_TYPE.VENV
            if os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE") is not None:
                self._default_byom_type = BYOM_TYPE[os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE").upper()]
-            else:
-                env_var = os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE")
-                if env_var == "SAVE":
-                    self._default_byom_type = BYOM_TYPE["VENV"]
-                elif env_var == "UNSAVE":
-                    self._default_byom_type = BYOM_TYPE["INHOUSE"]
-                else:
-                    raise KeyError
        except KeyError:
+            logger.warning(f"Wrong value of env var MINDSDB_BYOM_DEFAULT_TYPE, {BYOM_TYPE.VENV} will be used")
            self._default_byom_type = BYOM_TYPE.VENV
        # endregion
mindsdb/integrations/handlers/databricks_handler/databricks_handler.py
CHANGED

@@ -1,24 +1,50 @@
 from typing import Text, Dict, Any, Optional

+import pandas as pd
 from databricks.sql import connect, RequestError, ServerOperationError
 from databricks.sql.client import Connection
 from databricks.sqlalchemy import DatabricksDialect
 from mindsdb_sql_parser.ast.base import ASTNode
-from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
-import pandas as pd

+from mindsdb.utilities.render.sqlalchemy_render import SqlalchemyRender
 from mindsdb.integrations.libs.base import DatabaseHandler
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
     HandlerResponse as Response,
     RESPONSE_TYPE,
+    INF_SCHEMA_COLUMNS_NAMES_SET,
 )
 from mindsdb.utilities import log
+from mindsdb.api.mysql.mysql_proxy.libs.constants.mysql import MYSQL_DATA_TYPE


 logger = log.getLogger(__name__)


+def _map_type(internal_type_name: str | None) -> MYSQL_DATA_TYPE:
+    """Map MyDatabricks SQL text types names to MySQL types as enum.
+
+    Args:
+        internal_type_name (str): The name of the Databricks type to map.
+
+    Returns:
+        MYSQL_DATA_TYPE: The MySQL type enum that corresponds to the MySQL text type name.
+    """
+    if not isinstance(internal_type_name, str):
+        return MYSQL_DATA_TYPE.TEXT
+    if internal_type_name.upper() == "STRING":
+        return MYSQL_DATA_TYPE.TEXT
+    if internal_type_name.upper() == "LONG":
+        return MYSQL_DATA_TYPE.BIGINT
+    if internal_type_name.upper() == "SHORT":
+        return MYSQL_DATA_TYPE.SMALLINT
+    try:
+        return MYSQL_DATA_TYPE(internal_type_name.upper())
+    except Exception:
+        logger.info(f"Databricks handler: unknown type: {internal_type_name}, use TEXT as fallback.")
+        return MYSQL_DATA_TYPE.TEXT
+
+
 class DatabricksHandler(DatabaseHandler):
     """
     This handler handles the connection and execution of SQL statements on Databricks.
@@ -64,11 +90,8 @@ class DatabricksHandler(DatabaseHandler):
            return self.connection

        # Mandatory connection parameters.
-        if not all(
-
-            for key in ["server_hostname", "http_path", "access_token"]
-        ):
-            raise ValueError('Required parameters (server_hostname, http_path, access_token) must be provided.')
+        if not all(key in self.connection_data for key in ["server_hostname", "http_path", "access_token"]):
+            raise ValueError("Required parameters (server_hostname, http_path, access_token) must be provided.")

        config = {
            "server_hostname": self.connection_data["server_hostname"],
@@ -88,19 +111,17 @@ class DatabricksHandler(DatabaseHandler):
                config[parameter] = self.connection_data[parameter]

        try:
-            self.connection = connect(
-                **config
-            )
+            self.connection = connect(**config)
            self.is_connected = True
            return self.connection
        except RequestError as request_error:
-            logger.error(f
+            logger.error(f"Request error when connecting to Databricks: {request_error}")
            raise
        except RuntimeError as runtime_error:
-            logger.error(f
+            logger.error(f"Runtime error when connecting to Databricks: {runtime_error}")
            raise
        except Exception as unknown_error:
-            logger.error(f
+            logger.error(f"Unknown error when connecting to Databricks: {unknown_error}")
            raise

    def disconnect(self):
@@ -129,7 +150,7 @@ class DatabricksHandler(DatabaseHandler):

            # Execute a simple query to check the connection.
            query = "SELECT 1 FROM information_schema.schemata"
-            if
+            if "schema" in self.connection_data:
                query += f" WHERE schema_name = '{self.connection_data['schema']}'"

            with connection.cursor() as cursor:
@@ -138,14 +159,14 @@ class DatabricksHandler(DatabaseHandler):

                # If the query does not return a result, the schema does not exist.
                if not result:
-                    raise ValueError(f
+                    raise ValueError(f"The schema {self.connection_data['schema']} does not exist!")

            response.success = True
        except (ValueError, RequestError, RuntimeError, ServerOperationError) as known_error:
-            logger.error(f
+            logger.error(f"Connection check to Databricks failed, {known_error}!")
            response.error_message = str(known_error)
        except Exception as unknown_error:
-            logger.error(f
+            logger.error(f"Connection check to Databricks failed due to an unknown error, {unknown_error}!")
            response.error_message = str(unknown_error)

        if response.success and need_to_close:
@@ -176,30 +197,18 @@ class DatabricksHandler(DatabaseHandler):
                if result:
                    response = Response(
                        RESPONSE_TYPE.TABLE,
-                        data_frame=pd.DataFrame(
-                            result, columns=[x[0] for x in cursor.description]
-                        ),
+                        data_frame=pd.DataFrame(result, columns=[x[0] for x in cursor.description]),
                    )

                else:
                    response = Response(RESPONSE_TYPE.OK)
                    connection.commit()
            except ServerOperationError as server_error:
-                logger.error(
-
-                )
-                response = Response(
-                    RESPONSE_TYPE.ERROR,
-                    error_message=str(server_error)
-                )
+                logger.error(f"Server error running query: {query} on Databricks, {server_error}!")
+                response = Response(RESPONSE_TYPE.ERROR, error_message=str(server_error))
            except Exception as unknown_error:
-                logger.error(
-
-                )
-                response = Response(
-                    RESPONSE_TYPE.ERROR,
-                    error_message=str(unknown_error)
-                )
+                logger.error(f"Unknown error running query: {query} on Databricks, {unknown_error}!")
+                response = Response(RESPONSE_TYPE.ERROR, error_message=str(unknown_error))

            if need_to_close is True:
                self.disconnect()
@@ -220,29 +229,44 @@ class DatabricksHandler(DatabaseHandler):
            query_str = renderer.get_string(query, with_failback=True)
            return self.native_query(query_str)

-    def get_tables(self) -> Response:
+    def get_tables(self, all: bool = False) -> Response:
        """
        Retrieves a list of all non-system tables in the connected schema of the Databricks workspace.

+        Args:
+            all (bool): If True - return tables from all schemas.
+
        Returns:
            Response: A response object containing a list of tables in the connected schema.
        """
-
-
+        all_filter = "and table_schema = current_schema()"
+        if all is True:
+            all_filter = ""
+        query = f"""
+            SELECT
+                table_schema,
+                table_name,
+                table_type
+            FROM
+                information_schema.tables
+            WHERE
+                table_schema != 'information_schema'
+                {all_filter}
        """
        result = self.native_query(query)
-
-
-
-
+        if result.resp_type == RESPONSE_TYPE.OK:
+            result = Response(
+                RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
+            )
        return result

-    def get_columns(self, table_name:
+    def get_columns(self, table_name: str, schema_name: str | None = None) -> Response:
        """
        Retrieves column details for a specified table in the Databricks workspace.

        Args:
-            table_name (
+            table_name (str): The name of the table for which to retrieve column information.
+            schema_name (str|None): The name of the schema in which the table is located.

        Raises:
            ValueError: If the 'table_name' is not a valid string.
@@ -253,9 +277,37 @@ class DatabricksHandler(DatabaseHandler):
        if not table_name or not isinstance(table_name, str):
            raise ValueError("Invalid table name provided.")

-
+        if isinstance(schema_name, str):
+            schema_name = f"'{schema_name}'"
+        else:
+            schema_name = "current_schema()"
+        query = f"""
+            SELECT
+                COLUMN_NAME,
+                DATA_TYPE,
+                ORDINAL_POSITION,
+                COLUMN_DEFAULT,
+                IS_NULLABLE,
+                CHARACTER_MAXIMUM_LENGTH,
+                CHARACTER_OCTET_LENGTH,
+                NUMERIC_PRECISION,
+                NUMERIC_SCALE,
+                DATETIME_PRECISION,
+                null as CHARACTER_SET_NAME,
+                null as COLLATION_NAME
+            FROM
+                information_schema.columns
+            WHERE
+                table_name = '{table_name}'
+            AND
+                table_schema = {schema_name}
+        """
+
        result = self.native_query(query)
+        if result.resp_type == RESPONSE_TYPE.OK:
+            result = Response(
+                RESPONSE_TYPE.TABLE, data_frame=pd.DataFrame([], columns=list(INF_SCHEMA_COLUMNS_NAMES_SET))
+            )
+        result.to_columns_table_response(map_type_fn=_map_type)

-        df = result.data_frame
-        result.data_frame = df.rename(columns={"col_name": "column_name"})
        return result