MindsDB 25.4.4.0__py3-none-any.whl → 25.5.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +107 -125
- mindsdb/api/executor/command_executor.py +14 -3
- mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
- mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +2 -1
- mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
- mindsdb/api/executor/planner/query_plan.py +1 -0
- mindsdb/api/executor/planner/query_planner.py +9 -1
- mindsdb/api/executor/sql_query/sql_query.py +24 -8
- mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +21 -3
- mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +3 -1
- mindsdb/api/http/initialize.py +20 -3
- mindsdb/api/http/namespaces/analysis.py +14 -1
- mindsdb/api/http/namespaces/config.py +19 -11
- mindsdb/api/http/namespaces/tree.py +1 -1
- mindsdb/api/http/start.py +7 -2
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +4 -8
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
- mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
- mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
- mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
- mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
- mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
- mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -2
- mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
- mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
- mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
- mindsdb/integrations/handlers/openai_handler/helpers.py +3 -5
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
- mindsdb/integrations/handlers/togetherai_handler/__about__.py +9 -0
- mindsdb/integrations/handlers/togetherai_handler/__init__.py +20 -0
- mindsdb/integrations/handlers/togetherai_handler/creation_args.py +14 -0
- mindsdb/integrations/handlers/togetherai_handler/icon.svg +15 -0
- mindsdb/integrations/handlers/togetherai_handler/model_using_args.py +5 -0
- mindsdb/integrations/handlers/togetherai_handler/requirements.txt +2 -0
- mindsdb/integrations/handlers/togetherai_handler/settings.py +33 -0
- mindsdb/integrations/handlers/togetherai_handler/togetherai_handler.py +234 -0
- mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
- mindsdb/integrations/utilities/files/file_reader.py +5 -2
- mindsdb/integrations/utilities/handler_utils.py +4 -0
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +360 -0
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +6 -346
- mindsdb/interfaces/agents/constants.py +14 -2
- mindsdb/interfaces/agents/langchain_agent.py +2 -4
- mindsdb/interfaces/database/projects.py +1 -7
- mindsdb/interfaces/functions/controller.py +14 -16
- mindsdb/interfaces/functions/to_markdown.py +9 -124
- mindsdb/interfaces/knowledge_base/controller.py +109 -92
- mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +28 -5
- mindsdb/interfaces/knowledge_base/utils.py +10 -15
- mindsdb/interfaces/model/model_controller.py +0 -2
- mindsdb/interfaces/query_context/context_controller.py +55 -15
- mindsdb/interfaces/query_context/query_task.py +19 -0
- mindsdb/interfaces/skills/sql_agent.py +33 -11
- mindsdb/interfaces/storage/db.py +2 -2
- mindsdb/interfaces/tasks/task_monitor.py +5 -1
- mindsdb/interfaces/tasks/task_thread.py +6 -0
- mindsdb/migrations/migrate.py +0 -2
- mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py +27 -0
- mindsdb/utilities/config.py +15 -3
- mindsdb/utilities/context.py +2 -1
- mindsdb/utilities/functions.py +0 -36
- mindsdb/utilities/langfuse.py +19 -10
- mindsdb/utilities/otel/__init__.py +9 -193
- mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
- mindsdb/utilities/otel/prepare.py +198 -0
- mindsdb/utilities/sql.py +83 -0
- {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/METADATA +662 -592
- {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/RECORD +85 -69
- {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/WHEEL +1 -1
- mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
- {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py
CHANGED

@@ -1,6 +1,6 @@
 __title__ = 'MindsDB'
 __package_name__ = 'mindsdb'
-__version__ = '25.4.4.0'
+__version__ = '25.5.3.0'
 __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
 __email__ = "jorge@mindsdb.com"
 __author__ = 'MindsDB Inc'
mindsdb/__main__.py
CHANGED

@@ -1,3 +1,5 @@
+import gc
+gc.disable()
 import os
 import sys
 import time
@@ -12,7 +14,7 @@ from enum import Enum
 from dataclasses import dataclass, field
 from typing import Callable, Optional, Tuple, List
 
-from
+from sqlalchemy import func
 from sqlalchemy.orm.attributes import flag_modified
 
 from mindsdb.utilities import log
@@ -22,17 +24,12 @@ logger.debug("Starting MindsDB...")
 
 from mindsdb.__about__ import __version__ as mindsdb_version
 from mindsdb.utilities.config import config
-from mindsdb.utilities.exception import EntityNotExistsError
 from mindsdb.utilities.starters import (
-    start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue,
-    start_mcp, start_litellm
+    start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue,
+    start_scheduler, start_tasks, start_mcp, start_litellm
 )
 from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids
-from mindsdb.utilities.functions import get_versions_where_predictors_become_obsolete
-from mindsdb.interfaces.database.integrations import integration_controller
-from mindsdb.interfaces.database.projects import ProjectController
 import mindsdb.interfaces.storage.db as db
-from mindsdb.integrations.utilities.install import install_dependencies
 from mindsdb.utilities.fs import clean_process_marks, clean_unlinked_process_marks
 from mindsdb.utilities.context import context as ctx
 from mindsdb.utilities.auth import register_oauth_client, get_aws_meta_data
@@ -47,6 +44,8 @@ try:
 except RuntimeError:
     logger.info('Torch multiprocessing context already set, ignoring...')
 
+gc.enable()
+
 _stop_event = threading.Event()
 
 
@@ -213,7 +212,96 @@ def do_clean_process_marks():
     set_error_model_status_by_pids(unexisting_pids)
 
 
+def create_permanent_integrations():
+    """
+    Create permanent integrations, for now only the 'files' integration.
+    NOTE: this is intentional to avoid importing integration_controller
+    """
+    integration_name = 'files'
+    existing = db.session.query(db.Integration).filter_by(name=integration_name, company_id=None).first()
+    if existing is None:
+        integration_record = db.Integration(
+            name=integration_name,
+            data={},
+            engine=integration_name,
+            company_id=None,
+        )
+        db.session.add(integration_record)
+        try:
+            db.session.commit()
+        except Exception as e:
+            logger.error(f"Failed to commit permanent integration {integration_name}: {e}")
+            db.session.rollback()
+
+
+def validate_default_project() -> None:
+    """Handle 'default_project' config option.
+    Project with the name specified in 'default_project' must exists and be marked with
+    'is_default' metadata. If it is not possible, then terminate the process with error.
+    Note: this can be done using 'project_controller', but we want to save init time and used RAM.
+    """
+    new_default_project_name = config.get('default_project')
+    logger.debug(f"Checking if default project {new_default_project_name} exists")
+    filter_company_id = ctx.company_id if ctx.company_id is not None else 0
+
+    current_default_project: db.Project | None = (
+        db.Project.query.filter(
+            db.Project.company_id == filter_company_id,
+            db.Project.metadata_['is_default'].as_boolean() == True  # noqa
+        ).first()
+    )
+
+    if current_default_project is None:
+        # Legacy: If the default project does not exist, mark the new one as default.
+        existing_project = db.Project.query.filter(
+            db.Project.company_id == filter_company_id,
+            func.lower(db.Project.name) == func.lower(new_default_project_name)
+        ).first()
+        if existing_project is None:
+            logger.critical(f"A project with the name '{new_default_project_name}' does not exist")
+            sys.exit(1)
+
+        existing_project.metadata_ = {'is_default': True}
+        flag_modified(existing_project, 'metadata_')
+        db.session.commit()
+    elif current_default_project.name != new_default_project_name:
+        # If the default project exists, but the name is different, update the name.
+        existing_project = db.Project.query.filter(
+            db.Project.company_id == filter_company_id,
+            func.lower(db.Project.name) == func.lower(new_default_project_name)
+        ).first()
+        if existing_project is not None:
+            logger.critical(f"A project with the name '{new_default_project_name}' already exists")
+            sys.exit(1)
+        current_default_project.name = new_default_project_name
+        db.session.commit()
+
+
+def start_process(trunc_process_data: TrunkProcessData) -> None:
+    """Start a process.
+
+    Args:
+        trunc_process_data (TrunkProcessData): The data of the process to start.
+    """
+    mp_ctx = mp.get_context("spawn")
+    logger.info(f"{trunc_process_data.name} API: starting...")
+    try:
+        trunc_process_data.process = mp_ctx.Process(
+            target=trunc_process_data.entrypoint,
+            args=trunc_process_data.args,
+            name=trunc_process_data.name
+        )
+        trunc_process_data.process.start()
+    except Exception as e:
+        logger.error(
+            f"Failed to start {trunc_process_data.name} API with exception {e}\n{traceback.format_exc()}"
+        )
+        close_api_gracefully(trunc_processes_struct)
+        raise e
+
+
 if __name__ == '__main__':
+    mp.freeze_support()
     # warn if less than 1Gb of free RAM
     if psutil.virtual_memory().available < (1 << 30):
         logger.warning(
@@ -270,7 +358,6 @@ if __name__ == '__main__':
         pass
 
     db.init()
-    mp.freeze_support()
 
     environment = config["environment"]
     if environment == "aws_marketplace":
@@ -287,52 +374,6 @@ if __name__ == '__main__':
     except Exception:
         pass
 
-    is_cloud = config.is_cloud
-
-    if not is_cloud:
-        logger.debug("Applying database migrations")
-        try:
-            from mindsdb.migrations import migrate
-            migrate.migrate_to_head()
-        except Exception as e:
-            logger.error(f"Error! Something went wrong during DB migrations: {e}")
-
-    logger.debug(f"Checking if default project {config.get('default_project')} exists")
-    project_controller = ProjectController()
-
-    try:
-        current_default_project = project_controller.get(is_default=True)
-    except EntityNotExistsError:
-        # In previous versions, the default project could be deleted. This is no longer possible.
-        current_default_project = None
-
-    if current_default_project:
-        if current_default_project.record.name != config.get('default_project'):
-            try:
-                project_controller.get(name=config.get('default_project'))
-                log.critical(f"A project with the name '{config.get('default_project')}' already exists")
-                sys.exit(1)
-            except EntityNotExistsError:
-                pass
-            project_controller.update(current_default_project.record.id, new_name=config.get('default_project'))
-
-    # Legacy: If the default project does not exist, mark the new one as default.
-    else:
-        try:
-            project_controller.get(name=config.get('default_project'))
-        except EntityNotExistsError:
-            log.critical(
-                f"A project with the name '{config.get('default_project')}' does not exist"
-            )
-            raise
-
-        project_controller.update(
-            name=config.get('default_project'),
-            new_metadata={
-                "is_default": True
-            }
-        )
-
     apis = os.getenv('MINDSDB_APIS') or config.cmd_args.api
 
     if apis is None:  # If "--api" option is not specified, start the default APIs
@@ -342,27 +383,6 @@ if __name__ == '__main__':
     else:  # The user has provided a list of APIs to start
         api_arr = [TrunkProcessEnum(name) for name in apis.split(',')]
 
-    if config.cmd_args.install_handlers is not None:
-        handlers_list = [s.strip() for s in config.cmd_args.install_handlers.split(",")]
-        # import_meta = handler_meta.get('import', {})
-        for handler_name, handler_meta in integration_controller.get_handlers_import_status().items():
-            if handler_name not in handlers_list:
-                continue
-            import_meta = handler_meta.get("import", {})
-            if import_meta.get("success") is True:
-                logger.info(f"{'{0: <18}'.format(handler_name)} - already installed")
-                continue
-            result = install_dependencies(import_meta.get("dependencies", []))
-            if result.get("success") is True:
-                logger.info(
-                    f"{'{0: <18}'.format(handler_name)} - successfully installed"
-                )
-            else:
-                logger.info(
-                    f"{'{0: <18}'.format(handler_name)} - error during dependencies installation: {result.get('error_message', 'unknown error')}"
-                )
-        sys.exit(0)
-
     logger.info(f"Version: {mindsdb_version}")
     logger.info(f"Configuration file: {config.config_path or 'absent'}")
     logger.info(f"Storage path: {config.paths['root']}")
@@ -370,42 +390,22 @@ if __name__ == '__main__':
     logger.debug(f"System config: {config.auto_config}")
    logger.debug(f"Env config: {config.env_config}")
 
+    is_cloud = config.is_cloud
     unexisting_pids = clean_unlinked_process_marks()
     if not is_cloud:
+        logger.debug("Applying database migrations")
+        try:
+            from mindsdb.migrations import migrate
+            migrate.migrate_to_head()
+        except Exception as e:
+            logger.error(f"Error! Something went wrong during DB migrations: {e}")
+
+        validate_default_project()
+
         if len(unexisting_pids) > 0:
             set_error_model_status_by_pids(unexisting_pids)
         set_error_model_status_for_unfinished()
-
-        integration_controller.create_permanent_integrations()
-
-        # region Mark old predictors as outdated
-        is_modified = False
-        predictor_records = (
-            db.session.query(db.Predictor)
-            .filter(db.Predictor.deleted_at.is_(None))
-            .all()
-        )
-        if len(predictor_records) > 0:
-            (
-                sucess,
-                compatible_versions,
-            ) = get_versions_where_predictors_become_obsolete()
-            if sucess is True:
-                compatible_versions = [version.parse(x) for x in compatible_versions]
-                mindsdb_version_parsed = version.parse(mindsdb_version)
-                compatible_versions = [x for x in compatible_versions if x <= mindsdb_version_parsed]
-                if len(compatible_versions) > 0:
-                    last_compatible_version = compatible_versions[-1]
-                    for predictor_record in predictor_records:
-                        if (
-                            isinstance(predictor_record.mindsdb_version, str)
-                            and version.parse(predictor_record.mindsdb_version) < last_compatible_version
-                        ):
-                            predictor_record.update_status = "available"
-                            is_modified = True
-                    if is_modified is True:
-                        db.session.commit()
-        # endregion
+        create_permanent_integrations()
 
     clean_process_marks()
 
@@ -503,24 +503,6 @@ if __name__ == '__main__':
     if config.cmd_args.ml_task_queue_consumer is True:
         trunc_processes_struct[TrunkProcessEnum.ML_TASK_QUEUE].need_to_run = True
 
-    def start_process(trunc_process_data):
-        # TODO this 'ctx' is eclipsing 'context' class imported as 'ctx'
-        ctx = mp.get_context("spawn")
-        logger.info(f"{trunc_process_data.name} API: starting...")
-        try:
-            trunc_process_data.process = ctx.Process(
-                target=trunc_process_data.entrypoint,
-                args=trunc_process_data.args,
-                name=trunc_process_data.name
-            )
-            trunc_process_data.process.start()
-        except Exception as e:
-            logger.error(
-                f"Failed to start {trunc_process_data.name} API with exception {e}\n{traceback.format_exc()}"
-            )
-            close_api_gracefully(trunc_processes_struct)
-            raise e
-
     for trunc_process_data in trunc_processes_struct.values():
         if trunc_process_data.started is True or trunc_process_data.need_to_run is False:
             continue
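The first hunk brackets MindsDB's import-heavy startup between `gc.disable()` and `gc.enable()`, so the cyclic garbage collector does not repeatedly scan the many long-lived objects created while modules load. A minimal sketch of the same startup pattern (the timed imports are placeholders, not MindsDB's actual dependencies):

    import gc
    import time

    gc.disable()  # suspend cyclic GC while imports allocate long-lived objects

    t0 = time.perf_counter()
    import json     # stand-ins: imagine heavyweight libraries here
    import sqlite3
    elapsed = time.perf_counter() - t0

    gc.enable()   # restore normal collection once startup settles
    gc.collect()  # reclaim any cycles created during import
    print(f"imports finished in {elapsed:.3f}s with GC disabled")

Since almost everything allocated during import survives anyway, skipping collection there is pure savings; the `gc.enable()` after the torch setup restores normal behavior for the server's lifetime.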
mindsdb/api/executor/command_executor.py
CHANGED

@@ -5,7 +5,6 @@ from typing import Optional
 from functools import reduce
 
 import pandas as pd
-from mindsdb_evaluator.accuracy.general import evaluate_accuracy
 from mindsdb_sql_parser import parse_sql
 from mindsdb_sql_parser.ast import (
     Alter,
@@ -585,6 +584,8 @@ class ExecuteCommands:
             )
         elif statement_type is Insert:
             query = SQLQuery(statement, session=self.session, database=database_name)
+            if query.fetched_data.length() > 0:
+                return self.answer_select(query)
             return ExecuteAnswer(
                 affected_rows=query.fetched_data.affected_rows
             )
@@ -670,7 +671,7 @@ class ExecuteCommands:
         command = target.op.lower()
         args = [arg.value for arg in target.args if isinstance(arg, Constant)]
         if command == 'query_resume':
-            ret = SQLQuery(None, session=self.session,
+            ret = SQLQuery(None, session=self.session, query_id=args[0])
             return self.answer_select(ret)
 
         elif command == 'query_cancel':
@@ -812,6 +813,8 @@ class ExecuteCommands:
         return ExecuteAnswer()
 
     def answer_evaluate_metric(self, statement, database_name):
+        # heavy import, so we do it here on-demand
+        from mindsdb_evaluator.accuracy.general import evaluate_accuracy
         try:
             sqlquery = SQLQuery(statement.data, session=self.session, database=database_name)
         except Exception as e:
@@ -868,13 +871,21 @@ class ExecuteCommands:
         else:
             raise WrongArgumentError(f'Unknown describe type: {obj_type}')
 
-
+        parts = obj_name.parts
+        if len(parts) > 2:
+            raise WrongArgumentError(
+                f"Invalid object name: {obj_name.to_string()}.\n"
+                "Only models support three-part namespaces."
+            )
+
         name = parts[-1]
         where = BinaryOperation(op='=', args=[
             Identifier('name'),
             Constant(name)
         ])
 
         if obj_type in project_objects:
+            database_name = parts[0] if len(parts) > 1 else database_name
             where = BinaryOperation(op='and', args=[
                 where,
                 BinaryOperation(op='=', args=[Identifier('project'), Constant(database_name)])
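This release repeatedly moves heavy imports from module level into the function that needs them: `evaluate_accuracy` is now imported inside `answer_evaluate_metric`, and `apply_predictor_step.py` below does the same with `dateparser`. A minimal sketch of the deferred-import pattern, with a hypothetical `heavy_lib` standing in for the expensive dependency:

    def evaluate(metric: str, data):
        # Deferred import: the cost is paid on first use, not at server startup.
        # 'heavy_lib' is a placeholder, not a real MindsDB dependency.
        import heavy_lib

        return heavy_lib.compute(metric, data)

Python caches imported modules in `sys.modules`, so only the first call pays the import cost.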
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py
CHANGED

@@ -22,6 +22,8 @@ from .mindsdb_tables import (
     ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable, QueriesTable,
     ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable)
 
+from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
+
 
 logger = log.getLogger(__name__)
 
@@ -166,6 +168,12 @@ class InformationSchemaDataNode(DataNode):
         return [x.lower() for x in projects]
 
     def get_tables(self):
+        return [
+            TablesRow(TABLE_NAME=name)
+            for name in self.tables.keys()
+        ]
+
+    def get_tree_tables(self):
         return {
             name: table
             for name, table in self.tables.items()
mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py
CHANGED

@@ -451,7 +451,8 @@ class ViewsTable(MdbTable):
 
 class QueriesTable(MdbTable):
     name = 'QUERIES'
-    columns = ["ID", "STARTED_AT", "FINISHED_AT", "PROCESSED_ROWS", "ERROR", "SQL", "
+    columns = ["ID", "STARTED_AT", "FINISHED_AT", "PROCESSED_ROWS", "ERROR", "SQL", "DATABASE",
+               "PARAMETERS", "CONTEXT", "UPDATED_AT"]
 
     @classmethod
     def get_data(cls, **kwargs):
mindsdb/api/executor/datahub/datanodes/system_tables.py
CHANGED

@@ -311,20 +311,17 @@ class ColumnsTable(Table):
         result = []
         for db_name in databases:
             tables = {}
-
-
-
-
-
+
+            dn = inf_schema.get(db_name)
+            if dn is None:
+                continue
+
+            if tables_names is None:
+                list_tables = [t.TABLE_NAME for t in dn.get_tables()]
             else:
-
-
-
-
-                if tables_names is None:
-                    tables_names = [t.TABLE_NAME for t in dn.get_tables()]
-                for table_name in tables_names:
-                    tables[table_name] = dn.get_table_columns_df(table_name)
+                list_tables = tables_names
+            for table_name in list_tables:
+                tables[table_name] = dn.get_table_columns_df(table_name)
 
             for table_name, table_columns_df in tables.items():
                 for _, row in table_columns_df.iterrows():
mindsdb/api/executor/planner/query_planner.py
CHANGED

@@ -29,6 +29,9 @@ from mindsdb.utilities.config import config
 
 default_project = config.get('default_project')
 
+# This includes built-in MindsDB SQL functions and functions to be executed via DuckDB consistently.
+MINDSDB_SQL_FUNCTIONS = {'llm', 'to_markdown', 'hash'}
+
 
 class QueryPlanner:
 
@@ -237,7 +240,7 @@ class QueryPlanner:
 
         def find_objects(node, is_table, **kwargs):
             if isinstance(node, Function):
-                if node.namespace is not None or node.op.lower() in
+                if node.namespace is not None or node.op.lower() in MINDSDB_SQL_FUNCTIONS:
                     user_functions.append(node)
 
             if is_table:
@@ -847,6 +850,7 @@ class QueryPlanner:
         # handle fetchdataframe partitioning
         steps_out = []
 
+        step = None
         partition_step = None
         for step in plan.steps:
             if isinstance(step, FetchDataframeStep) and step.params is not None:
@@ -898,6 +902,10 @@ class QueryPlanner:
                 continue
 
             steps_out.append(step)
+
+        if plan.is_resumable and isinstance(step, InsertToTable):
+            plan.is_async = True
+
         plan.steps = steps_out
         return plan
 
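The new `MINDSDB_SQL_FUNCTIONS` set gives the planner one place to list the function names it must execute itself (directly or via DuckDB) rather than push down to a data source. A condensed sketch of the check `find_objects` performs, assuming a parser `Function` node with the `namespace` and `op` attributes used in the diff:

    MINDSDB_SQL_FUNCTIONS = {'llm', 'to_markdown', 'hash'}

    def is_mindsdb_function(node) -> bool:
        # Namespaced calls (e.g. my_project.my_func(...)) and names in the
        # built-in set are collected for MindsDB-side execution.
        return node.namespace is not None or node.op.lower() in MINDSDB_SQL_FUNCTIONS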
mindsdb/api/executor/sql_query/sql_query.py
CHANGED

@@ -12,7 +12,9 @@ import inspect
 from textwrap import dedent
 from typing import Union, Dict
 
+import pandas as pd
 from mindsdb_sql_parser import parse_sql, ASTNode
+
 from mindsdb.api.executor.planner.steps import (
     ApplyTimeseriesPredictorStep,
     ApplyPredictorRowStep,
@@ -47,9 +49,16 @@ class SQLQuery:
     step_handlers = {}
 
     def __init__(self, sql: Union[ASTNode, str], session, execute: bool = True,
-                 database: str = None, query_id: int = None):
+                 database: str = None, query_id: int = None, stop_event=None):
         self.session = session
 
+        self.query_id = query_id
+        if self.query_id is not None:
+            # get sql and database from resumed query
+            run_query = query_context_controller.get_query(self.query_id)
+            sql = run_query.sql
+            database = run_query.database
+
         if database is not None:
             self.database = database
         else:
@@ -69,12 +78,7 @@ class SQLQuery:
 
         self.outer_query = None
         self.run_query = None
-        self.
-        if query_id is not None:
-            # resume query
-            run_query = query_context_controller.get_query(self.query_id)
-            run_query.clear_error()
-            sql = run_query.sql
+        self.stop_event = stop_event
 
         if isinstance(sql, str):
             self.query = parse_sql(sql)
@@ -240,7 +244,19 @@ class SQLQuery:
         if self.query_id is not None:
             self.run_query = query_context_controller.get_query(self.query_id)
         else:
-            self.run_query = query_context_controller.create_query(self.context['query_str'])
+            self.run_query = query_context_controller.create_query(self.context['query_str'], database=self.database)
+
+        if self.planner.plan.is_async and ctx.task_id is None:
+            # add to task
+            self.run_query.add_to_task()
+            # return query info
+            # columns in upper case
+            rec = {k.upper(): v for k, v in self.run_query.get_info().items()}
+            self.fetched_data = ResultSet().from_df(pd.DataFrame([rec]))
+            self.columns_list = self.fetched_data.columns
+            return
+        self.run_query.mark_as_run()
 
         ctx.run_query_id = self.run_query.record.id
 
         step_result = None
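Taken together, these hunks make a resumable plan that ends in an `INSERT` run asynchronously: instead of blocking, `SQLQuery` registers the query as a background task and immediately returns a one-row result describing it. A rough sketch of that hand-off, where `run_query` stands in for the object returned by `query_context_controller` (its `add_to_task`, `get_info`, and `mark_as_run` methods appear in the diff above):

    import pandas as pd

    def run_or_defer(plan_is_async: bool, run_query):
        if plan_is_async:
            run_query.add_to_task()
            # Column names are upper-cased, matching information_schema style.
            rec = {k.upper(): v for k, v in run_query.get_info().items()}
            return pd.DataFrame([rec])  # returned to the client right away
        run_query.mark_as_run()
        return None  # caller continues with normal step execution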
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py
CHANGED

@@ -1,7 +1,6 @@
 import datetime as dt
 import re
 
-import dateinfer
 import pandas as pd
 
 from mindsdb_sql_parser.ast import (
@@ -262,7 +261,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             return predictor_data
 
         def get_date_format(samples):
-            #
+            # Try common formats first with explicit patterns
             for date_format, pattern in (
                 ('%Y-%m-%d', r'[\d]{4}-[\d]{2}-[\d]{2}'),
                 ('%Y-%m-%d %H:%M:%S', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}'),
@@ -280,7 +279,26 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
             if date_format is not None:
                 return date_format
 
-
+            # Use dateparser as fallback and infer format
+            try:
+                # Parse the first sample to get its format
+                # The import is heavy, so we do it here on-demand
+                import dateparser
+                parsed_date = dateparser.parse(samples[0])
+                if parsed_date is None:
+                    raise ValueError("Could not parse date")
+
+                # Verify the format works for all samples
+                for sample in samples[1:]:
+                    if dateparser.parse(sample) is None:
+                        raise ValueError("Inconsistent date formats in samples")
+                # Convert to strftime format based on the input
+                if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
+                    return '%Y-%m-%d %H:%M:%S'
+                return '%Y-%m-%d'
+            except (ValueError, AttributeError):
+                # If dateparser fails, return a basic format as last resort
+                return '%Y-%m-%d'
 
         model_types = predictor_metadata['model_types']
         if model_types.get(order_col) in ('float', 'integer'):
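The `dateinfer` dependency is replaced by a two-stage sniff: explicit regexes for the common formats first, then a lazily imported `dateparser` fallback. A condensed, self-contained sketch of that chain (the pattern table is abbreviated from the diff):

    import re

    def sniff_date_format(samples: list[str]) -> str:
        patterns = (
            ('%Y-%m-%d', r'^\d{4}-\d{2}-\d{2}$'),
            ('%Y-%m-%d %H:%M:%S', r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$'),
        )
        for fmt, pattern in patterns:
            if all(re.match(pattern, s) for s in samples):
                return fmt
        import dateparser  # heavy import, deferred until actually needed
        if all(dateparser.parse(s) is not None for s in samples):
            if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
                return '%Y-%m-%d %H:%M:%S'
        return '%Y-%m-%d'  # basic format as the last resort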
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py
CHANGED

@@ -222,7 +222,9 @@ class FetchDataframePartitionCall(BaseStepCall):
             else:
                 executor.shutdown()
                 raise e
-
+            if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
+                executor.shutdown()
+                raise RuntimeError('Query is interrupted')
             # TODO
             # 1. get next batch without updating track_value:
             #    it allows to keep queue_in filled with data between fetching batches
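`SQLQuery` now threads an optional `stop_event` down to long-running steps, and the partition fetcher polls it between batches: the standard cooperative-cancellation idiom. A minimal sketch under the assumption of a batch loop around a thread pool, as in the step above:

    import threading
    from concurrent.futures import ThreadPoolExecutor

    def fetch_partitions(batches, handle_batch, stop_event: threading.Event):
        executor = ThreadPoolExecutor(max_workers=4)
        try:
            for batch in batches:
                if stop_event.is_set():
                    raise RuntimeError('Query is interrupted')
                # wait per batch so cancellation is checked between batches
                executor.submit(handle_batch, batch).result()
        finally:
            executor.shutdown()

Setting the event from another thread makes the loop raise at the next batch boundary rather than killing workers mid-flight.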
mindsdb/api/http/initialize.py
CHANGED

@@ -50,12 +50,29 @@ from mindsdb.utilities.json_encoder import CustomJSONProvider
 from mindsdb.utilities.ps import is_pid_listen_port, wait_func_is_true
 from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401
 from mindsdb.utilities.otel import trace  # noqa: F401
-from opentelemetry.instrumentation.flask import FlaskInstrumentor  # noqa: F401
-from opentelemetry.instrumentation.requests import RequestsInstrumentor  # noqa: F401
 
 logger = log.getLogger(__name__)
 
 
+class _NoOpFlaskInstrumentor:
+    def instrument_app(self, app):
+        pass
+
+
+class _NoOpRequestsInstrumentor:
+    def instrument(self):
+        pass
+
+
+try:
+    from opentelemetry.instrumentation.flask import FlaskInstrumentor
+    from opentelemetry.instrumentation.requests import RequestsInstrumentor
+except ImportError:
+    logger.debug("OpenTelemetry is not avaiable. Please run `pip install -r requirements/requirements-opentelemetry.txt` to use it.")
+    FlaskInstrumentor = _NoOpFlaskInstrumentor
+    RequestsInstrumentor = _NoOpRequestsInstrumentor
+
+
 class Swagger_Api(Api):
     """
     This is a modification of the base Flask Restplus Api class due to the issue described here
@@ -376,7 +393,7 @@ def initialize_flask(config, init_static_thread, no_studio):
     app = Flask(__name__, **kwargs)
     init_metrics(app)
 
-    # Instrument Flask app
+    # Instrument Flask app and requests using either real or no-op instrumentors
     FlaskInstrumentor().instrument_app(app)
     RequestsInstrumentor().instrument()
 
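Making OpenTelemetry optional relies on the null-object pattern: when the import fails, classes with the same method signatures but empty bodies are substituted, so call sites such as `FlaskInstrumentor().instrument_app(app)` stay unconditional. The same idea in isolation, for any optional dependency:

    class _NoOpInstrumentor:
        # Same interface as the real instrumentor, but does nothing.
        def instrument_app(self, app):
            pass

    try:
        from opentelemetry.instrumentation.flask import FlaskInstrumentor
    except ImportError:
        FlaskInstrumentor = _NoOpInstrumentor  # swap in the stand-in

    # Call sites need no feature checks:
    # FlaskInstrumentor().instrument_app(app)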
mindsdb/api/http/namespaces/analysis.py
CHANGED

@@ -79,7 +79,14 @@ class QueryAnalysis(Resource):
 
         column_names = [x["name"] for x in result.columns]
         df = DataFrame(result.data, columns=column_names)
-
+        try:
+            analysis = analyze_df(df)
+        except ImportError:
+            return {
+                'analysis': {},
+                'timestamp': time.time(),
+                'error': 'To use this feature, please install the "dataprep_ml" package.'
+            }
 
         query_tables = [
             table.to_string() for table in get_query_tables(ast)
@@ -107,6 +114,12 @@ class DataAnalysis(Resource):
         try:
             analysis = analyze_df(DataFrame(data, columns=column_names))
             return {"analysis": analysis, "timestamp": time.time()}
+        except ImportError:
+            return {
+                'analysis': {},
+                'timestamp': timestamp,
+                'error': 'To use this feature, please install the "dataprep_ml" package.'
+            }
         except Exception as e:
             # Don't want analysis exceptions to show up on UI.
             # TODO: Fix analysis so it doesn't throw exceptions at all.