MindsDB 25.4.4.0__py3-none-any.whl → 25.5.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.
Files changed (86)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +107 -125
  3. mindsdb/api/executor/command_executor.py +14 -3
  4. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +8 -0
  5. mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py +2 -1
  6. mindsdb/api/executor/datahub/datanodes/system_tables.py +10 -13
  7. mindsdb/api/executor/planner/query_plan.py +1 -0
  8. mindsdb/api/executor/planner/query_planner.py +9 -1
  9. mindsdb/api/executor/sql_query/sql_query.py +24 -8
  10. mindsdb/api/executor/sql_query/steps/apply_predictor_step.py +21 -3
  11. mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py +3 -1
  12. mindsdb/api/http/initialize.py +20 -3
  13. mindsdb/api/http/namespaces/analysis.py +14 -1
  14. mindsdb/api/http/namespaces/config.py +19 -11
  15. mindsdb/api/http/namespaces/tree.py +1 -1
  16. mindsdb/api/http/start.py +7 -2
  17. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +4 -8
  18. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -4
  19. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +2 -2
  20. mindsdb/integrations/handlers/bigquery_handler/requirements.txt +1 -0
  21. mindsdb/integrations/handlers/chromadb_handler/requirements.txt +1 -0
  22. mindsdb/integrations/handlers/gmail_handler/requirements.txt +1 -0
  23. mindsdb/integrations/handlers/google_analytics_handler/requirements.txt +2 -1
  24. mindsdb/integrations/handlers/google_books_handler/requirements.txt +1 -1
  25. mindsdb/integrations/handlers/google_calendar_handler/requirements.txt +1 -0
  26. mindsdb/integrations/handlers/google_content_shopping_handler/requirements.txt +1 -1
  27. mindsdb/integrations/handlers/google_fit_handler/requirements.txt +2 -0
  28. mindsdb/integrations/handlers/google_search_handler/requirements.txt +1 -1
  29. mindsdb/integrations/handlers/jira_handler/jira_handler.archived.py +75 -0
  30. mindsdb/integrations/handlers/jira_handler/jira_handler.py +113 -38
  31. mindsdb/integrations/handlers/jira_handler/jira_tables.py +229 -0
  32. mindsdb/integrations/handlers/jira_handler/requirements.txt +1 -0
  33. mindsdb/integrations/handlers/lightfm_handler/requirements.txt +1 -0
  34. mindsdb/integrations/handlers/lightwood_handler/lightwood_handler.py +0 -2
  35. mindsdb/integrations/handlers/lightwood_handler/requirements.txt +4 -4
  36. mindsdb/integrations/handlers/lindorm_handler/requirements.txt +1 -0
  37. mindsdb/integrations/handlers/ms_one_drive_handler/requirements.txt +2 -0
  38. mindsdb/integrations/handlers/ms_teams_handler/requirements.txt +3 -1
  39. mindsdb/integrations/handlers/openai_handler/helpers.py +3 -5
  40. mindsdb/integrations/handlers/openai_handler/openai_handler.py +25 -12
  41. mindsdb/integrations/handlers/snowflake_handler/requirements.txt +1 -1
  42. mindsdb/integrations/handlers/togetherai_handler/__about__.py +9 -0
  43. mindsdb/integrations/handlers/togetherai_handler/__init__.py +20 -0
  44. mindsdb/integrations/handlers/togetherai_handler/creation_args.py +14 -0
  45. mindsdb/integrations/handlers/togetherai_handler/icon.svg +15 -0
  46. mindsdb/integrations/handlers/togetherai_handler/model_using_args.py +5 -0
  47. mindsdb/integrations/handlers/togetherai_handler/requirements.txt +2 -0
  48. mindsdb/integrations/handlers/togetherai_handler/settings.py +33 -0
  49. mindsdb/integrations/handlers/togetherai_handler/togetherai_handler.py +234 -0
  50. mindsdb/integrations/handlers/vertex_handler/requirements.txt +1 -0
  51. mindsdb/integrations/handlers/youtube_handler/requirements.txt +1 -0
  52. mindsdb/integrations/utilities/files/file_reader.py +5 -2
  53. mindsdb/integrations/utilities/handler_utils.py +4 -0
  54. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +360 -0
  55. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +6 -346
  56. mindsdb/interfaces/agents/constants.py +14 -2
  57. mindsdb/interfaces/agents/langchain_agent.py +2 -4
  58. mindsdb/interfaces/database/projects.py +1 -7
  59. mindsdb/interfaces/functions/controller.py +14 -16
  60. mindsdb/interfaces/functions/to_markdown.py +9 -124
  61. mindsdb/interfaces/knowledge_base/controller.py +109 -92
  62. mindsdb/interfaces/knowledge_base/preprocessing/document_preprocessor.py +28 -5
  63. mindsdb/interfaces/knowledge_base/utils.py +10 -15
  64. mindsdb/interfaces/model/model_controller.py +0 -2
  65. mindsdb/interfaces/query_context/context_controller.py +55 -15
  66. mindsdb/interfaces/query_context/query_task.py +19 -0
  67. mindsdb/interfaces/skills/sql_agent.py +33 -11
  68. mindsdb/interfaces/storage/db.py +2 -2
  69. mindsdb/interfaces/tasks/task_monitor.py +5 -1
  70. mindsdb/interfaces/tasks/task_thread.py +6 -0
  71. mindsdb/migrations/migrate.py +0 -2
  72. mindsdb/migrations/versions/2025-04-22_53502b6d63bf_query_database.py +27 -0
  73. mindsdb/utilities/config.py +15 -3
  74. mindsdb/utilities/context.py +2 -1
  75. mindsdb/utilities/functions.py +0 -36
  76. mindsdb/utilities/langfuse.py +19 -10
  77. mindsdb/utilities/otel/__init__.py +9 -193
  78. mindsdb/utilities/otel/metric_handlers/__init__.py +5 -1
  79. mindsdb/utilities/otel/prepare.py +198 -0
  80. mindsdb/utilities/sql.py +83 -0
  81. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/METADATA +662 -592
  82. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/RECORD +85 -69
  83. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/WHEEL +1 -1
  84. mindsdb/api/mysql/mysql_proxy/classes/sql_statement_parser.py +0 -151
  85. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/licenses/LICENSE +0 -0
  86. {mindsdb-25.4.4.0.dist-info → mindsdb-25.5.3.0.dist-info}/top_level.txt +0 -0
mindsdb/__about__.py CHANGED
@@ -1,6 +1,6 @@
  __title__ = 'MindsDB'
  __package_name__ = 'mindsdb'
- __version__ = '25.4.4.0'
+ __version__ = '25.5.3.0'
  __description__ = "MindsDB's AI SQL Server enables developers to build AI tools that need access to real-time data to perform their tasks"
  __email__ = "jorge@mindsdb.com"
  __author__ = 'MindsDB Inc'
mindsdb/__main__.py CHANGED
@@ -1,3 +1,5 @@
+ import gc
+ gc.disable()
  import os
  import sys
  import time
@@ -12,7 +14,7 @@ from enum import Enum
  from dataclasses import dataclass, field
  from typing import Callable, Optional, Tuple, List

- from packaging import version
+ from sqlalchemy import func
  from sqlalchemy.orm.attributes import flag_modified

  from mindsdb.utilities import log
@@ -22,17 +24,12 @@ logger.debug("Starting MindsDB...")

  from mindsdb.__about__ import __version__ as mindsdb_version
  from mindsdb.utilities.config import config
- from mindsdb.utilities.exception import EntityNotExistsError
  from mindsdb.utilities.starters import (
-     start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue, start_scheduler, start_tasks,
-     start_mcp, start_litellm
+     start_http, start_mysql, start_mongo, start_postgres, start_ml_task_queue,
+     start_scheduler, start_tasks, start_mcp, start_litellm
  )
  from mindsdb.utilities.ps import is_pid_listen_port, get_child_pids
- from mindsdb.utilities.functions import get_versions_where_predictors_become_obsolete
- from mindsdb.interfaces.database.integrations import integration_controller
- from mindsdb.interfaces.database.projects import ProjectController
  import mindsdb.interfaces.storage.db as db
- from mindsdb.integrations.utilities.install import install_dependencies
  from mindsdb.utilities.fs import clean_process_marks, clean_unlinked_process_marks
  from mindsdb.utilities.context import context as ctx
  from mindsdb.utilities.auth import register_oauth_client, get_aws_meta_data
@@ -47,6 +44,8 @@ try:
  except RuntimeError:
      logger.info('Torch multiprocessing context already set, ignoring...')

+ gc.enable()
+
  _stop_event = threading.Event()


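Note on the gc change above: the release disables Python's cyclic garbage collector for the duration of the import block and re-enables it once the heavy imports finish, a known trick for cutting interpreter startup time. A minimal sketch of the same pattern, wrapped in a context manager for safety (the `no_gc` helper is illustrative, not part of MindsDB):

```python
import gc
from contextlib import contextmanager


@contextmanager
def no_gc():
    """Temporarily disable cyclic GC, e.g. around import-heavy startup code."""
    was_enabled = gc.isenabled()
    gc.disable()
    try:
        yield
    finally:
        if was_enabled:
            gc.enable()  # re-enable, as __main__.py does after its imports


with no_gc():
    import json  # noqa: F401  # stand-in for MindsDB's heavy imports
```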
@@ -213,7 +212,96 @@ def do_clean_process_marks():
      set_error_model_status_by_pids(unexisting_pids)


+ def create_permanent_integrations():
+     """
+     Create permanent integrations, for now only the 'files' integration.
+     NOTE: this is intentional to avoid importing integration_controller
+     """
+     integration_name = 'files'
+     existing = db.session.query(db.Integration).filter_by(name=integration_name, company_id=None).first()
+     if existing is None:
+         integration_record = db.Integration(
+             name=integration_name,
+             data={},
+             engine=integration_name,
+             company_id=None,
+         )
+         db.session.add(integration_record)
+         try:
+             db.session.commit()
+         except Exception as e:
+             logger.error(f"Failed to commit permanent integration {integration_name}: {e}")
+             db.session.rollback()
+
+
+ def validate_default_project() -> None:
+     """Handle 'default_project' config option.
+     Project with the name specified in 'default_project' must exists and be marked with
+     'is_default' metadata. If it is not possible, then terminate the process with error.
+     Note: this can be done using 'project_controller', but we want to save init time and used RAM.
+     """
+     new_default_project_name = config.get('default_project')
+     logger.debug(f"Checking if default project {new_default_project_name} exists")
+     filter_company_id = ctx.company_id if ctx.company_id is not None else 0
+
+     current_default_project: db.Project | None = (
+         db.Project.query.filter(
+             db.Project.company_id == filter_company_id,
+             db.Project.metadata_['is_default'].as_boolean() == True  # noqa
+         ).first()
+     )
+
+     if current_default_project is None:
+         # Legacy: If the default project does not exist, mark the new one as default.
+         existing_project = db.Project.query.filter(
+             db.Project.company_id == filter_company_id,
+             func.lower(db.Project.name) == func.lower(new_default_project_name)
+         ).first()
+         if existing_project is None:
+             logger.critical(f"A project with the name '{new_default_project_name}' does not exist")
+             sys.exit(1)
+
+         existing_project.metadata_ = {'is_default': True}
+         flag_modified(existing_project, 'metadata_')
+         db.session.commit()
+     elif current_default_project.name != new_default_project_name:
+         # If the default project exists, but the name is different, update the name.
+         existing_project = db.Project.query.filter(
+             db.Project.company_id == filter_company_id,
+             func.lower(db.Project.name) == func.lower(new_default_project_name)
+         ).first()
+         if existing_project is not None:
+             logger.critical(f"A project with the name '{new_default_project_name}' already exists")
+             sys.exit(1)
+         current_default_project.name = new_default_project_name
+         db.session.commit()
+
+
+ def start_process(trunc_process_data: TrunkProcessData) -> None:
+     """Start a process.
+
+     Args:
+         trunc_process_data (TrunkProcessData): The data of the process to start.
+     """
+     mp_ctx = mp.get_context("spawn")
+     logger.info(f"{trunc_process_data.name} API: starting...")
+     try:
+         trunc_process_data.process = mp_ctx.Process(
+             target=trunc_process_data.entrypoint,
+             args=trunc_process_data.args,
+             name=trunc_process_data.name
+         )
+         trunc_process_data.process.start()
+     except Exception as e:
+         logger.error(
+             f"Failed to start {trunc_process_data.name} API with exception {e}\n{traceback.format_exc()}"
+         )
+         close_api_gracefully(trunc_processes_struct)
+         raise e
+
+
  if __name__ == '__main__':
+     mp.freeze_support()
      # warn if less than 1Gb of free RAM
      if psutil.virtual_memory().available < (1 << 30):
          logger.warning(
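The `validate_default_project` helper above filters on a JSON metadata column with SQLAlchemy's `as_boolean()` comparator rather than loading a full `ProjectController`, trading an ORM layer for one direct query at startup. A self-contained sketch of that filtering pattern (the `Project` model here is a simplified stand-in for MindsDB's real table):

```python
from sqlalchemy import create_engine, Column, Integer, String, JSON, func
from sqlalchemy.orm import declarative_base, Session

Base = declarative_base()


class Project(Base):  # simplified stand-in for mindsdb's db.Project
    __tablename__ = 'project'
    id = Column(Integer, primary_key=True)
    name = Column(String)
    metadata_ = Column(JSON)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Project(name='mindsdb', metadata_={'is_default': True}))
    session.commit()
    # Same shape as the startup check: a JSON field coerced to boolean,
    # plus a case-insensitive name comparison via func.lower().
    default = session.query(Project).filter(
        Project.metadata_['is_default'].as_boolean() == True  # noqa: E712
    ).first()
    by_name = session.query(Project).filter(
        func.lower(Project.name) == func.lower('MindsDB')
    ).first()
    print(default.name, by_name.name)
```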
@@ -270,7 +358,6 @@ if __name__ == '__main__':
          pass

      db.init()
-     mp.freeze_support()

      environment = config["environment"]
      if environment == "aws_marketplace":
@@ -287,52 +374,6 @@ if __name__ == '__main__':
      except Exception:
          pass

-     is_cloud = config.is_cloud
-
-     if not is_cloud:
-         logger.debug("Applying database migrations")
-         try:
-             from mindsdb.migrations import migrate
-             migrate.migrate_to_head()
-         except Exception as e:
-             logger.error(f"Error! Something went wrong during DB migrations: {e}")
-
-         logger.debug(f"Checking if default project {config.get('default_project')} exists")
-         project_controller = ProjectController()
-
-         try:
-             current_default_project = project_controller.get(is_default=True)
-         except EntityNotExistsError:
-             # In previous versions, the default project could be deleted. This is no longer possible.
-             current_default_project = None
-
-         if current_default_project:
-             if current_default_project.record.name != config.get('default_project'):
-                 try:
-                     project_controller.get(name=config.get('default_project'))
-                     log.critical(f"A project with the name '{config.get('default_project')}' already exists")
-                     sys.exit(1)
-                 except EntityNotExistsError:
-                     pass
-                 project_controller.update(current_default_project.record.id, new_name=config.get('default_project'))
-
-         # Legacy: If the default project does not exist, mark the new one as default.
-         else:
-             try:
-                 project_controller.get(name=config.get('default_project'))
-             except EntityNotExistsError:
-                 log.critical(
-                     f"A project with the name '{config.get('default_project')}' does not exist"
-                 )
-                 raise
-
-             project_controller.update(
-                 name=config.get('default_project'),
-                 new_metadata={
-                     "is_default": True
-                 }
-             )
-
      apis = os.getenv('MINDSDB_APIS') or config.cmd_args.api

      if apis is None:  # If "--api" option is not specified, start the default APIs
@@ -342,27 +383,6 @@ if __name__ == '__main__':
      else:  # The user has provided a list of APIs to start
          api_arr = [TrunkProcessEnum(name) for name in apis.split(',')]

-     if config.cmd_args.install_handlers is not None:
-         handlers_list = [s.strip() for s in config.cmd_args.install_handlers.split(",")]
-         # import_meta = handler_meta.get('import', {})
-         for handler_name, handler_meta in integration_controller.get_handlers_import_status().items():
-             if handler_name not in handlers_list:
-                 continue
-             import_meta = handler_meta.get("import", {})
-             if import_meta.get("success") is True:
-                 logger.info(f"{'{0: <18}'.format(handler_name)} - already installed")
-                 continue
-             result = install_dependencies(import_meta.get("dependencies", []))
-             if result.get("success") is True:
-                 logger.info(
-                     f"{'{0: <18}'.format(handler_name)} - successfully installed"
-                 )
-             else:
-                 logger.info(
-                     f"{'{0: <18}'.format(handler_name)} - error during dependencies installation: {result.get('error_message', 'unknown error')}"
-                 )
-         sys.exit(0)
-
      logger.info(f"Version: {mindsdb_version}")
      logger.info(f"Configuration file: {config.config_path or 'absent'}")
      logger.info(f"Storage path: {config.paths['root']}")
@@ -370,42 +390,22 @@ if __name__ == '__main__':
      logger.debug(f"System config: {config.auto_config}")
      logger.debug(f"Env config: {config.env_config}")

+     is_cloud = config.is_cloud
      unexisting_pids = clean_unlinked_process_marks()
      if not is_cloud:
+         logger.debug("Applying database migrations")
+         try:
+             from mindsdb.migrations import migrate
+             migrate.migrate_to_head()
+         except Exception as e:
+             logger.error(f"Error! Something went wrong during DB migrations: {e}")
+
+         validate_default_project()
+
          if len(unexisting_pids) > 0:
              set_error_model_status_by_pids(unexisting_pids)
          set_error_model_status_for_unfinished()
-
-         integration_controller.create_permanent_integrations()
-
-         # region Mark old predictors as outdated
-         is_modified = False
-         predictor_records = (
-             db.session.query(db.Predictor)
-             .filter(db.Predictor.deleted_at.is_(None))
-             .all()
-         )
-         if len(predictor_records) > 0:
-             (
-                 sucess,
-                 compatible_versions,
-             ) = get_versions_where_predictors_become_obsolete()
-             if sucess is True:
-                 compatible_versions = [version.parse(x) for x in compatible_versions]
-                 mindsdb_version_parsed = version.parse(mindsdb_version)
-                 compatible_versions = [x for x in compatible_versions if x <= mindsdb_version_parsed]
-                 if len(compatible_versions) > 0:
-                     last_compatible_version = compatible_versions[-1]
-                     for predictor_record in predictor_records:
-                         if (
-                             isinstance(predictor_record.mindsdb_version, str)
-                             and version.parse(predictor_record.mindsdb_version) < last_compatible_version
-                         ):
-                             predictor_record.update_status = "available"
-                             is_modified = True
-                     if is_modified is True:
-                         db.session.commit()
-         # endregion
+         create_permanent_integrations()

      clean_process_marks()

@@ -503,24 +503,6 @@ if __name__ == '__main__':
      if config.cmd_args.ml_task_queue_consumer is True:
          trunc_processes_struct[TrunkProcessEnum.ML_TASK_QUEUE].need_to_run = True

-     def start_process(trunc_process_data):
-         # TODO this 'ctx' is eclipsing 'context' class imported as 'ctx'
-         ctx = mp.get_context("spawn")
-         logger.info(f"{trunc_process_data.name} API: starting...")
-         try:
-             trunc_process_data.process = ctx.Process(
-                 target=trunc_process_data.entrypoint,
-                 args=trunc_process_data.args,
-                 name=trunc_process_data.name
-             )
-             trunc_process_data.process.start()
-         except Exception as e:
-             logger.error(
-                 f"Failed to start {trunc_process_data.name} API with exception {e}\n{traceback.format_exc()}"
-             )
-             close_api_gracefully(trunc_processes_struct)
-             raise e
-
      for trunc_process_data in trunc_processes_struct.values():
          if trunc_process_data.started is True or trunc_process_data.need_to_run is False:
              continue
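The module-level `start_process` shown earlier in this diff also renames the local `ctx` to `mp_ctx`, fixing the shadowing of the imported `context as ctx`, and keeps the explicit `spawn` start method. A minimal sketch of why `mp.get_context("spawn")` matters for API servers: spawn gives each child a fresh interpreter instead of forking the parent's threads, sockets, and DB connections (the `serve` entrypoint here is illustrative):

```python
import multiprocessing as mp


def serve(name: str) -> None:
    # In MindsDB this would be one of the API entrypoints (http, mysql, ...).
    print(f"{name} worker started")


if __name__ == '__main__':
    mp.freeze_support()  # no-op except in frozen Windows executables
    mp_ctx = mp.get_context("spawn")  # fresh interpreter per child, no inherited locks
    proc = mp_ctx.Process(target=serve, args=("http",), name="http")
    proc.start()
    proc.join()
```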
mindsdb/api/executor/command_executor.py CHANGED
@@ -5,7 +5,6 @@ from typing import Optional
  from functools import reduce

  import pandas as pd
- from mindsdb_evaluator.accuracy.general import evaluate_accuracy
  from mindsdb_sql_parser import parse_sql
  from mindsdb_sql_parser.ast import (
      Alter,
@@ -585,6 +584,8 @@ class ExecuteCommands:
              )
          elif statement_type is Insert:
              query = SQLQuery(statement, session=self.session, database=database_name)
+             if query.fetched_data.length() > 0:
+                 return self.answer_select(query)
              return ExecuteAnswer(
                  affected_rows=query.fetched_data.affected_rows
              )
@@ -670,7 +671,7 @@ class ExecuteCommands:
          command = target.op.lower()
          args = [arg.value for arg in target.args if isinstance(arg, Constant)]
          if command == 'query_resume':
-             ret = SQLQuery(None, session=self.session, database=database_name, query_id=args[0])
+             ret = SQLQuery(None, session=self.session, query_id=args[0])
              return self.answer_select(ret)

          elif command == 'query_cancel':
@@ -812,6 +813,8 @@ class ExecuteCommands:
          return ExecuteAnswer()

      def answer_evaluate_metric(self, statement, database_name):
+         # heavy import, so we do it here on-demand
+         from mindsdb_evaluator.accuracy.general import evaluate_accuracy
          try:
              sqlquery = SQLQuery(statement.data, session=self.session, database=database_name)
          except Exception as e:
@@ -868,13 +871,21 @@ class ExecuteCommands:
          else:
              raise WrongArgumentError(f'Unknown describe type: {obj_type}')

-         name = obj_name.parts[-1]
+         parts = obj_name.parts
+         if len(parts) > 2:
+             raise WrongArgumentError(
+                 f"Invalid object name: {obj_name.to_string()}.\n"
+                 "Only models support three-part namespaces."
+             )
+
+         name = parts[-1]
          where = BinaryOperation(op='=', args=[
              Identifier('name'),
              Constant(name)
          ])

          if obj_type in project_objects:
+             database_name = parts[0] if len(parts) > 1 else database_name
              where = BinaryOperation(op='and', args=[
                  where,
                  BinaryOperation(op='=', args=[Identifier('project'), Constant(database_name)])
mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py CHANGED
@@ -22,6 +22,8 @@ from .mindsdb_tables import (
      ModelsTable, DatabasesTable, MLEnginesTable, HandlersTable, JobsTable, QueriesTable,
      ChatbotsTable, KBTable, SkillsTable, AgentsTable, ViewsTable, TriggersTable)

+ from mindsdb.api.executor.datahub.classes.tables_row import TablesRow
+

  logger = log.getLogger(__name__)

@@ -166,6 +168,12 @@ class InformationSchemaDataNode(DataNode):
          return [x.lower() for x in projects]

      def get_tables(self):
+         return [
+             TablesRow(TABLE_NAME=name)
+             for name in self.tables.keys()
+         ]
+
+     def get_tree_tables(self):
          return {
              name: table
              for name, table in self.tables.items()
mindsdb/api/executor/datahub/datanodes/mindsdb_tables.py CHANGED
@@ -451,7 +451,8 @@ class ViewsTable(MdbTable):

  class QueriesTable(MdbTable):
      name = 'QUERIES'
-     columns = ["ID", "STARTED_AT", "FINISHED_AT", "PROCESSED_ROWS", "ERROR", "SQL", "PARAMETERS", "CONTEXT", "UPDATED_AT"]
+     columns = ["ID", "STARTED_AT", "FINISHED_AT", "PROCESSED_ROWS", "ERROR", "SQL", "DATABASE",
+                "PARAMETERS", "CONTEXT", "UPDATED_AT"]

      @classmethod
      def get_data(cls, **kwargs):
mindsdb/api/executor/datahub/datanodes/system_tables.py CHANGED
@@ -311,20 +311,17 @@ class ColumnsTable(Table):
          result = []
          for db_name in databases:
              tables = {}
-             if db_name == 'information_schema':
-                 for table_name, table in inf_schema.tables.items():
-                     tables[table_name] = [
-                         {'name': name} for name in table.columns
-                     ]
+
+             dn = inf_schema.get(db_name)
+             if dn is None:
+                 continue
+
+             if tables_names is None:
+                 list_tables = [t.TABLE_NAME for t in dn.get_tables()]
              else:
-                 dn = inf_schema.get(db_name)
-                 if dn is None:
-                     continue
-
-                 if tables_names is None:
-                     tables_names = [t.TABLE_NAME for t in dn.get_tables()]
-                 for table_name in tables_names:
-                     tables[table_name] = dn.get_table_columns_df(table_name)
+                 list_tables = tables_names
+             for table_name in list_tables:
+                 tables[table_name] = dn.get_table_columns_df(table_name)

              for table_name, table_columns_df in tables.items():
                  for _, row in table_columns_df.iterrows():
mindsdb/api/executor/planner/query_plan.py CHANGED
@@ -3,6 +3,7 @@ class QueryPlan:
      def __init__(self, steps=None, **kwargs):
          self.steps = []
          self.is_resumable = False
+         self.is_async = False

          if steps:
              for step in steps:
mindsdb/api/executor/planner/query_planner.py CHANGED
@@ -29,6 +29,9 @@ from mindsdb.utilities.config import config

  default_project = config.get('default_project')

+ # This includes built-in MindsDB SQL functions and functions to be executed via DuckDB consistently.
+ MINDSDB_SQL_FUNCTIONS = {'llm', 'to_markdown', 'hash'}
+

  class QueryPlanner:

@@ -237,7 +240,7 @@ class QueryPlanner:

      def find_objects(node, is_table, **kwargs):
          if isinstance(node, Function):
-             if node.namespace is not None or node.op.lower() in ('llm',):
+             if node.namespace is not None or node.op.lower() in MINDSDB_SQL_FUNCTIONS:
                  user_functions.append(node)

          if is_table:
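The effect of the new constant is that `to_markdown` and `hash` calls are now routed the same way `llm` already was. A toy sketch of the detection logic in `find_objects`, using a hypothetical mini-AST node rather than mindsdb_sql_parser's real classes:

```python
from dataclasses import dataclass
from typing import Optional

MINDSDB_SQL_FUNCTIONS = {'llm', 'to_markdown', 'hash'}


@dataclass
class Function:  # hypothetical stand-in for mindsdb_sql_parser.ast.Function
    op: str
    namespace: Optional[str] = None


def is_mindsdb_function(node: Function) -> bool:
    # Mirrors the planner check: namespaced functions (my_engine.fn)
    # or names in MINDSDB_SQL_FUNCTIONS are handled by MindsDB itself.
    return node.namespace is not None or node.op.lower() in MINDSDB_SQL_FUNCTIONS


assert is_mindsdb_function(Function(op='TO_MARKDOWN'))
assert is_mindsdb_function(Function(op='embed', namespace='my_ml_engine'))
assert not is_mindsdb_function(Function(op='upper'))
```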
@@ -847,6 +850,7 @@ class QueryPlanner:
          # handle fetchdataframe partitioning
          steps_out = []

+         step = None
          partition_step = None
          for step in plan.steps:
              if isinstance(step, FetchDataframeStep) and step.params is not None:
@@ -898,6 +902,10 @@ class QueryPlanner:
                  continue

              steps_out.append(step)
+
+         if plan.is_resumable and isinstance(step, InsertToTable):
+             plan.is_async = True
+
          plan.steps = steps_out
          return plan

mindsdb/api/executor/sql_query/sql_query.py CHANGED
@@ -12,7 +12,9 @@ import inspect
  from textwrap import dedent
  from typing import Union, Dict

+ import pandas as pd
  from mindsdb_sql_parser import parse_sql, ASTNode
+
  from mindsdb.api.executor.planner.steps import (
      ApplyTimeseriesPredictorStep,
      ApplyPredictorRowStep,
@@ -47,9 +49,16 @@ class SQLQuery:
      step_handlers = {}

      def __init__(self, sql: Union[ASTNode, str], session, execute: bool = True,
-                  database: str = None, query_id: int = None):
+                  database: str = None, query_id: int = None, stop_event=None):
          self.session = session

+         self.query_id = query_id
+         if self.query_id is not None:
+             # get sql and database from resumed query
+             run_query = query_context_controller.get_query(self.query_id)
+             sql = run_query.sql
+             database = run_query.database
+
          if database is not None:
              self.database = database
          else:
@@ -69,12 +78,7 @@ class SQLQuery:

          self.outer_query = None
          self.run_query = None
-         self.query_id = query_id
-         if query_id is not None:
-             # resume query
-             run_query = query_context_controller.get_query(self.query_id)
-             run_query.clear_error()
-             sql = run_query.sql
+         self.stop_event = stop_event

          if isinstance(sql, str):
              self.query = parse_sql(sql)
@@ -240,7 +244,19 @@ class SQLQuery:
          if self.query_id is not None:
              self.run_query = query_context_controller.get_query(self.query_id)
          else:
-             self.run_query = query_context_controller.create_query(self.context['query_str'])
+             self.run_query = query_context_controller.create_query(self.context['query_str'], database=self.database)
+
+         if self.planner.plan.is_async and ctx.task_id is None:
+             # add to task
+             self.run_query.add_to_task()
+             # return query info
+             # columns in upper case
+             rec = {k.upper(): v for k, v in self.run_query.get_info().items()}
+             self.fetched_data = ResultSet().from_df(pd.DataFrame([rec]))
+             self.columns_list = self.fetched_data.columns
+             return
+         self.run_query.mark_as_run()
+
          ctx.run_query_id = self.run_query.record.id

          step_result = None
mindsdb/api/executor/sql_query/steps/apply_predictor_step.py CHANGED
@@ -1,7 +1,6 @@
  import datetime as dt
  import re

- import dateinfer
  import pandas as pd

  from mindsdb_sql_parser.ast import (
@@ -262,7 +261,7 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
              return predictor_data

          def get_date_format(samples):
-             # dateinfer reads sql date 2020-04-01 as yyyy-dd-mm. workaround for in
+             # Try common formats first with explicit patterns
              for date_format, pattern in (
                  ('%Y-%m-%d', r'[\d]{4}-[\d]{2}-[\d]{2}'),
                  ('%Y-%m-%d %H:%M:%S', r'[\d]{4}-[\d]{2}-[\d]{2} [\d]{2}:[\d]{2}:[\d]{2}'),
@@ -280,7 +279,26 @@ class ApplyPredictorStepCall(ApplyPredictorBaseCall):
              if date_format is not None:
                  return date_format

-             return dateinfer.infer(samples)
+             # Use dateparser as fallback and infer format
+             try:
+                 # Parse the first sample to get its format
+                 # The import is heavy, so we do it here on-demand
+                 import dateparser
+                 parsed_date = dateparser.parse(samples[0])
+                 if parsed_date is None:
+                     raise ValueError("Could not parse date")
+
+                 # Verify the format works for all samples
+                 for sample in samples[1:]:
+                     if dateparser.parse(sample) is None:
+                         raise ValueError("Inconsistent date formats in samples")
+                 # Convert to strftime format based on the input
+                 if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
+                     return '%Y-%m-%d %H:%M:%S'
+                 return '%Y-%m-%d'
+             except (ValueError, AttributeError):
+                 # If dateparser fails, return a basic format as last resort
+                 return '%Y-%m-%d'

          model_types = predictor_metadata['model_types']
          if model_types.get(order_col) in ('float', 'integer'):
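The net effect of this change is that the heavyweight `dateinfer` dependency is dropped: explicit regex patterns handle the common SQL date shapes, and `dateparser` is only imported lazily as a fallback. A small standalone sketch of the two-tier approach (the function mirrors the diff's logic but is not the packaged code, and assumes `dateparser` may or may not be installed):

```python
import re


def get_date_format(samples: list[str]) -> str:
    # Tier 1: cheap explicit patterns for common SQL date shapes.
    for date_format, pattern in (
        ('%Y-%m-%d', r'^\d{4}-\d{2}-\d{2}$'),
        ('%Y-%m-%d %H:%M:%S', r'^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}$'),
    ):
        if all(re.match(pattern, s) for s in samples):
            return date_format
    # Tier 2: dateparser fallback, imported lazily because it is heavy.
    try:
        import dateparser
        if all(dateparser.parse(s) is not None for s in samples):
            if re.search(r'\d{2}:\d{2}:\d{2}', samples[0]):
                return '%Y-%m-%d %H:%M:%S'
    except (ImportError, ValueError, AttributeError):
        pass
    # Last resort: a basic date format.
    return '%Y-%m-%d'


print(get_date_format(['2020-04-01', '2020-04-02']))   # -> %Y-%m-%d
print(get_date_format(['2020-04-01 10:00:00']))        # -> %Y-%m-%d %H:%M:%S
```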
mindsdb/api/executor/sql_query/steps/fetch_dataframe_partition.py CHANGED
@@ -222,7 +222,9 @@ class FetchDataframePartitionCall(BaseStepCall):
          else:
              executor.shutdown()
              raise e
-
+         if self.sql_query.stop_event is not None and self.sql_query.stop_event.is_set():
+             executor.shutdown()
+             raise RuntimeError('Query is interrupted')
          # TODO
          # 1. get next batch without updating track_value:
          #    it allows to keep queue_in filled with data between fetching batches
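The new `stop_event` check gives partitioned fetches a cooperative cancellation point between batches: the caller sets a `threading.Event`, and the worker loop notices it at the next safe boundary and aborts. A minimal sketch of the same pattern (the batch loop is illustrative, not MindsDB's executor):

```python
import threading
import time

stop_event = threading.Event()


def fetch_batches():
    try:
        for batch_no in range(1_000):
            if stop_event.is_set():
                # Same contract as FetchDataframePartitionCall: stop between batches.
                raise RuntimeError('Query is interrupted')
            time.sleep(0.01)  # pretend to fetch and process one partition
    except RuntimeError as e:
        print(e)  # in MindsDB the error propagates to the query runner


worker = threading.Thread(target=fetch_batches)
worker.start()
time.sleep(0.05)
stop_event.set()  # request cancellation from the controlling thread
worker.join()     # worker exits at its next checkpoint
```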
mindsdb/api/http/initialize.py CHANGED
@@ -50,12 +50,29 @@ from mindsdb.utilities.json_encoder import CustomJSONProvider
  from mindsdb.utilities.ps import is_pid_listen_port, wait_func_is_true
  from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401
  from mindsdb.utilities.otel import trace  # noqa: F401
- from opentelemetry.instrumentation.flask import FlaskInstrumentor  # noqa: F401
- from opentelemetry.instrumentation.requests import RequestsInstrumentor  # noqa: F401

  logger = log.getLogger(__name__)


+ class _NoOpFlaskInstrumentor:
+     def instrument_app(self, app):
+         pass
+
+
+ class _NoOpRequestsInstrumentor:
+     def instrument(self):
+         pass
+
+
+ try:
+     from opentelemetry.instrumentation.flask import FlaskInstrumentor
+     from opentelemetry.instrumentation.requests import RequestsInstrumentor
+ except ImportError:
+     logger.debug("OpenTelemetry is not avaiable. Please run `pip install -r requirements/requirements-opentelemetry.txt` to use it.")
+     FlaskInstrumentor = _NoOpFlaskInstrumentor
+     RequestsInstrumentor = _NoOpRequestsInstrumentor
+
+
  class Swagger_Api(Api):
      """
      This is a modification of the base Flask Restplus Api class due to the issue described here
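This is the null-object pattern applied to an optional dependency: instead of guarding every call site with an availability flag, the failed import swaps in stubs with the same interface, so `initialize_flask` can call the instrumentors unconditionally (see the next hunk). A generic sketch of the idiom, with a hypothetical optional library rather than OpenTelemetry:

```python
class _NoOpTracer:
    # Same interface as the real tracer, but does nothing.
    def start_span(self, name: str):
        return self

    def __enter__(self):
        return self

    def __exit__(self, *exc):
        return False


try:
    from some_tracing_lib import Tracer  # hypothetical optional dependency
except ImportError:
    Tracer = _NoOpTracer

# Call sites stay unconditional -- no feature flags scattered around.
with Tracer().start_span("handle_request"):
    pass
```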
@@ -376,7 +393,7 @@ def initialize_flask(config, init_static_thread, no_studio):
      app = Flask(__name__, **kwargs)
      init_metrics(app)

-     # Instrument Flask app for OpenTelemetry
+     # Instrument Flask app and requests using either real or no-op instrumentors
      FlaskInstrumentor().instrument_app(app)
      RequestsInstrumentor().instrument()

mindsdb/api/http/namespaces/analysis.py CHANGED
@@ -79,7 +79,14 @@ class QueryAnalysis(Resource):

          column_names = [x["name"] for x in result.columns]
          df = DataFrame(result.data, columns=column_names)
-         analysis = analyze_df(df)
+         try:
+             analysis = analyze_df(df)
+         except ImportError:
+             return {
+                 'analysis': {},
+                 'timestamp': time.time(),
+                 'error': 'To use this feature, please install the "dataprep_ml" package.'
+             }

          query_tables = [
              table.to_string() for table in get_query_tables(ast)
@@ -107,6 +114,12 @@ class DataAnalysis(Resource):
          try:
              analysis = analyze_df(DataFrame(data, columns=column_names))
              return {"analysis": analysis, "timestamp": time.time()}
+         except ImportError:
+             return {
+                 'analysis': {},
+                 'timestamp': timestamp,
+                 'error': 'To use this feature, please install the "dataprep_ml" package.'
+             }
          except Exception as e:
              # Don't want analysis exceptions to show up on UI.
              # TODO: Fix analysis so it doesn't throw exceptions at all.
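Both analysis endpoints now degrade gracefully when the optional `dataprep_ml` package is missing, returning an empty analysis plus an actionable error string instead of a 500. A condensed sketch of the shape of that handler (`analyze_df` here is a simplified stand-in for MindsDB's wrapper around the optional package):

```python
import time

import pandas as pd


def analyze_df(df: pd.DataFrame) -> dict:
    # Stand-in for mindsdb's wrapper around the optional dataprep_ml package.
    import dataprep_ml  # noqa: F401  # raises ImportError when not installed
    return {"columns": list(df.columns)}


def data_analysis(df: pd.DataFrame) -> dict:
    try:
        return {"analysis": analyze_df(df), "timestamp": time.time()}
    except ImportError:
        # Optional dependency missing: report it instead of failing the request.
        return {
            "analysis": {},
            "timestamp": time.time(),
            "error": 'To use this feature, please install the "dataprep_ml" package.',
        }


print(data_analysis(pd.DataFrame({"a": [1, 2]})))
```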