MindsDB 25.9.1.2__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (120)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +39 -20
  3. mindsdb/api/a2a/agent.py +7 -9
  4. mindsdb/api/a2a/common/server/server.py +3 -3
  5. mindsdb/api/a2a/common/server/task_manager.py +4 -4
  6. mindsdb/api/a2a/task_manager.py +15 -17
  7. mindsdb/api/common/middleware.py +9 -11
  8. mindsdb/api/executor/command_executor.py +2 -4
  9. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  12. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  13. mindsdb/api/executor/exceptions.py +29 -10
  14. mindsdb/api/executor/planner/plan_join.py +17 -3
  15. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  16. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  18. mindsdb/api/executor/utilities/functions.py +6 -6
  19. mindsdb/api/executor/utilities/sql.py +32 -16
  20. mindsdb/api/http/gui.py +5 -11
  21. mindsdb/api/http/initialize.py +8 -10
  22. mindsdb/api/http/namespaces/agents.py +10 -12
  23. mindsdb/api/http/namespaces/analysis.py +13 -20
  24. mindsdb/api/http/namespaces/auth.py +1 -1
  25. mindsdb/api/http/namespaces/config.py +15 -11
  26. mindsdb/api/http/namespaces/databases.py +140 -201
  27. mindsdb/api/http/namespaces/file.py +15 -4
  28. mindsdb/api/http/namespaces/handlers.py +7 -2
  29. mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
  30. mindsdb/api/http/namespaces/models.py +94 -126
  31. mindsdb/api/http/namespaces/projects.py +13 -22
  32. mindsdb/api/http/namespaces/sql.py +33 -25
  33. mindsdb/api/http/namespaces/tab.py +27 -37
  34. mindsdb/api/http/namespaces/views.py +1 -1
  35. mindsdb/api/http/start.py +14 -8
  36. mindsdb/api/mcp/__init__.py +2 -1
  37. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  38. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  39. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  40. mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
  41. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
  42. mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
  43. mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +11 -5
  44. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  45. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  46. mindsdb/integrations/handlers/openai_handler/openai_handler.py +1 -1
  47. mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +20 -2
  48. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +18 -3
  49. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  50. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  51. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  52. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  53. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  54. mindsdb/integrations/libs/api_handler.py +10 -10
  55. mindsdb/integrations/libs/base.py +4 -4
  56. mindsdb/integrations/libs/llm/utils.py +2 -2
  57. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  58. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  59. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  60. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  61. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  62. mindsdb/integrations/libs/process_cache.py +132 -140
  63. mindsdb/integrations/libs/response.py +18 -12
  64. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  65. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  66. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  67. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
  68. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  69. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  70. mindsdb/integrations/utilities/rag/settings.py +58 -133
  71. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  72. mindsdb/interfaces/agents/agents_controller.py +2 -1
  73. mindsdb/interfaces/agents/constants.py +0 -2
  74. mindsdb/interfaces/agents/litellm_server.py +34 -58
  75. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  76. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  77. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  78. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  79. mindsdb/interfaces/chatbot/polling.py +30 -18
  80. mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
  81. mindsdb/interfaces/database/integrations.py +19 -2
  82. mindsdb/interfaces/file/file_controller.py +6 -6
  83. mindsdb/interfaces/functions/controller.py +1 -1
  84. mindsdb/interfaces/functions/to_markdown.py +2 -2
  85. mindsdb/interfaces/jobs/jobs_controller.py +5 -5
  86. mindsdb/interfaces/jobs/scheduler.py +3 -8
  87. mindsdb/interfaces/knowledge_base/controller.py +54 -25
  88. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  89. mindsdb/interfaces/model/model_controller.py +170 -166
  90. mindsdb/interfaces/query_context/context_controller.py +14 -2
  91. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
  92. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  93. mindsdb/interfaces/skills/skill_tool.py +2 -2
  94. mindsdb/interfaces/skills/sql_agent.py +25 -19
  95. mindsdb/interfaces/storage/fs.py +114 -169
  96. mindsdb/interfaces/storage/json.py +19 -18
  97. mindsdb/interfaces/storage/model_fs.py +54 -92
  98. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  99. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  100. mindsdb/interfaces/tasks/task_thread.py +7 -9
  101. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  102. mindsdb/interfaces/triggers/triggers_controller.py +47 -50
  103. mindsdb/migrations/migrate.py +16 -16
  104. mindsdb/utilities/api_status.py +58 -0
  105. mindsdb/utilities/config.py +49 -0
  106. mindsdb/utilities/exception.py +40 -1
  107. mindsdb/utilities/fs.py +0 -1
  108. mindsdb/utilities/hooks/profiling.py +17 -14
  109. mindsdb/utilities/langfuse.py +40 -45
  110. mindsdb/utilities/log.py +272 -0
  111. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  112. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  113. mindsdb/utilities/render/sqlalchemy_render.py +8 -7
  114. mindsdb/utilities/utils.py +2 -2
  115. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +266 -261
  116. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +119 -119
  117. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  118. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
  119. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
  120. {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
@@ -202,8 +202,8 @@ class MetaDatabaseHandler(DatabaseHandler):
202
202
  logger.error(
203
203
  f"Error retrieving column statistics for table {table_name}: {result.error_message}"
204
204
  )
205
- except Exception as e:
206
- logger.error(f"Exception occurred while retrieving column statistics for table {table_name}: {e}")
205
+ except Exception:
206
+ logger.exception(f"Exception occurred while retrieving column statistics for table {table_name}:")
207
207
 
208
208
  if not results:
209
209
  logger.warning("No column statistics could be retrieved for the specified tables.")
@@ -355,8 +355,8 @@ class ArgProbeMixin:
355
355
  """
356
356
  try:
357
357
  source_code = self.get_source_code(method_name)
358
- except Exception as e:
359
- logger.error(f"Failed to get source code of method {method_name} in {self.__class__.__name__}. Reason: {e}")
358
+ except Exception:
359
+ logger.exception(f"Failed to get source code of method {method_name} in {self.__class__.__name__}. Reason:")
360
360
  return []
361
361
 
362
362
  # parse the source code
@@ -304,10 +304,10 @@ def ft_jsonl_validation(
304
304
  assistant_key=assistant_key,
305
305
  )
306
306
  except Exception as e:
307
- raise Exception(f"{prefix}{e}")
307
+ raise Exception(f"{prefix}{e}") from e
308
308
 
309
309
  except Exception as e:
310
- raise Exception(f"Fine-tuning data format is not valid. Got {e}")
310
+ raise Exception(f"Fine-tuning data format is not valid. Got {e}") from e
311
311
 
312
312
 
313
313
  def ft_chat_format_validation(
@@ -13,16 +13,13 @@ def create_engine_process(connection_args: dict, integration_id: int, module_pat
13
13
 
14
14
  result = None
15
15
 
16
- if hasattr(module.Handler, 'create_engine'):
16
+ if hasattr(module.Handler, "create_engine"):
17
17
  engine_storage = HandlerStorage(integration_id)
18
18
  try:
19
- result = module.Handler(
20
- engine_storage=engine_storage,
21
- model_storage=None
22
- ).create_engine(connection_args=connection_args)
19
+ result = module.Handler(engine_storage=engine_storage, model_storage=None).create_engine(
20
+ connection_args=connection_args
21
+ )
23
22
  except NotImplementedError:
24
23
  return None
25
- except Exception as e:
26
- raise e
27
24
 
28
25
  return result
@@ -11,16 +11,11 @@ def func_call_process(name: str, args: dict, integration_id: int, module_path: s
11
11
 
12
12
  result = None
13
13
 
14
- if hasattr(module.Handler, 'function_call'):
14
+ if hasattr(module.Handler, "function_call"):
15
15
  engine_storage = HandlerStorage(integration_id)
16
16
  try:
17
- result = module.Handler(
18
- engine_storage=engine_storage,
19
- model_storage=None
20
- ).function_call(name, args)
17
+ result = module.Handler(engine_storage=engine_storage, model_storage=None).function_call(name, args)
21
18
  except NotImplementedError:
22
19
  return None
23
- except Exception as e:
24
- raise e
25
20
 
26
21
  return result
@@ -1,6 +1,5 @@
1
1
  import os
2
2
  import importlib
3
- import traceback
4
3
  import datetime as dt
5
4
 
6
5
  from sqlalchemy.orm.attributes import flag_modified
@@ -25,56 +24,52 @@ from mindsdb.integrations.libs.ml_handler_process.handlers_cacher import handler
25
24
  logger = log.getLogger(__name__)
26
25
 
27
26
 
28
- @mark_process(name='learn')
29
- def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_data_query: str,
30
- project_name: str, model_id: int, integration_id: int, base_model_id: int,
31
- set_active: bool, module_path: str):
32
- ctx.profiling = {
33
- 'level': 0,
34
- 'enabled': True,
35
- 'pointer': None,
36
- 'tree': None
37
- }
38
- profiler.set_meta(query='learn_process', api='http', environment=Config().get('environment'))
39
- with profiler.Context('learn_process'):
27
+ @mark_process(name="learn")
28
+ def learn_process(
29
+ data_integration_ref: dict,
30
+ problem_definition: dict,
31
+ fetch_data_query: str,
32
+ project_name: str,
33
+ model_id: int,
34
+ integration_id: int,
35
+ base_model_id: int,
36
+ set_active: bool,
37
+ module_path: str,
38
+ ):
39
+ ctx.profiling = {"level": 0, "enabled": True, "pointer": None, "tree": None}
40
+ profiler.set_meta(query="learn_process", api="http", environment=Config().get("environment"))
41
+ with profiler.Context("learn_process"):
40
42
  from mindsdb.interfaces.database.database import DatabaseController
41
43
 
42
44
  try:
43
45
  predictor_record = db.Predictor.query.with_for_update().get(model_id)
44
- predictor_record.training_metadata['process_id'] = os.getpid()
45
- flag_modified(predictor_record, 'training_metadata')
46
+ predictor_record.training_metadata["process_id"] = os.getpid()
47
+ flag_modified(predictor_record, "training_metadata")
46
48
  db.session.commit()
47
49
 
48
- target = problem_definition.get('target', None)
50
+ target = problem_definition.get("target", None)
49
51
  training_data_df = None
50
52
  if data_integration_ref is not None:
51
53
  database_controller = DatabaseController()
52
54
  sql_session = make_sql_session()
53
- if data_integration_ref['type'] == 'integration':
54
- integration_name = database_controller.get_integration(data_integration_ref['id'])['name']
55
+ if data_integration_ref["type"] == "integration":
56
+ integration_name = database_controller.get_integration(data_integration_ref["id"])["name"]
55
57
  query = Select(
56
58
  targets=[Star()],
57
- from_table=NativeQuery(
58
- integration=Identifier(integration_name),
59
- query=fetch_data_query
60
- )
59
+ from_table=NativeQuery(integration=Identifier(integration_name), query=fetch_data_query),
61
60
  )
62
61
  sqlquery = SQLQuery(query, session=sql_session)
63
- if data_integration_ref['type'] == 'system':
62
+ if data_integration_ref["type"] == "system":
64
63
  query = Select(
65
- targets=[Star()],
66
- from_table=NativeQuery(
67
- integration=Identifier('log'),
68
- query=fetch_data_query
69
- )
64
+ targets=[Star()], from_table=NativeQuery(integration=Identifier("log"), query=fetch_data_query)
70
65
  )
71
66
  sqlquery = SQLQuery(query, session=sql_session)
72
- elif data_integration_ref['type'] == 'view':
67
+ elif data_integration_ref["type"] == "view":
73
68
  project = database_controller.get_project(project_name)
74
69
  query_ast = parse_sql(fetch_data_query)
75
70
  view_meta = project.get_view_meta(query_ast)
76
- sqlquery = SQLQuery(view_meta['query_ast'], session=sql_session)
77
- elif data_integration_ref['type'] == 'project':
71
+ sqlquery = SQLQuery(view_meta["query_ast"], session=sql_session)
72
+ elif data_integration_ref["type"] == "project":
78
73
  query_ast = parse_sql(fetch_data_query)
79
74
  sqlquery = SQLQuery(query_ast, session=sql_session)
80
75
 
@@ -97,17 +92,13 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_da
97
92
 
98
93
  handlerStorage = HandlerStorage(integration_id)
99
94
  modelStorage = ModelStorage(model_id)
100
- modelStorage.fileStorage.push() # FIXME
95
+ modelStorage.fileStorage.push() # FIXME
101
96
 
102
97
  kwargs = {}
103
98
  if base_model_id is not None:
104
- kwargs['base_model_storage'] = ModelStorage(base_model_id)
105
- kwargs['base_model_storage'].fileStorage.pull()
106
- ml_handler = module.Handler(
107
- engine_storage=handlerStorage,
108
- model_storage=modelStorage,
109
- **kwargs
110
- )
99
+ kwargs["base_model_storage"] = ModelStorage(base_model_id)
100
+ kwargs["base_model_storage"].fileStorage.pull()
101
+ ml_handler = module.Handler(engine_storage=handlerStorage, model_storage=modelStorage, **kwargs)
111
102
  handlers_cacher[predictor_record.id] = ml_handler
112
103
 
113
104
  if not ml_handler.generative and target is not None:
@@ -119,18 +110,19 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_da
119
110
  training_data_df.rename(columns={target_found: target}, inplace=True)
120
111
  else:
121
112
  raise Exception(
122
- f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}')
113
+ f'Prediction target "{target}" not found in training dataframe: {list(training_data_df.columns)}'
114
+ )
123
115
 
124
116
  # create new model
125
117
  if base_model_id is None:
126
- with profiler.Context('create'):
118
+ with profiler.Context("create"):
127
119
  ml_handler.create(target, df=training_data_df, args=problem_definition)
128
120
 
129
121
  # fine-tune (partially train) existing model
130
122
  else:
131
123
  # load model from previous version, use it as starting point
132
- with profiler.Context('finetune'):
133
- problem_definition['base_model_id'] = base_model_id
124
+ with profiler.Context("finetune"):
125
+ problem_definition["base_model_id"] = base_model_id
134
126
  ml_handler.finetune(df=training_data_df, args=problem_definition)
135
127
 
136
128
  predictor_record.status = PREDICTOR_STATUS.COMPLETE
@@ -139,9 +131,7 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_da
139
131
  # if retrain and set_active after success creation
140
132
  if set_active is True:
141
133
  models = get_model_records(
142
- name=predictor_record.name,
143
- project_id=predictor_record.project_id,
144
- active=None
134
+ name=predictor_record.name, project_id=predictor_record.project_id, active=None
145
135
  )
146
136
  for model in models:
147
137
  model.active = False
@@ -149,7 +139,7 @@ def learn_process(data_integration_ref: dict, problem_definition: dict, fetch_da
149
139
  models.sort(key=lambda x: x.created_at)
150
140
  models[-1].active = True
151
141
  except Exception as e:
152
- logger.error(traceback.format_exc())
142
+ logger.exception("Error during 'learn' process:")
153
143
  error_message = format_exception_error(e)
154
144
 
155
145
  predictor_record = db.Predictor.query.with_for_update().get(model_id)
@@ -13,16 +13,13 @@ def update_engine_process(connection_args: dict, integration_id: int, module_pat
13
13
 
14
14
  result = None
15
15
 
16
- if hasattr(module.Handler, 'update_engine'):
16
+ if hasattr(module.Handler, "update_engine"):
17
17
  engine_storage = HandlerStorage(integration_id)
18
18
  try:
19
- result = module.Handler(
20
- engine_storage=engine_storage,
21
- model_storage=None
22
- ).update_engine(connection_args=connection_args)
19
+ result = module.Handler(engine_storage=engine_storage, model_storage=None).update_engine(
20
+ connection_args=connection_args
21
+ )
23
22
  except NotImplementedError:
24
23
  return None
25
- except Exception as e:
26
- raise e
27
24
 
28
25
  return result
@@ -11,17 +11,12 @@ def update_process(args: dict, integration_id: int, module_path: str, model_id:
11
11
 
12
12
  result = None
13
13
 
14
- if hasattr(module.Handler, 'upgate'):
14
+ if hasattr(module.Handler, "upgate"):
15
15
  engine_storage = HandlerStorage(integration_id)
16
16
  model_storage = ModelStorage(model_id)
17
17
  try:
18
- result = module.Handler(
19
- engine_storage=engine_storage,
20
- model_storage=model_storage
21
- ).upgate(args=args)
18
+ result = module.Handler(engine_storage=engine_storage, model_storage=model_storage).upgate(args=args)
22
19
  except NotImplementedError:
23
20
  return None
24
- except Exception as e:
25
- raise e
26
21
 
27
22
  return result