MindsDB 25.9.2.0a1 (py3-none-any.whl) → 25.9.3rc1 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (116):
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +39 -20
  3. mindsdb/api/a2a/agent.py +7 -9
  4. mindsdb/api/a2a/common/server/server.py +3 -3
  5. mindsdb/api/a2a/common/server/task_manager.py +4 -4
  6. mindsdb/api/a2a/task_manager.py +15 -17
  7. mindsdb/api/common/middleware.py +9 -11
  8. mindsdb/api/executor/command_executor.py +2 -4
  9. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  12. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  13. mindsdb/api/executor/exceptions.py +29 -10
  14. mindsdb/api/executor/planner/plan_join.py +17 -3
  15. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  16. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  18. mindsdb/api/executor/utilities/functions.py +6 -6
  19. mindsdb/api/executor/utilities/sql.py +32 -16
  20. mindsdb/api/http/gui.py +5 -11
  21. mindsdb/api/http/initialize.py +8 -10
  22. mindsdb/api/http/namespaces/agents.py +10 -12
  23. mindsdb/api/http/namespaces/analysis.py +13 -20
  24. mindsdb/api/http/namespaces/auth.py +1 -1
  25. mindsdb/api/http/namespaces/config.py +15 -11
  26. mindsdb/api/http/namespaces/databases.py +140 -201
  27. mindsdb/api/http/namespaces/file.py +15 -4
  28. mindsdb/api/http/namespaces/handlers.py +7 -2
  29. mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
  30. mindsdb/api/http/namespaces/models.py +94 -126
  31. mindsdb/api/http/namespaces/projects.py +13 -22
  32. mindsdb/api/http/namespaces/sql.py +33 -25
  33. mindsdb/api/http/namespaces/tab.py +27 -37
  34. mindsdb/api/http/namespaces/views.py +1 -1
  35. mindsdb/api/http/start.py +14 -8
  36. mindsdb/api/mcp/__init__.py +2 -1
  37. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  38. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  39. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  40. mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
  41. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
  42. mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
  43. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  44. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  45. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
  46. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  47. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  48. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  49. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  50. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  51. mindsdb/integrations/libs/api_handler.py +10 -10
  52. mindsdb/integrations/libs/base.py +4 -4
  53. mindsdb/integrations/libs/llm/utils.py +2 -2
  54. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  55. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  56. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  57. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  58. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  59. mindsdb/integrations/libs/process_cache.py +132 -140
  60. mindsdb/integrations/libs/response.py +18 -12
  61. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  62. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  63. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  64. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
  65. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  66. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  67. mindsdb/integrations/utilities/rag/settings.py +58 -133
  68. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  69. mindsdb/interfaces/agents/agents_controller.py +2 -1
  70. mindsdb/interfaces/agents/constants.py +0 -2
  71. mindsdb/interfaces/agents/litellm_server.py +34 -58
  72. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  73. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  74. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  75. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  76. mindsdb/interfaces/chatbot/polling.py +30 -18
  77. mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
  78. mindsdb/interfaces/database/integrations.py +19 -2
  79. mindsdb/interfaces/file/file_controller.py +6 -6
  80. mindsdb/interfaces/functions/controller.py +1 -1
  81. mindsdb/interfaces/functions/to_markdown.py +2 -2
  82. mindsdb/interfaces/jobs/jobs_controller.py +5 -5
  83. mindsdb/interfaces/jobs/scheduler.py +3 -8
  84. mindsdb/interfaces/knowledge_base/controller.py +50 -23
  85. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  86. mindsdb/interfaces/model/model_controller.py +170 -166
  87. mindsdb/interfaces/query_context/context_controller.py +14 -2
  88. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
  89. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  90. mindsdb/interfaces/skills/skill_tool.py +2 -2
  91. mindsdb/interfaces/skills/sql_agent.py +25 -19
  92. mindsdb/interfaces/storage/fs.py +114 -169
  93. mindsdb/interfaces/storage/json.py +19 -18
  94. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  95. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  96. mindsdb/interfaces/tasks/task_thread.py +7 -9
  97. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  98. mindsdb/interfaces/triggers/triggers_controller.py +47 -50
  99. mindsdb/migrations/migrate.py +16 -16
  100. mindsdb/utilities/api_status.py +58 -0
  101. mindsdb/utilities/config.py +49 -0
  102. mindsdb/utilities/exception.py +40 -1
  103. mindsdb/utilities/fs.py +0 -1
  104. mindsdb/utilities/hooks/profiling.py +17 -14
  105. mindsdb/utilities/langfuse.py +40 -45
  106. mindsdb/utilities/log.py +272 -0
  107. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  108. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  109. mindsdb/utilities/render/sqlalchemy_render.py +7 -6
  110. mindsdb/utilities/utils.py +2 -2
  111. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
  112. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
  113. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  114. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
  115. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
  116. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
@@ -1,4 +1,4 @@
1
- """ BYOM: Bring Your Own Model
1
+ """BYOM: Bring Your Own Model
2
2
 
3
3
  env vars to contloll BYOM:
4
4
  - MINDSDB_BYOM_ENABLED - can BYOM be uysed or not. Locally enabled by default.
@@ -7,7 +7,6 @@ env vars to contloll BYOM:
7
7
  - MINDSDB_BYOM_TYPE - [safe|unsafe] - obsolete, same as above.
8
8
  """
9
9
 
10
-
11
10
  import os
12
11
  import re
13
12
  import sys
@@ -15,7 +14,6 @@ import shutil
15
14
  import pickle
16
15
  import tarfile
17
16
  import tempfile
18
- import traceback
19
17
  import subprocess
20
18
  from enum import Enum
21
19
  from pathlib import Path
@@ -36,28 +34,33 @@ import mindsdb.utilities.profiler as profiler
36
34
 
37
35
 
38
36
  from .proc_wrapper import (
39
- pd_decode, pd_encode, encode, decode, BYOM_METHOD,
40
- import_string, find_model_class, check_module
37
+ pd_decode,
38
+ pd_encode,
39
+ encode,
40
+ decode,
41
+ BYOM_METHOD,
42
+ import_string,
43
+ find_model_class,
44
+ check_module,
41
45
  )
42
46
  from .__about__ import __version__
43
47
 
44
48
 
45
- BYOM_TYPE = Enum('BYOM_TYPE', ['INHOUSE', 'VENV'])
49
+ BYOM_TYPE = Enum("BYOM_TYPE", ["INHOUSE", "VENV"])
46
50
 
47
51
  logger = log.getLogger(__name__)
48
52
 
49
53
 
50
54
  class BYOMHandler(BaseMLEngine):
51
-
52
- name = 'byom'
55
+ name = "byom"
53
56
 
54
57
  def __init__(self, model_storage, engine_storage, **kwargs) -> None:
55
58
  # region check availability
56
- is_cloud = Config().get('cloud', False)
59
+ is_cloud = Config().get("cloud", False)
57
60
  if is_cloud is True:
58
- byom_enabled = os.environ.get('MINDSDB_BYOM_ENABLED', 'false').lower()
59
- if byom_enabled not in ('true', '1'):
60
- raise RuntimeError('BYOM is disabled on cloud')
61
+ byom_enabled = os.environ.get("MINDSDB_BYOM_ENABLED", "false").lower()
62
+ if byom_enabled not in ("true", "1"):
63
+ raise RuntimeError("BYOM is disabled on cloud")
61
64
  # endregion
62
65
 
63
66
  self.model_wrapper = None
@@ -68,16 +71,14 @@ class BYOMHandler(BaseMLEngine):
68
71
  # region read and save set default byom type
69
72
  try:
70
73
  self._default_byom_type = BYOM_TYPE.VENV
71
- if os.environ.get('MINDSDB_BYOM_DEFAULT_TYPE') is not None:
72
- self._default_byom_type = BYOM_TYPE[
73
- os.environ.get('MINDSDB_BYOM_DEFAULT_TYPE').upper()
74
- ]
74
+ if os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE") is not None:
75
+ self._default_byom_type = BYOM_TYPE[os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE").upper()]
75
76
  else:
76
- env_var = os.environ.get('MINDSDB_BYOM_DEFAULT_TYPE')
77
- if env_var == 'SAVE':
78
- self._default_byom_type = BYOM_TYPE['VENV']
79
- elif env_var == 'UNSAVE':
80
- self._default_byom_type = BYOM_TYPE['INHOUSE']
77
+ env_var = os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE")
78
+ if env_var == "SAVE":
79
+ self._default_byom_type = BYOM_TYPE["VENV"]
80
+ elif env_var == "UNSAVE":
81
+ self._default_byom_type = BYOM_TYPE["INHOUSE"]
81
82
  else:
82
83
  raise KeyError
83
84
  except KeyError:
@@ -85,11 +86,11 @@ class BYOMHandler(BaseMLEngine):
85
86
  # endregion
86
87
 
87
88
  # region check if 'inhouse' BYOM is enabled
88
- env_var = os.environ.get('MINDSDB_BYOM_INHOUSE_ENABLED')
89
+ env_var = os.environ.get("MINDSDB_BYOM_INHOUSE_ENABLED")
89
90
  if env_var is None:
90
91
  self._inhouse_enabled = False if is_cloud else True
91
92
  else:
92
- self._inhouse_enabled = env_var.lower() in ('true', '1')
93
+ self._inhouse_enabled = env_var.lower() in ("true", "1")
93
94
  # endregion
94
95
 
95
96
  super().__init__(model_storage, engine_storage, **kwargs)
@@ -117,18 +118,18 @@ class BYOMHandler(BaseMLEngine):
117
118
  def create_validation(target: str, args: dict = None, **kwargs) -> None:
118
119
  if isinstance(args, dict) is False:
119
120
  return
120
- using_args = args.get('using', {})
121
- engine_version = using_args.get('engine_version')
121
+ using_args = args.get("using", {})
122
+ engine_version = using_args.get("engine_version")
122
123
  if engine_version is not None:
123
124
  engine_version = BYOMHandler.normalize_engine_version(engine_version)
124
125
  else:
125
- connection_args = kwargs['handler_storage'].get_connection_args()
126
- versions = connection_args.get('versions')
126
+ connection_args = kwargs["handler_storage"].get_connection_args()
127
+ versions = connection_args.get("versions")
127
128
  if isinstance(versions, dict):
128
129
  engine_version = max([int(x) for x in versions.keys()])
129
130
  else:
130
131
  engine_version = 1
131
- using_args['engine_version'] = engine_version
132
+ using_args["engine_version"] = engine_version
132
133
 
133
134
  def get_model_engine_version(self) -> int:
134
135
  """Return current model engine version
@@ -136,7 +137,7 @@ class BYOMHandler(BaseMLEngine):
136
137
  Returns:
137
138
  int: engine version
138
139
  """
139
- engine_version = self.model_storage.get_info()['learn_args'].get('using', {}).get('engine_version')
140
+ engine_version = self.model_storage.get_info()["learn_args"].get("using", {}).get("engine_version")
140
141
  engine_version = BYOMHandler.normalize_engine_version(engine_version)
141
142
  return engine_version
142
143
 
@@ -154,28 +155,26 @@ class BYOMHandler(BaseMLEngine):
154
155
  version = 1
155
156
  if isinstance(version, str):
156
157
  version = int(version)
157
- version_mark = ''
158
+ version_mark = ""
158
159
  if version > 1:
159
- version_mark = f'_{version}'
160
+ version_mark = f"_{version}"
160
161
  version_str = str(version)
161
162
 
162
163
  self.engine_storage.fileStorage.pull()
163
164
  try:
164
- code = self.engine_storage.fileStorage.file_get(f'code{version_mark}')
165
- modules_str = self.engine_storage.fileStorage.file_get(f'modules{version_mark}')
165
+ code = self.engine_storage.fileStorage.file_get(f"code{version_mark}")
166
+ modules_str = self.engine_storage.fileStorage.file_get(f"modules{version_mark}")
166
167
  except FileNotFoundError:
167
168
  raise Exception(f"Engine version '{version}' does not exists")
168
169
 
169
170
  if version_str not in self.model_wrappers:
170
171
  connection_args = self.engine_storage.get_connection_args()
171
- version_meta = connection_args['versions'][version_str]
172
+ version_meta = connection_args["versions"][version_str]
172
173
 
173
174
  try:
174
- engine_version_type = BYOM_TYPE[
175
- version_meta.get('type', self._default_byom_type.name).upper()
176
- ]
175
+ engine_version_type = BYOM_TYPE[version_meta.get("type", self._default_byom_type.name).upper()]
177
176
  except KeyError:
178
- raise Exception('Unknown BYOM engine type')
177
+ raise Exception("Unknown BYOM engine type")
179
178
 
180
179
  if engine_version_type == BYOM_TYPE.INHOUSE:
181
180
  if self._inhouse_enabled is False:
@@ -185,20 +184,20 @@ class BYOMHandler(BaseMLEngine):
185
184
  code=code,
186
185
  modules_str=modules_str,
187
186
  engine_id=self.engine_storage.integration_id,
188
- engine_version=version
187
+ engine_version=version,
189
188
  )
190
189
  self.model_wrappers[version_str] = self.inhouse_model_wrapper
191
190
  elif engine_version_type == BYOM_TYPE.VENV:
192
- if version_meta.get('venv_status') != 'ready':
193
- version_meta['venv_status'] = 'creating'
191
+ if version_meta.get("venv_status") != "ready":
192
+ version_meta["venv_status"] = "creating"
194
193
  self.engine_storage.update_connection_args(connection_args)
195
194
  self.model_wrappers[version_str] = ModelWrapperSafe(
196
195
  code=code,
197
196
  modules_str=modules_str,
198
197
  engine_id=self.engine_storage.integration_id,
199
- engine_version=version
198
+ engine_version=version,
200
199
  )
201
- version_meta['venv_status'] = 'ready'
200
+ version_meta["venv_status"] = "ready"
202
201
  self.engine_storage.update_connection_args(connection_args)
203
202
 
204
203
  return self.model_wrappers[version_str]
@@ -206,130 +205,114 @@ class BYOMHandler(BaseMLEngine):
206
205
  def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
207
206
  engine_version = self.get_model_engine_version()
208
207
  mp = self._get_model_proxy(engine_version)
209
- model_state = self.model_storage.file_get('model')
208
+ model_state = self.model_storage.file_get("model")
210
209
  return mp.describe(model_state, attribute)
211
210
 
212
211
  def create(self, target, df=None, args=None, **kwargs):
213
- using_args = args.get('using', {})
214
- engine_version = using_args.get('engine_version')
212
+ using_args = args.get("using", {})
213
+ engine_version = using_args.get("engine_version")
215
214
 
216
215
  model_proxy = self._get_model_proxy(engine_version)
217
216
  model_state = model_proxy.train(df, target, args)
218
217
 
219
- self.model_storage.file_set('model', model_state)
218
+ self.model_storage.file_set("model", model_state)
220
219
 
221
220
  # TODO return columns?
222
221
 
223
222
  def convert_type(field_type):
224
223
  if pd_types.is_integer_dtype(field_type):
225
- return 'integer'
224
+ return "integer"
226
225
  elif pd_types.is_numeric_dtype(field_type):
227
- return 'float'
226
+ return "float"
228
227
  elif pd_types.is_datetime64_any_dtype(field_type):
229
- return 'datetime'
228
+ return "datetime"
230
229
  else:
231
- return 'categorical'
230
+ return "categorical"
232
231
 
233
- columns = {
234
- target: convert_type(object)
235
- }
232
+ columns = {target: convert_type(object)}
236
233
 
237
234
  self.model_storage.columns_set(columns)
238
235
 
239
236
  def predict(self, df, args=None):
240
- pred_args = args.get('predict_params', {})
237
+ pred_args = args.get("predict_params", {})
241
238
 
242
- engine_version = pred_args.get('engine_version')
239
+ engine_version = pred_args.get("engine_version")
243
240
  if engine_version is not None:
244
241
  engine_version = int(engine_version)
245
242
  else:
246
243
  engine_version = self.get_model_engine_version()
247
244
 
248
245
  model_proxy = self._get_model_proxy(engine_version)
249
- model_state = self.model_storage.file_get('model')
246
+ model_state = self.model_storage.file_get("model")
250
247
  pred_df = model_proxy.predict(df, model_state, pred_args)
251
248
 
252
249
  return pred_df
253
250
 
254
251
  def create_engine(self, connection_args):
255
- code_path = Path(connection_args['code'])
256
- self.engine_storage.fileStorage.file_set(
257
- 'code',
258
- code_path.read_bytes()
259
- )
252
+ code_path = Path(connection_args["code"])
253
+ self.engine_storage.fileStorage.file_set("code", code_path.read_bytes())
260
254
 
261
- requirements_path = Path(connection_args['modules'])
262
- self.engine_storage.fileStorage.file_set(
263
- 'modules',
264
- requirements_path.read_bytes()
265
- )
255
+ requirements_path = Path(connection_args["modules"])
256
+ self.engine_storage.fileStorage.file_set("modules", requirements_path.read_bytes())
266
257
 
267
258
  self.engine_storage.fileStorage.push()
268
259
 
269
- self.engine_storage.update_connection_args({
270
- 'handler_version': __version__,
271
- 'mode': connection_args.get('mode'),
272
- 'versions': {
273
- '1': {
274
- 'code': code_path.name,
275
- 'requirements': requirements_path.name,
276
- 'type': self.normalize_byom_type(
277
- connection_args.get('type')
278
- ).name.lower()
279
- }
260
+ self.engine_storage.update_connection_args(
261
+ {
262
+ "handler_version": __version__,
263
+ "mode": connection_args.get("mode"),
264
+ "versions": {
265
+ "1": {
266
+ "code": code_path.name,
267
+ "requirements": requirements_path.name,
268
+ "type": self.normalize_byom_type(connection_args.get("type")).name.lower(),
269
+ }
270
+ },
280
271
  }
281
- })
272
+ )
282
273
 
283
274
  model_proxy = self._get_model_proxy()
284
275
  try:
285
- info = model_proxy.check(connection_args.get('mode'))
286
- self.engine_storage.json_set('methods', info['methods'])
276
+ info = model_proxy.check(connection_args.get("mode"))
277
+ self.engine_storage.json_set("methods", info["methods"])
287
278
 
288
279
  except Exception as e:
289
- if hasattr(model_proxy, 'remove_venv'):
280
+ if hasattr(model_proxy, "remove_venv"):
290
281
  model_proxy.remove_venv()
291
282
  raise e
292
283
 
293
284
  def update_engine(self, connection_args: dict) -> None:
294
285
  """Add new version of engine
295
286
 
296
- Args:
297
- connection_args (dict): paths to code and requirements
287
+ Args:
288
+ connection_args (dict): paths to code and requirements
298
289
  """
299
- code_path = Path(connection_args['code'])
300
- requirements_path = Path(connection_args['modules'])
290
+ code_path = Path(connection_args["code"])
291
+ requirements_path = Path(connection_args["modules"])
301
292
 
302
293
  engine_connection_args = self.engine_storage.get_connection_args()
303
- if isinstance(engine_connection_args, dict) is False or 'handler_version' not in engine_connection_args:
294
+ if isinstance(engine_connection_args, dict) is False or "handler_version" not in engine_connection_args:
304
295
  engine_connection_args = {
305
- 'handler_version': __version__,
306
- 'versions': {
307
- '1': {
308
- 'code': 'code.py',
309
- 'requirements': 'requirements.txt',
310
- 'type': self._default_byom_type.name.lower()
296
+ "handler_version": __version__,
297
+ "versions": {
298
+ "1": {
299
+ "code": "code.py",
300
+ "requirements": "requirements.txt",
301
+ "type": self._default_byom_type.name.lower(),
311
302
  }
312
- }
303
+ },
313
304
  }
314
- new_version = str(max([int(x) for x in engine_connection_args['versions'].keys()]) + 1)
315
-
316
- engine_connection_args['versions'][new_version] = {
317
- 'code': code_path.name,
318
- 'requirements': requirements_path.name,
319
- 'type': self.normalize_byom_type(
320
- connection_args.get('type')
321
- ).name.lower()
305
+ new_version = str(max([int(x) for x in engine_connection_args["versions"].keys()]) + 1)
306
+
307
+ engine_connection_args["versions"][new_version] = {
308
+ "code": code_path.name,
309
+ "requirements": requirements_path.name,
310
+ "type": self.normalize_byom_type(connection_args.get("type")).name.lower(),
322
311
  }
323
312
 
324
- self.engine_storage.fileStorage.file_set(
325
- f'code_{new_version}',
326
- code_path.read_bytes()
327
- )
313
+ self.engine_storage.fileStorage.file_set(f"code_{new_version}", code_path.read_bytes())
328
314
 
329
- self.engine_storage.fileStorage.file_set(
330
- f'modules_{new_version}',
331
- requirements_path.read_bytes()
332
- )
315
+ self.engine_storage.fileStorage.file_set(f"modules_{new_version}", requirements_path.read_bytes())
333
316
  self.engine_storage.fileStorage.push()
334
317
 
335
318
  self.engine_storage.update_connection_args(engine_connection_args)
@@ -337,28 +320,28 @@ class BYOMHandler(BaseMLEngine):
337
320
  model_proxy = self._get_model_proxy(new_version)
338
321
  try:
339
322
  methods = model_proxy.check()
340
- self.engine_storage.json_set('methods', methods)
323
+ self.engine_storage.json_set("methods", methods)
341
324
 
342
325
  except Exception as e:
343
- if hasattr(model_proxy, 'remove_venv'):
326
+ if hasattr(model_proxy, "remove_venv"):
344
327
  model_proxy.remove_venv()
345
328
  raise e
346
329
 
347
330
  def function_list(self):
348
- return self.engine_storage.json_get('methods')
331
+ return self.engine_storage.json_get("methods")
349
332
 
350
333
  def function_call(self, name, args):
351
334
  mp = self._get_model_proxy()
352
335
  return mp.func_call(name, args)
353
336
 
354
337
  def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
355
- using_args = args.get('using', {})
356
- engine_version = using_args.get('engine_version')
338
+ using_args = args.get("using", {})
339
+ engine_version = using_args.get("engine_version")
357
340
 
358
341
  model_storage = self.model_storage
359
342
  # TODO: should probably refactor at some point, as a bit of the logic is shared with lightwood's finetune logic
360
343
  try:
361
- base_predictor_id = args['base_model_id']
344
+ base_predictor_id = args["base_model_id"]
362
345
  base_predictor_record = db.Predictor.query.get(base_predictor_id)
363
346
  if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE:
364
347
  raise Exception("Base model must be in status 'complete'")
@@ -366,33 +349,34 @@ class BYOMHandler(BaseMLEngine):
366
349
  predictor_id = model_storage.predictor_id
367
350
  predictor_record = db.Predictor.query.get(predictor_id)
368
351
 
369
- predictor_record.data = {'training_log': 'training'} # TODO move to ModelStorage (don't work w/ db directly)
352
+ predictor_record.data = {
353
+ "training_log": "training"
354
+ } # TODO move to ModelStorage (don't work w/ db directly)
370
355
  predictor_record.training_start_at = datetime.now()
371
356
  predictor_record.status = PREDICTOR_STATUS.FINETUNING # TODO: parallel execution block
372
357
  db.session.commit()
373
358
 
374
359
  model_proxy = self._get_model_proxy(engine_version)
375
- model_state = self.base_model_storage.file_get('model')
376
- model_state = model_proxy.finetune(df, model_state, args=args.get('using', {}))
360
+ model_state = self.base_model_storage.file_get("model")
361
+ model_state = model_proxy.finetune(df, model_state, args=args.get("using", {}))
377
362
 
378
363
  # region hack to speedup file saving
379
- with profiler.Context('finetune-byom-write-file'):
380
- dest_abs_path = model_storage.fileStorage.folder_path / 'model'
381
- with open(dest_abs_path, 'wb') as fd:
364
+ with profiler.Context("finetune-byom-write-file"):
365
+ dest_abs_path = model_storage.fileStorage.folder_path / "model"
366
+ with open(dest_abs_path, "wb") as fd:
382
367
  fd.write(model_state)
383
368
  model_storage.fileStorage.push(compression_level=0)
384
369
  # endregion
385
370
 
386
- predictor_record.update_status = 'up_to_date'
371
+ predictor_record.update_status = "up_to_date"
387
372
  predictor_record.status = PREDICTOR_STATUS.COMPLETE
388
373
  predictor_record.training_stop_at = datetime.now()
389
374
  db.session.commit()
390
375
 
391
376
  except Exception as e:
392
- logger.error(e)
377
+ logger.error("Unexpected error during BYOM finetune:", exc_info=True)
393
378
  predictor_id = model_storage.predictor_id
394
379
  predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
395
- logger.error(traceback.format_exc())
396
380
  error_message = format_exception_error(e)
397
381
  predictor_record.data = {"error": error_message}
398
382
  predictor_record.status = PREDICTOR_STATUS.ERROR
@@ -406,8 +390,7 @@ class BYOMHandler(BaseMLEngine):
406
390
 
407
391
 
408
392
  class ModelWrapperUnsafe:
409
- """ Model wrapper that executes learn/predict in current process
410
- """
393
+ """Model wrapper that executes learn/predict in current process"""
411
394
 
412
395
  def __init__(self, code, modules_str, engine_id, engine_version: int):
413
396
  self.module = import_string(code)
@@ -444,7 +427,7 @@ class ModelWrapperUnsafe:
444
427
  return pickle.dumps(self.model_instance.__dict__, protocol=5)
445
428
 
446
429
  def describe(self, model_state, attribute: Optional[str] = None) -> pd.DataFrame:
447
- if hasattr(self.model_instance, 'describe'):
430
+ if hasattr(self.model_instance, "describe"):
448
431
  model_state = pickle.loads(model_state)
449
432
  self.model_instance.__dict__ = model_state
450
433
  return self.model_instance.describe(attribute)
@@ -460,15 +443,14 @@ class ModelWrapperUnsafe:
460
443
 
461
444
 
462
445
  class ModelWrapperSafe:
463
- """ Model wrapper that executes learn/predict in venv
464
- """
446
+ """Model wrapper that executes learn/predict in venv"""
465
447
 
466
448
  def __init__(self, code, modules_str, engine_id, engine_version: int):
467
449
  self.code = code
468
450
  modules = self.parse_requirements(modules_str)
469
451
 
470
452
  self.config = Config()
471
- self.is_cloud = Config().get('cloud', False)
453
+ self.is_cloud = Config().get("cloud", False)
472
454
 
473
455
  self.env_path = None
474
456
  self.env_storage_path = None
@@ -478,37 +460,37 @@ class ModelWrapperSafe:
478
460
  try:
479
461
  import virtualenv
480
462
 
481
- base_path = self.config.get('byom', {}).get('venv_path')
463
+ base_path = self.config.get("byom", {}).get("venv_path")
482
464
  if base_path is None:
483
465
  # create in root path
484
- base_path = Path(self.config.paths['root']) / 'venvs'
466
+ base_path = Path(self.config.paths["root"]) / "venvs"
485
467
  else:
486
468
  base_path = Path(base_path)
487
469
  base_path.mkdir(parents=True, exist_ok=True)
488
470
 
489
- env_folder_name = f'env_{engine_id}'
471
+ env_folder_name = f"env_{engine_id}"
490
472
  if isinstance(engine_version, int) and engine_version > 1:
491
- env_folder_name = f'{env_folder_name}_{engine_version}'
473
+ env_folder_name = f"{env_folder_name}_{engine_version}"
492
474
 
493
475
  self.env_storage_path = base_path / env_folder_name
494
476
  if self.is_cloud:
495
- bese_env_path = Path(tempfile.gettempdir()) / 'mindsdb' / 'venv'
477
+ bese_env_path = Path(tempfile.gettempdir()) / "mindsdb" / "venv"
496
478
  bese_env_path.mkdir(parents=True, exist_ok=True)
497
479
  self.env_path = bese_env_path / env_folder_name
498
- tar_path = self.env_storage_path.with_suffix('.tar')
480
+ tar_path = self.env_storage_path.with_suffix(".tar")
499
481
  if self.env_path.exists() is False and tar_path.exists() is True:
500
482
  with tarfile.open(tar_path) as tar:
501
483
  safe_extract(tar, path=bese_env_path)
502
484
  else:
503
485
  self.env_path = self.env_storage_path
504
486
 
505
- if sys.platform in ('win32', 'cygwin'):
506
- exectable_folder_name = 'Scripts'
487
+ if sys.platform in ("win32", "cygwin"):
488
+ exectable_folder_name = "Scripts"
507
489
  else:
508
- exectable_folder_name = 'bin'
490
+ exectable_folder_name = "bin"
509
491
 
510
- pip_cmd = self.env_path / exectable_folder_name / 'pip'
511
- self.python_path = self.env_path / exectable_folder_name / 'python'
492
+ pip_cmd = self.env_path / exectable_folder_name / "pip"
493
+ self.python_path = self.env_path / exectable_folder_name / "python"
512
494
 
513
495
  if self.env_path.exists():
514
496
  # already exists. it means requirements are already installed
@@ -516,7 +498,7 @@ class ModelWrapperSafe:
516
498
 
517
499
  # create
518
500
  logger.info(f"Creating new environment: {self.env_path}")
519
- virtualenv.cli_run(['-p', sys.executable, str(self.env_path)])
501
+ virtualenv.cli_run(["-p", sys.executable, str(self.env_path)])
520
502
  logger.info(f"Created new environment: {self.env_path}")
521
503
 
522
504
  if len(modules) > 0:
@@ -537,13 +519,14 @@ class ModelWrapperSafe:
537
519
  if self.is_cloud and self.env_storage_path != self.env_path:
538
520
  old_cwd = os.getcwd()
539
521
  os.chdir(str(bese_env_path))
540
- tar_path = self.env_path.with_suffix('.tar')
541
- with tarfile.open(name=str(tar_path), mode='w') as tar:
522
+ tar_path = self.env_path.with_suffix(".tar")
523
+ with tarfile.open(name=str(tar_path), mode="w") as tar:
542
524
  tar.add(str(self.env_path.name))
543
525
  os.chdir(old_cwd)
544
526
  subprocess.run(
545
- ['cp', '-R', '--no-preserve=mode,ownership', str(tar_path), str(base_path / tar_path.name)],
546
- check=True, shell=False
527
+ ["cp", "-R", "--no-preserve=mode,ownership", str(tar_path), str(base_path / tar_path.name)],
528
+ check=True,
529
+ shell=False,
547
530
  )
548
531
  tar_path.unlink()
549
532
 
@@ -552,46 +535,46 @@ class ModelWrapperSafe:
552
535
  shutil.rmtree(str(self.env_path))
553
536
 
554
537
  if self.is_cloud:
555
- tar_path = self.env_storage_path.with_suffix('.tar')
538
+ tar_path = self.env_storage_path.with_suffix(".tar")
556
539
  tar_path.unlink()
557
540
 
558
541
  def parse_requirements(self, requirements):
559
542
  # get requirements from string
560
543
  # they should be located at the top of the file, before code
561
544
 
562
- pattern = '^[\w\\[\\]-]+[=!<>\s]*[\d\.]*[,=!<>\s]*[\d\.]*$' # noqa
545
+ pattern = "^[\w\\[\\]-]+[=!<>\s]*[\d\.]*[,=!<>\s]*[\d\.]*$" # noqa
563
546
  modules = []
564
- for line in requirements.split(b'\n'):
547
+ for line in requirements.split(b"\n"):
565
548
  line = line.decode().strip()
566
549
  if line:
567
550
  if re.match(pattern, line):
568
551
  modules.append(line)
569
552
  else:
570
- raise Exception(f'Wrong requirement: {line}')
553
+ raise Exception(f"Wrong requirement: {line}")
571
554
 
572
- is_pandas = any([m.lower().startswith('pandas') for m in modules])
555
+ is_pandas = any([m.lower().startswith("pandas") for m in modules])
573
556
  if not is_pandas:
574
- modules.append('pandas>=2.0.0,<2.1.0')
575
- modules.append('numpy<2.0.0')
557
+ modules.append("pandas>=2.0.0,<2.1.0")
558
+ modules.append("numpy<2.0.0")
576
559
 
577
560
  # for dataframe serialization
578
- modules.append('pyarrow==19.0.0')
561
+ modules.append("pyarrow==19.0.0")
579
562
  return modules
580
563
 
581
564
  def install_modules(self, modules, pip_cmd):
582
565
  # install in current environment using pip
583
566
  for module in modules:
584
567
  logger.debug(f"BYOM install module: {module}")
585
- p = subprocess.Popen([pip_cmd, 'install', module], stderr=subprocess.PIPE)
568
+ p = subprocess.Popen([pip_cmd, "install", module], stderr=subprocess.PIPE)
586
569
  p.wait()
587
570
  if p.returncode != 0:
588
- raise Exception(f'Problem with installing module {module}: {p.stderr.read()}')
571
+ raise Exception(f"Problem with installing module {module}: {p.stderr.read()}")
589
572
 
590
573
  def _run_command(self, params):
591
574
  logger.debug(f"BYOM run command: {params.get('method')}")
592
575
  params_enc = encode(params)
593
576
 
594
- wrapper_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proc_wrapper.py')
577
+ wrapper_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "proc_wrapper.py")
595
578
  p = subprocess.Popen(
596
579
  [str(self.python_path), wrapper_path],
597
580
  stdin=subprocess.PIPE,
@@ -613,44 +596,44 @@ class ModelWrapperSafe:
613
596
 
614
597
  def check(self, mode: str = None):
615
598
  params = {
616
- 'method': BYOM_METHOD.CHECK.value,
617
- 'code': self.code,
618
- 'mode': mode,
599
+ "method": BYOM_METHOD.CHECK.value,
600
+ "code": self.code,
601
+ "mode": mode,
619
602
  }
620
603
  return self._run_command(params)
621
604
 
622
605
  def train(self, df, target, args):
623
606
  params = {
624
- 'method': BYOM_METHOD.TRAIN.value,
625
- 'code': self.code,
626
- 'df': None,
627
- 'to_predict': target,
628
- 'args': args,
607
+ "method": BYOM_METHOD.TRAIN.value,
608
+ "code": self.code,
609
+ "df": None,
610
+ "to_predict": target,
611
+ "args": args,
629
612
  }
630
613
  if df is not None:
631
- params['df'] = pd_encode(df)
614
+ params["df"] = pd_encode(df)
632
615
 
633
616
  model_state = self._run_command(params)
634
617
  return model_state
635
618
 
636
619
  def predict(self, df, model_state, args):
637
620
  params = {
638
- 'method': BYOM_METHOD.PREDICT.value,
639
- 'code': self.code,
640
- 'model_state': model_state,
641
- 'df': pd_encode(df),
642
- 'args': args,
621
+ "method": BYOM_METHOD.PREDICT.value,
622
+ "code": self.code,
623
+ "model_state": model_state,
624
+ "df": pd_encode(df),
625
+ "args": args,
643
626
  }
644
627
  pred_df = self._run_command(params)
645
628
  return pd_decode(pred_df)
646
629
 
647
630
  def finetune(self, df, model_state, args):
648
631
  params = {
649
- 'method': BYOM_METHOD.FINETUNE.value,
650
- 'code': self.code,
651
- 'model_state': model_state,
652
- 'df': pd_encode(df),
653
- 'args': args,
632
+ "method": BYOM_METHOD.FINETUNE.value,
633
+ "code": self.code,
634
+ "model_state": model_state,
635
+ "df": pd_encode(df),
636
+ "args": args,
654
637
  }
655
638
 
656
639
  model_state = self._run_command(params)
@@ -658,10 +641,10 @@ class ModelWrapperSafe:
658
641
 
659
642
  def describe(self, model_state, attribute: Optional[str] = None) -> pd.DataFrame:
660
643
  params = {
661
- 'method': BYOM_METHOD.DESCRIBE.value,
662
- 'code': self.code,
663
- 'model_state': model_state,
664
- 'attribute': attribute
644
+ "method": BYOM_METHOD.DESCRIBE.value,
645
+ "code": self.code,
646
+ "model_state": model_state,
647
+ "attribute": attribute,
665
648
  }
666
649
  enc_df = self._run_command(params)
667
650
  df = pd_decode(enc_df)
@@ -669,10 +652,10 @@ class ModelWrapperSafe:
669
652
 
670
653
  def func_call(self, func_name, args):
671
654
  params = {
672
- 'method': BYOM_METHOD.FUNC_CALL.value,
673
- 'code': self.code,
674
- 'func_name': func_name,
675
- 'args': args,
655
+ "method": BYOM_METHOD.FUNC_CALL.value,
656
+ "code": self.code,
657
+ "func_name": func_name,
658
+ "args": args,
676
659
  }
677
660
  result = self._run_command(params)
678
661
  return result