MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MindsDB might be problematic. Click here for more details.

Files changed (163)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +40 -29
  3. mindsdb/api/a2a/__init__.py +1 -1
  4. mindsdb/api/a2a/agent.py +16 -10
  5. mindsdb/api/a2a/common/server/server.py +7 -3
  6. mindsdb/api/a2a/common/server/task_manager.py +12 -5
  7. mindsdb/api/a2a/common/types.py +66 -0
  8. mindsdb/api/a2a/task_manager.py +65 -17
  9. mindsdb/api/common/middleware.py +10 -12
  10. mindsdb/api/executor/command_executor.py +51 -40
  11. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  12. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
  13. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
  14. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  15. mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
  16. mindsdb/api/executor/exceptions.py +29 -10
  17. mindsdb/api/executor/planner/plan_join.py +17 -3
  18. mindsdb/api/executor/planner/query_prepare.py +2 -20
  19. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  20. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  21. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  22. mindsdb/api/executor/utilities/functions.py +6 -6
  23. mindsdb/api/executor/utilities/sql.py +37 -20
  24. mindsdb/api/http/gui.py +5 -11
  25. mindsdb/api/http/initialize.py +75 -61
  26. mindsdb/api/http/namespaces/agents.py +10 -15
  27. mindsdb/api/http/namespaces/analysis.py +13 -20
  28. mindsdb/api/http/namespaces/auth.py +1 -1
  29. mindsdb/api/http/namespaces/chatbots.py +0 -5
  30. mindsdb/api/http/namespaces/config.py +15 -11
  31. mindsdb/api/http/namespaces/databases.py +140 -201
  32. mindsdb/api/http/namespaces/file.py +17 -4
  33. mindsdb/api/http/namespaces/handlers.py +17 -7
  34. mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
  35. mindsdb/api/http/namespaces/models.py +94 -126
  36. mindsdb/api/http/namespaces/projects.py +13 -22
  37. mindsdb/api/http/namespaces/sql.py +33 -25
  38. mindsdb/api/http/namespaces/tab.py +27 -37
  39. mindsdb/api/http/namespaces/views.py +1 -1
  40. mindsdb/api/http/start.py +16 -10
  41. mindsdb/api/mcp/__init__.py +2 -1
  42. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  43. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  44. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  45. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
  46. mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
  47. mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
  48. mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
  49. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  50. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
  51. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  52. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
  53. mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
  54. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
  55. mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
  56. mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
  57. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
  58. mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
  59. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
  60. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  61. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  62. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  63. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  64. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  65. mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
  66. mindsdb/integrations/libs/api_handler.py +10 -10
  67. mindsdb/integrations/libs/base.py +4 -4
  68. mindsdb/integrations/libs/llm/utils.py +2 -2
  69. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  70. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  71. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  72. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  73. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  74. mindsdb/integrations/libs/process_cache.py +132 -140
  75. mindsdb/integrations/libs/response.py +18 -12
  76. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  77. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  78. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
  79. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
  80. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  81. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
  82. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  83. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  84. mindsdb/integrations/utilities/rag/settings.py +58 -133
  85. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  86. mindsdb/interfaces/agents/agents_controller.py +2 -3
  87. mindsdb/interfaces/agents/constants.py +0 -2
  88. mindsdb/interfaces/agents/litellm_server.py +34 -58
  89. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  90. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  91. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  92. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  93. mindsdb/interfaces/chatbot/polling.py +30 -18
  94. mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
  95. mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
  96. mindsdb/interfaces/database/data_handlers_cache.py +190 -0
  97. mindsdb/interfaces/database/database.py +3 -3
  98. mindsdb/interfaces/database/integrations.py +7 -110
  99. mindsdb/interfaces/database/projects.py +2 -6
  100. mindsdb/interfaces/database/views.py +1 -4
  101. mindsdb/interfaces/file/file_controller.py +6 -6
  102. mindsdb/interfaces/functions/controller.py +1 -1
  103. mindsdb/interfaces/functions/to_markdown.py +2 -2
  104. mindsdb/interfaces/jobs/jobs_controller.py +5 -9
  105. mindsdb/interfaces/jobs/scheduler.py +3 -9
  106. mindsdb/interfaces/knowledge_base/controller.py +244 -128
  107. mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
  108. mindsdb/interfaces/knowledge_base/executor.py +11 -0
  109. mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
  110. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  111. mindsdb/interfaces/model/model_controller.py +172 -168
  112. mindsdb/interfaces/query_context/context_controller.py +14 -2
  113. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
  114. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  115. mindsdb/interfaces/skills/skill_tool.py +2 -2
  116. mindsdb/interfaces/skills/skills_controller.py +1 -4
  117. mindsdb/interfaces/skills/sql_agent.py +25 -19
  118. mindsdb/interfaces/storage/db.py +16 -6
  119. mindsdb/interfaces/storage/fs.py +114 -169
  120. mindsdb/interfaces/storage/json.py +19 -18
  121. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  122. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  123. mindsdb/interfaces/tasks/task_thread.py +7 -9
  124. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  125. mindsdb/interfaces/triggers/triggers_controller.py +47 -52
  126. mindsdb/migrations/migrate.py +16 -16
  127. mindsdb/utilities/api_status.py +58 -0
  128. mindsdb/utilities/config.py +68 -2
  129. mindsdb/utilities/exception.py +40 -1
  130. mindsdb/utilities/fs.py +0 -1
  131. mindsdb/utilities/hooks/profiling.py +17 -14
  132. mindsdb/utilities/json_encoder.py +24 -10
  133. mindsdb/utilities/langfuse.py +40 -45
  134. mindsdb/utilities/log.py +272 -0
  135. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  136. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  137. mindsdb/utilities/render/sqlalchemy_render.py +22 -20
  138. mindsdb/utilities/starters.py +0 -10
  139. mindsdb/utilities/utils.py +2 -2
  140. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/METADATA +293 -276
  141. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/RECORD +144 -158
  142. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  143. mindsdb/api/postgres/__init__.py +0 -0
  144. mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
  145. mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
  146. mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
  147. mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
  148. mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
  149. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
  150. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
  151. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
  152. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
  153. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
  154. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
  155. mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
  156. mindsdb/api/postgres/start.py +0 -11
  157. mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
  158. mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
  159. mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
  160. mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
  161. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/WHEEL +0 -0
  162. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/licenses/LICENSE +0 -0
  163. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0rc1.dist-info}/top_level.txt +0 -0
@@ -1,13 +1,11 @@
1
- """ BYOM: Bring Your Own Model
1
+ """BYOM: Bring Your Own Model
2
2
 
3
3
  env vars to contloll BYOM:
4
- - MINDSDB_BYOM_ENABLED - can BYOM be uysed or not. Locally enabled by default.
4
+ - MINDSDB_BYOM_ENABLED - can BYOM be used or not. Locally enabled by default.
5
5
  - MINDSDB_BYOM_INHOUSE_ENABLED - enable or disable 'inhouse' BYOM usage. Locally enabled by default.
6
6
  - MINDSDB_BYOM_DEFAULT_TYPE - [inhouse|venv] default byom type. Locally it is 'venv' by default.
7
- - MINDSDB_BYOM_TYPE - [safe|unsafe] - obsolete, same as above.
8
7
  """
9
8
 
10
-
11
9
  import os
12
10
  import re
13
11
  import sys
@@ -15,7 +13,6 @@ import shutil
15
13
  import pickle
16
14
  import tarfile
17
15
  import tempfile
18
- import traceback
19
16
  import subprocess
20
17
  from enum import Enum
21
18
  from pathlib import Path
@@ -36,28 +33,33 @@ import mindsdb.utilities.profiler as profiler
36
33
 
37
34
 
38
35
  from .proc_wrapper import (
39
- pd_decode, pd_encode, encode, decode, BYOM_METHOD,
40
- import_string, find_model_class, check_module
36
+ pd_decode,
37
+ pd_encode,
38
+ encode,
39
+ decode,
40
+ BYOM_METHOD,
41
+ import_string,
42
+ find_model_class,
43
+ check_module,
41
44
  )
42
45
  from .__about__ import __version__
43
46
 
44
47
 
45
- BYOM_TYPE = Enum('BYOM_TYPE', ['INHOUSE', 'VENV'])
48
+ BYOM_TYPE = Enum("BYOM_TYPE", ["INHOUSE", "VENV"])
46
49
 
47
50
  logger = log.getLogger(__name__)
48
51
 
49
52
 
50
53
  class BYOMHandler(BaseMLEngine):
51
-
52
- name = 'byom'
54
+ name = "byom"
53
55
 
54
56
  def __init__(self, model_storage, engine_storage, **kwargs) -> None:
55
57
  # region check availability
56
- is_cloud = Config().get('cloud', False)
58
+ is_cloud = Config().get("cloud", False)
57
59
  if is_cloud is True:
58
- byom_enabled = os.environ.get('MINDSDB_BYOM_ENABLED', 'false').lower()
59
- if byom_enabled not in ('true', '1'):
60
- raise RuntimeError('BYOM is disabled on cloud')
60
+ byom_enabled = os.environ.get("MINDSDB_BYOM_ENABLED", "false").lower()
61
+ if byom_enabled not in ("true", "1"):
62
+ raise RuntimeError("BYOM is disabled on cloud")
61
63
  # endregion
62
64
 
63
65
  self.model_wrapper = None
@@ -68,28 +70,19 @@ class BYOMHandler(BaseMLEngine):
68
70
  # region read and save set default byom type
69
71
  try:
70
72
  self._default_byom_type = BYOM_TYPE.VENV
71
- if os.environ.get('MINDSDB_BYOM_DEFAULT_TYPE') is not None:
72
- self._default_byom_type = BYOM_TYPE[
73
- os.environ.get('MINDSDB_BYOM_DEFAULT_TYPE').upper()
74
- ]
75
- else:
76
- env_var = os.environ.get('MINDSDB_BYOM_DEFAULT_TYPE')
77
- if env_var == 'SAVE':
78
- self._default_byom_type = BYOM_TYPE['VENV']
79
- elif env_var == 'UNSAVE':
80
- self._default_byom_type = BYOM_TYPE['INHOUSE']
81
- else:
82
- raise KeyError
73
+ if os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE") is not None:
74
+ self._default_byom_type = BYOM_TYPE[os.environ.get("MINDSDB_BYOM_DEFAULT_TYPE").upper()]
83
75
  except KeyError:
76
+ logger.warning(f"Wrong value of env var MINDSDB_BYOM_DEFAULT_TYPE, {BYOM_TYPE.VENV} will be used")
84
77
  self._default_byom_type = BYOM_TYPE.VENV
85
78
  # endregion
86
79
 
87
80
  # region check if 'inhouse' BYOM is enabled
88
- env_var = os.environ.get('MINDSDB_BYOM_INHOUSE_ENABLED')
81
+ env_var = os.environ.get("MINDSDB_BYOM_INHOUSE_ENABLED")
89
82
  if env_var is None:
90
83
  self._inhouse_enabled = False if is_cloud else True
91
84
  else:
92
- self._inhouse_enabled = env_var.lower() in ('true', '1')
85
+ self._inhouse_enabled = env_var.lower() in ("true", "1")
93
86
  # endregion
94
87
 
95
88
  super().__init__(model_storage, engine_storage, **kwargs)
@@ -117,18 +110,18 @@ class BYOMHandler(BaseMLEngine):
117
110
  def create_validation(target: str, args: dict = None, **kwargs) -> None:
118
111
  if isinstance(args, dict) is False:
119
112
  return
120
- using_args = args.get('using', {})
121
- engine_version = using_args.get('engine_version')
113
+ using_args = args.get("using", {})
114
+ engine_version = using_args.get("engine_version")
122
115
  if engine_version is not None:
123
116
  engine_version = BYOMHandler.normalize_engine_version(engine_version)
124
117
  else:
125
- connection_args = kwargs['handler_storage'].get_connection_args()
126
- versions = connection_args.get('versions')
118
+ connection_args = kwargs["handler_storage"].get_connection_args()
119
+ versions = connection_args.get("versions")
127
120
  if isinstance(versions, dict):
128
121
  engine_version = max([int(x) for x in versions.keys()])
129
122
  else:
130
123
  engine_version = 1
131
- using_args['engine_version'] = engine_version
124
+ using_args["engine_version"] = engine_version
132
125
 
133
126
  def get_model_engine_version(self) -> int:
134
127
  """Return current model engine version
@@ -136,7 +129,7 @@ class BYOMHandler(BaseMLEngine):
136
129
  Returns:
137
130
  int: engine version
138
131
  """
139
- engine_version = self.model_storage.get_info()['learn_args'].get('using', {}).get('engine_version')
132
+ engine_version = self.model_storage.get_info()["learn_args"].get("using", {}).get("engine_version")
140
133
  engine_version = BYOMHandler.normalize_engine_version(engine_version)
141
134
  return engine_version
142
135
 
@@ -154,28 +147,26 @@ class BYOMHandler(BaseMLEngine):
154
147
  version = 1
155
148
  if isinstance(version, str):
156
149
  version = int(version)
157
- version_mark = ''
150
+ version_mark = ""
158
151
  if version > 1:
159
- version_mark = f'_{version}'
152
+ version_mark = f"_{version}"
160
153
  version_str = str(version)
161
154
 
162
155
  self.engine_storage.fileStorage.pull()
163
156
  try:
164
- code = self.engine_storage.fileStorage.file_get(f'code{version_mark}')
165
- modules_str = self.engine_storage.fileStorage.file_get(f'modules{version_mark}')
157
+ code = self.engine_storage.fileStorage.file_get(f"code{version_mark}")
158
+ modules_str = self.engine_storage.fileStorage.file_get(f"modules{version_mark}")
166
159
  except FileNotFoundError:
167
160
  raise Exception(f"Engine version '{version}' does not exists")
168
161
 
169
162
  if version_str not in self.model_wrappers:
170
163
  connection_args = self.engine_storage.get_connection_args()
171
- version_meta = connection_args['versions'][version_str]
164
+ version_meta = connection_args["versions"][version_str]
172
165
 
173
166
  try:
174
- engine_version_type = BYOM_TYPE[
175
- version_meta.get('type', self._default_byom_type.name).upper()
176
- ]
167
+ engine_version_type = BYOM_TYPE[version_meta.get("type", self._default_byom_type.name).upper()]
177
168
  except KeyError:
178
- raise Exception('Unknown BYOM engine type')
169
+ raise Exception("Unknown BYOM engine type")
179
170
 
180
171
  if engine_version_type == BYOM_TYPE.INHOUSE:
181
172
  if self._inhouse_enabled is False:
@@ -185,20 +176,20 @@ class BYOMHandler(BaseMLEngine):
185
176
  code=code,
186
177
  modules_str=modules_str,
187
178
  engine_id=self.engine_storage.integration_id,
188
- engine_version=version
179
+ engine_version=version,
189
180
  )
190
181
  self.model_wrappers[version_str] = self.inhouse_model_wrapper
191
182
  elif engine_version_type == BYOM_TYPE.VENV:
192
- if version_meta.get('venv_status') != 'ready':
193
- version_meta['venv_status'] = 'creating'
183
+ if version_meta.get("venv_status") != "ready":
184
+ version_meta["venv_status"] = "creating"
194
185
  self.engine_storage.update_connection_args(connection_args)
195
186
  self.model_wrappers[version_str] = ModelWrapperSafe(
196
187
  code=code,
197
188
  modules_str=modules_str,
198
189
  engine_id=self.engine_storage.integration_id,
199
- engine_version=version
190
+ engine_version=version,
200
191
  )
201
- version_meta['venv_status'] = 'ready'
192
+ version_meta["venv_status"] = "ready"
202
193
  self.engine_storage.update_connection_args(connection_args)
203
194
 
204
195
  return self.model_wrappers[version_str]
@@ -206,130 +197,114 @@ class BYOMHandler(BaseMLEngine):
206
197
  def describe(self, attribute: Optional[str] = None) -> pd.DataFrame:
207
198
  engine_version = self.get_model_engine_version()
208
199
  mp = self._get_model_proxy(engine_version)
209
- model_state = self.model_storage.file_get('model')
200
+ model_state = self.model_storage.file_get("model")
210
201
  return mp.describe(model_state, attribute)
211
202
 
212
203
  def create(self, target, df=None, args=None, **kwargs):
213
- using_args = args.get('using', {})
214
- engine_version = using_args.get('engine_version')
204
+ using_args = args.get("using", {})
205
+ engine_version = using_args.get("engine_version")
215
206
 
216
207
  model_proxy = self._get_model_proxy(engine_version)
217
208
  model_state = model_proxy.train(df, target, args)
218
209
 
219
- self.model_storage.file_set('model', model_state)
210
+ self.model_storage.file_set("model", model_state)
220
211
 
221
212
  # TODO return columns?
222
213
 
223
214
  def convert_type(field_type):
224
215
  if pd_types.is_integer_dtype(field_type):
225
- return 'integer'
216
+ return "integer"
226
217
  elif pd_types.is_numeric_dtype(field_type):
227
- return 'float'
218
+ return "float"
228
219
  elif pd_types.is_datetime64_any_dtype(field_type):
229
- return 'datetime'
220
+ return "datetime"
230
221
  else:
231
- return 'categorical'
222
+ return "categorical"
232
223
 
233
- columns = {
234
- target: convert_type(object)
235
- }
224
+ columns = {target: convert_type(object)}
236
225
 
237
226
  self.model_storage.columns_set(columns)
238
227
 
239
228
  def predict(self, df, args=None):
240
- pred_args = args.get('predict_params', {})
229
+ pred_args = args.get("predict_params", {})
241
230
 
242
- engine_version = pred_args.get('engine_version')
231
+ engine_version = pred_args.get("engine_version")
243
232
  if engine_version is not None:
244
233
  engine_version = int(engine_version)
245
234
  else:
246
235
  engine_version = self.get_model_engine_version()
247
236
 
248
237
  model_proxy = self._get_model_proxy(engine_version)
249
- model_state = self.model_storage.file_get('model')
238
+ model_state = self.model_storage.file_get("model")
250
239
  pred_df = model_proxy.predict(df, model_state, pred_args)
251
240
 
252
241
  return pred_df
253
242
 
254
243
  def create_engine(self, connection_args):
255
- code_path = Path(connection_args['code'])
256
- self.engine_storage.fileStorage.file_set(
257
- 'code',
258
- code_path.read_bytes()
259
- )
244
+ code_path = Path(connection_args["code"])
245
+ self.engine_storage.fileStorage.file_set("code", code_path.read_bytes())
260
246
 
261
- requirements_path = Path(connection_args['modules'])
262
- self.engine_storage.fileStorage.file_set(
263
- 'modules',
264
- requirements_path.read_bytes()
265
- )
247
+ requirements_path = Path(connection_args["modules"])
248
+ self.engine_storage.fileStorage.file_set("modules", requirements_path.read_bytes())
266
249
 
267
250
  self.engine_storage.fileStorage.push()
268
251
 
269
- self.engine_storage.update_connection_args({
270
- 'handler_version': __version__,
271
- 'mode': connection_args.get('mode'),
272
- 'versions': {
273
- '1': {
274
- 'code': code_path.name,
275
- 'requirements': requirements_path.name,
276
- 'type': self.normalize_byom_type(
277
- connection_args.get('type')
278
- ).name.lower()
279
- }
252
+ self.engine_storage.update_connection_args(
253
+ {
254
+ "handler_version": __version__,
255
+ "mode": connection_args.get("mode"),
256
+ "versions": {
257
+ "1": {
258
+ "code": code_path.name,
259
+ "requirements": requirements_path.name,
260
+ "type": self.normalize_byom_type(connection_args.get("type")).name.lower(),
261
+ }
262
+ },
280
263
  }
281
- })
264
+ )
282
265
 
283
266
  model_proxy = self._get_model_proxy()
284
267
  try:
285
- info = model_proxy.check(connection_args.get('mode'))
286
- self.engine_storage.json_set('methods', info['methods'])
268
+ info = model_proxy.check(connection_args.get("mode"))
269
+ self.engine_storage.json_set("methods", info["methods"])
287
270
 
288
271
  except Exception as e:
289
- if hasattr(model_proxy, 'remove_venv'):
272
+ if hasattr(model_proxy, "remove_venv"):
290
273
  model_proxy.remove_venv()
291
274
  raise e
292
275
 
293
276
  def update_engine(self, connection_args: dict) -> None:
294
277
  """Add new version of engine
295
278
 
296
- Args:
297
- connection_args (dict): paths to code and requirements
279
+ Args:
280
+ connection_args (dict): paths to code and requirements
298
281
  """
299
- code_path = Path(connection_args['code'])
300
- requirements_path = Path(connection_args['modules'])
282
+ code_path = Path(connection_args["code"])
283
+ requirements_path = Path(connection_args["modules"])
301
284
 
302
285
  engine_connection_args = self.engine_storage.get_connection_args()
303
- if isinstance(engine_connection_args, dict) is False or 'handler_version' not in engine_connection_args:
286
+ if isinstance(engine_connection_args, dict) is False or "handler_version" not in engine_connection_args:
304
287
  engine_connection_args = {
305
- 'handler_version': __version__,
306
- 'versions': {
307
- '1': {
308
- 'code': 'code.py',
309
- 'requirements': 'requirements.txt',
310
- 'type': self._default_byom_type.name.lower()
288
+ "handler_version": __version__,
289
+ "versions": {
290
+ "1": {
291
+ "code": "code.py",
292
+ "requirements": "requirements.txt",
293
+ "type": self._default_byom_type.name.lower(),
311
294
  }
312
- }
295
+ },
313
296
  }
314
- new_version = str(max([int(x) for x in engine_connection_args['versions'].keys()]) + 1)
315
-
316
- engine_connection_args['versions'][new_version] = {
317
- 'code': code_path.name,
318
- 'requirements': requirements_path.name,
319
- 'type': self.normalize_byom_type(
320
- connection_args.get('type')
321
- ).name.lower()
297
+ new_version = str(max([int(x) for x in engine_connection_args["versions"].keys()]) + 1)
298
+
299
+ engine_connection_args["versions"][new_version] = {
300
+ "code": code_path.name,
301
+ "requirements": requirements_path.name,
302
+ "type": self.normalize_byom_type(connection_args.get("type")).name.lower(),
322
303
  }
323
304
 
324
- self.engine_storage.fileStorage.file_set(
325
- f'code_{new_version}',
326
- code_path.read_bytes()
327
- )
305
+ self.engine_storage.fileStorage.file_set(f"code_{new_version}", code_path.read_bytes())
328
306
 
329
- self.engine_storage.fileStorage.file_set(
330
- f'modules_{new_version}',
331
- requirements_path.read_bytes()
332
- )
307
+ self.engine_storage.fileStorage.file_set(f"modules_{new_version}", requirements_path.read_bytes())
333
308
  self.engine_storage.fileStorage.push()
334
309
 
335
310
  self.engine_storage.update_connection_args(engine_connection_args)
@@ -337,28 +312,28 @@ class BYOMHandler(BaseMLEngine):
337
312
  model_proxy = self._get_model_proxy(new_version)
338
313
  try:
339
314
  methods = model_proxy.check()
340
- self.engine_storage.json_set('methods', methods)
315
+ self.engine_storage.json_set("methods", methods)
341
316
 
342
317
  except Exception as e:
343
- if hasattr(model_proxy, 'remove_venv'):
318
+ if hasattr(model_proxy, "remove_venv"):
344
319
  model_proxy.remove_venv()
345
320
  raise e
346
321
 
347
322
  def function_list(self):
348
- return self.engine_storage.json_get('methods')
323
+ return self.engine_storage.json_get("methods")
349
324
 
350
325
  def function_call(self, name, args):
351
326
  mp = self._get_model_proxy()
352
327
  return mp.func_call(name, args)
353
328
 
354
329
  def finetune(self, df: Optional[pd.DataFrame] = None, args: Optional[Dict] = None) -> None:
355
- using_args = args.get('using', {})
356
- engine_version = using_args.get('engine_version')
330
+ using_args = args.get("using", {})
331
+ engine_version = using_args.get("engine_version")
357
332
 
358
333
  model_storage = self.model_storage
359
334
  # TODO: should probably refactor at some point, as a bit of the logic is shared with lightwood's finetune logic
360
335
  try:
361
- base_predictor_id = args['base_model_id']
336
+ base_predictor_id = args["base_model_id"]
362
337
  base_predictor_record = db.Predictor.query.get(base_predictor_id)
363
338
  if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE:
364
339
  raise Exception("Base model must be in status 'complete'")
@@ -366,33 +341,34 @@ class BYOMHandler(BaseMLEngine):
366
341
  predictor_id = model_storage.predictor_id
367
342
  predictor_record = db.Predictor.query.get(predictor_id)
368
343
 
369
- predictor_record.data = {'training_log': 'training'} # TODO move to ModelStorage (don't work w/ db directly)
344
+ predictor_record.data = {
345
+ "training_log": "training"
346
+ } # TODO move to ModelStorage (don't work w/ db directly)
370
347
  predictor_record.training_start_at = datetime.now()
371
348
  predictor_record.status = PREDICTOR_STATUS.FINETUNING # TODO: parallel execution block
372
349
  db.session.commit()
373
350
 
374
351
  model_proxy = self._get_model_proxy(engine_version)
375
- model_state = self.base_model_storage.file_get('model')
376
- model_state = model_proxy.finetune(df, model_state, args=args.get('using', {}))
352
+ model_state = self.base_model_storage.file_get("model")
353
+ model_state = model_proxy.finetune(df, model_state, args=args.get("using", {}))
377
354
 
378
355
  # region hack to speedup file saving
379
- with profiler.Context('finetune-byom-write-file'):
380
- dest_abs_path = model_storage.fileStorage.folder_path / 'model'
381
- with open(dest_abs_path, 'wb') as fd:
356
+ with profiler.Context("finetune-byom-write-file"):
357
+ dest_abs_path = model_storage.fileStorage.folder_path / "model"
358
+ with open(dest_abs_path, "wb") as fd:
382
359
  fd.write(model_state)
383
360
  model_storage.fileStorage.push(compression_level=0)
384
361
  # endregion
385
362
 
386
- predictor_record.update_status = 'up_to_date'
363
+ predictor_record.update_status = "up_to_date"
387
364
  predictor_record.status = PREDICTOR_STATUS.COMPLETE
388
365
  predictor_record.training_stop_at = datetime.now()
389
366
  db.session.commit()
390
367
 
391
368
  except Exception as e:
392
- logger.error(e)
369
+ logger.error("Unexpected error during BYOM finetune:", exc_info=True)
393
370
  predictor_id = model_storage.predictor_id
394
371
  predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
395
- logger.error(traceback.format_exc())
396
372
  error_message = format_exception_error(e)
397
373
  predictor_record.data = {"error": error_message}
398
374
  predictor_record.status = PREDICTOR_STATUS.ERROR
@@ -406,8 +382,7 @@ class BYOMHandler(BaseMLEngine):
406
382
 
407
383
 
408
384
  class ModelWrapperUnsafe:
409
- """ Model wrapper that executes learn/predict in current process
410
- """
385
+ """Model wrapper that executes learn/predict in current process"""
411
386
 
412
387
  def __init__(self, code, modules_str, engine_id, engine_version: int):
413
388
  self.module = import_string(code)
@@ -444,7 +419,7 @@ class ModelWrapperUnsafe:
444
419
  return pickle.dumps(self.model_instance.__dict__, protocol=5)
445
420
 
446
421
  def describe(self, model_state, attribute: Optional[str] = None) -> pd.DataFrame:
447
- if hasattr(self.model_instance, 'describe'):
422
+ if hasattr(self.model_instance, "describe"):
448
423
  model_state = pickle.loads(model_state)
449
424
  self.model_instance.__dict__ = model_state
450
425
  return self.model_instance.describe(attribute)
@@ -460,15 +435,14 @@ class ModelWrapperUnsafe:
460
435
 
461
436
 
462
437
  class ModelWrapperSafe:
463
- """ Model wrapper that executes learn/predict in venv
464
- """
438
+ """Model wrapper that executes learn/predict in venv"""
465
439
 
466
440
  def __init__(self, code, modules_str, engine_id, engine_version: int):
467
441
  self.code = code
468
442
  modules = self.parse_requirements(modules_str)
469
443
 
470
444
  self.config = Config()
471
- self.is_cloud = Config().get('cloud', False)
445
+ self.is_cloud = Config().get("cloud", False)
472
446
 
473
447
  self.env_path = None
474
448
  self.env_storage_path = None
@@ -478,37 +452,37 @@ class ModelWrapperSafe:
478
452
  try:
479
453
  import virtualenv
480
454
 
481
- base_path = self.config.get('byom', {}).get('venv_path')
455
+ base_path = self.config.get("byom", {}).get("venv_path")
482
456
  if base_path is None:
483
457
  # create in root path
484
- base_path = Path(self.config.paths['root']) / 'venvs'
458
+ base_path = Path(self.config.paths["root"]) / "venvs"
485
459
  else:
486
460
  base_path = Path(base_path)
487
461
  base_path.mkdir(parents=True, exist_ok=True)
488
462
 
489
- env_folder_name = f'env_{engine_id}'
463
+ env_folder_name = f"env_{engine_id}"
490
464
  if isinstance(engine_version, int) and engine_version > 1:
491
- env_folder_name = f'{env_folder_name}_{engine_version}'
465
+ env_folder_name = f"{env_folder_name}_{engine_version}"
492
466
 
493
467
  self.env_storage_path = base_path / env_folder_name
494
468
  if self.is_cloud:
495
- bese_env_path = Path(tempfile.gettempdir()) / 'mindsdb' / 'venv'
469
+ bese_env_path = Path(tempfile.gettempdir()) / "mindsdb" / "venv"
496
470
  bese_env_path.mkdir(parents=True, exist_ok=True)
497
471
  self.env_path = bese_env_path / env_folder_name
498
- tar_path = self.env_storage_path.with_suffix('.tar')
472
+ tar_path = self.env_storage_path.with_suffix(".tar")
499
473
  if self.env_path.exists() is False and tar_path.exists() is True:
500
474
  with tarfile.open(tar_path) as tar:
501
475
  safe_extract(tar, path=bese_env_path)
502
476
  else:
503
477
  self.env_path = self.env_storage_path
504
478
 
505
- if sys.platform in ('win32', 'cygwin'):
506
- exectable_folder_name = 'Scripts'
479
+ if sys.platform in ("win32", "cygwin"):
480
+ exectable_folder_name = "Scripts"
507
481
  else:
508
- exectable_folder_name = 'bin'
482
+ exectable_folder_name = "bin"
509
483
 
510
- pip_cmd = self.env_path / exectable_folder_name / 'pip'
511
- self.python_path = self.env_path / exectable_folder_name / 'python'
484
+ pip_cmd = self.env_path / exectable_folder_name / "pip"
485
+ self.python_path = self.env_path / exectable_folder_name / "python"
512
486
 
513
487
  if self.env_path.exists():
514
488
  # already exists. it means requirements are already installed
@@ -516,7 +490,7 @@ class ModelWrapperSafe:
516
490
 
517
491
  # create
518
492
  logger.info(f"Creating new environment: {self.env_path}")
519
- virtualenv.cli_run(['-p', sys.executable, str(self.env_path)])
493
+ virtualenv.cli_run(["-p", sys.executable, str(self.env_path)])
520
494
  logger.info(f"Created new environment: {self.env_path}")
521
495
 
522
496
  if len(modules) > 0:
@@ -537,13 +511,14 @@ class ModelWrapperSafe:
537
511
  if self.is_cloud and self.env_storage_path != self.env_path:
538
512
  old_cwd = os.getcwd()
539
513
  os.chdir(str(bese_env_path))
540
- tar_path = self.env_path.with_suffix('.tar')
541
- with tarfile.open(name=str(tar_path), mode='w') as tar:
514
+ tar_path = self.env_path.with_suffix(".tar")
515
+ with tarfile.open(name=str(tar_path), mode="w") as tar:
542
516
  tar.add(str(self.env_path.name))
543
517
  os.chdir(old_cwd)
544
518
  subprocess.run(
545
- ['cp', '-R', '--no-preserve=mode,ownership', str(tar_path), str(base_path / tar_path.name)],
546
- check=True, shell=False
519
+ ["cp", "-R", "--no-preserve=mode,ownership", str(tar_path), str(base_path / tar_path.name)],
520
+ check=True,
521
+ shell=False,
547
522
  )
548
523
  tar_path.unlink()
549
524
 
@@ -552,46 +527,46 @@ class ModelWrapperSafe:
552
527
  shutil.rmtree(str(self.env_path))
553
528
 
554
529
  if self.is_cloud:
555
- tar_path = self.env_storage_path.with_suffix('.tar')
530
+ tar_path = self.env_storage_path.with_suffix(".tar")
556
531
  tar_path.unlink()
557
532
 
558
533
  def parse_requirements(self, requirements):
559
534
  # get requirements from string
560
535
  # they should be located at the top of the file, before code
561
536
 
562
- pattern = '^[\w\\[\\]-]+[=!<>\s]*[\d\.]*[,=!<>\s]*[\d\.]*$' # noqa
537
+ pattern = "^[\w\\[\\]-]+[=!<>\s]*[\d\.]*[,=!<>\s]*[\d\.]*$" # noqa
563
538
  modules = []
564
- for line in requirements.split(b'\n'):
539
+ for line in requirements.split(b"\n"):
565
540
  line = line.decode().strip()
566
541
  if line:
567
542
  if re.match(pattern, line):
568
543
  modules.append(line)
569
544
  else:
570
- raise Exception(f'Wrong requirement: {line}')
545
+ raise Exception(f"Wrong requirement: {line}")
571
546
 
572
- is_pandas = any([m.lower().startswith('pandas') for m in modules])
547
+ is_pandas = any([m.lower().startswith("pandas") for m in modules])
573
548
  if not is_pandas:
574
- modules.append('pandas>=2.0.0,<2.1.0')
575
- modules.append('numpy<2.0.0')
549
+ modules.append("pandas>=2.0.0,<2.1.0")
550
+ modules.append("numpy<2.0.0")
576
551
 
577
552
  # for dataframe serialization
578
- modules.append('pyarrow==19.0.0')
553
+ modules.append("pyarrow==19.0.0")
579
554
  return modules
580
555
 
581
556
  def install_modules(self, modules, pip_cmd):
582
557
  # install in current environment using pip
583
558
  for module in modules:
584
559
  logger.debug(f"BYOM install module: {module}")
585
- p = subprocess.Popen([pip_cmd, 'install', module], stderr=subprocess.PIPE)
560
+ p = subprocess.Popen([pip_cmd, "install", module], stderr=subprocess.PIPE)
586
561
  p.wait()
587
562
  if p.returncode != 0:
588
- raise Exception(f'Problem with installing module {module}: {p.stderr.read()}')
563
+ raise Exception(f"Problem with installing module {module}: {p.stderr.read()}")
589
564
 
590
565
  def _run_command(self, params):
591
566
  logger.debug(f"BYOM run command: {params.get('method')}")
592
567
  params_enc = encode(params)
593
568
 
594
- wrapper_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'proc_wrapper.py')
569
+ wrapper_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "proc_wrapper.py")
595
570
  p = subprocess.Popen(
596
571
  [str(self.python_path), wrapper_path],
597
572
  stdin=subprocess.PIPE,
@@ -613,44 +588,44 @@ class ModelWrapperSafe:
613
588
 
614
589
  def check(self, mode: str = None):
615
590
  params = {
616
- 'method': BYOM_METHOD.CHECK.value,
617
- 'code': self.code,
618
- 'mode': mode,
591
+ "method": BYOM_METHOD.CHECK.value,
592
+ "code": self.code,
593
+ "mode": mode,
619
594
  }
620
595
  return self._run_command(params)
621
596
 
622
597
  def train(self, df, target, args):
623
598
  params = {
624
- 'method': BYOM_METHOD.TRAIN.value,
625
- 'code': self.code,
626
- 'df': None,
627
- 'to_predict': target,
628
- 'args': args,
599
+ "method": BYOM_METHOD.TRAIN.value,
600
+ "code": self.code,
601
+ "df": None,
602
+ "to_predict": target,
603
+ "args": args,
629
604
  }
630
605
  if df is not None:
631
- params['df'] = pd_encode(df)
606
+ params["df"] = pd_encode(df)
632
607
 
633
608
  model_state = self._run_command(params)
634
609
  return model_state
635
610
 
636
611
  def predict(self, df, model_state, args):
637
612
  params = {
638
- 'method': BYOM_METHOD.PREDICT.value,
639
- 'code': self.code,
640
- 'model_state': model_state,
641
- 'df': pd_encode(df),
642
- 'args': args,
613
+ "method": BYOM_METHOD.PREDICT.value,
614
+ "code": self.code,
615
+ "model_state": model_state,
616
+ "df": pd_encode(df),
617
+ "args": args,
643
618
  }
644
619
  pred_df = self._run_command(params)
645
620
  return pd_decode(pred_df)
646
621
 
647
622
  def finetune(self, df, model_state, args):
648
623
  params = {
649
- 'method': BYOM_METHOD.FINETUNE.value,
650
- 'code': self.code,
651
- 'model_state': model_state,
652
- 'df': pd_encode(df),
653
- 'args': args,
624
+ "method": BYOM_METHOD.FINETUNE.value,
625
+ "code": self.code,
626
+ "model_state": model_state,
627
+ "df": pd_encode(df),
628
+ "args": args,
654
629
  }
655
630
 
656
631
  model_state = self._run_command(params)
@@ -658,10 +633,10 @@ class ModelWrapperSafe:
658
633
 
659
634
  def describe(self, model_state, attribute: Optional[str] = None) -> pd.DataFrame:
660
635
  params = {
661
- 'method': BYOM_METHOD.DESCRIBE.value,
662
- 'code': self.code,
663
- 'model_state': model_state,
664
- 'attribute': attribute
636
+ "method": BYOM_METHOD.DESCRIBE.value,
637
+ "code": self.code,
638
+ "model_state": model_state,
639
+ "attribute": attribute,
665
640
  }
666
641
  enc_df = self._run_command(params)
667
642
  df = pd_decode(enc_df)
@@ -669,10 +644,10 @@ class ModelWrapperSafe:
669
644
 
670
645
  def func_call(self, func_name, args):
671
646
  params = {
672
- 'method': BYOM_METHOD.FUNC_CALL.value,
673
- 'code': self.code,
674
- 'func_name': func_name,
675
- 'args': args,
647
+ "method": BYOM_METHOD.FUNC_CALL.value,
648
+ "code": self.code,
649
+ "func_name": func_name,
650
+ "args": args,
676
651
  }
677
652
  result = self._run_command(params)
678
653
  return result