MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (116)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +39 -20
  3. mindsdb/api/a2a/agent.py +7 -9
  4. mindsdb/api/a2a/common/server/server.py +3 -3
  5. mindsdb/api/a2a/common/server/task_manager.py +4 -4
  6. mindsdb/api/a2a/task_manager.py +15 -17
  7. mindsdb/api/common/middleware.py +9 -11
  8. mindsdb/api/executor/command_executor.py +2 -4
  9. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  12. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  13. mindsdb/api/executor/exceptions.py +29 -10
  14. mindsdb/api/executor/planner/plan_join.py +17 -3
  15. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  16. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  18. mindsdb/api/executor/utilities/functions.py +6 -6
  19. mindsdb/api/executor/utilities/sql.py +32 -16
  20. mindsdb/api/http/gui.py +5 -11
  21. mindsdb/api/http/initialize.py +8 -10
  22. mindsdb/api/http/namespaces/agents.py +10 -12
  23. mindsdb/api/http/namespaces/analysis.py +13 -20
  24. mindsdb/api/http/namespaces/auth.py +1 -1
  25. mindsdb/api/http/namespaces/config.py +15 -11
  26. mindsdb/api/http/namespaces/databases.py +140 -201
  27. mindsdb/api/http/namespaces/file.py +15 -4
  28. mindsdb/api/http/namespaces/handlers.py +7 -2
  29. mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
  30. mindsdb/api/http/namespaces/models.py +94 -126
  31. mindsdb/api/http/namespaces/projects.py +13 -22
  32. mindsdb/api/http/namespaces/sql.py +33 -25
  33. mindsdb/api/http/namespaces/tab.py +27 -37
  34. mindsdb/api/http/namespaces/views.py +1 -1
  35. mindsdb/api/http/start.py +14 -8
  36. mindsdb/api/mcp/__init__.py +2 -1
  37. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  38. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  39. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  40. mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
  41. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
  42. mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
  43. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  44. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  45. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
  46. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  47. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  48. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  49. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  50. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  51. mindsdb/integrations/libs/api_handler.py +10 -10
  52. mindsdb/integrations/libs/base.py +4 -4
  53. mindsdb/integrations/libs/llm/utils.py +2 -2
  54. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  55. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  56. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  57. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  58. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  59. mindsdb/integrations/libs/process_cache.py +132 -140
  60. mindsdb/integrations/libs/response.py +18 -12
  61. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  62. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  63. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  64. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
  65. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  66. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  67. mindsdb/integrations/utilities/rag/settings.py +58 -133
  68. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  69. mindsdb/interfaces/agents/agents_controller.py +2 -1
  70. mindsdb/interfaces/agents/constants.py +0 -2
  71. mindsdb/interfaces/agents/litellm_server.py +34 -58
  72. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  73. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  74. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  75. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  76. mindsdb/interfaces/chatbot/polling.py +30 -18
  77. mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
  78. mindsdb/interfaces/database/integrations.py +19 -2
  79. mindsdb/interfaces/file/file_controller.py +6 -6
  80. mindsdb/interfaces/functions/controller.py +1 -1
  81. mindsdb/interfaces/functions/to_markdown.py +2 -2
  82. mindsdb/interfaces/jobs/jobs_controller.py +5 -5
  83. mindsdb/interfaces/jobs/scheduler.py +3 -8
  84. mindsdb/interfaces/knowledge_base/controller.py +50 -23
  85. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  86. mindsdb/interfaces/model/model_controller.py +170 -166
  87. mindsdb/interfaces/query_context/context_controller.py +14 -2
  88. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
  89. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  90. mindsdb/interfaces/skills/skill_tool.py +2 -2
  91. mindsdb/interfaces/skills/sql_agent.py +25 -19
  92. mindsdb/interfaces/storage/fs.py +114 -169
  93. mindsdb/interfaces/storage/json.py +19 -18
  94. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  95. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  96. mindsdb/interfaces/tasks/task_thread.py +7 -9
  97. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  98. mindsdb/interfaces/triggers/triggers_controller.py +47 -50
  99. mindsdb/migrations/migrate.py +16 -16
  100. mindsdb/utilities/api_status.py +58 -0
  101. mindsdb/utilities/config.py +49 -0
  102. mindsdb/utilities/exception.py +40 -1
  103. mindsdb/utilities/fs.py +0 -1
  104. mindsdb/utilities/hooks/profiling.py +17 -14
  105. mindsdb/utilities/langfuse.py +40 -45
  106. mindsdb/utilities/log.py +272 -0
  107. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  108. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  109. mindsdb/utilities/render/sqlalchemy_render.py +7 -6
  110. mindsdb/utilities/utils.py +2 -2
  111. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
  112. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
  113. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  114. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
  115. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
  116. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/libs/process_cache.py
@@ -1,5 +1,6 @@
 import time
 import threading
+import traceback
 from typing import Optional, Callable
 from concurrent.futures import ProcessPoolExecutor, Future
 
@@ -17,7 +18,7 @@ from mindsdb.integrations.libs.ml_handler_process import (
     create_engine_process,
     update_engine_process,
     create_validation_process,
-    func_call_process
+    func_call_process,
 )
 
 
@@ -44,11 +45,13 @@ class MLProcessException(Exception):
     If exception can not be pickled (pickle.loads(pickle.dumps(e))) then it may lead to termination of the ML process.
     Also in this case, the error sent to the user will not be relevant. This wrapper should prevent it.
     """
+
     base_exception_bytes: bytes = None
 
     def __init__(self, base_exception: Exception, message: str = None) -> None:
         super().__init__(message)
-        self.message = f'{base_exception.__class__.__name__}: {base_exception}'
+        traceback_text = "\n".join(traceback.format_exception(base_exception))
+        self.message = f"{base_exception.__class__.__name__}: {base_exception}\n{traceback_text}"
 
     @property
     def base_exception(self) -> Exception:
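
Reviewer note: embedding the formatted traceback in `message` matters because an exception's traceback does not survive the pickle round-trip between the ML worker process and the parent, which is exactly the transport `ProcessPoolExecutor` uses. A minimal sketch of the failure mode and the fix (the `CustomError` class and `worker` function are hypothetical, for illustration only; the single-argument form of `traceback.format_exception` requires Python 3.10+):

import pickle
import traceback

class CustomError(Exception):
    """Hypothetical handler error raised inside an ML worker process."""

def worker():
    try:
        raise CustomError("model training failed")
    except CustomError as e:
        # Pickling (what ProcessPoolExecutor does to send the exception back)
        # preserves the exception object but drops its traceback.
        restored = pickle.loads(pickle.dumps(e))
        print(restored.__traceback__)  # None - the stack context is gone

        # Formatting the traceback up front, as MLProcessException now does,
        # keeps the stack as plain text that survives any transport.
        text = "\n".join(traceback.format_exception(e))
        print(text)

worker()
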
@@ -56,18 +59,19 @@ class MLProcessException(Exception):
 
 
 class WarmProcess:
-    """ Class-wrapper for a process that persist for a long time. The process
-        may be initialized with any handler requirements. Current implimentation
-        is based on ProcessPoolExecutor just because of multiprocessing.pool
-        produce daemon processes, which can not be used for learning. That
-        bahaviour may be changed only using inheritance.
+    """Class-wrapper for a process that persist for a long time. The process
+    may be initialized with any handler requirements. Current implimentation
+    is based on ProcessPoolExecutor just because of multiprocessing.pool
+    produce daemon processes, which can not be used for learning. That
+    bahaviour may be changed only using inheritance.
     """
+
     def __init__(self, initializer: Optional[Callable] = None, initargs: tuple = ()):
-        """ create and init new process
+        """create and init new process
 
-            Args:
-                initializer (Callable): the same as ProcessPoolExecutor initializer
-                initargs (tuple): the same as ProcessPoolExecutor initargs
+        Args:
+            initializer (Callable): the same as ProcessPoolExecutor initializer
+            initargs (tuple): the same as ProcessPoolExecutor initargs
         """
         self.pool = ProcessPoolExecutor(1, initializer=initializer, initargs=initargs)
         self.last_usage_at = time.time()
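
The docstring's rationale is a real multiprocessing constraint: workers created by `multiprocessing.Pool` are daemonic, and daemonic processes are not allowed to spawn children, which breaks ML handlers that fork helper processes during training. A minimal sketch of the difference, assuming Python 3.9+ (where `ProcessPoolExecutor` workers are non-daemonic):

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Pool, Process

def spawn_child():
    # An ML handler that forks helper processes during training would hit
    # this same code path inside the worker.
    p = Process(target=print, args=("child ran",))
    p.start()
    p.join()
    return "ok"

if __name__ == "__main__":
    # ProcessPoolExecutor workers are non-daemonic, so this succeeds.
    with ProcessPoolExecutor(1) as ex:
        print(ex.submit(spawn_child).result())

    # multiprocessing.Pool workers are daemonic; starting a child raises
    # "daemonic processes are not allowed to have children".
    with Pool(1) as pool:
        try:
            pool.apply(spawn_child)
        except AssertionError as err:
            print("Pool worker failed:", err)
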
@@ -91,18 +95,17 @@ class WarmProcess:
         self.pool.shutdown(wait=wait)
 
     def _init_done_callback(self, _task):
-        """ callback for initial task
-        """
+        """callback for initial task"""
         self._init_done = True
 
     def _update_last_usage_at_callback(self, _task):
         self.last_usage_at = time.time()
 
     def ready(self) -> bool:
-        """ check is process ready to get a task or not
+        """check is process ready to get a task or not
 
-            Returns:
-                bool
+        Returns:
+            bool
         """
         if self._init_done is False:
             self.task.result()
@@ -112,51 +115,49 @@
         return False
 
     def add_marker(self, marker: tuple):
-        """ remember that that process processed task for that model
+        """remember that that process processed task for that model
 
-            Args:
-                marker (tuple): identifier of model
+        Args:
+            marker (tuple): identifier of model
         """
         if marker is not None:
             self._markers.add(marker)
 
     def has_marker(self, marker: tuple) -> bool:
-        """ check if that process processed task for model
+        """check if that process processed task for model
 
-            Args:
-                marker (tuple): identifier of model
+        Args:
+            marker (tuple): identifier of model
 
-            Returns:
-                bool
+        Returns:
+            bool
         """
         if marker is None:
             return False
         return marker in self._markers
 
     def is_marked(self) -> bool:
-        """ check if process has any marker
+        """check if process has any marker
 
-            Returns:
-                bool
+        Returns:
+            bool
         """
         return len(self._markers) > 0
 
     def apply_async(self, func: Callable, *args: tuple, **kwargs: dict) -> Future:
-        """ Run new task
+        """Run new task
 
-            Args:
-                func (Callable): function to run
-                args (tuple): args to be passed to function
-                kwargs (dict): kwargs to be passed to function
+        Args:
+            func (Callable): function to run
+            args (tuple): args to be passed to function
+            kwargs (dict): kwargs to be passed to function
 
-            Returns:
-                Future
+        Returns:
+            Future
         """
         if not self.ready():
-            raise Exception('Process task is not ready')
-        self.task = self.pool.submit(
-            func, *args, **kwargs
-        )
+            raise Exception("Process task is not ready")
+        self.task = self.pool.submit(func, *args, **kwargs)
         self.task.add_done_callback(self._update_last_usage_at_callback)
         self.last_usage_at = time.time()
         return self.task
@@ -173,11 +174,11 @@ def warm_function(func, context: str, *args, **kwargs):
 
 
 class ProcessCache:
-    """ simple cache for WarmProcess-es
-    """
+    """simple cache for WarmProcess-es"""
+
     def __init__(self, ttl: int = 120):
-        """ Args:
-                ttl (int) time to live for unused process
+        """Args:
+            ttl (int) time to live for unused process
         """
         self.cache = {}
         self._init = False
@@ -191,42 +192,37 @@ class ProcessCache:
         self._stop_clean()
 
     def _start_clean(self) -> None:
-        """ start worker that close connections after ttl expired
-        """
-        if (
-            isinstance(self.cleaner_thread, threading.Thread)
-            and self.cleaner_thread.is_alive()
-        ):
+        """start worker that close connections after ttl expired"""
+        if isinstance(self.cleaner_thread, threading.Thread) and self.cleaner_thread.is_alive():
             return
         self._stop_event.clear()
-        self.cleaner_thread = threading.Thread(target=self._clean, name='ProcessCache.clean')
+        self.cleaner_thread = threading.Thread(target=self._clean, name="ProcessCache.clean")
         self.cleaner_thread.daemon = True
         self.cleaner_thread.start()
 
     def _stop_clean(self) -> None:
-        """ stop clean worker
-        """
+        """stop clean worker"""
        self._stop_event.set()
 
     def init(self):
-        """ run processes for specified handlers
-        """
+        """run processes for specified handlers"""
         from mindsdb.interfaces.database.integrations import integration_controller
+
         preload_handlers = {}
         config = Config()
-        is_cloud = config.get('cloud', False)  # noqa
+        is_cloud = config.get("cloud", False)  # noqa
 
-        if config['ml_task_queue']['type'] != 'redis':
+        if config["ml_task_queue"]["type"] != "redis":
             if is_cloud:
-                lightwood_handler = integration_controller.get_handler_module('lightwood')
+                lightwood_handler = integration_controller.get_handler_module("lightwood")
                 if lightwood_handler is not None and lightwood_handler.Handler is not None:
                     preload_handlers[lightwood_handler.Handler] = 4 if is_cloud else 1
 
-                huggingface_handler = integration_controller.get_handler_module('huggingface')
+                huggingface_handler = integration_controller.get_handler_module("huggingface")
                 if huggingface_handler is not None and huggingface_handler.Handler is not None:
                     preload_handlers[huggingface_handler.Handler] = 1
 
-                openai_handler = integration_controller.get_handler_module('openai')
+                openai_handler = integration_controller.get_handler_module("openai")
                 if openai_handler is not None and openai_handler.Handler is not None:
                     preload_handlers[openai_handler.Handler] = 1
 
@@ -236,146 +232,144 @@ class ProcessCache:
         for handler in preload_handlers:
             self._keep_alive[handler.name] = preload_handlers[handler]
             self.cache[handler.name] = {
-                'last_usage_at': time.time(),
-                'handler_module': handler.__module__,
-                'processes': [
+                "last_usage_at": time.time(),
+                "handler_module": handler.__module__,
+                "processes": [
                     WarmProcess(init_ml_handler, (handler.__module__,))
                     for _x in range(preload_handlers[handler])
-                ]
+                ],
             }
 
-    def apply_async(self, task_type: ML_TASK_TYPE, model_id: Optional[int],
-                    payload: dict, dataframe: Optional[DataFrame] = None) -> Future:
-        """ run new task. If possible - do it in existing process, if not - start new one.
+    def apply_async(
+        self, task_type: ML_TASK_TYPE, model_id: Optional[int], payload: dict, dataframe: Optional[DataFrame] = None
+    ) -> Future:
+        """run new task. If possible - do it in existing process, if not - start new one.
 
-            Args:
-                task_type (ML_TASK_TYPE): type of the task (learn, predict, etc)
-                model_id (int): id of the model
-                payload (dict): any 'lightweight' data that needs to be send in the process
-                dataframe (DataFrame): DataFrame to be send in the process
+        Args:
+            task_type (ML_TASK_TYPE): type of the task (learn, predict, etc)
+            model_id (int): id of the model
+            payload (dict): any 'lightweight' data that needs to be send in the process
+            dataframe (DataFrame): DataFrame to be send in the process
 
-            Returns:
-                Future
+        Returns:
+            Future
         """
         self._start_clean()
-        handler_module_path = payload['handler_meta']['module_path']
-        integration_id = payload['handler_meta']['integration_id']
+        handler_module_path = payload["handler_meta"]["module_path"]
+        integration_id = payload["handler_meta"]["integration_id"]
         if task_type in (ML_TASK_TYPE.LEARN, ML_TASK_TYPE.FINETUNE):
             func = learn_process
             kwargs = {
-                'data_integration_ref': payload['data_integration_ref'],
-                'problem_definition': payload['problem_definition'],
-                'fetch_data_query': payload['fetch_data_query'],
-                'project_name': payload['project_name'],
-                'model_id': model_id,
-                'base_model_id': payload.get('base_model_id'),
-                'set_active': payload['set_active'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "data_integration_ref": payload["data_integration_ref"],
+                "problem_definition": payload["problem_definition"],
+                "fetch_data_query": payload["fetch_data_query"],
+                "project_name": payload["project_name"],
+                "model_id": model_id,
+                "base_model_id": payload.get("base_model_id"),
+                "set_active": payload["set_active"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.PREDICT:
             func = predict_process
             kwargs = {
-                'predictor_record': payload['predictor_record'],
-                'ml_engine_name': payload['handler_meta']['engine'],
-                'args': payload['args'],
-                'dataframe': dataframe,
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "predictor_record": payload["predictor_record"],
+                "ml_engine_name": payload["handler_meta"]["engine"],
+                "args": payload["args"],
+                "dataframe": dataframe,
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.DESCRIBE:
             func = describe_process
             kwargs = {
-                'attribute': payload.get('attribute'),
-                'model_id': model_id,
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "attribute": payload.get("attribute"),
+                "model_id": model_id,
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.CREATE_VALIDATION:
             func = create_validation_process
             kwargs = {
-                'target': payload.get('target'),
-                'args': payload.get('args'),
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "target": payload.get("target"),
+                "args": payload.get("args"),
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.CREATE_ENGINE:
             func = create_engine_process
             kwargs = {
-                'connection_args': payload['connection_args'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "connection_args": payload["connection_args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
            }
         elif task_type == ML_TASK_TYPE.UPDATE_ENGINE:
             func = update_engine_process
             kwargs = {
-                'connection_args': payload['connection_args'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "connection_args": payload["connection_args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.UPDATE:
             func = update_process
             kwargs = {
-                'args': payload['args'],
-                'integration_id': integration_id,
-                'model_id': model_id,
-                'module_path': handler_module_path
+                "args": payload["args"],
+                "integration_id": integration_id,
+                "model_id": model_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.FUNC_CALL:
             func = func_call_process
             kwargs = {
-                'name': payload['name'],
-                'args': payload['args'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "name": payload["name"],
+                "args": payload["args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         else:
-            raise Exception(f'Unknown ML task type: {task_type}')
+            raise Exception(f"Unknown ML task type: {task_type}")
 
-        ml_engine_name = payload['handler_meta']['engine']
-        model_marker = (model_id, payload['context']['company_id'])
+        ml_engine_name = payload["handler_meta"]["engine"]
+        model_marker = (model_id, payload["context"]["company_id"])
         with self._lock:
             if ml_engine_name not in self.cache:
                 warm_process = WarmProcess(init_ml_handler, (handler_module_path,))
                 self.cache[ml_engine_name] = {
-                    'last_usage_at': None,
-                    'handler_module': handler_module_path,
-                    'processes': [warm_process]
+                    "last_usage_at": None,
+                    "handler_module": handler_module_path,
+                    "processes": [warm_process],
                 }
             else:
                 warm_process = None
                 if model_marker is not None:
                     try:
                         warm_process = next(
-                            p for p in self.cache[ml_engine_name]['processes']
+                            p
+                            for p in self.cache[ml_engine_name]["processes"]
                             if p.ready() and p.has_marker(model_marker)
                         )
                     except StopIteration:
                         pass
                 if warm_process is None:
                     try:
-                        warm_process = next(
-                            p for p in self.cache[ml_engine_name]['processes']
-                            if p.ready()
-                        )
+                        warm_process = next(p for p in self.cache[ml_engine_name]["processes"] if p.ready())
                     except StopIteration:
                         pass
                 if warm_process is None:
                     warm_process = WarmProcess(init_ml_handler, (handler_module_path,))
-                    self.cache[ml_engine_name]['processes'].append(warm_process)
+                    self.cache[ml_engine_name]["processes"].append(warm_process)
 
-            task = warm_process.apply_async(warm_function, func, payload['context'], **kwargs)
-            self.cache[ml_engine_name]['last_usage_at'] = time.time()
+            task = warm_process.apply_async(warm_function, func, payload["context"], **kwargs)
+            self.cache[ml_engine_name]["last_usage_at"] = time.time()
             warm_process.add_marker(model_marker)
             return task
 
     def _clean(self) -> None:
-        """ worker that stop unused processes
-        """
+        """worker that stop unused processes"""
         while self._stop_event.wait(timeout=10) is False:
             with self._lock:
                 for handler_name in self.cache.keys():
-                    processes = self.cache[handler_name]['processes']
+                    processes = self.cache[handler_name]["processes"]
                     processes.sort(key=lambda x: x.is_marked())
 
                     expected_count = 0
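
The selection policy in `apply_async` is worth spelling out for reviewers: prefer a ready process that has already served this model (its marker), fall back to any ready process, and only then pay the cost of spawning a fresh one. A standalone sketch of the same policy (the `ToyProcess` class and `pick_process` function are hypothetical stand-ins, not MindsDB code):

from typing import Optional

class ToyProcess:
    """Hypothetical stand-in for WarmProcess: tracks readiness and model markers."""

    def __init__(self) -> None:
        self.markers = set()
        self.busy = False

    def ready(self) -> bool:
        return not self.busy

def pick_process(pool: list[ToyProcess], marker: Optional[tuple]) -> ToyProcess:
    # 1) a ready process that already served this model (its state is warm)
    for p in pool:
        if p.ready() and marker in p.markers:
            return p
    # 2) any ready process
    for p in pool:
        if p.ready():
            return p
    # 3) nothing ready: spawn a new process and add it to the pool
    p = ToyProcess()
    pool.append(p)
    return p

pool = [ToyProcess(), ToyProcess()]
pool[1].markers.add((42, "company-1"))
assert pick_process(pool, (42, "company-1")) is pool[1]
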
@@ -395,9 +389,7 @@ class ProcessCache:
                            break
 
                     while expected_count > len(processes):
-                        processes.append(
-                            WarmProcess(init_ml_handler, (self.cache[handler_name]['handler_module'],))
-                        )
+                        processes.append(WarmProcess(init_ml_handler, (self.cache[handler_name]["handler_module"],)))
 
     def shutdown(self, wait: bool = True) -> None:
         """Call 'shutdown' for each process cache
@@ -406,25 +398,25 @@
         """
         with self._lock:
             for handler_name in self.cache:
-                for process in self.cache[handler_name]['processes']:
+                for process in self.cache[handler_name]["processes"]:
                     process.shutdown(wait=wait)
-                self.cache[handler_name]['processes'] = []
+                self.cache[handler_name]["processes"] = []
 
     def remove_processes_for_handler(self, handler_name: str) -> None:
         """
-            Remove all warm processes for a given handler.
-            This is useful when the previous processes use an outdated instance of the handler.
-            A good example is when the dependencies for a handler are installed after attempting to use the handler.
+        Remove all warm processes for a given handler.
+        This is useful when the previous processes use an outdated instance of the handler.
+        A good example is when the dependencies for a handler are installed after attempting to use the handler.
 
-            Args:
-                handler_name (str): name of the handler.
+        Args:
+            handler_name (str): name of the handler.
         """
         with self._lock:
             if handler_name in self.cache:
-                for process in self.cache[handler_name]['processes']:
+                for process in self.cache[handler_name]["processes"]:
                     process.shutdown()
 
-                self.cache[handler_name]['processes'] = []
+                self.cache[handler_name]["processes"] = []
 
 
 process_cache = ProcessCache()
mindsdb/integrations/libs/response.py
@@ -1,3 +1,4 @@
+import sys
 from typing import Callable
 from dataclasses import dataclass, fields
 
@@ -41,9 +42,15 @@ INF_SCHEMA_COLUMNS_NAMES_SET = set(f.name for f in fields(INF_SCHEMA_COLUMNS_NAMES))
 
 class HandlerResponse:
     def __init__(
-        self, resp_type: RESPONSE_TYPE, data_frame: pandas.DataFrame = None, query: ASTNode = 0,
-        error_code: int = 0, error_message: str | None = None, affected_rows: int | None = None,
-        mysql_types: list[MYSQL_DATA_TYPE] | None = None
+        self,
+        resp_type: RESPONSE_TYPE,
+        data_frame: pandas.DataFrame = None,
+        query: ASTNode = 0,
+        error_code: int = 0,
+        error_message: str | None = None,
+        affected_rows: int | None = None,
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
+        is_acceptable_error: bool = False,
     ) -> None:
         self.resp_type = resp_type
         self.query = query
@@ -54,6 +61,11 @@ class HandlerResponse:
         if isinstance(self.affected_rows, int) is False or self.affected_rows < 0:
             self.affected_rows = 0
         self.mysql_types = mysql_types
+        self.is_acceptable_error = is_acceptable_error
+        self.exception = None
+        current_exception = sys.exc_info()
+        if current_exception[0] is not None:
+            self.exception = current_exception[1]
 
     @property
     def type(self):
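
The `sys.exc_info()` capture means any `HandlerResponse` built inside an `except` block now carries the live exception object without callers having to pass it in explicitly. A minimal sketch of the same pattern (the `Response` class and `run_query` function here are hypothetical, not the MindsDB ones):

import sys

class Response:
    """Hypothetical response object that snapshots the in-flight exception."""

    def __init__(self, error_message: str | None = None) -> None:
        self.error_message = error_message
        self.exception = None
        exc_type, exc_value, _tb = sys.exc_info()
        # Non-None only when the constructor runs inside an active `except` block.
        if exc_type is not None:
            self.exception = exc_value

def run_query() -> Response:
    try:
        raise ConnectionError("database is unreachable")
    except ConnectionError as e:
        return Response(error_message=str(e))

resp = run_query()
print(type(resp.exception).__name__)  # ConnectionError
print(resp.exception is not None)     # True: callers can inspect or re-raise it
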
@@ -71,9 +83,7 @@
                 f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}, "
                 f"the error is: {self.error_message}"
             )
-            raise ValueError(
-                f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}"
-            )
+            raise ValueError(f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}")
 
         self.data_frame.columns = [name.upper() for name in self.data_frame.columns]
         self.data_frame[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self.data_frame[
@@ -83,9 +93,7 @@
         # region validate df
         current_columns_set = set(self.data_frame.columns)
         if INF_SCHEMA_COLUMNS_NAMES_SET != current_columns_set:
-            raise ValueError(
-                f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}"
-            )
+            raise ValueError(f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}")
         # endregion
 
         self.data_frame = self.data_frame.astype(
@@ -112,9 +120,7 @@
         try:
             data = None
             if self.data_frame is not None:
-                data = self.data_frame.to_json(
-                    orient="split", index=False, date_format="iso"
-                )
+                data = self.data_frame.to_json(orient="split", index=False, date_format="iso")
         except Exception as e:
             logger.error("%s.to_json: error - %s", self.__class__.__name__, e)
             data = None
mindsdb/integrations/libs/vectordatabase_handler.py
@@ -593,6 +593,32 @@ class VectorStoreHandler(BaseHandler):
         """
         raise NotImplementedError(f"Hybrid search not supported for VectorStoreHandler {self.name}")
 
+    def check_existing_ids(self, table_name: str, ids: List[str]) -> List[str]:
+        """
+        Check which IDs from the provided list already exist in the table.
+
+        Args:
+            table_name (str): Name of the table to check
+            ids (List[str]): List of IDs to check for existence
+
+        Returns:
+            List[str]: List of IDs that already exist in the table
+        """
+        if not ids:
+            return []
+
+        try:
+            # Query existing IDs
+            df_existing = self.select(
+                table_name,
+                columns=[TableField.ID.value],
+                conditions=[FilterCondition(column=TableField.ID.value, op=FilterOperator.IN, value=ids)],
+            )
+            return list(df_existing[TableField.ID.value]) if not df_existing.empty else []
+        except Exception:
+            # If select fails for any reason, return empty list to be safe
+            return []
+
     def create_index(self, *args, **kwargs):
         """
         Create an index on the specified table.
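
The new `check_existing_ids` helper supports insert-vs-update decisions upstream of a vector store. A hedged sketch of how a caller might split incoming rows (the `FakeVectorStore` class, collection name, and document IDs are hypothetical placeholders):

from typing import List

class FakeVectorStore:
    """Hypothetical stand-in for a VectorStoreHandler with one stored ID."""

    def __init__(self) -> None:
        self._stored = {"doc-1"}

    def check_existing_ids(self, table_name: str, ids: List[str]) -> List[str]:
        # Mirrors the new helper's contract: return only the IDs already present.
        return [i for i in ids if i in self._stored]

handler = FakeVectorStore()
incoming = {"doc-1": "updated chunk", "doc-2": "new chunk"}

existing = set(handler.check_existing_ids("my_collection", list(incoming)))
to_update = {k: v for k, v in incoming.items() if k in existing}
to_insert = {k: v for k, v in incoming.items() if k not in existing}

print(sorted(to_update))  # ['doc-1']
print(sorted(to_insert))  # ['doc-2']
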
mindsdb/integrations/utilities/files/file_reader.py
@@ -1,17 +1,16 @@
-from dataclasses import dataclass, astuple
-import traceback
-import json
 import csv
-from io import BytesIO, StringIO, IOBase
-from pathlib import Path
+import json
 import codecs
+from io import BytesIO, StringIO, IOBase
 from typing import List, Generator
+from pathlib import Path
+from dataclasses import dataclass, astuple
 
 import filetype
 import pandas as pd
 from charset_normalizer import from_bytes
-from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 
+from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 from mindsdb.utilities import log
 
 logger = log.getLogger(__name__)
@@ -76,7 +75,7 @@ def decode(file_obj: IOBase) -> StringIO:
 
         data_str = StringIO(byte_str.decode(encoding, errors))
     except Exception as e:
-        logger.error(traceback.format_exc())
+        logger.exception("Error during file decode:")
 
         raise FileProcessingError("Could not load into string") from e
82
81
  return data_str