MindsDB 25.9.2.0a1__py3-none-any.whl → 25.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.
Files changed (164)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +40 -29
  3. mindsdb/api/a2a/__init__.py +1 -1
  4. mindsdb/api/a2a/agent.py +16 -10
  5. mindsdb/api/a2a/common/server/server.py +7 -3
  6. mindsdb/api/a2a/common/server/task_manager.py +12 -5
  7. mindsdb/api/a2a/common/types.py +66 -0
  8. mindsdb/api/a2a/task_manager.py +65 -17
  9. mindsdb/api/common/middleware.py +10 -12
  10. mindsdb/api/executor/command_executor.py +51 -40
  11. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  12. mindsdb/api/executor/datahub/datanodes/information_schema_datanode.py +7 -13
  13. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +101 -49
  14. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  15. mindsdb/api/executor/datahub/datanodes/system_tables.py +3 -2
  16. mindsdb/api/executor/exceptions.py +29 -10
  17. mindsdb/api/executor/planner/plan_join.py +17 -3
  18. mindsdb/api/executor/planner/query_prepare.py +2 -20
  19. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  20. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  21. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  22. mindsdb/api/executor/utilities/functions.py +6 -6
  23. mindsdb/api/executor/utilities/sql.py +37 -20
  24. mindsdb/api/http/gui.py +5 -11
  25. mindsdb/api/http/initialize.py +75 -61
  26. mindsdb/api/http/namespaces/agents.py +10 -15
  27. mindsdb/api/http/namespaces/analysis.py +13 -20
  28. mindsdb/api/http/namespaces/auth.py +1 -1
  29. mindsdb/api/http/namespaces/chatbots.py +0 -5
  30. mindsdb/api/http/namespaces/config.py +15 -11
  31. mindsdb/api/http/namespaces/databases.py +140 -201
  32. mindsdb/api/http/namespaces/file.py +17 -4
  33. mindsdb/api/http/namespaces/handlers.py +17 -7
  34. mindsdb/api/http/namespaces/knowledge_bases.py +28 -7
  35. mindsdb/api/http/namespaces/models.py +94 -126
  36. mindsdb/api/http/namespaces/projects.py +13 -22
  37. mindsdb/api/http/namespaces/sql.py +33 -25
  38. mindsdb/api/http/namespaces/tab.py +27 -37
  39. mindsdb/api/http/namespaces/views.py +1 -1
  40. mindsdb/api/http/start.py +16 -10
  41. mindsdb/api/mcp/__init__.py +2 -1
  42. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  43. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  44. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  45. mindsdb/api/mysql/mysql_proxy/utilities/dump.py +8 -2
  46. mindsdb/integrations/handlers/byom_handler/byom_handler.py +165 -190
  47. mindsdb/integrations/handlers/databricks_handler/databricks_handler.py +98 -46
  48. mindsdb/integrations/handlers/druid_handler/druid_handler.py +32 -40
  49. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  50. mindsdb/integrations/handlers/gitlab_handler/gitlab_handler.py +5 -2
  51. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  52. mindsdb/integrations/handlers/mssql_handler/mssql_handler.py +438 -100
  53. mindsdb/integrations/handlers/mssql_handler/requirements_odbc.txt +3 -0
  54. mindsdb/integrations/handlers/mysql_handler/mysql_handler.py +235 -3
  55. mindsdb/integrations/handlers/oracle_handler/__init__.py +2 -0
  56. mindsdb/integrations/handlers/oracle_handler/connection_args.py +7 -1
  57. mindsdb/integrations/handlers/oracle_handler/oracle_handler.py +321 -16
  58. mindsdb/integrations/handlers/oracle_handler/requirements.txt +1 -1
  59. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +14 -2
  60. mindsdb/integrations/handlers/shopify_handler/requirements.txt +1 -0
  61. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +80 -13
  62. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  63. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  64. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  65. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  66. mindsdb/integrations/handlers/zendesk_handler/zendesk_tables.py +144 -111
  67. mindsdb/integrations/libs/api_handler.py +10 -10
  68. mindsdb/integrations/libs/base.py +4 -4
  69. mindsdb/integrations/libs/llm/utils.py +2 -2
  70. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  71. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  72. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  73. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  74. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  75. mindsdb/integrations/libs/process_cache.py +132 -140
  76. mindsdb/integrations/libs/response.py +18 -12
  77. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  78. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  79. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/__init__.py +1 -0
  80. mindsdb/integrations/utilities/handlers/auth_utilities/snowflake/snowflake_jwt_gen.py +151 -0
  81. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  82. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +83 -30
  83. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  84. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  85. mindsdb/integrations/utilities/rag/settings.py +58 -133
  86. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  87. mindsdb/interfaces/agents/agents_controller.py +2 -3
  88. mindsdb/interfaces/agents/constants.py +0 -2
  89. mindsdb/interfaces/agents/litellm_server.py +34 -58
  90. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  91. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  92. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  93. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  94. mindsdb/interfaces/chatbot/polling.py +30 -18
  95. mindsdb/interfaces/data_catalog/data_catalog_loader.py +16 -17
  96. mindsdb/interfaces/data_catalog/data_catalog_reader.py +15 -4
  97. mindsdb/interfaces/database/data_handlers_cache.py +190 -0
  98. mindsdb/interfaces/database/database.py +3 -3
  99. mindsdb/interfaces/database/integrations.py +7 -110
  100. mindsdb/interfaces/database/projects.py +2 -6
  101. mindsdb/interfaces/database/views.py +1 -4
  102. mindsdb/interfaces/file/file_controller.py +6 -6
  103. mindsdb/interfaces/functions/controller.py +1 -1
  104. mindsdb/interfaces/functions/to_markdown.py +2 -2
  105. mindsdb/interfaces/jobs/jobs_controller.py +5 -9
  106. mindsdb/interfaces/jobs/scheduler.py +3 -9
  107. mindsdb/interfaces/knowledge_base/controller.py +244 -128
  108. mindsdb/interfaces/knowledge_base/evaluate.py +36 -41
  109. mindsdb/interfaces/knowledge_base/executor.py +11 -0
  110. mindsdb/interfaces/knowledge_base/llm_client.py +51 -17
  111. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  112. mindsdb/interfaces/model/model_controller.py +172 -168
  113. mindsdb/interfaces/query_context/context_controller.py +14 -2
  114. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +10 -14
  115. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  116. mindsdb/interfaces/skills/skill_tool.py +2 -2
  117. mindsdb/interfaces/skills/skills_controller.py +1 -4
  118. mindsdb/interfaces/skills/sql_agent.py +25 -19
  119. mindsdb/interfaces/storage/db.py +16 -6
  120. mindsdb/interfaces/storage/fs.py +114 -169
  121. mindsdb/interfaces/storage/json.py +19 -18
  122. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  123. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  124. mindsdb/interfaces/tasks/task_thread.py +7 -9
  125. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  126. mindsdb/interfaces/triggers/triggers_controller.py +47 -52
  127. mindsdb/migrations/migrate.py +16 -16
  128. mindsdb/utilities/api_status.py +58 -0
  129. mindsdb/utilities/config.py +68 -2
  130. mindsdb/utilities/exception.py +40 -1
  131. mindsdb/utilities/fs.py +0 -1
  132. mindsdb/utilities/hooks/profiling.py +17 -14
  133. mindsdb/utilities/json_encoder.py +24 -10
  134. mindsdb/utilities/langfuse.py +40 -45
  135. mindsdb/utilities/log.py +272 -0
  136. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  137. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  138. mindsdb/utilities/render/sqlalchemy_render.py +22 -20
  139. mindsdb/utilities/starters.py +0 -10
  140. mindsdb/utilities/utils.py +2 -2
  141. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/METADATA +286 -267
  142. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/RECORD +145 -159
  143. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  144. mindsdb/api/postgres/__init__.py +0 -0
  145. mindsdb/api/postgres/postgres_proxy/__init__.py +0 -0
  146. mindsdb/api/postgres/postgres_proxy/executor/__init__.py +0 -1
  147. mindsdb/api/postgres/postgres_proxy/executor/executor.py +0 -189
  148. mindsdb/api/postgres/postgres_proxy/postgres_packets/__init__.py +0 -0
  149. mindsdb/api/postgres/postgres_proxy/postgres_packets/errors.py +0 -322
  150. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_fields.py +0 -34
  151. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message.py +0 -31
  152. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_formats.py +0 -1265
  153. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_message_identifiers.py +0 -31
  154. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +0 -253
  155. mindsdb/api/postgres/postgres_proxy/postgres_proxy.py +0 -477
  156. mindsdb/api/postgres/postgres_proxy/utilities/__init__.py +0 -10
  157. mindsdb/api/postgres/start.py +0 -11
  158. mindsdb/integrations/handlers/mssql_handler/tests/__init__.py +0 -0
  159. mindsdb/integrations/handlers/mssql_handler/tests/test_mssql_handler.py +0 -169
  160. mindsdb/integrations/handlers/oracle_handler/tests/__init__.py +0 -0
  161. mindsdb/integrations/handlers/oracle_handler/tests/test_oracle_handler.py +0 -32
  162. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/WHEEL +0 -0
  163. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/licenses/LICENSE +0 -0
  164. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.10.0.dist-info}/top_level.txt +0 -0
mindsdb/utilities/log.py CHANGED
@@ -144,3 +144,275 @@ def getLogger(name=None):
     """
     initialize_logging()
     return logging.getLogger(name)
+
+
+def log_ram_info(logger: logging.Logger) -> None:
+    """Log RAM/memory information to the provided logger.
+
+    This function logs memory usage information: total, available, used memory in GB and memory
+    usage percentage. The logging only occurs if the logger is enabled for DEBUG level.
+
+    Args:
+        logger (logging.Logger): The logger instance to use for outputting memory information.
+    """
+    if logger.isEnabledFor(logging.DEBUG) is False:
+        return
+
+    try:
+        import psutil
+
+        memory = psutil.virtual_memory()
+        total_memory_gb = memory.total / (1024**3)
+        available_memory_gb = memory.available / (1024**3)
+        used_memory_gb = memory.used / (1024**3)
+        memory_percent = memory.percent
+        logger.debug(
+            f"Memory: {total_memory_gb:.1f}GB total, {available_memory_gb:.1f}GB available, {used_memory_gb:.1f}GB used ({memory_percent:.1f}%)"
+        )
+    except Exception as e:
+        logger.debug(f"Failed to get memory information: {e}")
+
+
+def log_system_info(logger: logging.Logger) -> None:
+    """Log detailed system information for debugging purposes.
+
+    The function only logs system information (if the logger is configured for DEBUG level):
+    - Operating system details (OS type, version, distribution, architecture)
+    - CPU information (processor type, physical and logical core counts)
+    - Memory information (total, available, used memory in GB and percentage)
+    - GPU information (NVIDIA, AMD, Intel graphics cards with memory details)
+
+    Args:
+        logger (logging.Logger): The logger instance to use for outputting system information.
+            Must be configured for DEBUG level to see the output.
+
+    Returns:
+        None
+
+    Note:
+        - For Linux systems, attempts to detect distribution via /etc/os-release, /etc/issue, or lsb_release
+        - For Windows systems, uses wmic commands to get detailed OS and GPU information
+        - For macOS systems, uses sw_vers and system_profiler commands
+        - GPU detection supports NVIDIA (via nvidia-smi), AMD (via rocm-smi), and fallback methods
+        - All subprocess calls have timeout protection to prevent hanging
+        - If any system information gathering fails, it logs the error and continues
+    """
+    if logger.isEnabledFor(logging.DEBUG) is False:
+        return
+
+    try:
+        import os
+        import shutil
+        import psutil
+        import platform
+        import subprocess
+
+        # region OS information
+        os_system = platform.system()
+        os_release = platform.release()
+        os_machine = platform.machine()
+
+        os_details = []
+
+        if os_system == "Linux":
+            # Try to detect Linux distribution
+            distro_info = "Unknown Linux"
+            try:
+                # Check for /etc/os-release (most modern distributions)
+                if os.path.exists("/etc/os-release"):
+                    with open("/etc/os-release", "r") as f:
+                        os_release_data = {}
+                        for line in f:
+                            if "=" in line:
+                                key, value = line.strip().split("=", 1)
+                                os_release_data[key] = value.strip('"')
+
+                    if "PRETTY_NAME" in os_release_data:
+                        distro_info = os_release_data["PRETTY_NAME"]
+                    elif "NAME" in os_release_data and "VERSION" in os_release_data:
+                        distro_info = f"{os_release_data['NAME']} {os_release_data['VERSION']}"
+                    elif "ID" in os_release_data:
+                        distro_info = os_release_data["ID"].title()
+                # Fallback to /etc/issue
+                elif os.path.exists("/etc/issue"):
+                    with open("/etc/issue", "r") as f:
+                        issue_content = f.read().strip()
+                        if issue_content:
+                            distro_info = issue_content.split("\n")[0]
+                # Fallback to lsb_release
+                else:
+                    try:
+                        result = subprocess.run(["lsb_release", "-d"], capture_output=True, text=True, timeout=2)
+                        if result.returncode == 0:
+                            distro_info = result.stdout.split(":")[-1].strip()
+                    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                        pass
+            except Exception:
+                pass
+
+            os_details.append(f"{distro_info} (kernel {os_release})")
+
+        elif os_system == "Windows":
+            os_name = "Windows"
+            os_version = "unknown"
+            try:
+                result = subprocess.run(
+                    ["wmic", "os", "get", "Caption,Version", "/format:list"], capture_output=True, text=True, timeout=3
+                )
+                if result.returncode == 0:
+                    windows_info = {}
+                    for line in result.stdout.strip().split("\n"):
+                        if "=" in line:
+                            key, value = line.strip().split("=", 1)
+                            windows_info[key] = value.strip()
+
+                    if "Caption" in windows_info and "Version" in windows_info:
+                        os_name = windows_info["Caption"]
+                        os_version = windows_info["Version"]
+            except Exception:
+                pass
+            os_details.append(f"{os_name} {os_release} (version {os_version})")
+
+        elif os_system == "Darwin":  # macOS
+            os_name = "macOS"
+            os_version = "unknown"
+            try:
+                result = subprocess.run(
+                    ["sw_vers", "-productName", "-productVersion"], capture_output=True, text=True, timeout=3
+                )
+                if result.returncode == 0:
+                    lines = result.stdout.strip().split("\n")
+                    if len(lines) >= 2:
+                        os_name = lines[0].strip()
+                        os_version = lines[1].strip()
+            except Exception:
+                pass
+            os_details.append(f"{os_name} {os_release} (version {os_version})")
+        else:
+            os_details.append(f"{os_system} {os_release}")
+
+        os_details.append(f"({os_machine})")
+        os_info = " ".join(os_details)
+        logger.debug(f"Operating System: {os_info}")
+        # endregion
+
+        # region CPU information
+        cpu_info = platform.processor()
+        if not cpu_info or cpu_info == "":
+            cpu_info = platform.machine()
+        cpu_count = psutil.cpu_count(logical=False)
+        cpu_count_logical = psutil.cpu_count(logical=True)
+        logger.debug(f"CPU: {cpu_info} ({cpu_count} physical cores, {cpu_count_logical} logical cores)")
+        # endregion
+
+        # memory information
+        log_ram_info(logger)
+
+        # region GPU information
+        gpu_info = []
+        try:
+            # Check for NVIDIA GPU (works on Linux, Windows, macOS)
+            nvidia_smi_path = shutil.which("nvidia-smi")
+            if nvidia_smi_path:
+                try:
+                    result = subprocess.run(
+                        [nvidia_smi_path, "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"],
+                        capture_output=True,
+                        text=True,
+                        timeout=3,
+                    )
+                    if result.returncode == 0:
+                        for line in result.stdout.strip().split("\n"):
+                            if line.strip():
+                                parts = line.split(", ")
+                                if len(parts) >= 2:
+                                    gpu_name = parts[0].strip()
+                                    gpu_memory = parts[1].strip()
+                                    gpu_info.append(f"{gpu_name} ({gpu_memory}MB)")
+                except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                    pass
+
+            # Check for AMD GPU (rocm-smi on Linux, wmic on Windows)
+            if not gpu_info:  # Only check AMD if no NVIDIA GPU found
+                if platform.system() == "Windows":
+                    # Use wmic on Windows to detect AMD GPU
+                    try:
+                        result = subprocess.run(
+                            ["wmic", "path", "win32_VideoController", "get", "name"],
+                            capture_output=True,
+                            text=True,
+                            timeout=3,
+                        )
+                        if result.returncode == 0:
+                            for line in result.stdout.strip().split("\n"):
+                                line = line.strip()
+                                if line and line != "Name" and "AMD" in line.upper():
+                                    gpu_info.append(line)
+                    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                        pass
+                else:
+                    # Use rocm-smi on Linux/macOS
+                    rocm_smi_path = shutil.which("rocm-smi")
+                    if rocm_smi_path:
+                        try:
+                            result = subprocess.run(
+                                [rocm_smi_path, "--showproductname"], capture_output=True, text=True, timeout=3
+                            )
+                            if result.returncode == 0:
+                                for line in result.stdout.strip().split("\n"):
+                                    if "Product Name" in line:
+                                        gpu_name = line.split(":")[-1].strip()
+                                        gpu_info.append(gpu_name)
+                        except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                            pass
+
+            # Fallback: Try to detect any GPU using platform-specific methods
+            if not gpu_info:
+                if platform.system() == "Windows":
+                    try:
+                        # Use wmic to get all video controllers
+                        result = subprocess.run(
+                            ["wmic", "path", "win32_VideoController", "get", "name"],
+                            capture_output=True,
+                            text=True,
+                            timeout=3,
+                        )
+                        if result.returncode == 0:
+                            for line in result.stdout.strip().split("\n"):
+                                line = line.strip()
+                                if (
+                                    line
+                                    and line != "Name"
+                                    and any(
+                                        keyword in line.upper()
+                                        for keyword in ["NVIDIA", "AMD", "INTEL", "RADEON", "GEFORCE"]
+                                    )
+                                ):
+                                    gpu_info.append(line)
+                    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                        pass
+                elif platform.system() == "Darwin":  # macOS
+                    try:
+                        # Use system_profiler on macOS
+                        result = subprocess.run(
+                            ["system_profiler", "SPDisplaysDataType"], capture_output=True, text=True, timeout=3
+                        )
+                        if result.returncode == 0:
+                            for line in result.stdout.strip().split("\n"):
+                                if "Chipset Model:" in line:
+                                    gpu_name = line.split(":")[-1].strip()
+                                    gpu_info.append(gpu_name)
+                    except (subprocess.TimeoutExpired, FileNotFoundError, OSError):
+                        pass
+
+        except Exception:
+            pass
+
+        if gpu_info:
+            logger.debug(f"GPU: {', '.join(gpu_info)}")
+        else:
+            logger.debug("GPU: Not detected or not supported")
+        # endregion
+
+    except Exception as e:
+        logger.debug(f"Failed to get system information: {e}")
mindsdb/utilities/ml_task_queue/consumer.py CHANGED
@@ -24,7 +24,7 @@ from mindsdb.utilities.ml_task_queue.const import (
     ML_TASK_STATUS,
     TASKS_STREAM_NAME,
     TASKS_STREAM_CONSUMER_NAME,
-    TASKS_STREAM_CONSUMER_GROUP_NAME
+    TASKS_STREAM_CONSUMER_GROUP_NAME,
 )
 from mindsdb.utilities import log
 from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401
@@ -33,9 +33,10 @@ logger = log.getLogger(__name__)


 def _save_thread_link(func: Callable) -> Callable:
-    """ Decorator for MLTaskConsumer.
-        Save thread in which func is executed to a list.
+    """Decorator for MLTaskConsumer.
+    Save thread in which func is executed to a list.
     """
+
     @wraps(func)
     def wrapper(self, *args, **kwargs) -> None:
         current_thread = threading.current_thread()
@@ -45,22 +46,23 @@ def _save_thread_link(func: Callable) -> Callable:
         finally:
             self._listen_message_threads.remove(current_thread)
         return result
+
     return wrapper


 class MLTaskConsumer(BaseRedisQueue):
-    """ Listener of ML tasks queue and tasks executioner.
-        Each new message waited and executed in separate thread.
-
-        Attributes:
-            _ready_event (Event): set if ready to start new queue listen thread
-            _stop_event (Event): set if need to stop all threads/processes
-            cpu_stat (list[float]): CPU usage statistic. Each value is 0-100 float representing CPU usage in %
-            _collect_cpu_stat_thread (Thread): pointer to thread that collecting CPU usage statistic
-            _listen_message_threads (list[Thread]): list of pointers to threads where queue messages are listening/processing
-            db (Redis): database object
-            cache: redis cache abstrtaction
-            consumer_group: redis consumer group object
+    """Listener of ML tasks queue and tasks executioner.
+    Each new message waited and executed in separate thread.
+
+    Attributes:
+        _ready_event (Event): set if ready to start new queue listen thread
+        _stop_event (Event): set if need to stop all threads/processes
+        cpu_stat (list[float]): CPU usage statistic. Each value is 0-100 float representing CPU usage in %
+        _collect_cpu_stat_thread (Thread): pointer to thread that collecting CPU usage statistic
+        _listen_message_threads (list[Thread]): list of pointers to threads where queue messages are listening/processing
+        db (Redis): database object
+        cache: redis cache abstrtaction
+        consumer_group: redis consumer group object
     """

     def __init__(self) -> None:
@@ -75,7 +77,7 @@ class MLTaskConsumer(BaseRedisQueue):
         # region collect cpu usage statistic
         self.cpu_stat = [0] * 10
         self._collect_cpu_stat_thread = threading.Thread(
-            target=self._collect_cpu_stat, name='MLTaskConsumer._collect_cpu_stat'
+            target=self._collect_cpu_stat, name="MLTaskConsumer._collect_cpu_stat"
         )
         self._collect_cpu_stat_thread.start()
         # endregion
@@ -83,14 +85,14 @@ class MLTaskConsumer(BaseRedisQueue):
         self._listen_message_threads = []

         # region connect to redis
-        config = Config().get('ml_task_queue', {})
+        config = Config().get("ml_task_queue", {})
         self.db = Database(
-            host=config.get('host', 'localhost'),
-            port=config.get('port', 6379),
-            db=config.get('db', 0),
-            username=config.get('username'),
-            password=config.get('password'),
-            protocol=3
+            host=config.get("host", "localhost"),
+            port=config.get("port", 6379),
+            db=config.get("db", 0),
+            username=config.get("username"),
+            password=config.get("password"),
+            protocol=3,
         )
         self.wait_redis_ping(60)

@@ -102,30 +104,29 @@ class MLTaskConsumer(BaseRedisQueue):
         # endregion

     def _collect_cpu_stat(self) -> None:
-        """ Collect CPU usage statistic. Executerd in thread.
-        """
+        """Collect CPU usage statistic. Executerd in thread."""
         while self._stop_event.is_set() is False:
             self.cpu_stat = self.cpu_stat[1:]
             self.cpu_stat.append(psutil.cpu_percent())
             time.sleep(1)

     def get_avg_cpu_usage(self) -> float:
-        """ get average CPU usage for last period (10s by default)
+        """get average CPU usage for last period (10s by default)

-            Returns:
-                float: 0-100 value, average CPU usage
+        Returns:
+            float: 0-100 value, average CPU usage
         """
         return sum(self.cpu_stat) / len(self.cpu_stat)

     def wait_free_resources(self) -> None:
-        """ Sleep in thread untill there are free resources. Checks:
-            - avg CPU usage is less than 60%
-            - current CPU usage is less than 60%
-            - current tasks count is less than (N CPU cores) / 8
+        """Sleep in thread untill there are free resources. Checks:
+        - avg CPU usage is less than 60%
+        - current CPU usage is less than 60%
+        - current tasks count is less than (N CPU cores) / 8
         """
         config = Config()
-        is_cloud = config.get('cloud', False)
-        processes_dir = Path(tempfile.gettempdir()).joinpath('mindsdb/processes/learn/')
+        is_cloud = config.get("cloud", False)
+        processes_dir = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/learn/")
         while True:
             while self.get_avg_cpu_usage() > 60 or max(self.cpu_stat[-3:]) > 60:
                 time.sleep(1)
@@ -139,8 +140,7 @@ class MLTaskConsumer(BaseRedisQueue):

     @_save_thread_link
     def _listen(self) -> None:
-        """ Listen message queue untill get new message. Execute task.
-        """
+        """Listen message queue untill get new message. Execute task."""
         message = None
         while message is None:
             self.wait_free_resources()
@@ -150,8 +150,8 @@ class MLTaskConsumer(BaseRedisQueue):

             try:
                 message = self.consumer_group.read(count=1, block=1000, consumer=TASKS_STREAM_CONSUMER_NAME)
-            except RedisConnectionError as e:
-                logger.error(f"Can't connect to Redis: {e}")
+            except RedisConnectionError:
+                logger.exception("Can't connect to Redis:")
                 self._stop_event.set()
                 return
             except Exception:
@@ -168,13 +168,13 @@ class MLTaskConsumer(BaseRedisQueue):
             self.consumer_group.streams[TASKS_STREAM_NAME].ack(message_id)
             self.consumer_group.streams[TASKS_STREAM_NAME].delete(message_id)

-            payload = from_bytes(message_content[b'payload'])
-            task_type = ML_TASK_TYPE(message_content[b'task_type'])
-            model_id = int(message_content[b'model_id'])
-            company_id = message_content[b'company_id']
+            payload = from_bytes(message_content[b"payload"])
+            task_type = ML_TASK_TYPE(message_content[b"task_type"])
+            model_id = int(message_content[b"model_id"])
+            company_id = message_content[b"company_id"]
             if len(company_id) == 0:
                 company_id = None
-            redis_key = RedisKey(message_content.get(b'redis_key'))
+            redis_key = RedisKey(message_content.get(b"redis_key"))

             # region read dataframe
             dataframe_bytes = self.cache.get(redis_key.dataframe)
@@ -184,16 +184,13 @@ class MLTaskConsumer(BaseRedisQueue):
                 self.cache.delete(redis_key.dataframe)
             # endregion

-            ctx.load(payload['context'])
+            ctx.load(payload["context"])
         finally:
             self._ready_event.set()

         try:
             task = process_cache.apply_async(
-                task_type=task_type,
-                model_id=model_id,
-                payload=payload,
-                dataframe=dataframe
+                task_type=task_type, model_id=model_id, payload=payload, dataframe=dataframe
             )
             status_notifier = StatusNotifier(redis_key, ML_TASK_STATUS.PROCESSING, self.db, self.cache)
             status_notifier.start()
@@ -215,20 +212,18 @@ class MLTaskConsumer(BaseRedisQueue):
             self.cache.set(redis_key.status, ML_TASK_STATUS.COMPLETE.value, 180)

     def run(self) -> None:
-        """ Start new listen thread each time when _ready_event is set
-        """
+        """Start new listen thread each time when _ready_event is set"""
         self._ready_event.set()
         while self._stop_event.is_set() is False:
             self._ready_event.wait(timeout=1)
             if self._ready_event.is_set() is False:
                 continue
             self._ready_event.clear()
-            threading.Thread(target=self._listen, name='MLTaskConsumer._listen').start()
+            threading.Thread(target=self._listen, name="MLTaskConsumer._listen").start()
         self.stop()

     def stop(self) -> None:
-        """ Stop all executing threads
-        """
+        """Stop all executing threads"""
         self._stop_event.set()
         for thread in (*self._listen_message_threads, self._collect_cpu_stat_thread):
             try:
@@ -238,17 +233,16 @@ class MLTaskConsumer(BaseRedisQueue):
             pass


-@mark_process(name='internal', custom_mark='ml_task_consumer')
+@mark_process(name="internal", custom_mark="ml_task_consumer")
 def start(verbose: bool) -> None:
-    """ Create task queue consumer and start listen the queue
-    """
+    """Create task queue consumer and start listen the queue"""
     consumer = MLTaskConsumer()
     signal.signal(signal.SIGTERM, lambda _x, _y: consumer.stop())
     try:
         consumer.run()
     except Exception as e:
         consumer.stop()
-        logger.error(f'Got exception: {e}', flush=True)
+        logger.error(f"Got exception: {e}", flush=True)
         raise
     finally:
-        logger.info('Consumer process stopped', flush=True)
+        logger.info("Consumer process stopped", flush=True)
mindsdb/utilities/ml_task_queue/producer.py CHANGED
@@ -8,11 +8,7 @@ from mindsdb.utilities.config import Config
 from mindsdb.utilities.ml_task_queue.utils import RedisKey, to_bytes
 from mindsdb.utilities.ml_task_queue.task import Task
 from mindsdb.utilities.ml_task_queue.base import BaseRedisQueue
-from mindsdb.utilities.ml_task_queue.const import (
-    TASKS_STREAM_NAME,
-    ML_TASK_TYPE,
-    ML_TASK_STATUS
-)
+from mindsdb.utilities.ml_task_queue.const import TASKS_STREAM_NAME, ML_TASK_TYPE, ML_TASK_STATUS
 from mindsdb.utilities import log
 from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401

@@ -20,25 +16,25 @@ logger = log.getLogger(__name__)


 class MLTaskProducer(BaseRedisQueue):
-    """ Interface around the redis for putting tasks to the queue
+    """Interface around the redis for putting tasks to the queue

-        Attributes:
-            db (Redis): database object
-            stream
-            cache
-            pubsub
+    Attributes:
+        db (Redis): database object
+        stream
+        cache
+        pubsub
     """

     def __init__(self) -> None:
-        config = Config().get('ml_task_queue', {})
+        config = Config().get("ml_task_queue", {})

         self.db = Database(
-            host=config.get('host', 'localhost'),
-            port=config.get('port', 6379),
-            db=config.get('db', 0),
-            username=config.get('username'),
-            password=config.get('password'),
-            protocol=3
+            host=config.get("host", "localhost"),
+            port=config.get("port", 6379),
+            db=config.get("db", 0),
+            username=config.get("username"),
+            password=config.get("password"),
+            protocol=3,
         )
         self.wait_redis_ping(60)

@@ -47,26 +43,26 @@ class MLTaskProducer(BaseRedisQueue):
         self.pubsub = self.db.pubsub()

     def apply_async(self, task_type: ML_TASK_TYPE, model_id: int, payload: dict, dataframe: DataFrame = None) -> Task:
-        ''' Add tasks to the queue
+        """Add tasks to the queue

-            Args:
-                task_type (ML_TASK_TYPE): type of the task
-                model_id (int): model identifier
-                payload (dict): lightweight model data that will be added to stream message
-                dataframe (DataFrame): dataframe will be transfered via regular redis storage
+        Args:
+            task_type (ML_TASK_TYPE): type of the task
+            model_id (int): model identifier
+            payload (dict): lightweight model data that will be added to stream message
+            dataframe (DataFrame): dataframe will be transfered via regular redis storage

-            Returns:
-                Task: object representing the task
-            '''
+        Returns:
+            Task: object representing the task
+        """
         try:
             payload = pickle.dumps(payload, protocol=5)
             redis_key = RedisKey.new()
             message = {
                 "task_type": task_type.value,
-                "company_id": '' if ctx.company_id is None else ctx.company_id,  # None can not be dumped
+                "company_id": "" if ctx.company_id is None else ctx.company_id,  # None can not be dumped
                 "model_id": model_id,
                 "payload": payload,
-                "redis_key": redis_key.base
+                "redis_key": redis_key.base,
             }

             self.wait_redis_ping()
@@ -77,5 +73,5 @@ class MLTaskProducer(BaseRedisQueue):
             self.stream.add(message)
             return Task(self.db, redis_key)
         except ConnectionError:
-            logger.error('Cant send message to redis: connect failed')
+            logger.exception("Cant send message to redis: connect failed")
             raise
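
Note: a minimal sketch of enqueueing work from the producer side, assuming a reachable Redis configured under the ml_task_queue config section; the task-type member and model id below are placeholders, not values taken from this diff:

    from mindsdb.utilities.ml_task_queue.const import ML_TASK_TYPE
    from mindsdb.utilities.ml_task_queue.producer import MLTaskProducer

    producer = MLTaskProducer()           # connects and pings Redis using the "ml_task_queue" config section
    task = producer.apply_async(
        task_type=ML_TASK_TYPE.LEARN,     # assumption: LEARN is a member of the enum
        model_id=1,                       # placeholder model identifier
        payload={"context": {}},          # lightweight data pickled into the stream message
    )
    # task wraps the generated RedisKey; status and results flow back through Redis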