MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic. Click here for more details.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +50 -23
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +7 -6
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/utilities/log.py
CHANGED
|
@@ -144,3 +144,275 @@ def getLogger(name=None):
|
|
|
144
144
|
"""
|
|
145
145
|
initialize_logging()
|
|
146
146
|
return logging.getLogger(name)
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def log_ram_info(logger: logging.Logger) -> None:
    """Log RAM/memory information to the provided logger.

    Emits one DEBUG record with total, available and used memory in GB plus the
    memory usage percentage. Nothing is computed or logged unless the logger is
    enabled for DEBUG level.

    Args:
        logger (logging.Logger): The logger instance to use for outputting memory information.
    """
    if not logger.isEnabledFor(logging.DEBUG):
        return

    try:
        # Imported inside the try so that a missing or broken psutil is reported
        # by the handler below instead of raising to the caller.
        import psutil

        bytes_per_gb = 1024**3
        memory = psutil.virtual_memory()
        total_memory_gb = memory.total / bytes_per_gb
        available_memory_gb = memory.available / bytes_per_gb
        used_memory_gb = memory.used / bytes_per_gb
        memory_percent = memory.percent
        logger.debug(
            f"Memory: {total_memory_gb:.1f}GB total, {available_memory_gb:.1f}GB available, {used_memory_gb:.1f}GB used ({memory_percent:.1f}%)"
        )
    except Exception as e:
        # Best-effort diagnostics: a probing failure is logged, never propagated.
        logger.debug(f"Failed to get memory information: {e}")
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _run_command(command: list, timeout: float) -> "str | None":
    """Run *command* and return its stdout, or None when it cannot be used.

    Best-effort helper: a missing executable, a timeout, undecodable output or a
    non-zero exit code all yield None instead of raising.
    """
    import subprocess

    try:
        result = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
    except (subprocess.SubprocessError, OSError, ValueError):
        return None
    if result.returncode != 0:
        return None
    return result.stdout


def _parse_key_value_lines(lines) -> dict:
    """Parse an iterable of ``KEY=VALUE`` lines into a dict, skipping lines without ``=``."""
    parsed = {}
    for line in lines:
        if "=" in line:
            key, value = line.strip().split("=", 1)
            parsed[key] = value.strip()
    return parsed


def _detect_linux_distro() -> str:
    """Return a Linux distribution name, trying /etc/os-release, /etc/issue, then lsb_release."""
    import os

    try:
        if os.path.exists("/etc/os-release"):
            with open("/etc/os-release", "r", encoding="utf-8", errors="replace") as f:
                # Values may be wrapped in either double or single quotes.
                data = {key: value.strip("\"'") for key, value in _parse_key_value_lines(f).items()}
            if "PRETTY_NAME" in data:
                return data["PRETTY_NAME"]
            if "NAME" in data and "VERSION" in data:
                return f"{data['NAME']} {data['VERSION']}"
            if "ID" in data:
                return data["ID"].title()

        # Reached when /etc/os-release is absent or carries no usable name.
        if os.path.exists("/etc/issue"):
            with open("/etc/issue", "r", encoding="utf-8", errors="replace") as f:
                issue_content = f.read().strip()
            if issue_content:
                return issue_content.split("\n")[0]
    except OSError:
        # Unreadable files: fall through to lsb_release.
        pass

    output = _run_command(["lsb_release", "-d"], timeout=2)
    if output is not None:
        description = output.split(":")[-1].strip()
        if description:
            return description
    return "Unknown Linux"


def _detect_windows_version() -> tuple:
    """Return ``(caption, version)`` reported by wmic, or ``("Windows", "unknown")``."""
    output = _run_command(["wmic", "os", "get", "Caption,Version", "/format:list"], timeout=3)
    if output is not None:
        info = _parse_key_value_lines(output.strip().split("\n"))
        if "Caption" in info and "Version" in info:
            return info["Caption"], info["Version"]
    return "Windows", "unknown"


def _detect_macos_version() -> tuple:
    """Return ``(product name, product version)`` from sw_vers, or ``("macOS", "unknown")``."""
    output = _run_command(["sw_vers", "-productName", "-productVersion"], timeout=3)
    if output is not None:
        lines = output.strip().split("\n")
        if len(lines) >= 2:
            return lines[0].strip(), lines[1].strip()
    return "macOS", "unknown"


def _describe_os() -> str:
    """Build the one-line operating system description, ending with the machine architecture."""
    import platform

    os_system = platform.system()
    os_release = platform.release()

    if os_system == "Linux":
        description = f"{_detect_linux_distro()} (kernel {os_release})"
    elif os_system == "Windows":
        os_name, os_version = _detect_windows_version()
        description = f"{os_name} {os_release} (version {os_version})"
    elif os_system == "Darwin":  # macOS
        os_name, os_version = _detect_macos_version()
        description = f"{os_name} {os_release} (version {os_version})"
    else:
        description = f"{os_system} {os_release}"

    return f"{description} ({platform.machine()})"


def _detect_gpus() -> list:
    """Return human-readable GPU descriptions; an empty list when none is detected.

    Detection order: nvidia-smi, then AMD (wmic on Windows, rocm-smi elsewhere),
    then a generic platform fallback (wmic keyword match on Windows,
    system_profiler on macOS). The first step that finds something wins.
    """
    import platform
    import shutil

    os_system = platform.system()

    nvidia_smi_path = shutil.which("nvidia-smi")
    if nvidia_smi_path:
        output = _run_command(
            [nvidia_smi_path, "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"], timeout=3
        )
        nvidia_gpus = []
        for line in (output or "").strip().split("\n"):
            if line.strip():
                parts = line.split(", ")
                if len(parts) >= 2:
                    nvidia_gpus.append(f"{parts[0].strip()} ({parts[1].strip()}MB)")
        if nvidia_gpus:
            return nvidia_gpus

    if os_system == "Windows":
        # Query the video controllers once and reuse the output for both the
        # AMD-specific check and the generic keyword fallback.
        output = _run_command(["wmic", "path", "win32_VideoController", "get", "name"], timeout=3)
        names = [line.strip() for line in (output or "").strip().split("\n")]
        names = [name for name in names if name and name != "Name"]
        amd_gpus = [name for name in names if "AMD" in name.upper()]
        if amd_gpus:
            return amd_gpus
        keywords = ("NVIDIA", "AMD", "INTEL", "RADEON", "GEFORCE")
        return [name for name in names if any(keyword in name.upper() for keyword in keywords)]

    rocm_smi_path = shutil.which("rocm-smi")
    if rocm_smi_path:
        output = _run_command([rocm_smi_path, "--showproductname"], timeout=3)
        rocm_gpus = [
            line.split(":")[-1].strip() for line in (output or "").strip().split("\n") if "Product Name" in line
        ]
        if rocm_gpus:
            return rocm_gpus

    if os_system == "Darwin":  # macOS
        output = _run_command(["system_profiler", "SPDisplaysDataType"], timeout=3)
        return [
            line.split(":")[-1].strip() for line in (output or "").strip().split("\n") if "Chipset Model:" in line
        ]

    return []


def log_system_info(logger: logging.Logger) -> None:
    """Log detailed system information for debugging purposes.

    The function only logs system information if the logger is enabled for DEBUG level:
    - Operating system details (OS type, version, distribution, architecture)
    - CPU information (processor type, physical and logical core counts)
    - Memory information (delegated to ``log_ram_info``)
    - GPU information (NVIDIA, AMD, Intel graphics cards where detectable)

    Args:
        logger (logging.Logger): The logger instance to use for outputting system information.
            Must be enabled for DEBUG level to see the output.

    Returns:
        None

    Note:
        - For Linux systems, attempts to detect distribution via /etc/os-release, /etc/issue, or lsb_release
        - For Windows systems, uses wmic commands to get OS and GPU information
        - For macOS systems, uses sw_vers and system_profiler commands
        - GPU detection supports NVIDIA (via nvidia-smi), AMD (via rocm-smi), and fallback methods
        - All subprocess calls are run with a timeout
        - If any system information gathering fails, it logs the error at DEBUG level and does not raise
    """
    if not logger.isEnabledFor(logging.DEBUG):
        return

    try:
        import platform

        # Imported up front (inside the try) so a missing psutil is reported as a
        # single failure record before any partial output is produced.
        import psutil

        # region OS information
        logger.debug(f"Operating System: {_describe_os()}")
        # endregion

        # region CPU information
        # platform.processor() may be empty; fall back to the machine type.
        cpu_info = platform.processor() or platform.machine()
        cpu_count = psutil.cpu_count(logical=False)
        cpu_count_logical = psutil.cpu_count(logical=True)
        logger.debug(f"CPU: {cpu_info} ({cpu_count} physical cores, {cpu_count_logical} logical cores)")
        # endregion

        # memory information
        log_ram_info(logger)

        # region GPU information
        try:
            gpu_info = _detect_gpus()
        except Exception:
            # GPU probing is best-effort; report "not detected" rather than abort.
            gpu_info = []

        if gpu_info:
            logger.debug(f"GPU: {', '.join(gpu_info)}")
        else:
            logger.debug("GPU: Not detected or not supported")
        # endregion

    except Exception as e:
        logger.debug(f"Failed to get system information: {e}")
|
|
@@ -24,7 +24,7 @@ from mindsdb.utilities.ml_task_queue.const import (
|
|
|
24
24
|
ML_TASK_STATUS,
|
|
25
25
|
TASKS_STREAM_NAME,
|
|
26
26
|
TASKS_STREAM_CONSUMER_NAME,
|
|
27
|
-
TASKS_STREAM_CONSUMER_GROUP_NAME
|
|
27
|
+
TASKS_STREAM_CONSUMER_GROUP_NAME,
|
|
28
28
|
)
|
|
29
29
|
from mindsdb.utilities import log
|
|
30
30
|
from mindsdb.utilities.sentry import sentry_sdk # noqa: F401
|
|
@@ -33,9 +33,10 @@ logger = log.getLogger(__name__)
|
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def _save_thread_link(func: Callable) -> Callable:
|
|
36
|
-
"""
|
|
37
|
-
|
|
36
|
+
"""Decorator for MLTaskConsumer.
|
|
37
|
+
Save thread in which func is executed to a list.
|
|
38
38
|
"""
|
|
39
|
+
|
|
39
40
|
@wraps(func)
|
|
40
41
|
def wrapper(self, *args, **kwargs) -> None:
|
|
41
42
|
current_thread = threading.current_thread()
|
|
@@ -45,22 +46,23 @@ def _save_thread_link(func: Callable) -> Callable:
|
|
|
45
46
|
finally:
|
|
46
47
|
self._listen_message_threads.remove(current_thread)
|
|
47
48
|
return result
|
|
49
|
+
|
|
48
50
|
return wrapper
|
|
49
51
|
|
|
50
52
|
|
|
51
53
|
class MLTaskConsumer(BaseRedisQueue):
|
|
52
|
-
"""
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
54
|
+
"""Listener of ML tasks queue and tasks executioner.
|
|
55
|
+
Each new message waited and executed in separate thread.
|
|
56
|
+
|
|
57
|
+
Attributes:
|
|
58
|
+
_ready_event (Event): set if ready to start new queue listen thread
|
|
59
|
+
_stop_event (Event): set if need to stop all threads/processes
|
|
60
|
+
cpu_stat (list[float]): CPU usage statistic. Each value is 0-100 float representing CPU usage in %
|
|
61
|
+
_collect_cpu_stat_thread (Thread): pointer to thread that collecting CPU usage statistic
|
|
62
|
+
_listen_message_threads (list[Thread]): list of pointers to threads where queue messages are listening/processing
|
|
63
|
+
db (Redis): database object
|
|
64
|
+
cache: redis cache abstrtaction
|
|
65
|
+
consumer_group: redis consumer group object
|
|
64
66
|
"""
|
|
65
67
|
|
|
66
68
|
def __init__(self) -> None:
|
|
@@ -75,7 +77,7 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
75
77
|
# region collect cpu usage statistic
|
|
76
78
|
self.cpu_stat = [0] * 10
|
|
77
79
|
self._collect_cpu_stat_thread = threading.Thread(
|
|
78
|
-
target=self._collect_cpu_stat, name=
|
|
80
|
+
target=self._collect_cpu_stat, name="MLTaskConsumer._collect_cpu_stat"
|
|
79
81
|
)
|
|
80
82
|
self._collect_cpu_stat_thread.start()
|
|
81
83
|
# endregion
|
|
@@ -83,14 +85,14 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
83
85
|
self._listen_message_threads = []
|
|
84
86
|
|
|
85
87
|
# region connect to redis
|
|
86
|
-
config = Config().get(
|
|
88
|
+
config = Config().get("ml_task_queue", {})
|
|
87
89
|
self.db = Database(
|
|
88
|
-
host=config.get(
|
|
89
|
-
port=config.get(
|
|
90
|
-
db=config.get(
|
|
91
|
-
username=config.get(
|
|
92
|
-
password=config.get(
|
|
93
|
-
protocol=3
|
|
90
|
+
host=config.get("host", "localhost"),
|
|
91
|
+
port=config.get("port", 6379),
|
|
92
|
+
db=config.get("db", 0),
|
|
93
|
+
username=config.get("username"),
|
|
94
|
+
password=config.get("password"),
|
|
95
|
+
protocol=3,
|
|
94
96
|
)
|
|
95
97
|
self.wait_redis_ping(60)
|
|
96
98
|
|
|
@@ -102,30 +104,29 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
102
104
|
# endregion
|
|
103
105
|
|
|
104
106
|
def _collect_cpu_stat(self) -> None:
    """Collect CPU usage statistic. Executed in a thread.

    Roughly once per second, drops the oldest sample from ``self.cpu_stat`` and
    appends the current ``psutil.cpu_percent()`` reading, until ``_stop_event``
    is set.
    """
    while not self._stop_event.is_set():
        # Build the new window first and rebind it in one assignment, so a reader
        # in another thread never observes a window that is one sample short.
        self.cpu_stat = [*self.cpu_stat[1:], psutil.cpu_percent()]
        # Wait on the stop event rather than sleeping: the loop still ticks once
        # per second but returns promptly when a stop is requested.
        self._stop_event.wait(timeout=1)
|
|
111
112
|
|
|
112
113
|
def get_avg_cpu_usage(self) -> float:
    """Get average CPU usage over the collected window (``self.cpu_stat``).

    Returns:
        float: 0-100 value, average CPU usage
    """
    # Take one snapshot of the list: the attribute is rebound by the collector
    # thread, so reading it twice could mix the sum and length of two windows.
    samples = self.cpu_stat
    return sum(samples) / len(samples)
|
|
119
120
|
|
|
120
121
|
def wait_free_resources(self) -> None:
|
|
121
|
-
"""
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
122
|
+
"""Sleep in thread untill there are free resources. Checks:
|
|
123
|
+
- avg CPU usage is less than 60%
|
|
124
|
+
- current CPU usage is less than 60%
|
|
125
|
+
- current tasks count is less than (N CPU cores) / 8
|
|
125
126
|
"""
|
|
126
127
|
config = Config()
|
|
127
|
-
is_cloud = config.get(
|
|
128
|
-
processes_dir = Path(tempfile.gettempdir()).joinpath(
|
|
128
|
+
is_cloud = config.get("cloud", False)
|
|
129
|
+
processes_dir = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/learn/")
|
|
129
130
|
while True:
|
|
130
131
|
while self.get_avg_cpu_usage() > 60 or max(self.cpu_stat[-3:]) > 60:
|
|
131
132
|
time.sleep(1)
|
|
@@ -139,8 +140,7 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
139
140
|
|
|
140
141
|
@_save_thread_link
|
|
141
142
|
def _listen(self) -> None:
|
|
142
|
-
"""
|
|
143
|
-
"""
|
|
143
|
+
"""Listen message queue untill get new message. Execute task."""
|
|
144
144
|
message = None
|
|
145
145
|
while message is None:
|
|
146
146
|
self.wait_free_resources()
|
|
@@ -150,8 +150,8 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
150
150
|
|
|
151
151
|
try:
|
|
152
152
|
message = self.consumer_group.read(count=1, block=1000, consumer=TASKS_STREAM_CONSUMER_NAME)
|
|
153
|
-
except RedisConnectionError
|
|
154
|
-
logger.
|
|
153
|
+
except RedisConnectionError:
|
|
154
|
+
logger.exception("Can't connect to Redis:")
|
|
155
155
|
self._stop_event.set()
|
|
156
156
|
return
|
|
157
157
|
except Exception:
|
|
@@ -168,13 +168,13 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
168
168
|
self.consumer_group.streams[TASKS_STREAM_NAME].ack(message_id)
|
|
169
169
|
self.consumer_group.streams[TASKS_STREAM_NAME].delete(message_id)
|
|
170
170
|
|
|
171
|
-
payload = from_bytes(message_content[b
|
|
172
|
-
task_type = ML_TASK_TYPE(message_content[b
|
|
173
|
-
model_id = int(message_content[b
|
|
174
|
-
company_id = message_content[b
|
|
171
|
+
payload = from_bytes(message_content[b"payload"])
|
|
172
|
+
task_type = ML_TASK_TYPE(message_content[b"task_type"])
|
|
173
|
+
model_id = int(message_content[b"model_id"])
|
|
174
|
+
company_id = message_content[b"company_id"]
|
|
175
175
|
if len(company_id) == 0:
|
|
176
176
|
company_id = None
|
|
177
|
-
redis_key = RedisKey(message_content.get(b
|
|
177
|
+
redis_key = RedisKey(message_content.get(b"redis_key"))
|
|
178
178
|
|
|
179
179
|
# region read dataframe
|
|
180
180
|
dataframe_bytes = self.cache.get(redis_key.dataframe)
|
|
@@ -184,16 +184,13 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
184
184
|
self.cache.delete(redis_key.dataframe)
|
|
185
185
|
# endregion
|
|
186
186
|
|
|
187
|
-
ctx.load(payload[
|
|
187
|
+
ctx.load(payload["context"])
|
|
188
188
|
finally:
|
|
189
189
|
self._ready_event.set()
|
|
190
190
|
|
|
191
191
|
try:
|
|
192
192
|
task = process_cache.apply_async(
|
|
193
|
-
task_type=task_type,
|
|
194
|
-
model_id=model_id,
|
|
195
|
-
payload=payload,
|
|
196
|
-
dataframe=dataframe
|
|
193
|
+
task_type=task_type, model_id=model_id, payload=payload, dataframe=dataframe
|
|
197
194
|
)
|
|
198
195
|
status_notifier = StatusNotifier(redis_key, ML_TASK_STATUS.PROCESSING, self.db, self.cache)
|
|
199
196
|
status_notifier.start()
|
|
@@ -215,20 +212,18 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
215
212
|
self.cache.set(redis_key.status, ML_TASK_STATUS.COMPLETE.value, 180)
|
|
216
213
|
|
|
217
214
|
def run(self) -> None:
    """Start a new listen thread each time ``_ready_event`` is set.

    Loops until ``_stop_event`` is set, then calls ``self.stop()``.
    """
    self._ready_event.set()
    while not self._stop_event.is_set():
        # Event.wait() returns False on timeout; the 1s timeout makes the loop
        # re-check the stop flag at least once per second.
        if not self._ready_event.wait(timeout=1):
            continue
        self._ready_event.clear()
        threading.Thread(target=self._listen, name="MLTaskConsumer._listen").start()
    self.stop()
|
|
228
224
|
|
|
229
225
|
def stop(self) -> None:
|
|
230
|
-
"""
|
|
231
|
-
"""
|
|
226
|
+
"""Stop all executing threads"""
|
|
232
227
|
self._stop_event.set()
|
|
233
228
|
for thread in (*self._listen_message_threads, self._collect_cpu_stat_thread):
|
|
234
229
|
try:
|
|
@@ -238,17 +233,16 @@ class MLTaskConsumer(BaseRedisQueue):
|
|
|
238
233
|
pass
|
|
239
234
|
|
|
240
235
|
|
|
241
|
-
@mark_process(name=
|
|
236
|
+
@mark_process(name="internal", custom_mark="ml_task_consumer")
def start(verbose: bool) -> None:
    """Create the task queue consumer and start listening to the queue.

    Installs a SIGTERM handler that stops the consumer, then runs
    ``consumer.run()`` until it returns or raises.

    Args:
        verbose (bool): not used by this function.

    Raises:
        Exception: anything raised by ``consumer.run()`` is logged and re-raised
            after the consumer has been stopped.
    """
    consumer = MLTaskConsumer()
    signal.signal(signal.SIGTERM, lambda _x, _y: consumer.stop())
    try:
        consumer.run()
    except Exception as e:
        consumer.stop()
        # Logger methods do not accept print()'s ``flush`` keyword; passing it
        # raises TypeError, which would replace the exception being reported.
        logger.error(f"Got exception: {e}")
        raise
    finally:
        logger.info("Consumer process stopped")
|
|
@@ -8,11 +8,7 @@ from mindsdb.utilities.config import Config
|
|
|
8
8
|
from mindsdb.utilities.ml_task_queue.utils import RedisKey, to_bytes
|
|
9
9
|
from mindsdb.utilities.ml_task_queue.task import Task
|
|
10
10
|
from mindsdb.utilities.ml_task_queue.base import BaseRedisQueue
|
|
11
|
-
from mindsdb.utilities.ml_task_queue.const import
|
|
12
|
-
TASKS_STREAM_NAME,
|
|
13
|
-
ML_TASK_TYPE,
|
|
14
|
-
ML_TASK_STATUS
|
|
15
|
-
)
|
|
11
|
+
from mindsdb.utilities.ml_task_queue.const import TASKS_STREAM_NAME, ML_TASK_TYPE, ML_TASK_STATUS
|
|
16
12
|
from mindsdb.utilities import log
|
|
17
13
|
from mindsdb.utilities.sentry import sentry_sdk # noqa: F401
|
|
18
14
|
|
|
@@ -20,25 +16,25 @@ logger = log.getLogger(__name__)
|
|
|
20
16
|
|
|
21
17
|
|
|
22
18
|
class MLTaskProducer(BaseRedisQueue):
|
|
23
|
-
"""
|
|
19
|
+
"""Interface around the redis for putting tasks to the queue
|
|
24
20
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
21
|
+
Attributes:
|
|
22
|
+
db (Redis): database object
|
|
23
|
+
stream
|
|
24
|
+
cache
|
|
25
|
+
pubsub
|
|
30
26
|
"""
|
|
31
27
|
|
|
32
28
|
def __init__(self) -> None:
|
|
33
|
-
config = Config().get(
|
|
29
|
+
config = Config().get("ml_task_queue", {})
|
|
34
30
|
|
|
35
31
|
self.db = Database(
|
|
36
|
-
host=config.get(
|
|
37
|
-
port=config.get(
|
|
38
|
-
db=config.get(
|
|
39
|
-
username=config.get(
|
|
40
|
-
password=config.get(
|
|
41
|
-
protocol=3
|
|
32
|
+
host=config.get("host", "localhost"),
|
|
33
|
+
port=config.get("port", 6379),
|
|
34
|
+
db=config.get("db", 0),
|
|
35
|
+
username=config.get("username"),
|
|
36
|
+
password=config.get("password"),
|
|
37
|
+
protocol=3,
|
|
42
38
|
)
|
|
43
39
|
self.wait_redis_ping(60)
|
|
44
40
|
|
|
@@ -47,26 +43,26 @@ class MLTaskProducer(BaseRedisQueue):
|
|
|
47
43
|
self.pubsub = self.db.pubsub()
|
|
48
44
|
|
|
49
45
|
def apply_async(self, task_type: ML_TASK_TYPE, model_id: int, payload: dict, dataframe: DataFrame = None) -> Task:
|
|
50
|
-
|
|
46
|
+
"""Add tasks to the queue
|
|
51
47
|
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
48
|
+
Args:
|
|
49
|
+
task_type (ML_TASK_TYPE): type of the task
|
|
50
|
+
model_id (int): model identifier
|
|
51
|
+
payload (dict): lightweight model data that will be added to stream message
|
|
52
|
+
dataframe (DataFrame): dataframe will be transfered via regular redis storage
|
|
57
53
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
54
|
+
Returns:
|
|
55
|
+
Task: object representing the task
|
|
56
|
+
"""
|
|
61
57
|
try:
|
|
62
58
|
payload = pickle.dumps(payload, protocol=5)
|
|
63
59
|
redis_key = RedisKey.new()
|
|
64
60
|
message = {
|
|
65
61
|
"task_type": task_type.value,
|
|
66
|
-
"company_id":
|
|
62
|
+
"company_id": "" if ctx.company_id is None else ctx.company_id, # None can not be dumped
|
|
67
63
|
"model_id": model_id,
|
|
68
64
|
"payload": payload,
|
|
69
|
-
"redis_key": redis_key.base
|
|
65
|
+
"redis_key": redis_key.base,
|
|
70
66
|
}
|
|
71
67
|
|
|
72
68
|
self.wait_redis_ping()
|
|
@@ -77,5 +73,5 @@ class MLTaskProducer(BaseRedisQueue):
|
|
|
77
73
|
self.stream.add(message)
|
|
78
74
|
return Task(self.db, redis_key)
|
|
79
75
|
except ConnectionError:
|
|
80
|
-
logger.
|
|
76
|
+
logger.exception("Cant send message to redis: connect failed")
|
|
81
77
|
raise
|
|
@@ -383,7 +383,7 @@ class SqlalchemyRender:
|
|
|
383
383
|
elif isinstance(t, ast.Parameter):
|
|
384
384
|
col = sa.column(t.value, is_literal=True)
|
|
385
385
|
if t.alias:
|
|
386
|
-
raise RenderError()
|
|
386
|
+
raise RenderError("Parameter aliases are not supported in the renderer")
|
|
387
387
|
elif isinstance(t, ast.Tuple):
|
|
388
388
|
col = [self.to_expression(i) for i in t.items]
|
|
389
389
|
elif isinstance(t, ast.Variable):
|
|
@@ -574,17 +574,18 @@ class SqlalchemyRender:
|
|
|
574
574
|
else:
|
|
575
575
|
condition = self.to_expression(item["condition"])
|
|
576
576
|
|
|
577
|
-
if "ASOF" in join_type:
|
|
577
|
+
if "ASOF" in join_type or "RIGHT" in join_type:
|
|
578
578
|
raise NotImplementedError(f"Unsupported join type: {join_type}")
|
|
579
|
-
|
|
579
|
+
|
|
580
580
|
is_full = False
|
|
581
|
-
|
|
582
|
-
|
|
581
|
+
is_outer = False
|
|
582
|
+
if join_type in ("LEFT JOIN", "LEFT OUTER JOIN"):
|
|
583
|
+
is_outer = True
|
|
583
584
|
if join_type == "FULL JOIN":
|
|
584
585
|
is_full = True
|
|
585
586
|
|
|
586
587
|
# perform join
|
|
587
|
-
query =
|
|
588
|
+
query = query.join(table, condition, isouter=is_outer, full=is_full)
|
|
588
589
|
elif isinstance(from_table, (ast.Union, ast.Intersect, ast.Except)):
|
|
589
590
|
alias = None
|
|
590
591
|
if from_table.alias:
|
mindsdb/utilities/utils.py
CHANGED
|
@@ -22,13 +22,13 @@ def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Di
|
|
|
22
22
|
for row in reader:
|
|
23
23
|
for pair in row:
|
|
24
24
|
# Match key=value pattern
|
|
25
|
-
match = re.match(r
|
|
25
|
+
match = re.match(r"^\s*([^=]+?)\s*=\s*(.+?)\s*$", pair)
|
|
26
26
|
if match:
|
|
27
27
|
key, value = match.groups()
|
|
28
28
|
attributes[key.strip()] = value.strip()
|
|
29
29
|
else:
|
|
30
30
|
raise ValueError(f"Invalid attribute format: {pair}")
|
|
31
31
|
except Exception as e:
|
|
32
|
-
raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}")
|
|
32
|
+
raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}") from e
|
|
33
33
|
|
|
34
34
|
return attributes
|