PyPI - MindsDB - Versions diffs - 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl - Mend

MindsDB 25.9.2.0a1-py3-none-any.whl → 25.9.3rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MindsDB might be problematic. Click here for more details.

Files changed (116)

mindsdb/__about__.py +1 -1
mindsdb/__main__.py +39 -20
mindsdb/api/a2a/agent.py +7 -9
mindsdb/api/a2a/common/server/server.py +3 -3
mindsdb/api/a2a/common/server/task_manager.py +4 -4
mindsdb/api/a2a/task_manager.py +15 -17
mindsdb/api/common/middleware.py +9 -11
mindsdb/api/executor/command_executor.py +2 -4
mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
mindsdb/api/executor/exceptions.py +29 -10
mindsdb/api/executor/planner/plan_join.py +17 -3
mindsdb/api/executor/sql_query/sql_query.py +74 -74
mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
mindsdb/api/executor/utilities/functions.py +6 -6
mindsdb/api/executor/utilities/sql.py +32 -16
mindsdb/api/http/gui.py +5 -11
mindsdb/api/http/initialize.py +8 -10
mindsdb/api/http/namespaces/agents.py +10 -12
mindsdb/api/http/namespaces/analysis.py +13 -20
mindsdb/api/http/namespaces/auth.py +1 -1
mindsdb/api/http/namespaces/config.py +15 -11
mindsdb/api/http/namespaces/databases.py +140 -201
mindsdb/api/http/namespaces/file.py +15 -4
mindsdb/api/http/namespaces/handlers.py +7 -2
mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
mindsdb/api/http/namespaces/models.py +94 -126
mindsdb/api/http/namespaces/projects.py +13 -22
mindsdb/api/http/namespaces/sql.py +33 -25
mindsdb/api/http/namespaces/tab.py +27 -37
mindsdb/api/http/namespaces/views.py +1 -1
mindsdb/api/http/start.py +14 -8
mindsdb/api/mcp/__init__.py +2 -1
mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
mindsdb/integrations/libs/api_handler.py +10 -10
mindsdb/integrations/libs/base.py +4 -4
mindsdb/integrations/libs/llm/utils.py +2 -2
mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
mindsdb/integrations/libs/process_cache.py +132 -140
mindsdb/integrations/libs/response.py +18 -12
mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
mindsdb/integrations/utilities/files/file_reader.py +6 -7
mindsdb/integrations/utilities/rag/config_loader.py +37 -26
mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
mindsdb/integrations/utilities/rag/settings.py +58 -133
mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
mindsdb/interfaces/agents/agents_controller.py +2 -1
mindsdb/interfaces/agents/constants.py +0 -2
mindsdb/interfaces/agents/litellm_server.py +34 -58
mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
mindsdb/interfaces/chatbot/polling.py +30 -18
mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
mindsdb/interfaces/database/integrations.py +19 -2
mindsdb/interfaces/file/file_controller.py +6 -6
mindsdb/interfaces/functions/controller.py +1 -1
mindsdb/interfaces/functions/to_markdown.py +2 -2
mindsdb/interfaces/jobs/jobs_controller.py +5 -5
mindsdb/interfaces/jobs/scheduler.py +3 -8
mindsdb/interfaces/knowledge_base/controller.py +50 -23
mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
mindsdb/interfaces/model/model_controller.py +170 -166
mindsdb/interfaces/query_context/context_controller.py +14 -2
mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
mindsdb/interfaces/skills/retrieval_tool.py +43 -50
mindsdb/interfaces/skills/skill_tool.py +2 -2
mindsdb/interfaces/skills/sql_agent.py +25 -19
mindsdb/interfaces/storage/fs.py +114 -169
mindsdb/interfaces/storage/json.py +19 -18
mindsdb/interfaces/tabs/tabs_controller.py +49 -72
mindsdb/interfaces/tasks/task_monitor.py +3 -9
mindsdb/interfaces/tasks/task_thread.py +7 -9
mindsdb/interfaces/triggers/trigger_task.py +7 -13
mindsdb/interfaces/triggers/triggers_controller.py +47 -50
mindsdb/migrations/migrate.py +16 -16
mindsdb/utilities/api_status.py +58 -0
mindsdb/utilities/config.py +49 -0
mindsdb/utilities/exception.py +40 -1
mindsdb/utilities/fs.py +0 -1
mindsdb/utilities/hooks/profiling.py +17 -14
mindsdb/utilities/langfuse.py +40 -45
mindsdb/utilities/log.py +272 -0
mindsdb/utilities/ml_task_queue/consumer.py +52 -58
mindsdb/utilities/ml_task_queue/producer.py +26 -30
mindsdb/utilities/render/sqlalchemy_render.py +7 -6
mindsdb/utilities/utils.py +2 -2
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
{mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0

mindsdb/utilities/log.py CHANGED Viewed

@@ -144,3 +144,275 @@ def getLogger(name=None):
     """
     initialize_logging()
     return logging.getLogger(name)
+def log_ram_info(logger: logging.Logger) -> None:
+    """Write a one-line summary of system memory usage to the logger at DEBUG level.
+    The summary holds the total, available and used memory in GB (1 GB = 1024**3 bytes) and the
+    usage percentage, all read from ``psutil.virtual_memory()``. Nothing is collected or logged
+    unless the logger is enabled for DEBUG level.
+    Args:
+        logger (logging.Logger): The logger instance that receives the memory summary.
+    Returns:
+        None. A failure while collecting the data (including a failed ``psutil`` import) is
+        reported as a DEBUG message instead of being raised.
+    """
+    debug_enabled = logger.isEnabledFor(logging.DEBUG)
+    if debug_enabled is False:
+        return
+    try:
+        # Imported lazily so that an import failure is reported through the logger below.
+        import psutil
+        bytes_per_gb = 1024**3
+        stats = psutil.virtual_memory()
+        total_gb = stats.total / bytes_per_gb
+        available_gb = stats.available / bytes_per_gb
+        used_gb = stats.used / bytes_per_gb
+        usage_percent = stats.percent
+        summary = (
+            f"Memory: {total_gb:.1f}GB total, {available_gb:.1f}GB available, "
+            f"{used_gb:.1f}GB used ({usage_percent:.1f}%)"
+        )
+        logger.debug(summary)
+    except Exception as exc:
+        logger.debug(f"Failed to get memory information: {exc}")
+def log_system_info(logger: logging.Logger) -> None:
+    """Log detailed system information for debugging purposes.
+    Nothing is collected or logged unless the logger is enabled for DEBUG level. When it is, the
+    following DEBUG messages are emitted, in this order:
+    - Operating system details (OS type, version, distribution, architecture)
+    - CPU information (processor type, physical and logical core counts)
+    - Memory information (delegated to ``log_ram_info``)
+    - GPU information (NVIDIA, AMD, Intel graphics cards; memory size for NVIDIA cards)
+    Args:
+        logger (logging.Logger): The logger instance to use for outputting system information.
+                                 Must be configured for DEBUG level to see the output.
+    Returns:
+        None
+    Note:
+        - For Linux systems, the distribution is read from /etc/os-release, then /etc/issue, then lsb_release
+        - For Windows systems, wmic commands are used to get OS and GPU information
+        - For macOS systems, sw_vers and system_profiler commands are used
+        - GPU detection supports NVIDIA (via nvidia-smi), AMD (via rocm-smi), and fallback methods
+        - All subprocess calls have timeout protection to prevent hanging
+        - GPU detection is best-effort: a failure there is logged and reported as "not detected"
+        - Any other failure (for example a failed ``psutil`` import) is logged at DEBUG level and
+          ends the report early instead of being raised
+    """
+    if logger.isEnabledFor(logging.DEBUG) is False:
+        return
+    try:
+        # Imported lazily so that an import failure is reported through the logger below.
+        import platform
+        import psutil
+        logger.debug(f"Operating System: {_describe_os()}")
+        # platform.processor() may return an empty string; fall back to the architecture name.
+        cpu_info = platform.processor() or platform.machine()
+        cpu_count = psutil.cpu_count(logical=False)
+        cpu_count_logical = psutil.cpu_count(logical=True)
+        logger.debug(f"CPU: {cpu_info} ({cpu_count} physical cores, {cpu_count_logical} logical cores)")
+        log_ram_info(logger)
+        try:
+            gpu_info = _detect_gpus()
+        except Exception as e:
+            # GPU probing is best-effort and must not prevent the summary line below.
+            logger.debug(f"Failed to detect GPU: {e}")
+            gpu_info = []
+        if gpu_info:
+            logger.debug(f"GPU: {', '.join(gpu_info)}")
+        else:
+            logger.debug("GPU: Not detected or not supported")
+    except Exception as e:
+        logger.debug(f"Failed to get system information: {e}")
+def _run_command(command: list, timeout: float) -> "str | None":
+    """Run a command and return its stdout, or None if it cannot be run, times out or exits non-zero."""
+    import subprocess
+    try:
+        result = subprocess.run(command, capture_output=True, text=True, timeout=timeout)
+    except (subprocess.SubprocessError, OSError, ValueError):
+        # Timeout, missing/non-executable binary, or output that cannot be decoded as text.
+        return None
+    if result.returncode != 0:
+        return None
+    return result.stdout
+def _read_text(path: str) -> str:
+    """Return the content of a text file, or an empty string if it is missing or unreadable."""
+    try:
+        with open(path, "r", encoding="utf-8", errors="replace") as f:
+            return f.read()
+    except OSError:
+        return ""
+def _parse_key_value_lines(text: str) -> dict:
+    """Parse ``KEY=value`` lines into a dict, skipping ``#`` comments and dropping surrounding quotes."""
+    parsed = {}
+    for raw_line in text.splitlines():
+        line = raw_line.strip()
+        if not line or line.startswith("#") or "=" not in line:
+            continue
+        key, value = line.split("=", 1)
+        # os-release values may be wrapped in either double or single quotes.
+        parsed[key.strip()] = value.strip().strip("\"'")
+    return parsed
+def _detect_linux_distro() -> str:
+    """Return the Linux distribution name, trying /etc/os-release, /etc/issue, then lsb_release."""
+    # Each source is tried in turn, so an empty or unusable os-release no longer blocks the fallbacks.
+    os_release_data = _parse_key_value_lines(_read_text("/etc/os-release"))
+    if os_release_data.get("PRETTY_NAME"):
+        return os_release_data["PRETTY_NAME"]
+    if os_release_data.get("NAME") and os_release_data.get("VERSION"):
+        return f"{os_release_data['NAME']} {os_release_data['VERSION']}"
+    if os_release_data.get("ID"):
+        return os_release_data["ID"].title()
+    issue_content = _read_text("/etc/issue").strip()
+    if issue_content:
+        return issue_content.split("\n")[0]
+    lsb_output = _run_command(["lsb_release", "-d"], timeout=2) or ""
+    return lsb_output.split(":")[-1].strip() or "Unknown Linux"
+def _describe_os() -> str:
+    """Return a one-line description of the operating system, ending with the machine architecture."""
+    import platform
+    os_system = platform.system()
+    os_release = platform.release()
+    if os_system == "Linux":
+        description = f"{_detect_linux_distro()} (kernel {os_release})"
+    elif os_system == "Windows":
+        os_name, os_version = "Windows", "unknown"
+        output = _run_command(["wmic", "os", "get", "Caption,Version", "/format:list"], timeout=3)
+        windows_info = _parse_key_value_lines(output or "")
+        if "Caption" in windows_info and "Version" in windows_info:
+            os_name, os_version = windows_info["Caption"], windows_info["Version"]
+        description = f"{os_name} {os_release} (version {os_version})"
+    elif os_system == "Darwin":  # macOS
+        os_name = "macOS"
+        # platform.mac_ver() needs no external command; it is the default when sw_vers gives nothing usable.
+        os_version = platform.mac_ver()[0] or "unknown"
+        output = _run_command(["sw_vers", "-productName", "-productVersion"], timeout=3)
+        lines = (output or "").strip().split("\n")
+        if len(lines) >= 2:
+            os_name, os_version = lines[0].strip(), lines[1].strip()
+        description = f"{os_name} {os_release} (version {os_version})"
+    else:
+        description = f"{os_system} {os_release}"
+    return f"{description} ({platform.machine()})"
+def _detect_gpus() -> list:
+    """Return descriptions of the detected GPUs (empty list if none), NVIDIA first, then AMD, then generic."""
+    import platform
+    import shutil
+    # Check for NVIDIA GPU (nvidia-smi is looked up on PATH on every platform)
+    nvidia_smi_path = shutil.which("nvidia-smi")
+    if nvidia_smi_path:
+        output = _run_command(
+            [nvidia_smi_path, "--query-gpu=name,memory.total", "--format=csv,noheader,nounits"], timeout=3
+        )
+        nvidia_gpus = []
+        for line in (output or "").splitlines():
+            parts = line.split(", ")
+            if line.strip() and len(parts) >= 2:
+                nvidia_gpus.append(f"{parts[0].strip()} ({parts[1].strip()}MB)")
+        if nvidia_gpus:
+            return nvidia_gpus
+    os_system = platform.system()
+    if os_system == "Windows":
+        # One wmic call serves both the AMD check and the generic fallback.
+        output = _run_command(["wmic", "path", "win32_VideoController", "get", "name"], timeout=3)
+        names = [line.strip() for line in (output or "").splitlines()]
+        names = [name for name in names if name and name != "Name"]
+        amd_gpus = [name for name in names if "AMD" in name.upper()]
+        if amd_gpus:
+            return amd_gpus
+        vendor_keywords = ("NVIDIA", "AMD", "INTEL", "RADEON", "GEFORCE")
+        return [name for name in names if any(keyword in name.upper() for keyword in vendor_keywords)]
+    # Check for AMD GPU via rocm-smi on Linux/macOS
+    rocm_smi_path = shutil.which("rocm-smi")
+    if rocm_smi_path:
+        output = _run_command([rocm_smi_path, "--showproductname"], timeout=3)
+        amd_gpus = [line.split(":")[-1].strip() for line in (output or "").splitlines() if "Product Name" in line]
+        if amd_gpus:
+            return amd_gpus
+    # Fallback: system_profiler on macOS
+    if os_system == "Darwin":
+        output = _run_command(["system_profiler", "SPDisplaysDataType"], timeout=3)
+        return [line.split(":")[-1].strip() for line in (output or "").splitlines() if "Chipset Model:" in line]
+    return []

mindsdb/utilities/ml_task_queue/consumer.py CHANGED Viewed

@@ -24,7 +24,7 @@ from mindsdb.utilities.ml_task_queue.const import (
     ML_TASK_STATUS,
     TASKS_STREAM_NAME,
     TASKS_STREAM_CONSUMER_NAME,
-    TASKS_STREAM_CONSUMER_GROUP_NAME
+    TASKS_STREAM_CONSUMER_GROUP_NAME,
 )
 from mindsdb.utilities import log
 from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401
@@ -33,9 +33,10 @@ logger = log.getLogger(__name__)
 def _save_thread_link(func: Callable) -> Callable:
-    """ Decorator for MLTaskConsumer.
-        Save thread in which func is executed to a list.
+    """Decorator for MLTaskConsumer.
+    Save thread in which func is executed to a list.
     """
     @wraps(func)
     def wrapper(self, *args, **kwargs) -> None:
         current_thread = threading.current_thread()
@@ -45,22 +46,23 @@ def _save_thread_link(func: Callable) -> Callable:
         finally:
             self._listen_message_threads.remove(current_thread)
         return result
     return wrapper
 class MLTaskConsumer(BaseRedisQueue):
-    """ Listener of ML tasks queue and tasks executioner.
-        Each new message waited and executed in separate thread.
-        Attributes:
-            _ready_event (Event): set if ready to start new queue listen thread
-            _stop_event (Event): set if need to stop all threads/processes
-            cpu_stat (list[float]): CPU usage statistic. Each value is 0-100 float representing CPU usage in %
-            _collect_cpu_stat_thread (Thread): pointer to thread that collecting CPU usage statistic
-            _listen_message_threads (list[Thread]): list of pointers to threads where queue messages are listening/processing
-            db (Redis): database object
-            cache: redis cache abstrtaction
-            consumer_group: redis consumer group object
+    """Listener of ML tasks queue and tasks executioner.
+    Each new message waited and executed in separate thread.
+    Attributes:
+        _ready_event (Event): set if ready to start new queue listen thread
+        _stop_event (Event): set if need to stop all threads/processes
+        cpu_stat (list[float]): CPU usage statistic. Each value is 0-100 float representing CPU usage in %
+        _collect_cpu_stat_thread (Thread): pointer to thread that collecting CPU usage statistic
+        _listen_message_threads (list[Thread]): list of pointers to threads where queue messages are listening/processing
+        db (Redis): database object
+        cache: redis cache abstrtaction
+        consumer_group: redis consumer group object
     """
     def __init__(self) -> None:
@@ -75,7 +77,7 @@ class MLTaskConsumer(BaseRedisQueue):
         # region collect cpu usage statistic
         self.cpu_stat = [0] * 10
         self._collect_cpu_stat_thread = threading.Thread(
-            target=self._collect_cpu_stat, name='MLTaskConsumer._collect_cpu_stat'
+            target=self._collect_cpu_stat, name="MLTaskConsumer._collect_cpu_stat"
         )
         self._collect_cpu_stat_thread.start()
         # endregion
@@ -83,14 +85,14 @@ class MLTaskConsumer(BaseRedisQueue):
         self._listen_message_threads = []
         # region connect to redis
-        config = Config().get('ml_task_queue', {})
+        config = Config().get("ml_task_queue", {})
         self.db = Database(
-            host=config.get('host', 'localhost'),
-            port=config.get('port', 6379),
-            db=config.get('db', 0),
-            username=config.get('username'),
-            password=config.get('password'),
-            protocol=3
+            host=config.get("host", "localhost"),
+            port=config.get("port", 6379),
+            db=config.get("db", 0),
+            username=config.get("username"),
+            password=config.get("password"),
+            protocol=3,
         )
         self.wait_redis_ping(60)
@@ -102,30 +104,29 @@ class MLTaskConsumer(BaseRedisQueue):
         # endregion
     def _collect_cpu_stat(self) -> None:
-        """ Collect CPU usage statistic. Executerd in thread.
-        """
+        """Collect CPU usage statistic. Executerd in thread."""
         while self._stop_event.is_set() is False:
             self.cpu_stat = self.cpu_stat[1:]
             self.cpu_stat.append(psutil.cpu_percent())
             time.sleep(1)
     def get_avg_cpu_usage(self) -> float:
-        """ get average CPU usage for last period (10s by default)
+        """get average CPU usage for last period (10s by default)
-            Returns:
-                float: 0-100 value, average CPU usage
+        Returns:
+            float: 0-100 value, average CPU usage
         """
         return sum(self.cpu_stat) / len(self.cpu_stat)
     def wait_free_resources(self) -> None:
-        """ Sleep in thread untill there are free resources. Checks:
-            - avg CPU usage is less than 60%
-            - current CPU usage is less than 60%
-            - current tasks count is less than (N CPU cores) / 8
+        """Sleep in thread untill there are free resources. Checks:
+        - avg CPU usage is less than 60%
+        - current CPU usage is less than 60%
+        - current tasks count is less than (N CPU cores) / 8
         """
         config = Config()
-        is_cloud = config.get('cloud', False)
-        processes_dir = Path(tempfile.gettempdir()).joinpath('mindsdb/processes/learn/')
+        is_cloud = config.get("cloud", False)
+        processes_dir = Path(tempfile.gettempdir()).joinpath("mindsdb/processes/learn/")
         while True:
             while self.get_avg_cpu_usage() > 60 or max(self.cpu_stat[-3:]) > 60:
                 time.sleep(1)
@@ -139,8 +140,7 @@ class MLTaskConsumer(BaseRedisQueue):
     @_save_thread_link
     def _listen(self) -> None:
-        """ Listen message queue untill get new message. Execute task.
-        """
+        """Listen message queue untill get new message. Execute task."""
         message = None
         while message is None:
             self.wait_free_resources()
@@ -150,8 +150,8 @@ class MLTaskConsumer(BaseRedisQueue):
             try:
                 message = self.consumer_group.read(count=1, block=1000, consumer=TASKS_STREAM_CONSUMER_NAME)
-            except RedisConnectionError as e:
-                logger.error(f"Can't connect to Redis: {e}")
+            except RedisConnectionError:
+                logger.exception("Can't connect to Redis:")
                 self._stop_event.set()
                 return
             except Exception:
@@ -168,13 +168,13 @@ class MLTaskConsumer(BaseRedisQueue):
             self.consumer_group.streams[TASKS_STREAM_NAME].ack(message_id)
             self.consumer_group.streams[TASKS_STREAM_NAME].delete(message_id)
-            payload = from_bytes(message_content[b'payload'])
-            task_type = ML_TASK_TYPE(message_content[b'task_type'])
-            model_id = int(message_content[b'model_id'])
-            company_id = message_content[b'company_id']
+            payload = from_bytes(message_content[b"payload"])
+            task_type = ML_TASK_TYPE(message_content[b"task_type"])
+            model_id = int(message_content[b"model_id"])
+            company_id = message_content[b"company_id"]
             if len(company_id) == 0:
                 company_id = None
-            redis_key = RedisKey(message_content.get(b'redis_key'))
+            redis_key = RedisKey(message_content.get(b"redis_key"))
             # region read dataframe
             dataframe_bytes = self.cache.get(redis_key.dataframe)
@@ -184,16 +184,13 @@ class MLTaskConsumer(BaseRedisQueue):
                 self.cache.delete(redis_key.dataframe)
             # endregion
-            ctx.load(payload['context'])
+            ctx.load(payload["context"])
         finally:
             self._ready_event.set()
         try:
             task = process_cache.apply_async(
-                task_type=task_type,
-                model_id=model_id,
-                payload=payload,
-                dataframe=dataframe
+                task_type=task_type, model_id=model_id, payload=payload, dataframe=dataframe
             )
             status_notifier = StatusNotifier(redis_key, ML_TASK_STATUS.PROCESSING, self.db, self.cache)
             status_notifier.start()
@@ -215,20 +212,18 @@ class MLTaskConsumer(BaseRedisQueue):
             self.cache.set(redis_key.status, ML_TASK_STATUS.COMPLETE.value, 180)
     def run(self) -> None:
-        """ Start new listen thread each time when _ready_event is set
-        """
+        """Start new listen thread each time when _ready_event is set"""
         self._ready_event.set()
         while self._stop_event.is_set() is False:
             self._ready_event.wait(timeout=1)
             if self._ready_event.is_set() is False:
                 continue
             self._ready_event.clear()
-            threading.Thread(target=self._listen, name='MLTaskConsumer._listen').start()
+            threading.Thread(target=self._listen, name="MLTaskConsumer._listen").start()
         self.stop()
     def stop(self) -> None:
-        """ Stop all executing threads
-        """
+        """Stop all executing threads"""
         self._stop_event.set()
         for thread in (*self._listen_message_threads, self._collect_cpu_stat_thread):
             try:
@@ -238,17 +233,16 @@ class MLTaskConsumer(BaseRedisQueue):
                 pass
-@mark_process(name='internal', custom_mark='ml_task_consumer')
+@mark_process(name="internal", custom_mark="ml_task_consumer")
 def start(verbose: bool) -> None:
-    """ Create task queue consumer and start listen the queue
-    """
+    """Create the task queue consumer and listen to the queue until the consumer stops.
+    A SIGTERM handler is installed that asks the consumer to stop.
+    Args:
+        verbose (bool): not used inside this function
+    Raises:
+        Exception: whatever ``consumer.run()`` raises is logged and re-raised
+    """
     consumer = MLTaskConsumer()
     signal.signal(signal.SIGTERM, lambda _x, _y: consumer.stop())
     try:
         consumer.run()
     except Exception as e:
         consumer.stop()
-        logger.error(f'Got exception: {e}', flush=True)
+        # ``flush`` is a print() argument; the standard logging.Logger methods raise TypeError on it,
+        # which would replace the exception that is being re-raised here.
+        logger.error(f"Got exception: {e}")
         raise
     finally:
-        logger.info('Consumer process stopped', flush=True)
+        logger.info("Consumer process stopped")

mindsdb/utilities/ml_task_queue/producer.py CHANGED Viewed

@@ -8,11 +8,7 @@ from mindsdb.utilities.config import Config
 from mindsdb.utilities.ml_task_queue.utils import RedisKey, to_bytes
 from mindsdb.utilities.ml_task_queue.task import Task
 from mindsdb.utilities.ml_task_queue.base import BaseRedisQueue
-from mindsdb.utilities.ml_task_queue.const import (
-    TASKS_STREAM_NAME,
-    ML_TASK_TYPE,
-    ML_TASK_STATUS
-)
+from mindsdb.utilities.ml_task_queue.const import TASKS_STREAM_NAME, ML_TASK_TYPE, ML_TASK_STATUS
 from mindsdb.utilities import log
 from mindsdb.utilities.sentry import sentry_sdk  # noqa: F401
@@ -20,25 +16,25 @@ logger = log.getLogger(__name__)
 class MLTaskProducer(BaseRedisQueue):
-    """ Interface around the redis for putting tasks to the queue
+    """Interface around the redis for putting tasks to the queue
-        Attributes:
-            db (Redis): database object
-            stream
-            cache
-            pubsub
+    Attributes:
+        db (Redis): database object
+        stream
+        cache
+        pubsub
     """
     def __init__(self) -> None:
-        config = Config().get('ml_task_queue', {})
+        config = Config().get("ml_task_queue", {})
         self.db = Database(
-            host=config.get('host', 'localhost'),
-            port=config.get('port', 6379),
-            db=config.get('db', 0),
-            username=config.get('username'),
-            password=config.get('password'),
-            protocol=3
+            host=config.get("host", "localhost"),
+            port=config.get("port", 6379),
+            db=config.get("db", 0),
+            username=config.get("username"),
+            password=config.get("password"),
+            protocol=3,
         )
         self.wait_redis_ping(60)
@@ -47,26 +43,26 @@ class MLTaskProducer(BaseRedisQueue):
         self.pubsub = self.db.pubsub()
     def apply_async(self, task_type: ML_TASK_TYPE, model_id: int, payload: dict, dataframe: DataFrame = None) -> Task:
-        ''' Add tasks to the queue
+        """Add tasks to the queue
-            Args:
-                task_type (ML_TASK_TYPE): type of the task
-                model_id (int): model identifier
-                payload (dict): lightweight model data that will be added to stream message
-                dataframe (DataFrame): dataframe will be transfered via regular redis storage
+        Args:
+            task_type (ML_TASK_TYPE): type of the task
+            model_id (int): model identifier
+            payload (dict): lightweight model data that will be added to stream message
+            dataframe (DataFrame): dataframe will be transfered via regular redis storage
-            Returns:
-                Task: object representing the task
-        '''
+        Returns:
+            Task: object representing the task
+        """
         try:
             payload = pickle.dumps(payload, protocol=5)
             redis_key = RedisKey.new()
             message = {
                 "task_type": task_type.value,
-                "company_id": '' if ctx.company_id is None else ctx.company_id,     # None can not be dumped
+                "company_id": "" if ctx.company_id is None else ctx.company_id,  # None can not be dumped
                 "model_id": model_id,
                 "payload": payload,
-                "redis_key": redis_key.base
+                "redis_key": redis_key.base,
             }
             self.wait_redis_ping()
@@ -77,5 +73,5 @@ class MLTaskProducer(BaseRedisQueue):
             self.stream.add(message)
             return Task(self.db, redis_key)
         except ConnectionError:
-            logger.error('Cant send message to redis: connect failed')
+            logger.exception("Cant send message to redis: connect failed")
             raise

mindsdb/utilities/render/sqlalchemy_render.py CHANGED Viewed

@@ -383,7 +383,7 @@ class SqlalchemyRender:
         elif isinstance(t, ast.Parameter):
             col = sa.column(t.value, is_literal=True)
             if t.alias:
-                raise RenderError()
+                raise RenderError("Parameter aliases are not supported in the renderer")
         elif isinstance(t, ast.Tuple):
             col = [self.to_expression(i) for i in t.items]
         elif isinstance(t, ast.Variable):
@@ -574,17 +574,18 @@ class SqlalchemyRender:
                         else:
                             condition = self.to_expression(item["condition"])
-                        if "ASOF" in join_type:
+                        if "ASOF" in join_type or "RIGHT" in join_type:
                             raise NotImplementedError(f"Unsupported join type: {join_type}")
-                        method = "join"
                         is_full = False
-                        if join_type == "LEFT JOIN":
-                            method = "outerjoin"
+                        is_outer = False
+                        if join_type in ("LEFT JOIN", "LEFT OUTER JOIN"):
+                            is_outer = True
                         if join_type == "FULL JOIN":
                             is_full = True
                         # perform join
-                        query = getattr(query, method)(table, condition, full=is_full)
+                        query = query.join(table, condition, isouter=is_outer, full=is_full)
             elif isinstance(from_table, (ast.Union, ast.Intersect, ast.Except)):
                 alias = None
                 if from_table.alias:

mindsdb/utilities/utils.py CHANGED Viewed

@@ -22,13 +22,13 @@ def parse_csv_attributes(csv_attributes: typing.Optional[str] = "") -> typing.Di
         for row in reader:
             for pair in row:
                 # Match key=value pattern
-                match = re.match(r'^\s*([^=]+?)\s*=\s*(.+?)\s*$', pair)
+                match = re.match(r"^\s*([^=]+?)\s*=\s*(.+?)\s*$", pair)
                 if match:
                     key, value = match.groups()
                     attributes[key.strip()] = value.strip()
                 else:
                     raise ValueError(f"Invalid attribute format: {pair}")
     except Exception as e:
-        raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}")
+        raise ValueError(f"Failed to parse csv_attributes='{csv_attributes}': {e}") from e
     return attributes

MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

Potentially problematic release.

MindsDB 25.9.2.0a1py3-none-any.whl → 25.9.3rc1py3-none-any.whl