MindsDB 25.9.2.0a1__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MindsDB might be problematic.

Files changed (116)
  1. mindsdb/__about__.py +1 -1
  2. mindsdb/__main__.py +39 -20
  3. mindsdb/api/a2a/agent.py +7 -9
  4. mindsdb/api/a2a/common/server/server.py +3 -3
  5. mindsdb/api/a2a/common/server/task_manager.py +4 -4
  6. mindsdb/api/a2a/task_manager.py +15 -17
  7. mindsdb/api/common/middleware.py +9 -11
  8. mindsdb/api/executor/command_executor.py +2 -4
  9. mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
  10. mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
  11. mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
  12. mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
  13. mindsdb/api/executor/exceptions.py +29 -10
  14. mindsdb/api/executor/planner/plan_join.py +17 -3
  15. mindsdb/api/executor/sql_query/sql_query.py +74 -74
  16. mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
  17. mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
  18. mindsdb/api/executor/utilities/functions.py +6 -6
  19. mindsdb/api/executor/utilities/sql.py +32 -16
  20. mindsdb/api/http/gui.py +5 -11
  21. mindsdb/api/http/initialize.py +8 -10
  22. mindsdb/api/http/namespaces/agents.py +10 -12
  23. mindsdb/api/http/namespaces/analysis.py +13 -20
  24. mindsdb/api/http/namespaces/auth.py +1 -1
  25. mindsdb/api/http/namespaces/config.py +15 -11
  26. mindsdb/api/http/namespaces/databases.py +140 -201
  27. mindsdb/api/http/namespaces/file.py +15 -4
  28. mindsdb/api/http/namespaces/handlers.py +7 -2
  29. mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
  30. mindsdb/api/http/namespaces/models.py +94 -126
  31. mindsdb/api/http/namespaces/projects.py +13 -22
  32. mindsdb/api/http/namespaces/sql.py +33 -25
  33. mindsdb/api/http/namespaces/tab.py +27 -37
  34. mindsdb/api/http/namespaces/views.py +1 -1
  35. mindsdb/api/http/start.py +14 -8
  36. mindsdb/api/mcp/__init__.py +2 -1
  37. mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
  38. mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
  39. mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
  40. mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
  41. mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
  42. mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
  43. mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
  44. mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
  45. mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +13 -1
  46. mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
  47. mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
  48. mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
  49. mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
  50. mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
  51. mindsdb/integrations/libs/api_handler.py +10 -10
  52. mindsdb/integrations/libs/base.py +4 -4
  53. mindsdb/integrations/libs/llm/utils.py +2 -2
  54. mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
  55. mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
  56. mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
  57. mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
  58. mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
  59. mindsdb/integrations/libs/process_cache.py +132 -140
  60. mindsdb/integrations/libs/response.py +18 -12
  61. mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
  62. mindsdb/integrations/utilities/files/file_reader.py +6 -7
  63. mindsdb/integrations/utilities/rag/config_loader.py +37 -26
  64. mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
  65. mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
  66. mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
  67. mindsdb/integrations/utilities/rag/settings.py +58 -133
  68. mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
  69. mindsdb/interfaces/agents/agents_controller.py +2 -1
  70. mindsdb/interfaces/agents/constants.py +0 -2
  71. mindsdb/interfaces/agents/litellm_server.py +34 -58
  72. mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
  73. mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
  74. mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
  75. mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
  76. mindsdb/interfaces/chatbot/polling.py +30 -18
  77. mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
  78. mindsdb/interfaces/database/integrations.py +19 -2
  79. mindsdb/interfaces/file/file_controller.py +6 -6
  80. mindsdb/interfaces/functions/controller.py +1 -1
  81. mindsdb/interfaces/functions/to_markdown.py +2 -2
  82. mindsdb/interfaces/jobs/jobs_controller.py +5 -5
  83. mindsdb/interfaces/jobs/scheduler.py +3 -8
  84. mindsdb/interfaces/knowledge_base/controller.py +50 -23
  85. mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
  86. mindsdb/interfaces/model/model_controller.py +170 -166
  87. mindsdb/interfaces/query_context/context_controller.py +14 -2
  88. mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
  89. mindsdb/interfaces/skills/retrieval_tool.py +43 -50
  90. mindsdb/interfaces/skills/skill_tool.py +2 -2
  91. mindsdb/interfaces/skills/sql_agent.py +25 -19
  92. mindsdb/interfaces/storage/fs.py +114 -169
  93. mindsdb/interfaces/storage/json.py +19 -18
  94. mindsdb/interfaces/tabs/tabs_controller.py +49 -72
  95. mindsdb/interfaces/tasks/task_monitor.py +3 -9
  96. mindsdb/interfaces/tasks/task_thread.py +7 -9
  97. mindsdb/interfaces/triggers/trigger_task.py +7 -13
  98. mindsdb/interfaces/triggers/triggers_controller.py +47 -50
  99. mindsdb/migrations/migrate.py +16 -16
  100. mindsdb/utilities/api_status.py +58 -0
  101. mindsdb/utilities/config.py +49 -0
  102. mindsdb/utilities/exception.py +40 -1
  103. mindsdb/utilities/fs.py +0 -1
  104. mindsdb/utilities/hooks/profiling.py +17 -14
  105. mindsdb/utilities/langfuse.py +40 -45
  106. mindsdb/utilities/log.py +272 -0
  107. mindsdb/utilities/ml_task_queue/consumer.py +52 -58
  108. mindsdb/utilities/ml_task_queue/producer.py +26 -30
  109. mindsdb/utilities/render/sqlalchemy_render.py +7 -6
  110. mindsdb/utilities/utils.py +2 -2
  111. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +269 -264
  112. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +115 -115
  113. mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
  114. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
  115. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
  116. {mindsdb-25.9.2.0a1.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/libs/process_cache.py
@@ -1,5 +1,6 @@
 import time
 import threading
+import traceback
 from typing import Optional, Callable
 from concurrent.futures import ProcessPoolExecutor, Future
 
@@ -17,7 +18,7 @@ from mindsdb.integrations.libs.ml_handler_process import (
     create_engine_process,
     update_engine_process,
     create_validation_process,
-    func_call_process
+    func_call_process,
 )
 
 
@@ -44,11 +45,13 @@ class MLProcessException(Exception):
     If exception can not be pickled (pickle.loads(pickle.dumps(e))) then it may lead to termination of the ML process.
     Also in this case, the error sent to the user will not be relevant. This wrapper should prevent it.
     """
+
     base_exception_bytes: bytes = None
 
     def __init__(self, base_exception: Exception, message: str = None) -> None:
         super().__init__(message)
-        self.message = f'{base_exception.__class__.__name__}: {base_exception}'
+        traceback_text = "\n".join(traceback.format_exception(base_exception))
+        self.message = f"{base_exception.__class__.__name__}: {base_exception}\n{traceback_text}"
 
     @property
     def base_exception(self) -> Exception:
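
Reviewer note: embedding the formatted traceback in `message` matters because an exception's traceback does not survive the pickle round-trip between the ML worker process and the parent, which is exactly the transport `ProcessPoolExecutor` uses. A minimal sketch of the failure mode and the fix (the `CustomError` class and `worker` function are hypothetical, for illustration only; the single-argument form of `traceback.format_exception` requires Python 3.10+):

import pickle
import traceback

class CustomError(Exception):
    """Hypothetical handler error raised inside an ML worker process."""

def worker():
    try:
        raise CustomError("model training failed")
    except CustomError as e:
        # Pickling (what ProcessPoolExecutor does to send the exception back)
        # preserves the exception object but drops its traceback.
        restored = pickle.loads(pickle.dumps(e))
        print(restored.__traceback__)  # None - the stack context is gone

        # Formatting the traceback up front, as MLProcessException now does,
        # keeps the stack as plain text that survives any transport.
        text = "\n".join(traceback.format_exception(e))
        print(text)

worker()
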
@@ -56,18 +59,19 @@ class MLProcessException(Exception):
 
 
 class WarmProcess:
-    """ Class-wrapper for a process that persist for a long time. The process
-        may be initialized with any handler requirements. Current implimentation
-        is based on ProcessPoolExecutor just because of multiprocessing.pool
-        produce daemon processes, which can not be used for learning. That
-        bahaviour may be changed only using inheritance.
+    """Class-wrapper for a process that persist for a long time. The process
+    may be initialized with any handler requirements. Current implimentation
+    is based on ProcessPoolExecutor just because of multiprocessing.pool
+    produce daemon processes, which can not be used for learning. That
+    bahaviour may be changed only using inheritance.
     """
+
     def __init__(self, initializer: Optional[Callable] = None, initargs: tuple = ()):
-        """ create and init new process
+        """create and init new process
 
-            Args:
-                initializer (Callable): the same as ProcessPoolExecutor initializer
-                initargs (tuple): the same as ProcessPoolExecutor initargs
+        Args:
+            initializer (Callable): the same as ProcessPoolExecutor initializer
+            initargs (tuple): the same as ProcessPoolExecutor initargs
         """
         self.pool = ProcessPoolExecutor(1, initializer=initializer, initargs=initargs)
         self.last_usage_at = time.time()
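
The docstring's rationale is a real multiprocessing constraint: workers created by `multiprocessing.Pool` are daemonic, and daemonic processes are not allowed to spawn children, which breaks ML handlers that fork helper processes during training. A minimal sketch of the difference, assuming Python 3.9+ (where `ProcessPoolExecutor` workers are non-daemonic):

from concurrent.futures import ProcessPoolExecutor
from multiprocessing import Pool, Process

def spawn_child():
    # An ML handler that forks helper processes during training would hit
    # this same code path inside the worker.
    p = Process(target=print, args=("child ran",))
    p.start()
    p.join()
    return "ok"

if __name__ == "__main__":
    # ProcessPoolExecutor workers are non-daemonic, so this succeeds.
    with ProcessPoolExecutor(1) as ex:
        print(ex.submit(spawn_child).result())

    # multiprocessing.Pool workers are daemonic; starting a child raises
    # "daemonic processes are not allowed to have children".
    with Pool(1) as pool:
        try:
            pool.apply(spawn_child)
        except AssertionError as err:
            print("Pool worker failed:", err)
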
@@ -91,18 +95,17 @@ class WarmProcess:
         self.pool.shutdown(wait=wait)
 
     def _init_done_callback(self, _task):
-        """ callback for initial task
-        """
+        """callback for initial task"""
         self._init_done = True
 
     def _update_last_usage_at_callback(self, _task):
         self.last_usage_at = time.time()
 
     def ready(self) -> bool:
-        """ check is process ready to get a task or not
+        """check is process ready to get a task or not
 
-            Returns:
-                bool
+        Returns:
+            bool
         """
         if self._init_done is False:
             self.task.result()
@@ -112,51 +115,49 @@
         return False
 
     def add_marker(self, marker: tuple):
-        """ remember that that process processed task for that model
+        """remember that that process processed task for that model
 
-            Args:
-                marker (tuple): identifier of model
+        Args:
+            marker (tuple): identifier of model
         """
         if marker is not None:
             self._markers.add(marker)
 
     def has_marker(self, marker: tuple) -> bool:
-        """ check if that process processed task for model
+        """check if that process processed task for model
 
-            Args:
-                marker (tuple): identifier of model
+        Args:
+            marker (tuple): identifier of model
 
-            Returns:
-                bool
+        Returns:
+            bool
         """
         if marker is None:
             return False
         return marker in self._markers
 
     def is_marked(self) -> bool:
-        """ check if process has any marker
+        """check if process has any marker
 
-            Returns:
-                bool
+        Returns:
+            bool
         """
         return len(self._markers) > 0
 
     def apply_async(self, func: Callable, *args: tuple, **kwargs: dict) -> Future:
-        """ Run new task
+        """Run new task
 
-            Args:
-                func (Callable): function to run
-                args (tuple): args to be passed to function
-                kwargs (dict): kwargs to be passed to function
+        Args:
+            func (Callable): function to run
+            args (tuple): args to be passed to function
+            kwargs (dict): kwargs to be passed to function
 
-            Returns:
-                Future
+        Returns:
+            Future
         """
         if not self.ready():
-            raise Exception('Process task is not ready')
-        self.task = self.pool.submit(
-            func, *args, **kwargs
-        )
+            raise Exception("Process task is not ready")
+        self.task = self.pool.submit(func, *args, **kwargs)
         self.task.add_done_callback(self._update_last_usage_at_callback)
         self.last_usage_at = time.time()
         return self.task
@@ -173,11 +174,11 @@ def warm_function(func, context: str, *args, **kwargs):
 
 
 class ProcessCache:
-    """ simple cache for WarmProcess-es
-    """
+    """simple cache for WarmProcess-es"""
+
     def __init__(self, ttl: int = 120):
-        """ Args:
-                ttl (int) time to live for unused process
+        """Args:
+            ttl (int) time to live for unused process
         """
         self.cache = {}
         self._init = False
@@ -191,42 +192,37 @@ class ProcessCache:
         self._stop_clean()
 
     def _start_clean(self) -> None:
-        """ start worker that close connections after ttl expired
-        """
-        if (
-            isinstance(self.cleaner_thread, threading.Thread)
-            and self.cleaner_thread.is_alive()
-        ):
+        """start worker that close connections after ttl expired"""
+        if isinstance(self.cleaner_thread, threading.Thread) and self.cleaner_thread.is_alive():
             return
         self._stop_event.clear()
-        self.cleaner_thread = threading.Thread(target=self._clean, name='ProcessCache.clean')
+        self.cleaner_thread = threading.Thread(target=self._clean, name="ProcessCache.clean")
         self.cleaner_thread.daemon = True
         self.cleaner_thread.start()
 
     def _stop_clean(self) -> None:
-        """ stop clean worker
-        """
+        """stop clean worker"""
        self._stop_event.set()
 
     def init(self):
-        """ run processes for specified handlers
-        """
+        """run processes for specified handlers"""
         from mindsdb.interfaces.database.integrations import integration_controller
+
         preload_handlers = {}
         config = Config()
-        is_cloud = config.get('cloud', False)  # noqa
+        is_cloud = config.get("cloud", False)  # noqa
 
-        if config['ml_task_queue']['type'] != 'redis':
+        if config["ml_task_queue"]["type"] != "redis":
             if is_cloud:
-                lightwood_handler = integration_controller.get_handler_module('lightwood')
+                lightwood_handler = integration_controller.get_handler_module("lightwood")
                 if lightwood_handler is not None and lightwood_handler.Handler is not None:
                     preload_handlers[lightwood_handler.Handler] = 4 if is_cloud else 1
 
-                huggingface_handler = integration_controller.get_handler_module('huggingface')
+                huggingface_handler = integration_controller.get_handler_module("huggingface")
                 if huggingface_handler is not None and huggingface_handler.Handler is not None:
                     preload_handlers[huggingface_handler.Handler] = 1
 
-                openai_handler = integration_controller.get_handler_module('openai')
+                openai_handler = integration_controller.get_handler_module("openai")
                 if openai_handler is not None and openai_handler.Handler is not None:
                     preload_handlers[openai_handler.Handler] = 1
 
@@ -236,146 +232,144 @@ class ProcessCache:
         for handler in preload_handlers:
             self._keep_alive[handler.name] = preload_handlers[handler]
             self.cache[handler.name] = {
-                'last_usage_at': time.time(),
-                'handler_module': handler.__module__,
-                'processes': [
+                "last_usage_at": time.time(),
+                "handler_module": handler.__module__,
+                "processes": [
                     WarmProcess(init_ml_handler, (handler.__module__,))
                     for _x in range(preload_handlers[handler])
-                ]
+                ],
             }
 
-    def apply_async(self, task_type: ML_TASK_TYPE, model_id: Optional[int],
-                    payload: dict, dataframe: Optional[DataFrame] = None) -> Future:
-        """ run new task. If possible - do it in existing process, if not - start new one.
+    def apply_async(
+        self, task_type: ML_TASK_TYPE, model_id: Optional[int], payload: dict, dataframe: Optional[DataFrame] = None
+    ) -> Future:
+        """run new task. If possible - do it in existing process, if not - start new one.
 
-            Args:
-                task_type (ML_TASK_TYPE): type of the task (learn, predict, etc)
-                model_id (int): id of the model
-                payload (dict): any 'lightweight' data that needs to be send in the process
-                dataframe (DataFrame): DataFrame to be send in the process
+        Args:
+            task_type (ML_TASK_TYPE): type of the task (learn, predict, etc)
+            model_id (int): id of the model
+            payload (dict): any 'lightweight' data that needs to be send in the process
+            dataframe (DataFrame): DataFrame to be send in the process
 
-            Returns:
-                Future
+        Returns:
+            Future
         """
         self._start_clean()
-        handler_module_path = payload['handler_meta']['module_path']
-        integration_id = payload['handler_meta']['integration_id']
+        handler_module_path = payload["handler_meta"]["module_path"]
+        integration_id = payload["handler_meta"]["integration_id"]
         if task_type in (ML_TASK_TYPE.LEARN, ML_TASK_TYPE.FINETUNE):
             func = learn_process
             kwargs = {
-                'data_integration_ref': payload['data_integration_ref'],
-                'problem_definition': payload['problem_definition'],
-                'fetch_data_query': payload['fetch_data_query'],
-                'project_name': payload['project_name'],
-                'model_id': model_id,
-                'base_model_id': payload.get('base_model_id'),
-                'set_active': payload['set_active'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "data_integration_ref": payload["data_integration_ref"],
+                "problem_definition": payload["problem_definition"],
+                "fetch_data_query": payload["fetch_data_query"],
+                "project_name": payload["project_name"],
+                "model_id": model_id,
+                "base_model_id": payload.get("base_model_id"),
+                "set_active": payload["set_active"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.PREDICT:
             func = predict_process
             kwargs = {
-                'predictor_record': payload['predictor_record'],
-                'ml_engine_name': payload['handler_meta']['engine'],
-                'args': payload['args'],
-                'dataframe': dataframe,
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "predictor_record": payload["predictor_record"],
+                "ml_engine_name": payload["handler_meta"]["engine"],
+                "args": payload["args"],
+                "dataframe": dataframe,
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.DESCRIBE:
             func = describe_process
             kwargs = {
-                'attribute': payload.get('attribute'),
-                'model_id': model_id,
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "attribute": payload.get("attribute"),
+                "model_id": model_id,
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.CREATE_VALIDATION:
             func = create_validation_process
             kwargs = {
-                'target': payload.get('target'),
-                'args': payload.get('args'),
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "target": payload.get("target"),
+                "args": payload.get("args"),
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.CREATE_ENGINE:
             func = create_engine_process
             kwargs = {
-                'connection_args': payload['connection_args'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "connection_args": payload["connection_args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
            }
         elif task_type == ML_TASK_TYPE.UPDATE_ENGINE:
             func = update_engine_process
             kwargs = {
-                'connection_args': payload['connection_args'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "connection_args": payload["connection_args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.UPDATE:
             func = update_process
             kwargs = {
-                'args': payload['args'],
-                'integration_id': integration_id,
-                'model_id': model_id,
-                'module_path': handler_module_path
+                "args": payload["args"],
+                "integration_id": integration_id,
+                "model_id": model_id,
+                "module_path": handler_module_path,
             }
         elif task_type == ML_TASK_TYPE.FUNC_CALL:
             func = func_call_process
             kwargs = {
-                'name': payload['name'],
-                'args': payload['args'],
-                'integration_id': integration_id,
-                'module_path': handler_module_path
+                "name": payload["name"],
+                "args": payload["args"],
+                "integration_id": integration_id,
+                "module_path": handler_module_path,
             }
         else:
-            raise Exception(f'Unknown ML task type: {task_type}')
+            raise Exception(f"Unknown ML task type: {task_type}")
 
-        ml_engine_name = payload['handler_meta']['engine']
-        model_marker = (model_id, payload['context']['company_id'])
+        ml_engine_name = payload["handler_meta"]["engine"]
+        model_marker = (model_id, payload["context"]["company_id"])
         with self._lock:
             if ml_engine_name not in self.cache:
                 warm_process = WarmProcess(init_ml_handler, (handler_module_path,))
                 self.cache[ml_engine_name] = {
-                    'last_usage_at': None,
-                    'handler_module': handler_module_path,
-                    'processes': [warm_process]
+                    "last_usage_at": None,
+                    "handler_module": handler_module_path,
+                    "processes": [warm_process],
                 }
             else:
                 warm_process = None
                 if model_marker is not None:
                     try:
                         warm_process = next(
-                            p for p in self.cache[ml_engine_name]['processes']
+                            p
+                            for p in self.cache[ml_engine_name]["processes"]
                             if p.ready() and p.has_marker(model_marker)
                         )
                     except StopIteration:
                         pass
                 if warm_process is None:
                     try:
-                        warm_process = next(
-                            p for p in self.cache[ml_engine_name]['processes']
-                            if p.ready()
-                        )
+                        warm_process = next(p for p in self.cache[ml_engine_name]["processes"] if p.ready())
                     except StopIteration:
                         pass
                 if warm_process is None:
                     warm_process = WarmProcess(init_ml_handler, (handler_module_path,))
-                    self.cache[ml_engine_name]['processes'].append(warm_process)
+                    self.cache[ml_engine_name]["processes"].append(warm_process)
 
-            task = warm_process.apply_async(warm_function, func, payload['context'], **kwargs)
-            self.cache[ml_engine_name]['last_usage_at'] = time.time()
+            task = warm_process.apply_async(warm_function, func, payload["context"], **kwargs)
+            self.cache[ml_engine_name]["last_usage_at"] = time.time()
             warm_process.add_marker(model_marker)
             return task
 
     def _clean(self) -> None:
-        """ worker that stop unused processes
-        """
+        """worker that stop unused processes"""
         while self._stop_event.wait(timeout=10) is False:
             with self._lock:
                 for handler_name in self.cache.keys():
-                    processes = self.cache[handler_name]['processes']
+                    processes = self.cache[handler_name]["processes"]
                     processes.sort(key=lambda x: x.is_marked())
 
                     expected_count = 0
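
The selection policy in `apply_async` is worth spelling out for reviewers: prefer a ready process that has already served this model (its marker), fall back to any ready process, and only then pay the cost of spawning a fresh one. A standalone sketch of the same policy (the `ToyProcess` class and `pick_process` function are hypothetical stand-ins, not MindsDB code):

from typing import Optional

class ToyProcess:
    """Hypothetical stand-in for WarmProcess: tracks readiness and model markers."""

    def __init__(self) -> None:
        self.markers = set()
        self.busy = False

    def ready(self) -> bool:
        return not self.busy

def pick_process(pool: list[ToyProcess], marker: Optional[tuple]) -> ToyProcess:
    # 1) a ready process that already served this model (its state is warm)
    for p in pool:
        if p.ready() and marker in p.markers:
            return p
    # 2) any ready process
    for p in pool:
        if p.ready():
            return p
    # 3) nothing ready: spawn a new process and add it to the pool
    p = ToyProcess()
    pool.append(p)
    return p

pool = [ToyProcess(), ToyProcess()]
pool[1].markers.add((42, "company-1"))
assert pick_process(pool, (42, "company-1")) is pool[1]
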
@@ -395,9 +389,7 @@ class ProcessCache:
                            break
 
                     while expected_count > len(processes):
-                        processes.append(
-                            WarmProcess(init_ml_handler, (self.cache[handler_name]['handler_module'],))
-                        )
+                        processes.append(WarmProcess(init_ml_handler, (self.cache[handler_name]["handler_module"],)))
 
     def shutdown(self, wait: bool = True) -> None:
         """Call 'shutdown' for each process cache
@@ -406,25 +398,25 @@
         """
         with self._lock:
             for handler_name in self.cache:
-                for process in self.cache[handler_name]['processes']:
+                for process in self.cache[handler_name]["processes"]:
                     process.shutdown(wait=wait)
-                self.cache[handler_name]['processes'] = []
+                self.cache[handler_name]["processes"] = []
 
     def remove_processes_for_handler(self, handler_name: str) -> None:
         """
-            Remove all warm processes for a given handler.
-            This is useful when the previous processes use an outdated instance of the handler.
-            A good example is when the dependencies for a handler are installed after attempting to use the handler.
+        Remove all warm processes for a given handler.
+        This is useful when the previous processes use an outdated instance of the handler.
+        A good example is when the dependencies for a handler are installed after attempting to use the handler.
 
-            Args:
-                handler_name (str): name of the handler.
+        Args:
+            handler_name (str): name of the handler.
         """
         with self._lock:
             if handler_name in self.cache:
-                for process in self.cache[handler_name]['processes']:
+                for process in self.cache[handler_name]["processes"]:
                     process.shutdown()
 
-                self.cache[handler_name]['processes'] = []
+                self.cache[handler_name]["processes"] = []
 
 
 process_cache = ProcessCache()
mindsdb/integrations/libs/response.py
@@ -1,3 +1,4 @@
+import sys
 from typing import Callable
 from dataclasses import dataclass, fields
 
@@ -41,9 +42,15 @@ INF_SCHEMA_COLUMNS_NAMES_SET = set(f.name for f in fields(INF_SCHEMA_COLUMNS_NAMES))
 
 class HandlerResponse:
     def __init__(
-        self, resp_type: RESPONSE_TYPE, data_frame: pandas.DataFrame = None, query: ASTNode = 0,
-        error_code: int = 0, error_message: str | None = None, affected_rows: int | None = None,
-        mysql_types: list[MYSQL_DATA_TYPE] | None = None
+        self,
+        resp_type: RESPONSE_TYPE,
+        data_frame: pandas.DataFrame = None,
+        query: ASTNode = 0,
+        error_code: int = 0,
+        error_message: str | None = None,
+        affected_rows: int | None = None,
+        mysql_types: list[MYSQL_DATA_TYPE] | None = None,
+        is_acceptable_error: bool = False,
     ) -> None:
         self.resp_type = resp_type
         self.query = query
@@ -54,6 +61,11 @@ class HandlerResponse:
         if isinstance(self.affected_rows, int) is False or self.affected_rows < 0:
             self.affected_rows = 0
         self.mysql_types = mysql_types
+        self.is_acceptable_error = is_acceptable_error
+        self.exception = None
+        current_exception = sys.exc_info()
+        if current_exception[0] is not None:
+            self.exception = current_exception[1]
 
     @property
     def type(self):
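
The `sys.exc_info()` capture means any `HandlerResponse` built inside an `except` block now carries the live exception object without callers having to pass it in explicitly. A minimal sketch of the same pattern (the `Response` class and `run_query` function here are hypothetical, not the MindsDB ones):

import sys

class Response:
    """Hypothetical response object that snapshots the in-flight exception."""

    def __init__(self, error_message: str | None = None) -> None:
        self.error_message = error_message
        self.exception = None
        exc_type, exc_value, _tb = sys.exc_info()
        # Non-None only when the constructor runs inside an active `except` block.
        if exc_type is not None:
            self.exception = exc_value

def run_query() -> Response:
    try:
        raise ConnectionError("database is unreachable")
    except ConnectionError as e:
        return Response(error_message=str(e))

resp = run_query()
print(type(resp.exception).__name__)  # ConnectionError
print(resp.exception is not None)     # True: callers can inspect or re-raise it
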
@@ -71,9 +83,7 @@
                 f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}, "
                 f"the error is: {self.error_message}"
             )
-            raise ValueError(
-                f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}"
-            )
+            raise ValueError(f"Cannot convert {self.resp_type} to {RESPONSE_TYPE.COLUMNS_TABLE}")
 
         self.data_frame.columns = [name.upper() for name in self.data_frame.columns]
         self.data_frame[INF_SCHEMA_COLUMNS_NAMES.MYSQL_DATA_TYPE] = self.data_frame[
@@ -83,9 +93,7 @@
         # region validate df
         current_columns_set = set(self.data_frame.columns)
         if INF_SCHEMA_COLUMNS_NAMES_SET != current_columns_set:
-            raise ValueError(
-                f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}"
-            )
+            raise ValueError(f"Columns set for INFORMATION_SCHEMA.COLUMNS is wrong: {list(current_columns_set)}")
         # endregion
 
         self.data_frame = self.data_frame.astype(
@@ -112,9 +120,7 @@
         try:
             data = None
             if self.data_frame is not None:
-                data = self.data_frame.to_json(
-                    orient="split", index=False, date_format="iso"
-                )
+                data = self.data_frame.to_json(orient="split", index=False, date_format="iso")
         except Exception as e:
             logger.error("%s.to_json: error - %s", self.__class__.__name__, e)
             data = None
mindsdb/integrations/libs/vectordatabase_handler.py
@@ -593,6 +593,32 @@ class VectorStoreHandler(BaseHandler):
         """
         raise NotImplementedError(f"Hybrid search not supported for VectorStoreHandler {self.name}")
 
+    def check_existing_ids(self, table_name: str, ids: List[str]) -> List[str]:
+        """
+        Check which IDs from the provided list already exist in the table.
+
+        Args:
+            table_name (str): Name of the table to check
+            ids (List[str]): List of IDs to check for existence
+
+        Returns:
+            List[str]: List of IDs that already exist in the table
+        """
+        if not ids:
+            return []
+
+        try:
+            # Query existing IDs
+            df_existing = self.select(
+                table_name,
+                columns=[TableField.ID.value],
+                conditions=[FilterCondition(column=TableField.ID.value, op=FilterOperator.IN, value=ids)],
+            )
+            return list(df_existing[TableField.ID.value]) if not df_existing.empty else []
+        except Exception:
+            # If select fails for any reason, return empty list to be safe
+            return []
+
     def create_index(self, *args, **kwargs):
         """
         Create an index on the specified table.
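
The new `check_existing_ids` helper supports insert-vs-update decisions upstream of a vector store. A hedged sketch of how a caller might split incoming rows (the `FakeVectorStore` class, collection name, and document IDs are hypothetical placeholders):

from typing import List

class FakeVectorStore:
    """Hypothetical stand-in for a VectorStoreHandler with one stored ID."""

    def __init__(self) -> None:
        self._stored = {"doc-1"}

    def check_existing_ids(self, table_name: str, ids: List[str]) -> List[str]:
        # Mirrors the new helper's contract: return only the IDs already present.
        return [i for i in ids if i in self._stored]

handler = FakeVectorStore()
incoming = {"doc-1": "updated chunk", "doc-2": "new chunk"}

existing = set(handler.check_existing_ids("my_collection", list(incoming)))
to_update = {k: v for k, v in incoming.items() if k in existing}
to_insert = {k: v for k, v in incoming.items() if k not in existing}

print(sorted(to_update))  # ['doc-1']
print(sorted(to_insert))  # ['doc-2']
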
mindsdb/integrations/utilities/files/file_reader.py
@@ -1,17 +1,16 @@
-from dataclasses import dataclass, astuple
-import traceback
-import json
 import csv
-from io import BytesIO, StringIO, IOBase
-from pathlib import Path
+import json
 import codecs
+from io import BytesIO, StringIO, IOBase
 from typing import List, Generator
+from pathlib import Path
+from dataclasses import dataclass, astuple
 
 import filetype
 import pandas as pd
 from charset_normalizer import from_bytes
-from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 
+from mindsdb.interfaces.knowledge_base.preprocessing.text_splitter import TextSplitter
 from mindsdb.utilities import log
 
 logger = log.getLogger(__name__)
@@ -76,7 +75,7 @@ def decode(file_obj: IOBase) -> StringIO:
 
         data_str = StringIO(byte_str.decode(encoding, errors))
     except Exception as e:
-        logger.error(traceback.format_exc())
+        logger.exception("Error during file decode:")
 
         raise FileProcessingError("Could not load into string") from e
82
81
  return data_str