MindsDB 25.9.1.2__py3-none-any.whl → 25.9.3rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MindsDB might be problematic.
- mindsdb/__about__.py +1 -1
- mindsdb/__main__.py +39 -20
- mindsdb/api/a2a/agent.py +7 -9
- mindsdb/api/a2a/common/server/server.py +3 -3
- mindsdb/api/a2a/common/server/task_manager.py +4 -4
- mindsdb/api/a2a/task_manager.py +15 -17
- mindsdb/api/common/middleware.py +9 -11
- mindsdb/api/executor/command_executor.py +2 -4
- mindsdb/api/executor/datahub/datanodes/datanode.py +2 -2
- mindsdb/api/executor/datahub/datanodes/integration_datanode.py +100 -48
- mindsdb/api/executor/datahub/datanodes/project_datanode.py +8 -4
- mindsdb/api/executor/datahub/datanodes/system_tables.py +1 -1
- mindsdb/api/executor/exceptions.py +29 -10
- mindsdb/api/executor/planner/plan_join.py +17 -3
- mindsdb/api/executor/sql_query/sql_query.py +74 -74
- mindsdb/api/executor/sql_query/steps/fetch_dataframe.py +1 -2
- mindsdb/api/executor/sql_query/steps/subselect_step.py +0 -1
- mindsdb/api/executor/utilities/functions.py +6 -6
- mindsdb/api/executor/utilities/sql.py +32 -16
- mindsdb/api/http/gui.py +5 -11
- mindsdb/api/http/initialize.py +8 -10
- mindsdb/api/http/namespaces/agents.py +10 -12
- mindsdb/api/http/namespaces/analysis.py +13 -20
- mindsdb/api/http/namespaces/auth.py +1 -1
- mindsdb/api/http/namespaces/config.py +15 -11
- mindsdb/api/http/namespaces/databases.py +140 -201
- mindsdb/api/http/namespaces/file.py +15 -4
- mindsdb/api/http/namespaces/handlers.py +7 -2
- mindsdb/api/http/namespaces/knowledge_bases.py +8 -7
- mindsdb/api/http/namespaces/models.py +94 -126
- mindsdb/api/http/namespaces/projects.py +13 -22
- mindsdb/api/http/namespaces/sql.py +33 -25
- mindsdb/api/http/namespaces/tab.py +27 -37
- mindsdb/api/http/namespaces/views.py +1 -1
- mindsdb/api/http/start.py +14 -8
- mindsdb/api/mcp/__init__.py +2 -1
- mindsdb/api/mysql/mysql_proxy/executor/mysql_executor.py +15 -20
- mindsdb/api/mysql/mysql_proxy/mysql_proxy.py +26 -50
- mindsdb/api/mysql/mysql_proxy/utilities/__init__.py +0 -1
- mindsdb/api/postgres/postgres_proxy/executor/executor.py +6 -13
- mindsdb/api/postgres/postgres_proxy/postgres_packets/postgres_packets.py +40 -28
- mindsdb/integrations/handlers/byom_handler/byom_handler.py +168 -185
- mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py +11 -5
- mindsdb/integrations/handlers/file_handler/file_handler.py +7 -0
- mindsdb/integrations/handlers/lightwood_handler/functions.py +45 -79
- mindsdb/integrations/handlers/openai_handler/openai_handler.py +1 -1
- mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py +20 -2
- mindsdb/integrations/handlers/postgres_handler/postgres_handler.py +18 -3
- mindsdb/integrations/handlers/shopify_handler/shopify_handler.py +25 -12
- mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py +2 -1
- mindsdb/integrations/handlers/statsforecast_handler/requirements.txt +1 -0
- mindsdb/integrations/handlers/statsforecast_handler/requirements_extra.txt +1 -0
- mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py +4 -4
- mindsdb/integrations/libs/api_handler.py +10 -10
- mindsdb/integrations/libs/base.py +4 -4
- mindsdb/integrations/libs/llm/utils.py +2 -2
- mindsdb/integrations/libs/ml_handler_process/create_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/func_call_process.py +2 -7
- mindsdb/integrations/libs/ml_handler_process/learn_process.py +37 -47
- mindsdb/integrations/libs/ml_handler_process/update_engine_process.py +4 -7
- mindsdb/integrations/libs/ml_handler_process/update_process.py +2 -7
- mindsdb/integrations/libs/process_cache.py +132 -140
- mindsdb/integrations/libs/response.py +18 -12
- mindsdb/integrations/libs/vectordatabase_handler.py +26 -0
- mindsdb/integrations/utilities/files/file_reader.py +6 -7
- mindsdb/integrations/utilities/rag/config_loader.py +37 -26
- mindsdb/integrations/utilities/rag/rerankers/base_reranker.py +59 -9
- mindsdb/integrations/utilities/rag/rerankers/reranker_compressor.py +4 -4
- mindsdb/integrations/utilities/rag/retrievers/sql_retriever.py +55 -133
- mindsdb/integrations/utilities/rag/settings.py +58 -133
- mindsdb/integrations/utilities/rag/splitters/file_splitter.py +5 -15
- mindsdb/interfaces/agents/agents_controller.py +2 -1
- mindsdb/interfaces/agents/constants.py +0 -2
- mindsdb/interfaces/agents/litellm_server.py +34 -58
- mindsdb/interfaces/agents/mcp_client_agent.py +10 -10
- mindsdb/interfaces/agents/mindsdb_database_agent.py +5 -5
- mindsdb/interfaces/agents/run_mcp_agent.py +12 -21
- mindsdb/interfaces/chatbot/chatbot_task.py +20 -23
- mindsdb/interfaces/chatbot/polling.py +30 -18
- mindsdb/interfaces/data_catalog/data_catalog_loader.py +10 -10
- mindsdb/interfaces/database/integrations.py +19 -2
- mindsdb/interfaces/file/file_controller.py +6 -6
- mindsdb/interfaces/functions/controller.py +1 -1
- mindsdb/interfaces/functions/to_markdown.py +2 -2
- mindsdb/interfaces/jobs/jobs_controller.py +5 -5
- mindsdb/interfaces/jobs/scheduler.py +3 -8
- mindsdb/interfaces/knowledge_base/controller.py +54 -25
- mindsdb/interfaces/knowledge_base/preprocessing/json_chunker.py +40 -61
- mindsdb/interfaces/model/model_controller.py +170 -166
- mindsdb/interfaces/query_context/context_controller.py +14 -2
- mindsdb/interfaces/skills/custom/text2sql/mindsdb_sql_toolkit.py +6 -4
- mindsdb/interfaces/skills/retrieval_tool.py +43 -50
- mindsdb/interfaces/skills/skill_tool.py +2 -2
- mindsdb/interfaces/skills/sql_agent.py +25 -19
- mindsdb/interfaces/storage/fs.py +114 -169
- mindsdb/interfaces/storage/json.py +19 -18
- mindsdb/interfaces/storage/model_fs.py +54 -92
- mindsdb/interfaces/tabs/tabs_controller.py +49 -72
- mindsdb/interfaces/tasks/task_monitor.py +3 -9
- mindsdb/interfaces/tasks/task_thread.py +7 -9
- mindsdb/interfaces/triggers/trigger_task.py +7 -13
- mindsdb/interfaces/triggers/triggers_controller.py +47 -50
- mindsdb/migrations/migrate.py +16 -16
- mindsdb/utilities/api_status.py +58 -0
- mindsdb/utilities/config.py +49 -0
- mindsdb/utilities/exception.py +40 -1
- mindsdb/utilities/fs.py +0 -1
- mindsdb/utilities/hooks/profiling.py +17 -14
- mindsdb/utilities/langfuse.py +40 -45
- mindsdb/utilities/log.py +272 -0
- mindsdb/utilities/ml_task_queue/consumer.py +52 -58
- mindsdb/utilities/ml_task_queue/producer.py +26 -30
- mindsdb/utilities/render/sqlalchemy_render.py +8 -7
- mindsdb/utilities/utils.py +2 -2
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/METADATA +266 -261
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/RECORD +119 -119
- mindsdb/api/mysql/mysql_proxy/utilities/exceptions.py +0 -14
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/WHEEL +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/licenses/LICENSE +0 -0
- {mindsdb-25.9.1.2.dist-info → mindsdb-25.9.3rc1.dist-info}/top_level.txt +0 -0
mindsdb/integrations/handlers/chromadb_handler/chromadb_handler.py

@@ -17,7 +17,6 @@ from mindsdb.integrations.libs.vectordatabase_handler import (
     TableField,
     VectorStoreHandler,
 )
-from mindsdb.interfaces.storage.model_fs import HandlerStorage
 from mindsdb.utilities import log
 
 logger = log.getLogger(__name__)
@@ -55,7 +54,7 @@ class ChromaDBHandler(VectorStoreHandler):
 
     def __init__(self, name: str, **kwargs):
         super().__init__(name)
-        self.handler_storage =
+        self.handler_storage = kwargs["handler_storage"]
         self._client = None
         self.persist_directory = None
         self.is_connected = False
@@ -73,8 +72,6 @@ class ChromaDBHandler(VectorStoreHandler):
             "hnsw:space": config.distance,
         }
 
-        self.connect()
-
     def validate_connection_parameters(self, name, **kwargs):
         """
         Validate the connection parameters.
@@ -88,7 +85,7 @@ class ChromaDBHandler(VectorStoreHandler):
         if config.persist_directory:
            if os.path.isabs(config.persist_directory):
                self.persist_directory = config.persist_directory
-
+            else:
                # get full persistence directory from handler storage
                self.persist_directory = self.handler_storage.folder_get(config.persist_directory)
                self._use_handler_storage = True
@@ -149,6 +146,7 @@ class ChromaDBHandler(VectorStoreHandler):
         need_to_close = self.is_connected is False
 
         try:
+            self.connect()
             self._client.heartbeat()
             response_code.success = True
         except Exception as e:
@@ -233,6 +231,7 @@
         offset: int = None,
         limit: int = None,
     ) -> pd.DataFrame:
+        self.connect()
         collection = self._client.get_collection(table_name)
         filters = self._translate_metadata_condition(conditions)
 
@@ -399,6 +398,7 @@
         Insert/Upsert data into ChromaDB collection.
         If records with same IDs exist, they will be updated.
         """
+        self.connect()
         collection = self._client.get_or_create_collection(collection_name, metadata=self.create_collection_metadata)
 
         # Convert metadata from string to dict if needed
@@ -449,6 +449,7 @@
         """
         Update data in the ChromaDB database.
         """
+        self.connect()
         collection = self._client.get_collection(table_name)
 
         # drop columns with all None values
@@ -466,6 +467,7 @@
         self._sync()
 
     def delete(self, table_name: str, conditions: List[FilterCondition] = None):
+        self.connect()
         filters = self._translate_metadata_condition(conditions)
         # get id filters
         id_filters = [condition.value for condition in conditions if condition.column == TableField.ID.value] or None
@@ -480,6 +482,7 @@
         """
         Create a collection with the given name in the ChromaDB database.
         """
+        self.connect()
         self._client.create_collection(
             table_name, get_or_create=if_not_exists, metadata=self.create_collection_metadata
         )
@@ -489,6 +492,7 @@
         """
         Delete a collection from the ChromaDB database.
         """
+        self.connect()
         try:
             self._client.delete_collection(table_name)
             self._sync()
@@ -502,6 +506,7 @@
         """
         Get the list of collections in the ChromaDB database.
         """
+        self.connect()
         collections = self._client.list_collections()
         collections_name = pd.DataFrame(
             columns=["table_name"],
@@ -511,6 +516,7 @@
 
     def get_columns(self, table_name: str) -> HandlerResponse:
         # check if collection exists
+        self.connect()
         try:
             _ = self._client.get_collection(table_name)
         except ValueError:
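
Most of the ChromaDB hunks above follow one pattern: the eager self.connect() in __init__ is removed and each data operation calls self.connect() itself. Below is a minimal, self-contained sketch of that lazy-connect pattern; _DummyClient and LazyVectorStore are illustrative stand-ins, not the handler's real classes.

    class _DummyClient:
        """Stand-in for a real chromadb client so the sketch runs on its own."""
        def __init__(self, path):
            self.path = path

        def list_collections(self):
            return []

    class LazyVectorStore:
        def __init__(self, persist_directory: str):
            # no connection here: the client is created on first use
            self.persist_directory = persist_directory
            self._client = None
            self.is_connected = False

        def connect(self):
            # idempotent: reuse an existing client, otherwise create one
            if not self.is_connected or self._client is None:
                self._client = _DummyClient(self.persist_directory)
                self.is_connected = True
            return self._client

        def get_tables(self):
            self.connect()  # every public operation ensures a live connection first
            return self._client.list_collections()

    print(LazyVectorStore("/tmp/vectors").get_tables())  # []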

mindsdb/integrations/handlers/file_handler/file_handler.py

@@ -50,6 +50,7 @@ class FileHandler(DatabaseHandler):
         self.chunk_size = connection_data.get("chunk_size", DEFAULT_CHUNK_SIZE)
         self.chunk_overlap = connection_data.get("chunk_overlap", DEFAULT_CHUNK_OVERLAP)
         self.file_controller = file_controller
+        self.thread_safe = True
 
     def connect(self, **kwargs):
         return
@@ -83,6 +84,12 @@ class FileHandler(DatabaseHandler):
         table_name = table_identifier.parts[-1]
         try:
             self.file_controller.delete_file(table_name)
+        except FileNotFoundError as e:
+            if not query.if_exists:
+                return Response(
+                    RESPONSE_TYPE.ERROR,
+                    error_message=f"Can't delete table '{table_name}': {e}",
+                )
         except Exception as e:
             return Response(
                 RESPONSE_TYPE.ERROR,
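
The second FileHandler hunk makes DROP TABLE ... IF EXISTS tolerant of a missing file: FileNotFoundError only becomes an error response when if_exists was not requested. A small self-contained sketch of that decision (plain strings stand in for MindsDB's Response objects):

    import os

    def drop_file_table(path: str, if_exists: bool) -> str:
        """Delete the file backing a 'table'; error only if IF EXISTS was not used."""
        try:
            os.remove(path)
        except FileNotFoundError as e:
            if not if_exists:
                return f"error: can't delete table '{path}': {e}"
        return "ok"

    print(drop_file_table("/tmp/no_such_table.csv", if_exists=True))   # ok
    print(drop_file_table("/tmp/no_such_table.csv", if_exists=False))  # error: ...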

mindsdb/integrations/handlers/lightwood_handler/functions.py

@@ -27,51 +27,40 @@ logger = log.getLogger(__name__)
 
 
 def create_learn_mark():
-    if os.name ==
-        p = Path(tempfile.gettempdir()).joinpath(
+    if os.name == "posix":
+        p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/")
         p.mkdir(parents=True, exist_ok=True)
-        p.joinpath(f
+        p.joinpath(f"{os.getpid()}").touch()
 
 
 def delete_learn_mark():
-    if os.name ==
-        p = (
-            Path(tempfile.gettempdir())
-            .joinpath('mindsdb/learn_processes/')
-            .joinpath(f'{os.getpid()}')
-        )
+    if os.name == "posix":
+        p = Path(tempfile.gettempdir()).joinpath("mindsdb/learn_processes/").joinpath(f"{os.getpid()}")
         if p.exists():
             p.unlink()
 
 
-@mark_process(name=
+@mark_process(name="learn")
 @profiler.profile()
 def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = None):
+    model_storage.training_state_set(current_state_num=1, total_states=5, state_name="Generating problem definition")
+    json_ai_override = args.pop("using", {})
 
-
-
-    )
-    json_ai_override = args.pop('using', {})
-
-    if 'dtype_dict' in json_ai_override:
-        args['dtype_dict'] = json_ai_override.pop('dtype_dict')
+    if "dtype_dict" in json_ai_override:
+        args["dtype_dict"] = json_ai_override.pop("dtype_dict")
 
-    if
-        args = {**args, **json_ai_override[
+    if "problem_definition" in json_ai_override:
+        args = {**args, **json_ai_override["problem_definition"]}
 
-    if
-        for tss_key in [
-
-        ]:
-            k = f'timeseries_settings.{tss_key}'
+    if "timeseries_settings" in args:
+        for tss_key in [f.name for f in dataclasses.fields(lightwood.api.TimeseriesSettings)]:
+            k = f"timeseries_settings.{tss_key}"
             if k in json_ai_override:
-                args[
+                args["timeseries_settings"][tss_key] = json_ai_override.pop(k)
 
     problem_definition = lightwood.ProblemDefinition.from_dict(args)
 
-    model_storage.training_state_set(
-        current_state_num=2, total_states=5, state_name='Generating JsonAI'
-    )
+    model_storage.training_state_set(current_state_num=2, total_states=5, state_name="Generating JsonAI")
     json_ai = lightwood.json_ai_from_problem(df, problem_definition)
     json_ai = json_ai.to_dict()
     unpack_jsonai_old_args(json_ai_override)
@@ -79,9 +68,7 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
     rep_recur(json_ai, json_ai_override)
     json_ai = JsonAI.from_dict(json_ai)
 
-    model_storage.training_state_set(
-        current_state_num=3, total_states=5, state_name='Generating code'
-    )
+    model_storage.training_state_set(current_state_num=3, total_states=5, state_name="Generating code")
     code = lightwood.code_from_json_ai(json_ai)
 
     predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
@@ -89,33 +76,27 @@ def run_generate(df: DataFrame, predictor_id: int, model_storage, args: dict = N
     db.session.commit()
 
     json_storage = get_json_storage(resource_id=predictor_id)
-    json_storage.set(
+    json_storage.set("json_ai", json_ai.to_dict())
 
 
-@mark_process(name=
+@mark_process(name="learn")
 @profiler.profile()
 def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
     try:
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
         assert predictor_record is not None
 
-        predictor_record.data = {
+        predictor_record.data = {"training_log": "training"}
         predictor_record.status = PREDICTOR_STATUS.TRAINING
         db.session.commit()
 
-        model_storage.training_state_set(
-
-        )
-        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(
-            predictor_record.code
-        )
+        model_storage.training_state_set(current_state_num=4, total_states=5, state_name="Training model")
+        predictor: lightwood.PredictorInterface = lightwood.predictor_from_code(predictor_record.code)
         predictor.learn(df)
 
         db.session.refresh(predictor_record)
 
-        fs = FileStorage(
-            resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
-        )
+        fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
         predictor.save(fs.folder_path / fs.folder_name)
         fs.push(compression_level=0)
 
@@ -124,9 +105,7 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
         # getting training time for each tried model. it is possible to do
         # after training only
         fit_mixers = list(
-            predictor.runtime_log[x]
-            for x in predictor.runtime_log
-            if isinstance(x, tuple) and x[0] == "fit_mixer"
+            predictor.runtime_log[x] for x in predictor.runtime_log if isinstance(x, tuple) and x[0] == "fit_mixer"
         )
         submodel_data = predictor_record.data.get("submodel_data", [])
         # add training time to other mixers info
@@ -135,43 +114,39 @@ def run_fit(predictor_id: int, df: pd.DataFrame, model_storage) -> None:
                 submodel_data[i]["training_time"] = tr_time
         predictor_record.data["submodel_data"] = submodel_data
 
-        model_storage.training_state_set(
-            current_state_num=5, total_states=5, state_name='Complete'
-        )
+        model_storage.training_state_set(current_state_num=5, total_states=5, state_name="Complete")
         predictor_record.dtype_dict = predictor.dtype_dict
         db.session.commit()
     except Exception as e:
         db.session.refresh(predictor_record)
-        predictor_record.data = {
+        predictor_record.data = {"error": f"{traceback.format_exc()}\nMain error: {e}"}
         db.session.commit()
         raise e
 
 
-@mark_process(name=
+@mark_process(name="learn")
 def run_learn_remote(df: DataFrame, predictor_id: int) -> None:
     try:
         serialized_df = json.dumps(df.to_dict())
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
         resp = requests.post(
-            predictor_record.data[
-            json={
+            predictor_record.data["train_url"],
+            json={"df": serialized_df, "target": predictor_record.to_predict[0]},
         )
 
         assert resp.status_code == 200
-        predictor_record.data[
+        predictor_record.data["status"] = "complete"
     except Exception:
-        predictor_record.data[
-        predictor_record.data[
+        predictor_record.data["status"] = "error"
+        predictor_record.data["error"] = str(resp.text)
 
     db.session.commit()
 
 
-@mark_process(name=
+@mark_process(name="learn")
 def run_learn(df: DataFrame, args: dict, model_storage) -> None:
     if df is None or df.shape[0] == 0:
-        raise Exception(
-            'No input data. Ensure the data source is healthy and try again.'
-        )
+        raise Exception("No input data. Ensure the data source is healthy and try again.")
 
     predictor_id = model_storage.predictor_id
 
@@ -187,15 +162,13 @@ def run_learn(df: DataFrame, args: dict, model_storage) -> None:
     db.session.commit()
 
 
-@mark_process(name=
+@mark_process(name="finetune")
 def run_finetune(df: DataFrame, args: dict, model_storage):
     try:
         if df is None or df.shape[0] == 0:
-            raise Exception(
-                'No input data. Ensure the data source is healthy and try again.'
-            )
+            raise Exception("No input data. Ensure the data source is healthy and try again.")
 
-        base_predictor_id = args[
+        base_predictor_id = args["base_model_id"]
         base_predictor_record = db.Predictor.query.get(base_predictor_id)
         if base_predictor_record.status != PREDICTOR_STATUS.COMPLETE:
             raise Exception("Base model must be in status 'complete'")
@@ -204,11 +177,9 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
         predictor_record = db.Predictor.query.get(predictor_id)
 
         # TODO move this to ModelStorage (don't work with database directly)
-        predictor_record.data = {
+        predictor_record.data = {"training_log": "training"}
         predictor_record.training_start_at = datetime.now()
-        predictor_record.status =
-            PREDICTOR_STATUS.FINETUNING
-        )  # TODO: parallel execution block
+        predictor_record.status = PREDICTOR_STATUS.FINETUNING  # TODO: parallel execution block
         db.session.commit()
 
         base_fs = FileStorage(
@@ -219,28 +190,23 @@ def run_finetune(df: DataFrame, args: dict, model_storage):
         predictor = lightwood.predictor_from_state(
             base_fs.folder_path / base_fs.folder_name, base_predictor_record.code
         )
-        predictor.adjust(df, adjust_args=args.get(
+        predictor.adjust(df, adjust_args=args.get("using", {}))
 
-        fs = FileStorage(
-            resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True
-        )
+        fs = FileStorage(resource_group=RESOURCE_GROUP.PREDICTOR, resource_id=predictor_id, sync=True)
         predictor.save(fs.folder_path / fs.folder_name)
         fs.push(compression_level=0)
 
-        predictor_record.data = (
-            predictor.model_analysis.to_dict()
-        )  # todo: update accuracy in LW as post-finetune hook
+        predictor_record.data = predictor.model_analysis.to_dict()  # todo: update accuracy in LW as post-finetune hook
         predictor_record.code = base_predictor_record.code
-        predictor_record.update_status =
+        predictor_record.update_status = "up_to_date"
         predictor_record.status = PREDICTOR_STATUS.COMPLETE
         predictor_record.training_stop_at = datetime.now()
         db.session.commit()
 
     except Exception as e:
-        logger.error(
+        logger.error("Unexpected error during Lightwood model finetune:", exc_info=True)
         predictor_id = model_storage.predictor_id
         predictor_record = db.Predictor.query.with_for_update().get(predictor_id)
-        logger.error(traceback.format_exc())
         error_message = format_exception_error(e)
         predictor_record.data = {"error": error_message}
         predictor_record.status = PREDICTOR_STATUS.ERROR
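
Besides the quote-style cleanup, the Lightwood hunks add a first training_state_set call so all five stages of training report progress (problem definition, JsonAI, code generation, training, complete). A rough sketch of that staged-progress idea, with a hypothetical report function standing in for model_storage.training_state_set:

    def report(current_state_num: int, total_states: int, state_name: str) -> None:
        # stand-in for model_storage.training_state_set(...)
        print(f"[{current_state_num}/{total_states}] {state_name}")

    def train_model(df=None):
        report(1, 5, "Generating problem definition")
        report(2, 5, "Generating JsonAI")
        report(3, 5, "Generating code")
        report(4, 5, "Training model")
        # ... the real pipeline does the corresponding work between these calls ...
        report(5, 5, "Complete")

    train_model()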

mindsdb/integrations/handlers/openai_handler/openai_handler.py

@@ -114,7 +114,7 @@ class OpenAIHandler(BaseMLEngine):
         except NotFoundError:
             pass
         except AuthenticationError as e:
-            if e.body
+            if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
                 raise Exception("Invalid api key")
             raise Exception(f"Something went wrong: {e}")
 
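
The OpenAI fix guards against a non-dict e.body before reading its code field. A tiny sketch of that defensive check; FakeAuthError is a stand-in, not the real openai AuthenticationError:

    class FakeAuthError(Exception):
        def __init__(self, body):
            super().__init__(str(body))
            self.body = body

    def describe(e: FakeAuthError) -> str:
        # only trust body["code"] when body really is a dict
        if isinstance(e.body, dict) and e.body.get("code") == "invalid_api_key":
            return "Invalid api key"
        return f"Something went wrong: {e}"

    print(describe(FakeAuthError({"code": "invalid_api_key"})))  # Invalid api key
    print(describe(FakeAuthError("plain text body")))            # Something went wrong: ...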

mindsdb/integrations/handlers/pgvector_handler/pgvector_handler.py

@@ -17,7 +17,9 @@ from mindsdb_sql_parser.ast import (
     Delete,
     Update,
     Function,
+    DropTables,
 )
+from mindsdb_sql_parser.ast.base import ASTNode
 from pgvector.psycopg import register_vector
 
 from mindsdb.integrations.handlers.postgres_handler.postgres_handler import (
@@ -116,9 +118,22 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
             return Response(RESPONSE_TYPE.OK)
         return super().get_tables()
 
-    def
+    def query(self, query: ASTNode) -> Response:
+        # Option to drop table of shared pgvector connection
+        if isinstance(query, DropTables):
+            query.tables = [self._check_table(table.parts[-1]) for table in query.tables]
+            query_str, params = self.renderer.get_exec_params(query, with_failback=True)
+            return self.native_query(query_str, params, no_restrict=True)
+        return super().query(query)
+
+    def native_query(self, query, params=None, no_restrict=False) -> Response:
+        """
+        Altered `native_query` method of postgres handler.
+        Restrict usage of native query from executor with shared pg vector connection
+        Exceptions: if it is used by pgvector itself (with no_restrict = True)
+        """
         # Prevent execute native queries
-        if self._is_shared_db:
+        if self._is_shared_db and not no_restrict:
             return Response(RESPONSE_TYPE.OK)
         return super().native_query(query, params=params)
 
@@ -550,6 +565,9 @@ class PgVectorHandler(PostgresHandler, VectorStoreHandler, KeywordSearchBase):
 
     def create_table(self, table_name: str):
         """Create a table with a vector column."""
+
+        table_name = self._check_table(table_name)
+
         with self.connection.cursor() as cur:
             # For sparse vectors, use sparsevec type
             vector_column_type = "sparsevec" if self._is_sparse else "vector"
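
The pgvector hunks add a no_restrict escape hatch: on a shared connection, native_query from outside the handler is skipped, while the handler's own calls (such as the new DROP TABLE path in query) pass no_restrict=True. A simplified, self-contained sketch of that gate, not the handler's actual class hierarchy:

    class SharedVectorDB:
        def __init__(self, is_shared_db: bool):
            self._is_shared_db = is_shared_db

        def native_query(self, sql: str, no_restrict: bool = False) -> str:
            # external raw SQL is blocked on a shared connection...
            if self._is_shared_db and not no_restrict:
                return "skipped (shared connection)"
            return f"executed: {sql}"

        def drop_table(self, table_name: str) -> str:
            # ...but the handler's own internal statements opt out of the restriction
            return self.native_query(f"DROP TABLE {table_name}", no_restrict=True)

    db = SharedVectorDB(is_shared_db=True)
    print(db.native_query("SELECT 1"))      # skipped (shared connection)
    print(db.drop_table("my_collection"))   # executed: DROP TABLE my_collection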

mindsdb/integrations/handlers/postgres_handler/postgres_handler.py

@@ -1,5 +1,6 @@
 import time
 import json
+import logging
 from typing import Optional, Any
 
 import pandas as pd
@@ -279,7 +280,7 @@ class PostgresHandler(MetaDatabaseHandler):
         df.columns = columns
 
     @profiler.profile()
-    def native_query(self, query: str, params=None) -> Response:
+    def native_query(self, query: str, params=None, **kwargs) -> Response:
         """
         Executes a SQL query on the PostgreSQL database and returns the result.
 
@@ -304,8 +305,19 @@ class PostgresHandler(MetaDatabaseHandler):
                     result = cur.fetchall()
                     response = _make_table_response(result, cur)
                 connection.commit()
+            except (psycopg.ProgrammingError, psycopg.DataError) as e:
+                # These is 'expected' exceptions, they should not be treated as mindsdb's errors
+                # ProgrammingError: table not found or already exists, syntax error, etc
+                # DataError: division by zero, numeric value out of range, etc.
+                # https://www.psycopg.org/psycopg3/docs/api/errors.html
+                log_message = "Database query failed with error, likely due to invalid SQL query"
+                if logger.isEnabledFor(logging.DEBUG):
+                    log_message += f". Executed query:\n{query}"
+                logger.info(log_message)
+                response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e), is_acceptable_error=True)
+                connection.rollback()
             except Exception as e:
-                logger.error(f"Error running query
+                logger.error(f"Error running query:\n{query}\non {self.database}, {e}")
                 response = Response(RESPONSE_TYPE.ERROR, error_code=0, error_message=str(e))
                 connection.rollback()
 
@@ -466,7 +478,10 @@ class PostgresHandler(MetaDatabaseHandler):
             AND
                 table_schema = {schema_name}
         """
-
+        # If it is used by pgvector handler - `native_query` method of pgvector handler will be used
+        # in that case if shared pgvector db is used - `native_query` will be skipped (return empty result)
+        # `no_restrict` flag allows to execute native query, and it will call `native_query` of postgres handler
+        result = self.native_query(query, no_restrict=True)
         result.to_columns_table_response(map_type_fn=_map_type)
         return result
 
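
The Postgres native_query hunk separates "expected" failures (psycopg's ProgrammingError/DataError: bad SQL, missing tables, division by zero) from unexpected ones, logging the former at a lower level and flagging the response as an acceptable error. A self-contained sketch of that classification, using stand-in exception classes instead of psycopg itself:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("query")

    class ProgrammingError(Exception):
        """Stand-in for psycopg.ProgrammingError."""

    class DataError(Exception):
        """Stand-in for psycopg.DataError."""

    def run_query(execute, query: str) -> dict:
        try:
            return {"ok": True, "rows": execute(query)}
        except (ProgrammingError, DataError) as e:
            # user-level SQL problems: log quietly, mark the error as acceptable
            msg = "Database query failed, likely due to invalid SQL query"
            if logger.isEnabledFor(logging.DEBUG):
                msg += f". Executed query:\n{query}"
            logger.info(msg)
            return {"ok": False, "error": str(e), "is_acceptable_error": True}
        except Exception as e:
            # anything else is treated as a real handler error
            logger.error(f"Error running query:\n{query}\n{e}")
            return {"ok": False, "error": str(e), "is_acceptable_error": False}

    def bad_sql(_query):
        raise ProgrammingError('relation "missing_table" does not exist')

    print(run_query(bad_sql, "SELECT * FROM missing_table"))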

mindsdb/integrations/handlers/shopify_handler/shopify_handler.py

@@ -1,7 +1,17 @@
 import shopify
 import requests
 
-from mindsdb.integrations.handlers.shopify_handler.shopify_tables import
+from mindsdb.integrations.handlers.shopify_handler.shopify_tables import (
+    ProductsTable,
+    CustomersTable,
+    OrdersTable,
+    InventoryLevelTable,
+    LocationTable,
+    CustomerReviews,
+    CarrierServiceTable,
+    ShippingZoneTable,
+    SalesChannelTable,
+)
 from mindsdb.integrations.libs.api_handler import APIHandler
 from mindsdb.integrations.libs.response import (
     HandlerStatusResponse as StatusResponse,
@@ -9,7 +19,11 @@ from mindsdb.integrations.libs.response import (
 
 from mindsdb.utilities import log
 from mindsdb_sql_parser import parse_sql
-from mindsdb.integrations.libs.api_handler_exceptions import
+from mindsdb.integrations.libs.api_handler_exceptions import (
+    InvalidNativeQuery,
+    ConnectionFailed,
+    MissingConnectionParams,
+)
 
 logger = log.getLogger(__name__)
 
@@ -19,7 +33,7 @@ class ShopifyHandler(APIHandler):
     The Shopify handler implementation.
     """
 
-    name =
+    name = "shopify"
 
     def __init__(self, name: str, **kwargs):
         """
@@ -81,10 +95,12 @@ class ShopifyHandler(APIHandler):
         if self.kwargs.get("connection_data") is None:
             raise MissingConnectionParams("Incomplete parameters passed to Shopify Handler")
 
-        api_session = shopify.Session(self.connection_data[
+        api_session = shopify.Session(self.connection_data["shop_url"], "2021-10", self.connection_data["access_token"])
 
-        self.yotpo_app_key = self.connection_data[
-        self.yotpo_access_token =
+        self.yotpo_app_key = self.connection_data["yotpo_app_key"] if "yotpo_app_key" in self.connection_data else None
+        self.yotpo_access_token = (
+            self.connection_data["yotpo_access_token"] if "yotpo_access_token" in self.connection_data else None
+        )
 
         self.connection = api_session
 
@@ -107,16 +123,13 @@ class ShopifyHandler(APIHandler):
             shopify.Shop.current()
             response.success = True
         except Exception as e:
-            logger.error(
-            raise ConnectionFailed("Conenction to Shopify failed.")
+            logger.error("Error connecting to Shopify!")
             response.error_message = str(e)
+            raise ConnectionFailed("Conenction to Shopify failed.")
 
         if self.yotpo_app_key is not None and self.yotpo_access_token is not None:
             url = f"https://api.yotpo.com/v1/apps/{self.yotpo_app_key}/reviews?count=1&utoken={self.yotpo_access_token}"
-            headers = {
-                "accept": "application/json",
-                "Content-Type": "application/json"
-            }
+            headers = {"accept": "application/json", "Content-Type": "application/json"}
             if requests.get(url, headers=headers).status_code == 200:
                 response.success = True
             else:

mindsdb/integrations/handlers/snowflake_handler/snowflake_handler.py

@@ -204,10 +204,11 @@ class SnowflakeHandler(MetaDatabaseHandler):
             "user": self.connection_data.get("user"),
             "password": self.connection_data.get("password"),
             "database": self.connection_data.get("database"),
+            "schema": self.connection_data.get("schema", "PUBLIC"),
         }
 
         # Optional connection parameters
-        optional_params = ["
+        optional_params = ["warehouse", "role"]
         for param in optional_params:
             if param in self.connection_data:
                 config[param] = self.connection_data[param]
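
The Snowflake hunk defaults the connection schema to PUBLIC and keeps warehouse and role optional. A small sketch of assembling such a connect config; the keys mirror the hunk, the sample values are made up:

    def build_config(connection_data: dict) -> dict:
        # required keys, with schema defaulted to PUBLIC
        config = {
            "user": connection_data.get("user"),
            "password": connection_data.get("password"),
            "database": connection_data.get("database"),
            "schema": connection_data.get("schema", "PUBLIC"),
        }
        # optional keys are copied only when present
        for param in ["warehouse", "role"]:
            if param in connection_data:
                config[param] = connection_data[param]
        return config

    print(build_config({"user": "demo", "password": "secret", "database": "DEMO_DB", "role": "ANALYST"}))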

mindsdb/integrations/handlers/web_handler/urlcrawl_helpers.py

@@ -170,9 +170,9 @@ def get_all_website_links(url, headers: dict = None) -> dict:
             href = href.rstrip("/")
             urls.add(href)
 
-    except Exception
+    except Exception:
         error_message = traceback.format_exc().splitlines()[-1]
-        logger.
+        logger.exception("An exception occurred:")
         return {
             "url": url,
             "urls": urls,
@@ -238,9 +238,9 @@ def get_all_website_links_recursively(
         if url not in reviewed_urls and matches_filter:
             try:
                 reviewed_urls[url] = get_all_website_links(url, headers=headers)
-            except Exception
+            except Exception:
                 error_message = traceback.format_exc().splitlines()[-1]
-                logger.
+                logger.exception("An exception occurred:")
                 reviewed_urls[url] = {
                     "url": url,
                     "urls": [],
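
The web crawler hunks above, like the MetaAPIHandler hunks that follow, switch the truncated logger calls to logger.exception(...), which logs at ERROR level and appends the active traceback automatically when used inside an except block. A quick illustration:

    import logging

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger("crawler")

    try:
        1 / 0
    except Exception:
        # message plus full traceback; logger.error would need exc_info=True for the same
        logger.exception("An exception occurred:")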

mindsdb/integrations/libs/api_handler.py

@@ -550,8 +550,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_tables"):
                     table_metadata = table_class.meta_get_tables(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame([table_metadata])], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving metadata for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -572,8 +572,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_columns"):
                     column_metadata = table_class.meta_get_columns(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(column_metadata)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving column metadata for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -594,8 +594,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_column_statistics"):
                     column_statistics = table_class.meta_get_column_statistics(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(column_statistics)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving column statistics for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -616,8 +616,8 @@ class MetaAPIHandler(APIHandler):
                 if hasattr(table_class, "meta_get_primary_keys"):
                     primary_key_metadata = table_class.meta_get_primary_keys(table_name, **kwargs)
                     df = pd.concat([df, pd.DataFrame(primary_key_metadata)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving primary keys for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 
@@ -641,8 +641,8 @@ class MetaAPIHandler(APIHandler):
                         table_name, all_tables=table_names if table_names else all_tables, **kwargs
                     )
                     df = pd.concat([df, pd.DataFrame(foreign_key_metadata)], ignore_index=True)
-            except Exception
-                logger.
+            except Exception:
+                logger.exception(f"Error retrieving foreign keys for table {table_name}:")
 
         return Response(RESPONSE_TYPE.TABLE, df)
 