mlrun 1.10.0rc40__py3-none-any.whl → 1.11.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun has been flagged as possibly problematic.
- mlrun/__init__.py +3 -2
- mlrun/__main__.py +0 -4
- mlrun/artifacts/dataset.py +2 -2
- mlrun/artifacts/plots.py +1 -1
- mlrun/{model_monitoring/db/tsdb/tdengine → auth}/__init__.py +2 -3
- mlrun/auth/nuclio.py +89 -0
- mlrun/auth/providers.py +429 -0
- mlrun/auth/utils.py +415 -0
- mlrun/common/constants.py +7 -0
- mlrun/common/model_monitoring/helpers.py +41 -4
- mlrun/common/runtimes/constants.py +28 -0
- mlrun/common/schemas/__init__.py +13 -3
- mlrun/common/schemas/alert.py +2 -2
- mlrun/common/schemas/api_gateway.py +3 -0
- mlrun/common/schemas/auth.py +10 -10
- mlrun/common/schemas/client_spec.py +4 -0
- mlrun/common/schemas/constants.py +25 -0
- mlrun/common/schemas/frontend_spec.py +1 -8
- mlrun/common/schemas/function.py +24 -0
- mlrun/common/schemas/hub.py +3 -2
- mlrun/common/schemas/model_monitoring/__init__.py +1 -1
- mlrun/common/schemas/model_monitoring/constants.py +2 -2
- mlrun/common/schemas/secret.py +17 -2
- mlrun/common/secrets.py +95 -1
- mlrun/common/types.py +10 -10
- mlrun/config.py +53 -15
- mlrun/data_types/infer.py +2 -2
- mlrun/datastore/__init__.py +2 -3
- mlrun/datastore/base.py +274 -10
- mlrun/datastore/datastore.py +1 -1
- mlrun/datastore/datastore_profile.py +49 -17
- mlrun/datastore/model_provider/huggingface_provider.py +6 -2
- mlrun/datastore/model_provider/model_provider.py +2 -2
- mlrun/datastore/model_provider/openai_provider.py +2 -2
- mlrun/datastore/s3.py +15 -16
- mlrun/datastore/sources.py +1 -1
- mlrun/datastore/store_resources.py +4 -4
- mlrun/datastore/storeytargets.py +16 -10
- mlrun/datastore/targets.py +1 -1
- mlrun/datastore/utils.py +16 -3
- mlrun/datastore/v3io.py +1 -1
- mlrun/db/base.py +36 -12
- mlrun/db/httpdb.py +316 -101
- mlrun/db/nopdb.py +29 -11
- mlrun/errors.py +4 -2
- mlrun/execution.py +11 -12
- mlrun/feature_store/api.py +1 -1
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_vector_utils.py +1 -1
- mlrun/feature_store/steps.py +8 -6
- mlrun/frameworks/_common/utils.py +3 -3
- mlrun/frameworks/_dl_common/loggers/logger.py +1 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -1
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +1 -1
- mlrun/frameworks/_ml_common/utils.py +2 -1
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +4 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +2 -1
- mlrun/frameworks/onnx/dataset.py +2 -1
- mlrun/frameworks/onnx/mlrun_interface.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/frameworks/pytorch/utils.py +2 -1
- mlrun/frameworks/sklearn/metric.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +5 -4
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +2 -1
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +2 -1
- mlrun/hub/__init__.py +37 -0
- mlrun/hub/base.py +142 -0
- mlrun/hub/module.py +67 -76
- mlrun/hub/step.py +113 -0
- mlrun/launcher/base.py +2 -1
- mlrun/launcher/local.py +2 -1
- mlrun/model.py +12 -2
- mlrun/model_monitoring/__init__.py +0 -1
- mlrun/model_monitoring/api.py +2 -2
- mlrun/model_monitoring/applications/base.py +20 -6
- mlrun/model_monitoring/applications/context.py +1 -0
- mlrun/model_monitoring/controller.py +7 -17
- mlrun/model_monitoring/db/_schedules.py +2 -16
- mlrun/model_monitoring/db/_stats.py +2 -13
- mlrun/model_monitoring/db/tsdb/__init__.py +9 -7
- mlrun/model_monitoring/db/tsdb/base.py +2 -4
- mlrun/model_monitoring/db/tsdb/preaggregate.py +234 -0
- mlrun/model_monitoring/db/tsdb/stream_graph_steps.py +63 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_metrics_queries.py +414 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_predictions_queries.py +376 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/queries/timescaledb_results_queries.py +590 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connection.py +434 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_connector.py +541 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_operations.py +808 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_schema.py +502 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream.py +163 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/timescaledb_stream_graph_steps.py +60 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_dataframe_processor.py +141 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/utils/timescaledb_query_builder.py +585 -0
- mlrun/model_monitoring/db/tsdb/timescaledb/writer_graph_steps.py +73 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +4 -6
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +147 -79
- mlrun/model_monitoring/features_drift_table.py +2 -1
- mlrun/model_monitoring/helpers.py +2 -1
- mlrun/model_monitoring/stream_processing.py +18 -16
- mlrun/model_monitoring/writer.py +4 -3
- mlrun/package/__init__.py +2 -1
- mlrun/platforms/__init__.py +0 -44
- mlrun/platforms/iguazio.py +1 -1
- mlrun/projects/operations.py +11 -10
- mlrun/projects/project.py +81 -82
- mlrun/run.py +4 -7
- mlrun/runtimes/__init__.py +2 -204
- mlrun/runtimes/base.py +89 -21
- mlrun/runtimes/constants.py +225 -0
- mlrun/runtimes/daskjob.py +4 -2
- mlrun/runtimes/databricks_job/databricks_runtime.py +2 -1
- mlrun/runtimes/mounts.py +5 -0
- mlrun/runtimes/nuclio/__init__.py +12 -8
- mlrun/runtimes/nuclio/api_gateway.py +36 -6
- mlrun/runtimes/nuclio/application/application.py +200 -32
- mlrun/runtimes/nuclio/function.py +154 -49
- mlrun/runtimes/nuclio/serving.py +55 -42
- mlrun/runtimes/pod.py +59 -10
- mlrun/secrets.py +46 -2
- mlrun/serving/__init__.py +2 -0
- mlrun/serving/remote.py +5 -5
- mlrun/serving/routers.py +3 -3
- mlrun/serving/server.py +46 -43
- mlrun/serving/serving_wrapper.py +6 -2
- mlrun/serving/states.py +554 -207
- mlrun/serving/steps.py +1 -1
- mlrun/serving/system_steps.py +42 -33
- mlrun/track/trackers/mlflow_tracker.py +29 -31
- mlrun/utils/helpers.py +89 -16
- mlrun/utils/http.py +9 -2
- mlrun/utils/notifications/notification/git.py +1 -1
- mlrun/utils/notifications/notification/mail.py +39 -16
- mlrun/utils/notifications/notification_pusher.py +2 -2
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +3 -4
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/METADATA +39 -49
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/RECORD +144 -130
- mlrun/db/auth_utils.py +0 -152
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +0 -343
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -75
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +0 -281
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +0 -1368
- mlrun/model_monitoring/db/tsdb/tdengine/writer_graph_steps.py +0 -51
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/licenses/LICENSE +0 -0
- {mlrun-1.10.0rc40.dist-info → mlrun-1.11.0rc16.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py
CHANGED
@@ -11,11 +11,14 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import datetime
+import os
+import os.path
 import tempfile
 import urllib.parse
 from base64 import b64encode
 from copy import copy
-from os import path, remove
+from types import ModuleType
 from typing import Optional, Union
 from urllib.parse import urlparse
 
@@ -26,6 +29,7 @@ import pyarrow
 import pytz
 import requests
 
+import mlrun.common.schemas
 import mlrun.config
 import mlrun.errors
 from mlrun.datastore.remote_client import BaseRemoteClient
@@ -156,6 +160,195 @@ class DataStore(BaseRemoteClient):
     def get_spark_options(self, path=None):
         return {}
 
+    @staticmethod
+    def _is_directory_in_range(
+        start_time: Optional[datetime.datetime],
+        end_time: Optional[datetime.datetime],
+        year: int,
+        month: Optional[int] = None,
+        day: Optional[int] = None,
+        hour: Optional[int] = None,
+        **kwargs,
+    ):
+        """Check if a partition directory (year=.., month=.., etc.) is in the time range."""
+        from dateutil.relativedelta import relativedelta
+
+        partition_start = datetime.datetime(
+            year=year,
+            month=month or 1,
+            day=day or 1,
+            hour=hour or 0,
+            tzinfo=start_time.tzinfo if start_time else end_time.tzinfo,
+        )
+        partition_end = (
+            partition_start
+            + relativedelta(
+                years=1 if month is None else 0,
+                months=1 if day is None and month is not None else 0,
+                days=1 if hour is None and day is not None else 0,
+                hours=1 if hour is not None else 0,
+            )
+            - datetime.timedelta(microseconds=1)
+        )
+
+        if (end_time and end_time < partition_start) or (
+            start_time and start_time > partition_end
+        ):
+            return False
+        return True
+
+    @staticmethod
+    def _list_partition_paths_helper(
+        paths: list[str],
+        start_time: Optional[datetime.datetime],
+        end_time: Optional[datetime.datetime],
+        current_path: str,
+        partition_level: str,
+        filesystem,
+    ):
+        directory_split = current_path.rsplit("/", 1)
+        time_unit = None
+        directory_start, directory_end = "", ""
+        if len(directory_split) == 2:
+            directory_start, directory_end = directory_split
+            time_unit = directory_end.split("=")[0] if "=" in directory_end else None
+
+        if not time_unit and directory_end.endswith((".parquet", ".pq")):
+            paths.append(directory_start.rstrip("/"))
+            return
+        elif time_unit and time_unit == partition_level:
+            paths.append(current_path.rstrip("/"))
+            return
+
+        directories = filesystem.ls(current_path, detail=True)
+        if len(directories) == 0:
+            return
+        for directory in directories:
+            current_path = directory["name"]
+            parts = [p for p in current_path.split("/") if "=" in p]
+            kwargs = {}
+            for part in parts:
+                key, value = part.split("=", 1)
+                if value.isdigit():
+                    value = int(value)
+                kwargs[key] = value
+            if DataStore._is_directory_in_range(start_time, end_time, **kwargs):
+                DataStore._list_partition_paths_helper(
+                    paths,
+                    start_time,
+                    end_time,
+                    current_path,
+                    partition_level,
+                    filesystem,
+                )
+
+    @staticmethod
+    def _list_partitioned_paths(
+        base_url: str,
+        start_time: Optional[datetime.datetime],
+        end_time: Optional[datetime.datetime],
+        partition_level: str,
+        filesystem,
+    ):
+        paths = []
+        parsed_base_url = urlparse(base_url)
+        base_path = parsed_base_url.path
+
+        if parsed_base_url.scheme not in ["v3io", "v3ios"]:
+            base_path = parsed_base_url.netloc + base_path
+
+        DataStore._list_partition_paths_helper(
+            paths, start_time, end_time, base_path, partition_level, filesystem
+        )
+        paths = [
+            DataStore._reconstruct_path_from_base_url(parsed_base_url, path)
+            for path in paths
+        ]
+        return paths
+
+    @staticmethod
+    def _reconstruct_path_from_base_url(
+        parsed_base_url: urllib.parse.ParseResult, returned_path: str
+    ) -> str:
+        scheme = parsed_base_url.scheme
+        authority = parsed_base_url.netloc
+        returned_path = returned_path.lstrip("/")
+        if scheme == "v3io":
+            return f"{scheme}://{authority}/{returned_path}"
+        else:
+            return f"{scheme}://{returned_path}"
+
+    @staticmethod
+    def _clean_filters_for_partitions(
+        filters: list[list[tuple]],
+        partition_keys: list[str],
+    ):
+        """
+        Remove partition keys from filters.
+
+        :param filters: pandas-style filters
+            Example: [[('year','=',2025),('month','=',11),('timestamp','>',ts1)]]
+        :param partition_keys: partition columns handled via directory
+
+        :return list of list of tuples: cleaned filters without partition keys
+        """
+        cleaned_filters = []
+        for group in filters:
+            new_group = [f for f in group if f[0] not in partition_keys]
+            if new_group:
+                cleaned_filters.append(new_group)
+        return cleaned_filters
+
+    @staticmethod
+    def _read_partitioned_parquet(
+        base_url: str,
+        start_time: Optional[datetime.datetime],
+        end_time: Optional[datetime.datetime],
+        partition_keys: list[str],
+        df_module: ModuleType,
+        filesystem: fsspec.AbstractFileSystem,
+        **kwargs,
+    ):
+        """
+        Reads only the relevant partitions and concatenates the results.
+        Note that partition_keys cannot be empty.
+        """
+        logger.debug(f"Starting partition discovery process for {base_url}")
+
+        paths = DataStore._list_partitioned_paths(
+            base_url,
+            start_time,
+            end_time,
+            partition_keys[-1],
+            filesystem,
+        )
+
+        dfs = []
+        for current_path in paths:
+            try:
+                kwargs["filters"] = DataStore._clean_filters_for_partitions(
+                    kwargs["filters"], partition_keys
+                )
+                df = df_module.read_parquet(current_path, **kwargs)
+                logger.debug(
+                    "Finished reading DataFrame from subpath",
+                    url=current_path,
+                )
+                dfs.append(df)
+            except FileNotFoundError as e:
+                # Skip partitions that don't exist or have no data
+                logger.warning(
+                    "Failed to read DataFrame", url=current_path, exception=e
+                )
+
+        final_df = pd.concat(dfs) if dfs else pd.DataFrame()
+        logger.debug(
+            "Finished reading partitioned parquet files",
+            url=base_url,
+            columns=final_df.columns,
+        )
+        return final_df
+
     @staticmethod
     def _parquet_reader(
         df_module,
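For intuition, the new _is_directory_in_range check keeps a hive-style partition directory only when its time span overlaps the requested window. A minimal sketch (times are illustrative, not taken from the diff):

    import datetime
    from mlrun.datastore.base import DataStore

    utc = datetime.timezone.utc
    start = datetime.datetime(2025, 11, 3, 12, tzinfo=utc)
    end = datetime.datetime(2025, 11, 4, 12, tzinfo=utc)

    # day=3 covers 2025-11-03 00:00:00 .. 23:59:59.999999 and overlaps the window
    assert DataStore._is_directory_in_range(start, end, year=2025, month=11, day=3)
    # day=2 ends before start_time, so that whole directory is pruned
    assert not DataStore._is_directory_in_range(start, end, year=2025, month=11, day=2)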
@@ -165,6 +358,7 @@ class DataStore(BaseRemoteClient):
         start_time,
         end_time,
         additional_filters,
+        optimize_discovery,
     ):
         from storey.utils import find_filters, find_partitions
 
@@ -203,7 +397,10 @@
            )
 
            if start_time or end_time or additional_filters:
-                partitions_time_attributes = find_partitions(url, file_system)
+                partitions_time_attributes, partitions = find_partitions(
+                    url, file_system
+                )
+                logger.debug("Partitioned parquet read", partitions=partitions)
                set_filters(
                    partitions_time_attributes,
                    start_time,
@@ -211,8 +408,28 @@
                    additional_filters,
                    kwargs,
                )
+
                try:
-                    return df_module.read_parquet(*args, **kwargs)
+                    if (
+                        optimize_discovery
+                        and partitions_time_attributes
+                        and DataStore._verify_path_partition_level(
+                            urlparse(url).path, partitions
+                        )
+                        and (start_time or end_time)
+                    ):
+                        return DataStore._read_partitioned_parquet(
+                            url,
+                            start_time,
+                            end_time,
+                            partitions_time_attributes,
+                            df_module,
+                            file_system,
+                            **kwargs,
+                        )
+
+                    else:
+                        return df_module.read_parquet(*args, **kwargs)
                except pyarrow.lib.ArrowInvalid as ex:
                    if not str(ex).startswith(
                        "Cannot compare timestamp with timezone to timestamp without timezone"
@@ -238,7 +455,24 @@
                        additional_filters,
                        kwargs,
                    )
-                    return df_module.read_parquet(*args, **kwargs)
+                    if (
+                        optimize_discovery
+                        and partitions_time_attributes
+                        and DataStore._verify_path_partition_level(
+                            urlparse(url).path, partitions
+                        )
+                    ):
+                        return DataStore._read_partitioned_parquet(
+                            url,
+                            start_time_inner,
+                            end_time_inner,
+                            partitions_time_attributes,
+                            df_module,
+                            file_system,
+                            **kwargs,
+                        )
+                    else:
+                        return df_module.read_parquet(*args, **kwargs)
            else:
                return df_module.read_parquet(*args, **kwargs)
 
@@ -261,6 +495,10 @@
        file_url = self._sanitize_url(url)
        is_csv, is_json, drop_time_column = False, False, False
        file_system = self.filesystem
+
+        # Feature flag optimize partition discovery by providing specific partition levels urls to the parquet reader
+        optimize_discovery = kwargs.pop("optimize_discovery", True)
+
        if file_url.endswith(".csv") or format == "csv":
            is_csv = True
            drop_time_column = False
@@ -322,6 +560,7 @@
                start_time,
                end_time,
                additional_filters,
+                optimize_discovery,
            )
 
        elif file_url.endswith(".json") or format == "json":
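The optimize_discovery flag arrives through the reader kwargs and defaults to True, so pruned reads happen automatically whenever time filters are present and the layout check passes. A hedged sketch of opting out (the exact as_df plumbing is assumed, not shown in this diff):

    # Hypothetical call site: extra kwargs flow down to the parquet reader,
    # where "optimize_discovery" is popped (see the kwargs.pop above).
    df = data_item.as_df(
        start_time=start,
        end_time=end,
        optimize_discovery=False,  # fall back to plain read_parquet over the base url
    )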
@@ -347,7 +586,7 @@
            temp_file = tempfile.NamedTemporaryFile(delete=False)
            self.download(self._join(subpath), temp_file.name)
            df = reader(temp_file.name, **kwargs)
-            remove(temp_file.name)
+            os.remove(temp_file.name)
 
        if is_json or is_csv:
            # for parquet file the time filtering is executed in `reader`
@@ -387,6 +626,26 @@
        except ImportError:
            return False
 
+    @staticmethod
+    def _verify_path_partition_level(base_path: str, partitions: list[str]) -> bool:
+        if not partitions:
+            return False
+
+        path_parts = base_path.strip("/").split("/")
+        path_parts = [part.split("=")[0] for part in path_parts if "=" in part]
+        if "hour" in partitions:
+            hour_index = partitions.index("hour")
+        else:
+            return False
+        for i, part in enumerate(partitions):
+            if not (
+                part in path_parts
+                or part in ["year", "month", "day", "hour"]
+                or i > hour_index
+            ):
+                return False
+        return True
+
 
 class DataItem:
     """Data input/output class abstracting access to various local/remote data sources
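_verify_path_partition_level gates the optimization: every partition column must appear in the base path (e.g. key=a), be one of the standard time units, or come after "hour", and layouts without an "hour" level are rejected outright. For example:

    from mlrun.datastore.base import DataStore

    partitions = ["key", "year", "month", "day", "hour"]
    DataStore._verify_path_partition_level("/sets/my-set/key=a", partitions)  # True
    DataStore._verify_path_partition_level("/sets/my-set", ["year", "month", "day"])  # False: no "hour" level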
@@ -439,7 +698,7 @@
     @property
     def suffix(self):
         """DataItem suffix (file extension) e.g. '.png'"""
-        _, file_ext = path.splitext(self._path)
+        _, file_ext = os.path.splitext(self._path)
         return file_ext
 
     @property
@@ -548,7 +807,7 @@
            return
 
        if self._local_path:
-            remove(self._local_path)
+            os.remove(self._local_path)
            self._local_path = ""
 
    def as_df(
@@ -648,8 +907,10 @@ def basic_auth_header(user, password):
    username = user.encode("latin1")
    password = password.encode("latin1")
    base = b64encode(b":".join((username, password))).strip()
-    authstr = "Basic " + base.decode("ascii")
-    return {"Authorization": authstr}
+    authstr = mlrun.common.schemas.AuthorizationHeaderPrefixes.basic + base.decode(
+        "ascii"
+    )
+    return {mlrun.common.schemas.HeaderNames.authorization: authstr}
 
 
 class HttpStore(DataStore):
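Assuming the new schema constants render to the standard header strings ("Authorization" and the "Basic " prefix), the produced header is unchanged; a quick check:

    from mlrun.datastore.base import basic_auth_header

    # b64("user:pass") == "dXNlcjpwYXNz"
    assert basic_auth_header("user", "pass") == {"Authorization": "Basic dXNlcjpwYXNz"}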
@@ -696,7 +957,10 @@ class HttpStore(DataStore):
        token = self._get_secret_or_env("HTTPS_AUTH_TOKEN")
        if token:
            self._https_auth_token = token
-            self._headers.setdefault(
+            self._headers.setdefault(
+                mlrun.common.schemas.HeaderNames.authorization,
+                f"{mlrun.common.schemas.AuthorizationHeaderPrefixes.bearer}{token}",
+            )
 
    def _validate_https_token(self):
        if self._https_auth_token and self._schema in ["http"]:
mlrun/datastore/datastore.py
CHANGED
@@ -47,7 +47,7 @@ from .v3io import V3ioStore
 in_memory_store = InMemoryStore()
 
 
-def schema_to_store(schema) -> DataStore:
+def schema_to_store(schema) -> type[DataStore]:
     # import store classes inside to enable making their dependencies optional (package extras)
 
     if not schema or schema in get_local_file_schema():
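The sharpened annotation makes explicit that schema_to_store returns the datastore class, not an instance:

    from mlrun.datastore.datastore import schema_to_store

    store_cls = schema_to_store("s3")  # e.g. the S3Store class itself
    # instantiation happens elsewhere, with the store's constructor arguments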
mlrun/datastore/datastore_profile.py
CHANGED
@@ -16,7 +16,7 @@ import ast
 import base64
 import json
 import typing
-from urllib.parse import ParseResult, urlparse
+from urllib.parse import ParseResult, quote, unquote, urlparse
 
 import pydantic.v1
 from deprecated import deprecated
@@ -283,8 +283,9 @@ class DatastoreProfileRedis(DatastoreProfile):
 
    def url_with_credentials(self):
        parsed_url = urlparse(self.endpoint_url)
-        username = self.username
-        password = self.password
+        # URL-encode username and password to handle special characters like @, :, /
+        username = quote(self.username, safe="") if self.username else None
+        password = quote(self.password, safe="") if self.password else None
        netloc = parsed_url.hostname
        if username:
            if password:
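With the Redis change, reserved characters in credentials no longer corrupt the generated URL; quoting with safe="" escapes everything:

    from urllib.parse import quote

    quote("p@ss:word", safe="")  # -> "p%40ss%3Aword", safe to embed as redis://user:password@host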
@@ -448,40 +449,71 @@ class DatastoreProfileHdfs(DatastoreProfile):
        return f"webhdfs://{self.host}:{self.http_port}{subpath}"
 
 
-class DatastoreProfileTDEngine(DatastoreProfile):
+class DatastoreProfilePostgreSQL(DatastoreProfile):
     """
-    A profile that holds the required parameters for a TDEngine database.
-
+    A profile that holds the required parameters for a PostgreSQL database.
+    PostgreSQL uses standard PostgreSQL connection parameters.
     """
 
-    type: str = pydantic.v1.Field("taosws")
+    type: str = pydantic.v1.Field("postgresql")
    _private_attributes = ["password"]
    user: str
    # The password cannot be empty in real world scenarios. It's here just because of the profiles completion design.
    password: typing.Optional[str]
    host: str
    port: int
+    database: str = "postgres"  # Default PostgreSQL admin database
 
-    def dsn(self) -> str:
-        """Get the DSN of the TDEngine profile."""
-        return f"taosws://{self.user}:{self.password}@{self.host}:{self.port}"
+    def dsn(self, database: typing.Optional[str] = None) -> str:
+        """
+        Get the Data Source Name of the configured PostgreSQL profile.
+
+        :param database: Optional database name to use instead of the configured one.
+                         If None, uses the configured database.
+        :return: The DSN string.
+        """
+        db = database or self.database
+        # URL-encode credentials and database to handle special characters
+        user = quote(self.user, safe="")
+        password = quote(self.password or "", safe="")
+        db_encoded = quote(db, safe="")
+        return f"{self.type}://{user}:{password}@{self.host}:{self.port}/{db_encoded}"
+
+    def admin_dsn(self) -> str:
+        """
+        Get DSN for administrative operations using the 'postgres' database.
+
+        Assumes the default 'postgres' database exists (standard PostgreSQL setup).
+        Used for admin tasks like creating/dropping databases.
+
+        :return: DSN pointing to the 'postgres' database.
+        """
+        return self.dsn(database="postgres")
 
    @classmethod
-    def from_dsn(cls, dsn: str, profile_name: str) -> "DatastoreProfileTDEngine":
+    def from_dsn(cls, dsn: str, profile_name: str) -> "DatastoreProfilePostgreSQL":
        """
-        Construct a TDEngine profile from DSN (connection string) and a name for the profile.
+        Construct a PostgreSQL profile from DSN (connection string) and a name for the profile.
 
-        :param dsn: The DSN (Data Source Name) of the TDEngine database.
+        :param dsn: The DSN (Data Source Name) of the PostgreSQL database,
+                    e.g.: ``"postgresql://user:password@localhost:5432/mydb"``.
        :param profile_name: The new profile's name.
-        :return: The TDEngine profile.
+        :return: The PostgreSQL profile.
        """
        parsed_url = urlparse(dsn)
+        # URL-decode username, password, and database (urlparse doesn't decode them)
+        username = unquote(parsed_url.username) if parsed_url.username else None
+        password = unquote(parsed_url.password) if parsed_url.password else None
+        database = (
+            unquote(parsed_url.path.lstrip("/")) if parsed_url.path else "postgres"
+        )
        return cls(
            name=profile_name,
-            user=parsed_url.username,
-            password=parsed_url.password,
+            user=username,
+            password=password,
            host=parsed_url.hostname,
            port=parsed_url.port,
+            database=database or "postgres",
        )
 
 
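The quote/unquote pair keeps credentials with reserved characters round-trippable through the DSN. A sketch with the new profile (values illustrative):

    profile = DatastoreProfilePostgreSQL(
        name="pg",
        user="admin@corp",
        password="p:ss/w@rd",
        host="localhost",
        port=5432,
        database="mlrun",
    )
    profile.dsn()
    # -> "postgresql://admin%40corp:p%3Ass%2Fw%40rd@localhost:5432/mlrun"
    profile.admin_dsn()
    # -> same credentials, but the "postgres" database, for admin operations

    # from_dsn() unquotes, so the raw values survive the round trip:
    DatastoreProfilePostgreSQL.from_dsn(profile.dsn(), "pg2").password  # "p:ss/w@rd"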
@@ -552,7 +584,7 @@ _DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
    "gcs": DatastoreProfileGCS,
    "az": DatastoreProfileAzureBlob,
    "hdfs": DatastoreProfileHdfs,
-    "taosws": DatastoreProfileTDEngine,
+    "postgresql": DatastoreProfilePostgreSQL,
    "config": ConfigProfile,
    "openai": OpenAIProfile,
    "huggingface": HuggingFaceProfile,
mlrun/datastore/model_provider/huggingface_provider.py
CHANGED
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
+import threading
 from typing import TYPE_CHECKING, Any, Optional, Union
 
 import mlrun
@@ -41,6 +41,9 @@ class HuggingFaceProvider(ModelProvider):
    into memory for inference. Ensure you have the required CPU/GPU and memory to use this operation.
    """
 
+    # locks for threading use cases
+    _client_lock = threading.Lock()
+
    def __init__(
        self,
        parent,
@@ -224,7 +227,8 @@ class HuggingFaceProvider(ModelProvider):
 
            self.options["model_kwargs"] = self.options.get("model_kwargs", {})
            self.options["model_kwargs"]["local_files_only"] = True
-            self._client = pipeline(model=self.model, **self.options)
+            with self._client_lock:
+                self._client = pipeline(model=self.model, **self.options)
            self._expected_operation_type = Pipeline
        except ImportError as exc:
            raise ImportError("transformers package is not installed") from exc
mlrun/datastore/model_provider/model_provider.py
CHANGED
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from collections.abc import Awaitable
-from typing import Any, Callable, Optional, Union
+from collections.abc import Awaitable, Callable
+from typing import Any, Optional, Union
 
 import mlrun.errors
 from mlrun.common.types import StrEnum
mlrun/datastore/model_provider/openai_provider.py
CHANGED
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import inspect
-from collections.abc import Awaitable
-from typing import TYPE_CHECKING, Any, Callable, Optional, Union
+from collections.abc import Awaitable, Callable
+from typing import TYPE_CHECKING, Any, Optional, Union
 
 import mlrun
 from mlrun.datastore.model_provider.model_provider import (
mlrun/datastore/s3.py
CHANGED
@@ -18,12 +18,16 @@ from typing import Optional
 from urllib.parse import urlparse
 
 import boto3
+import botocore.exceptions
 from boto3.s3.transfer import TransferConfig
 from fsspec.registry import get_filesystem_class
 
 import mlrun.errors
 
 from .base import DataStore, FileStats, make_datastore_schema_sanitizer
+from .utils import parse_s3_bucket_and_key
+
+__all__ = ["parse_s3_bucket_and_key"]
 
 
 class S3Store(DataStore):
@@ -225,9 +229,17 @@ class S3Store(DataStore):
    def get(self, key, size=None, offset=0):
        bucket, key = self.get_bucket_and_key(key)
        obj = self.s3.Object(bucket, key)
-        if size or offset:
-            return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
-        return obj.get()["Body"].read()
+        try:
+            if size or offset:
+                return obj.get(Range=S3Store.get_range(size, offset))["Body"].read()
+            return obj.get()["Body"].read()
+
+        except botocore.exceptions.ClientError as exc:
+            if exc.response["Error"]["Code"] == "NoSuchKey":
+                # "NoSuchKey" errors codes - equivalent to `FileNotFoundError`
+                raise FileNotFoundError(f"s3://{bucket}/{key}") from exc
+            # Other errors are raised as-is
+            raise
 
    def put(self, key, data, append=False):
        data, _ = self._prepare_put_data(data, append)
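A missing object now surfaces as the standard FileNotFoundError rather than a raw botocore ClientError, so callers can use ordinary exception handling:

    try:
        data = store.get("path/inside/bucket/missing.parquet")  # store: an S3Store
    except FileNotFoundError:
        data = None  # a missing key is now a normal "file not found" condition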
@@ -259,16 +271,3 @@
        # In order to raise an error if there is connection error, ML-7056.
        self.filesystem.exists(path=path)
        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
-
-
-def parse_s3_bucket_and_key(s3_path):
-    try:
-        path_parts = s3_path.replace("s3://", "").split("/")
-        bucket = path_parts.pop(0)
-        key = "/".join(path_parts)
-    except Exception as exc:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "failed to parse s3 bucket and key"
-        ) from exc
-
-    return bucket, key
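parse_s3_bucket_and_key is relocated to mlrun/datastore/utils.py (and re-exported from mlrun.datastore.s3 via __all__), not changed; per the removed body above:

    from mlrun.datastore.s3 import parse_s3_bucket_and_key  # re-exported from .utils

    parse_s3_bucket_and_key("s3://my-bucket/a/b.parquet")  # -> ("my-bucket", "a/b.parquet")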
mlrun/datastore/sources.py
CHANGED
@@ -460,7 +460,7 @@ class ParquetSource(BaseSourceDriver):
            if not filter_tuple:
                continue
            col_name, op, value = filter_tuple
-            if op.lower() in ("in", "not in") and isinstance(value, (list, tuple, set)):
+            if op.lower() in ("in", "not in") and isinstance(value, list | tuple | set):
                none_exists = False
                value = list(value)
                for sub_value in value:
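The rewritten check uses PEP 604 union syntax, which isinstance accepts natively on Python 3.10+ and which is equivalent to the tuple form:

    isinstance((1, 2), list | tuple | set)   # True on Python >= 3.10
    isinstance((1, 2), (list, tuple, set))   # equivalent tuple form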
mlrun/datastore/store_resources.py
CHANGED
@@ -76,9 +76,9 @@ class ResourceCache:
            return self._tabels[uri]
 
        if uri.startswith("v3io://") or uri.startswith("v3ios://"):
-            endpoint, uri = parse_path(uri)
+            endpoint, path = parse_path(uri)
            self._tabels[uri] = Table(
-                uri,
+                path,
                V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
                flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
            )
@@ -87,10 +87,10 @@
        if uri.startswith("redis://") or uri.startswith("rediss://"):
            from storey.redis_driver import RedisDriver
 
-            endpoint, uri = parse_path(uri)
+            endpoint, path = parse_path(uri)
            endpoint = endpoint or mlrun.mlconf.redis.url
            self._tabels[uri] = Table(
-                uri,
+                path,
                RedisDriver(redis_url=endpoint, key_prefix="/"),
                flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
            )