mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated

+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df

-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-

 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -182,11 +179,23 @@ class DataStore:
         return {}

     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions

         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -196,20 +205,23 @@
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters

         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
                 try:
@@ -220,6 +232,7 @@
                 ):
                     raise ex

+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -231,6 +244,7 @@
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
                 return df_module.read_parquet(*args, **kwargs)
@@ -249,6 +263,7 @@
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -313,7 +328,13 @@
                 kwargs["columns"] = columns

             reader = self._parquet_reader(
-                df_module, url, file_system, time_column, start_time, end_time
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )

         elif file_url.endswith(".json") or format == "json":
@@ -392,14 +413,15 @@ class DataItem:


         # reading run results using DataItem (run.artifact())
-        train_run = train_iris_func.run(inputs={'dataset': dataset},
-                                        params={'label_column': 'label'})
+        train_run = train_iris_func.run(
+            inputs={"dataset": dataset}, params={"label_column": "label"}
+        )

-        train_run.artifact('confusion-matrix').show()
-        test_set = train_run.artifact('test_set').as_df()
+        train_run.artifact("confusion-matrix").show()
+        test_set = train_run.artifact("test_set").as_df()

         # create and use DataItem from uri
-        data = mlrun.get_dataitem('http://xyz/data.json').get()
+        data = mlrun.get_dataitem("http://xyz/data.json").get()
     """

     def __init__(
@@ -541,6 +563,7 @@
         time_column=None,
         start_time=None,
         end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -552,6 +575,12 @@
         :param end_time:    filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -562,6 +591,7 @@
             time_column=time_column,
             start_time=start_time,
             end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
@@ -633,17 +663,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}


-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-

 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
@@ -671,7 +690,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")

     def get(self, key, size=None, offset=0):
-        data = http_get(self.url + self._join(key), self._headers, self.auth)
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +710,26 @@
             f"schema as it is not secure and is not recommended."
         )

+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+

 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
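The new `additional_filters` argument threads from `DataItem.as_df()` through `DataStore.as_df()` into the parquet reader above, where the (column, operator, value) tuples are appended to the pyarrow-style filters derived from `start_time`/`end_time`. A minimal client-side sketch of the new parameter; the dataset path and column names below are illustrative, not taken from this diff:

    from datetime import datetime

    import mlrun

    # hypothetical parquet path with "timestamp" and "Product" columns
    item = mlrun.get_dataitem("s3://my-bucket/sales/data.parquet")
    df = item.as_df(
        time_column="timestamp",                   # column used for start/end time filtering
        start_time=datetime(2024, 1, 1),
        end_time=datetime(2024, 6, 30),
        additional_filters=[("Product", "=", "Computer")],  # pyarrow-style filter tuples
    )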
mlrun/datastore/datastore.py CHANGED
@@ -98,6 +98,10 @@ def schema_to_store(schema):
         from .hdfs import HdfsStore

         return HdfsStore
+    elif schema == "oss":
+        from .alibaba_oss import OSSStore
+
+        return OSSStore
     else:
         raise ValueError(f"unsupported store scheme ({schema})")

@@ -219,6 +223,11 @@ class StoreManager:
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url

+        elif schema in get_local_file_schema():
+            # parse_url() will drop the windows drive-letter from the path for url like "c:\a\b".
+            # As a workaround, we set subpath to the url.
+            subpath = url.replace("file://", "", 1)
+
         if not schema and endpoint:
             if endpoint in self._stores.keys():
                 return self._stores[endpoint], subpath, url
@@ -237,8 +246,7 @@ class StoreManager:
             )
             if not secrets and not mlrun.config.is_running_as_api():
                 self._stores[store_key] = store
-            # in file stores in windows path like c:\a\b the drive letter is dropped from the path, so we return the url
-            return store, url if store.kind == "file" else subpath, url
+            return store, subpath, url

     def reset_secrets(self):
         self._secrets = {}
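With `schema_to_store()` now mapping the `oss` scheme to the new Alibaba Cloud OSS store (mlrun/datastore/alibaba_oss.py), OSS objects can be read like any other data item. A rough sketch, assuming OSS credentials are already exposed to the store through environment variables or project secrets; the bucket and key are placeholders:

    import mlrun

    # "oss://<bucket>/<key>" is resolved via schema_to_store() to the OSSStore backend
    item = mlrun.get_dataitem("oss://my-bucket/datasets/iris.csv")
    df = item.as_df()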
mlrun/datastore/datastore_profile.py CHANGED
@@ -16,6 +16,7 @@ import ast
 import base64
 import json
 import typing
+import warnings
 from urllib.parse import ParseResult, urlparse, urlunparse

 import pydantic
@@ -36,6 +37,7 @@ class DatastoreProfile(pydantic.BaseModel):
         extra = pydantic.Extra.forbid

     @pydantic.validator("name")
+    @classmethod
     def lower_case(cls, v):
         return v.lower()

@@ -68,6 +70,9 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
     def get(self, key):
         return self._data.get(key, None)

+    def remove(self, key):
+        self._data.pop(key, None)
+

 class DatastoreProfileBasic(DatastoreProfile):
     type: str = pydantic.Field("basic")
@@ -79,13 +84,37 @@
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.Field("kafka_target")
     _private_attributes = "kwargs_private"
-    bootstrap_servers: str
+    bootstrap_servers: typing.Optional[str] = None
+    brokers: typing.Optional[str] = None
     topic: str
     kwargs_public: typing.Optional[dict]
     kwargs_private: typing.Optional[dict]

+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        if not self.brokers and not self.bootstrap_servers:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "DatastoreProfileKafkaTarget requires the 'brokers' field to be set"
+            )
+
+        if self.bootstrap_servers:
+            if self.brokers:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "DatastoreProfileKafkaTarget cannot be created with both 'brokers' and 'bootstrap_servers'"
+                )
+            else:
+                self.brokers = self.bootstrap_servers
+                self.bootstrap_servers = None
+                warnings.warn(
+                    "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                    "use 'brokers' instead.",
+                    # TODO: Remove this in 1.9.0
+                    FutureWarning,
+                )
+
     def attributes(self):
-        attributes = {"bootstrap_servers": self.bootstrap_servers}
+        attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
             attributes = merge(attributes, self.kwargs_public)
         if self.kwargs_private:
@@ -157,6 +186,18 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     def secrets(self) -> dict:
         res = {}
@@ -175,7 +216,13 @@
         return res

     def url(self, subpath):
-        return f"s3:/{subpath}"
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"


 class DatastoreProfileRedis(DatastoreProfile):
@@ -244,18 +291,36 @@ class DatastoreProfileGCS(DatastoreProfile):
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
             return json.dumps(v)
         return v

     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"gcs://{subpath}"
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"

     def secrets(self) -> dict:
         res = {}
@@ -283,12 +348,27 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    container: typing.Optional[str] = None
+
+    @pydantic.validator("container")
+    @classmethod
+    def check_container(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'container' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     def url(self, subpath) -> str:
         if subpath.startswith("/"):
-            # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
+            # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"az://{subpath}"
+        if self.container:
+            return f"az://{self.container}/{subpath}"
+        else:
+            return f"az://{subpath}"

     def secrets(self) -> dict:
         res = {}
@@ -460,3 +540,7 @@ def register_temporary_client_datastore_profile(profile: DatastoreProfile):
     It's beneficial for testing purposes.
     """
     TemporaryClientDatastoreProfiles().add(profile)
+
+
+def remove_temporary_client_datastore_profile(profile_name: str):
+    TemporaryClientDatastoreProfiles().remove(profile_name)
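Alongside the Kafka `brokers` rename, the S3/GCS/Azure profiles gain optional `bucket`/`container` fields (with a FutureWarning until they become mandatory in 1.9), and temporary client profiles can now be unregistered. A short sketch of the round trip, with placeholder credentials and bucket names:

    import mlrun
    from mlrun.datastore.datastore_profile import (
        DatastoreProfileS3,
        register_temporary_client_datastore_profile,
        remove_temporary_client_datastore_profile,
    )

    profile = DatastoreProfileS3(
        name="my-s3",                  # profile names are lower-cased by the validator
        access_key_id="<access-key>",
        secret_key="<secret-key>",
        bucket="my-bucket",            # optional today, planned to become mandatory in 1.9
    )
    register_temporary_client_datastore_profile(profile)
    df = mlrun.get_dataitem("ds://my-s3/path/data.parquet").as_df()
    remove_temporary_client_datastore_profile("my-s3")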
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
-        res = None
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/hdfs.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from urllib.parse import urlparse

 import fsspec

@@ -49,3 +50,7 @@ class HdfsStore(DataStore):
     @property
     def spark_url(self):
         return f"hdfs://{self.host}:{self.port}"
+
+    def rm(self, url, recursive=False, maxdepth=None):
+        path = urlparse(url).path
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
mlrun/datastore/inmem.py CHANGED
@@ -80,8 +80,8 @@ class InMemoryStore(DataStore):
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store do not filter on time
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
             kwargs.pop(field, None)

         return reader(item, **kwargs)
mlrun/datastore/redis.py CHANGED
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-        REDIS_DEFAULT_PORT = "6379"
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None

@@ -49,7 +49,7 @@
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else REDIS_DEFAULT_PORT
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
mlrun/datastore/s3.py CHANGED
@@ -198,6 +198,11 @@ class S3Store(DataStore):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]

+    def rm(self, path, recursive=False, maxdepth=None):
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+

 def parse_s3_bucket_and_key(s3_path):
     try:
mlrun/datastore/snowflake_utils.py ADDED
@@ -0,0 +1,43 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import mlrun
+
+
+def get_snowflake_password():
+    key = "SNOWFLAKE_PASSWORD"
+    snowflake_password = mlrun.get_secret_or_env(key)
+
+    if not snowflake_password:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"No password provided. Set password using the {key} "
+            "project secret or environment variable."
+        )
+
+    return snowflake_password
+
+
+def get_snowflake_spark_options(attributes):
+    return {
+        "format": "net.snowflake.spark.snowflake",
+        "sfURL": attributes.get("url"),
+        "sfUser": attributes.get("user"),
+        "sfPassword": get_snowflake_password(),
+        "sfDatabase": attributes.get("database"),
+        "sfSchema": attributes.get("schema"),
+        "sfWarehouse": attributes.get("warehouse"),
+        "application": "iguazio_platform",
+        "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
+    }
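This new helper builds the option dict for the Snowflake Spark connector in one place. A hedged usage sketch; the connection attributes are placeholders, and `SNOWFLAKE_PASSWORD` must be available as a project secret or environment variable, otherwise `get_snowflake_password()` raises:

    from mlrun.datastore.snowflake_utils import get_snowflake_spark_options

    attributes = {
        "url": "myaccount.snowflakecomputing.com",  # placeholder account URL
        "user": "analyst",
        "database": "SALES",
        "schema": "PUBLIC",
        "warehouse": "COMPUTE_WH",
    }
    spark_options = get_snowflake_spark_options(attributes)
    # e.g. spark.read.format(spark_options.pop("format")).options(**spark_options).option("dbtable", "ORDERS").load()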