mlrun-1.7.2rc3-py3-none-any.whl → mlrun-1.8.0rc1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (222)
  1. mlrun/__init__.py +14 -12
  2. mlrun/__main__.py +3 -3
  3. mlrun/alerts/alert.py +19 -12
  4. mlrun/artifacts/__init__.py +0 -2
  5. mlrun/artifacts/base.py +34 -11
  6. mlrun/artifacts/dataset.py +16 -16
  7. mlrun/artifacts/manager.py +13 -13
  8. mlrun/artifacts/model.py +66 -53
  9. mlrun/common/constants.py +6 -0
  10. mlrun/common/formatters/__init__.py +1 -0
  11. mlrun/common/formatters/feature_set.py +1 -0
  12. mlrun/common/formatters/function.py +1 -0
  13. mlrun/common/formatters/model_endpoint.py +30 -0
  14. mlrun/common/formatters/pipeline.py +1 -2
  15. mlrun/common/model_monitoring/__init__.py +0 -3
  16. mlrun/common/model_monitoring/helpers.py +1 -1
  17. mlrun/common/runtimes/constants.py +1 -2
  18. mlrun/common/schemas/__init__.py +4 -2
  19. mlrun/common/schemas/artifact.py +0 -6
  20. mlrun/common/schemas/common.py +50 -0
  21. mlrun/common/schemas/model_monitoring/__init__.py +8 -1
  22. mlrun/common/schemas/model_monitoring/constants.py +62 -12
  23. mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
  24. mlrun/common/schemas/model_monitoring/model_endpoints.py +21 -5
  25. mlrun/common/schemas/partition.py +122 -0
  26. mlrun/config.py +43 -15
  27. mlrun/data_types/__init__.py +0 -2
  28. mlrun/data_types/data_types.py +0 -1
  29. mlrun/data_types/infer.py +3 -1
  30. mlrun/data_types/spark.py +4 -4
  31. mlrun/data_types/to_pandas.py +2 -11
  32. mlrun/datastore/__init__.py +0 -2
  33. mlrun/datastore/alibaba_oss.py +4 -1
  34. mlrun/datastore/azure_blob.py +4 -1
  35. mlrun/datastore/base.py +12 -4
  36. mlrun/datastore/datastore.py +9 -3
  37. mlrun/datastore/datastore_profile.py +1 -1
  38. mlrun/datastore/dbfs_store.py +4 -1
  39. mlrun/datastore/filestore.py +4 -1
  40. mlrun/datastore/google_cloud_storage.py +4 -1
  41. mlrun/datastore/hdfs.py +4 -1
  42. mlrun/datastore/inmem.py +4 -1
  43. mlrun/datastore/redis.py +4 -1
  44. mlrun/datastore/s3.py +4 -1
  45. mlrun/datastore/sources.py +51 -49
  46. mlrun/datastore/store_resources.py +0 -2
  47. mlrun/datastore/targets.py +22 -23
  48. mlrun/datastore/utils.py +2 -2
  49. mlrun/datastore/v3io.py +4 -1
  50. mlrun/datastore/wasbfs/fs.py +13 -12
  51. mlrun/db/base.py +126 -62
  52. mlrun/db/factory.py +3 -0
  53. mlrun/db/httpdb.py +767 -231
  54. mlrun/db/nopdb.py +126 -57
  55. mlrun/errors.py +2 -2
  56. mlrun/execution.py +55 -29
  57. mlrun/feature_store/__init__.py +0 -2
  58. mlrun/feature_store/api.py +40 -40
  59. mlrun/feature_store/common.py +9 -9
  60. mlrun/feature_store/feature_set.py +20 -18
  61. mlrun/feature_store/feature_vector.py +27 -24
  62. mlrun/feature_store/retrieval/base.py +14 -9
  63. mlrun/feature_store/retrieval/job.py +2 -1
  64. mlrun/feature_store/steps.py +2 -2
  65. mlrun/features.py +30 -13
  66. mlrun/frameworks/__init__.py +1 -2
  67. mlrun/frameworks/_common/__init__.py +1 -2
  68. mlrun/frameworks/_common/artifacts_library.py +2 -2
  69. mlrun/frameworks/_common/mlrun_interface.py +10 -6
  70. mlrun/frameworks/_common/model_handler.py +29 -27
  71. mlrun/frameworks/_common/producer.py +3 -1
  72. mlrun/frameworks/_dl_common/__init__.py +1 -2
  73. mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
  74. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
  75. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
  76. mlrun/frameworks/_ml_common/__init__.py +1 -2
  77. mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
  78. mlrun/frameworks/_ml_common/model_handler.py +21 -21
  79. mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
  80. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
  81. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  82. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  83. mlrun/frameworks/auto_mlrun/__init__.py +1 -2
  84. mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
  85. mlrun/frameworks/huggingface/__init__.py +1 -2
  86. mlrun/frameworks/huggingface/model_server.py +9 -9
  87. mlrun/frameworks/lgbm/__init__.py +47 -44
  88. mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
  89. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
  90. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
  91. mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
  92. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
  93. mlrun/frameworks/lgbm/model_handler.py +15 -11
  94. mlrun/frameworks/lgbm/model_server.py +11 -7
  95. mlrun/frameworks/lgbm/utils.py +2 -2
  96. mlrun/frameworks/onnx/__init__.py +1 -2
  97. mlrun/frameworks/onnx/dataset.py +3 -3
  98. mlrun/frameworks/onnx/mlrun_interface.py +2 -2
  99. mlrun/frameworks/onnx/model_handler.py +7 -5
  100. mlrun/frameworks/onnx/model_server.py +8 -6
  101. mlrun/frameworks/parallel_coordinates.py +11 -11
  102. mlrun/frameworks/pytorch/__init__.py +22 -23
  103. mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
  104. mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
  105. mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
  106. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
  107. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
  108. mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
  109. mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
  110. mlrun/frameworks/pytorch/model_handler.py +21 -17
  111. mlrun/frameworks/pytorch/model_server.py +13 -9
  112. mlrun/frameworks/sklearn/__init__.py +19 -18
  113. mlrun/frameworks/sklearn/estimator.py +2 -2
  114. mlrun/frameworks/sklearn/metric.py +3 -3
  115. mlrun/frameworks/sklearn/metrics_library.py +8 -6
  116. mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
  117. mlrun/frameworks/sklearn/model_handler.py +4 -3
  118. mlrun/frameworks/tf_keras/__init__.py +11 -12
  119. mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
  120. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
  121. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
  122. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
  123. mlrun/frameworks/tf_keras/model_handler.py +17 -13
  124. mlrun/frameworks/tf_keras/model_server.py +12 -8
  125. mlrun/frameworks/xgboost/__init__.py +19 -18
  126. mlrun/frameworks/xgboost/model_handler.py +13 -9
  127. mlrun/launcher/base.py +3 -4
  128. mlrun/launcher/local.py +1 -1
  129. mlrun/launcher/remote.py +1 -1
  130. mlrun/lists.py +4 -3
  131. mlrun/model.py +108 -44
  132. mlrun/model_monitoring/__init__.py +1 -2
  133. mlrun/model_monitoring/api.py +6 -6
  134. mlrun/model_monitoring/applications/_application_steps.py +13 -15
  135. mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
  136. mlrun/model_monitoring/applications/results.py +55 -3
  137. mlrun/model_monitoring/controller.py +185 -223
  138. mlrun/model_monitoring/db/_schedules.py +156 -0
  139. mlrun/model_monitoring/db/_stats.py +189 -0
  140. mlrun/model_monitoring/db/stores/__init__.py +1 -1
  141. mlrun/model_monitoring/db/stores/base/store.py +6 -65
  142. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
  143. mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
  144. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
  145. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
  146. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
  147. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
  148. mlrun/model_monitoring/db/tsdb/base.py +74 -22
  149. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +66 -35
  150. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
  151. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +284 -51
  152. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
  153. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
  154. mlrun/model_monitoring/helpers.py +97 -1
  155. mlrun/model_monitoring/model_endpoint.py +4 -2
  156. mlrun/model_monitoring/stream_processing.py +2 -2
  157. mlrun/model_monitoring/tracking_policy.py +10 -3
  158. mlrun/model_monitoring/writer.py +47 -26
  159. mlrun/package/__init__.py +3 -6
  160. mlrun/package/context_handler.py +1 -1
  161. mlrun/package/packager.py +12 -9
  162. mlrun/package/packagers/__init__.py +0 -2
  163. mlrun/package/packagers/default_packager.py +14 -11
  164. mlrun/package/packagers/numpy_packagers.py +16 -7
  165. mlrun/package/packagers/pandas_packagers.py +18 -18
  166. mlrun/package/packagers/python_standard_library_packagers.py +25 -11
  167. mlrun/package/packagers_manager.py +31 -14
  168. mlrun/package/utils/__init__.py +0 -3
  169. mlrun/package/utils/_pickler.py +6 -6
  170. mlrun/platforms/__init__.py +3 -3
  171. mlrun/platforms/iguazio.py +4 -1
  172. mlrun/projects/__init__.py +1 -6
  173. mlrun/projects/operations.py +27 -27
  174. mlrun/projects/pipelines.py +85 -215
  175. mlrun/projects/project.py +444 -158
  176. mlrun/run.py +9 -9
  177. mlrun/runtimes/__init__.py +1 -3
  178. mlrun/runtimes/base.py +13 -10
  179. mlrun/runtimes/daskjob.py +9 -9
  180. mlrun/runtimes/generators.py +2 -1
  181. mlrun/runtimes/kubejob.py +4 -5
  182. mlrun/runtimes/mpijob/__init__.py +0 -2
  183. mlrun/runtimes/mpijob/abstract.py +7 -6
  184. mlrun/runtimes/nuclio/api_gateway.py +7 -7
  185. mlrun/runtimes/nuclio/application/application.py +11 -11
  186. mlrun/runtimes/nuclio/function.py +14 -13
  187. mlrun/runtimes/nuclio/serving.py +9 -9
  188. mlrun/runtimes/pod.py +74 -29
  189. mlrun/runtimes/remotesparkjob.py +3 -2
  190. mlrun/runtimes/sparkjob/__init__.py +0 -2
  191. mlrun/runtimes/sparkjob/spark3job.py +21 -11
  192. mlrun/runtimes/utils.py +6 -5
  193. mlrun/serving/merger.py +6 -4
  194. mlrun/serving/remote.py +18 -17
  195. mlrun/serving/routers.py +27 -27
  196. mlrun/serving/server.py +1 -1
  197. mlrun/serving/states.py +76 -71
  198. mlrun/serving/utils.py +13 -2
  199. mlrun/serving/v1_serving.py +3 -2
  200. mlrun/serving/v2_serving.py +4 -4
  201. mlrun/track/__init__.py +1 -1
  202. mlrun/track/tracker.py +2 -2
  203. mlrun/track/trackers/mlflow_tracker.py +6 -5
  204. mlrun/utils/async_http.py +1 -1
  205. mlrun/utils/helpers.py +72 -28
  206. mlrun/utils/logger.py +104 -2
  207. mlrun/utils/notifications/notification/base.py +23 -4
  208. mlrun/utils/notifications/notification/console.py +1 -1
  209. mlrun/utils/notifications/notification/git.py +6 -6
  210. mlrun/utils/notifications/notification/ipython.py +5 -4
  211. mlrun/utils/notifications/notification/slack.py +1 -1
  212. mlrun/utils/notifications/notification/webhook.py +13 -17
  213. mlrun/utils/notifications/notification_pusher.py +23 -19
  214. mlrun/utils/regex.py +1 -1
  215. mlrun/utils/version/version.json +2 -2
  216. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/METADATA +186 -186
  217. mlrun-1.8.0rc1.dist-info/RECORD +356 -0
  218. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/WHEEL +1 -1
  219. mlrun-1.7.2rc3.dist-info/RECORD +0 -351
  220. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/LICENSE +0 -0
  221. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/entry_points.txt +0 -0
  222. {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc1.dist-info}/top_level.txt +0 -0
mlrun/config.py CHANGED
@@ -102,6 +102,9 @@ default_config = {
     "log_level": "INFO",
     # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
+    # custom logger format, works only with log_formatter: custom
+    # Note that your custom format must include those 4 fields - timestamp, level, message and more
+    "log_format_override": None,
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
     "runtimes_cleanup_interval": "300",
@@ -120,14 +123,6 @@ default_config = {
         "projects": {
             "summaries": {
                 "cache_interval": "30",
-                "feature_gates": {
-                    "artifacts": "enabled",
-                    "schedules": "enabled",
-                    "feature_sets": "enabled",
-                    "models": "enabled",
-                    "runs": "enabled",
-                    "pipelines": "enabled",
-                },
             },
         },
     },
@@ -140,6 +135,9 @@ default_config = {
             "delete_crd_resources_timeout": "5 minutes",
         },
     },
+    "object_retentions": {
+        "alert_activation": 14 * 7,  # days
+    },
     # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
     # before deleting them (4 hours)
     "runtime_resources_deletion_grace_period": "14400",
@@ -314,7 +312,7 @@ default_config = {
             },
             "request_timeout": 45,  # seconds
         },
-        # see server.api.utils.helpers.ensure_running_on_chief
+        # see server.py.services.api.utils.helpers.ensure_running_on_chief
         "ensure_function_running_on_chief_mode": "enabled",
     },
     "port": 8080,
@@ -794,7 +792,7 @@ default_config = {
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "disabled",
+        "mode": "enabled",
         # maximum number of alerts we allow to be configured.
         # user will get an error when exceeding this
         "max_allowed": 10000,
@@ -851,6 +849,22 @@ class Config:
         name = self.__class__.__name__
         return f"{name}({self._cfg!r})"

+    def __iter__(self):
+        if isinstance(self._cfg, Mapping):
+            return self._cfg.__iter__()
+
+    def items(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self._cfg.items())
+
+    def keys(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self.data.keys())
+
+    def values(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self.data.values())
+
     def update(self, cfg, skip_errors=False):
         for key, value in cfg.items():
             if hasattr(self, key):
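These additions let a dict-backed Config node be consumed as a read-only mapping. A rough usage sketch, assuming the accessed node wraps a plain dict (note the upstream asymmetry visible above: items() reads self._cfg while keys()/values() go through self.data):

    import mlrun

    # __iter__ delegates to the backing dict, so iteration now works directly.
    for key in mlrun.mlconf.httpdb:
        print(key)

    # items() yields the raw (key, value) pairs of the underlying dict.
    for key, value in mlrun.mlconf.httpdb.items():
        print(key, value)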
@@ -1043,6 +1057,17 @@ class Config:
                 f"is not allowed for iguazio version: {igz_version} < 3.5.1"
             )

+    def validate_object_retentions(self):
+        for table_name, retention_days in self.object_retentions.items():
+            if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"{table_name} partition interval must be greater than a week"
+                )
+            elif retention_days > 53 * 7:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"{table_name} partition interval must be less than a year"
+                )
+
     def resolve_chief_api_url(self) -> str:
         if self.httpdb.clusterization.chief.url:
             return self.httpdb.clusterization.chief.url
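The shipped default stays inside these bounds: alert_activation is 14 * 7 = 98 days, and the check accepts 7 <= days <= 53 * 7 = 371, with the lower bound waived when the PARTITION_INTERVAL environment variable is set (note the error messages say "partition interval" although the config key stores retention days). A standalone sketch of the same arithmetic:

    # Reproduce the bounds applied by validate_object_retentions.
    retention_days = 14 * 7          # 98 days, the alert_activation default
    assert retention_days >= 7       # else: "must be greater than a week"
    assert retention_days <= 53 * 7  # else: "must be less than a year" (371 days)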
@@ -1201,9 +1226,9 @@

     def get_model_monitoring_file_target_path(
         self,
-        project: str = "",
-        kind: str = "",
-        target: str = "online",
+        project: str,
+        kind: str,
+        target: typing.Literal["online", "offline"] = "online",
         artifact_path: typing.Optional[str] = None,
         function_name: typing.Optional[str] = None,
         **kwargs,
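The Literal annotation narrows target to the two supported values, so static checkers reject anything else, and project and kind lose their empty-string defaults. A hedged call sketch (the argument values are illustrative, not taken from the diff):

    import mlrun

    path = mlrun.mlconf.get_model_monitoring_file_target_path(
        project="my-project",  # now required
        kind="events",         # now required; value is illustrative
        target="offline",      # a type checker rejects values outside the Literal
    )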
@@ -1381,9 +1406,12 @@ def _validate_config(config):
         pass

     config.verify_security_context_enrichment_mode_is_allowed()
+    config.validate_object_retentions()


-def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
+def _verify_gpu_requests_and_limits(
+    requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
+):
     # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
     if requests_gpu and not limits_gpu:
         raise mlrun.errors.MLRunConflictError(
@@ -1396,7 +1424,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
     )


-def _convert_resources_to_str(config: dict = None):
+def _convert_resources_to_str(config: typing.Optional[dict] = None):
     resources_types = ["cpu", "memory", "gpu"]
     resource_requirements = ["requests", "limits"]
     if not config.get("default_function_pod_resources"):
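The GPU helper encodes the Kubernetes rule linked in the code: GPUs cannot be overcommitted, so a GPU request without a matching limit is invalid. A simplified sketch of the first guard only (not the module's actual helper):

    import mlrun.errors

    def verify_gpu(requests_gpu=None, limits_gpu=None):
        # Same guard as _verify_gpu_requests_and_limits above, reduced to one case.
        if requests_gpu and not limits_gpu:
            raise mlrun.errors.MLRunConflictError(
                "GPU requests must be accompanied by GPU limits"
            )

    verify_gpu(limits_gpu="1")      # allowed: a limit without a request
    # verify_gpu(requests_gpu="1")  # would raise MLRunConflictError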
mlrun/data_types/__init__.py CHANGED
@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx

 from .data_types import (
     InferOptions,
mlrun/data_types/data_types.py CHANGED
@@ -124,7 +124,6 @@ def spark_to_value_type(data_type):
         "double": ValueType.DOUBLE,
         "boolean": ValueType.BOOL,
         "timestamp": ValueType.DATETIME,
-        "timestamp_ntz": ValueType.DATETIME,
         "string": ValueType.STRING,
         "array": "list",
         "map": "dict",
mlrun/data_types/infer.py CHANGED
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from typing import Optional
+
 import numpy as np
 import packaging.version
 import pandas as pd
@@ -29,7 +31,7 @@ def infer_schema_from_df(
     df: pd.DataFrame,
     features,
     entities,
-    timestamp_key: str = None,
+    timestamp_key: Optional[str] = None,
     entity_columns=None,
     options: InferOptions = InferOptions.Null,
 ):
mlrun/data_types/spark.py CHANGED
@@ -14,11 +14,12 @@
 #
 from datetime import datetime
 from os import environ
+from typing import Optional

 import numpy as np
 import pytz
 from pyspark.sql.functions import to_utc_timestamp
-from pyspark.sql.types import BooleanType, DoubleType
+from pyspark.sql.types import BooleanType, DoubleType, TimestampType

 from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
     df,
     features,
     entities,
-    timestamp_key: str = None,
+    timestamp_key: Optional[str] = None,
     entity_columns=None,
     options: InferOptions = InferOptions.Null,
 ):
@@ -143,8 +144,7 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
     timestamp_columns = set()
     boolean_columns = set()
     for field in df_after_type_casts.schema.fields:
-        # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
-        is_timestamp = field.dataType.typeName().startswith("timestamp")
+        is_timestamp = isinstance(field.dataType, TimestampType)
         is_boolean = isinstance(field.dataType, BooleanType)
         if is_timestamp:
             df_after_type_casts = df_after_type_casts.withColumn(
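One behavioral nuance worth noting: the removed check, typeName().startswith("timestamp"), matched both TimestampType and TimestampNTZType (whose typeName is "timestamp_ntz"), while isinstance matches only TimestampType, because the two are sibling classes in PySpark rather than subclasses. This lines up with the timestamp_ntz mapping dropped from data_types.py above. A quick demonstration:

    from pyspark.sql.types import TimestampNTZType, TimestampType  # NTZ needs PySpark >= 3.4

    ntz = TimestampNTZType()
    print(ntz.typeName().startswith("timestamp"))  # True  - the old check matched it
    print(isinstance(ntz, TimestampType))          # False - the new check excludes it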
mlrun/data_types/to_pandas.py CHANGED
@@ -244,15 +244,6 @@ def _to_corrected_pandas_type(dt):


 def spark_df_to_pandas(spark_df):
-    import pyspark
-
-    if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
-
-        def to_pandas(spark_df_inner):
-            return spark_df_inner.toPandas()
-    else:
-        to_pandas = _to_pandas
-
     # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
     # when we upgrade pyspark, we should check whether this workaround is still necessary
     # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
@@ -271,9 +262,9 @@ def spark_df_to_pandas(spark_df):
             )
             type_conversion_dict[field.name] = "datetime64[ns]"

-        df = to_pandas(spark_df)
+        df = _to_pandas(spark_df)
         if type_conversion_dict:
             df = df.astype(type_conversion_dict)
         return df
     else:
-        return to_pandas(spark_df)
+        return _to_pandas(spark_df)
mlrun/datastore/__init__.py CHANGED
@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
-
 __all__ = [
     "DataItem",
     "get_store_resource",
mlrun/datastore/alibaba_oss.py CHANGED
@@ -15,6 +15,7 @@
 import time
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse

 import oss2
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class OSSStore(DataStore):
     using_bucket = True

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec

mlrun/datastore/azure_blob.py CHANGED
@@ -14,6 +14,7 @@

 import time
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse

 from azure.storage.blob import BlobServiceClient
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
         1024 * 1024 * 8
     )  # for service_client property only, does not affect filesystem

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._service_client = None
         self._storage_options = None
mlrun/datastore/base.py CHANGED
@@ -48,7 +48,7 @@ class FileStats:
 class DataStore:
     using_bucket = False

-    def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
+    def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
         self.name = name
@@ -500,12 +500,18 @@
         """DataItem url e.g. /dir/path, s3://bucket/path"""
         return self._url

-    def get(self, size=None, offset=0, encoding=None):
+    def get(
+        self,
+        size: Optional[int] = None,
+        offset: int = 0,
+        encoding: Optional[str] = None,
+    ) -> Union[bytes, str]:
         """read all or a byte range and return the content

         :param size: number of bytes to get
         :param offset: fetch from offset (in bytes)
         :param encoding: encoding (e.g. "utf-8") for converting bytes to str
+        :return: the bytes/str content
         """
         body = self._store.get(self._path, size=size, offset=offset)
         if encoding and isinstance(body, bytes):
@@ -519,7 +525,7 @@
         """
         self._store.download(self._path, target_path)

-    def put(self, data, append=False):
+    def put(self, data: Union[bytes, str], append: bool = False) -> None:
         """write/upload the data, append is only supported by some datastores

         :param data: data (bytes/str) to write
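The new annotations document the existing runtime contract: get returns raw bytes unless an encoding is supplied, in which case the body is decoded to str. A small usage sketch (the URL is illustrative):

    import mlrun

    item = mlrun.get_dataitem("s3://my-bucket/path/data.txt")  # illustrative URL
    item.put("hello world")            # accepts str or bytes
    raw = item.get(size=5, offset=0)   # first 5 bytes, returned as bytes
    text = item.get(encoding="utf-8")  # decoded to str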
@@ -687,7 +693,9 @@


 class HttpStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         self._https_auth_token = None
         self._schema = schema
mlrun/datastore/datastore.py CHANGED
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 from urllib.parse import urlparse

 from mergedeep import merge
@@ -178,12 +179,17 @@
         # which accepts a feature vector uri and generate the offline vector (parquet) for it if it doesnt exist
         if not target and not allow_empty_resources:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                f"resource {url} does not have a valid/persistent offline target"
+                f"Resource {url} does not have a valid/persistent offline target"
             )
         return resource, target or ""

     def object(
-        self, url, key="", project="", allow_empty_resources=None, secrets: dict = None
+        self,
+        url,
+        key="",
+        project="",
+        allow_empty_resources=None,
+        secrets: Optional[dict] = None,
     ) -> DataItem:
         meta = artifact_url = None
         if is_store_uri(url):
@@ -205,7 +211,7 @@
         )

     def get_or_create_store(
-        self, url, secrets: dict = None, project_name=""
+        self, url, secrets: Optional[dict] = None, project_name=""
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path
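The reflowed object signature keeps the call keyword-friendly while making the implicit-None default explicit. A hedged sketch of passing per-call credentials through the module-level store manager (the credential keys shown are ones the S3 store reads from secrets or the environment; the values are placeholders):

    import mlrun.datastore

    item = mlrun.datastore.store_manager.object(
        url="s3://my-bucket/file.csv",  # illustrative URL
        secrets={
            "AWS_ACCESS_KEY_ID": "...",      # placeholder credentials
            "AWS_SECRET_ACCESS_KEY": "...",
        },
    )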
mlrun/datastore/datastore_profile.py CHANGED
@@ -489,7 +489,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
     )


-def datastore_profile_read(url, project_name="", secrets: dict = None):
+def datastore_profile_read(url, project_name="", secrets: typing.Optional[dict] = None):
     parsed_url = urlparse(url)
     if parsed_url.scheme.lower() != "ds":
         raise mlrun.errors.MLRunInvalidArgumentError(
mlrun/datastore/dbfs_store.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import pathlib
+from typing import Optional

 from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
 from fsspec.registry import get_filesystem_class
@@ -81,7 +82,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):

 # dbfs objects will be represented with the following URL: dbfs://<path>
 class DBFSStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)

     @property
mlrun/datastore/filestore.py CHANGED
@@ -14,6 +14,7 @@
 import time
 from os import listdir, makedirs, path, stat
 from shutil import copyfile
+from typing import Optional

 import fsspec

@@ -23,7 +24,9 @@ from .base import DataStore, FileStats


 class FileStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, "file", endpoint, secrets=secrets)

         self._item_path, self._real_path = None, None
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -14,6 +14,7 @@
 import json
 import os
 from pathlib import Path
+from typing import Optional

 from fsspec.registry import get_filesystem_class
 from google.auth.credentials import Credentials
@@ -33,7 +34,9 @@ class GoogleCloudStorageStore(DataStore):
     workers = 8
     chunk_size = 32 * 1024 * 1024

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._storage_client = None
         self._storage_options = None
mlrun/datastore/hdfs.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from typing import Optional
 from urllib.parse import urlparse

 import fsspec
@@ -20,7 +21,9 @@ from mlrun.datastore.base import DataStore


 class HdfsStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)

         self.host = self._get_secret_or_env("HDFS_HOST")
mlrun/datastore/inmem.py CHANGED
@@ -17,6 +17,7 @@ from io import BytesIO, StringIO
 import pandas as pd

 import mlrun
+import mlrun.utils.helpers

 from .base import DataStore, FileStats

@@ -35,7 +36,9 @@ class InMemoryStore(DataStore):

     def _get_item(self, key):
         if key not in self._items:
-            raise ValueError(f"item {key} not found in memory store")
+            raise mlrun.errors.MLRunNotFoundError(
+                f"item {key} not found in memory store"
+            )
         return self._items[key]

     def get(self, key, size=None, offset=0):
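Since the in-memory store now raises mlrun's own not-found error instead of a bare ValueError, callers that relied on catching ValueError need the narrower type; a hedged sketch, assuming the memory:// scheme routes to InMemoryStore and noting that MLRunNotFoundError does not subclass ValueError:

    import mlrun
    import mlrun.errors

    item = mlrun.get_dataitem("memory://missing-key")  # illustrative in-memory URL
    try:
        item.get()
    except mlrun.errors.MLRunNotFoundError as err:
        print("not found:", err)  # previously surfaced as a bare ValueError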
mlrun/datastore/redis.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from typing import Optional
 from urllib.parse import urlparse

 import redis
@@ -30,7 +31,9 @@ class RedisStore(DataStore):
     - key and value sizes are limited to 512MB
     """

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
mlrun/datastore/s3.py CHANGED
@@ -13,6 +13,7 @@
 # limitations under the License.

 import time
+from typing import Optional

 import boto3
 from boto3.s3.transfer import TransferConfig
@@ -26,7 +27,9 @@ from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitiz
 class S3Store(DataStore):
     using_bucket = True

-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec
         self._temp_credentials = None
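The constructor change repeated across these datastores is the same PEP 484 cleanup: a parameter defaulting to None must spell out Optional in its annotation rather than relying on the implicit-Optional behavior that strict type checkers reject. In short:

    from typing import Optional

    # Before: implicit Optional, flagged by strict checkers (PEP 484).
    def connect_old(secrets: dict = None): ...

    # After: the None default is explicit in the annotation.
    def connect_new(secrets: Optional[dict] = None): ...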