PyPI - mlrun - Versions diffs - 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl - Mend

mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (259)

mlrun/__init__.py +23 -21
mlrun/__main__.py +3 -3
mlrun/alerts/alert.py +148 -14
mlrun/artifacts/__init__.py +2 -3
mlrun/artifacts/base.py +55 -12
mlrun/artifacts/dataset.py +16 -16
mlrun/artifacts/document.py +378 -0
mlrun/artifacts/manager.py +26 -17
mlrun/artifacts/model.py +66 -53
mlrun/common/constants.py +8 -0
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/feature_set.py +1 -0
mlrun/common/formatters/function.py +1 -0
mlrun/{model_monitoring/db/stores/base/__init__.py → common/formatters/model_endpoint.py} +16 -1
mlrun/common/formatters/pipeline.py +1 -2
mlrun/common/formatters/project.py +9 -0
mlrun/common/model_monitoring/__init__.py +0 -5
mlrun/common/model_monitoring/helpers.py +1 -29
mlrun/common/runtimes/constants.py +1 -2
mlrun/common/schemas/__init__.py +6 -2
mlrun/common/schemas/alert.py +111 -19
mlrun/common/schemas/api_gateway.py +3 -3
mlrun/common/schemas/artifact.py +11 -7
mlrun/common/schemas/auth.py +6 -4
mlrun/common/schemas/background_task.py +7 -7
mlrun/common/schemas/client_spec.py +2 -3
mlrun/common/schemas/clusterization_spec.py +2 -2
mlrun/common/schemas/common.py +53 -3
mlrun/common/schemas/constants.py +15 -0
mlrun/common/schemas/datastore_profile.py +1 -1
mlrun/common/schemas/feature_store.py +9 -9
mlrun/common/schemas/frontend_spec.py +4 -4
mlrun/common/schemas/function.py +10 -10
mlrun/common/schemas/hub.py +1 -1
mlrun/common/schemas/k8s.py +3 -3
mlrun/common/schemas/memory_reports.py +3 -3
mlrun/common/schemas/model_monitoring/__init__.py +2 -1
mlrun/common/schemas/model_monitoring/constants.py +67 -14
mlrun/common/schemas/model_monitoring/grafana.py +1 -1
mlrun/common/schemas/model_monitoring/model_endpoints.py +92 -147
mlrun/common/schemas/notification.py +24 -3
mlrun/common/schemas/object.py +1 -1
mlrun/common/schemas/pagination.py +4 -4
mlrun/common/schemas/partition.py +137 -0
mlrun/common/schemas/pipeline.py +2 -2
mlrun/common/schemas/project.py +25 -17
mlrun/common/schemas/runs.py +2 -2
mlrun/common/schemas/runtime_resource.py +5 -5
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/secret.py +1 -1
mlrun/common/schemas/tag.py +3 -3
mlrun/common/schemas/workflow.py +5 -5
mlrun/config.py +68 -10
mlrun/data_types/__init__.py +0 -2
mlrun/data_types/data_types.py +1 -0
mlrun/data_types/infer.py +3 -1
mlrun/data_types/spark.py +5 -3
mlrun/data_types/to_pandas.py +11 -2
mlrun/datastore/__init__.py +2 -2
mlrun/datastore/alibaba_oss.py +4 -1
mlrun/datastore/azure_blob.py +4 -1
mlrun/datastore/base.py +12 -4
mlrun/datastore/datastore.py +9 -3
mlrun/datastore/datastore_profile.py +79 -20
mlrun/datastore/dbfs_store.py +4 -1
mlrun/datastore/filestore.py +4 -1
mlrun/datastore/google_cloud_storage.py +4 -1
mlrun/datastore/hdfs.py +4 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +4 -1
mlrun/datastore/s3.py +4 -1
mlrun/datastore/sources.py +52 -51
mlrun/datastore/store_resources.py +7 -4
mlrun/datastore/targets.py +23 -22
mlrun/datastore/utils.py +2 -2
mlrun/datastore/v3io.py +4 -1
mlrun/datastore/vectorstore.py +229 -0
mlrun/datastore/wasbfs/fs.py +13 -12
mlrun/db/base.py +213 -83
mlrun/db/factory.py +0 -3
mlrun/db/httpdb.py +1265 -387
mlrun/db/nopdb.py +205 -74
mlrun/errors.py +2 -2
mlrun/execution.py +136 -50
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +41 -40
mlrun/feature_store/common.py +9 -9
mlrun/feature_store/feature_set.py +20 -18
mlrun/feature_store/feature_vector.py +27 -24
mlrun/feature_store/retrieval/base.py +14 -9
mlrun/feature_store/retrieval/job.py +2 -1
mlrun/feature_store/steps.py +2 -2
mlrun/features.py +30 -13
mlrun/frameworks/__init__.py +1 -2
mlrun/frameworks/_common/__init__.py +1 -2
mlrun/frameworks/_common/artifacts_library.py +2 -2
mlrun/frameworks/_common/mlrun_interface.py +10 -6
mlrun/frameworks/_common/model_handler.py +29 -27
mlrun/frameworks/_common/producer.py +3 -1
mlrun/frameworks/_dl_common/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
mlrun/frameworks/_ml_common/__init__.py +1 -2
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
mlrun/frameworks/_ml_common/model_handler.py +21 -21
mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/auto_mlrun/__init__.py +1 -2
mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
mlrun/frameworks/huggingface/__init__.py +1 -2
mlrun/frameworks/huggingface/model_server.py +9 -9
mlrun/frameworks/lgbm/__init__.py +47 -44
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
mlrun/frameworks/lgbm/model_handler.py +15 -11
mlrun/frameworks/lgbm/model_server.py +11 -7
mlrun/frameworks/lgbm/utils.py +2 -2
mlrun/frameworks/onnx/__init__.py +1 -2
mlrun/frameworks/onnx/dataset.py +3 -3
mlrun/frameworks/onnx/mlrun_interface.py +2 -2
mlrun/frameworks/onnx/model_handler.py +7 -5
mlrun/frameworks/onnx/model_server.py +8 -6
mlrun/frameworks/parallel_coordinates.py +11 -11
mlrun/frameworks/pytorch/__init__.py +22 -23
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
mlrun/frameworks/pytorch/model_handler.py +21 -17
mlrun/frameworks/pytorch/model_server.py +13 -9
mlrun/frameworks/sklearn/__init__.py +19 -18
mlrun/frameworks/sklearn/estimator.py +2 -2
mlrun/frameworks/sklearn/metric.py +3 -3
mlrun/frameworks/sklearn/metrics_library.py +8 -6
mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
mlrun/frameworks/sklearn/model_handler.py +4 -3
mlrun/frameworks/tf_keras/__init__.py +11 -12
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
mlrun/frameworks/tf_keras/model_handler.py +17 -13
mlrun/frameworks/tf_keras/model_server.py +12 -8
mlrun/frameworks/xgboost/__init__.py +19 -18
mlrun/frameworks/xgboost/model_handler.py +13 -9
mlrun/launcher/base.py +3 -4
mlrun/launcher/local.py +1 -1
mlrun/launcher/remote.py +1 -1
mlrun/lists.py +4 -3
mlrun/model.py +117 -46
mlrun/model_monitoring/__init__.py +4 -4
mlrun/model_monitoring/api.py +72 -59
mlrun/model_monitoring/applications/_application_steps.py +17 -17
mlrun/model_monitoring/applications/base.py +165 -6
mlrun/model_monitoring/applications/context.py +88 -37
mlrun/model_monitoring/applications/evidently_base.py +0 -1
mlrun/model_monitoring/applications/histogram_data_drift.py +43 -21
mlrun/model_monitoring/applications/results.py +55 -3
mlrun/model_monitoring/controller.py +207 -239
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/_schedules.py +156 -0
mlrun/model_monitoring/db/_stats.py +189 -0
mlrun/model_monitoring/db/tsdb/base.py +78 -25
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +255 -29
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +78 -17
mlrun/model_monitoring/helpers.py +151 -49
mlrun/model_monitoring/stream_processing.py +99 -283
mlrun/model_monitoring/tracking_policy.py +10 -3
mlrun/model_monitoring/writer.py +48 -36
mlrun/package/__init__.py +3 -6
mlrun/package/context_handler.py +1 -1
mlrun/package/packager.py +12 -9
mlrun/package/packagers/__init__.py +0 -2
mlrun/package/packagers/default_packager.py +14 -11
mlrun/package/packagers/numpy_packagers.py +16 -7
mlrun/package/packagers/pandas_packagers.py +18 -18
mlrun/package/packagers/python_standard_library_packagers.py +25 -11
mlrun/package/packagers_manager.py +31 -14
mlrun/package/utils/__init__.py +0 -3
mlrun/package/utils/_pickler.py +6 -6
mlrun/platforms/__init__.py +47 -16
mlrun/platforms/iguazio.py +4 -1
mlrun/projects/operations.py +27 -27
mlrun/projects/pipelines.py +71 -36
mlrun/projects/project.py +890 -220
mlrun/run.py +53 -10
mlrun/runtimes/__init__.py +1 -3
mlrun/runtimes/base.py +15 -11
mlrun/runtimes/daskjob.py +9 -9
mlrun/runtimes/generators.py +2 -1
mlrun/runtimes/kubejob.py +4 -5
mlrun/runtimes/mounts.py +572 -0
mlrun/runtimes/mpijob/__init__.py +0 -2
mlrun/runtimes/mpijob/abstract.py +7 -6
mlrun/runtimes/nuclio/api_gateway.py +7 -7
mlrun/runtimes/nuclio/application/application.py +11 -11
mlrun/runtimes/nuclio/function.py +19 -17
mlrun/runtimes/nuclio/serving.py +18 -13
mlrun/runtimes/pod.py +154 -45
mlrun/runtimes/remotesparkjob.py +3 -2
mlrun/runtimes/sparkjob/__init__.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +21 -11
mlrun/runtimes/utils.py +6 -5
mlrun/serving/merger.py +6 -4
mlrun/serving/remote.py +18 -17
mlrun/serving/routers.py +185 -172
mlrun/serving/server.py +7 -1
mlrun/serving/states.py +97 -78
mlrun/serving/utils.py +13 -2
mlrun/serving/v1_serving.py +3 -2
mlrun/serving/v2_serving.py +105 -72
mlrun/track/__init__.py +1 -1
mlrun/track/tracker.py +2 -2
mlrun/track/trackers/mlflow_tracker.py +6 -5
mlrun/utils/async_http.py +1 -1
mlrun/utils/clones.py +1 -1
mlrun/utils/helpers.py +63 -19
mlrun/utils/logger.py +106 -4
mlrun/utils/notifications/notification/__init__.py +22 -19
mlrun/utils/notifications/notification/base.py +33 -14
mlrun/utils/notifications/notification/console.py +6 -6
mlrun/utils/notifications/notification/git.py +11 -11
mlrun/utils/notifications/notification/ipython.py +10 -9
mlrun/utils/notifications/notification/mail.py +176 -0
mlrun/utils/notifications/notification/slack.py +6 -6
mlrun/utils/notifications/notification/webhook.py +6 -6
mlrun/utils/notifications/notification_pusher.py +86 -44
mlrun/utils/regex.py +11 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/METADATA +29 -24
mlrun-1.8.0rc11.dist-info/RECORD +347 -0
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/store.py +0 -213
mlrun/model_monitoring/db/stores/sqldb/__init__.py +0 -13
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +0 -13
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
mlrun/model_monitoring/model_endpoint.py +0 -118
mlrun-1.7.1rc10.dist-info/RECORD +0 -351
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/LICENSE +0 -0
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/WHEEL +0 -0
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.1rc10.dist-info → mlrun-1.8.0rc11.dist-info}/top_level.txt +0 -0

mlrun/common/schemas/runs.py CHANGED Viewed

@@ -14,13 +14,13 @@
 import typing
-import pydantic
+import pydantic.v1
 from deprecated import deprecated
 import mlrun.common.types
-class RunIdentifier(pydantic.BaseModel):
+class RunIdentifier(pydantic.v1.BaseModel):
     kind: typing.Literal["run"] = "run"
     uid: typing.Optional[str]
     iter: typing.Optional[int]

mlrun/common/schemas/runtime_resource.py CHANGED Viewed

@@ -14,7 +14,7 @@
 #
 import typing
-import pydantic
+import pydantic.v1
 import mlrun.common.types
@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
     project = "project"
-class RuntimeResource(pydantic.BaseModel):
+class RuntimeResource(pydantic.v1.BaseModel):
     name: str
     labels: dict[str, str] = {}
     status: typing.Optional[dict]
-class RuntimeResources(pydantic.BaseModel):
+class RuntimeResources(pydantic.v1.BaseModel):
     crd_resources: list[RuntimeResource] = []
     pod_resources: list[RuntimeResource] = []
     # only for dask runtime
     service_resources: typing.Optional[list[RuntimeResource]] = None
     class Config:
-        extra = pydantic.Extra.allow
+        extra = pydantic.v1.Extra.allow
-class KindRuntimeResources(pydantic.BaseModel):
+class KindRuntimeResources(pydantic.v1.BaseModel):
     kind: str
     resources: RuntimeResources

mlrun/common/schemas/schedule.py CHANGED Viewed

@@ -15,7 +15,7 @@
 from datetime import datetime
 from typing import Any, Literal, Optional, Union
-from pydantic import BaseModel
+from pydantic.v1 import BaseModel
 import mlrun.common.types
 from mlrun.common.schemas.auth import Credentials

mlrun/common/schemas/secret.py CHANGED Viewed

@@ -14,7 +14,7 @@
 #
 from typing import Optional
-from pydantic import BaseModel, Field
+from pydantic.v1 import BaseModel, Field
 import mlrun.common.types

mlrun/common/schemas/tag.py CHANGED Viewed

@@ -13,17 +13,17 @@
 # limitations under the License.
 #
-import pydantic
+import pydantic.v1
 from .artifact import ArtifactIdentifier
-class Tag(pydantic.BaseModel):
+class Tag(pydantic.v1.BaseModel):
     name: str
     project: str
-class TagObjects(pydantic.BaseModel):
+class TagObjects(pydantic.v1.BaseModel):
     """Tag object"""
     kind: str

mlrun/common/schemas/workflow.py CHANGED Viewed

@@ -14,14 +14,14 @@
 #
 import typing
-import pydantic
+import pydantic.v1
 from mlrun.common.schemas.notification import Notification
 from mlrun.common.schemas.schedule import ScheduleCronTrigger
 from mlrun.common.types import StrEnum
-class WorkflowSpec(pydantic.BaseModel):
+class WorkflowSpec(pydantic.v1.BaseModel):
     name: str
     engine: typing.Optional[str] = None
     code: typing.Optional[str] = None
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
     workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
-class WorkflowRequest(pydantic.BaseModel):
+class WorkflowRequest(pydantic.v1.BaseModel):
     spec: typing.Optional[WorkflowSpec] = None
     arguments: typing.Optional[dict] = None
     artifact_path: typing.Optional[str] = None
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
     notifications: typing.Optional[list[Notification]] = None
-class WorkflowResponse(pydantic.BaseModel):
+class WorkflowResponse(pydantic.v1.BaseModel):
     project: str = None
     name: str = None
     status: str = None
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
     schedule: typing.Union[str, ScheduleCronTrigger] = None
-class GetWorkflowResponse(pydantic.BaseModel):
+class GetWorkflowResponse(pydantic.v1.BaseModel):
     workflow_id: str = None

mlrun/config.py CHANGED Viewed

@@ -102,6 +102,9 @@ default_config = {
     "log_level": "INFO",
     # log formatter (options: human | human_extended | json)
     "log_formatter": "human",
+    # custom logger format, workes only with log_formatter: custom
+    # Note that your custom format must include those 4 fields - timestamp, level, message and more
+    "log_format_override": None,
     "submit_timeout": "180",  # timeout when submitting a new k8s resource
     # runtimes cleanup interval in seconds
     "runtimes_cleanup_interval": "300",
@@ -132,6 +135,12 @@ default_config = {
             "delete_crd_resources_timeout": "5 minutes",
         },
     },
+    "object_retentions": {
+        "alert_activations": 14 * 7,  # days
+    },
+    # A safety margin to account for delays
+    # This ensures that extra partitions are available beyond the specified retention period
+    "partitions_buffer_multiplier": 3,
     # the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
     # before deleting them (4 hours)
     "runtime_resources_deletion_grace_period": "14400",
@@ -151,6 +160,7 @@ default_config = {
         # migration from artifacts to artifacts_v2 is done in batches, and requires a state file to keep track of the
         # migration progress.
         "artifact_migration_batch_size": 200,
+        "artifact_migration_v9_batch_size": 30000,
         "artifact_migration_state_file_path": "./db/_artifact_migration_state.json",
         "datasets": {
             "max_preview_columns": 100,
@@ -223,6 +233,7 @@ default_config = {
                 "delete_function": "900",
             },
             "runtimes": {"dask": "600"},
+            "push_notifications": "60",
         },
     },
     "function": {
@@ -306,7 +317,7 @@ default_config = {
                 },
                 "request_timeout": 45,  # seconds
             },
-            # see server.api.utils.helpers.ensure_running_on_chief
+            # see server.py.services.api.utils.helpers.ensure_running_on_chief
             "ensure_function_running_on_chief_mode": "enabled",
         },
         "port": 8080,
@@ -524,7 +535,7 @@ default_config = {
             "verbose": True,
         },
         "pagination": {
-            "default_page_size": 20,
+            "default_page_size": 200,
             "pagination_cache": {
                 "interval": 60,
                 "ttl": 3600,
@@ -598,8 +609,6 @@ default_config = {
         "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
         "parquet_batching_max_events": 10_000,
         "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-        # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
-        "endpoint_store_connection": "",
         # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
         "tsdb_connection": "",
         # See mlrun.common.schemas.model_monitoring.constants.StreamKind for available options
@@ -786,17 +795,36 @@ default_config = {
     "grafana_url": "",
     "alerts": {
         # supported modes: "enabled", "disabled".
-        "mode": "disabled",
+        "mode": "enabled",
         # maximum number of alerts we allow to be configured.
         # user will get an error when exceeding this
         "max_allowed": 10000,
         # maximum allowed value for count in criteria field inside AlertConfig
         "max_criteria_count": 100,
+        # interval for periodic events generation job
+        "events_generation_interval": "30",
     },
     "auth_with_client_id": {
         "enabled": False,
         "request_timeout": 5,
     },
+    "services": {
+        # The running service name. One of: "api", "alerts"
+        "service_name": "api",
+        "hydra": {
+            # Comma separated list of services to run on the instance.
+            # Currently, this is only considered when the service_name is "api".
+            # "*" starts all services on the same instance,
+            # other options are considered as running only the api service.
+            "services": "*",
+        },
+    },
+    "notifications": {
+        "smtp": {
+            "config_secret_name": "mlrun-smtp-config",
+            "refresh_interval": "30",
+        }
+    },
 }
 _is_running_as_api = None
@@ -843,6 +871,22 @@ class Config:
         name = self.__class__.__name__
         return f"{name}({self._cfg!r})"
+    def __iter__(self):
+        if isinstance(self._cfg, Mapping):
+            return self._cfg.__iter__()
+    def items(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self._cfg.items())
+    def keys(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self.data.keys())
+    def values(self):
+        if isinstance(self._cfg, Mapping):
+            return iter(self.data.values())
     def update(self, cfg, skip_errors=False):
         for key, value in cfg.items():
             if hasattr(self, key):
@@ -1035,6 +1079,17 @@ class Config:
                 f"is not allowed for iguazio version: {igz_version} < 3.5.1"
             )
+    def validate_object_retentions(self):
+        for table_name, retention_days in self.object_retentions.items():
+            if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"{table_name} partition interval must be greater than a week"
+                )
+            elif retention_days > 53 * 7:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    f"{table_name} partition interval must be less than a year"
+                )
     def resolve_chief_api_url(self) -> str:
         if self.httpdb.clusterization.chief.url:
             return self.httpdb.clusterization.chief.url
@@ -1193,9 +1248,9 @@ class Config:
     def get_model_monitoring_file_target_path(
         self,
-        project: str = "",
-        kind: str = "",
-        target: str = "online",
+        project: str,
+        kind: str,
+        target: typing.Literal["online", "offline"] = "online",
         artifact_path: typing.Optional[str] = None,
         function_name: typing.Optional[str] = None,
         **kwargs,
@@ -1373,9 +1428,12 @@ def _validate_config(config):
         pass
     config.verify_security_context_enrichment_mode_is_allowed()
+    config.validate_object_retentions()
-def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str = None):
+def _verify_gpu_requests_and_limits(
+    requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
+):
     # https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
     if requests_gpu and not limits_gpu:
         raise mlrun.errors.MLRunConflictError(
@@ -1388,7 +1446,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
         )
-def _convert_resources_to_str(config: dict = None):
+def _convert_resources_to_str(config: typing.Optional[dict] = None):
     resources_types = ["cpu", "memory", "gpu"]
     resource_requirements = ["requests", "limits"]
     if not config.get("default_function_pod_resources"):

mlrun/data_types/__init__.py CHANGED Viewed

@@ -11,8 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
-# flake8: noqa  - this is until we take care of the F401 violations with respect to __all__ & sphinx
 from .data_types import (
     InferOptions,

mlrun/data_types/data_types.py CHANGED Viewed

@@ -124,6 +124,7 @@ def spark_to_value_type(data_type):
         "double": ValueType.DOUBLE,
         "boolean": ValueType.BOOL,
         "timestamp": ValueType.DATETIME,
+        "timestamp_ntz": ValueType.DATETIME,
         "string": ValueType.STRING,
         "array": "list",
         "map": "dict",

mlrun/data_types/infer.py CHANGED Viewed

@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
+from typing import Optional
 import numpy as np
 import packaging.version
 import pandas as pd
@@ -29,7 +31,7 @@ def infer_schema_from_df(
     df: pd.DataFrame,
     features,
     entities,
-    timestamp_key: str = None,
+    timestamp_key: Optional[str] = None,
     entity_columns=None,
     options: InferOptions = InferOptions.Null,
 ):

mlrun/data_types/spark.py CHANGED Viewed

@@ -14,11 +14,12 @@
 #
 from datetime import datetime
 from os import environ
+from typing import Optional
 import numpy as np
 import pytz
 from pyspark.sql.functions import to_utc_timestamp
-from pyspark.sql.types import BooleanType, DoubleType, TimestampType
+from pyspark.sql.types import BooleanType, DoubleType
 from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
 from mlrun.utils import logger
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
     df,
     features,
     entities,
-    timestamp_key: str = None,
+    timestamp_key: Optional[str] = None,
     entity_columns=None,
     options: InferOptions = InferOptions.Null,
 ):
@@ -143,7 +144,8 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
     timestamp_columns = set()
     boolean_columns = set()
     for field in df_after_type_casts.schema.fields:
-        is_timestamp = isinstance(field.dataType, TimestampType)
+        # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+        is_timestamp = field.dataType.typeName().startswith("timestamp")
         is_boolean = isinstance(field.dataType, BooleanType)
         if is_timestamp:
             df_after_type_casts = df_after_type_casts.withColumn(

mlrun/data_types/to_pandas.py CHANGED Viewed

@@ -244,6 +244,15 @@ def _to_corrected_pandas_type(dt):
 def spark_df_to_pandas(spark_df):
+    import pyspark
+    if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
+        def to_pandas(spark_df_inner):
+            return spark_df_inner.toPandas()
+    else:
+        to_pandas = _to_pandas
     # as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
     # when we upgrade pyspark, we should check whether this workaround is still necessary
     # see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
@@ -262,9 +271,9 @@ def spark_df_to_pandas(spark_df):
                 )
                 type_conversion_dict[field.name] = "datetime64[ns]"
-        df = _to_pandas(spark_df)
+        df = to_pandas(spark_df)
         if type_conversion_dict:
             df = df.astype(type_conversion_dict)
         return df
     else:
-        return _to_pandas(spark_df)
+        return to_pandas(spark_df)

mlrun/datastore/__init__.py CHANGED Viewed

@@ -12,8 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-# flake8: noqa  - this is until we take care of the F401 violations with respect to __all__ & sphinx
 __all__ = [
     "DataItem",
     "get_store_resource",
@@ -32,6 +30,8 @@ __all__ = [
     "DatabricksFileSystemDisableCache",
     "DatabricksFileBugFixed",
     "get_stream_pusher",
+    "ConfigProfile",
+    "VectorStoreCollection",
 ]
 import fsspec

mlrun/datastore/alibaba_oss.py CHANGED Viewed

@@ -15,6 +15,7 @@
 import time
 from datetime import datetime
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse
 import oss2
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
 class OSSStore(DataStore):
     using_bucket = True
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         # will be used in case user asks to assume a role and work through fsspec

mlrun/datastore/azure_blob.py CHANGED Viewed

@@ -14,6 +14,7 @@
 import time
 from pathlib import Path
+from typing import Optional
 from urllib.parse import urlparse
 from azure.storage.blob import BlobServiceClient
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
         1024 * 1024 * 8
     )  # for service_client property only, does not affect filesystem
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self._service_client = None
         self._storage_options = None

mlrun/datastore/base.py CHANGED Viewed

@@ -48,7 +48,7 @@ class FileStats:
 class DataStore:
     using_bucket = False
-    def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
+    def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
         self.name = name
@@ -500,12 +500,18 @@ class DataItem:
         """DataItem url e.g. /dir/path, s3://bucket/path"""
         return self._url
-    def get(self, size=None, offset=0, encoding=None):
+    def get(
+        self,
+        size: Optional[int] = None,
+        offset: int = 0,
+        encoding: Optional[str] = None,
+    ) -> Union[bytes, str]:
         """read all or a byte range and return the content
         :param size:     number of bytes to get
         :param offset:   fetch from offset (in bytes)
         :param encoding: encoding (e.g. "utf-8") for converting bytes to str
+        :return:         the bytes/str content
         """
         body = self._store.get(self._path, size=size, offset=offset)
         if encoding and isinstance(body, bytes):
@@ -519,7 +525,7 @@ class DataItem:
         """
         self._store.download(self._path, target_path)
-    def put(self, data, append=False):
+    def put(self, data: Union[bytes, str], append: bool = False) -> None:
         """write/upload the data, append is only supported by some datastores
         :param data:   data (bytes/str) to write
@@ -687,7 +693,9 @@ def basic_auth_header(user, password):
 class HttpStore(DataStore):
-    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+    def __init__(
+        self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
+    ):
         super().__init__(parent, name, schema, endpoint, secrets)
         self._https_auth_token = None
         self._schema = schema

mlrun/datastore/datastore.py CHANGED Viewed

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
 from urllib.parse import urlparse
 from mergedeep import merge
@@ -178,12 +179,17 @@ class StoreManager:
         # which accepts a feature vector uri and generate the offline vector (parquet) for it if it doesnt exist
         if not target and not allow_empty_resources:
             raise mlrun.errors.MLRunInvalidArgumentError(
-                f"resource {url} does not have a valid/persistent offline target"
+                f"Resource {url} does not have a valid/persistent offline target"
             )
         return resource, target or ""
     def object(
-        self, url, key="", project="", allow_empty_resources=None, secrets: dict = None
+        self,
+        url,
+        key="",
+        project="",
+        allow_empty_resources=None,
+        secrets: Optional[dict] = None,
     ) -> DataItem:
         meta = artifact_url = None
         if is_store_uri(url):
@@ -205,7 +211,7 @@ class StoreManager:
         )
     def get_or_create_store(
-        self, url, secrets: dict = None, project_name=""
+        self, url, secrets: Optional[dict] = None, project_name=""
     ) -> (DataStore, str, str):
         schema, endpoint, parsed_url = parse_url(url)
         subpath = parsed_url.path

mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.1rc10__py3-none-any.whl → 1.8.0rc11__py3-none-any.whl