mlrun 1.7.2rc3__py3-none-any.whl → 1.8.0rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +18 -18
- mlrun/__main__.py +3 -3
- mlrun/alerts/alert.py +19 -12
- mlrun/artifacts/__init__.py +0 -2
- mlrun/artifacts/base.py +34 -11
- mlrun/artifacts/dataset.py +16 -16
- mlrun/artifacts/manager.py +13 -13
- mlrun/artifacts/model.py +66 -53
- mlrun/common/constants.py +6 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/common/formatters/model_endpoint.py +30 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -3
- mlrun/common/model_monitoring/helpers.py +1 -1
- mlrun/common/runtimes/constants.py +1 -2
- mlrun/common/schemas/__init__.py +7 -2
- mlrun/common/schemas/alert.py +31 -18
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +7 -13
- mlrun/common/schemas/auth.py +6 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +2 -2
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +62 -12
- mlrun/common/schemas/model_monitoring/grafana.py +1 -1
- mlrun/common/schemas/model_monitoring/model_endpoint_v2.py +149 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +22 -6
- mlrun/common/schemas/notification.py +18 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +137 -0
- mlrun/common/schemas/pipeline.py +2 -2
- mlrun/common/schemas/project.py +22 -17
- mlrun/common/schemas/runs.py +2 -2
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +5 -5
- mlrun/config.py +65 -15
- mlrun/data_types/__init__.py +0 -2
- mlrun/data_types/data_types.py +0 -1
- mlrun/data_types/infer.py +3 -1
- mlrun/data_types/spark.py +4 -4
- mlrun/data_types/to_pandas.py +2 -11
- mlrun/datastore/__init__.py +0 -2
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +12 -4
- mlrun/datastore/datastore.py +9 -3
- mlrun/datastore/datastore_profile.py +20 -20
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +4 -1
- mlrun/datastore/sources.py +51 -49
- mlrun/datastore/store_resources.py +0 -2
- mlrun/datastore/targets.py +22 -23
- mlrun/datastore/utils.py +2 -2
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +170 -64
- mlrun/db/factory.py +3 -0
- mlrun/db/httpdb.py +986 -238
- mlrun/db/nopdb.py +155 -57
- mlrun/errors.py +2 -2
- mlrun/execution.py +55 -29
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -40
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +27 -24
- mlrun/feature_store/retrieval/base.py +14 -9
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/steps.py +2 -2
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +29 -27
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/local.py +1 -1
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +4 -3
- mlrun/model.py +110 -46
- mlrun/model_monitoring/__init__.py +1 -2
- mlrun/model_monitoring/api.py +6 -6
- mlrun/model_monitoring/applications/_application_steps.py +13 -15
- mlrun/model_monitoring/applications/histogram_data_drift.py +41 -15
- mlrun/model_monitoring/applications/results.py +55 -3
- mlrun/model_monitoring/controller.py +185 -223
- mlrun/model_monitoring/db/_schedules.py +156 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/stores/__init__.py +1 -1
- mlrun/model_monitoring/db/stores/base/store.py +6 -65
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -25
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -97
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +2 -58
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -15
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +6 -257
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +9 -271
- mlrun/model_monitoring/db/tsdb/base.py +76 -24
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +61 -6
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +253 -28
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -17
- mlrun/model_monitoring/helpers.py +91 -1
- mlrun/model_monitoring/model_endpoint.py +4 -2
- mlrun/model_monitoring/stream_processing.py +16 -13
- mlrun/model_monitoring/tracking_policy.py +10 -3
- mlrun/model_monitoring/writer.py +47 -26
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +31 -14
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +3 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +27 -27
- mlrun/projects/pipelines.py +34 -35
- mlrun/projects/project.py +535 -182
- mlrun/run.py +13 -10
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +15 -11
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/generators.py +2 -1
- mlrun/runtimes/kubejob.py +4 -5
- mlrun/runtimes/mounts.py +572 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -11
- mlrun/runtimes/nuclio/function.py +13 -13
- mlrun/runtimes/nuclio/serving.py +9 -9
- mlrun/runtimes/pod.py +154 -45
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +21 -11
- mlrun/runtimes/utils.py +6 -5
- mlrun/serving/merger.py +6 -4
- mlrun/serving/remote.py +18 -17
- mlrun/serving/routers.py +27 -27
- mlrun/serving/server.py +1 -1
- mlrun/serving/states.py +76 -71
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +4 -4
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/helpers.py +70 -16
- mlrun/utils/logger.py +106 -4
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +33 -14
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +149 -0
- mlrun/utils/notifications/notification/slack.py +6 -6
- mlrun/utils/notifications/notification/webhook.py +18 -22
- mlrun/utils/notifications/notification_pusher.py +43 -31
- mlrun/utils/regex.py +3 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/METADATA +18 -14
- mlrun-1.8.0rc2.dist-info/RECORD +358 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/WHEEL +1 -1
- mlrun-1.7.2rc3.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/LICENSE +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc3.dist-info → mlrun-1.8.0rc2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
#
|
|
15
|
+
from datetime import datetime, timedelta
|
|
16
|
+
|
|
17
|
+
from mlrun.common.types import StrEnum
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class PartitionInterval(StrEnum):
|
|
21
|
+
DAY = "DAY"
|
|
22
|
+
MONTH = "MONTH"
|
|
23
|
+
YEARWEEK = "YEARWEEK"
|
|
24
|
+
|
|
25
|
+
@classmethod
|
|
26
|
+
def is_valid(cls, value: str) -> bool:
|
|
27
|
+
return value in cls._value2member_map_
|
|
28
|
+
|
|
29
|
+
@classmethod
|
|
30
|
+
def valid_intervals(cls) -> list:
|
|
31
|
+
return list(cls._value2member_map_.keys())
|
|
32
|
+
|
|
33
|
+
def as_duration(self) -> timedelta:
|
|
34
|
+
"""
|
|
35
|
+
Convert the partition interval to a duration-like timedelta.
|
|
36
|
+
|
|
37
|
+
Returns:
|
|
38
|
+
timedelta: A duration representing the partition interval.
|
|
39
|
+
"""
|
|
40
|
+
if self == PartitionInterval.DAY:
|
|
41
|
+
return timedelta(days=1)
|
|
42
|
+
elif self == PartitionInterval.MONTH:
|
|
43
|
+
# Approximate a month as 30 days
|
|
44
|
+
return timedelta(days=30)
|
|
45
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
46
|
+
return timedelta(weeks=1)
|
|
47
|
+
|
|
48
|
+
@classmethod
|
|
49
|
+
def from_function(cls, partition_function: str):
|
|
50
|
+
"""
|
|
51
|
+
Returns the corresponding PartitionInterval for a given partition function,
|
|
52
|
+
or None if the function is not mapped.
|
|
53
|
+
|
|
54
|
+
:param partition_function: The partition function to map to an interval.
|
|
55
|
+
:return: PartitionInterval corresponding to the function, or None if no match is found.
|
|
56
|
+
"""
|
|
57
|
+
partition_function_to_partitions_interval = {
|
|
58
|
+
"DAY": "DAY",
|
|
59
|
+
"DAYOFMONTH": "DAY",
|
|
60
|
+
"MONTH": "MONTH",
|
|
61
|
+
"YEARWEEK": "YEARWEEK",
|
|
62
|
+
}
|
|
63
|
+
interval = partition_function_to_partitions_interval.get(partition_function)
|
|
64
|
+
if interval and cls.is_valid(interval):
|
|
65
|
+
return cls[interval]
|
|
66
|
+
raise KeyError(f"Partition function: {partition_function} isn't supported")
|
|
67
|
+
|
|
68
|
+
def get_partition_info(
|
|
69
|
+
self,
|
|
70
|
+
start_datetime: datetime,
|
|
71
|
+
partition_number: int = 1,
|
|
72
|
+
) -> list[tuple[str, str]]:
|
|
73
|
+
"""
|
|
74
|
+
Generates partition details for a specified number of partitions starting from a given datetime.
|
|
75
|
+
|
|
76
|
+
:param start_datetime: The starting datetime used for generating partition details.
|
|
77
|
+
:param partition_number: The number of partitions to generate details for.
|
|
78
|
+
|
|
79
|
+
:return: A list of tuples:
|
|
80
|
+
- partition_name: The name for the partition.
|
|
81
|
+
- partition_value: The "LESS THAN" value for the next partition boundary.
|
|
82
|
+
"""
|
|
83
|
+
partitioning_information_list = []
|
|
84
|
+
current_datetime = start_datetime
|
|
85
|
+
|
|
86
|
+
for _ in range(partition_number):
|
|
87
|
+
partition_name = self.get_partition_name(current_datetime)
|
|
88
|
+
partition_boundary_date = self.get_next_partition_time(current_datetime)
|
|
89
|
+
partition_value = self.get_partition_name(partition_boundary_date)
|
|
90
|
+
partitioning_information_list.append((partition_name, partition_value))
|
|
91
|
+
|
|
92
|
+
# Move to the next interval
|
|
93
|
+
current_datetime = partition_boundary_date
|
|
94
|
+
|
|
95
|
+
return partitioning_information_list
|
|
96
|
+
|
|
97
|
+
def get_next_partition_time(self, current_datetime: datetime) -> datetime:
|
|
98
|
+
"""
|
|
99
|
+
Calculates the next partition boundary time based on the specified partition interval.
|
|
100
|
+
:param current_datetime: The current datetime from which the next interval is calculated.
|
|
101
|
+
|
|
102
|
+
:return: A datetime object representing the start of the next partition interval.
|
|
103
|
+
- If the interval is DAY, it advances by one day.
|
|
104
|
+
- If the interval is MONTH, it advances to the first day of the next month.
|
|
105
|
+
- If the interval is YEARWEEK, it advances by one week.
|
|
106
|
+
"""
|
|
107
|
+
if self == PartitionInterval.DAY:
|
|
108
|
+
return current_datetime + timedelta(days=1)
|
|
109
|
+
elif self == PartitionInterval.MONTH:
|
|
110
|
+
return (current_datetime.replace(day=1) + timedelta(days=32)).replace(day=1)
|
|
111
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
112
|
+
return current_datetime + timedelta(weeks=1)
|
|
113
|
+
|
|
114
|
+
def get_partition_name(self, current_datetime: datetime) -> str:
|
|
115
|
+
if self == PartitionInterval.DAY:
|
|
116
|
+
return current_datetime.strftime("%Y%m%d")
|
|
117
|
+
elif self == PartitionInterval.MONTH:
|
|
118
|
+
return current_datetime.strftime("%Y%m")
|
|
119
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
120
|
+
year, week, _ = current_datetime.isocalendar()
|
|
121
|
+
return f"{year}{week:02d}"
|
|
122
|
+
|
|
123
|
+
def get_partition_expression(self):
|
|
124
|
+
if self == PartitionInterval.YEARWEEK:
|
|
125
|
+
return "YEARWEEK(activation_time, 1)"
|
|
126
|
+
else:
|
|
127
|
+
return f"{self}(activation_time)"
|
|
128
|
+
|
|
129
|
+
def get_number_of_partitions(self, days: int) -> int:
|
|
130
|
+
# Calculate the number partitions based on given number of days
|
|
131
|
+
if self == PartitionInterval.DAY:
|
|
132
|
+
return days
|
|
133
|
+
elif self == PartitionInterval.MONTH:
|
|
134
|
+
# Average number days in a month is 30.44
|
|
135
|
+
return int(days / 30.44)
|
|
136
|
+
elif self == PartitionInterval.YEARWEEK:
|
|
137
|
+
return int(days / 7)
|
mlrun/common/schemas/pipeline.py
CHANGED
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
from deprecated import deprecated
|
|
19
19
|
|
|
20
20
|
import mlrun.common.types
|
|
@@ -39,7 +39,7 @@ class PipelinesPagination(str):
|
|
|
39
39
|
max_page_size = 200
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
class PipelinesOutput(pydantic.BaseModel):
|
|
42
|
+
class PipelinesOutput(pydantic.v1.BaseModel):
|
|
43
43
|
# use the format query param to control what is returned
|
|
44
44
|
runs: list[typing.Union[dict, str]]
|
|
45
45
|
total_size: int
|
mlrun/common/schemas/project.py
CHANGED
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
import datetime
|
|
16
16
|
import typing
|
|
17
17
|
|
|
18
|
-
import pydantic
|
|
18
|
+
import pydantic.v1
|
|
19
19
|
from deprecated import deprecated
|
|
20
20
|
|
|
21
21
|
import mlrun.common.types
|
|
@@ -40,14 +40,14 @@ class ProjectsFormat(mlrun.common.types.StrEnum):
|
|
|
40
40
|
leader = "leader"
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class ProjectMetadata(pydantic.BaseModel):
|
|
43
|
+
class ProjectMetadata(pydantic.v1.BaseModel):
|
|
44
44
|
name: str
|
|
45
45
|
created: typing.Optional[datetime.datetime] = None
|
|
46
46
|
labels: typing.Optional[dict] = {}
|
|
47
47
|
annotations: typing.Optional[dict] = {}
|
|
48
48
|
|
|
49
49
|
class Config:
|
|
50
|
-
extra = pydantic.Extra.allow
|
|
50
|
+
extra = pydantic.v1.Extra.allow
|
|
51
51
|
|
|
52
52
|
|
|
53
53
|
class ProjectDesiredState(mlrun.common.types.StrEnum):
|
|
@@ -77,7 +77,7 @@ class ProjectStatus(ObjectStatus):
|
|
|
77
77
|
state: typing.Optional[ProjectState]
|
|
78
78
|
|
|
79
79
|
|
|
80
|
-
class ProjectSpec(pydantic.BaseModel):
|
|
80
|
+
class ProjectSpec(pydantic.v1.BaseModel):
|
|
81
81
|
description: typing.Optional[str] = None
|
|
82
82
|
owner: typing.Optional[str] = None
|
|
83
83
|
goals: typing.Optional[str] = None
|
|
@@ -97,10 +97,10 @@ class ProjectSpec(pydantic.BaseModel):
|
|
|
97
97
|
default_function_node_selector: typing.Optional[dict] = {}
|
|
98
98
|
|
|
99
99
|
class Config:
|
|
100
|
-
extra = pydantic.Extra.allow
|
|
100
|
+
extra = pydantic.v1.Extra.allow
|
|
101
101
|
|
|
102
102
|
|
|
103
|
-
class ProjectSpecOut(pydantic.BaseModel):
|
|
103
|
+
class ProjectSpecOut(pydantic.v1.BaseModel):
|
|
104
104
|
description: typing.Optional[str] = None
|
|
105
105
|
owner: typing.Optional[str] = None
|
|
106
106
|
goals: typing.Optional[str] = None
|
|
@@ -120,11 +120,11 @@ class ProjectSpecOut(pydantic.BaseModel):
|
|
|
120
120
|
default_function_node_selector: typing.Optional[dict] = {}
|
|
121
121
|
|
|
122
122
|
class Config:
|
|
123
|
-
extra = pydantic.Extra.allow
|
|
123
|
+
extra = pydantic.v1.Extra.allow
|
|
124
124
|
|
|
125
125
|
|
|
126
|
-
class Project(pydantic.BaseModel):
|
|
127
|
-
kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
|
|
126
|
+
class Project(pydantic.v1.BaseModel):
|
|
127
|
+
kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
|
|
128
128
|
metadata: ProjectMetadata
|
|
129
129
|
spec: ProjectSpec = ProjectSpec()
|
|
130
130
|
status: ObjectStatus = ObjectStatus()
|
|
@@ -132,19 +132,19 @@ class Project(pydantic.BaseModel):
|
|
|
132
132
|
|
|
133
133
|
# The reason we have a different schema for the response model is that we don't want to validate project.spec.build in
|
|
134
134
|
# the response as the validation was added late and there may be corrupted values in the DB.
|
|
135
|
-
class ProjectOut(pydantic.BaseModel):
|
|
136
|
-
kind: ObjectKind = pydantic.Field(ObjectKind.project, const=True)
|
|
135
|
+
class ProjectOut(pydantic.v1.BaseModel):
|
|
136
|
+
kind: ObjectKind = pydantic.v1.Field(ObjectKind.project, const=True)
|
|
137
137
|
metadata: ProjectMetadata
|
|
138
138
|
spec: ProjectSpecOut = ProjectSpecOut()
|
|
139
139
|
status: ObjectStatus = ObjectStatus()
|
|
140
140
|
|
|
141
141
|
|
|
142
|
-
class ProjectOwner(pydantic.BaseModel):
|
|
142
|
+
class ProjectOwner(pydantic.v1.BaseModel):
|
|
143
143
|
username: str
|
|
144
144
|
access_key: str
|
|
145
145
|
|
|
146
146
|
|
|
147
|
-
class ProjectSummary(pydantic.BaseModel):
|
|
147
|
+
class ProjectSummary(pydantic.v1.BaseModel):
|
|
148
148
|
name: str
|
|
149
149
|
files_count: int = 0
|
|
150
150
|
feature_sets_count: int = 0
|
|
@@ -161,7 +161,7 @@ class ProjectSummary(pydantic.BaseModel):
|
|
|
161
161
|
updated: typing.Optional[datetime.datetime] = None
|
|
162
162
|
|
|
163
163
|
|
|
164
|
-
class IguazioProject(pydantic.BaseModel):
|
|
164
|
+
class IguazioProject(pydantic.v1.BaseModel):
|
|
165
165
|
data: dict
|
|
166
166
|
|
|
167
167
|
|
|
@@ -175,13 +175,18 @@ class IguazioProject(pydantic.BaseModel):
|
|
|
175
175
|
# to add a specific classes for them. it's frustrating but couldn't find other workaround, see:
|
|
176
176
|
# https://github.com/samuelcolvin/pydantic/issues/1423, https://github.com/samuelcolvin/pydantic/issues/619
|
|
177
177
|
ProjectOutput = typing.TypeVar(
|
|
178
|
-
"ProjectOutput",
|
|
178
|
+
"ProjectOutput",
|
|
179
|
+
ProjectOut,
|
|
180
|
+
str,
|
|
181
|
+
ProjectSummary,
|
|
182
|
+
IguazioProject,
|
|
183
|
+
tuple[str, datetime.datetime],
|
|
179
184
|
)
|
|
180
185
|
|
|
181
186
|
|
|
182
|
-
class ProjectsOutput(pydantic.BaseModel):
|
|
187
|
+
class ProjectsOutput(pydantic.v1.BaseModel):
|
|
183
188
|
projects: list[ProjectOutput]
|
|
184
189
|
|
|
185
190
|
|
|
186
|
-
class ProjectSummariesOutput(pydantic.BaseModel):
|
|
191
|
+
class ProjectSummariesOutput(pydantic.v1.BaseModel):
|
|
187
192
|
project_summaries: list[ProjectSummary]
|
mlrun/common/schemas/runs.py
CHANGED
|
@@ -14,13 +14,13 @@
|
|
|
14
14
|
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
from deprecated import deprecated
|
|
19
19
|
|
|
20
20
|
import mlrun.common.types
|
|
21
21
|
|
|
22
22
|
|
|
23
|
-
class RunIdentifier(pydantic.BaseModel):
|
|
23
|
+
class RunIdentifier(pydantic.v1.BaseModel):
|
|
24
24
|
kind: typing.Literal["run"] = "run"
|
|
25
25
|
uid: typing.Optional[str]
|
|
26
26
|
iter: typing.Optional[int]
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
import mlrun.common.types
|
|
20
20
|
|
|
@@ -24,23 +24,23 @@ class ListRuntimeResourcesGroupByField(mlrun.common.types.StrEnum):
|
|
|
24
24
|
project = "project"
|
|
25
25
|
|
|
26
26
|
|
|
27
|
-
class RuntimeResource(pydantic.BaseModel):
|
|
27
|
+
class RuntimeResource(pydantic.v1.BaseModel):
|
|
28
28
|
name: str
|
|
29
29
|
labels: dict[str, str] = {}
|
|
30
30
|
status: typing.Optional[dict]
|
|
31
31
|
|
|
32
32
|
|
|
33
|
-
class RuntimeResources(pydantic.BaseModel):
|
|
33
|
+
class RuntimeResources(pydantic.v1.BaseModel):
|
|
34
34
|
crd_resources: list[RuntimeResource] = []
|
|
35
35
|
pod_resources: list[RuntimeResource] = []
|
|
36
36
|
# only for dask runtime
|
|
37
37
|
service_resources: typing.Optional[list[RuntimeResource]] = None
|
|
38
38
|
|
|
39
39
|
class Config:
|
|
40
|
-
extra = pydantic.Extra.allow
|
|
40
|
+
extra = pydantic.v1.Extra.allow
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
class KindRuntimeResources(pydantic.BaseModel):
|
|
43
|
+
class KindRuntimeResources(pydantic.v1.BaseModel):
|
|
44
44
|
kind: str
|
|
45
45
|
resources: RuntimeResources
|
|
46
46
|
|
mlrun/common/schemas/schedule.py
CHANGED
mlrun/common/schemas/secret.py
CHANGED
mlrun/common/schemas/tag.py
CHANGED
|
@@ -13,17 +13,17 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
15
|
|
|
16
|
-
import pydantic
|
|
16
|
+
import pydantic.v1
|
|
17
17
|
|
|
18
18
|
from .artifact import ArtifactIdentifier
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
class Tag(pydantic.BaseModel):
|
|
21
|
+
class Tag(pydantic.v1.BaseModel):
|
|
22
22
|
name: str
|
|
23
23
|
project: str
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class TagObjects(pydantic.BaseModel):
|
|
26
|
+
class TagObjects(pydantic.v1.BaseModel):
|
|
27
27
|
"""Tag object"""
|
|
28
28
|
|
|
29
29
|
kind: str
|
mlrun/common/schemas/workflow.py
CHANGED
|
@@ -14,14 +14,14 @@
|
|
|
14
14
|
#
|
|
15
15
|
import typing
|
|
16
16
|
|
|
17
|
-
import pydantic
|
|
17
|
+
import pydantic.v1
|
|
18
18
|
|
|
19
19
|
from mlrun.common.schemas.notification import Notification
|
|
20
20
|
from mlrun.common.schemas.schedule import ScheduleCronTrigger
|
|
21
21
|
from mlrun.common.types import StrEnum
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class WorkflowSpec(pydantic.BaseModel):
|
|
24
|
+
class WorkflowSpec(pydantic.v1.BaseModel):
|
|
25
25
|
name: str
|
|
26
26
|
engine: typing.Optional[str] = None
|
|
27
27
|
code: typing.Optional[str] = None
|
|
@@ -36,7 +36,7 @@ class WorkflowSpec(pydantic.BaseModel):
|
|
|
36
36
|
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
class WorkflowRequest(pydantic.BaseModel):
|
|
39
|
+
class WorkflowRequest(pydantic.v1.BaseModel):
|
|
40
40
|
spec: typing.Optional[WorkflowSpec] = None
|
|
41
41
|
arguments: typing.Optional[dict] = None
|
|
42
42
|
artifact_path: typing.Optional[str] = None
|
|
@@ -46,7 +46,7 @@ class WorkflowRequest(pydantic.BaseModel):
|
|
|
46
46
|
notifications: typing.Optional[list[Notification]] = None
|
|
47
47
|
|
|
48
48
|
|
|
49
|
-
class WorkflowResponse(pydantic.BaseModel):
|
|
49
|
+
class WorkflowResponse(pydantic.v1.BaseModel):
|
|
50
50
|
project: str = None
|
|
51
51
|
name: str = None
|
|
52
52
|
status: str = None
|
|
@@ -54,7 +54,7 @@ class WorkflowResponse(pydantic.BaseModel):
|
|
|
54
54
|
schedule: typing.Union[str, ScheduleCronTrigger] = None
|
|
55
55
|
|
|
56
56
|
|
|
57
|
-
class GetWorkflowResponse(pydantic.BaseModel):
|
|
57
|
+
class GetWorkflowResponse(pydantic.v1.BaseModel):
|
|
58
58
|
workflow_id: str = None
|
|
59
59
|
|
|
60
60
|
|
mlrun/config.py
CHANGED
|
@@ -102,6 +102,9 @@ default_config = {
|
|
|
102
102
|
"log_level": "INFO",
|
|
103
103
|
# log formatter (options: human | human_extended | json)
|
|
104
104
|
"log_formatter": "human",
|
|
105
|
+
# custom logger format, works only with log_formatter: custom
|
|
106
|
+
# Note that your custom format must include those 4 fields - timestamp, level, message and more
|
|
107
|
+
"log_format_override": None,
|
|
105
108
|
"submit_timeout": "180", # timeout when submitting a new k8s resource
|
|
106
109
|
# runtimes cleanup interval in seconds
|
|
107
110
|
"runtimes_cleanup_interval": "300",
|
|
@@ -120,14 +123,6 @@ default_config = {
|
|
|
120
123
|
"projects": {
|
|
121
124
|
"summaries": {
|
|
122
125
|
"cache_interval": "30",
|
|
123
|
-
"feature_gates": {
|
|
124
|
-
"artifacts": "enabled",
|
|
125
|
-
"schedules": "enabled",
|
|
126
|
-
"feature_sets": "enabled",
|
|
127
|
-
"models": "enabled",
|
|
128
|
-
"runs": "enabled",
|
|
129
|
-
"pipelines": "enabled",
|
|
130
|
-
},
|
|
131
126
|
},
|
|
132
127
|
},
|
|
133
128
|
},
|
|
@@ -140,6 +135,12 @@ default_config = {
|
|
|
140
135
|
"delete_crd_resources_timeout": "5 minutes",
|
|
141
136
|
},
|
|
142
137
|
},
|
|
138
|
+
"object_retentions": {
|
|
139
|
+
"alert_activation": 14 * 7, # days
|
|
140
|
+
},
|
|
141
|
+
# A safety margin to account for delays
|
|
142
|
+
# This ensures that extra partitions are available beyond the specified retention period
|
|
143
|
+
"partitions_buffer_multiplier": 3,
|
|
143
144
|
# the grace period (in seconds) that will be given to runtime resources (after they're in terminal state)
|
|
144
145
|
# before deleting them (4 hours)
|
|
145
146
|
"runtime_resources_deletion_grace_period": "14400",
|
|
@@ -314,7 +315,7 @@ default_config = {
|
|
|
314
315
|
},
|
|
315
316
|
"request_timeout": 45, # seconds
|
|
316
317
|
},
|
|
317
|
-
# see server.api.utils.helpers.ensure_running_on_chief
|
|
318
|
+
# see server.py.services.api.utils.helpers.ensure_running_on_chief
|
|
318
319
|
"ensure_function_running_on_chief_mode": "enabled",
|
|
319
320
|
},
|
|
320
321
|
"port": 8080,
|
|
@@ -794,17 +795,36 @@ default_config = {
|
|
|
794
795
|
"grafana_url": "",
|
|
795
796
|
"alerts": {
|
|
796
797
|
# supported modes: "enabled", "disabled".
|
|
797
|
-
"mode": "
|
|
798
|
+
"mode": "enabled",
|
|
798
799
|
# maximum number of alerts we allow to be configured.
|
|
799
800
|
# user will get an error when exceeding this
|
|
800
801
|
"max_allowed": 10000,
|
|
801
802
|
# maximum allowed value for count in criteria field inside AlertConfig
|
|
802
803
|
"max_criteria_count": 100,
|
|
804
|
+
# interval for periodic events generation job
|
|
805
|
+
"events_generation_interval": "30",
|
|
803
806
|
},
|
|
804
807
|
"auth_with_client_id": {
|
|
805
808
|
"enabled": False,
|
|
806
809
|
"request_timeout": 5,
|
|
807
810
|
},
|
|
811
|
+
"services": {
|
|
812
|
+
# The running service name. One of: "api", "alerts"
|
|
813
|
+
"service_name": "api",
|
|
814
|
+
"hydra": {
|
|
815
|
+
# Comma separated list of services to run on the instance.
|
|
816
|
+
# Currently, this is only considered when the service_name is "api".
|
|
817
|
+
# "*" starts all services on the same instance,
|
|
818
|
+
# other options are considered as running only the api service.
|
|
819
|
+
"services": "*",
|
|
820
|
+
},
|
|
821
|
+
},
|
|
822
|
+
"notifications": {
|
|
823
|
+
"smtp": {
|
|
824
|
+
"config_secret_name": "mlrun-smtp-config",
|
|
825
|
+
"refresh_interval": "30",
|
|
826
|
+
}
|
|
827
|
+
},
|
|
808
828
|
}
|
|
809
829
|
_is_running_as_api = None
|
|
810
830
|
|
|
@@ -851,6 +871,22 @@ class Config:
|
|
|
851
871
|
name = self.__class__.__name__
|
|
852
872
|
return f"{name}({self._cfg!r})"
|
|
853
873
|
|
|
874
|
+
def __iter__(self):
|
|
875
|
+
if isinstance(self._cfg, Mapping):
|
|
876
|
+
return self._cfg.__iter__()
|
|
877
|
+
|
|
878
|
+
def items(self):
|
|
879
|
+
if isinstance(self._cfg, Mapping):
|
|
880
|
+
return iter(self._cfg.items())
|
|
881
|
+
|
|
882
|
+
def keys(self):
|
|
883
|
+
if isinstance(self._cfg, Mapping):
|
|
884
|
+
return iter(self.data.keys())
|
|
885
|
+
|
|
886
|
+
def values(self):
|
|
887
|
+
if isinstance(self._cfg, Mapping):
|
|
888
|
+
return iter(self.data.values())
|
|
889
|
+
|
|
854
890
|
def update(self, cfg, skip_errors=False):
|
|
855
891
|
for key, value in cfg.items():
|
|
856
892
|
if hasattr(self, key):
|
|
@@ -1043,6 +1079,17 @@ class Config:
|
|
|
1043
1079
|
f"is not allowed for iguazio version: {igz_version} < 3.5.1"
|
|
1044
1080
|
)
|
|
1045
1081
|
|
|
1082
|
+
def validate_object_retentions(self):
|
|
1083
|
+
for table_name, retention_days in self.object_retentions.items():
|
|
1084
|
+
if retention_days < 7 and not os.getenv("PARTITION_INTERVAL"):
|
|
1085
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1086
|
+
f"{table_name} partition interval must be greater than a week"
|
|
1087
|
+
)
|
|
1088
|
+
elif retention_days > 53 * 7:
|
|
1089
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1090
|
+
f"{table_name} partition interval must be less than a year"
|
|
1091
|
+
)
|
|
1092
|
+
|
|
1046
1093
|
def resolve_chief_api_url(self) -> str:
|
|
1047
1094
|
if self.httpdb.clusterization.chief.url:
|
|
1048
1095
|
return self.httpdb.clusterization.chief.url
|
|
@@ -1201,9 +1248,9 @@ class Config:
|
|
|
1201
1248
|
|
|
1202
1249
|
def get_model_monitoring_file_target_path(
|
|
1203
1250
|
self,
|
|
1204
|
-
project: str
|
|
1205
|
-
kind: str
|
|
1206
|
-
target:
|
|
1251
|
+
project: str,
|
|
1252
|
+
kind: str,
|
|
1253
|
+
target: typing.Literal["online", "offline"] = "online",
|
|
1207
1254
|
artifact_path: typing.Optional[str] = None,
|
|
1208
1255
|
function_name: typing.Optional[str] = None,
|
|
1209
1256
|
**kwargs,
|
|
@@ -1381,9 +1428,12 @@ def _validate_config(config):
|
|
|
1381
1428
|
pass
|
|
1382
1429
|
|
|
1383
1430
|
config.verify_security_context_enrichment_mode_is_allowed()
|
|
1431
|
+
config.validate_object_retentions()
|
|
1384
1432
|
|
|
1385
1433
|
|
|
1386
|
-
def _verify_gpu_requests_and_limits(
|
|
1434
|
+
def _verify_gpu_requests_and_limits(
|
|
1435
|
+
requests_gpu: typing.Optional[str] = None, limits_gpu: typing.Optional[str] = None
|
|
1436
|
+
):
|
|
1387
1437
|
# https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
1388
1438
|
if requests_gpu and not limits_gpu:
|
|
1389
1439
|
raise mlrun.errors.MLRunConflictError(
|
|
@@ -1396,7 +1446,7 @@ def _verify_gpu_requests_and_limits(requests_gpu: str = None, limits_gpu: str =
|
|
|
1396
1446
|
)
|
|
1397
1447
|
|
|
1398
1448
|
|
|
1399
|
-
def _convert_resources_to_str(config: dict = None):
|
|
1449
|
+
def _convert_resources_to_str(config: typing.Optional[dict] = None):
|
|
1400
1450
|
resources_types = ["cpu", "memory", "gpu"]
|
|
1401
1451
|
resource_requirements = ["requests", "limits"]
|
|
1402
1452
|
if not config.get("default_function_pod_resources"):
|
mlrun/data_types/__init__.py
CHANGED
|
@@ -11,8 +11,6 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
#
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
14
|
|
|
17
15
|
from .data_types import (
|
|
18
16
|
InferOptions,
|
mlrun/data_types/data_types.py
CHANGED
mlrun/data_types/infer.py
CHANGED
|
@@ -12,6 +12,8 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
+
from typing import Optional
|
|
16
|
+
|
|
15
17
|
import numpy as np
|
|
16
18
|
import packaging.version
|
|
17
19
|
import pandas as pd
|
|
@@ -29,7 +31,7 @@ def infer_schema_from_df(
|
|
|
29
31
|
df: pd.DataFrame,
|
|
30
32
|
features,
|
|
31
33
|
entities,
|
|
32
|
-
timestamp_key: str = None,
|
|
34
|
+
timestamp_key: Optional[str] = None,
|
|
33
35
|
entity_columns=None,
|
|
34
36
|
options: InferOptions = InferOptions.Null,
|
|
35
37
|
):
|
mlrun/data_types/spark.py
CHANGED
|
@@ -14,11 +14,12 @@
|
|
|
14
14
|
#
|
|
15
15
|
from datetime import datetime
|
|
16
16
|
from os import environ
|
|
17
|
+
from typing import Optional
|
|
17
18
|
|
|
18
19
|
import numpy as np
|
|
19
20
|
import pytz
|
|
20
21
|
from pyspark.sql.functions import to_utc_timestamp
|
|
21
|
-
from pyspark.sql.types import BooleanType, DoubleType
|
|
22
|
+
from pyspark.sql.types import BooleanType, DoubleType, TimestampType
|
|
22
23
|
|
|
23
24
|
from mlrun.feature_store.retrieval.spark_merger import spark_df_to_pandas
|
|
24
25
|
from mlrun.utils import logger
|
|
@@ -35,7 +36,7 @@ def infer_schema_from_df_spark(
|
|
|
35
36
|
df,
|
|
36
37
|
features,
|
|
37
38
|
entities,
|
|
38
|
-
timestamp_key: str = None,
|
|
39
|
+
timestamp_key: Optional[str] = None,
|
|
39
40
|
entity_columns=None,
|
|
40
41
|
options: InferOptions = InferOptions.Null,
|
|
41
42
|
):
|
|
@@ -143,8 +144,7 @@ def get_df_stats_spark(df, options, num_bins=20, sample_size=None):
|
|
|
143
144
|
timestamp_columns = set()
|
|
144
145
|
boolean_columns = set()
|
|
145
146
|
for field in df_after_type_casts.schema.fields:
|
|
146
|
-
|
|
147
|
-
is_timestamp = field.dataType.typeName().startswith("timestamp")
|
|
147
|
+
is_timestamp = isinstance(field.dataType, TimestampType)
|
|
148
148
|
is_boolean = isinstance(field.dataType, BooleanType)
|
|
149
149
|
if is_timestamp:
|
|
150
150
|
df_after_type_casts = df_after_type_casts.withColumn(
|
mlrun/data_types/to_pandas.py
CHANGED
|
@@ -244,15 +244,6 @@ def _to_corrected_pandas_type(dt):
|
|
|
244
244
|
|
|
245
245
|
|
|
246
246
|
def spark_df_to_pandas(spark_df):
|
|
247
|
-
import pyspark
|
|
248
|
-
|
|
249
|
-
if semver.parse(pyspark.__version__) >= semver.Version(3, 5, 0):
|
|
250
|
-
|
|
251
|
-
def to_pandas(spark_df_inner):
|
|
252
|
-
return spark_df_inner.toPandas()
|
|
253
|
-
else:
|
|
254
|
-
to_pandas = _to_pandas
|
|
255
|
-
|
|
256
247
|
# as of pyspark 3.2.3, toPandas fails to convert timestamps unless we work around the issue
|
|
257
248
|
# when we upgrade pyspark, we should check whether this workaround is still necessary
|
|
258
249
|
# see https://stackoverflow.com/questions/76389694/transforming-pyspark-to-pandas-dataframe
|
|
@@ -271,9 +262,9 @@ def spark_df_to_pandas(spark_df):
|
|
|
271
262
|
)
|
|
272
263
|
type_conversion_dict[field.name] = "datetime64[ns]"
|
|
273
264
|
|
|
274
|
-
df =
|
|
265
|
+
df = _to_pandas(spark_df)
|
|
275
266
|
if type_conversion_dict:
|
|
276
267
|
df = df.astype(type_conversion_dict)
|
|
277
268
|
return df
|
|
278
269
|
else:
|
|
279
|
-
return
|
|
270
|
+
return _to_pandas(spark_df)
|
mlrun/datastore/__init__.py
CHANGED
|
@@ -12,8 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
# flake8: noqa - this is until we take care of the F401 violations with respect to __all__ & sphinx
|
|
16
|
-
|
|
17
15
|
__all__ = [
|
|
18
16
|
"DataItem",
|
|
19
17
|
"get_store_resource",
|