PyPI - mlrun - Versions diffs - 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl - Mend

mlrun-1.7.2rc4-py3-none-any.whl → mlrun-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (275) hide show

mlrun/__init__.py +26 -22
mlrun/__main__.py +15 -16
mlrun/alerts/alert.py +150 -15
mlrun/api/schemas/__init__.py +1 -9
mlrun/artifacts/__init__.py +2 -3
mlrun/artifacts/base.py +62 -19
mlrun/artifacts/dataset.py +17 -17
mlrun/artifacts/document.py +454 -0
mlrun/artifacts/manager.py +28 -18
mlrun/artifacts/model.py +91 -59
mlrun/artifacts/plots.py +2 -2
mlrun/common/constants.py +8 -0
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/artifact.py +1 -1
mlrun/common/formatters/feature_set.py +2 -0
mlrun/common/formatters/function.py +1 -0
mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
mlrun/common/formatters/pipeline.py +1 -2
mlrun/common/formatters/project.py +9 -0
mlrun/common/model_monitoring/__init__.py +0 -5
mlrun/common/model_monitoring/helpers.py +12 -62
mlrun/common/runtimes/constants.py +25 -4
mlrun/common/schemas/__init__.py +9 -5
mlrun/common/schemas/alert.py +114 -19
mlrun/common/schemas/api_gateway.py +3 -3
mlrun/common/schemas/artifact.py +22 -9
mlrun/common/schemas/auth.py +8 -4
mlrun/common/schemas/background_task.py +7 -7
mlrun/common/schemas/client_spec.py +4 -4
mlrun/common/schemas/clusterization_spec.py +2 -2
mlrun/common/schemas/common.py +53 -3
mlrun/common/schemas/constants.py +15 -0
mlrun/common/schemas/datastore_profile.py +1 -1
mlrun/common/schemas/feature_store.py +9 -9
mlrun/common/schemas/frontend_spec.py +4 -4
mlrun/common/schemas/function.py +10 -10
mlrun/common/schemas/hub.py +1 -1
mlrun/common/schemas/k8s.py +3 -3
mlrun/common/schemas/memory_reports.py +3 -3
mlrun/common/schemas/model_monitoring/__init__.py +4 -8
mlrun/common/schemas/model_monitoring/constants.py +127 -46
mlrun/common/schemas/model_monitoring/grafana.py +18 -12
mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
mlrun/common/schemas/notification.py +24 -3
mlrun/common/schemas/object.py +1 -1
mlrun/common/schemas/pagination.py +4 -4
mlrun/common/schemas/partition.py +142 -0
mlrun/common/schemas/pipeline.py +3 -3
mlrun/common/schemas/project.py +26 -18
mlrun/common/schemas/runs.py +3 -3
mlrun/common/schemas/runtime_resource.py +5 -5
mlrun/common/schemas/schedule.py +1 -1
mlrun/common/schemas/secret.py +1 -1
mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
mlrun/common/schemas/tag.py +3 -3
mlrun/common/schemas/workflow.py +6 -5
mlrun/common/types.py +1 -0
mlrun/config.py +157 -89
mlrun/data_types/__init__.py +5 -3
mlrun/data_types/infer.py +13 -3
mlrun/data_types/spark.py +2 -1
mlrun/datastore/__init__.py +59 -18
mlrun/datastore/alibaba_oss.py +4 -1
mlrun/datastore/azure_blob.py +4 -1
mlrun/datastore/base.py +19 -24
mlrun/datastore/datastore.py +10 -4
mlrun/datastore/datastore_profile.py +178 -45
mlrun/datastore/dbfs_store.py +4 -1
mlrun/datastore/filestore.py +4 -1
mlrun/datastore/google_cloud_storage.py +4 -1
mlrun/datastore/hdfs.py +4 -1
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +4 -1
mlrun/datastore/s3.py +14 -3
mlrun/datastore/sources.py +89 -92
mlrun/datastore/store_resources.py +7 -4
mlrun/datastore/storeytargets.py +51 -16
mlrun/datastore/targets.py +38 -31
mlrun/datastore/utils.py +87 -4
mlrun/datastore/v3io.py +4 -1
mlrun/datastore/vectorstore.py +291 -0
mlrun/datastore/wasbfs/fs.py +13 -12
mlrun/db/base.py +286 -100
mlrun/db/httpdb.py +1562 -490
mlrun/db/nopdb.py +250 -83
mlrun/errors.py +6 -2
mlrun/execution.py +194 -50
mlrun/feature_store/__init__.py +2 -10
mlrun/feature_store/api.py +20 -458
mlrun/feature_store/common.py +9 -9
mlrun/feature_store/feature_set.py +20 -18
mlrun/feature_store/feature_vector.py +105 -479
mlrun/feature_store/feature_vector_utils.py +466 -0
mlrun/feature_store/retrieval/base.py +15 -11
mlrun/feature_store/retrieval/job.py +2 -1
mlrun/feature_store/retrieval/storey_merger.py +1 -1
mlrun/feature_store/steps.py +3 -3
mlrun/features.py +30 -13
mlrun/frameworks/__init__.py +1 -2
mlrun/frameworks/_common/__init__.py +1 -2
mlrun/frameworks/_common/artifacts_library.py +2 -2
mlrun/frameworks/_common/mlrun_interface.py +10 -6
mlrun/frameworks/_common/model_handler.py +31 -31
mlrun/frameworks/_common/producer.py +3 -1
mlrun/frameworks/_dl_common/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
mlrun/frameworks/_ml_common/__init__.py +1 -2
mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
mlrun/frameworks/_ml_common/model_handler.py +21 -21
mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/auto_mlrun/__init__.py +1 -2
mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
mlrun/frameworks/huggingface/__init__.py +1 -2
mlrun/frameworks/huggingface/model_server.py +9 -9
mlrun/frameworks/lgbm/__init__.py +47 -44
mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
mlrun/frameworks/lgbm/model_handler.py +15 -11
mlrun/frameworks/lgbm/model_server.py +11 -7
mlrun/frameworks/lgbm/utils.py +2 -2
mlrun/frameworks/onnx/__init__.py +1 -2
mlrun/frameworks/onnx/dataset.py +3 -3
mlrun/frameworks/onnx/mlrun_interface.py +2 -2
mlrun/frameworks/onnx/model_handler.py +7 -5
mlrun/frameworks/onnx/model_server.py +8 -6
mlrun/frameworks/parallel_coordinates.py +11 -11
mlrun/frameworks/pytorch/__init__.py +22 -23
mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
mlrun/frameworks/pytorch/model_handler.py +21 -17
mlrun/frameworks/pytorch/model_server.py +13 -9
mlrun/frameworks/sklearn/__init__.py +19 -18
mlrun/frameworks/sklearn/estimator.py +2 -2
mlrun/frameworks/sklearn/metric.py +3 -3
mlrun/frameworks/sklearn/metrics_library.py +8 -6
mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
mlrun/frameworks/sklearn/model_handler.py +4 -3
mlrun/frameworks/tf_keras/__init__.py +11 -12
mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
mlrun/frameworks/tf_keras/model_handler.py +17 -13
mlrun/frameworks/tf_keras/model_server.py +12 -8
mlrun/frameworks/xgboost/__init__.py +19 -18
mlrun/frameworks/xgboost/model_handler.py +13 -9
mlrun/k8s_utils.py +2 -5
mlrun/launcher/base.py +3 -4
mlrun/launcher/client.py +2 -2
mlrun/launcher/local.py +6 -2
mlrun/launcher/remote.py +1 -1
mlrun/lists.py +8 -4
mlrun/model.py +132 -46
mlrun/model_monitoring/__init__.py +3 -5
mlrun/model_monitoring/api.py +113 -98
mlrun/model_monitoring/applications/__init__.py +0 -5
mlrun/model_monitoring/applications/_application_steps.py +81 -50
mlrun/model_monitoring/applications/base.py +467 -14
mlrun/model_monitoring/applications/context.py +212 -134
mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
mlrun/model_monitoring/applications/evidently/base.py +146 -0
mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
mlrun/model_monitoring/applications/results.py +67 -15
mlrun/model_monitoring/controller.py +701 -315
mlrun/model_monitoring/db/__init__.py +0 -2
mlrun/model_monitoring/db/_schedules.py +242 -0
mlrun/model_monitoring/db/_stats.py +189 -0
mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
mlrun/model_monitoring/db/tsdb/base.py +243 -49
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
mlrun/model_monitoring/helpers.py +356 -114
mlrun/model_monitoring/stream_processing.py +190 -345
mlrun/model_monitoring/tracking_policy.py +11 -4
mlrun/model_monitoring/writer.py +49 -90
mlrun/package/__init__.py +3 -6
mlrun/package/context_handler.py +2 -2
mlrun/package/packager.py +12 -9
mlrun/package/packagers/__init__.py +0 -2
mlrun/package/packagers/default_packager.py +14 -11
mlrun/package/packagers/numpy_packagers.py +16 -7
mlrun/package/packagers/pandas_packagers.py +18 -18
mlrun/package/packagers/python_standard_library_packagers.py +25 -11
mlrun/package/packagers_manager.py +35 -32
mlrun/package/utils/__init__.py +0 -3
mlrun/package/utils/_pickler.py +6 -6
mlrun/platforms/__init__.py +47 -16
mlrun/platforms/iguazio.py +4 -1
mlrun/projects/operations.py +30 -30
mlrun/projects/pipelines.py +116 -47
mlrun/projects/project.py +1292 -329
mlrun/render.py +5 -9
mlrun/run.py +57 -14
mlrun/runtimes/__init__.py +1 -3
mlrun/runtimes/base.py +30 -22
mlrun/runtimes/daskjob.py +9 -9
mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
mlrun/runtimes/function_reference.py +5 -2
mlrun/runtimes/generators.py +3 -2
mlrun/runtimes/kubejob.py +6 -7
mlrun/runtimes/mounts.py +574 -0
mlrun/runtimes/mpijob/__init__.py +0 -2
mlrun/runtimes/mpijob/abstract.py +7 -6
mlrun/runtimes/nuclio/api_gateway.py +7 -7
mlrun/runtimes/nuclio/application/application.py +11 -13
mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
mlrun/runtimes/nuclio/function.py +127 -70
mlrun/runtimes/nuclio/serving.py +105 -37
mlrun/runtimes/pod.py +159 -54
mlrun/runtimes/remotesparkjob.py +3 -2
mlrun/runtimes/sparkjob/__init__.py +0 -2
mlrun/runtimes/sparkjob/spark3job.py +22 -12
mlrun/runtimes/utils.py +7 -6
mlrun/secrets.py +2 -2
mlrun/serving/__init__.py +8 -0
mlrun/serving/merger.py +7 -5
mlrun/serving/remote.py +35 -22
mlrun/serving/routers.py +186 -240
mlrun/serving/server.py +41 -10
mlrun/serving/states.py +432 -118
mlrun/serving/utils.py +13 -2
mlrun/serving/v1_serving.py +3 -2
mlrun/serving/v2_serving.py +161 -203
mlrun/track/__init__.py +1 -1
mlrun/track/tracker.py +2 -2
mlrun/track/trackers/mlflow_tracker.py +6 -5
mlrun/utils/async_http.py +35 -22
mlrun/utils/clones.py +7 -4
mlrun/utils/helpers.py +511 -58
mlrun/utils/logger.py +119 -13
mlrun/utils/notifications/notification/__init__.py +22 -19
mlrun/utils/notifications/notification/base.py +39 -15
mlrun/utils/notifications/notification/console.py +6 -6
mlrun/utils/notifications/notification/git.py +11 -11
mlrun/utils/notifications/notification/ipython.py +10 -9
mlrun/utils/notifications/notification/mail.py +176 -0
mlrun/utils/notifications/notification/slack.py +16 -8
mlrun/utils/notifications/notification/webhook.py +24 -8
mlrun/utils/notifications/notification_pusher.py +191 -200
mlrun/utils/regex.py +12 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
mlrun-1.8.0.dist-info/RECORD +351 -0
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
mlrun/model_monitoring/applications/evidently_base.py +0 -137
mlrun/model_monitoring/db/stores/__init__.py +0 -136
mlrun/model_monitoring/db/stores/base/store.py +0 -213
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
mlrun/model_monitoring/model_endpoint.py +0 -118
mlrun-1.7.2rc4.dist-info/RECORD +0 -351
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
{mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0

mlrun/utils/helpers.py CHANGED Viewed

@@ -13,8 +13,10 @@
 # limitations under the License.
 import asyncio
+import base64
 import enum
 import functools
+import gzip
 import hashlib
 import inspect
 import itertools
@@ -23,35 +25,43 @@ import os
 import re
 import string
 import sys
+import traceback
 import typing
 import uuid
 import warnings
-from datetime import datetime, timezone
+from datetime import datetime, timedelta, timezone
 from importlib import import_module, reload
 from os import path
 from types import ModuleType
 from typing import Any, Optional
+from urllib.parse import urlparse
 import git
 import inflection
 import numpy as np
 import packaging.version
 import pandas
+import pytz
 import semver
 import yaml
 from dateutil import parser
-from mlrun_pipelines.models import PipelineRun
 from pandas import Timedelta, Timestamp
 from yaml.representer import RepresenterError
 import mlrun
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.helpers
+import mlrun.common.runtimes.constants as runtimes_constants
 import mlrun.common.schemas
 import mlrun.errors
 import mlrun.utils.regex
 import mlrun.utils.version.version
+import mlrun_pipelines.common.constants
+import mlrun_pipelines.models
+import mlrun_pipelines.utils
 from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
 from mlrun.config import config
+from mlrun_pipelines.models import PipelineRun
 from .logger import create_logger
 from .retryer import (  # noqa: F401
@@ -85,14 +95,19 @@ class StorePrefix:
     Artifact = "artifacts"
     Model = "models"
     Dataset = "datasets"
+    Document = "documents"
     @classmethod
     def is_artifact(cls, prefix):
-        return prefix in [cls.Artifact, cls.Model, cls.Dataset]
+        return prefix in [cls.Artifact, cls.Model, cls.Dataset, cls.Document]
     @classmethod
     def kind_to_prefix(cls, kind):
-        kind_map = {"model": cls.Model, "dataset": cls.Dataset}
+        kind_map = {
+            "model": cls.Model,
+            "dataset": cls.Dataset,
+            "document": cls.Document,
+        }
         return kind_map.get(kind, cls.Artifact)
     @classmethod
@@ -103,6 +118,7 @@ class StorePrefix:
             cls.Dataset,
             cls.FeatureSet,
             cls.FeatureVector,
+            cls.Document,
         ]
@@ -111,21 +127,27 @@ def get_artifact_target(item: dict, project=None):
     project_str = project or item["metadata"].get("project")
     tree = item["metadata"].get("tree")
     tag = item["metadata"].get("tag")
+    iter = item["metadata"].get("iter")
     kind = item.get("kind")
+    uid = item["metadata"].get("uid")
     if kind in {"dataset", "model", "artifact"} and db_key:
         target = (
             f"{DB_SCHEMA}://{StorePrefix.kind_to_prefix(kind)}/{project_str}/{db_key}"
         )
+        if iter:
+            target = f"{target}#{iter}"
         target += f":{tag}" if tag else ":latest"
         if tree:
             target += f"@{tree}"
+        if uid:
+            target += f"^{uid}"
         return target
     return item["spec"].get("target_path")
-# TODO: left for migrations testing purposes. Remove in 1.8.0.
+# TODO: Remove once data migration v5 is obsolete
 def is_legacy_artifact(artifact):
     if isinstance(artifact, dict):
         return "metadata" not in artifact
@@ -167,6 +189,7 @@ class RunKeys:
     inputs = "inputs"
     returns = "returns"
     artifacts = "artifacts"
+    artifact_uris = "artifact_uris"
     outputs = "outputs"
     data_stores = "data_stores"
     secrets = "secret_sources"
@@ -220,7 +243,7 @@ def verify_field_regex(
 def validate_builder_source(
-    source: str, pull_at_runtime: bool = False, workdir: str = None
+    source: str, pull_at_runtime: bool = False, workdir: Optional[str] = None
 ):
     if pull_at_runtime or not source:
         return
@@ -268,12 +291,14 @@ def validate_tag_name(
 def validate_artifact_key_name(
     artifact_key: str, field_name: str, raise_on_failure: bool = True
 ) -> bool:
+    field_type = "key" if field_name == "artifact.key" else "db_key"
     return mlrun.utils.helpers.verify_field_regex(
         field_name,
         artifact_key,
         mlrun.utils.regex.artifact_key,
         raise_on_failure=raise_on_failure,
-        log_message="Slashes are not permitted in the artifact key (both \\ and /)",
+        log_message=f"Artifact {field_type} must start and end with an alphanumeric character, and may only contain "
+        "letters, numbers, hyphens, underscores, and dots.",
     )
@@ -354,8 +379,8 @@ def verify_field_list_of_type(
 def verify_dict_items_type(
     name: str,
     dictionary: dict,
-    expected_keys_types: list = None,
-    expected_values_types: list = None,
+    expected_keys_types: Optional[list] = None,
+    expected_values_types: Optional[list] = None,
 ):
     if dictionary:
         if not isinstance(dictionary, dict):
@@ -372,7 +397,7 @@ def verify_dict_items_type(
             ) from exc
-def verify_list_items_type(list_, expected_types: list = None):
+def verify_list_items_type(list_, expected_types: Optional[list] = None):
     if list_ and expected_types:
         list_items_types = set(map(type, list_))
         expected_types = set(expected_types)
@@ -396,6 +421,32 @@ def now_date(tz: timezone = timezone.utc) -> datetime:
     return datetime.now(tz=tz)
+def datetime_to_mysql_ts(datetime_object: datetime) -> datetime:
+    """
+    Convert a Python datetime object to a timezone-aware datetime rounded to the
+    nearest millisecond, for use as a MySQL-compatible timestamp.
+    A naive datetime is assumed to be in UTC.
+    Example: 2024-12-18T16:36:05.235687+00:00 -> 2024-12-18T16:36:05.236000+00:00
+    :param datetime_object: A Python datetime object.
+    :return: A datetime object with millisecond precision.
+    """
+    if not datetime_object.tzinfo:
+        datetime_object = datetime_object.replace(tzinfo=timezone.utc)
+    # Round to the nearest millisecond
+    ms = round(datetime_object.microsecond / 1000) * 1000
+    if ms == 1000000:
+        datetime_object += timedelta(seconds=1)
+        ms = 0
+    return datetime_object.replace(microsecond=ms)
+def datetime_min(tz: timezone = timezone.utc) -> datetime:
+    return datetime(1970, 1, 1, tzinfo=tz)
 datetime_now = now_date
@@ -448,7 +499,6 @@ def get_in(obj, keys, default=None):
     """
     if isinstance(keys, str):
         keys = keys.split(".")
     for key in keys:
         if not obj or key not in obj:
             return default
@@ -663,8 +713,8 @@ def dict_to_json(struct):
 def parse_artifact_uri(uri, default_project=""):
     """
-    Parse artifact URI into project, key, tag, iter, tree
-    URI format: [<project>/]<key>[#<iter>][:<tag>][@<tree>]
+    Parse artifact URI into project, key, tag, iter, tree, uid
+    URI format: [<project>/]<key>[#<iter>][:<tag>][@<tree>][^<uid>]
     :param uri:            uri to parse
     :param default_project: default project name if not in URI
@@ -674,6 +724,7 @@ def parse_artifact_uri(uri, default_project=""):
         [2] = iteration
         [3] = tag
         [4] = tree
+        [5] = uid
     """
     uri_pattern = mlrun.utils.regex.artifact_uri_pattern
     match = re.match(uri_pattern, uri)
@@ -698,6 +749,7 @@ def parse_artifact_uri(uri, default_project=""):
         iteration,
         group_dict["tag"],
         group_dict["tree"],
+        group_dict["uid"],
     )
@@ -712,7 +764,9 @@ def generate_object_uri(project, name, tag=None, hash_key=None):
     return uri
-def generate_artifact_uri(project, key, tag=None, iter=None, tree=None):
+def generate_artifact_uri(
+    project, key, tag=None, iter=None, tree=None, uid=None
+) -> str:
     artifact_uri = f"{project}/{key}"
     if iter is not None:
         artifact_uri = f"{artifact_uri}#{iter}"
@@ -720,6 +774,8 @@ def generate_artifact_uri(project, key, tag=None, iter=None, tree=None):
         artifact_uri = f"{artifact_uri}:{tag}"
     if tree is not None:
         artifact_uri = f"{artifact_uri}@{tree}"
+    if uid is not None:
+        artifact_uri = f"{artifact_uri}^{uid}"
     return artifact_uri
@@ -816,7 +872,9 @@ def _convert_python_package_version_to_image_tag(version: typing.Optional[str]):
 def enrich_image_url(
-    image_url: str, client_version: str = None, client_python_version: str = None
+    image_url: str,
+    client_version: Optional[str] = None,
+    client_python_version: Optional[str] = None,
 ) -> str:
     client_version = _convert_python_package_version_to_image_tag(client_version)
     server_version = _convert_python_package_version_to_image_tag(
@@ -856,7 +914,7 @@ def enrich_image_url(
 def resolve_image_tag_suffix(
-    mlrun_version: str = None, python_version: str = None
+    mlrun_version: Optional[str] = None, python_version: Optional[str] = None
 ) -> str:
     """
     resolves what suffix should be appended to the image tag
@@ -989,49 +1047,165 @@ async def retry_until_successful_async(
     ).run()
-def get_ui_url(project, uid=None):
-    url = ""
+def get_project_url(project: str) -> str:
+    """
+    Generate the base URL for a given project.
+    :param project: The project name.
+    :return: The base URL for the project, or an empty string if the base URL is not resolved.
+    """
     if mlrun.mlconf.resolve_ui_url():
-        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/jobs"
-        if uid:
-            url += f"/monitor/{uid}/overview"
+        return f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}"
+    return ""
+def get_run_url(project: str, uid: str, name: str) -> str:
+    """
+    Generate the URL for a specific run.
+    :param project: The project name.
+    :param uid: The run UID.
+    :param name: The run name.
+    :return: The URL for the run, or an empty string if the base URL is not resolved.
+    """
+    runs_url = get_runs_url(project)
+    if not runs_url:
+        return ""
+    return f"{runs_url}/monitor-jobs/{name}/{uid}/overview"
+def get_runs_url(project: str) -> str:
+    """
+    Generate the URL for the runs of a given project.
+    :param project: The project name.
+    :return: The URL for the runs, or an empty string if the base URL is not resolved.
+    """
+    base_url = get_project_url(project)
+    if not base_url:
+        return ""
+    return f"{base_url}/jobs"
+def get_model_endpoint_url(
+    project: str,
+    model_name: Optional[str] = None,
+    model_endpoint_id: Optional[str] = None,
+) -> str:
+    """
+    Generate the URL for a specific model endpoint.
+    :param project: The project name.
+    :param model_name: The model name.
+    :param model_endpoint_id: The model endpoint ID.
+    :return: The URL for the model endpoint, or an empty string if the base URL is not resolved.
+    """
+    base_url = get_project_url(project)
+    if not base_url:
+        return ""
+    url = f"{base_url}/models"
+    if model_name and model_endpoint_id:
+        url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
     return url
-def get_model_endpoint_url(project, model_name, model_endpoint_id):
-    url = ""
-    if mlrun.mlconf.resolve_ui_url():
-        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/models"
-        if model_name:
-            url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
+def get_workflow_url(
+    project: str,
+    id: Optional[str] = None,
+) -> str:
+    """
+    Generate the URL for a specific workflow.
+    :param project: The project name.
+    :param id: The workflow ID.
+    :return: The URL for the workflow, or an empty string if the base URL is not resolved.
+    """
+    base_url = get_project_url(project)
+    if not base_url:
+        return ""
+    url = f"{base_url}/jobs/monitor-workflows/workflow"
+    if id:
+        url += f"/{id}"
     return url
-def get_workflow_url(project, id=None):
-    url = ""
-    if mlrun.mlconf.resolve_ui_url():
-        url = (
-            f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}"
-            f"/{project}/jobs/monitor-workflows/workflow/{id}"
+def get_kfp_list_runs_filter(
+    project_name: Optional[str] = None,
+    end_date: Optional[str] = None,
+    start_date: Optional[str] = None,
+) -> str:
+    """
+    Generates a filter for listing Kubeflow Pipelines (KFP) runs.
+    :param project_name: The name of the project. If "*", it won't filter by project.
+    :param end_date: The latest creation date for filtering runs (ISO 8601 format).
+    :param start_date: The earliest creation date for filtering runs (ISO 8601 format).
+    :return: A JSON-formatted filter string for KFP.
+    """
+    # KFP filter operation codes
+    kfp_less_than_or_equal_op = 7  # '<='
+    kfp_greater_than_or_equal_op = 5  # '>='
+    kfp_substring_op = 9  # Substring match
+    filters = {"predicates": []}
+    if end_date:
+        filters["predicates"].append(
+            {
+                "key": "created_at",
+                "op": kfp_less_than_or_equal_op,
+                "timestamp_value": end_date,
+            }
         )
-    return url
+    if project_name and project_name != "*":
+        filters["predicates"].append(
+            {
+                "key": "name",
+                "op": kfp_substring_op,
+                "string_value": project_name,
+            }
+        )
+    if start_date:
+        filters["predicates"].append(
+            {
+                "key": "created_at",
+                "op": kfp_greater_than_or_equal_op,
+                "timestamp_value": start_date,
+            }
+        )
+    return json.dumps(filters)
-def get_kfp_project_filter(project_name: str) -> str:
+def validate_and_convert_date(date_input: str) -> str:
     """
-    Generates a filter string for KFP runs, using a substring predicate
-    on the run's 'name' field. This is used as a heuristic to retrieve runs that are associated
-    with a specific project. The 'op: 9' operator indicates that the filter checks if the
-    project name appears as a substring in the run's name, ensuring that we can identify
-    runs belonging to the desired project.
+    Converts any recognizable date string into a standardized RFC 3339 format.
+    :param date_input: A date string in a recognizable format.
     """
-    is_substring_op = 9
-    project_name_filter = {
-        "predicates": [
-            {"key": "name", "op": is_substring_op, "string_value": project_name}
-        ]
-    }
-    return json.dumps(project_name_filter)
+    try:
+        dt_object = parser.parse(date_input)
+        if dt_object.tzinfo is not None:
+            # Convert to UTC if it's in a different timezone
+            dt_object = dt_object.astimezone(pytz.utc)
+        else:
+            # If no timezone info is present, assume it's in UTC
+            local_tz = pytz.timezone("UTC")
+            dt_object = local_tz.localize(dt_object)
+        # Convert the datetime object to an RFC 3339-compliant string.
+        # RFC 3339 requires timestamps to be in ISO 8601 format with a 'Z' suffix for UTC time.
+        # The isoformat() method adds a "+00:00" suffix for UTC by default,
+        # so we replace it with "Z" to ensure compliance.
+        formatted_date = dt_object.isoformat().replace("+00:00", "Z")
+        formatted_date = formatted_date.rstrip("Z") + "Z"
+        return formatted_date
+    except (ValueError, OverflowError) as e:
+        raise ValueError(
+            f"Invalid date format: {date_input}."
+            f" Date format must adhere to the RFC 3339 standard (e.g., 'YYYY-MM-DDTHH:MM:SSZ' for UTC)."
+        ) from e
 def are_strings_in_exception_chain_messages(
@@ -1175,7 +1349,7 @@ def get_function(function, namespaces, reload_modules: bool = False):
 def get_handler_extended(
     handler_path: str,
     context=None,
-    class_args: dict = None,
+    class_args: Optional[dict] = None,
     namespaces=None,
     reload_modules: bool = False,
 ):
@@ -1217,7 +1391,11 @@ def get_handler_extended(
 def datetime_from_iso(time_str: str) -> Optional[datetime]:
     if not time_str:
         return
-    return parser.isoparse(time_str)
+    dt = parser.isoparse(time_str)
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    # ensure the datetime is in UTC, converting if necessary
+    return dt.astimezone(timezone.utc)
 def datetime_to_iso(time_obj: Optional[datetime]) -> Optional[str]:
@@ -1256,6 +1434,21 @@ def has_timezone(timestamp):
         return False
+def format_datetime(dt: datetime, fmt: Optional[str] = None) -> str:
+    if dt is None:
+        return ""
+    # If the datetime is naive
+    if dt.tzinfo is None:
+        dt = dt.replace(tzinfo=timezone.utc)
+    # TODO: Once Python 3.12 is the minimal version, use %:z to format the timezone offset with a colon
+    formatted_time = dt.strftime(fmt or "%Y-%m-%d %H:%M:%S.%f%z")
+    # For versions earlier than Python 3.12, we manually insert the colon in the timezone offset
+    return formatted_time[:-2] + ":" + formatted_time[-2:]
 def as_list(element: Any) -> list[Any]:
     return element if isinstance(element, list) else [element]
@@ -1309,6 +1502,17 @@ def to_non_empty_values_dict(input_dict: dict) -> dict:
     return {key: value for key, value in input_dict.items() if value}
+def get_enriched_gpu_limits(function_limits: dict) -> dict[str, int]:
+    """
+    Creates new limits containing the GPU-related limits from the function's limits,
+    mapping each to zero. This is used for pods like Kaniko and Argo pods, which inherit
+    GPU-related selectors but do not require GPU resources. By setting these
+    limits to zero, the pods receive the necessary tolerations from the cloud provider for scheduling,
+    without actually consuming GPU resources.
+    """
+    return {resource: 0 for resource in function_limits if "/gpu" in resource.lower()}
 def str_to_timestamp(time_str: str, now_time: Timestamp = None):
     """convert fixed/relative time string to Pandas Timestamp
@@ -1347,6 +1551,16 @@ def str_to_timestamp(time_str: str, now_time: Timestamp = None):
     return Timestamp(time_str)
+def str_to_bool(value: str) -> bool:
+    """Convert a string to a boolean value."""
+    value = value.lower()
+    if value in ("true", "1", "t", "y", "yes", "on"):
+        return True
+    if value in ("false", "0", "f", "n", "no", "off"):
+        return False
+    raise ValueError(f"invalid boolean value: {value}")
 def is_link_artifact(artifact):
     if isinstance(artifact, dict):
         return (
@@ -1625,7 +1839,9 @@ setting partitioned=False"""
 def is_ecr_url(registry: str) -> bool:
     # example URL: <aws_account_id>.dkr.ecr.<region>.amazonaws.com
-    return ".ecr." in registry and ".amazonaws.com" in registry
+    parsed_url = urlparse(f"https://{registry}")
+    hostname = parsed_url.hostname
+    return hostname and ".ecr." in hostname and hostname.endswith(".amazonaws.com")
 def get_local_file_schema() -> list:
@@ -1660,7 +1876,14 @@ def get_serving_spec():
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Failed to find serving spec in env var or config file"
             )
-    spec = json.loads(data)
+    # Attempt to decode and decompress, or use as-is for backward compatibility
+    try:
+        decoded_data = base64.b64decode(data)
+        decompressed_data = gzip.decompress(decoded_data)
+        spec = json.loads(decompressed_data.decode("utf-8"))
+    except (OSError, gzip.BadGzipFile, base64.binascii.Error, json.JSONDecodeError):
+        spec = json.loads(data)
     return spec
@@ -1694,17 +1917,22 @@ def merge_dicts_with_precedence(*dicts: dict) -> dict:
 def validate_component_version_compatibility(
-    component_name: typing.Literal["iguazio", "nuclio"], *min_versions: str
+    component_name: typing.Literal["iguazio", "nuclio", "mlrun-client"],
+    *min_versions: str,
+    mlrun_client_version: Optional[str] = None,
 ):
     """
     :param component_name: Name of the component to validate compatibility for.
     :param min_versions: Valid minimum version(s) required, assuming no two versions have the same major and minor.
+    :param mlrun_client_version: Client version to validate when component_name is "mlrun-client".
     """
     parsed_min_versions = [
         semver.VersionInfo.parse(min_version) for min_version in min_versions
     ]
     parsed_current_version = None
     component_current_version = None
+    # For mlrun client we don't assume compatibility if we fail to parse the client version
+    assume_compatible = component_name not in ["mlrun-client"]
     try:
         if component_name == "iguazio":
             component_current_version = mlrun.mlconf.igz_version
@@ -1721,18 +1949,29 @@ def validate_component_version_compatibility(
             parsed_current_version = semver.VersionInfo.parse(
                 mlrun.mlconf.nuclio_version
             )
+        if component_name == "mlrun-client":
+            # dev version, assume compatible
+            if mlrun_client_version and (
+                mlrun_client_version.startswith("0.0.0+")
+                or "unstable" in mlrun_client_version
+            ):
+                return True
+            component_current_version = mlrun_client_version
+            parsed_current_version = semver.Version.parse(mlrun_client_version)
         if not parsed_current_version:
-            return True
+            return assume_compatible
     except ValueError:
         # only log when version is set but invalid
         if component_current_version:
             logger.warning(
-                "Unable to parse current version, assuming compatibility",
+                "Unable to parse current version",
                 component_name=component_name,
                 current_version=component_current_version,
                 min_versions=min_versions,
+                assume_compatible=assume_compatible,
             )
-        return True
+        return assume_compatible
     # Feature might have been back-ported e.g. nuclio node selection is supported from
     # 1.5.20 and 1.6.10 but not in 1.6.9 - therefore we reverse sort to validate against 1.6.x 1st and
@@ -1797,9 +2036,8 @@ def _reload(module, max_recursion_depth):
 def run_with_retry(
     retry_count: int,
     func: typing.Callable,
-    retry_on_exceptions: typing.Union[
-        type[Exception],
-        tuple[type[Exception]],
+    retry_on_exceptions: Optional[
+        typing.Union[type[Exception], tuple[type[Exception]]]
     ] = None,
     *args,
     **kwargs,
@@ -1832,3 +2070,218 @@ def run_with_retry(
             if attempt == retry_count:
                 raise
     raise last_exception
+def join_urls(base_url: Optional[str], path: Optional[str]) -> str:
+    """
+    Concatenate a base URL and a path with a single slash between them.
+    :param base_url: The base URL (e.g., "http://example.com"). None is treated as an empty string.
+    :param path: The path to append to the base URL (e.g., "/path/to/resource").
+    :return: The joined URL, or the base URL as given when the path is empty or None.
+    """
+    base = "" if base_url is None else base_url
+    if not path:
+        # nothing to append - the base is returned untouched (trailing slashes included)
+        return base
+    return base.rstrip("/") + "/" + path.lstrip("/")
+class Workflow:
+    """Static helpers that translate the steps of a pipeline workflow into run / function records."""
+    @staticmethod
+    def get_workflow_steps(
+        db: "mlrun.db.RunDBInterface", workflow_id: str, project: str
+    ) -> list:
+        """
+        Collect a record for each step of the workflow whose step type has a handler (run, build, deploy).
+        Run steps are resolved to run records and build/deploy steps to function records. If the workflow
+        manifest cannot be fetched or its steps cannot be processed, the result falls back to all runs
+        carrying the ``workflow=<workflow_id>`` label.
+        :param db: Run DB used to look up runs and functions.
+        :param workflow_id: ID of the workflow; a falsy value yields an empty list.
+        :param project: Project in which the runs are looked up.
+        :return: List of step records; empty when there is no workflow id or no manifest.
+        """
+        steps = []
+        def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+            """Append the run that matches ``_step`` to ``steps``, or a placeholder record when none is found."""
+            # on kfp 1.8 argo sets the pod hostname differently than what we have with kfp 2.5
+            # therefore, the heuristic needs to change. what we do here is first trying against 1.8 conventions
+            # and if we can't find it then falling back to 2.5
+            try:
+                # runner_pod = x-y-N
+                _runs = db.list_runs(
+                    project=project,
+                    labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={_step.node_name}",
+                )
+                if not _runs:
+                    try:
+                        # x-y-N -> x-y, N
+                        node_name_initials, node_name_generated_id = (
+                            _step.node_name.rsplit("-", 1)
+                        )
+                    except ValueError:
+                        # defensive programming, if the node name is not in the expected format
+                        node_name_initials = _step.node_name
+                        node_name_generated_id = ""
+                    # compile the expected runner pod hostname as per kfp >= 2.4
+                    # x-y, Z, N -> runner_pod = x-y-Z-N
+                    # (the rstrip drops the trailing dash left behind when the generated id is empty)
+                    runner_pod_value = "-".join(
+                        [
+                            node_name_initials,
+                            _step.display_name,
+                            node_name_generated_id,
+                        ]
+                    ).rstrip("-")
+                    logger.debug(
+                        "No run found for step, trying with different node name",
+                        step_node_name=runner_pod_value,
+                    )
+                    _runs = db.list_runs(
+                        project=project,
+                        labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={runner_pod_value}",
+                    )
+                # an empty result after both lookups surfaces here as IndexError, handled right below
+                _run = _runs[0]
+            except IndexError:
+                logger.warning("No run found for step", step=_step.to_dict())
+                # placeholder record carrying only the step's display name and the project
+                _run = {
+                    "metadata": {
+                        "name": _step.display_name,
+                        "project": project,
+                    },
+                    "status": {},
+                }
+            _run["step_kind"] = _step.step_type
+            if _step.skipped:
+                _run.setdefault("status", {})["state"] = (
+                    runtimes_constants.RunStates.skipped
+                )
+            steps.append(_run)
+        def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
+            """
+            Append the function referenced by the step's "mlrun/function-uri" annotation to ``steps``.
+            Nothing is appended when no function name can be extracted from the annotation.
+            """
+            # note: `project` here is local to this helper (taken from the function uri) and shadows
+            # the `project` argument of get_workflow_steps
+            project, name, hash_key = Workflow._extract_function_uri(
+                _step.get_annotation("mlrun/function-uri")
+            )
+            if name:
+                try:
+                    function = db.get_function(
+                        project=project, name=name, hash_key=hash_key
+                    )
+                except mlrun.errors.MLRunNotFoundError:
+                    # If the function is not found (if build failed for example), we will create a dummy
+                    # function object for the notification to display the function name
+                    function = {
+                        "metadata": {
+                            "name": name,
+                            "project": project,
+                            "hash_key": hash_key,
+                        },
+                    }
+                pod_phase = _step.phase
+                if _step.skipped:
+                    state = mlrun.common.schemas.FunctionState.skipped
+                else:
+                    state = runtimes_constants.PodPhases.pod_phase_to_run_state(
+                        pod_phase
+                    )
+                # the status is replaced as a whole with the state derived from the step
+                function["status"] = {"state": state}
+                # a datetime `updated` value is converted to its ISO-format string
+                if isinstance(function["metadata"].get("updated"), datetime):
+                    function["metadata"]["updated"] = function["metadata"][
+                        "updated"
+                    ].isoformat()
+                function["step_kind"] = _step.step_type
+                steps.append(function)
+        # handler per step type; steps of any other type are ignored
+        step_methods = {
+            mlrun_pipelines.common.constants.PipelineRunType.run: _add_run_step,
+            mlrun_pipelines.common.constants.PipelineRunType.build: _add_deploy_function_step,
+            mlrun_pipelines.common.constants.PipelineRunType.deploy: _add_deploy_function_step,
+        }
+        if not workflow_id:
+            return steps
+        try:
+            workflow_manifest = Workflow._get_workflow_manifest(workflow_id)
+        except Exception:
+            # If we fail to get the manifest, we will return the list of runs that have the same workflow id
+            logger.warning(
+                "Failed to extract workflow steps from workflow manifest, "
+                "returning all runs with the workflow id label",
+                workflow_id=workflow_id,
+                traceback=traceback.format_exc(),
+            )
+            return db.list_runs(
+                project=project,
+                labels=f"workflow={workflow_id}",
+            )
+        if not workflow_manifest:
+            return steps
+        try:
+            for step in workflow_manifest.get_steps():
+                step_method = step_methods.get(step.step_type)
+                if step_method:
+                    step_method(step)
+            return steps
+        except Exception:
+            # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
+            logger.warning(
+                "Failed to extract workflow steps from workflow manifest, "
+                "returning all runs with the workflow id label",
+                workflow_id=workflow_id,
+                traceback=traceback.format_exc(),
+            )
+            return db.list_runs(
+                project=project,
+                labels=f"workflow={workflow_id}",
+            )
+    @staticmethod
+    def _extract_function_uri(function_uri: str) -> tuple[str, str, str]:
+        """
+        Extract the project, name, and hash key from a function uri.
+        Examples:
+            - "project/name@hash_key" returns project, name, hash_key
+            - "project/name" returns project, name, ""
+        A uri that matches neither form returns None, None, None.
+        """
+        project, name, hash_key = None, None, None
+        hashed_pattern = r"^(.+)/(.+)@(.+)$"
+        pattern = r"^(.+)/(.+)$"
+        # try the form that includes a hash key first, then the plain "project/name" form
+        match = re.match(hashed_pattern, function_uri)
+        if match:
+            project, name, hash_key = match.groups()
+        else:
+            match = re.match(pattern, function_uri)
+            if match:
+                project, name = match.groups()
+                hash_key = ""
+        return project, name, hash_key
+    @staticmethod
+    def _get_workflow_manifest(
+        workflow_id: str,
+    ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
+        """
+        Wait for the workflow's pipeline run to complete and return its workflow manifest.
+        :param workflow_id: ID of the pipeline run to wait for.
+        :return: The workflow manifest of the run, or None when no run is returned by the KFP client.
+        """
+        kfp_client = mlrun_pipelines.utils.get_client(mlrun.mlconf.kfp_url)
+        # arbitrary timeout of 30 seconds, the workflow should be done by now, however sometimes kfp takes a few
+        # seconds to update the workflow status
+        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 30)
+        if not kfp_run:
+            return None
+        kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
+        return kfp_run.workflow_manifest()
+def as_dict(data: typing.Union[dict, str]) -> dict:
+    """Return ``data`` as a dict, JSON-decoding it first when it is given as a string."""
+    return json.loads(data) if isinstance(data, str) else data
+def encode_user_code(
+    user_code: typing.Union[str, bytes], max_len_warning: typing.Optional[int] = None
+) -> str:
+    """
+    Base64-encode user code and warn when the encoded result is longer than the allowed size.
+    :param user_code: The code to encode; a string is encoded as UTF-8 first.
+    :param max_len_warning: Encoded length above which a warning is logged. When not given (or zero),
+                            config.function.spec.source_code_max_bytes is used.
+    :return: The base64-encoded code as a string.
+    """
+    if not max_len_warning:
+        max_len_warning = config.function.spec.source_code_max_bytes
+    raw_code = user_code.encode("utf-8") if isinstance(user_code, str) else user_code
+    encoded = base64.b64encode(raw_code).decode("utf-8")
+    # the limit is checked against the encoded size, not the raw one; exceeding it only logs a warning
+    if len(encoded) > max_len_warning:
+        logger.warning(
+            f"User code exceeds the maximum allowed size of {max_len_warning} bytes for non remote source. "
+            "Consider using `with_source_archive` to add user code as a remote source to the function."
+        )
+    return encoded

mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl