mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/alerts/alert.py +1 -1
- mlrun/artifacts/manager.py +5 -1
- mlrun/common/runtimes/constants.py +3 -0
- mlrun/common/schemas/__init__.py +1 -1
- mlrun/common/schemas/alert.py +31 -9
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/model_monitoring/__init__.py +3 -1
- mlrun/common/schemas/model_monitoring/constants.py +20 -1
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
- mlrun/config.py +2 -0
- mlrun/data_types/to_pandas.py +5 -5
- mlrun/datastore/datastore.py +6 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/sources.py +111 -6
- mlrun/datastore/targets.py +2 -2
- mlrun/db/base.py +5 -1
- mlrun/db/httpdb.py +22 -3
- mlrun/db/nopdb.py +5 -1
- mlrun/errors.py +6 -0
- mlrun/feature_store/retrieval/conversion.py +5 -5
- mlrun/feature_store/retrieval/job.py +3 -2
- mlrun/feature_store/retrieval/spark_merger.py +2 -1
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
- mlrun/model_monitoring/db/stores/base/store.py +16 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
- mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
- mlrun/model_monitoring/db/tsdb/base.py +25 -18
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +73 -72
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
- mlrun/model_monitoring/helpers.py +32 -0
- mlrun/model_monitoring/stream_processing.py +7 -4
- mlrun/model_monitoring/writer.py +18 -13
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/projects/project.py +33 -8
- mlrun/render.py +8 -5
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +20 -1
- mlrun/utils/notifications/notification/slack.py +27 -7
- mlrun/utils/notifications/notification_pusher.py +38 -40
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +7 -2
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +55 -51
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/helpers.py
CHANGED

@@ -111,6 +111,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
     )
 
 
+def get_tsdb_connection_string(
+    secret_provider: typing.Optional[typing.Callable] = None,
+) -> str:
+    """Get TSDB connection string from the project secret. If wasn't set, take it from the system
+    configurations.
+    :param secret_provider: An optional secret provider to get the connection string secret.
+    :return: Valid TSDB connection string.
+    """
+
+    return (
+        mlrun.get_secret_or_env(
+            key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
+            secret_provider=secret_provider,
+        )
+        or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
+    )
+
+
 def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
     """
     Convert a batch dictionary to timedelta.
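The helper resolves the connection string from the project secret first and falls back to the system configuration. A minimal usage sketch (the provider callable, the URL, and the literal secret key "TSDB_CONNECTION" are illustrative assumptions, not part of the diff):

import mlrun
from mlrun.model_monitoring.helpers import get_tsdb_connection_string

# Hypothetical provider: any callable mapping a secret key to its value (or None).
def my_secret_provider(key: str):
    # assumes ProjectSecretKeys.TSDB_CONNECTION resolves to "TSDB_CONNECTION"
    return {"TSDB_CONNECTION": "taosws://user:password@tsdb-host:6041"}.get(key)

# Falls back to mlrun.mlconf.model_endpoint_monitoring.tsdb_connection when the secret is unset.
connection_string = get_tsdb_connection_string(secret_provider=my_secret_provider)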
@@ -260,3 +278,17 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def get_result_instance_fqn(
+    model_endpoint_id: str, app_name: str, result_name: str
+) -> str:
+    return f"{model_endpoint_id}.{app_name}.result.{result_name}"
+
+
+def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
+    return get_result_instance_fqn(
+        model_endpoint_id,
+        mm_constants.HistogramDataDriftApplicationConstants.NAME,
+        mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
+    )
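For reference, the FQN these helpers produce has the shape "<endpoint-id>.<app-name>.result.<result-name>"; the writer below uses it as the alert entity id. With illustrative values:

from mlrun.model_monitoring.helpers import get_result_instance_fqn

fqn = get_result_instance_fqn("ep-123", "my-app", "drift-metric")
assert fqn == "ep-123.my-app.result.drift-metric"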
mlrun/model_monitoring/stream_processing.py
CHANGED

@@ -136,7 +136,11 @@ class EventStreamProcessor:
         self.tsdb_batching_max_events = tsdb_batching_max_events
         self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
 
-    def apply_monitoring_serving_graph(self, fn: mlrun.runtimes.ServingRuntime) -> None:
+    def apply_monitoring_serving_graph(
+        self,
+        fn: mlrun.runtimes.ServingRuntime,
+        tsdb_service_provider: typing.Optional[typing.Callable] = None,
+    ) -> None:
         """
         Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
         parts that each one them includes several steps of different operations that are executed on the events from
@@ -163,6 +167,7 @@ class EventStreamProcessor:
                       using CE, the parquet target path is based on the defined MLRun artifact path.
 
         :param fn: A serving function.
+        :param tsdb_service_provider: An optional callable function that provides the TSDB connection string.
         """
 
         graph = typing.cast(
@@ -322,15 +327,13 @@ class EventStreamProcessor:
 
         # TSDB branch (skip to Prometheus if in CE env)
         if not mlrun.mlconf.is_ce_mode():
-            # TSDB branch
             tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-                project=self.project,
+                project=self.project, secret_provider=tsdb_service_provider
             )
             tsdb_connector.apply_monitoring_stream_steps(graph=graph)
 
         else:
             # Prometheus
-
             # Increase the prediction counter by 1 and update the latency value
             graph.add_step(
                 "IncCounter",
mlrun/model_monitoring/writer.py
CHANGED
@@ -29,7 +29,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
     WriterEventKind,
 )
 from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
-from mlrun.model_monitoring.helpers import get_endpoint_record
+from mlrun.model_monitoring.helpers import get_endpoint_record, get_result_instance_fqn
 from mlrun.serving.utils import StepToDict
 from mlrun.utils import logger
 from mlrun.utils.notifications.notification_pusher import CustomNotificationPusher
@@ -101,7 +101,7 @@ class ModelMonitoringWriter(StepToDict):
 
     kind = "monitoring_application_stream_pusher"
 
-    def __init__(self, project: str) -> None:
+    def __init__(self, project: str, tsdb_secret_provider=None) -> None:
         self.project = project
         self.name = project  # required for the deployment process
 
@@ -113,24 +113,24 @@ class ModelMonitoringWriter(StepToDict):
             project=self.project
         )
         self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-            project=self.project,
+            project=self.project, secret_provider=tsdb_secret_provider
         )
         self._endpoints_records = {}
 
     @staticmethod
     def _generate_event_on_drift(
-
+        entity_id: str, drift_status: str, event_value: dict, project_name: str
     ) -> None:
-        logger.info("Sending an
+        logger.info("Sending an event")
         entity = mlrun.common.schemas.alert.EventEntities(
-            kind=alert_objects.EventEntityKind.
+            kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
             project=project_name,
-            ids=[
+            ids=[entity_id],
         )
         event_kind = (
-            alert_objects.EventKind.
+            alert_objects.EventKind.DATA_DRIFT_DETECTED
             if drift_status == ResultStatusApp.detected.value
-            else alert_objects.EventKind.
+            else alert_objects.EventKind.DATA_DRIFT_SUSPECTED
         )
         event_data = mlrun.common.schemas.Event(
             kind=event_kind, entity=entity, value_dict=event_value
@@ -138,7 +138,7 @@ class ModelMonitoringWriter(StepToDict):
         mlrun.get_run_db().generate_event(event_kind, event_data)
 
     @staticmethod
-    def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent,
+    def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, WriterEventKind]:
         """
         Modify the raw event into the expected monitoring application event
         schema as defined in `mlrun.common.schemas.model_monitoring.constants.WriterEvent`
@@ -179,12 +179,13 @@ class ModelMonitoringWriter(StepToDict):
     def do(self, event: _RawEvent) -> None:
         event, kind = self._reconstruct_event(event)
         logger.info("Starting to write event", event=event)
-
         self._tsdb_connector.write_application_event(event=event.copy(), kind=kind)
         self._app_result_store.write_application_event(event=event.copy(), kind=kind)
+
         logger.info("Completed event DB writes")
 
-
+        if kind == WriterEventKind.RESULT:
+            _Notifier(event=event, notification_pusher=self._custom_notifier).notify()
 
         if (
             mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
@@ -208,7 +209,11 @@ class ModelMonitoringWriter(StepToDict):
                 "result_value": event[ResultData.RESULT_VALUE],
             }
             self._generate_event_on_drift(
-
+                get_result_instance_fqn(
+                    event[WriterEvent.ENDPOINT_ID],
+                    event[WriterEvent.APPLICATION_NAME],
+                    event[ResultData.RESULT_NAME],
+                ),
                 event[ResultData.RESULT_STATUS],
                 event_value,
                 self.project,
mlrun/package/utils/_formatter.py
CHANGED

@@ -142,11 +142,11 @@ class _YAMLFormatter(_Formatter):
 
         :param obj:         The object to write.
         :param file_path:   The file path to write to.
-        :param dump_kwargs: Additional keyword arguments to pass to the `yaml.
+        :param dump_kwargs: Additional keyword arguments to pass to the `yaml.safe_dump` method of the formatter in use.
         """
         dump_kwargs = dump_kwargs or cls.DEFAULT_DUMP_KWARGS
         with open(file_path, "w") as file:
-            yaml.
+            yaml.safe_dump(obj, file, **dump_kwargs)
 
     @classmethod
     def read(cls, file_path: str) -> Union[list, dict]:
mlrun/projects/project.py
CHANGED
@@ -39,6 +39,7 @@ import yaml
 from mlrun_pipelines.models import PipelineNodeWrapper
 
 import mlrun.common.helpers
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas.artifact
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.db
@@ -3098,17 +3099,18 @@ class MlrunProject(ModelObj):
 
     def set_model_monitoring_credentials(
         self,
-        access_key: str = None,
-        endpoint_store_connection: str = None,
-        stream_path: str = None,
+        access_key: Optional[str] = None,
+        endpoint_store_connection: Optional[str] = None,
+        stream_path: Optional[str] = None,
+        tsdb_connection: Optional[str] = None,
     ):
         """Set the credentials that will be used by the project's model monitoring
         infrastructure functions.
 
-        :param access_key: Model Monitoring access key for managing user permissions
         :param access_key: Model Monitoring access key for managing user permissions
         :param endpoint_store_connection: Endpoint store connection string
         :param stream_path: Path to the model monitoring stream
+        :param tsdb_connection: Connection string to the time series database
         """
 
         secrets_dict = {}
@@ -3131,6 +3133,16 @@ class MlrunProject(ModelObj):
             mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
         ] = stream_path
 
+        if tsdb_connection:
+            if not tsdb_connection.startswith("taosws://"):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "Currently only TDEngine websocket connection is supported for non-v3io TSDB,"
+                    "please provide a full URL (e.g. taosws://user:password@host:port)"
+                )
+            secrets_dict[
+                mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION
+            ] = tsdb_connection
+
         self.set_secrets(
             secrets=secrets_dict,
             provider=mlrun.common.schemas.SecretProviderName.kubernetes,
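A usage sketch for the new parameter (project name, credentials, host, and port are placeholders; per the validation above, only taosws:// URLs are accepted for a non-v3io TSDB):

import mlrun

project = mlrun.get_or_create_project("my-project", context="./")
project.set_model_monitoring_credentials(
    tsdb_connection="taosws://user:password@tsdb-host:6041",
)
# A non-websocket URL such as "http://tsdb-host:6041" raises MLRunInvalidArgumentError.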
@@ -3689,7 +3701,10 @@ class MlrunProject(ModelObj):
         name: Optional[str] = None,
         uid: Optional[Union[str, list[str]]] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
@@ -3723,10 +3738,11 @@ class MlrunProject(ModelObj):
         :param labels: A list of labels to filter by. Label filters work by either filtering a specific value
                        of a label (i.e. list("key=value")) or by looking for the existence of a given
                        key (i.e. "key").
-        :param state: List only runs whose state is specified.
+        :param state: Deprecated - List only runs whose state is specified.
+        :param states: List only runs whose state is one of the provided states.
         :param sort: Whether to sort the result according to their start time. Otherwise, results will be
                      returned by their internal order in the DB (order will not be guaranteed).
-        :param last: Deprecated - currently not used (will be removed in 1.
+        :param last: Deprecated - currently not used (will be removed in 1.9.0).
         :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
         :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
         :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -3734,13 +3750,22 @@ class MlrunProject(ModelObj):
                                      last_update_time_to)``.
         :param last_update_time_to: Filter by run last update time in ``(last_update_time_from, last_update_time_to)``.
         """
+        if state:
+            # TODO: Remove this in 1.9.0
+            warnings.warn(
+                "'state' is deprecated and will be removed in 1.9.0. Use 'states' instead.",
+                FutureWarning,
+            )
+
         db = mlrun.db.get_run_db(secrets=self._secrets)
         return db.list_runs(
             name,
             uid,
             self.metadata.name,
             labels=labels,
-
+            states=mlrun.utils.helpers.as_list(state)
+            if state is not None
+            else states or None,
             sort=sort,
             last=last,
             iter=iter,
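A sketch of the old and new call styles (the state values are illustrative):

# New style: filter on several run states at once.
runs = project.list_runs(states=["completed", "error"])

# Old style still works — it is wrapped into a one-element list via
# mlrun.utils.helpers.as_list — but now emits a FutureWarning.
runs = project.list_runs(state="completed")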
mlrun/render.py
CHANGED
@@ -126,7 +126,7 @@ def artifacts_html(
 
         if not attribute_value:
             mlrun.utils.logger.warning(
-                "Artifact is
+                f"Artifact required attribute {attribute_name} is missing, omitting from output",
                 artifact_key=key,
             )
             continue
@@ -400,14 +400,17 @@ def runs_to_html(
     else:
         df["labels"] = df["labels"].apply(dict_html)
         df["inputs"] = df["inputs"].apply(inputs_html)
-        if df["
-            df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
-            df.drop("artifacts", axis=1, inplace=True)
-        else:
+        if df["artifacts"][0]:
             df["artifacts"] = df["artifacts"].apply(
                 lambda artifacts: artifacts_html(artifacts, "target_path"),
             )
             df.drop("artifact_uris", axis=1, inplace=True)
+        elif df["artifact_uris"][0]:
+            df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
+            df.drop("artifacts", axis=1, inplace=True)
+        else:
+            df.drop("artifacts", axis=1, inplace=True)
+            df.drop("artifact_uris", axis=1, inplace=True)
 
     def expand_error(x):
         if x["state"] == "error":
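The new branch order prefers a populated artifacts column, falls back to artifact_uris, and drops both columns when neither has data in the first row. A toy sketch of the middle case (not mlrun's real run records):

import pandas as pd

df = pd.DataFrame({"artifacts": [[]], "artifact_uris": [{"model": "store://models/demo"}]})
# df["artifacts"][0] is empty -> the elif renders artifact_uris and drops "artifacts";
# if both were empty, the else branch would drop both columns.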
mlrun/runtimes/databricks_job/databricks_wrapper.py
CHANGED

@@ -99,7 +99,7 @@ def save_credentials(
     credentials["DATABRICKS_CLUSTER_ID"] = cluster_id
 
     with open(credentials_path, "w") as yaml_file:
-        yaml.
+        yaml.safe_dump(credentials, yaml_file, default_flow_style=False)
 
 
 def run_mlrun_databricks_job(
mlrun/utils/async_http.py
CHANGED
@@ -24,7 +24,7 @@ from aiohttp_retry import ExponentialRetry, RequestParams, RetryClient, RetryOpt
 from aiohttp_retry.client import _RequestContext
 
 from mlrun.config import config
-from mlrun.errors import err_to_str
+from mlrun.errors import err_to_str, raise_for_status
 
 from .helpers import logger as mlrun_logger
 
@@ -46,12 +46,21 @@ class AsyncClientWithRetry(RetryClient):
         *args,
         **kwargs,
     ):
+        # do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
+        blacklisted_methods = (
+            blacklisted_methods
+            if blacklisted_methods is not None
+            else [
+                "POST",
+                "PUT",
+                "PATCH",
+            ]
+        )
         super().__init__(
             *args,
             retry_options=ExponentialRetryOverride(
                 retry_on_exception=retry_on_exception,
-
-                blacklisted_methods=blacklisted_methods or ["POST", "PUT", "PATCH"],
+                blacklisted_methods=blacklisted_methods,
                 attempts=max_retries,
                 statuses=retry_on_status_codes,
                 factor=retry_backoff_factor,
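Moving the default out of the `or` expression means an explicitly passed empty list is now honored instead of silently reverting to the default. A hedged sketch of the difference (other constructor arguments keep their defaults):

# Retry all methods, including non-idempotent ones — only sensible when the
# target endpoints are known to be idempotent.
client = AsyncClientWithRetry(blacklisted_methods=[])
# Previously, `[] or ["POST", "PUT", "PATCH"]` evaluated to the default, so an
# empty blacklist was impossible to express.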
@@ -63,6 +72,12 @@ class AsyncClientWithRetry(RetryClient):
             **kwargs,
         )
 
+    def methods_blacklist_update_required(self, new_blacklist: str):
+        self._retry_options: ExponentialRetryOverride
+        return set(self._retry_options.blacklisted_methods).difference(
+            set(new_blacklist)
+        )
+
     def _make_requests(
         self,
         params_list: list[RequestParams],
@@ -173,7 +188,7 @@ class _CustomRequestContext(_RequestContext):
                 last_attempt = current_attempt == self._retry_options.attempts
                 if self._is_status_code_ok(response.status) or last_attempt:
                     if self._raise_for_status:
-
+                        raise_for_status(response)
 
                     self._response = response
                     return response
@@ -275,6 +290,11 @@ class _CustomRequestContext(_RequestContext):
                 if isinstance(exc.os_error, exc_type):
                     return
             if exc.__cause__:
-                return
+                # If the cause exception is retriable, return, otherwise, raise the original exception
+                try:
+                    self.verify_exception_type(exc.__cause__)
+                except Exception:
+                    raise exc
+                return
         else:
             raise exc
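The retry check now recurses into exc.__cause__ rather than returning unconditionally. A standalone sketch of the rule (the retriable types are illustrative, not mlrun's actual list):

def is_retriable(exc: BaseException) -> bool:
    # An exception is retriable if it, or anything in its __cause__ chain,
    # is of a retriable type; otherwise the original exception propagates.
    if isinstance(exc, (ConnectionError, TimeoutError)):
        return True
    if exc.__cause__ is not None:
        return is_retriable(exc.__cause__)
    return False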
mlrun/utils/helpers.py
CHANGED
@@ -973,6 +973,15 @@ def get_ui_url(project, uid=None):
     return url
 
 
+def get_model_endpoint_url(project, model_name, model_endpoint_id):
+    url = ""
+    if mlrun.mlconf.resolve_ui_url():
+        url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/models"
+        if model_name:
+            url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
+    return url
+
+
 def get_workflow_url(project, id=None):
     url = ""
     if mlrun.mlconf.resolve_ui_url():
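Assuming the UI URL resolves and the default projects prefix, the helper yields URLs of this shape (host and ids are illustrative):

url = get_model_endpoint_url("my-project", "churn-model", "ep-123")
# e.g. "https://mlrun-ui.example.com/projects/my-project/models/model-endpoints/churn-model/ep-123/overview"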
@@ -1183,7 +1192,7 @@ def calculate_dataframe_hash(dataframe: pandas.DataFrame):
     return hashlib.sha1(pandas.util.hash_pandas_object(dataframe).values).hexdigest()
 
 
-def template_artifact_path(artifact_path, project, run_uid="project"):
+def template_artifact_path(artifact_path, project, run_uid=None):
     """
     Replace {{run.uid}} with the run uid and {{project}} with the project name in the artifact path.
     If no run uid is provided, the word `project` will be used instead as it is assumed to be a project
@@ -1191,6 +1200,7 @@ def template_artifact_path(artifact_path, project, run_uid="project"):
     """
     if not artifact_path:
         return artifact_path
+    run_uid = run_uid or "project"
     artifact_path = artifact_path.replace("{{run.uid}}", run_uid)
     artifact_path = _fill_project_path_template(artifact_path, project)
     return artifact_path
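Caller-visible behavior is unchanged; the default is just resolved inside the body now. For example:

template_artifact_path("s3://bucket/{{project}}/{{run.uid}}", "proj")
# -> "s3://bucket/proj/project"  (no run uid: the literal word "project" is substituted)
template_artifact_path("s3://bucket/{{project}}/{{run.uid}}", "proj", run_uid="abc123")
# -> "s3://bucket/proj/abc123"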
@@ -1603,3 +1613,12 @@ def validate_component_version_compatibility(
     if parsed_current_version < parsed_min_version:
         return False
     return True
+
+
+def format_alert_summary(
+    alert: mlrun.common.schemas.AlertConfig, event_data: mlrun.common.schemas.Event
+) -> str:
+    result = alert.summary.replace("{{project}}", alert.project)
+    result = result.replace("{{name}}", alert.name)
+    result = result.replace("{{entity}}", event_data.entity.ids[0])
+    return result
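A self-contained sketch with duck-typed stand-ins for the schema objects (the summary template is illustrative):

from types import SimpleNamespace

alert = SimpleNamespace(
    summary="{{name}} fired for {{entity}} in {{project}}",
    project="my-project",
    name="drift-alert",
)
event = SimpleNamespace(entity=SimpleNamespace(ids=["ep-123.my-app.result.drift-metric"]))
print(format_alert_summary(alert, event))
# -> "drift-alert fired for ep-123.my-app.result.drift-metric in my-project"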
mlrun/utils/notifications/notification/slack.py
CHANGED

@@ -32,6 +32,7 @@ class SlackNotification(NotificationBase):
         "completed": ":smiley:",
         "running": ":man-running:",
         "error": ":x:",
+        "skipped": ":zzz:",
     }
 
     async def push(
@@ -135,8 +136,16 @@ class SlackNotification(NotificationBase):
         line = [
             self._get_slack_row(f":bell: {alert.name} alert has occurred"),
             self._get_slack_row(f"*Project:*\n{alert.project}"),
-            self._get_slack_row(f"*
+            self._get_slack_row(f"*ID:*\n{event_data.entity.ids[0]}"),
         ]
+
+        if alert.summary:
+            line.append(
+                self._get_slack_row(
+                    f"*Summary:*\n{mlrun.utils.helpers.format_alert_summary(alert, event_data)}"
+                )
+            )
+
         if event_data.value_dict:
             data_lines = []
             for key, value in event_data.value_dict.items():
@@ -144,10 +153,21 @@ class SlackNotification(NotificationBase):
             data_text = "\n".join(data_lines)
             line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
 
-        if
-
-        ):
-
+        if (
+            event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
+        ):  # JOB entity
+            uid = event_data.value_dict.get("uid")
+            url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
+            overview_type = "Job overview"
+        else:  # MODEL entity
+            model_name = event_data.value_dict.get("model")
+            model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
+            url = mlrun.utils.helpers.get_model_endpoint_url(
+                alert.project, model_name, model_endpoint_id
+            )
+            overview_type = "Model endpoint"
+
+        line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
 
         return line
 
@@ -157,11 +177,11 @@ class SlackNotification(NotificationBase):
 
         # Only show the URL if the run is not a function (serving or mlrun function)
         kind = run.get("step_kind")
-
+        state = run["status"].get("state", "")
+        if state != "skipped" and (url and not kind or kind == "run"):
             line = f'<{url}|*{meta.get("name")}*>'
         else:
             line = meta.get("name")
-        state = run["status"].get("state", "")
         if kind:
             line = f'{line} *({run.get("step_kind", run.get("kind", ""))})*'
         line = f'{self.emojis.get(state, ":question:")} {line}'
mlrun/utils/notifications/notification_pusher.py
CHANGED

@@ -14,7 +14,6 @@
 
 import asyncio
 import datetime
-import json
 import os
 import re
 import traceback
@@ -23,6 +22,7 @@ from concurrent.futures import ThreadPoolExecutor
 
 import kfp
 import mlrun_pipelines.common.ops
+import mlrun_pipelines.models
 
 import mlrun.common.runtimes.constants
 import mlrun.common.schemas
@@ -392,17 +392,29 @@ class NotificationPusher(_NotificationPusherBase):
         steps = []
         db = mlrun.get_run_db()
 
-        def _add_run_step(
-
-
-
-
-
+        def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+            try:
+                _run = db.list_runs(
+                    project=run.metadata.project,
+                    labels=f"mlrun/runner-pod={_step.node_name}",
+                )[0]
+            except IndexError:
+                _run = {
+                    "metadata": {
+                        "name": _step.display_name,
+                        "project": run.metadata.project,
+                    },
+                }
+            _run["step_kind"] = _step.step_type
+            if _step.skipped:
+                _run.setdefault("status", {})["state"] = (
+                    mlrun.common.runtimes.constants.RunStates.skipped
+                )
             steps.append(_run)
 
-        def _add_deploy_function_step(
+        def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
             project, name, hash_key = self._extract_function_uri(
-
+                _step.get_annotation("mlrun/function-uri")
             )
             if name:
                 try:
@@ -419,16 +431,19 @@ class NotificationPusher(_NotificationPusherBase):
                     "hash_key": hash_key,
                 },
             }
-
-
-
-
-
+            pod_phase = _step.phase
+            if _step.skipped:
+                state = mlrun.common.schemas.FunctionState.skipped
+            else:
+                state = mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
+                    pod_phase
+                )
+            function["status"] = {"state": state}
             if isinstance(function["metadata"].get("updated"), datetime.datetime):
                 function["metadata"]["updated"] = function["metadata"][
                     "updated"
                 ].isoformat()
-            function["step_kind"] =
+            function["step_kind"] = _step.step_type
             steps.append(function)
 
         step_methods = {
@@ -446,26 +461,10 @@ class NotificationPusher(_NotificationPusherBase):
             return steps
 
         try:
-
-
-                key=lambda _node: _node[1]["finishedAt"],
-            )
-            for node_name, node in workflow_nodes:
-                if node["type"] != "Pod":
-                    # Skip the parent DAG node
-                    continue
-
-                node_template = next(
-                    template
-                    for template in workflow_manifest["spec"]["templates"]
-                    if template["name"] == node["templateName"]
-                )
-                step_type = node_template["metadata"]["annotations"].get(
-                    "mlrun/pipeline-step-type"
-                )
-                step_method = step_methods.get(step_type)
+            for step in workflow_manifest.get_steps():
+                step_method = step_methods.get(step.step_type)
                 if step_method:
-                    step_method(
+                    step_method(step)
             return steps
         except Exception:
             # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
@@ -481,7 +480,9 @@ class NotificationPusher(_NotificationPusherBase):
         )
 
     @staticmethod
-    def _get_workflow_manifest(
+    def _get_workflow_manifest(
+        workflow_id: str,
+    ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
         kfp_url = mlrun.mlconf.resolve_kfp_url(mlrun.mlconf.namespace)
         if not kfp_url:
             raise mlrun.errors.MLRunNotFoundError(
@@ -495,11 +496,8 @@ class NotificationPusher(_NotificationPusherBase):
         if not kfp_run:
             return None
 
-        kfp_run =
-
-        return json.loads(kfp_run["pipeline_runtime"]["workflow_manifest"])
-        except Exception:
-            return None
+        kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
+        return kfp_run.workflow_manifest()
 
     def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
         """
mlrun/utils/version/version.json
CHANGED