mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +25 -111
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +38 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +41 -47
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +68 -0
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +25 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +8 -2
- mlrun/common/schemas/client_spec.py +2 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +19 -3
- mlrun/common/schemas/model_monitoring/constants.py +96 -26
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +22 -21
- mlrun/common/types.py +7 -1
- mlrun/config.py +87 -19
- mlrun/data_types/data_types.py +4 -0
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +69 -30
- mlrun/datastore/datastore.py +10 -2
- mlrun/datastore/datastore_profile.py +90 -6
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +172 -44
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +285 -41
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +149 -14
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +608 -178
- mlrun/db/nopdb.py +191 -7
- mlrun/errors.py +11 -0
- mlrun/execution.py +37 -20
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +21 -52
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +9 -3
- mlrun/launcher/remote.py +9 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +58 -19
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +127 -301
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +30 -36
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +100 -7
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +93 -228
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +30 -16
- mlrun/projects/pipelines.py +92 -99
- mlrun/projects/project.py +757 -268
- mlrun/render.py +15 -14
- mlrun/run.py +160 -162
- mlrun/runtimes/__init__.py +55 -3
- mlrun/runtimes/base.py +33 -19
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +98 -58
- mlrun/runtimes/nuclio/serving.py +36 -42
- mlrun/runtimes/pod.py +196 -45
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +7 -4
- mlrun/serving/server.py +7 -8
- mlrun/serving/states.py +73 -43
- mlrun/serving/v2_serving.py +8 -7
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +141 -75
- mlrun/utils/http.py +1 -1
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/utils/logger.py
CHANGED

@@ -13,6 +13,7 @@
 # limitations under the License.

 import logging
+import typing
 from enum import Enum
 from sys import stdout
 from traceback import format_exception

@@ -92,7 +93,25 @@ class HumanReadableFormatter(_BaseFormatter):

 class HumanReadableExtendedFormatter(HumanReadableFormatter):
     def format(self, record) -> str:
-        more =
+        more = ""
+        record_with = self._record_with(record)
+        if record_with:
+
+            def _format_value(val):
+                formatted_val = (
+                    val
+                    if isinstance(val, str)
+                    else str(orjson.loads(self._json_dump(val)))
+                )
+                return (
+                    formatted_val.replace("\n", "\n\t\t")
+                    if len(formatted_val) < 4096
+                    else repr(formatted_val)
+                )
+
+            more = "\n\t" + "\n\t".join(
+                [f"{key}: {_format_value(val)}" for key, val in record_with.items()]
+            )
         return (
             "> "
             f"{self.formatTime(record, self.datefmt)} "

@@ -221,14 +240,27 @@ class FormatterKinds(Enum):
     JSON = "json"


-def
+def resolve_formatter_by_kind(
+    formatter_kind: FormatterKinds,
+) -> type[
+    typing.Union[HumanReadableFormatter, HumanReadableExtendedFormatter, JSONFormatter]
+]:
     return {
-        FormatterKinds.HUMAN: HumanReadableFormatter
-        FormatterKinds.HUMAN_EXTENDED: HumanReadableExtendedFormatter
-        FormatterKinds.JSON: JSONFormatter
+        FormatterKinds.HUMAN: HumanReadableFormatter,
+        FormatterKinds.HUMAN_EXTENDED: HumanReadableExtendedFormatter,
+        FormatterKinds.JSON: JSONFormatter,
     }[formatter_kind]


+def create_test_logger(name: str = "mlrun", stream: IO[str] = stdout) -> Logger:
+    return create_logger(
+        level="debug",
+        formatter_kind=FormatterKinds.HUMAN_EXTENDED.name,
+        name=name,
+        stream=stream,
+    )
+
+
 def create_logger(
     level: Optional[str] = None,
     formatter_kind: str = FormatterKinds.HUMAN.name,

@@ -243,11 +275,11 @@ def create_logger(
     logger_instance = Logger(level, name=name, propagate=False)

     # resolve formatter
-    formatter_instance =
+    formatter_instance = resolve_formatter_by_kind(
         FormatterKinds(formatter_kind.lower())
     )

     # set handler
-    logger_instance.set_handler("default", stream or stdout, formatter_instance)
+    logger_instance.set_handler("default", stream or stdout, formatter_instance())

     return logger_instance
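Note: per the hunks above, the formatter lookup now returns formatter classes (instantiated by create_logger via formatter_instance()), and a create_test_logger helper builds a debug-level, human-extended logger. A minimal usage sketch, assuming only the names visible in this diff; the call site itself is illustrative, not part of the package:

    from mlrun.utils.logger import FormatterKinds, create_logger, resolve_formatter_by_kind

    # resolve_formatter_by_kind() now returns a class, not an instance
    formatter_cls = resolve_formatter_by_kind(FormatterKinds.HUMAN_EXTENDED)
    formatter = formatter_cls()

    # create_logger() instantiates the resolved class when wiring the default handler
    logger = create_logger(level="debug", formatter_kind=FormatterKinds.HUMAN_EXTENDED.name)
    logger.debug("ingestion finished", rows=10000, source="s3://bucket/data.parquet")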
mlrun/utils/notifications/notification/__init__.py
CHANGED

@@ -51,14 +51,19 @@ class NotificationTypes(str, enum.Enum):
             self.console: [self.ipython],
         }.get(self, [])

+    @classmethod
+    def local(cls) -> list[str]:
+        return [
+            cls.console,
+            cls.ipython,
+        ]
+
     @classmethod
     def all(cls) -> list[str]:
-        return
-        ]
-        )
+        return [
+            cls.console,
+            cls.git,
+            cls.ipython,
+            cls.slack,
+            cls.webhook,
+        ]
mlrun/utils/notifications/notification/base.py
CHANGED

@@ -44,6 +44,8 @@ class NotificationBase:
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ):
         raise NotImplementedError()

@@ -61,6 +63,8 @@ class NotificationBase:
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ) -> str:
         if custom_html:
             return custom_html

@@ -68,6 +72,14 @@ class NotificationBase:
         if self.name:
             message = f"{self.name}: {message}"

+        if alert:
+            if not event_data:
+                return f"[{severity}] {message}"
+            return (
+                f"[{severity}] {message} for project {alert.project} "
+                f"UID {event_data.entity.ids[0]}. Values {event_data.value_dict}"
+            )
+
         if not runs:
             return f"[{severity}] {message}"
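For alert events, _get_html now returns a plain string built from the alert and event objects instead of the run table. A rough illustration of the resulting text, with the field names taken from the hunk above and the values invented:

    # illustrative values only
    severity, message = "high", "failed-job-alert"
    project, uid, values = "my-project", "a1b2c3", {"number_of_events": 3}
    print(f"[{severity}] {message} for project {project} UID {uid}. Values {values}")
    # [high] failed-job-alert for project my-project UID a1b2c3. Values {'number_of_events': 3}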
mlrun/utils/notifications/notification/console.py
CHANGED

@@ -36,6 +36,8 @@ class ConsoleNotification(NotificationBase):
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ):
         severity = self._resolve_severity(severity)
         print(f"[{severity}] {message}")
mlrun/utils/notifications/notification/git.py
CHANGED

@@ -38,6 +38,8 @@ class GitNotification(NotificationBase):
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ):
         git_repo = self.params.get("repo", None)
         git_issue = self.params.get("issue", None)

@@ -50,7 +52,7 @@ class GitNotification(NotificationBase):
         server = self.params.get("server", None)
         gitlab = self.params.get("gitlab", False)
         await self._pr_comment(
-            self._get_html(message, severity, runs, custom_html),
+            self._get_html(message, severity, runs, custom_html, alert, event_data),
             git_repo,
             git_issue,
             merge_request=git_merge_request,
mlrun/utils/notifications/notification/ipython.py
CHANGED

@@ -53,6 +53,8 @@ class IPythonNotification(NotificationBase):
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ):
         if not self._ipython:
             mlrun.utils.helpers.logger.debug(
mlrun/utils/notifications/notification/slack.py
CHANGED

@@ -32,6 +32,7 @@ class SlackNotification(NotificationBase):
         "completed": ":smiley:",
         "running": ":man-running:",
         "error": ":x:",
+        "skipped": ":zzz:",
     }

     async def push(

@@ -42,6 +43,8 @@ class SlackNotification(NotificationBase):
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ):
         webhook = self.params.get("webhook", None) or mlrun.get_secret_or_env(
             "SLACK_WEBHOOK"

@@ -53,7 +56,7 @@ class SlackNotification(NotificationBase):
             )
             return

-        data = self._generate_slack_data(message, severity, runs)
+        data = self._generate_slack_data(message, severity, runs, alert, event_data)

         async with aiohttp.ClientSession() as session:
             async with session.post(webhook, json=data) as response:

@@ -66,57 +69,134 @@ class SlackNotification(NotificationBase):
             mlrun.common.schemas.NotificationSeverity, str
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ) -> dict:
         data = {
-            "blocks":
-                {
-                    "type": "section",
-                    "text": self._get_slack_row(f"[{severity}] {message}"),
-                },
-            ]
+            "blocks": self._generate_slack_header_blocks(severity, message),
         }
         if self.name:
             data["blocks"].append(
                 {"type": "section", "text": self._get_slack_row(self.name)}
             )

-        if
+        if alert:
+            fields = self._get_alert_fields(alert, event_data)
+
+            for i in range(len(fields)):
+                data["blocks"].append({"type": "section", "text": fields[i]})
+        else:
+            if not runs:
+                return data

-            fields.append(self._get_run_line(run))
-            fields.append(self._get_run_result(run))
+            if isinstance(runs, list):
+                runs = mlrun.lists.RunList(runs)

+            fields = [self._get_slack_row("*Runs*"), self._get_slack_row("*Results*")]
+            for run in runs:
+                fields.append(self._get_run_line(run))
+                fields.append(self._get_run_result(run))
+
+            for i in range(0, len(fields), 8):
+                data["blocks"].append({"type": "section", "fields": fields[i : i + 8]})

         return data

+    def _generate_slack_header_blocks(self, severity: str, message: str):
+        header_text = block_text = f"[{severity}] {message}"
+        section_text = None
+
+        # Slack doesn't allow headers to be longer than 150 characters
+        # If there's a comma in the message, split the message at the comma
+        # Otherwise, split the message at 150 characters
+        if len(block_text) > 150:
+            if ", " in block_text and block_text.index(", ") < 149:
+                header_text = block_text.split(",")[0]
+                section_text = block_text[len(header_text) + 2 :]
+            else:
+                header_text = block_text[:150]
+                section_text = block_text[150:]
+        blocks = [
+            {"type": "header", "text": {"type": "plain_text", "text": header_text}}
+        ]
+        if section_text:
+            blocks.append(
+                {
+                    "type": "section",
+                    "text": self._get_slack_row(section_text),
+                }
+            )
+        return blocks
+
+    def _get_alert_fields(
+        self,
+        alert: mlrun.common.schemas.AlertConfig,
+        event_data: mlrun.common.schemas.Event,
+    ) -> list:
+        line = [
+            self._get_slack_row(f":bell: {alert.name} alert has occurred"),
+            self._get_slack_row(f"*Project:*\n{alert.project}"),
+            self._get_slack_row(f"*ID:*\n{event_data.entity.ids[0]}"),
+        ]
+
+        if alert.summary:
+            line.append(
+                self._get_slack_row(
+                    f"*Summary:*\n{mlrun.utils.helpers.format_alert_summary(alert, event_data)}"
+                )
+            )
+
+        if event_data.value_dict:
+            data_lines = []
+            for key, value in event_data.value_dict.items():
+                data_lines.append(f"{key}: {value}")
+            data_text = "\n".join(data_lines)
+            line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
+
+        if (
+            event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
+        ):  # JOB entity
+            uid = event_data.value_dict.get("uid")
+            url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
+            overview_type = "Job overview"
+        else:  # MODEL entity
+            model_name = event_data.value_dict.get("model")
+            model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
+            url = mlrun.utils.helpers.get_model_endpoint_url(
+                alert.project, model_name, model_endpoint_id
+            )
+            overview_type = "Model endpoint"
+
+        line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
+
+        return line
+
     def _get_run_line(self, run: dict) -> dict:
         meta = run["metadata"]
         url = mlrun.utils.helpers.get_ui_url(meta.get("project"), meta.get("uid"))
+
+        # Only show the URL if the run is not a function (serving or mlrun function)
+        kind = run.get("step_kind")
+        state = run["status"].get("state", "")
+        if state != "skipped" and (url and not kind or kind == "run"):
             line = f'<{url}|*{meta.get("name")}*>'
         else:
             line = meta.get("name")
+        if kind:
+            line = f'{line} *({run.get("step_kind", run.get("kind", ""))})*'
         line = f'{self.emojis.get(state, ":question:")} {line}'
         return self._get_slack_row(line)

     def _get_run_result(self, run: dict) -> dict:
         state = run["status"].get("state", "")
         if state == "error":
-            error_status = run["status"].get("error", "")
+            error_status = run["status"].get("error", "") or state
             result = f"*{error_status}*"
         else:
             result = mlrun.utils.helpers.dict_to_str(
                 run["status"].get("results", {}), ", "
             )
-        return self._get_slack_row(result or
+        return self._get_slack_row(result or state)

     @staticmethod
     def _get_slack_row(text: str) -> dict:
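The new _generate_slack_header_blocks works around Slack's 150-character limit on header blocks: a long message is split at the first ", " when it occurs early enough, otherwise hard-wrapped at 150 characters, with the remainder emitted as a separate section block. A standalone sketch of just that splitting rule (same logic as the hunk above, not the package's code):

    def split_header(text):
        header, section = text, None
        if len(text) > 150:
            if ", " in text and text.index(", ") < 149:
                header = text.split(",")[0]
                section = text[len(header) + 2:]
            else:
                header, section = text[:150], text[150:]
        return header, section

    header, section = split_header("[high] drift detected, " + "x" * 160)
    # header -> "[high] drift detected"; section -> the remaining text, rendered as a section block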
mlrun/utils/notifications/notification/webhook.py
CHANGED

@@ -36,6 +36,8 @@ class WebhookNotification(NotificationBase):
         ] = mlrun.common.schemas.NotificationSeverity.INFO,
         runs: typing.Union[mlrun.lists.RunList, list] = None,
         custom_html: str = None,
+        alert: mlrun.common.schemas.AlertConfig = None,
+        event_data: mlrun.common.schemas.Event = None,
     ):
         url = self.params.get("url", None)
         method = self.params.get("method", "post").lower()

@@ -46,9 +48,17 @@ class WebhookNotification(NotificationBase):
         request_body = {
             "message": message,
             "severity": severity,
-            "runs": runs,
         }

+        if runs:
+            request_body["runs"] = runs
+
+        if alert:
+            request_body["alert"] = alert.dict()
+        if event_data:
+            request_body["value"] = event_data.value_dict
+            request_body["id"] = event_data.entity.ids[0]
+
         if custom_html:
             request_body["custom_html"] = custom_html
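With these changes the webhook payload only carries "runs" when there are runs, and gains alert fields when the notification is triggered by an alert event. A hypothetical request body for an alert; the keys come from the hunk above, while the values and the contents of alert.dict() are invented for illustration:

    request_body = {
        "message": "data-drift-detected",
        "severity": "high",
        "alert": {"name": "drift", "project": "my-project"},  # alert.dict(), fields assumed
        "value": {"kl_divergence": 0.82},                      # event_data.value_dict
        "id": "endpoint-1234",                                 # event_data.entity.ids[0]
    }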
mlrun/utils/notifications/notification_pusher.py
CHANGED

@@ -15,10 +15,17 @@
 import asyncio
 import datetime
 import os
+import re
 import traceback
 import typing
 from concurrent.futures import ThreadPoolExecutor

+import kfp
+import mlrun_pipelines.common.ops
+import mlrun_pipelines.models
+
+import mlrun.common.constants as mlrun_constants
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.config
 import mlrun.db.base

@@ -233,25 +240,12 @@ class NotificationPusher(_NotificationPusherBase):
             resource = "Run"
             runs = [run.to_dict()]

-        if
-            resource =
+        if mlrun_constants.MLRunInternalLabels.workflow in run.metadata.labels:
+            resource = mlrun_constants.MLRunInternalLabels.workflow
             custom_message = (
                 f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
             )
-            workflow_id = run.status.results.get("workflow_id", None)
-            if workflow_id:
-                workflow_runs = db.list_runs(
-                    project=run.metadata.project,
-                    labels=f"workflow={workflow_id}",
-                )
-                logger.debug(
-                    "Found workflow runs, extending notification runs",
-                    workflow_id=workflow_id,
-                    workflow_runs_amount=len(workflow_runs),
-                )
-                runs.extend(workflow_runs)
+            runs.extend(self.get_workflow_steps(run))

         message = (
             self.messages.get(run.state(), "").format(resource=resource)

@@ -395,6 +389,137 @@ class NotificationPusher(_NotificationPusherBase):
             mask_params=False,
         )

+    def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
+        steps = []
+        db = mlrun.get_run_db()
+
+        def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+            try:
+                _run = db.list_runs(
+                    project=run.metadata.project,
+                    labels=f"mlrun_constants.MLRunInternalLabels.runner_pod={_step.node_name}",
+                )[0]
+            except IndexError:
+                _run = {
+                    "metadata": {
+                        "name": _step.display_name,
+                        "project": run.metadata.project,
+                    },
+                }
+            _run["step_kind"] = _step.step_type
+            if _step.skipped:
+                _run.setdefault("status", {})["state"] = (
+                    mlrun.common.runtimes.constants.RunStates.skipped
+                )
+            steps.append(_run)
+
+        def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
+            project, name, hash_key = self._extract_function_uri(
+                _step.get_annotation("mlrun/function-uri")
+            )
+            if name:
+                try:
+                    function = db.get_function(
+                        project=project, name=name, hash_key=hash_key
+                    )
+                except mlrun.errors.MLRunNotFoundError:
+                    # If the function is not found (if build failed for example), we will create a dummy
+                    # function object for the notification to display the function name
+                    function = {
+                        "metadata": {
+                            "name": name,
+                            "project": project,
+                            "hash_key": hash_key,
+                        },
+                    }
+                pod_phase = _step.phase
+                if _step.skipped:
+                    state = mlrun.common.schemas.FunctionState.skipped
+                else:
+                    state = mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
+                        pod_phase
+                    )
+                function["status"] = {"state": state}
+                if isinstance(function["metadata"].get("updated"), datetime.datetime):
+                    function["metadata"]["updated"] = function["metadata"][
+                        "updated"
+                    ].isoformat()
+                function["step_kind"] = _step.step_type
+                steps.append(function)
+
+        step_methods = {
+            mlrun_pipelines.common.ops.PipelineRunType.run: _add_run_step,
+            mlrun_pipelines.common.ops.PipelineRunType.build: _add_deploy_function_step,
+            mlrun_pipelines.common.ops.PipelineRunType.deploy: _add_deploy_function_step,
+        }
+
+        workflow_id = run.status.results.get("workflow_id", None)
+        if not workflow_id:
+            return steps
+
+        workflow_manifest = self._get_workflow_manifest(workflow_id)
+        if not workflow_manifest:
+            return steps
+
+        try:
+            for step in workflow_manifest.get_steps():
+                step_method = step_methods.get(step.step_type)
+                if step_method:
+                    step_method(step)
+            return steps
+        except Exception:
+            # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
+            logger.warning(
+                "Failed to extract workflow steps from workflow manifest, "
+                "returning all runs with the workflow id label",
+                workflow_id=workflow_id,
+                traceback=traceback.format_exc(),
+            )
+            return db.list_runs(
+                project=run.metadata.project,
+                labels=f"workflow={workflow_id}",
+            )
+
+    @staticmethod
+    def _get_workflow_manifest(
+        workflow_id: str,
+    ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
+        kfp_url = mlrun.mlconf.resolve_kfp_url(mlrun.mlconf.namespace)
+        if not kfp_url:
+            raise mlrun.errors.MLRunNotFoundError(
+                "KubeFlow Pipelines is not configured"
+            )
+
+        kfp_client = kfp.Client(host=kfp_url)
+
+        # arbitrary timeout of 5 seconds, the workflow should be done by now
+        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
+        if not kfp_run:
+            return None
+
+        kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
+        return kfp_run.workflow_manifest()
+
+    def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
+        """
+        Extract the project, name, and hash key from a function uri.
+        Examples:
+        - "project/name@hash_key" returns project, name, hash_key
+        - "project/name returns" project, name, ""
+        """
+        project, name, hash_key = None, None, None
+        hashed_pattern = r"^(.+)/(.+)@(.+)$"
+        pattern = r"^(.+)/(.+)$"
+        match = re.match(hashed_pattern, function_uri)
+        if match:
+            project, name, hash_key = match.groups()
+        else:
+            match = re.match(pattern, function_uri)
+            if match:
+                project, name = match.groups()
+                hash_key = ""
+        return project, name, hash_key
+

 class CustomNotificationPusher(_NotificationPusherBase):
     def __init__(self, notification_types: list[str] = None):

@@ -413,6 +538,12 @@ class CustomNotificationPusher(_NotificationPusherBase):
             if notification.is_async
         }

+    @property
+    def notifications(self):
+        notifications = self._sync_notifications.copy()
+        notifications.update(self._async_notifications)
+        return notifications
+
     def push(
         self,
         message: str,
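The new _extract_function_uri helper parses the "mlrun/function-uri" annotation with two regular expressions, preferring the hashed form. A quick sketch of the same patterns applied to made-up URIs:

    import re

    hashed_pattern = r"^(.+)/(.+)@(.+)$"   # project/name@hash_key
    pattern = r"^(.+)/(.+)$"               # project/name

    print(re.match(hashed_pattern, "my-project/trainer@0f27c1e").groups())
    # ('my-project', 'trainer', '0f27c1e')
    print(re.match(pattern, "my-project/trainer").groups())
    # ('my-project', 'trainer')  -- hash_key then falls back to ""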
mlrun/utils/retryer.py
CHANGED

@@ -117,7 +117,7 @@ class Retryer:
             self._raise_last_exception()

     def _prepare(self):
-        self.start_time = time.
+        self.start_time = time.monotonic()
         self.last_exception = None

         # Check if backoff is just a simple interval

@@ -138,6 +138,7 @@ class Retryer:
         except mlrun.errors.MLRunFatalFailureError as exc:
             raise exc.original_exception
         except Exception as exc:
+            self.last_exception = exc
             return (
                 None,
                 self.last_exception,

@@ -172,7 +173,7 @@ class Retryer:
         ) from self.last_exception

     def _timeout_exceeded(self, next_interval=None):
-        now = time.
+        now = time.monotonic()
         if next_interval:
             now = now + next_interval
         return self.timeout is not None and now >= self.start_time + self.timeout
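Switching the retryer to time.monotonic() makes timeout accounting immune to wall-clock jumps (NTP corrections, manual clock changes), which wall-clock based timing is not. A small sketch of the measurement pattern:

    import time

    start = time.monotonic()
    time.sleep(0.1)
    elapsed = time.monotonic() - start   # ~0.1s even if the system clock was adjusted meanwhile
    print(f"elapsed: {elapsed:.3f}s")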
mlrun/utils/v3io_clients.py
CHANGED

@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#

 from v3io.dataplane import Client as V3IOClient
 from v3io_frames import Client as get_client
mlrun/utils/version/version.json
CHANGED