mlrun 1.7.0rc17__py3-none-any.whl → 1.7.0rc18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.

Files changed (55)
  1. mlrun/alerts/alert.py +1 -1
  2. mlrun/artifacts/manager.py +5 -1
  3. mlrun/common/runtimes/constants.py +3 -0
  4. mlrun/common/schemas/__init__.py +1 -1
  5. mlrun/common/schemas/alert.py +31 -9
  6. mlrun/common/schemas/client_spec.py +1 -0
  7. mlrun/common/schemas/function.py +4 -0
  8. mlrun/common/schemas/model_monitoring/__init__.py +3 -1
  9. mlrun/common/schemas/model_monitoring/constants.py +20 -1
  10. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  11. mlrun/common/schemas/model_monitoring/model_endpoints.py +17 -6
  12. mlrun/config.py +2 -0
  13. mlrun/data_types/to_pandas.py +5 -5
  14. mlrun/datastore/datastore.py +6 -2
  15. mlrun/datastore/redis.py +2 -2
  16. mlrun/datastore/s3.py +5 -0
  17. mlrun/datastore/sources.py +111 -6
  18. mlrun/datastore/targets.py +2 -2
  19. mlrun/db/base.py +5 -1
  20. mlrun/db/httpdb.py +22 -3
  21. mlrun/db/nopdb.py +5 -1
  22. mlrun/errors.py +6 -0
  23. mlrun/feature_store/retrieval/conversion.py +5 -5
  24. mlrun/feature_store/retrieval/job.py +3 -2
  25. mlrun/feature_store/retrieval/spark_merger.py +2 -1
  26. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +2 -2
  27. mlrun/model_monitoring/db/stores/base/store.py +16 -3
  28. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +44 -43
  29. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +190 -91
  30. mlrun/model_monitoring/db/tsdb/__init__.py +35 -6
  31. mlrun/model_monitoring/db/tsdb/base.py +25 -18
  32. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  33. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +207 -0
  34. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  35. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +231 -0
  36. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +73 -72
  37. mlrun/model_monitoring/db/v3io_tsdb_reader.py +217 -16
  38. mlrun/model_monitoring/helpers.py +32 -0
  39. mlrun/model_monitoring/stream_processing.py +7 -4
  40. mlrun/model_monitoring/writer.py +18 -13
  41. mlrun/package/utils/_formatter.py +2 -2
  42. mlrun/projects/project.py +33 -8
  43. mlrun/render.py +8 -5
  44. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  45. mlrun/utils/async_http.py +25 -5
  46. mlrun/utils/helpers.py +20 -1
  47. mlrun/utils/notifications/notification/slack.py +27 -7
  48. mlrun/utils/notifications/notification_pusher.py +38 -40
  49. mlrun/utils/version/version.json +2 -2
  50. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/METADATA +7 -2
  51. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/RECORD +55 -51
  52. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/LICENSE +0 -0
  53. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/WHEEL +0 -0
  54. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/entry_points.txt +0 -0
  55. {mlrun-1.7.0rc17.dist-info → mlrun-1.7.0rc18.dist-info}/top_level.txt +0 -0
mlrun/model_monitoring/helpers.py CHANGED
@@ -111,6 +111,24 @@ def get_connection_string(secret_provider: typing.Callable = None) -> str:
      )
  
  
+ def get_tsdb_connection_string(
+     secret_provider: typing.Optional[typing.Callable] = None,
+ ) -> str:
+     """Get TSDB connection string from the project secret. If wasn't set, take it from the system
+     configurations.
+     :param secret_provider: An optional secret provider to get the connection string secret.
+     :return: Valid TSDB connection string.
+     """
+ 
+     return (
+         mlrun.get_secret_or_env(
+             key=mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION,
+             secret_provider=secret_provider,
+         )
+         or mlrun.mlconf.model_endpoint_monitoring.tsdb_connection
+     )
+ 
+ 
  def batch_dict2timedelta(batch_dict: _BatchDict) -> datetime.timedelta:
      """
      Convert a batch dictionary to timedelta.
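
A minimal usage sketch of the new helper (the provider callable and the taosws URL are hypothetical; the import paths follow the references in this diff):

```python
import mlrun.model_monitoring.helpers as mm_helpers
from mlrun.common.schemas.model_monitoring import ProjectSecretKeys

# Hypothetical secret mapping; real deployments read project (Kubernetes) secrets.
secrets = {ProjectSecretKeys.TSDB_CONNECTION: "taosws://user:password@host:6041"}

# The secret value wins; otherwise the helper falls back to
# mlrun.mlconf.model_endpoint_monitoring.tsdb_connection.
connection_string = mm_helpers.get_tsdb_connection_string(secret_provider=secrets.get)
```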
@@ -260,3 +278,17 @@ def get_endpoint_record(project: str, endpoint_id: str):
          project=project,
      )
      return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+ 
+ 
+ def get_result_instance_fqn(
+     model_endpoint_id: str, app_name: str, result_name: str
+ ) -> str:
+     return f"{model_endpoint_id}.{app_name}.result.{result_name}"
+ 
+ 
+ def get_default_result_instance_fqn(model_endpoint_id: str) -> str:
+     return get_result_instance_fqn(
+         model_endpoint_id,
+         mm_constants.HistogramDataDriftApplicationConstants.NAME,
+         mm_constants.HistogramDataDriftApplicationConstants.GENERAL_RESULT_NAME,
+     )
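
The FQN format follows directly from the f-string above; a small example with made-up identifiers:

```python
from mlrun.model_monitoring.helpers import get_result_instance_fqn

# "<endpoint_id>.<app_name>.result.<result_name>"; all values below are made up.
fqn = get_result_instance_fqn("ep-1234", "my-drift-app", "general_drift")
assert fqn == "ep-1234.my-drift-app.result.general_drift"
```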
mlrun/model_monitoring/stream_processing.py CHANGED
@@ -136,7 +136,11 @@ class EventStreamProcessor:
          self.tsdb_batching_max_events = tsdb_batching_max_events
          self.tsdb_batching_timeout_secs = tsdb_batching_timeout_secs
  
-     def apply_monitoring_serving_graph(self, fn: mlrun.runtimes.ServingRuntime) -> None:
+     def apply_monitoring_serving_graph(
+         self,
+         fn: mlrun.runtimes.ServingRuntime,
+         tsdb_service_provider: typing.Optional[typing.Callable] = None,
+     ) -> None:
          """
          Apply monitoring serving graph to a given serving function. The following serving graph includes about 4 main
          parts that each one them includes several steps of different operations that are executed on the events from
@@ -163,6 +167,7 @@ class EventStreamProcessor:
          using CE, the parquet target path is based on the defined MLRun artifact path.
  
          :param fn: A serving function.
+         :param tsdb_service_provider: An optional callable function that provides the TSDB connection string.
          """
  
          graph = typing.cast(
@@ -322,15 +327,13 @@ class EventStreamProcessor:
  
          # TSDB branch (skip to Prometheus if in CE env)
          if not mlrun.mlconf.is_ce_mode():
-             # TSDB branch
              tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-                 project=self.project,
+                 project=self.project, secret_provider=tsdb_service_provider
              )
              tsdb_connector.apply_monitoring_stream_steps(graph=graph)
  
          else:
              # Prometheus
- 
              # Increase the prediction counter by 1 and update the latency value
              graph.add_step(
                  "IncCounter",
mlrun/model_monitoring/writer.py CHANGED
@@ -29,7 +29,7 @@ from mlrun.common.schemas.model_monitoring.constants import (
      WriterEventKind,
  )
  from mlrun.common.schemas.notification import NotificationKind, NotificationSeverity
- from mlrun.model_monitoring.helpers import get_endpoint_record
+ from mlrun.model_monitoring.helpers import get_endpoint_record, get_result_instance_fqn
  from mlrun.serving.utils import StepToDict
  from mlrun.utils import logger
  from mlrun.utils.notifications.notification_pusher import CustomNotificationPusher
@@ -101,7 +101,7 @@ class ModelMonitoringWriter(StepToDict):
  
      kind = "monitoring_application_stream_pusher"
  
-     def __init__(self, project: str) -> None:
+     def __init__(self, project: str, tsdb_secret_provider=None) -> None:
          self.project = project
          self.name = project  # required for the deployment process
  
@@ -113,24 +113,24 @@ class ModelMonitoringWriter(StepToDict):
              project=self.project
          )
          self._tsdb_connector = mlrun.model_monitoring.get_tsdb_connector(
-             project=self.project,
+             project=self.project, secret_provider=tsdb_secret_provider
          )
          self._endpoints_records = {}
  
      @staticmethod
      def _generate_event_on_drift(
-         model_endpoint: str, drift_status: str, event_value: dict, project_name: str
+         entity_id: str, drift_status: str, event_value: dict, project_name: str
      ) -> None:
-         logger.info("Sending an alert")
+         logger.info("Sending an event")
          entity = mlrun.common.schemas.alert.EventEntities(
-             kind=alert_objects.EventEntityKind.MODEL,
+             kind=alert_objects.EventEntityKind.MODEL_ENDPOINT_RESULT,
              project=project_name,
-             ids=[model_endpoint],
+             ids=[entity_id],
          )
          event_kind = (
-             alert_objects.EventKind.DRIFT_DETECTED
+             alert_objects.EventKind.DATA_DRIFT_DETECTED
              if drift_status == ResultStatusApp.detected.value
-             else alert_objects.EventKind.DRIFT_SUSPECTED
+             else alert_objects.EventKind.DATA_DRIFT_SUSPECTED
          )
          event_data = mlrun.common.schemas.Event(
              kind=event_kind, entity=entity, value_dict=event_value
@@ -138,7 +138,7 @@ class ModelMonitoringWriter(StepToDict):
          mlrun.get_run_db().generate_event(event_kind, event_data)
  
      @staticmethod
-     def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, str]:
+     def _reconstruct_event(event: _RawEvent) -> tuple[_AppResultEvent, WriterEventKind]:
          """
          Modify the raw event into the expected monitoring application event
          schema as defined in `mlrun.common.schemas.model_monitoring.constants.WriterEvent`
@@ -179,12 +179,13 @@ class ModelMonitoringWriter(StepToDict):
      def do(self, event: _RawEvent) -> None:
          event, kind = self._reconstruct_event(event)
          logger.info("Starting to write event", event=event)
- 
          self._tsdb_connector.write_application_event(event=event.copy(), kind=kind)
          self._app_result_store.write_application_event(event=event.copy(), kind=kind)
+ 
          logger.info("Completed event DB writes")
  
-         _Notifier(event=event, notification_pusher=self._custom_notifier).notify()
+         if kind == WriterEventKind.RESULT:
+             _Notifier(event=event, notification_pusher=self._custom_notifier).notify()
  
          if (
              mlrun.mlconf.alerts.mode == mlrun.common.schemas.alert.AlertsModes.enabled
@@ -208,7 +209,11 @@ class ModelMonitoringWriter(StepToDict):
                  "result_value": event[ResultData.RESULT_VALUE],
              }
              self._generate_event_on_drift(
-                 event[WriterEvent.ENDPOINT_ID],
+                 get_result_instance_fqn(
+                     event[WriterEvent.ENDPOINT_ID],
+                     event[WriterEvent.APPLICATION_NAME],
+                     event[ResultData.RESULT_NAME],
+                 ),
                  event[ResultData.RESULT_STATUS],
                  event_value,
                  self.project,
mlrun/package/utils/_formatter.py CHANGED
@@ -142,11 +142,11 @@ class _YAMLFormatter(_Formatter):
  
          :param obj: The object to write.
          :param file_path: The file path to write to.
-         :param dump_kwargs: Additional keyword arguments to pass to the `yaml.dump` method of the formatter in use.
+         :param dump_kwargs: Additional keyword arguments to pass to the `yaml.safe_dump` method of the formatter in use.
          """
          dump_kwargs = dump_kwargs or cls.DEFAULT_DUMP_KWARGS
          with open(file_path, "w") as file:
-             yaml.dump(obj, file, **dump_kwargs)
+             yaml.safe_dump(obj, file, **dump_kwargs)
  
      @classmethod
      def read(cls, file_path: str) -> Union[list, dict]:
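
The switch from `yaml.dump` to `yaml.safe_dump` here (and in the Databricks wrapper below) restricts serialization to standard YAML tags; a minimal illustration of the difference:

```python
import yaml

class Custom:
    pass

# The default Dumper serializes arbitrary Python objects with !!python/object
# tags, which only an unsafe Loader can read back.
print(yaml.dump(Custom()))       # "!!python/object:__main__.Custom {}"

# safe_dump accepts only plain scalars, lists, and mappings, and raises
# RepresenterError for anything else, keeping output safe_load-compatible.
print(yaml.safe_dump({"a": 1}))  # "a: 1"
```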
mlrun/projects/project.py CHANGED
@@ -39,6 +39,7 @@ import yaml
  from mlrun_pipelines.models import PipelineNodeWrapper
  
  import mlrun.common.helpers
+ import mlrun.common.runtimes.constants
  import mlrun.common.schemas.artifact
  import mlrun.common.schemas.model_monitoring.constants as mm_constants
  import mlrun.db
@@ -3098,17 +3099,18 @@ class MlrunProject(ModelObj):
  
      def set_model_monitoring_credentials(
          self,
-         access_key: str = None,
-         endpoint_store_connection: str = None,
-         stream_path: str = None,
+         access_key: Optional[str] = None,
+         endpoint_store_connection: Optional[str] = None,
+         stream_path: Optional[str] = None,
+         tsdb_connection: Optional[str] = None,
      ):
          """Set the credentials that will be used by the project's model monitoring
          infrastructure functions.
  
-         :param access_key: Model Monitoring access key for managing user permissions
          :param access_key: Model Monitoring access key for managing user permissions
          :param endpoint_store_connection: Endpoint store connection string
          :param stream_path: Path to the model monitoring stream
+         :param tsdb_connection: Connection string to the time series database
          """
  
          secrets_dict = {}
@@ -3131,6 +3133,16 @@ class MlrunProject(ModelObj):
                  mlrun.common.schemas.model_monitoring.ProjectSecretKeys.STREAM_PATH
              ] = stream_path
  
+         if tsdb_connection:
+             if not tsdb_connection.startswith("taosws://"):
+                 raise mlrun.errors.MLRunInvalidArgumentError(
+                     "Currently only TDEngine websocket connection is supported for non-v3io TSDB,"
+                     "please provide a full URL (e.g. taosws://user:password@host:port)"
+                 )
+             secrets_dict[
+                 mlrun.common.schemas.model_monitoring.ProjectSecretKeys.TSDB_CONNECTION
+             ] = tsdb_connection
+ 
          self.set_secrets(
              secrets=secrets_dict,
              provider=mlrun.common.schemas.SecretProviderName.kubernetes,
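
A usage sketch for the extended credentials API, with placeholder connection values:

```python
import mlrun

project = mlrun.get_or_create_project("my-project", context="./")

# tsdb_connection must be a full taosws:// URL, otherwise
# MLRunInvalidArgumentError is raised (see the validation above).
project.set_model_monitoring_credentials(
    endpoint_store_connection="v3io",  # placeholder
    stream_path="v3io",                # placeholder
    tsdb_connection="taosws://user:password@tdengine-host:6041",
)
```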
@@ -3689,7 +3701,10 @@ class MlrunProject(ModelObj):
          name: Optional[str] = None,
          uid: Optional[Union[str, list[str]]] = None,
          labels: Optional[Union[str, list[str]]] = None,
-         state: Optional[str] = None,
+         state: Optional[
+             mlrun.common.runtimes.constants.RunStates
+         ] = None,  # Backward compatibility
+         states: typing.Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
          sort: bool = True,
          last: int = 0,
          iter: bool = False,
@@ -3723,10 +3738,11 @@ class MlrunProject(ModelObj):
          :param labels: A list of labels to filter by. Label filters work by either filtering a specific value
                         of a label (i.e. list("key=value")) or by looking for the existence of a given
                         key (i.e. "key").
-         :param state: List only runs whose state is specified.
+         :param state: Deprecated - List only runs whose state is specified.
+         :param states: List only runs whose state is one of the provided states.
          :param sort: Whether to sort the result according to their start time. Otherwise, results will be
                       returned by their internal order in the DB (order will not be guaranteed).
-         :param last: Deprecated - currently not used (will be removed in 1.8.0).
+         :param last: Deprecated - currently not used (will be removed in 1.9.0).
          :param iter: If ``True`` return runs from all iterations. Otherwise, return only runs whose ``iter`` is 0.
          :param start_time_from: Filter by run start time in ``[start_time_from, start_time_to]``.
          :param start_time_to: Filter by run start time in ``[start_time_from, start_time_to]``.
@@ -3734,13 +3750,22 @@ class MlrunProject(ModelObj):
                                       last_update_time_to)``.
          :param last_update_time_to: Filter by run last update time in ``(last_update_time_from, last_update_time_to)``.
          """
+         if state:
+             # TODO: Remove this in 1.9.0
+             warnings.warn(
+                 "'state' is deprecated and will be removed in 1.9.0. Use 'states' instead.",
+                 FutureWarning,
+             )
+ 
          db = mlrun.db.get_run_db(secrets=self._secrets)
          return db.list_runs(
              name,
              uid,
              self.metadata.name,
              labels=labels,
-             state=state,
+             states=mlrun.utils.helpers.as_list(state)
+             if state is not None
+             else states or None,
              sort=sort,
              last=last,
              iter=iter,
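
With this change, callers pass a list of states; the old single-state keyword still works but emits a FutureWarning. A sketch (state names assumed from mlrun's RunStates constants):

```python
import mlrun.common.runtimes.constants as run_constants

# `project` is an MlrunProject instance.
# New style: filter on several states at once.
runs = project.list_runs(
    states=[
        run_constants.RunStates.completed,
        run_constants.RunStates.error,
    ]
)

# Old style: still accepted, but warns and is wrapped into a one-element list.
runs = project.list_runs(state=run_constants.RunStates.completed)
```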
mlrun/render.py CHANGED
@@ -126,7 +126,7 @@ def artifacts_html(
  
          if not attribute_value:
              mlrun.utils.logger.warning(
-                 "Artifact is incomplete, omitting from output (most likely due to a failed artifact logging)",
+                 f"Artifact required attribute {attribute_name} is missing, omitting from output",
                  artifact_key=key,
              )
              continue
@@ -400,14 +400,17 @@ def runs_to_html(
      else:
          df["labels"] = df["labels"].apply(dict_html)
          df["inputs"] = df["inputs"].apply(inputs_html)
-         if df["artifact_uris"][0]:
-             df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
-             df.drop("artifacts", axis=1, inplace=True)
-         else:
+         if df["artifacts"][0]:
              df["artifacts"] = df["artifacts"].apply(
                  lambda artifacts: artifacts_html(artifacts, "target_path"),
              )
              df.drop("artifact_uris", axis=1, inplace=True)
+         elif df["artifact_uris"][0]:
+             df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
+             df.drop("artifacts", axis=1, inplace=True)
+         else:
+             df.drop("artifacts", axis=1, inplace=True)
+             df.drop("artifact_uris", axis=1, inplace=True)
  
      def expand_error(x):
          if x["state"] == "error":
mlrun/runtimes/databricks_job/databricks_wrapper.py CHANGED
@@ -99,7 +99,7 @@ def save_credentials(
      credentials["DATABRICKS_CLUSTER_ID"] = cluster_id
  
      with open(credentials_path, "w") as yaml_file:
-         yaml.dump(credentials, yaml_file, default_flow_style=False)
+         yaml.safe_dump(credentials, yaml_file, default_flow_style=False)
  
  
  def run_mlrun_databricks_job(
mlrun/utils/async_http.py CHANGED
@@ -24,7 +24,7 @@ from aiohttp_retry import ExponentialRetry, RequestParams, RetryClient, RetryOpt
  from aiohttp_retry.client import _RequestContext
  
  from mlrun.config import config
- from mlrun.errors import err_to_str
+ from mlrun.errors import err_to_str, raise_for_status
  
  from .helpers import logger as mlrun_logger
  
@@ -46,12 +46,21 @@ class AsyncClientWithRetry(RetryClient):
          *args,
          **kwargs,
      ):
+         # do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
+         blacklisted_methods = (
+             blacklisted_methods
+             if blacklisted_methods is not None
+             else [
+                 "POST",
+                 "PUT",
+                 "PATCH",
+             ]
+         )
          super().__init__(
              *args,
              retry_options=ExponentialRetryOverride(
                  retry_on_exception=retry_on_exception,
-                 # do not retry on PUT / PATCH as they might have side effects (not truly idempotent)
-                 blacklisted_methods=blacklisted_methods or ["POST", "PUT", "PATCH"],
+                 blacklisted_methods=blacklisted_methods,
                  attempts=max_retries,
                  statuses=retry_on_status_codes,
                  factor=retry_backoff_factor,
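
Moving the default out of the `or` expression changes how an explicitly empty blacklist is treated: only `None` now falls back to the default. A toy comparison of the two resolutions:

```python
DEFAULT = ["POST", "PUT", "PATCH"]

def resolve_old(blacklisted_methods):
    # old behavior: any falsy value ([], None) falls back to the default
    return blacklisted_methods or DEFAULT

def resolve_new(blacklisted_methods):
    # new behavior: only None falls back, so [] really disables the blacklist
    return blacklisted_methods if blacklisted_methods is not None else DEFAULT

assert resolve_old([]) == DEFAULT
assert resolve_new([]) == []
```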
@@ -63,6 +72,12 @@ class AsyncClientWithRetry(RetryClient):
              **kwargs,
          )
  
+     def methods_blacklist_update_required(self, new_blacklist: str):
+         self._retry_options: ExponentialRetryOverride
+         return set(self._retry_options.blacklisted_methods).difference(
+             set(new_blacklist)
+         )
+ 
      def _make_requests(
          self,
          params_list: list[RequestParams],
@@ -173,7 +188,7 @@ class _CustomRequestContext(_RequestContext):
          last_attempt = current_attempt == self._retry_options.attempts
          if self._is_status_code_ok(response.status) or last_attempt:
              if self._raise_for_status:
-                 response.raise_for_status()
+                 raise_for_status(response)
  
              self._response = response
              return response
@@ -275,6 +290,11 @@ class _CustomRequestContext(_RequestContext):
          if isinstance(exc.os_error, exc_type):
              return
          if exc.__cause__:
-             return self.verify_exception_type(exc.__cause__)
+             # If the cause exception is retriable, return, otherwise, raise the original exception
+             try:
+                 self.verify_exception_type(exc.__cause__)
+             except Exception:
+                 raise exc
+             return
          else:
              raise exc
mlrun/utils/helpers.py CHANGED
@@ -973,6 +973,15 @@ def get_ui_url(project, uid=None):
      return url
  
  
+ def get_model_endpoint_url(project, model_name, model_endpoint_id):
+     url = ""
+     if mlrun.mlconf.resolve_ui_url():
+         url = f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}/models"
+         if model_name:
+             url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
+     return url
+ 
+ 
  def get_workflow_url(project, id=None):
      url = ""
      if mlrun.mlconf.resolve_ui_url():
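
Given the f-strings above, the helper produces URLs of the following shape (the UI base URL, the `projects` prefix, and the identifiers are assumed placeholder values):

```python
import mlrun.utils.helpers

# Assuming mlrun.mlconf.resolve_ui_url() returns "https://mlrun-ui.example.com"
# and mlrun.mlconf.ui.projects_prefix is "projects":
url = mlrun.utils.helpers.get_model_endpoint_url("my-project", "churn-model", "ep-1234")
# -> "https://mlrun-ui.example.com/projects/my-project/models"
#    "/model-endpoints/churn-model/ep-1234/overview"
```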
@@ -1183,7 +1192,7 @@ def calculate_dataframe_hash(dataframe: pandas.DataFrame):
      return hashlib.sha1(pandas.util.hash_pandas_object(dataframe).values).hexdigest()
  
  
- def template_artifact_path(artifact_path, project, run_uid="project"):
+ def template_artifact_path(artifact_path, project, run_uid=None):
      """
      Replace {{run.uid}} with the run uid and {{project}} with the project name in the artifact path.
      If no run uid is provided, the word `project` will be used instead as it is assumed to be a project
@@ -1191,6 +1200,7 @@ def template_artifact_path(artifact_path, project, run_uid="project"):
      """
      if not artifact_path:
          return artifact_path
+     run_uid = run_uid or "project"
      artifact_path = artifact_path.replace("{{run.uid}}", run_uid)
      artifact_path = _fill_project_path_template(artifact_path, project)
      return artifact_path
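
Behavior is unchanged for callers that omit `run_uid`; the fallback moved into the body so that an explicit `None` is handled too. For example (the path is hypothetical):

```python
from mlrun.utils.helpers import template_artifact_path

path = "v3io:///projects/{{project}}/artifacts/{{run.uid}}"

template_artifact_path(path, "demo", run_uid="abc123")
# -> "v3io:///projects/demo/artifacts/abc123"

template_artifact_path(path, "demo")  # run_uid=None falls back to "project"
# -> "v3io:///projects/demo/artifacts/project"
```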
@@ -1603,3 +1613,12 @@ def validate_component_version_compatibility(
      if parsed_current_version < parsed_min_version:
          return False
      return True
+ 
+ 
+ def format_alert_summary(
+     alert: mlrun.common.schemas.AlertConfig, event_data: mlrun.common.schemas.Event
+ ) -> str:
+     result = alert.summary.replace("{{project}}", alert.project)
+     result = result.replace("{{name}}", alert.name)
+     result = result.replace("{{entity}}", event_data.entity.ids[0])
+     return result
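
The new helper performs plain placeholder substitution on the alert summary; a small example with made-up values:

```python
# Given:
#   alert.summary         = "{{project}}: {{name}} fired on {{entity}}"
#   alert.project         = "my-project"
#   alert.name            = "drift-alert"
#   event_data.entity.ids = ["ep-1234.my-drift-app.result.general_drift"]
summary = format_alert_summary(alert, event_data)
assert summary == "my-project: drift-alert fired on ep-1234.my-drift-app.result.general_drift"
```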
mlrun/utils/notifications/notification/slack.py CHANGED
@@ -32,6 +32,7 @@ class SlackNotification(NotificationBase):
          "completed": ":smiley:",
          "running": ":man-running:",
          "error": ":x:",
+         "skipped": ":zzz:",
      }
  
      async def push(
@@ -135,8 +136,16 @@ class SlackNotification(NotificationBase):
          line = [
              self._get_slack_row(f":bell: {alert.name} alert has occurred"),
              self._get_slack_row(f"*Project:*\n{alert.project}"),
-             self._get_slack_row(f"*UID:*\n{event_data.entity.ids[0]}"),
+             self._get_slack_row(f"*ID:*\n{event_data.entity.ids[0]}"),
          ]
+ 
+         if alert.summary:
+             line.append(
+                 self._get_slack_row(
+                     f"*Summary:*\n{mlrun.utils.helpers.format_alert_summary(alert, event_data)}"
+                 )
+             )
+ 
          if event_data.value_dict:
              data_lines = []
              for key, value in event_data.value_dict.items():
@@ -144,10 +153,21 @@ class SlackNotification(NotificationBase):
              data_text = "\n".join(data_lines)
              line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
  
-         if url := mlrun.utils.helpers.get_ui_url(
-             alert.project, event_data.entity.ids[0]
-         ):
-             line.append(self._get_slack_row(f"*Overview:*\n<{url}|*Job overview*>"))
+         if (
+             event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
+         ):  # JOB entity
+             uid = event_data.value_dict.get("uid")
+             url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
+             overview_type = "Job overview"
+         else:  # MODEL entity
+             model_name = event_data.value_dict.get("model")
+             model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
+             url = mlrun.utils.helpers.get_model_endpoint_url(
+                 alert.project, model_name, model_endpoint_id
+             )
+             overview_type = "Model endpoint"
+ 
+         line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
  
          return line
  
@@ -157,11 +177,11 @@ class SlackNotification(NotificationBase):
  
          # Only show the URL if the run is not a function (serving or mlrun function)
          kind = run.get("step_kind")
-         if url and not kind or kind == "run":
+         state = run["status"].get("state", "")
+         if state != "skipped" and (url and not kind or kind == "run"):
              line = f'<{url}|*{meta.get("name")}*>'
          else:
              line = meta.get("name")
-         state = run["status"].get("state", "")
          if kind:
              line = f'{line} *({run.get("step_kind", run.get("kind", ""))})*'
          line = f'{self.emojis.get(state, ":question:")} {line}'
mlrun/utils/notifications/notification_pusher.py CHANGED
@@ -14,7 +14,6 @@
  
  import asyncio
  import datetime
- import json
  import os
  import re
  import traceback
@@ -23,6 +22,7 @@ from concurrent.futures import ThreadPoolExecutor
  
  import kfp
  import mlrun_pipelines.common.ops
+ import mlrun_pipelines.models
  
  import mlrun.common.runtimes.constants
  import mlrun.common.schemas
@@ -392,17 +392,29 @@ class NotificationPusher(_NotificationPusherBase):
          steps = []
          db = mlrun.get_run_db()
  
-         def _add_run_step(_node_name, _node_template, _step_kind):
-             _run = db.list_runs(
-                 project=run.metadata.project,
-                 labels=f"mlrun/runner-pod={_node_name}",
-             )[0]
-             _run["step_kind"] = _step_kind
+         def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+             try:
+                 _run = db.list_runs(
+                     project=run.metadata.project,
+                     labels=f"mlrun/runner-pod={_step.node_name}",
+                 )[0]
+             except IndexError:
+                 _run = {
+                     "metadata": {
+                         "name": _step.display_name,
+                         "project": run.metadata.project,
+                     },
+                 }
+             _run["step_kind"] = _step.step_type
+             if _step.skipped:
+                 _run.setdefault("status", {})["state"] = (
+                     mlrun.common.runtimes.constants.RunStates.skipped
+                 )
              steps.append(_run)
  
-         def _add_deploy_function_step(_, _node_template, _step_kind):
+         def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
              project, name, hash_key = self._extract_function_uri(
-                 _node_template["metadata"]["annotations"]["mlrun/function-uri"]
+                 _step.get_annotation("mlrun/function-uri")
              )
              if name:
                  try:
@@ -419,16 +431,19 @@ class NotificationPusher(_NotificationPusherBase):
                          "hash_key": hash_key,
                      },
                  }
-                 function["status"] = {
-                     "state": mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
-                         node["phase"]
-                     ),
-                 }
+                 pod_phase = _step.phase
+                 if _step.skipped:
+                     state = mlrun.common.schemas.FunctionState.skipped
+                 else:
+                     state = mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
+                         pod_phase
+                     )
+                 function["status"] = {"state": state}
                  if isinstance(function["metadata"].get("updated"), datetime.datetime):
                      function["metadata"]["updated"] = function["metadata"][
                          "updated"
                      ].isoformat()
-                 function["step_kind"] = _step_kind
+                 function["step_kind"] = _step.step_type
                  steps.append(function)
  
          step_methods = {
@@ -446,26 +461,10 @@ class NotificationPusher(_NotificationPusherBase):
              return steps
  
          try:
-             workflow_nodes = sorted(
-                 workflow_manifest["status"]["nodes"].items(),
-                 key=lambda _node: _node[1]["finishedAt"],
-             )
-             for node_name, node in workflow_nodes:
-                 if node["type"] != "Pod":
-                     # Skip the parent DAG node
-                     continue
- 
-                 node_template = next(
-                     template
-                     for template in workflow_manifest["spec"]["templates"]
-                     if template["name"] == node["templateName"]
-                 )
-                 step_type = node_template["metadata"]["annotations"].get(
-                     "mlrun/pipeline-step-type"
-                 )
-                 step_method = step_methods.get(step_type)
+             for step in workflow_manifest.get_steps():
+                 step_method = step_methods.get(step.step_type)
                  if step_method:
-                     step_method(node_name, node_template, step_type)
+                     step_method(step)
              return steps
          except Exception:
              # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
@@ -481,7 +480,9 @@ class NotificationPusher(_NotificationPusherBase):
          )
  
      @staticmethod
-     def _get_workflow_manifest(workflow_id: str) -> typing.Optional[dict]:
+     def _get_workflow_manifest(
+         workflow_id: str,
+     ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
          kfp_url = mlrun.mlconf.resolve_kfp_url(mlrun.mlconf.namespace)
          if not kfp_url:
              raise mlrun.errors.MLRunNotFoundError(
@@ -495,11 +496,8 @@ class NotificationPusher(_NotificationPusherBase):
          if not kfp_run:
              return None
  
-         kfp_run = kfp_run.to_dict()
-         try:
-             return json.loads(kfp_run["pipeline_runtime"]["workflow_manifest"])
-         except Exception:
-             return None
+         kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
+         return kfp_run.workflow_manifest()
  
      def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
          """
mlrun/utils/version/version.json CHANGED
@@ -1,4 +1,4 @@
  {
-   "git_commit": "fb7d21e35e68f1e2720647b57dc040d0309942ea",
-   "version": "1.7.0-rc17"
+   "git_commit": "cf983306a4f164f1c0a4f3ccf666ba9448d09e2e",
+   "version": "1.7.0-rc18"
  }