mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/utils/logger.py CHANGED
@@ -13,6 +13,7 @@
  # limitations under the License.

  import logging
+ import typing
  from enum import Enum
  from sys import stdout
  from traceback import format_exception
@@ -92,7 +93,25 @@ class HumanReadableFormatter(_BaseFormatter):

  class HumanReadableExtendedFormatter(HumanReadableFormatter):
      def format(self, record) -> str:
-         more = self._resolve_more(record)
+         more = ""
+         record_with = self._record_with(record)
+         if record_with:
+
+             def _format_value(val):
+                 formatted_val = (
+                     val
+                     if isinstance(val, str)
+                     else str(orjson.loads(self._json_dump(val)))
+                 )
+                 return (
+                     formatted_val.replace("\n", "\n\t\t")
+                     if len(formatted_val) < 4096
+                     else repr(formatted_val)
+                 )
+
+             more = "\n\t" + "\n\t".join(
+                 [f"{key}: {_format_value(val)}" for key, val in record_with.items()]
+             )
          return (
              "> "
              f"{self.formatTime(record, self.datefmt)} "
@@ -221,14 +240,27 @@ class FormatterKinds(Enum):
      JSON = "json"


- def _create_formatter_instance(formatter_kind: FormatterKinds) -> logging.Formatter:
+ def resolve_formatter_by_kind(
+     formatter_kind: FormatterKinds,
+ ) -> type[
+     typing.Union[HumanReadableFormatter, HumanReadableExtendedFormatter, JSONFormatter]
+ ]:
      return {
-         FormatterKinds.HUMAN: HumanReadableFormatter(),
-         FormatterKinds.HUMAN_EXTENDED: HumanReadableExtendedFormatter(),
-         FormatterKinds.JSON: JSONFormatter(),
+         FormatterKinds.HUMAN: HumanReadableFormatter,
+         FormatterKinds.HUMAN_EXTENDED: HumanReadableExtendedFormatter,
+         FormatterKinds.JSON: JSONFormatter,
      }[formatter_kind]


+ def create_test_logger(name: str = "mlrun", stream: IO[str] = stdout) -> Logger:
+     return create_logger(
+         level="debug",
+         formatter_kind=FormatterKinds.HUMAN_EXTENDED.name,
+         name=name,
+         stream=stream,
+     )
+
+
  def create_logger(
      level: Optional[str] = None,
      formatter_kind: str = FormatterKinds.HUMAN.name,
@@ -243,11 +275,11 @@ def create_logger(
      logger_instance = Logger(level, name=name, propagate=False)

      # resolve formatter
-     formatter_instance = _create_formatter_instance(
+     formatter_instance = resolve_formatter_by_kind(
          FormatterKinds(formatter_kind.lower())
      )

      # set handler
-     logger_instance.set_handler("default", stream or stdout, formatter_instance)
+     logger_instance.set_handler("default", stream or stdout, formatter_instance())

      return logger_instance
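
Note: resolve_formatter_by_kind now returns the formatter class rather than an instance, and create_logger instantiates it when attaching the handler. A minimal usage sketch, assuming mlrun is installed and mlrun.utils.logger keeps this public API:

    from sys import stdout

    from mlrun.utils.logger import FormatterKinds, create_logger

    # create_logger resolves the formatter class by kind and instantiates it internally
    logger = create_logger(
        level="debug",
        formatter_kind=FormatterKinds.HUMAN_EXTENDED.name,
        name="example",
        stream=stdout,
    )
    # extra keyword arguments become the "with" fields the extended formatter prints
    logger.info("run finished", project="demo", iterations=3)
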
mlrun/utils/notifications/notification/__init__.py CHANGED
@@ -51,14 +51,19 @@ class NotificationTypes(str, enum.Enum):
              self.console: [self.ipython],
          }.get(self, [])

+     @classmethod
+     def local(cls) -> list[str]:
+         return [
+             cls.console,
+             cls.ipython,
+         ]
+
      @classmethod
      def all(cls) -> list[str]:
-         return list(
-             [
-                 cls.console,
-                 cls.git,
-                 cls.ipython,
-                 cls.slack,
-                 cls.webhook,
-             ]
-         )
+         return [
+             cls.console,
+             cls.git,
+             cls.ipython,
+             cls.slack,
+             cls.webhook,
+         ]
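
Note: NotificationTypes gains a local() classmethod alongside the rewritten all(), so callers can restrict pushes to in-process targets (console and ipython). A small illustrative sketch, assuming the import path shown in the file list above:

    from mlrun.utils.notifications.notification import NotificationTypes

    local_types = [t.value for t in NotificationTypes.local()]  # ["console", "ipython"]
    all_types = [t.value for t in NotificationTypes.all()]      # console, git, ipython, slack, webhook
    print(local_types, all_types)
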
mlrun/utils/notifications/notification/base.py CHANGED
@@ -44,6 +44,8 @@ class NotificationBase:
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ):
          raise NotImplementedError()

@@ -61,6 +63,8 @@ class NotificationBase:
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ) -> str:
          if custom_html:
              return custom_html
@@ -68,6 +72,14 @@ class NotificationBase:
          if self.name:
              message = f"{self.name}: {message}"

+         if alert:
+             if not event_data:
+                 return f"[{severity}] {message}"
+             return (
+                 f"[{severity}] {message} for project {alert.project} "
+                 f"UID {event_data.entity.ids[0]}. Values {event_data.value_dict}"
+             )
+
          if not runs:
              return f"[{severity}] {message}"

mlrun/utils/notifications/notification/console.py CHANGED
@@ -36,6 +36,8 @@ class ConsoleNotification(NotificationBase):
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ):
          severity = self._resolve_severity(severity)
          print(f"[{severity}] {message}")
mlrun/utils/notifications/notification/git.py CHANGED
@@ -38,6 +38,8 @@ class GitNotification(NotificationBase):
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ):
          git_repo = self.params.get("repo", None)
          git_issue = self.params.get("issue", None)
@@ -50,7 +52,7 @@ class GitNotification(NotificationBase):
          server = self.params.get("server", None)
          gitlab = self.params.get("gitlab", False)
          await self._pr_comment(
-             self._get_html(message, severity, runs, custom_html),
+             self._get_html(message, severity, runs, custom_html, alert, event_data),
              git_repo,
              git_issue,
              merge_request=git_merge_request,
mlrun/utils/notifications/notification/ipython.py CHANGED
@@ -53,6 +53,8 @@ class IPythonNotification(NotificationBase):
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ):
          if not self._ipython:
              mlrun.utils.helpers.logger.debug(
mlrun/utils/notifications/notification/slack.py CHANGED
@@ -32,6 +32,7 @@ class SlackNotification(NotificationBase):
          "completed": ":smiley:",
          "running": ":man-running:",
          "error": ":x:",
+         "skipped": ":zzz:",
      }

      async def push(
@@ -42,6 +43,8 @@ class SlackNotification(NotificationBase):
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ):
          webhook = self.params.get("webhook", None) or mlrun.get_secret_or_env(
              "SLACK_WEBHOOK"
@@ -53,7 +56,7 @@ class SlackNotification(NotificationBase):
              )
              return

-         data = self._generate_slack_data(message, severity, runs)
+         data = self._generate_slack_data(message, severity, runs, alert, event_data)

          async with aiohttp.ClientSession() as session:
              async with session.post(webhook, json=data) as response:
@@ -66,57 +69,134 @@ class SlackNotification(NotificationBase):
              mlrun.common.schemas.NotificationSeverity, str
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ) -> dict:
          data = {
-             "blocks": [
-                 {
-                     "type": "section",
-                     "text": self._get_slack_row(f"[{severity}] {message}"),
-                 },
-             ]
+             "blocks": self._generate_slack_header_blocks(severity, message),
          }
          if self.name:
              data["blocks"].append(
                  {"type": "section", "text": self._get_slack_row(self.name)}
              )

-         if not runs:
-             return data
+         if alert:
+             fields = self._get_alert_fields(alert, event_data)

-         if isinstance(runs, list):
-             runs = mlrun.lists.RunList(runs)
+             for i in range(len(fields)):
+                 data["blocks"].append({"type": "section", "text": fields[i]})
+         else:
+             if not runs:
+                 return data

-         fields = [self._get_slack_row("*Runs*"), self._get_slack_row("*Results*")]
-         for run in runs:
-             fields.append(self._get_run_line(run))
-             fields.append(self._get_run_result(run))
+             if isinstance(runs, list):
+                 runs = mlrun.lists.RunList(runs)

-         for i in range(0, len(fields), 8):
-             data["blocks"].append({"type": "section", "fields": fields[i : i + 8]})
+             fields = [self._get_slack_row("*Runs*"), self._get_slack_row("*Results*")]
+             for run in runs:
+                 fields.append(self._get_run_line(run))
+                 fields.append(self._get_run_result(run))
+
+             for i in range(0, len(fields), 8):
+                 data["blocks"].append({"type": "section", "fields": fields[i : i + 8]})

          return data

+     def _generate_slack_header_blocks(self, severity: str, message: str):
+         header_text = block_text = f"[{severity}] {message}"
+         section_text = None
+
+         # Slack doesn't allow headers to be longer than 150 characters
+         # If there's a comma in the message, split the message at the comma
+         # Otherwise, split the message at 150 characters
+         if len(block_text) > 150:
+             if ", " in block_text and block_text.index(", ") < 149:
+                 header_text = block_text.split(",")[0]
+                 section_text = block_text[len(header_text) + 2 :]
+             else:
+                 header_text = block_text[:150]
+                 section_text = block_text[150:]
+         blocks = [
+             {"type": "header", "text": {"type": "plain_text", "text": header_text}}
+         ]
+         if section_text:
+             blocks.append(
+                 {
+                     "type": "section",
+                     "text": self._get_slack_row(section_text),
+                 }
+             )
+         return blocks
+
+     def _get_alert_fields(
+         self,
+         alert: mlrun.common.schemas.AlertConfig,
+         event_data: mlrun.common.schemas.Event,
+     ) -> list:
+         line = [
+             self._get_slack_row(f":bell: {alert.name} alert has occurred"),
+             self._get_slack_row(f"*Project:*\n{alert.project}"),
+             self._get_slack_row(f"*ID:*\n{event_data.entity.ids[0]}"),
+         ]
+
+         if alert.summary:
+             line.append(
+                 self._get_slack_row(
+                     f"*Summary:*\n{mlrun.utils.helpers.format_alert_summary(alert, event_data)}"
+                 )
+             )
+
+         if event_data.value_dict:
+             data_lines = []
+             for key, value in event_data.value_dict.items():
+                 data_lines.append(f"{key}: {value}")
+             data_text = "\n".join(data_lines)
+             line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
+
+         if (
+             event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
+         ):  # JOB entity
+             uid = event_data.value_dict.get("uid")
+             url = mlrun.utils.helpers.get_ui_url(alert.project, uid)
+             overview_type = "Job overview"
+         else:  # MODEL entity
+             model_name = event_data.value_dict.get("model")
+             model_endpoint_id = event_data.value_dict.get("model_endpoint_id")
+             url = mlrun.utils.helpers.get_model_endpoint_url(
+                 alert.project, model_name, model_endpoint_id
+             )
+             overview_type = "Model endpoint"
+
+         line.append(self._get_slack_row(f"*Overview:*\n<{url}|*{overview_type}*>"))
+
+         return line
+
      def _get_run_line(self, run: dict) -> dict:
          meta = run["metadata"]
          url = mlrun.utils.helpers.get_ui_url(meta.get("project"), meta.get("uid"))
-         if url:
+
+         # Only show the URL if the run is not a function (serving or mlrun function)
+         kind = run.get("step_kind")
+         state = run["status"].get("state", "")
+         if state != "skipped" and (url and not kind or kind == "run"):
              line = f'<{url}|*{meta.get("name")}*>'
          else:
              line = meta.get("name")
-         state = run["status"].get("state", "")
+             if kind:
+                 line = f'{line} *({run.get("step_kind", run.get("kind", ""))})*'
          line = f'{self.emojis.get(state, ":question:")} {line}'
          return self._get_slack_row(line)

      def _get_run_result(self, run: dict) -> dict:
          state = run["status"].get("state", "")
          if state == "error":
-             error_status = run["status"].get("error", "")
+             error_status = run["status"].get("error", "") or state
              result = f"*{error_status}*"
          else:
              result = mlrun.utils.helpers.dict_to_str(
                  run["status"].get("results", {}), ", "
              )
-         return self._get_slack_row(result or "None")
+         return self._get_slack_row(result or state)

      @staticmethod
      def _get_slack_row(text: str) -> dict:
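
Note: _generate_slack_header_blocks works around Slack's 150-character limit on header blocks: long messages are split at the first ", " that falls inside the limit, otherwise hard-wrapped at 150 characters, and the remainder is emitted as a regular section block. A standalone sketch of that splitting rule (plain Python; split_header is a hypothetical name, not part of mlrun):

    import typing


    def split_header(text: str) -> tuple[str, typing.Optional[str]]:
        # Mirrors the rule in _generate_slack_header_blocks: header <= 150 chars,
        # prefer splitting at the first ", " when it appears early enough.
        if len(text) <= 150:
            return text, None
        if ", " in text and text.index(", ") < 149:
            header = text.split(",")[0]
            return header, text[len(header) + 2 :]
        return text[:150], text[150:]


    header, section = split_header("[info] pipeline finished, " + "x" * 200)
    assert len(header) <= 150 and section is not None
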
mlrun/utils/notifications/notification/webhook.py CHANGED
@@ -36,6 +36,8 @@ class WebhookNotification(NotificationBase):
          ] = mlrun.common.schemas.NotificationSeverity.INFO,
          runs: typing.Union[mlrun.lists.RunList, list] = None,
          custom_html: str = None,
+         alert: mlrun.common.schemas.AlertConfig = None,
+         event_data: mlrun.common.schemas.Event = None,
      ):
          url = self.params.get("url", None)
          method = self.params.get("method", "post").lower()
@@ -46,9 +48,17 @@ class WebhookNotification(NotificationBase):
          request_body = {
              "message": message,
              "severity": severity,
-             "runs": runs,
          }

+         if runs:
+             request_body["runs"] = runs
+
+         if alert:
+             request_body["alert"] = alert.dict()
+             if event_data:
+                 request_body["value"] = event_data.value_dict
+                 request_body["id"] = event_data.entity.ids[0]
+
          if custom_html:
              request_body["custom_html"] = custom_html

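
Note: the webhook request body now includes "runs" only when there are runs, and for alert notifications it adds "alert" (alert.dict()), "value" (event_data.value_dict), and "id" (the first entity id). Illustrative payload shapes only; the field values below are invented for the example:

    run_payload = {
        "message": "run completed",
        "severity": "info",
        "runs": [{"metadata": {"name": "train", "uid": "abc123"}}],
    }

    alert_payload = {
        "message": "drift detected",
        "severity": "warning",
        "alert": {"name": "drift-alert", "project": "demo"},  # alert.dict()
        "value": {"model": "churn", "drift": 0.42},           # event_data.value_dict
        "id": "endpoint-1234",                                # event_data.entity.ids[0]
    }
    print(run_payload, alert_payload)
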
mlrun/utils/notifications/notification_pusher.py CHANGED
@@ -15,10 +15,17 @@
  import asyncio
  import datetime
  import os
+ import re
  import traceback
  import typing
  from concurrent.futures import ThreadPoolExecutor

+ import kfp
+ import mlrun_pipelines.common.ops
+ import mlrun_pipelines.models
+
+ import mlrun.common.constants as mlrun_constants
+ import mlrun.common.runtimes.constants
  import mlrun.common.schemas
  import mlrun.config
  import mlrun.db.base
@@ -233,25 +240,12 @@ class NotificationPusher(_NotificationPusherBase):
          resource = "Run"
          runs = [run.to_dict()]

-         if "workflow" in run.metadata.labels:
-             resource = "Workflow"
+         if mlrun_constants.MLRunInternalLabels.workflow in run.metadata.labels:
+             resource = mlrun_constants.MLRunInternalLabels.workflow
              custom_message = (
                  f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
              )
-             db = mlrun.get_run_db()
-
-             workflow_id = run.status.results.get("workflow_id", None)
-             if workflow_id:
-                 workflow_runs = db.list_runs(
-                     project=run.metadata.project,
-                     labels=f"workflow={workflow_id}",
-                 )
-                 logger.debug(
-                     "Found workflow runs, extending notification runs",
-                     workflow_id=workflow_id,
-                     workflow_runs_amount=len(workflow_runs),
-                 )
-                 runs.extend(workflow_runs)
+             runs.extend(self.get_workflow_steps(run))

          message = (
              self.messages.get(run.state(), "").format(resource=resource)
@@ -395,6 +389,137 @@ class NotificationPusher(_NotificationPusherBase):
              mask_params=False,
          )

+     def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
+         steps = []
+         db = mlrun.get_run_db()
+
+         def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+             try:
+                 _run = db.list_runs(
+                     project=run.metadata.project,
+                     labels=f"mlrun_constants.MLRunInternalLabels.runner_pod={_step.node_name}",
+                 )[0]
+             except IndexError:
+                 _run = {
+                     "metadata": {
+                         "name": _step.display_name,
+                         "project": run.metadata.project,
+                     },
+                 }
+             _run["step_kind"] = _step.step_type
+             if _step.skipped:
+                 _run.setdefault("status", {})["state"] = (
+                     mlrun.common.runtimes.constants.RunStates.skipped
+                 )
+             steps.append(_run)
+
+         def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
+             project, name, hash_key = self._extract_function_uri(
+                 _step.get_annotation("mlrun/function-uri")
+             )
+             if name:
+                 try:
+                     function = db.get_function(
+                         project=project, name=name, hash_key=hash_key
+                     )
+                 except mlrun.errors.MLRunNotFoundError:
+                     # If the function is not found (if build failed for example), we will create a dummy
+                     # function object for the notification to display the function name
+                     function = {
+                         "metadata": {
+                             "name": name,
+                             "project": project,
+                             "hash_key": hash_key,
+                         },
+                     }
+                 pod_phase = _step.phase
+                 if _step.skipped:
+                     state = mlrun.common.schemas.FunctionState.skipped
+                 else:
+                     state = mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
+                         pod_phase
+                     )
+                 function["status"] = {"state": state}
+                 if isinstance(function["metadata"].get("updated"), datetime.datetime):
+                     function["metadata"]["updated"] = function["metadata"][
+                         "updated"
+                     ].isoformat()
+                 function["step_kind"] = _step.step_type
+                 steps.append(function)
+
+         step_methods = {
+             mlrun_pipelines.common.ops.PipelineRunType.run: _add_run_step,
+             mlrun_pipelines.common.ops.PipelineRunType.build: _add_deploy_function_step,
+             mlrun_pipelines.common.ops.PipelineRunType.deploy: _add_deploy_function_step,
+         }
+
+         workflow_id = run.status.results.get("workflow_id", None)
+         if not workflow_id:
+             return steps
+
+         workflow_manifest = self._get_workflow_manifest(workflow_id)
+         if not workflow_manifest:
+             return steps
+
+         try:
+             for step in workflow_manifest.get_steps():
+                 step_method = step_methods.get(step.step_type)
+                 if step_method:
+                     step_method(step)
+             return steps
+         except Exception:
+             # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
+             logger.warning(
+                 "Failed to extract workflow steps from workflow manifest, "
+                 "returning all runs with the workflow id label",
+                 workflow_id=workflow_id,
+                 traceback=traceback.format_exc(),
+             )
+             return db.list_runs(
+                 project=run.metadata.project,
+                 labels=f"workflow={workflow_id}",
+             )
+
+     @staticmethod
+     def _get_workflow_manifest(
+         workflow_id: str,
+     ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
+         kfp_url = mlrun.mlconf.resolve_kfp_url(mlrun.mlconf.namespace)
+         if not kfp_url:
+             raise mlrun.errors.MLRunNotFoundError(
+                 "KubeFlow Pipelines is not configured"
+             )
+
+         kfp_client = kfp.Client(host=kfp_url)
+
+         # arbitrary timeout of 5 seconds, the workflow should be done by now
+         kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
+         if not kfp_run:
+             return None
+
+         kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
+         return kfp_run.workflow_manifest()
+
+     def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
+         """
+         Extract the project, name, and hash key from a function uri.
+         Examples:
+         - "project/name@hash_key" returns project, name, hash_key
+         - "project/name" returns project, name, ""
+         """
+         project, name, hash_key = None, None, None
+         hashed_pattern = r"^(.+)/(.+)@(.+)$"
+         pattern = r"^(.+)/(.+)$"
+         match = re.match(hashed_pattern, function_uri)
+         if match:
+             project, name, hash_key = match.groups()
+         else:
+             match = re.match(pattern, function_uri)
+             if match:
+                 project, name = match.groups()
+                 hash_key = ""
+         return project, name, hash_key
+


  class CustomNotificationPusher(_NotificationPusherBase):
@@ -413,6 +538,12 @@ class CustomNotificationPusher(_NotificationPusherBase):
              if notification.is_async
          }

+     @property
+     def notifications(self):
+         notifications = self._sync_notifications.copy()
+         notifications.update(self._async_notifications)
+         return notifications
+
      def push(
          self,
          message: str,
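
Note: _extract_function_uri parses the "mlrun/function-uri" annotation as either "project/name@hash_key" or "project/name". A standalone sketch of the same regex logic (plain Python, no mlrun required; the function name here is hypothetical):

    import re


    def extract_function_uri(function_uri: str):
        # "project/name@hash_key" -> (project, name, hash_key); "project/name" -> (project, name, "")
        match = re.match(r"^(.+)/(.+)@(.+)$", function_uri)
        if match:
            return match.groups()
        match = re.match(r"^(.+)/(.+)$", function_uri)
        if match:
            project, name = match.groups()
            return project, name, ""
        return None, None, None


    print(extract_function_uri("demo/trainer@4f9c2b"))  # ('demo', 'trainer', '4f9c2b')
    print(extract_function_uri("demo/trainer"))         # ('demo', 'trainer', '')
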
mlrun/utils/retryer.py CHANGED
@@ -117,7 +117,7 @@ class Retryer:
          self._raise_last_exception()

      def _prepare(self):
-         self.start_time = time.time()
+         self.start_time = time.monotonic()
          self.last_exception = None

          # Check if backoff is just a simple interval
@@ -138,6 +138,7 @@
          except mlrun.errors.MLRunFatalFailureError as exc:
              raise exc.original_exception
          except Exception as exc:
+             self.last_exception = exc
              return (
                  None,
                  self.last_exception,
@@ -172,7 +173,7 @@
          ) from self.last_exception

      def _timeout_exceeded(self, next_interval=None):
-         now = time.time()
+         now = time.monotonic()
          if next_interval:
              now = now + next_interval
          return self.timeout is not None and now >= self.start_time + self.timeout
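
Note: the retryer now measures timeouts with time.monotonic() instead of time.time(), so elapsed-time checks are unaffected by wall-clock adjustments, and it records the last exception before returning from a failed attempt. A minimal sketch of the monotonic elapsed-time pattern:

    import time

    start = time.monotonic()
    time.sleep(0.1)  # stand-in for a retried call
    elapsed = time.monotonic() - start
    timeout = 5.0
    print(f"elapsed={elapsed:.2f}s, timed_out={elapsed >= timeout}")
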
mlrun/utils/v3io_clients.py CHANGED
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #

  from v3io.dataplane import Client as V3IOClient
  from v3io_frames import Client as get_client
mlrun/utils/version/version.json CHANGED
@@ -1,4 +1,4 @@
  {
-     "git_commit": "cb2750f25e202a321723af3465359944445dfda7",
-     "version": "1.7.0-rc4"
+     "git_commit": "f869a5513ea7c9f4ccdaddad6589274eec39f0a4",
+     "version": "1.7.0-rc20"
  }