mlrun 1.8.0rc19__py3-none-any.whl → 1.8.0rc26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +37 -3
- mlrun/__main__.py +5 -0
- mlrun/alerts/alert.py +1 -0
- mlrun/artifacts/document.py +78 -36
- mlrun/common/formatters/feature_set.py +1 -0
- mlrun/common/runtimes/constants.py +17 -0
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/model_monitoring/constants.py +32 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +2 -0
- mlrun/common/schemas/workflow.py +1 -0
- mlrun/config.py +39 -6
- mlrun/datastore/datastore_profile.py +58 -16
- mlrun/datastore/sources.py +7 -1
- mlrun/datastore/vectorstore.py +20 -1
- mlrun/db/base.py +20 -0
- mlrun/db/httpdb.py +97 -10
- mlrun/db/nopdb.py +19 -0
- mlrun/errors.py +4 -0
- mlrun/execution.py +15 -6
- mlrun/frameworks/_common/model_handler.py +0 -2
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +5 -1
- mlrun/model_monitoring/applications/_application_steps.py +3 -1
- mlrun/model_monitoring/controller.py +266 -103
- mlrun/model_monitoring/db/tsdb/__init__.py +11 -23
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +20 -21
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -34
- mlrun/model_monitoring/helpers.py +16 -10
- mlrun/model_monitoring/stream_processing.py +106 -35
- mlrun/package/context_handler.py +1 -1
- mlrun/package/packagers_manager.py +4 -18
- mlrun/projects/pipelines.py +18 -5
- mlrun/projects/project.py +156 -39
- mlrun/runtimes/nuclio/serving.py +22 -13
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/secrets.py +1 -1
- mlrun/serving/server.py +11 -3
- mlrun/serving/states.py +65 -8
- mlrun/serving/v2_serving.py +67 -44
- mlrun/utils/helpers.py +111 -23
- mlrun/utils/notifications/notification/base.py +6 -1
- mlrun/utils/notifications/notification/slack.py +5 -1
- mlrun/utils/notifications/notification_pusher.py +67 -36
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/METADATA +33 -16
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/RECORD +52 -52
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/WHEEL +1 -1
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc19.dist-info → mlrun-1.8.0rc26.dist-info}/top_level.txt +0 -0
mlrun/serving/v2_serving.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
+
import random
|
|
15
16
|
import threading
|
|
16
17
|
import time
|
|
17
18
|
import traceback
|
|
@@ -148,16 +149,24 @@ class V2ModelServer(StepToDict):
|
|
|
148
149
|
if not self.context.is_mock and not self.model_spec:
|
|
149
150
|
self.get_model()
|
|
150
151
|
if not self.context.is_mock or self.context.monitoring_mock:
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
152
|
+
try:
|
|
153
|
+
self.model_endpoint = mlrun.get_run_db().get_model_endpoint(
|
|
154
|
+
project=server.project,
|
|
155
|
+
name=self.name,
|
|
156
|
+
function_name=server.function_name,
|
|
157
|
+
function_tag=server.function_tag or "latest",
|
|
158
|
+
)
|
|
159
|
+
self.model_endpoint_uid = self.model_endpoint.metadata.uid
|
|
160
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
161
|
+
logger.info(
|
|
162
|
+
"Model Endpoint not found for this step we will not monitor this model",
|
|
163
|
+
function_name=server.function_name,
|
|
164
|
+
name=self.name,
|
|
165
|
+
)
|
|
166
|
+
self.model_endpoint, self.model_endpoint_uid = None, None
|
|
158
167
|
self._model_logger = (
|
|
159
168
|
_ModelLogPusher(self, self.context)
|
|
160
|
-
if self.context and self.context.stream.enabled
|
|
169
|
+
if self.context and self.context.stream.enabled and self.model_endpoint_uid
|
|
161
170
|
else None
|
|
162
171
|
)
|
|
163
172
|
|
|
@@ -283,7 +292,6 @@ class V2ModelServer(StepToDict):
|
|
|
283
292
|
}
|
|
284
293
|
if self.version:
|
|
285
294
|
response["model_version"] = self.version
|
|
286
|
-
|
|
287
295
|
elif op == "ready" and event.method == "GET":
|
|
288
296
|
# get model health operation
|
|
289
297
|
setattr(event, "terminated", True)
|
|
@@ -468,13 +476,9 @@ class _ModelLogPusher:
|
|
|
468
476
|
self.hostname = context.stream.hostname
|
|
469
477
|
self.function_uri = context.stream.function_uri
|
|
470
478
|
self.stream_path = context.stream.stream_uri
|
|
471
|
-
self.
|
|
472
|
-
self.stream_sample = int(context.get_param("log_stream_sample", 1))
|
|
479
|
+
self.sampling_percentage = float(context.get_param("sampling_percentage", 100))
|
|
473
480
|
self.output_stream = output_stream or context.stream.output_stream
|
|
474
481
|
self._worker = context.worker_id
|
|
475
|
-
self._sample_iter = 0
|
|
476
|
-
self._batch_iter = 0
|
|
477
|
-
self._batch = []
|
|
478
482
|
|
|
479
483
|
def base_data(self):
|
|
480
484
|
base_data = {
|
|
@@ -485,6 +489,7 @@ class _ModelLogPusher:
|
|
|
485
489
|
"host": self.hostname,
|
|
486
490
|
"function_uri": self.function_uri,
|
|
487
491
|
"endpoint_id": self.model.model_endpoint_uid,
|
|
492
|
+
"sampling_percentage": self.sampling_percentage,
|
|
488
493
|
}
|
|
489
494
|
if getattr(self.model, "labels", None):
|
|
490
495
|
base_data["labels"] = self.model.labels
|
|
@@ -504,37 +509,55 @@ class _ModelLogPusher:
|
|
|
504
509
|
self.output_stream.push([data], partition_key=partition_key)
|
|
505
510
|
return
|
|
506
511
|
|
|
507
|
-
|
|
508
|
-
|
|
512
|
+
if self.output_stream:
|
|
513
|
+
# Ensure that the inputs are a list of lists
|
|
514
|
+
request["inputs"] = (
|
|
515
|
+
request["inputs"]
|
|
516
|
+
if not any(not isinstance(req, list) for req in request["inputs"])
|
|
517
|
+
else [request["inputs"]]
|
|
518
|
+
)
|
|
509
519
|
microsec = (now_date() - start).microseconds
|
|
510
520
|
|
|
511
|
-
if self.
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
self.
|
|
515
|
-
|
|
521
|
+
if self.sampling_percentage != 100:
|
|
522
|
+
# Randomly select a subset of the requests based on the percentage
|
|
523
|
+
num_of_inputs = len(request["inputs"])
|
|
524
|
+
sampled_requests_indices = self._pick_random_requests(
|
|
525
|
+
num_of_inputs, self.sampling_percentage
|
|
516
526
|
)
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
"
|
|
527
|
+
if not sampled_requests_indices:
|
|
528
|
+
# No events were selected for sampling
|
|
529
|
+
return
|
|
530
|
+
|
|
531
|
+
request["inputs"] = [
|
|
532
|
+
request["inputs"][i] for i in sampled_requests_indices
|
|
533
|
+
]
|
|
534
|
+
|
|
535
|
+
if resp and "outputs" in resp and isinstance(resp["outputs"], list):
|
|
536
|
+
resp["outputs"] = [
|
|
537
|
+
resp["outputs"][i] for i in sampled_requests_indices
|
|
528
538
|
]
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
data["
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
539
|
+
|
|
540
|
+
data = self.base_data()
|
|
541
|
+
data["request"] = request
|
|
542
|
+
data["op"] = op
|
|
543
|
+
data["resp"] = resp
|
|
544
|
+
data["when"] = start_str
|
|
545
|
+
data["microsec"] = microsec
|
|
546
|
+
if getattr(self.model, "metrics", None):
|
|
547
|
+
data["metrics"] = self.model.metrics
|
|
548
|
+
data["effective_sample_count"] = len(request["inputs"])
|
|
549
|
+
self.output_stream.push([data], partition_key=partition_key)
|
|
550
|
+
|
|
551
|
+
@staticmethod
|
|
552
|
+
def _pick_random_requests(num_of_reqs: int, percentage: float) -> list[int]:
|
|
553
|
+
"""
|
|
554
|
+
Randomly selects indices of requests to sample based on the given percentage
|
|
555
|
+
|
|
556
|
+
:param num_of_reqs: Number of requests to select from
|
|
557
|
+
:param percentage: Sample percentage for each request
|
|
558
|
+
:return: A list containing the indices of the selected requests
|
|
559
|
+
"""
|
|
560
|
+
|
|
561
|
+
return [
|
|
562
|
+
req for req in range(num_of_reqs) if random.random() < (percentage / 100)
|
|
563
|
+
]
|
mlrun/utils/helpers.py
CHANGED
|
@@ -13,8 +13,10 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
|
+
import base64
|
|
16
17
|
import enum
|
|
17
18
|
import functools
|
|
19
|
+
import gzip
|
|
18
20
|
import hashlib
|
|
19
21
|
import inspect
|
|
20
22
|
import itertools
|
|
@@ -91,14 +93,19 @@ class StorePrefix:
|
|
|
91
93
|
Artifact = "artifacts"
|
|
92
94
|
Model = "models"
|
|
93
95
|
Dataset = "datasets"
|
|
96
|
+
Document = "documents"
|
|
94
97
|
|
|
95
98
|
@classmethod
|
|
96
99
|
def is_artifact(cls, prefix):
|
|
97
|
-
return prefix in [cls.Artifact, cls.Model, cls.Dataset]
|
|
100
|
+
return prefix in [cls.Artifact, cls.Model, cls.Dataset, cls.Document]
|
|
98
101
|
|
|
99
102
|
@classmethod
|
|
100
103
|
def kind_to_prefix(cls, kind):
|
|
101
|
-
kind_map = {
|
|
104
|
+
kind_map = {
|
|
105
|
+
"model": cls.Model,
|
|
106
|
+
"dataset": cls.Dataset,
|
|
107
|
+
"document": cls.Document,
|
|
108
|
+
}
|
|
102
109
|
return kind_map.get(kind, cls.Artifact)
|
|
103
110
|
|
|
104
111
|
@classmethod
|
|
@@ -109,6 +116,7 @@ class StorePrefix:
|
|
|
109
116
|
cls.Dataset,
|
|
110
117
|
cls.FeatureSet,
|
|
111
118
|
cls.FeatureVector,
|
|
119
|
+
cls.Document,
|
|
112
120
|
]
|
|
113
121
|
|
|
114
122
|
|
|
@@ -1038,31 +1046,85 @@ async def retry_until_successful_async(
|
|
|
1038
1046
|
).run()
|
|
1039
1047
|
|
|
1040
1048
|
|
|
1041
|
-
def
|
|
1042
|
-
|
|
1049
|
+
def get_project_url(project: str) -> str:
|
|
1050
|
+
"""
|
|
1051
|
+
Generate the base URL for a given project.
|
|
1052
|
+
|
|
1053
|
+
:param project: The project name.
|
|
1054
|
+
:return: The base URL for the project, or an empty string if the base URL is not resolved.
|
|
1055
|
+
"""
|
|
1043
1056
|
if mlrun.mlconf.resolve_ui_url():
|
|
1044
|
-
|
|
1045
|
-
|
|
1046
|
-
url += f"/monitor/{uid}/overview"
|
|
1047
|
-
return url
|
|
1057
|
+
return f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}/{project}"
|
|
1058
|
+
return ""
|
|
1048
1059
|
|
|
1049
1060
|
|
|
1050
|
-
def
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1061
|
+
def get_run_url(project: str, uid: str, name: str) -> str:
|
|
1062
|
+
"""
|
|
1063
|
+
Generate the URL for a specific run.
|
|
1064
|
+
|
|
1065
|
+
:param project: The project name.
|
|
1066
|
+
:param uid: The run UID.
|
|
1067
|
+
:param name: The run name.
|
|
1068
|
+
:return: The URL for the run, or an empty string if the base URL is not resolved.
|
|
1069
|
+
"""
|
|
1070
|
+
runs_url = get_runs_url(project)
|
|
1071
|
+
if not runs_url:
|
|
1072
|
+
return ""
|
|
1073
|
+
return f"{runs_url}/monitor-jobs/{name}/{uid}/overview"
|
|
1074
|
+
|
|
1075
|
+
|
|
1076
|
+
def get_runs_url(project: str) -> str:
|
|
1077
|
+
"""
|
|
1078
|
+
Generate the URL for the runs of a given project.
|
|
1079
|
+
|
|
1080
|
+
:param project: The project name.
|
|
1081
|
+
:return: The URL for the runs, or an empty string if the base URL is not resolved.
|
|
1082
|
+
"""
|
|
1083
|
+
base_url = get_project_url(project)
|
|
1084
|
+
if not base_url:
|
|
1085
|
+
return ""
|
|
1086
|
+
return f"{base_url}/jobs"
|
|
1087
|
+
|
|
1088
|
+
|
|
1089
|
+
def get_model_endpoint_url(
|
|
1090
|
+
project: str,
|
|
1091
|
+
model_name: Optional[str] = None,
|
|
1092
|
+
model_endpoint_id: Optional[str] = None,
|
|
1093
|
+
) -> str:
|
|
1094
|
+
"""
|
|
1095
|
+
Generate the URL for a specific model endpoint.
|
|
1096
|
+
|
|
1097
|
+
:param project: The project name.
|
|
1098
|
+
:param model_name: The model name.
|
|
1099
|
+
:param model_endpoint_id: The model endpoint ID.
|
|
1100
|
+
:return: The URL for the model endpoint, or an empty string if the base URL is not resolved.
|
|
1101
|
+
"""
|
|
1102
|
+
base_url = get_project_url(project)
|
|
1103
|
+
if not base_url:
|
|
1104
|
+
return ""
|
|
1105
|
+
url = f"{base_url}/models"
|
|
1106
|
+
if model_name and model_endpoint_id:
|
|
1107
|
+
url += f"/model-endpoints/{model_name}/{model_endpoint_id}/overview"
|
|
1056
1108
|
return url
|
|
1057
1109
|
|
|
1058
1110
|
|
|
1059
|
-
def get_workflow_url(
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1111
|
+
def get_workflow_url(
|
|
1112
|
+
project: str,
|
|
1113
|
+
id: Optional[str] = None,
|
|
1114
|
+
) -> str:
|
|
1115
|
+
"""
|
|
1116
|
+
Generate the URL for a specific workflow.
|
|
1117
|
+
|
|
1118
|
+
:param project: The project name.
|
|
1119
|
+
:param id: The workflow ID.
|
|
1120
|
+
:return: The URL for the workflow, or an empty string if the base URL is not resolved.
|
|
1121
|
+
"""
|
|
1122
|
+
base_url = get_project_url(project)
|
|
1123
|
+
if not base_url:
|
|
1124
|
+
return ""
|
|
1125
|
+
url = f"{base_url}/jobs/monitor-workflows/workflow"
|
|
1126
|
+
if id:
|
|
1127
|
+
url += f"/{id}"
|
|
1066
1128
|
return url
|
|
1067
1129
|
|
|
1068
1130
|
|
|
@@ -1709,7 +1771,14 @@ def get_serving_spec():
|
|
|
1709
1771
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1710
1772
|
"Failed to find serving spec in env var or config file"
|
|
1711
1773
|
)
|
|
1712
|
-
|
|
1774
|
+
# Attempt to decode and decompress, or use as-is for backward compatibility
|
|
1775
|
+
try:
|
|
1776
|
+
decoded_data = base64.b64decode(data)
|
|
1777
|
+
decompressed_data = gzip.decompress(decoded_data)
|
|
1778
|
+
spec = json.loads(decompressed_data.decode("utf-8"))
|
|
1779
|
+
except (OSError, gzip.BadGzipFile, base64.binascii.Error, json.JSONDecodeError):
|
|
1780
|
+
spec = json.loads(data)
|
|
1781
|
+
|
|
1713
1782
|
return spec
|
|
1714
1783
|
|
|
1715
1784
|
|
|
@@ -1981,7 +2050,20 @@ class Workflow:
|
|
|
1981
2050
|
if not workflow_id:
|
|
1982
2051
|
return steps
|
|
1983
2052
|
|
|
1984
|
-
|
|
2053
|
+
try:
|
|
2054
|
+
workflow_manifest = Workflow._get_workflow_manifest(workflow_id)
|
|
2055
|
+
except Exception:
|
|
2056
|
+
logger.warning(
|
|
2057
|
+
"Failed to extract workflow steps from workflow manifest, "
|
|
2058
|
+
"returning all runs with the workflow id label",
|
|
2059
|
+
workflow_id=workflow_id,
|
|
2060
|
+
traceback=traceback.format_exc(),
|
|
2061
|
+
)
|
|
2062
|
+
return db.list_runs(
|
|
2063
|
+
project=project,
|
|
2064
|
+
labels=f"workflow={workflow_id}",
|
|
2065
|
+
)
|
|
2066
|
+
|
|
1985
2067
|
if not workflow_manifest:
|
|
1986
2068
|
return steps
|
|
1987
2069
|
|
|
@@ -2038,3 +2120,9 @@ class Workflow:
|
|
|
2038
2120
|
|
|
2039
2121
|
kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
|
|
2040
2122
|
return kfp_run.workflow_manifest()
|
|
2123
|
+
|
|
2124
|
+
|
|
2125
|
+
def as_dict(data: typing.Union[dict, str]) -> dict:
|
|
2126
|
+
if isinstance(data, str):
|
|
2127
|
+
return json.loads(data)
|
|
2128
|
+
return data
|
|
@@ -134,7 +134,12 @@ class NotificationBase:
|
|
|
134
134
|
event_data.entity.kind == mlrun.common.schemas.alert.EventEntityKind.JOB
|
|
135
135
|
): # JOB entity
|
|
136
136
|
uid = event_data.value_dict.get("uid")
|
|
137
|
-
|
|
137
|
+
name = event_data.entity.ids[0]
|
|
138
|
+
url = mlrun.utils.helpers.get_run_url(
|
|
139
|
+
alert.project,
|
|
140
|
+
uid=uid,
|
|
141
|
+
name=name,
|
|
142
|
+
)
|
|
138
143
|
overview_type = "Job overview"
|
|
139
144
|
else: # MODEL entity
|
|
140
145
|
model_name = event_data.value_dict.get("model")
|
|
@@ -168,7 +168,11 @@ class SlackNotification(NotificationBase):
|
|
|
168
168
|
|
|
169
169
|
def _get_run_line(self, run: dict) -> dict:
|
|
170
170
|
meta = run["metadata"]
|
|
171
|
-
url = mlrun.utils.helpers.
|
|
171
|
+
url = mlrun.utils.helpers.get_run_url(
|
|
172
|
+
meta.get("project"),
|
|
173
|
+
uid=meta.get("uid"),
|
|
174
|
+
name=meta.get("name"),
|
|
175
|
+
)
|
|
172
176
|
|
|
173
177
|
# Only show the URL if the run is not a function (serving or mlrun function)
|
|
174
178
|
kind = run.get("step_kind")
|
|
@@ -139,15 +139,25 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
139
139
|
error=mlrun.errors.err_to_str(exc),
|
|
140
140
|
)
|
|
141
141
|
|
|
142
|
-
def _process_notification(self,
|
|
143
|
-
|
|
142
|
+
def _process_notification(self, notification_object, run):
|
|
143
|
+
notification_object.status = run.status.notifications.get(
|
|
144
|
+
notification_object.name, {}
|
|
145
|
+
).get(
|
|
144
146
|
"status",
|
|
145
147
|
mlrun.common.schemas.NotificationStatus.PENDING,
|
|
146
148
|
)
|
|
147
|
-
if self._should_notify(run,
|
|
148
|
-
self._load_notification(
|
|
149
|
+
if self._should_notify(run, notification_object):
|
|
150
|
+
notification = self._load_notification(notification_object)
|
|
151
|
+
if notification.is_async:
|
|
152
|
+
self._async_notifications.append(
|
|
153
|
+
(notification, run, notification_object)
|
|
154
|
+
)
|
|
155
|
+
else:
|
|
156
|
+
self._sync_notifications.append(
|
|
157
|
+
(notification, run, notification_object)
|
|
158
|
+
)
|
|
149
159
|
|
|
150
|
-
def push(self):
|
|
160
|
+
def push(self, sync_push_callback=None, async_push_callback=None):
|
|
151
161
|
"""
|
|
152
162
|
Asynchronously push notifications for all runs in the initialized runs list (if they should be pushed).
|
|
153
163
|
When running from a sync environment, the notifications will be pushed asynchronously however the function will
|
|
@@ -190,7 +200,7 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
190
200
|
"Failed to push notification async",
|
|
191
201
|
error=mlrun.errors.err_to_str(result),
|
|
192
202
|
traceback=traceback.format_exception(
|
|
193
|
-
|
|
203
|
+
result,
|
|
194
204
|
value=result,
|
|
195
205
|
tb=result.__traceback__,
|
|
196
206
|
),
|
|
@@ -201,8 +211,9 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
201
211
|
notifications_amount=len(self._sync_notifications)
|
|
202
212
|
+ len(self._async_notifications),
|
|
203
213
|
)
|
|
204
|
-
|
|
205
|
-
|
|
214
|
+
sync_push_callback = sync_push_callback or sync_push
|
|
215
|
+
async_push_callback = async_push_callback or async_push
|
|
216
|
+
self._push(sync_push_callback, async_push_callback)
|
|
206
217
|
|
|
207
218
|
@staticmethod
|
|
208
219
|
def _should_notify(
|
|
@@ -241,24 +252,19 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
241
252
|
return False
|
|
242
253
|
|
|
243
254
|
def _load_notification(
|
|
244
|
-
self,
|
|
255
|
+
self, notification_object: mlrun.model.Notification
|
|
245
256
|
) -> base.NotificationBase:
|
|
246
257
|
name = notification_object.name
|
|
247
258
|
notification_type = notification_module.NotificationTypes(
|
|
248
259
|
notification_object.kind or notification_module.NotificationTypes.console
|
|
249
260
|
)
|
|
250
261
|
params = {}
|
|
251
|
-
params.update(notification_object.secret_params)
|
|
252
|
-
params.update(notification_object.params)
|
|
262
|
+
params.update(notification_object.secret_params or {})
|
|
263
|
+
params.update(notification_object.params or {})
|
|
253
264
|
default_params = self._default_params.get(notification_type.value, {})
|
|
254
265
|
notification = notification_type.get_notification()(
|
|
255
266
|
name, params, default_params
|
|
256
267
|
)
|
|
257
|
-
if notification.is_async:
|
|
258
|
-
self._async_notifications.append((notification, run, notification_object))
|
|
259
|
-
else:
|
|
260
|
-
self._sync_notifications.append((notification, run, notification_object))
|
|
261
|
-
|
|
262
268
|
logger.debug(
|
|
263
269
|
"Loaded notification", notification=name, type=notification_type.value
|
|
264
270
|
)
|
|
@@ -406,8 +412,17 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
406
412
|
sent_time: typing.Optional[datetime.datetime] = None,
|
|
407
413
|
reason: typing.Optional[str] = None,
|
|
408
414
|
):
|
|
409
|
-
if
|
|
410
|
-
|
|
415
|
+
# Skip update the notification state if the following conditions are met:
|
|
416
|
+
# 1. the run is not in a terminal state
|
|
417
|
+
# 2. the when contains only one state (which is the current state)
|
|
418
|
+
# Skip updating because currently each notification has only one row in the db, even if it has multiple when.
|
|
419
|
+
# This means that if the notification is updated to sent for running state for example, it will not send for
|
|
420
|
+
# The terminal state
|
|
421
|
+
# TODO: Change this behavior after implementing ML-8723
|
|
422
|
+
if (
|
|
423
|
+
run_state not in runtimes_constants.RunStates.terminal_states()
|
|
424
|
+
and len(notification.when) > 1
|
|
425
|
+
):
|
|
411
426
|
logger.debug(
|
|
412
427
|
"Skip updating notification status - run not in terminal state",
|
|
413
428
|
run_uid=run_uid,
|
|
@@ -496,6 +511,14 @@ class CustomNotificationPusher(_NotificationPusherBase):
|
|
|
496
511
|
notification_type: str,
|
|
497
512
|
params: typing.Optional[dict[str, str]] = None,
|
|
498
513
|
):
|
|
514
|
+
if notification_type not in [
|
|
515
|
+
notification_module.NotificationTypes.console,
|
|
516
|
+
notification_module.NotificationTypes.ipython,
|
|
517
|
+
]:
|
|
518
|
+
# We want that only the console and ipython notifications will be notified by the client.
|
|
519
|
+
# The rest of the notifications will be notified by the BE.
|
|
520
|
+
return
|
|
521
|
+
|
|
499
522
|
if notification_type in self._async_notifications:
|
|
500
523
|
self._async_notifications[notification_type].load_notification(params)
|
|
501
524
|
elif notification_type in self._sync_notifications:
|
|
@@ -565,25 +588,9 @@ class CustomNotificationPusher(_NotificationPusherBase):
|
|
|
565
588
|
pipeline_id: typing.Optional[str] = None,
|
|
566
589
|
has_workflow_url: bool = False,
|
|
567
590
|
):
|
|
568
|
-
message =
|
|
569
|
-
|
|
570
|
-
message += f" id={pipeline_id}"
|
|
571
|
-
commit_id = (
|
|
572
|
-
commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
|
|
591
|
+
html, message = self.generate_start_message(
|
|
592
|
+
commit_id, has_workflow_url, pipeline_id, project
|
|
573
593
|
)
|
|
574
|
-
if commit_id:
|
|
575
|
-
message += f", commit={commit_id}"
|
|
576
|
-
if has_workflow_url:
|
|
577
|
-
url = mlrun.utils.helpers.get_workflow_url(project, pipeline_id)
|
|
578
|
-
else:
|
|
579
|
-
url = mlrun.utils.helpers.get_ui_url(project)
|
|
580
|
-
html = ""
|
|
581
|
-
if url:
|
|
582
|
-
html = (
|
|
583
|
-
message
|
|
584
|
-
+ f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
|
|
585
|
-
)
|
|
586
|
-
message = message + f", check progress in {url}"
|
|
587
594
|
self.push(message, "info", custom_html=html)
|
|
588
595
|
|
|
589
596
|
def push_pipeline_run_results(
|
|
@@ -616,6 +623,30 @@ class CustomNotificationPusher(_NotificationPusherBase):
|
|
|
616
623
|
text += f", state={state}"
|
|
617
624
|
self.push(text, "info", runs=runs_list)
|
|
618
625
|
|
|
626
|
+
def generate_start_message(
|
|
627
|
+
self, commit_id=None, has_workflow_url=None, pipeline_id=None, project=None
|
|
628
|
+
):
|
|
629
|
+
message = f"Workflow started in project {project}"
|
|
630
|
+
if pipeline_id:
|
|
631
|
+
message += f" id={pipeline_id}"
|
|
632
|
+
commit_id = (
|
|
633
|
+
commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
|
|
634
|
+
)
|
|
635
|
+
if commit_id:
|
|
636
|
+
message += f", commit={commit_id}"
|
|
637
|
+
if has_workflow_url:
|
|
638
|
+
url = mlrun.utils.helpers.get_workflow_url(project, pipeline_id)
|
|
639
|
+
else:
|
|
640
|
+
url = mlrun.utils.helpers.get_runs_url(project)
|
|
641
|
+
html = ""
|
|
642
|
+
if url:
|
|
643
|
+
html = (
|
|
644
|
+
message
|
|
645
|
+
+ f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
|
|
646
|
+
)
|
|
647
|
+
message = message + f", check progress in {url}"
|
|
648
|
+
return html, message
|
|
649
|
+
|
|
619
650
|
|
|
620
651
|
def sanitize_notification(notification_dict: dict):
|
|
621
652
|
notification_dict.pop("secret_params", None)
|
mlrun/utils/version/version.json
CHANGED