mlrun 1.8.0rc18__py3-none-any.whl → 1.8.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__main__.py +5 -0
- mlrun/common/runtimes/constants.py +17 -0
- mlrun/common/schemas/artifact.py +6 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +16 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +4 -2
- mlrun/config.py +2 -2
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +118 -1
- mlrun/db/nopdb.py +9 -0
- mlrun/frameworks/_common/model_handler.py +0 -2
- mlrun/model_monitoring/db/tsdb/base.py +116 -8
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +2 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +37 -29
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +46 -26
- mlrun/model_monitoring/helpers.py +2 -2
- mlrun/model_monitoring/stream_processing.py +21 -0
- mlrun/projects/pipelines.py +16 -3
- mlrun/projects/project.py +45 -8
- mlrun/runtimes/nuclio/serving.py +20 -11
- mlrun/serving/v2_serving.py +51 -36
- mlrun/utils/helpers.py +163 -1
- mlrun/utils/notifications/notification/webhook.py +3 -0
- mlrun/utils/notifications/notification_pusher.py +59 -165
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.8.0rc18.dist-info → mlrun-1.8.0rc20.dist-info}/METADATA +1 -1
- {mlrun-1.8.0rc18.dist-info → mlrun-1.8.0rc20.dist-info}/RECORD +31 -31
- {mlrun-1.8.0rc18.dist-info → mlrun-1.8.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.8.0rc18.dist-info → mlrun-1.8.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.8.0rc18.dist-info → mlrun-1.8.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.8.0rc18.dist-info → mlrun-1.8.0rc20.dist-info}/top_level.txt +0 -0
mlrun/serving/v2_serving.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import random
 import threading
 import time
 import traceback
@@ -283,7 +284,6 @@ class V2ModelServer(StepToDict):
             }
             if self.version:
                 response["model_version"] = self.version
-
         elif op == "ready" and event.method == "GET":
             # get model health operation
             setattr(event, "terminated", True)
@@ -468,13 +468,9 @@ class _ModelLogPusher:
         self.hostname = context.stream.hostname
         self.function_uri = context.stream.function_uri
         self.stream_path = context.stream.stream_uri
-        self.stream_batch = int(context.get_param("log_stream_batch", 1))
-        self.stream_sample = int(context.get_param("log_stream_sample", 1))
+        self.sampling_percentage = float(context.get_param("sampling_percentage", 100))
         self.output_stream = output_stream or context.stream.output_stream
         self._worker = context.worker_id
-        self._sample_iter = 0
-        self._batch_iter = 0
-        self._batch = []
 
     def base_data(self):
         base_data = {
@@ -485,6 +481,7 @@ class _ModelLogPusher:
             "host": self.hostname,
             "function_uri": self.function_uri,
             "endpoint_id": self.model.model_endpoint_uid,
+            "sampling_percentage": self.sampling_percentage,
         }
         if getattr(self.model, "labels", None):
             base_data["labels"] = self.model.labels
@@ -504,37 +501,55 @@ class _ModelLogPusher:
             self.output_stream.push([data], partition_key=partition_key)
             return
 
-        …
+        if self.output_stream:
+            # Ensure that the inputs are a list of lists
+            request["inputs"] = (
+                request["inputs"]
+                if not any(not isinstance(req, list) for req in request["inputs"])
+                else [request["inputs"]]
+            )
             microsec = (now_date() - start).microseconds
 
-        if self.…
-        self.…
-        …
+            if self.sampling_percentage != 100:
+                # Randomly select a subset of the requests based on the percentage
+                num_of_inputs = len(request["inputs"])
+                sampled_requests_indices = self._pick_random_requests(
+                    num_of_inputs, self.sampling_percentage
                 )
-            "…
-        …
+                if not sampled_requests_indices:
+                    # No events were selected for sampling
+                    return
+
+                request["inputs"] = [
+                    request["inputs"][i] for i in sampled_requests_indices
+                ]
+
+                if resp and "outputs" in resp and isinstance(resp["outputs"], list):
+                    resp["outputs"] = [
+                        resp["outputs"][i] for i in sampled_requests_indices
                     ]
-        data["…
-        …
+
+            data = self.base_data()
+            data["request"] = request
+            data["op"] = op
+            data["resp"] = resp
+            data["when"] = start_str
+            data["microsec"] = microsec
+            if getattr(self.model, "metrics", None):
+                data["metrics"] = self.model.metrics
+            data["effective_sample_count"] = len(request["inputs"])
+            self.output_stream.push([data], partition_key=partition_key)
+
+    @staticmethod
+    def _pick_random_requests(num_of_reqs: int, percentage: float) -> list[int]:
+        """
+        Randomly selects indices of requests to sample based on the given percentage
+
+        :param num_of_reqs: Number of requests to select from
+        :param percentage:  Sample percentage for each request
+        :return: A list containing the indices of the selected requests
+        """
+
+        return [
+            req for req in range(num_of_reqs) if random.random() < (percentage / 100)
+        ]
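
Note on the change above: `_ModelLogPusher` drops the fixed-interval `log_stream_sample`/`log_stream_batch` mechanism in favor of percentage-based random sampling, where each request in a batch is kept independently with probability `sampling_percentage / 100`. A minimal standalone sketch of that selection logic (the helper name and sample data below are illustrative; only the `random.random() < percentage / 100` test mirrors the diff):

import random

def pick_random_requests(num_of_reqs: int, percentage: float) -> list[int]:
    # Keep each index independently with probability percentage/100,
    # as _pick_random_requests does in the diff above.
    return [i for i in range(num_of_reqs) if random.random() < (percentage / 100)]

# Sample roughly 25% of an 8-element batch; outputs stay aligned with inputs
# because both are filtered by the same indices, as in the new push() logic.
inputs = [[0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8]]
outputs = [0, 1, 0, 1, 0, 1, 0, 1]
kept = pick_random_requests(len(inputs), 25)
sampled_inputs = [inputs[i] for i in kept]
sampled_outputs = [outputs[i] for i in kept]
print(f"kept {len(kept)} of {len(inputs)} requests")

Because each request is an independent Bernoulli trial, the pushed `effective_sample_count` fluctuates around the configured percentage rather than matching it exactly.
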
mlrun/utils/helpers.py
CHANGED

@@ -13,8 +13,10 @@
 # limitations under the License.
 
 import asyncio
+import base64
 import enum
 import functools
+import gzip
 import hashlib
 import inspect
 import itertools
@@ -23,6 +25,7 @@ import os
 import re
 import string
 import sys
+import traceback
 import typing
 import uuid
 import warnings
@@ -44,11 +47,16 @@ from pandas import Timedelta, Timestamp
 from yaml.representer import RepresenterError
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 import mlrun.common.helpers
+import mlrun.common.runtimes.constants as runtimes_constants
 import mlrun.common.schemas
 import mlrun.errors
 import mlrun.utils.regex
 import mlrun.utils.version.version
+import mlrun_pipelines.common.constants
+import mlrun_pipelines.models
+import mlrun_pipelines.utils
 from mlrun.common.constants import MYSQL_MEDIUMBLOB_SIZE_BYTES
 from mlrun.config import config
 from mlrun_pipelines.models import PipelineRun
@@ -1703,7 +1711,14 @@ def get_serving_spec():
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Failed to find serving spec in env var or config file"
         )
-    spec = json.loads(data)
+    # Attempt to decode and decompress, or use as-is for backward compatibility
+    try:
+        decoded_data = base64.b64decode(data)
+        decompressed_data = gzip.decompress(decoded_data)
+        spec = json.loads(decompressed_data.decode("utf-8"))
+    except (OSError, gzip.BadGzipFile, base64.binascii.Error, json.JSONDecodeError):
+        spec = json.loads(data)
+
     return spec
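
The `get_serving_spec` change above accepts a gzip-compressed, base64-encoded spec while still reading the legacy plain-JSON payload. A round-trip sketch of that scheme (the `encode_serving_spec` producer side is an assumption for illustration; only the decode-with-fallback shape mirrors the diff):

import base64
import gzip
import json

def encode_serving_spec(spec: dict) -> str:
    # Hypothetical producer side: gzip the JSON, then base64-encode it.
    raw = json.dumps(spec).encode("utf-8")
    return base64.b64encode(gzip.compress(raw)).decode("utf-8")

def decode_serving_spec(data: str) -> dict:
    # Same fallback shape as the diff: try decode + decompress first,
    # then treat the payload as plain JSON for backward compatibility.
    try:
        return json.loads(gzip.decompress(base64.b64decode(data)).decode("utf-8"))
    except (OSError, gzip.BadGzipFile, base64.binascii.Error, json.JSONDecodeError):
        return json.loads(data)

spec = {"graph": {"kind": "router"}}
assert decode_serving_spec(encode_serving_spec(spec)) == spec  # compressed path
assert decode_serving_spec(json.dumps(spec)) == spec           # legacy plain-JSON path
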
@@ -1904,3 +1919,150 @@ def join_urls(base_url: Optional[str], path: Optional[str]) -> str:
     if base_url is None:
         base_url = ""
     return f"{base_url.rstrip('/')}/{path.lstrip('/')}" if path else base_url
+
+
+class Workflow:
+    @staticmethod
+    def get_workflow_steps(workflow_id: str, project: str) -> list:
+        steps = []
+        db = mlrun.get_run_db()
+
+        def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
+            try:
+                _run = db.list_runs(
+                    project=project,
+                    labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={_step.node_name}",
+                )[0]
+            except IndexError:
+                _run = {
+                    "metadata": {
+                        "name": _step.display_name,
+                        "project": project,
+                    },
+                }
+            _run["step_kind"] = _step.step_type
+            if _step.skipped:
+                _run.setdefault("status", {})["state"] = (
+                    runtimes_constants.RunStates.skipped
+                )
+            steps.append(_run)
+
+        def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
+            project, name, hash_key = Workflow._extract_function_uri(
+                _step.get_annotation("mlrun/function-uri")
+            )
+            if name:
+                try:
+                    function = db.get_function(
+                        project=project, name=name, hash_key=hash_key
+                    )
+                except mlrun.errors.MLRunNotFoundError:
+                    # If the function is not found (if build failed for example), we will create a dummy
+                    # function object for the notification to display the function name
+                    function = {
+                        "metadata": {
+                            "name": name,
+                            "project": project,
+                            "hash_key": hash_key,
+                        },
+                    }
+                pod_phase = _step.phase
+                if _step.skipped:
+                    state = mlrun.common.schemas.FunctionState.skipped
+                else:
+                    state = runtimes_constants.PodPhases.pod_phase_to_run_state(
+                        pod_phase
+                    )
+                function["status"] = {"state": state}
+                if isinstance(function["metadata"].get("updated"), datetime.datetime):
+                    function["metadata"]["updated"] = function["metadata"][
+                        "updated"
+                    ].isoformat()
+                function["step_kind"] = _step.step_type
+                steps.append(function)
+
+        step_methods = {
+            mlrun_pipelines.common.constants.PipelineRunType.run: _add_run_step,
+            mlrun_pipelines.common.constants.PipelineRunType.build: _add_deploy_function_step,
+            mlrun_pipelines.common.constants.PipelineRunType.deploy: _add_deploy_function_step,
+        }
+
+        if not workflow_id:
+            return steps
+
+        try:
+            workflow_manifest = Workflow._get_workflow_manifest(workflow_id)
+        except Exception:
+            logger.warning(
+                "Failed to extract workflow steps from workflow manifest, "
+                "returning all runs with the workflow id label",
+                workflow_id=workflow_id,
+                traceback=traceback.format_exc(),
+            )
+            return db.list_runs(
+                project=project,
+                labels=f"workflow={workflow_id}",
+            )
+
+        if not workflow_manifest:
+            return steps
+
+        try:
+            for step in workflow_manifest.get_steps():
+                step_method = step_methods.get(step.step_type)
+                if step_method:
+                    step_method(step)
+            return steps
+        except Exception:
+            # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
+            logger.warning(
+                "Failed to extract workflow steps from workflow manifest, "
+                "returning all runs with the workflow id label",
+                workflow_id=workflow_id,
+                traceback=traceback.format_exc(),
+            )
+            return db.list_runs(
+                project=project,
+                labels=f"workflow={workflow_id}",
+            )
+
+    @staticmethod
+    def _extract_function_uri(function_uri: str) -> tuple[str, str, str]:
+        """
+        Extract the project, name, and hash key from a function uri.
+        Examples:
+        - "project/name@hash_key" returns project, name, hash_key
+        - "project/name" returns project, name, ""
+        """
+        project, name, hash_key = None, None, None
+        hashed_pattern = r"^(.+)/(.+)@(.+)$"
+        pattern = r"^(.+)/(.+)$"
+        match = re.match(hashed_pattern, function_uri)
+        if match:
+            project, name, hash_key = match.groups()
+        else:
+            match = re.match(pattern, function_uri)
+            if match:
+                project, name = match.groups()
+                hash_key = ""
+        return project, name, hash_key
+
+    @staticmethod
+    def _get_workflow_manifest(
+        workflow_id: str,
+    ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
+        kfp_client = mlrun_pipelines.utils.get_client(mlrun.mlconf.kfp_url)
+
+        # arbitrary timeout of 5 seconds, the workflow should be done by now
+        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
+        if not kfp_run:
+            return None
+
+        kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
+        return kfp_run.workflow_manifest()
+
+
+def as_dict(data: typing.Union[dict, str]) -> dict:
+    if isinstance(data, str):
+        return json.loads(data)
+    return data
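
`Workflow._extract_function_uri` (moved here from `NotificationPusher`, see the removal further below) parses function URIs with two anchored patterns, preferring the hashed form. A quick standalone check of the same regexes (the sample URIs are made up):

import re

def extract_function_uri(function_uri: str) -> tuple:
    # Same two patterns as Workflow._extract_function_uri above:
    # "project/name@hash_key" first, then "project/name" with an empty hash key.
    match = re.match(r"^(.+)/(.+)@(.+)$", function_uri)
    if match:
        return match.groups()
    match = re.match(r"^(.+)/(.+)$", function_uri)
    if match:
        project, name = match.groups()
        return project, name, ""
    return None, None, None

print(extract_function_uri("my-project/trainer@4a5b6c"))  # ('my-project', 'trainer', '4a5b6c')
print(extract_function_uri("my-project/trainer"))         # ('my-project', 'trainer', '')
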
mlrun/utils/notifications/notification/webhook.py
CHANGED

@@ -118,6 +118,9 @@ class WebhookNotification(NotificationBase):
 
         if isinstance(override_body, dict):
             for key, value in override_body.items():
+                if not isinstance(value, str):
+                    # If the value is not a string, we don't want to parse it
+                    continue
                 if re.search(r"{{\s*runs\s*}}", value):
                     str_parsed_runs = parse_runs()
                     override_body[key] = re.sub(
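
The guard added above matters because `re.search` raises `TypeError` on non-string values, so an `override_body` containing nested dicts or numbers would previously crash the webhook. A sketch of the guarded templating (the `render_override_body` helper and the JSON substitution are illustrative; mlrun's actual `parse_runs` output may be formatted differently):

import json
import re

def render_override_body(override_body: dict, parsed_runs: list) -> dict:
    for key, value in override_body.items():
        if not isinstance(value, str):
            # Non-string values (dicts, lists, numbers) pass through
            # untouched instead of raising TypeError in re.search.
            continue
        if re.search(r"{{\s*runs\s*}}", value):
            # A callable replacement avoids re.sub interpreting any
            # backslash escapes inside the JSON payload.
            override_body[key] = re.sub(
                r"{{\s*runs\s*}}", lambda _: json.dumps(parsed_runs), value
            )
    return override_body

body = {"text": "runs: {{ runs }}", "retries": 3, "meta": {"env": "prod"}}
print(render_override_body(body, [{"name": "train", "state": "completed"}]))
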
mlrun/utils/notifications/notification_pusher.py
CHANGED

@@ -15,7 +15,6 @@
 import asyncio
 import datetime
 import os
-import re
 import traceback
 import typing
 from concurrent.futures import ThreadPoolExecutor
@@ -31,11 +30,7 @@ import mlrun.model
 import mlrun.utils.helpers
 import mlrun.utils.notifications.notification as notification_module
 import mlrun.utils.notifications.notification.base as base
-import mlrun_pipelines.common.constants
-import mlrun_pipelines.common.ops
-import mlrun_pipelines.models
-import mlrun_pipelines.utils
-from mlrun.utils import logger
+from mlrun.utils import Workflow, logger
 from mlrun.utils.condition_evaluator import evaluate_condition_in_separate_process
 
 
@@ -144,15 +139,25 @@ class NotificationPusher(_NotificationPusherBase):
             error=mlrun.errors.err_to_str(exc),
         )
 
-    def _process_notification(self, …
-        …
+    def _process_notification(self, notification_object, run):
+        notification_object.status = run.status.notifications.get(
+            notification_object.name, {}
+        ).get(
             "status",
             mlrun.common.schemas.NotificationStatus.PENDING,
         )
-        if self._should_notify(run, …
-            self._load_notification(…
+        if self._should_notify(run, notification_object):
+            notification = self._load_notification(notification_object)
+            if notification.is_async:
+                self._async_notifications.append(
+                    (notification, run, notification_object)
+                )
+            else:
+                self._sync_notifications.append(
+                    (notification, run, notification_object)
+                )
 
-    def push(self):
+    def push(self, sync_push_callback=None, async_push_callback=None):
         """
         Asynchronously push notifications for all runs in the initialized runs list (if they should be pushed).
         When running from a sync environment, the notifications will be pushed asynchronously however the function will
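
With the refactor above, `_load_notification` only constructs the notification object; `_process_notification` decides which queue it joins based on `is_async`, and `push` can receive injected callbacks. A minimal sketch of that routing (class and queue names are illustrative stand-ins for mlrun's internals):

class FakeNotification:
    # Stand-in for a loaded notification; mlrun's objects carry more state.
    def __init__(self, name: str, is_async: bool):
        self.name = name
        self.is_async = is_async

sync_notifications, async_notifications = [], []

def process_notification(notification: FakeNotification, run: dict) -> None:
    # The caller, not the loader, routes to the sync or async queue.
    target = async_notifications if notification.is_async else sync_notifications
    target.append((notification, run))

process_notification(FakeNotification("console", is_async=False), {"uid": "run-1"})
process_notification(FakeNotification("webhook", is_async=True), {"uid": "run-1"})
print(len(sync_notifications), "sync,", len(async_notifications), "async")
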
@@ -206,8 +211,9 @@ class NotificationPusher(_NotificationPusherBase):
             notifications_amount=len(self._sync_notifications)
             + len(self._async_notifications),
         )
-        …
-        …
+        sync_push_callback = sync_push_callback or sync_push
+        async_push_callback = async_push_callback or async_push
+        self._push(sync_push_callback, async_push_callback)
 
     @staticmethod
     def _should_notify(
@@ -246,24 +252,19 @@ class NotificationPusher(_NotificationPusherBase):
         return False
 
     def _load_notification(
-        self, …
+        self, notification_object: mlrun.model.Notification
     ) -> base.NotificationBase:
         name = notification_object.name
         notification_type = notification_module.NotificationTypes(
             notification_object.kind or notification_module.NotificationTypes.console
         )
         params = {}
-        params.update(notification_object.secret_params)
-        params.update(notification_object.params)
+        params.update(notification_object.secret_params or {})
+        params.update(notification_object.params or {})
         default_params = self._default_params.get(notification_type.value, {})
         notification = notification_type.get_notification()(
             name, params, default_params
         )
-        if notification.is_async:
-            self._async_notifications.append((notification, run, notification_object))
-        else:
-            self._sync_notifications.append((notification, run, notification_object))
-
         logger.debug(
             "Loaded notification", notification=name, type=notification_type.value
         )
@@ -283,7 +284,9 @@ class NotificationPusher(_NotificationPusherBase):
             custom_message = (
                 f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
            )
-        …
+        project = run.metadata.project
+        workflow_id = run.status.results.get("workflow_id", None)
+        runs.extend(Workflow.get_workflow_steps(workflow_id, project))
 
         message = (
             self.messages.get(run.state(), "").format(resource=resource)
@@ -442,131 +445,6 @@ class NotificationPusher(_NotificationPusherBase):
             mask_params=False,
         )
 
-    def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
-        steps = []
-        db = mlrun.get_run_db()
-
-        def _add_run_step(_step: mlrun_pipelines.models.PipelineStep):
-            try:
-                _run = db.list_runs(
-                    project=run.metadata.project,
-                    labels=f"{mlrun_constants.MLRunInternalLabels.runner_pod}={_step.node_name}",
-                )[0]
-            except IndexError:
-                _run = {
-                    "metadata": {
-                        "name": _step.display_name,
-                        "project": run.metadata.project,
-                    },
-                }
-            _run["step_kind"] = _step.step_type
-            if _step.skipped:
-                _run.setdefault("status", {})["state"] = (
-                    runtimes_constants.RunStates.skipped
-                )
-            steps.append(_run)
-
-        def _add_deploy_function_step(_step: mlrun_pipelines.models.PipelineStep):
-            project, name, hash_key = self._extract_function_uri(
-                _step.get_annotation("mlrun/function-uri")
-            )
-            if name:
-                try:
-                    function = db.get_function(
-                        project=project, name=name, hash_key=hash_key
-                    )
-                except mlrun.errors.MLRunNotFoundError:
-                    # If the function is not found (if build failed for example), we will create a dummy
-                    # function object for the notification to display the function name
-                    function = {
-                        "metadata": {
-                            "name": name,
-                            "project": project,
-                            "hash_key": hash_key,
-                        },
-                    }
-                pod_phase = _step.phase
-                if _step.skipped:
-                    state = mlrun.common.schemas.FunctionState.skipped
-                else:
-                    state = runtimes_constants.PodPhases.pod_phase_to_run_state(
-                        pod_phase
-                    )
-                function["status"] = {"state": state}
-                if isinstance(function["metadata"].get("updated"), datetime.datetime):
-                    function["metadata"]["updated"] = function["metadata"][
-                        "updated"
-                    ].isoformat()
-                function["step_kind"] = _step.step_type
-                steps.append(function)
-
-        step_methods = {
-            mlrun_pipelines.common.constants.PipelineRunType.run: _add_run_step,
-            mlrun_pipelines.common.constants.PipelineRunType.build: _add_deploy_function_step,
-            mlrun_pipelines.common.constants.PipelineRunType.deploy: _add_deploy_function_step,
-        }
-
-        workflow_id = run.status.results.get("workflow_id", None)
-        if not workflow_id:
-            return steps
-
-        workflow_manifest = self._get_workflow_manifest(workflow_id)
-        if not workflow_manifest:
-            return steps
-
-        try:
-            for step in workflow_manifest.get_steps():
-                step_method = step_methods.get(step.step_type)
-                if step_method:
-                    step_method(step)
-            return steps
-        except Exception:
-            # If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
-            logger.warning(
-                "Failed to extract workflow steps from workflow manifest, "
-                "returning all runs with the workflow id label",
-                workflow_id=workflow_id,
-                traceback=traceback.format_exc(),
-            )
-            return db.list_runs(
-                project=run.metadata.project,
-                labels=f"workflow={workflow_id}",
-            )
-
-    @staticmethod
-    def _get_workflow_manifest(
-        workflow_id: str,
-    ) -> typing.Optional[mlrun_pipelines.models.PipelineManifest]:
-        kfp_client = mlrun_pipelines.utils.get_client(mlrun.mlconf.kfp_url)
-
-        # arbitrary timeout of 5 seconds, the workflow should be done by now
-        kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
-        if not kfp_run:
-            return None
-
-        kfp_run = mlrun_pipelines.models.PipelineRun(kfp_run)
-        return kfp_run.workflow_manifest()
-
-    def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
-        """
-        Extract the project, name, and hash key from a function uri.
-        Examples:
-        - "project/name@hash_key" returns project, name, hash_key
-        - "project/name" returns project, name, ""
-        """
-        project, name, hash_key = None, None, None
-        hashed_pattern = r"^(.+)/(.+)@(.+)$"
-        pattern = r"^(.+)/(.+)$"
-        match = re.match(hashed_pattern, function_uri)
-        if match:
-            project, name, hash_key = match.groups()
-        else:
-            match = re.match(pattern, function_uri)
-            if match:
-                project, name = match.groups()
-                hash_key = ""
-        return project, name, hash_key
-
 
 class CustomNotificationPusher(_NotificationPusherBase):
     def __init__(self, notification_types: typing.Optional[list[str]] = None):
@@ -624,6 +502,14 @@ class CustomNotificationPusher(_NotificationPusherBase):
         notification_type: str,
         params: typing.Optional[dict[str, str]] = None,
     ):
+        if notification_type not in [
+            notification_module.NotificationTypes.console,
+            notification_module.NotificationTypes.ipython,
+        ]:
+            # Only console and ipython notifications should be pushed by the
+            # client; the rest are pushed by the backend.
+            return
+
         if notification_type in self._async_notifications:
             self._async_notifications[notification_type].load_notification(params)
         elif notification_type in self._sync_notifications:
@@ -693,25 +579,9 @@ class CustomNotificationPusher(_NotificationPusherBase):
         pipeline_id: typing.Optional[str] = None,
         has_workflow_url: bool = False,
     ):
-        message = …
-        …
-        message += f" id={pipeline_id}"
-        commit_id = (
-            commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
+        html, message = self.generate_start_message(
+            commit_id, has_workflow_url, pipeline_id, project
         )
-        if commit_id:
-            message += f", commit={commit_id}"
-        if has_workflow_url:
-            url = mlrun.utils.helpers.get_workflow_url(project, pipeline_id)
-        else:
-            url = mlrun.utils.helpers.get_ui_url(project)
-        html = ""
-        if url:
-            html = (
-                message
-                + f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
-            )
-        message = message + f", check progress in {url}"
         self.push(message, "info", custom_html=html)
 
     def push_pipeline_run_results(
@@ -744,6 +614,30 @@ class CustomNotificationPusher(_NotificationPusherBase):
         text += f", state={state}"
         self.push(text, "info", runs=runs_list)
 
+    def generate_start_message(
+        self, commit_id=None, has_workflow_url=None, pipeline_id=None, project=None
+    ):
+        message = f"Workflow started in project {project}"
+        if pipeline_id:
+            message += f" id={pipeline_id}"
+        commit_id = (
+            commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
+        )
+        if commit_id:
+            message += f", commit={commit_id}"
+        if has_workflow_url:
+            url = mlrun.utils.helpers.get_workflow_url(project, pipeline_id)
+        else:
+            url = mlrun.utils.helpers.get_ui_url(project)
+        html = ""
+        if url:
+            html = (
+                message
+                + f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
+            )
+        message = message + f", check progress in {url}"
+        return html, message
+
 
 def sanitize_notification(notification_dict: dict):
     notification_dict.pop("secret_params", None)
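
`push_pipeline_start_message` now delegates message and HTML construction to the new `generate_start_message`, which returns an `(html, message)` pair. A standalone rendition under stated assumptions: the `url` parameter below stands in for `get_workflow_url()`/`get_ui_url()`, which need a configured server, and is a hypothetical address:

import os

def generate_start_message(commit_id=None, pipeline_id=None, project=None, url=None):
    message = f"Workflow started in project {project}"
    if pipeline_id:
        message += f" id={pipeline_id}"
    # Fall back to CI-provided commit SHAs, as in the diff above.
    commit_id = commit_id or os.environ.get("GITHUB_SHA") or os.environ.get("CI_COMMIT_SHA")
    if commit_id:
        message += f", commit={commit_id}"
    html = ""
    if url:
        html = message + f'<div><a href="{url}" target="_blank">click here to view progress</a></div>'
        message += f", check progress in {url}"
    return html, message

html, message = generate_start_message(
    pipeline_id="abc123",
    project="demo",
    url="https://mlrun.example.com/projects/demo/jobs",  # hypothetical UI URL
)
print(message)
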
mlrun/utils/version/version.json
CHANGED