mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -109
- mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +20 -41
- mlrun/artifacts/model.py +8 -140
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +40 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +7 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +54 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +101 -47
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +4 -2
- mlrun/model.py +25 -11
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +48 -213
- mlrun/model_monitoring/writer.py +101 -121
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +73 -45
- mlrun/render.py +11 -13
- mlrun/run.py +6 -41
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +6 -6
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +9 -35
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +1 -39
- mlrun/utils/helpers.py +72 -71
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +12 -5
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +134 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/utils/helpers.py
CHANGED
|
@@ -39,7 +39,7 @@ import pandas
|
|
|
39
39
|
import semver
|
|
40
40
|
import yaml
|
|
41
41
|
from dateutil import parser
|
|
42
|
-
from
|
|
42
|
+
from mlrun_pipelines.models import PipelineRun
|
|
43
43
|
from pandas._libs.tslibs.timestamps import Timedelta, Timestamp
|
|
44
44
|
from yaml.representer import RepresenterError
|
|
45
45
|
|
|
@@ -76,19 +76,6 @@ class OverwriteBuildParamsWarning(FutureWarning):
|
|
|
76
76
|
pass
|
|
77
77
|
|
|
78
78
|
|
|
79
|
-
# TODO: remove in 1.7.0
|
|
80
|
-
@deprecated(
|
|
81
|
-
version="1.5.0",
|
|
82
|
-
reason="'parse_versioned_object_uri' will be removed from this file in 1.7.0, use "
|
|
83
|
-
"'mlrun.common.helpers.parse_versioned_object_uri' instead",
|
|
84
|
-
category=FutureWarning,
|
|
85
|
-
)
|
|
86
|
-
def parse_versioned_object_uri(uri: str, default_project: str = ""):
|
|
87
|
-
return mlrun.common.helpers.parse_versioned_object_uri(
|
|
88
|
-
uri=uri, default_project=default_project
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
|
|
92
79
|
class StorePrefix:
|
|
93
80
|
"""map mlrun store objects to prefixes"""
|
|
94
81
|
|
|
@@ -119,14 +106,9 @@ class StorePrefix:
|
|
|
119
106
|
|
|
120
107
|
|
|
121
108
|
def get_artifact_target(item: dict, project=None):
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
tree = item.get("tree")
|
|
126
|
-
else:
|
|
127
|
-
db_key = item["spec"].get("db_key")
|
|
128
|
-
project_str = project or item["metadata"].get("project")
|
|
129
|
-
tree = item["metadata"].get("tree")
|
|
109
|
+
db_key = item["spec"].get("db_key")
|
|
110
|
+
project_str = project or item["metadata"].get("project")
|
|
111
|
+
tree = item["metadata"].get("tree")
|
|
130
112
|
|
|
131
113
|
kind = item.get("kind")
|
|
132
114
|
if kind in ["dataset", "model", "artifact"] and db_key:
|
|
@@ -135,11 +117,15 @@ def get_artifact_target(item: dict, project=None):
|
|
|
135
117
|
target = f"{target}@{tree}"
|
|
136
118
|
return target
|
|
137
119
|
|
|
138
|
-
return (
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
120
|
+
return item["spec"].get("target_path")
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# TODO: left for migrations testing purposes. Remove in 1.8.0.
|
|
124
|
+
def is_legacy_artifact(artifact):
|
|
125
|
+
if isinstance(artifact, dict):
|
|
126
|
+
return "metadata" not in artifact
|
|
127
|
+
else:
|
|
128
|
+
return not hasattr(artifact, "metadata")
|
|
143
129
|
|
|
144
130
|
|
|
145
131
|
logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
|
|
@@ -801,34 +787,6 @@ def gen_html_table(header, rows=None):
|
|
|
801
787
|
return style + '<table class="tg">\n' + out + "</table>\n\n"
|
|
802
788
|
|
|
803
789
|
|
|
804
|
-
def new_pipe_metadata(
|
|
805
|
-
artifact_path: str = None,
|
|
806
|
-
cleanup_ttl: int = None,
|
|
807
|
-
op_transformers: list[typing.Callable] = None,
|
|
808
|
-
):
|
|
809
|
-
from kfp.dsl import PipelineConf
|
|
810
|
-
|
|
811
|
-
def _set_artifact_path(task):
|
|
812
|
-
from kubernetes import client as k8s_client
|
|
813
|
-
|
|
814
|
-
task.add_env_variable(
|
|
815
|
-
k8s_client.V1EnvVar(name="MLRUN_ARTIFACT_PATH", value=artifact_path)
|
|
816
|
-
)
|
|
817
|
-
return task
|
|
818
|
-
|
|
819
|
-
conf = PipelineConf()
|
|
820
|
-
cleanup_ttl = cleanup_ttl or int(config.kfp_ttl)
|
|
821
|
-
|
|
822
|
-
if cleanup_ttl:
|
|
823
|
-
conf.set_ttl_seconds_after_finished(cleanup_ttl)
|
|
824
|
-
if artifact_path:
|
|
825
|
-
conf.add_op_transformer(_set_artifact_path)
|
|
826
|
-
if op_transformers:
|
|
827
|
-
for op_transformer in op_transformers:
|
|
828
|
-
conf.add_op_transformer(op_transformer)
|
|
829
|
-
return conf
|
|
830
|
-
|
|
831
|
-
|
|
832
790
|
def _convert_python_package_version_to_image_tag(version: typing.Optional[str]):
|
|
833
791
|
return (
|
|
834
792
|
version.replace("+", "-").replace("0.0.0-", "") if version is not None else None
|
|
@@ -1018,14 +976,15 @@ def get_ui_url(project, uid=None):
|
|
|
1018
976
|
def get_workflow_url(project, id=None):
|
|
1019
977
|
url = ""
|
|
1020
978
|
if mlrun.mlconf.resolve_ui_url():
|
|
1021
|
-
url =
|
|
1022
|
-
mlrun.mlconf.resolve_ui_url()
|
|
979
|
+
url = (
|
|
980
|
+
f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}"
|
|
981
|
+
f"/{project}/jobs/monitor-workflows/workflow/{id}"
|
|
1023
982
|
)
|
|
1024
983
|
return url
|
|
1025
984
|
|
|
1026
985
|
|
|
1027
986
|
def are_strings_in_exception_chain_messages(
|
|
1028
|
-
exception: Exception, strings_list
|
|
987
|
+
exception: Exception, strings_list: list[str]
|
|
1029
988
|
) -> bool:
|
|
1030
989
|
while exception is not None:
|
|
1031
990
|
if any([string in str(exception) for string in strings_list]):
|
|
@@ -1291,13 +1250,6 @@ def str_to_timestamp(time_str: str, now_time: Timestamp = None):
|
|
|
1291
1250
|
return Timestamp(time_str)
|
|
1292
1251
|
|
|
1293
1252
|
|
|
1294
|
-
def is_legacy_artifact(artifact):
|
|
1295
|
-
if isinstance(artifact, dict):
|
|
1296
|
-
return "metadata" not in artifact
|
|
1297
|
-
else:
|
|
1298
|
-
return not hasattr(artifact, "metadata")
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
1253
|
def is_link_artifact(artifact):
|
|
1302
1254
|
if isinstance(artifact, dict):
|
|
1303
1255
|
return (
|
|
@@ -1307,7 +1259,7 @@ def is_link_artifact(artifact):
|
|
|
1307
1259
|
return artifact.kind == mlrun.common.schemas.ArtifactCategories.link.value
|
|
1308
1260
|
|
|
1309
1261
|
|
|
1310
|
-
def format_run(run:
|
|
1262
|
+
def format_run(run: PipelineRun, with_project=False) -> dict:
|
|
1311
1263
|
fields = [
|
|
1312
1264
|
"id",
|
|
1313
1265
|
"name",
|
|
@@ -1344,17 +1296,17 @@ def format_run(run: dict, with_project=False) -> dict:
|
|
|
1344
1296
|
# pipelines are yet to populate the status or workflow has failed
|
|
1345
1297
|
# as observed https://jira.iguazeng.com/browse/ML-5195
|
|
1346
1298
|
# set to unknown to ensure a status is returned
|
|
1347
|
-
if run
|
|
1348
|
-
run["status"] = inflection.titleize(
|
|
1299
|
+
if run.get("status", None) is None:
|
|
1300
|
+
run["status"] = inflection.titleize(
|
|
1301
|
+
mlrun.common.runtimes.constants.RunStates.unknown
|
|
1302
|
+
)
|
|
1349
1303
|
|
|
1350
1304
|
return run
|
|
1351
1305
|
|
|
1352
1306
|
|
|
1353
1307
|
def get_in_artifact(artifact: dict, key, default=None, raise_on_missing=False):
|
|
1354
1308
|
"""artifact can be dict or Artifact object"""
|
|
1355
|
-
if
|
|
1356
|
-
return artifact.get(key, default)
|
|
1357
|
-
elif key == "kind":
|
|
1309
|
+
if key == "kind":
|
|
1358
1310
|
return artifact.get(key, default)
|
|
1359
1311
|
else:
|
|
1360
1312
|
for block in ["metadata", "spec", "status"]:
|
|
@@ -1596,3 +1548,52 @@ def get_serving_spec():
|
|
|
1596
1548
|
)
|
|
1597
1549
|
spec = json.loads(data)
|
|
1598
1550
|
return spec
|
|
1551
|
+
|
|
1552
|
+
|
|
1553
|
+
def additional_filters_warning(additional_filters, class_name):
|
|
1554
|
+
if additional_filters and any(additional_filters):
|
|
1555
|
+
mlrun.utils.logger.warn(
|
|
1556
|
+
f"additional_filters parameter is not supported in {class_name},"
|
|
1557
|
+
f" parameter has been ignored."
|
|
1558
|
+
)
|
|
1559
|
+
|
|
1560
|
+
|
|
1561
|
+
def validate_component_version_compatibility(
|
|
1562
|
+
component_name: typing.Literal["iguazio", "nuclio"], *min_versions: str
|
|
1563
|
+
):
|
|
1564
|
+
"""
|
|
1565
|
+
:param component_name: Name of the component to validate compatibility for.
|
|
1566
|
+
:param min_versions: Valid minimum version(s) required, assuming no 2 versions has equal major and minor.
|
|
1567
|
+
"""
|
|
1568
|
+
parsed_min_versions = [
|
|
1569
|
+
semver.VersionInfo.parse(min_version) for min_version in min_versions
|
|
1570
|
+
]
|
|
1571
|
+
parsed_current_version = None
|
|
1572
|
+
component_current_version = None
|
|
1573
|
+
try:
|
|
1574
|
+
if component_name == "iguazio":
|
|
1575
|
+
parsed_current_version = mlrun.mlconf.get_parsed_igz_version()
|
|
1576
|
+
component_current_version = mlrun.mlconf.igz_version
|
|
1577
|
+
if component_name == "nuclio":
|
|
1578
|
+
parsed_current_version = semver.VersionInfo.parse(
|
|
1579
|
+
mlrun.mlconf.nuclio_version
|
|
1580
|
+
)
|
|
1581
|
+
component_current_version = mlrun.mlconf.nuclio_version
|
|
1582
|
+
if not parsed_current_version:
|
|
1583
|
+
return True
|
|
1584
|
+
except ValueError:
|
|
1585
|
+
# only log when version is set but invalid
|
|
1586
|
+
if component_current_version:
|
|
1587
|
+
logger.warning(
|
|
1588
|
+
"Unable to parse current version, assuming compatibility",
|
|
1589
|
+
component_name=component_name,
|
|
1590
|
+
current_version=component_current_version,
|
|
1591
|
+
min_versions=min_versions,
|
|
1592
|
+
)
|
|
1593
|
+
return True
|
|
1594
|
+
|
|
1595
|
+
parsed_min_versions.sort(reverse=True)
|
|
1596
|
+
for parsed_min_version in parsed_min_versions:
|
|
1597
|
+
if parsed_current_version < parsed_min_version:
|
|
1598
|
+
return False
|
|
1599
|
+
return True
|
|
@@ -77,7 +77,7 @@ class NotificationBase:
|
|
|
77
77
|
return f"[{severity}] {message}"
|
|
78
78
|
return (
|
|
79
79
|
f"[{severity}] {message} for project {alert.project} "
|
|
80
|
-
f"UID {event_data.entity.
|
|
80
|
+
f"UID {event_data.entity.ids[0]}. Values {event_data.value_dict}"
|
|
81
81
|
)
|
|
82
82
|
|
|
83
83
|
if not runs:
|
|
@@ -135,7 +135,7 @@ class SlackNotification(NotificationBase):
|
|
|
135
135
|
line = [
|
|
136
136
|
self._get_slack_row(f":bell: {alert.name} alert has occurred"),
|
|
137
137
|
self._get_slack_row(f"*Project:*\n{alert.project}"),
|
|
138
|
-
self._get_slack_row(f"*UID:*\n{event_data.entity.
|
|
138
|
+
self._get_slack_row(f"*UID:*\n{event_data.entity.ids[0]}"),
|
|
139
139
|
]
|
|
140
140
|
if event_data.value_dict:
|
|
141
141
|
data_lines = []
|
|
@@ -144,7 +144,9 @@ class SlackNotification(NotificationBase):
|
|
|
144
144
|
data_text = "\n".join(data_lines)
|
|
145
145
|
line.append(self._get_slack_row(f"*Event data:*\n{data_text}"))
|
|
146
146
|
|
|
147
|
-
if url := mlrun.utils.helpers.get_ui_url(
|
|
147
|
+
if url := mlrun.utils.helpers.get_ui_url(
|
|
148
|
+
alert.project, event_data.entity.ids[0]
|
|
149
|
+
):
|
|
148
150
|
line.append(self._get_slack_row(f"*Overview:*\n<{url}|*Job overview*>"))
|
|
149
151
|
|
|
150
152
|
return line
|
|
@@ -152,24 +154,29 @@ class SlackNotification(NotificationBase):
|
|
|
152
154
|
def _get_run_line(self, run: dict) -> dict:
|
|
153
155
|
meta = run["metadata"]
|
|
154
156
|
url = mlrun.utils.helpers.get_ui_url(meta.get("project"), meta.get("uid"))
|
|
155
|
-
|
|
157
|
+
|
|
158
|
+
# Only show the URL if the run is not a function (serving or mlrun function)
|
|
159
|
+
kind = run.get("step_kind")
|
|
160
|
+
if url and not kind or kind == "run":
|
|
156
161
|
line = f'<{url}|*{meta.get("name")}*>'
|
|
157
162
|
else:
|
|
158
163
|
line = meta.get("name")
|
|
159
164
|
state = run["status"].get("state", "")
|
|
165
|
+
if kind:
|
|
166
|
+
line = f'{line} *({run.get("step_kind", run.get("kind", ""))})*'
|
|
160
167
|
line = f'{self.emojis.get(state, ":question:")} {line}'
|
|
161
168
|
return self._get_slack_row(line)
|
|
162
169
|
|
|
163
170
|
def _get_run_result(self, run: dict) -> dict:
|
|
164
171
|
state = run["status"].get("state", "")
|
|
165
172
|
if state == "error":
|
|
166
|
-
error_status = run["status"].get("error", "")
|
|
173
|
+
error_status = run["status"].get("error", "") or state
|
|
167
174
|
result = f"*{error_status}*"
|
|
168
175
|
else:
|
|
169
176
|
result = mlrun.utils.helpers.dict_to_str(
|
|
170
177
|
run["status"].get("results", {}), ", "
|
|
171
178
|
)
|
|
172
|
-
return self._get_slack_row(result or
|
|
179
|
+
return self._get_slack_row(result or state)
|
|
173
180
|
|
|
174
181
|
@staticmethod
|
|
175
182
|
def _get_slack_row(text: str) -> dict:
|
|
@@ -57,7 +57,7 @@ class WebhookNotification(NotificationBase):
|
|
|
57
57
|
request_body["alert"] = alert.dict()
|
|
58
58
|
if event_data:
|
|
59
59
|
request_body["value"] = event_data.value_dict
|
|
60
|
-
request_body["id"] = event_data.entity.
|
|
60
|
+
request_body["id"] = event_data.entity.ids[0]
|
|
61
61
|
|
|
62
62
|
if custom_html:
|
|
63
63
|
request_body["custom_html"] = custom_html
|
|
@@ -14,11 +14,17 @@
|
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import datetime
|
|
17
|
+
import json
|
|
17
18
|
import os
|
|
19
|
+
import re
|
|
18
20
|
import traceback
|
|
19
21
|
import typing
|
|
20
22
|
from concurrent.futures import ThreadPoolExecutor
|
|
21
23
|
|
|
24
|
+
import kfp
|
|
25
|
+
import mlrun_pipelines.common.ops
|
|
26
|
+
|
|
27
|
+
import mlrun.common.runtimes.constants
|
|
22
28
|
import mlrun.common.schemas
|
|
23
29
|
import mlrun.config
|
|
24
30
|
import mlrun.db.base
|
|
@@ -238,20 +244,7 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
238
244
|
custom_message = (
|
|
239
245
|
f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
|
|
240
246
|
)
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
workflow_id = run.status.results.get("workflow_id", None)
|
|
244
|
-
if workflow_id:
|
|
245
|
-
workflow_runs = db.list_runs(
|
|
246
|
-
project=run.metadata.project,
|
|
247
|
-
labels=f"workflow={workflow_id}",
|
|
248
|
-
)
|
|
249
|
-
logger.debug(
|
|
250
|
-
"Found workflow runs, extending notification runs",
|
|
251
|
-
workflow_id=workflow_id,
|
|
252
|
-
workflow_runs_amount=len(workflow_runs),
|
|
253
|
-
)
|
|
254
|
-
runs.extend(workflow_runs)
|
|
247
|
+
runs.extend(self.get_workflow_steps(run))
|
|
255
248
|
|
|
256
249
|
message = (
|
|
257
250
|
self.messages.get(run.state(), "").format(resource=resource)
|
|
@@ -395,6 +388,133 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
395
388
|
mask_params=False,
|
|
396
389
|
)
|
|
397
390
|
|
|
391
|
+
def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
|
|
392
|
+
steps = []
|
|
393
|
+
db = mlrun.get_run_db()
|
|
394
|
+
|
|
395
|
+
def _add_run_step(_node_name, _node_template, _step_kind):
|
|
396
|
+
_run = db.list_runs(
|
|
397
|
+
project=run.metadata.project,
|
|
398
|
+
labels=f"mlrun/runner-pod={_node_name}",
|
|
399
|
+
)[0]
|
|
400
|
+
_run["step_kind"] = _step_kind
|
|
401
|
+
steps.append(_run)
|
|
402
|
+
|
|
403
|
+
def _add_deploy_function_step(_, _node_template, _step_kind):
|
|
404
|
+
project, name, hash_key = self._extract_function_uri(
|
|
405
|
+
_node_template["metadata"]["annotations"]["mlrun/function-uri"]
|
|
406
|
+
)
|
|
407
|
+
if name:
|
|
408
|
+
try:
|
|
409
|
+
function = db.get_function(
|
|
410
|
+
project=project, name=name, hash_key=hash_key
|
|
411
|
+
)
|
|
412
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
413
|
+
# If the function is not found (if build failed for example), we will create a dummy
|
|
414
|
+
# function object for the notification to display the function name
|
|
415
|
+
function = {
|
|
416
|
+
"metadata": {
|
|
417
|
+
"name": name,
|
|
418
|
+
"project": project,
|
|
419
|
+
"hash_key": hash_key,
|
|
420
|
+
},
|
|
421
|
+
}
|
|
422
|
+
function["status"] = {
|
|
423
|
+
"state": mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
|
|
424
|
+
node["phase"]
|
|
425
|
+
),
|
|
426
|
+
}
|
|
427
|
+
if isinstance(function["metadata"].get("updated"), datetime.datetime):
|
|
428
|
+
function["metadata"]["updated"] = function["metadata"][
|
|
429
|
+
"updated"
|
|
430
|
+
].isoformat()
|
|
431
|
+
function["step_kind"] = _step_kind
|
|
432
|
+
steps.append(function)
|
|
433
|
+
|
|
434
|
+
step_methods = {
|
|
435
|
+
mlrun_pipelines.common.ops.PipelineRunType.run: _add_run_step,
|
|
436
|
+
mlrun_pipelines.common.ops.PipelineRunType.build: _add_deploy_function_step,
|
|
437
|
+
mlrun_pipelines.common.ops.PipelineRunType.deploy: _add_deploy_function_step,
|
|
438
|
+
}
|
|
439
|
+
|
|
440
|
+
workflow_id = run.status.results.get("workflow_id", None)
|
|
441
|
+
if not workflow_id:
|
|
442
|
+
return steps
|
|
443
|
+
|
|
444
|
+
workflow_manifest = self._get_workflow_manifest(workflow_id)
|
|
445
|
+
if not workflow_manifest:
|
|
446
|
+
return steps
|
|
447
|
+
|
|
448
|
+
try:
|
|
449
|
+
workflow_nodes = sorted(
|
|
450
|
+
workflow_manifest["status"]["nodes"].items(),
|
|
451
|
+
key=lambda _node: _node[1]["finishedAt"],
|
|
452
|
+
)
|
|
453
|
+
for node_name, node in workflow_nodes:
|
|
454
|
+
if node["type"] != "Pod":
|
|
455
|
+
# Skip the parent DAG node
|
|
456
|
+
continue
|
|
457
|
+
|
|
458
|
+
node_template = next(
|
|
459
|
+
template
|
|
460
|
+
for template in workflow_manifest["spec"]["templates"]
|
|
461
|
+
if template["name"] == node["templateName"]
|
|
462
|
+
)
|
|
463
|
+
step_type = node_template["metadata"]["annotations"].get(
|
|
464
|
+
"mlrun/pipeline-step-type"
|
|
465
|
+
)
|
|
466
|
+
step_method = step_methods.get(step_type)
|
|
467
|
+
if step_method:
|
|
468
|
+
step_method(node_name, node_template, step_type)
|
|
469
|
+
return steps
|
|
470
|
+
except Exception:
|
|
471
|
+
# If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
|
|
472
|
+
logger.warning(
|
|
473
|
+
"Failed to extract workflow steps from workflow manifest, "
|
|
474
|
+
"returning all runs with the workflow id label",
|
|
475
|
+
workflow_id=workflow_id,
|
|
476
|
+
traceback=traceback.format_exc(),
|
|
477
|
+
)
|
|
478
|
+
return db.list_runs(
|
|
479
|
+
project=run.metadata.project,
|
|
480
|
+
labels=f"workflow={workflow_id}",
|
|
481
|
+
)
|
|
482
|
+
|
|
483
|
+
@staticmethod
|
|
484
|
+
def _get_workflow_manifest(workflow_id: str) -> typing.Optional[dict]:
|
|
485
|
+
kfp_client = kfp.Client(namespace=mlrun.mlconf.namespace)
|
|
486
|
+
|
|
487
|
+
# arbitrary timeout of 5 seconds, the workflow should be done by now
|
|
488
|
+
kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
|
|
489
|
+
if not kfp_run:
|
|
490
|
+
return None
|
|
491
|
+
|
|
492
|
+
kfp_run = kfp_run.to_dict()
|
|
493
|
+
try:
|
|
494
|
+
return json.loads(kfp_run["pipeline_runtime"]["workflow_manifest"])
|
|
495
|
+
except Exception:
|
|
496
|
+
return None
|
|
497
|
+
|
|
498
|
+
def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
|
|
499
|
+
"""
|
|
500
|
+
Extract the project, name, and hash key from a function uri.
|
|
501
|
+
Examples:
|
|
502
|
+
- "project/name@hash_key" returns project, name, hash_key
|
|
503
|
+
- "project/name returns" project, name, ""
|
|
504
|
+
"""
|
|
505
|
+
project, name, hash_key = None, None, None
|
|
506
|
+
hashed_pattern = r"^(.+)/(.+)@(.+)$"
|
|
507
|
+
pattern = r"^(.+)/(.+)$"
|
|
508
|
+
match = re.match(hashed_pattern, function_uri)
|
|
509
|
+
if match:
|
|
510
|
+
project, name, hash_key = match.groups()
|
|
511
|
+
else:
|
|
512
|
+
match = re.match(pattern, function_uri)
|
|
513
|
+
if match:
|
|
514
|
+
project, name = match.groups()
|
|
515
|
+
hash_key = ""
|
|
516
|
+
return project, name, hash_key
|
|
517
|
+
|
|
398
518
|
|
|
399
519
|
class CustomNotificationPusher(_NotificationPusherBase):
|
|
400
520
|
def __init__(self, notification_types: list[str] = None):
|
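mlrun/utils/version/version.json
CHANGED
(diff body for this file is not present in this extract)
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA
CHANGED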
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlrun
|
|
3
|
-
Version: 1.7.0rc14
|
|
3
|
+
Version: 1.7.0rc16
|
|
4
4
|
Summary: Tracking and config of machine learning runs
|
|
5
5
|
Home-page: https://github.com/mlrun/mlrun
|
|
6
6
|
Author: Yaron Haviv
|
|
@@ -26,7 +26,6 @@ Requires-Dist: GitPython >=3.1.41,~=3.1
|
|
|
26
26
|
Requires-Dist: aiohttp ~=3.9
|
|
27
27
|
Requires-Dist: aiohttp-retry ~=2.8
|
|
28
28
|
Requires-Dist: click ~=8.1
|
|
29
|
-
Requires-Dist: kfp ~=1.8
|
|
30
29
|
Requires-Dist: nest-asyncio ~=1.0
|
|
31
30
|
Requires-Dist: ipython ~=8.10
|
|
32
31
|
Requires-Dist: nuclio-jupyter ~=0.9.16
|
|
@@ -44,13 +43,15 @@ Requires-Dist: semver ~=3.0
|
|
|
44
43
|
Requires-Dist: dependency-injector ~=4.41
|
|
45
44
|
Requires-Dist: fsspec <2024.4,>=2023.9.2
|
|
46
45
|
Requires-Dist: v3iofs ~=0.1.17
|
|
47
|
-
Requires-Dist: storey ~=1.7.
|
|
46
|
+
Requires-Dist: storey ~=1.7.11
|
|
48
47
|
Requires-Dist: inflection ~=0.5.0
|
|
49
48
|
Requires-Dist: python-dotenv ~=0.17.0
|
|
50
49
|
Requires-Dist: setuptools ~=69.1
|
|
51
50
|
Requires-Dist: deprecated ~=1.2
|
|
52
51
|
Requires-Dist: jinja2 >=3.1.3,~=3.1
|
|
53
52
|
Requires-Dist: orjson <4,>=3.9.15
|
|
53
|
+
Requires-Dist: mlrun-pipelines-kfp-common-experiment ~=0.2.0
|
|
54
|
+
Requires-Dist: mlrun-pipelines-kfp-v1-8-experiment ~=0.2.0
|
|
54
55
|
Provides-Extra: alibaba-oss
|
|
55
56
|
Requires-Dist: ossfs ==2023.12.0 ; extra == 'alibaba-oss'
|
|
56
57
|
Requires-Dist: oss2 ==2.18.1 ; extra == 'alibaba-oss'
|