mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc15__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__main__.py +0 -105
- mlrun/artifacts/__init__.py +1 -2
- mlrun/artifacts/base.py +8 -250
- mlrun/artifacts/dataset.py +1 -190
- mlrun/artifacts/manager.py +2 -41
- mlrun/artifacts/model.py +1 -140
- mlrun/artifacts/plots.py +1 -375
- mlrun/common/schemas/model_monitoring/__init__.py +4 -0
- mlrun/common/schemas/model_monitoring/constants.py +24 -3
- mlrun/common/schemas/model_monitoring/model_endpoints.py +13 -1
- mlrun/config.py +3 -3
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +50 -3
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/httpdb.py +4 -4
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/kfpops.py +5 -10
- mlrun/launcher/base.py +1 -1
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +2 -2
- mlrun/model.py +18 -9
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +158 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +92 -77
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +1 -1
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +67 -4
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +2 -3
- mlrun/model_monitoring/writer.py +69 -39
- mlrun/platforms/iguazio.py +2 -2
- mlrun/projects/project.py +18 -31
- mlrun/render.py +2 -10
- mlrun/run.py +1 -3
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/function.py +1 -1
- mlrun/runtimes/utils.py +1 -1
- mlrun/utils/helpers.py +27 -40
- mlrun/utils/notifications/notification/slack.py +4 -2
- mlrun/utils/notifications/notification_pusher.py +133 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/RECORD +75 -71
- mlrun/runtimes/mpijob/v1alpha1.py +0 -29
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/top_level.txt +0 -0
mlrun/run.py
CHANGED
|
@@ -47,7 +47,6 @@ from .runtimes import (
|
|
|
47
47
|
KubejobRuntime,
|
|
48
48
|
LocalRuntime,
|
|
49
49
|
MpiRuntimeV1,
|
|
50
|
-
MpiRuntimeV1Alpha1,
|
|
51
50
|
RemoteRuntime,
|
|
52
51
|
RemoteSparkRuntime,
|
|
53
52
|
RuntimeKinds,
|
|
@@ -606,7 +605,6 @@ def code_to_function(
|
|
|
606
605
|
ignored_tags: Optional[str] = None,
|
|
607
606
|
requirements_file: Optional[str] = "",
|
|
608
607
|
) -> Union[
|
|
609
|
-
MpiRuntimeV1Alpha1,
|
|
610
608
|
MpiRuntimeV1,
|
|
611
609
|
RemoteRuntime,
|
|
612
610
|
ServingRuntime,
|
|
@@ -1150,7 +1148,7 @@ def wait_for_runs_completion(
|
|
|
1150
1148
|
running = []
|
|
1151
1149
|
for run in runs:
|
|
1152
1150
|
state = run.state()
|
|
1153
|
-
if state in mlrun.runtimes.constants.RunStates.terminal_states():
|
|
1151
|
+
if state in mlrun.common.runtimes.constants.RunStates.terminal_states():
|
|
1154
1152
|
completed.append(run)
|
|
1155
1153
|
else:
|
|
1156
1154
|
running.append(run)
|
mlrun/runtimes/__init__.py
CHANGED
|
@@ -30,13 +30,13 @@ __all__ = [
|
|
|
30
30
|
|
|
31
31
|
from mlrun.runtimes.utils import resolve_spark_operator_version
|
|
32
32
|
|
|
33
|
+
from ..common.runtimes.constants import MPIJobCRDVersions
|
|
33
34
|
from .base import BaseRuntime, RunError, RuntimeClassMode # noqa
|
|
34
|
-
from .constants import MPIJobCRDVersions
|
|
35
35
|
from .daskjob import DaskCluster # noqa
|
|
36
36
|
from .databricks_job.databricks_runtime import DatabricksRuntime
|
|
37
37
|
from .kubejob import KubejobRuntime, KubeResource # noqa
|
|
38
38
|
from .local import HandlerRuntime, LocalRuntime # noqa
|
|
39
|
-
from .mpijob import
|
|
39
|
+
from .mpijob import MpiRuntimeV1 # noqa
|
|
40
40
|
from .nuclio import (
|
|
41
41
|
RemoteRuntime,
|
|
42
42
|
ServingRuntime,
|
|
@@ -264,7 +264,7 @@ class RuntimeKinds:
|
|
|
264
264
|
|
|
265
265
|
def get_runtime_class(kind: str):
|
|
266
266
|
if kind == RuntimeKinds.mpijob:
|
|
267
|
-
return
|
|
267
|
+
return MpiRuntimeV1
|
|
268
268
|
|
|
269
269
|
if kind == RuntimeKinds.spark:
|
|
270
270
|
return Spark3Runtime
|
mlrun/runtimes/base.py
CHANGED
|
@@ -469,7 +469,7 @@ class BaseRuntime(ModelObj):
|
|
|
469
469
|
def _store_function(self, runspec, meta, db):
|
|
470
470
|
meta.labels["kind"] = self.kind
|
|
471
471
|
mlrun.runtimes.utils.enrich_run_labels(
|
|
472
|
-
meta.labels, [mlrun.runtimes.constants.RunLabels.owner]
|
|
472
|
+
meta.labels, [mlrun.common.runtimes.constants.RunLabels.owner]
|
|
473
473
|
)
|
|
474
474
|
if runspec.spec.output_path:
|
|
475
475
|
runspec.spec.output_path = runspec.spec.output_path.replace(
|
|
@@ -580,9 +580,9 @@ class BaseRuntime(ModelObj):
|
|
|
580
580
|
|
|
581
581
|
elif (
|
|
582
582
|
not was_none
|
|
583
|
-
and last_state != mlrun.runtimes.constants.RunStates.completed
|
|
583
|
+
and last_state != mlrun.common.runtimes.constants.RunStates.completed
|
|
584
584
|
and last_state
|
|
585
|
-
not in mlrun.runtimes.constants.RunStates.error_and_abortion_states()
|
|
585
|
+
not in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
|
|
586
586
|
):
|
|
587
587
|
try:
|
|
588
588
|
runtime_cls = mlrun.runtimes.get_runtime_class(kind)
|
mlrun/runtimes/funcdoc.py
CHANGED
|
@@ -16,8 +16,6 @@ import ast
|
|
|
16
16
|
import inspect
|
|
17
17
|
import re
|
|
18
18
|
|
|
19
|
-
from deprecated import deprecated
|
|
20
|
-
|
|
21
19
|
from mlrun.model import FunctionEntrypoint
|
|
22
20
|
|
|
23
21
|
|
|
@@ -73,32 +71,6 @@ def func_dict(
|
|
|
73
71
|
}
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
# TODO: remove in 1.7.0
|
|
77
|
-
@deprecated(
|
|
78
|
-
version="1.5.0",
|
|
79
|
-
reason="'func_info' is deprecated and will be removed in 1.7.0, use 'ast_func_info' instead",
|
|
80
|
-
category=FutureWarning,
|
|
81
|
-
)
|
|
82
|
-
def func_info(fn) -> dict:
|
|
83
|
-
sig = inspect.signature(fn)
|
|
84
|
-
doc = inspect.getdoc(fn) or ""
|
|
85
|
-
|
|
86
|
-
out = func_dict(
|
|
87
|
-
name=fn.__name__,
|
|
88
|
-
doc=doc,
|
|
89
|
-
params=[inspect_param(p) for p in sig.parameters.values()],
|
|
90
|
-
returns=param_dict(
|
|
91
|
-
type=type_name(sig.return_annotation, empty_is_none=True), default=None
|
|
92
|
-
),
|
|
93
|
-
lineno=func_lineno(fn),
|
|
94
|
-
)
|
|
95
|
-
|
|
96
|
-
if not fn.__doc__ or not fn.__doc__.strip():
|
|
97
|
-
return out
|
|
98
|
-
|
|
99
|
-
return merge_doc(out, doc)
|
|
100
|
-
|
|
101
|
-
|
|
102
74
|
def func_lineno(fn):
|
|
103
75
|
try:
|
|
104
76
|
return inspect.getsourcelines(fn)[1]
|
mlrun/runtimes/local.py
CHANGED
|
@@ -493,7 +493,7 @@ def exec_from_params(handler, runobj: RunObject, context: MLClientCtx, cwd=None)
|
|
|
493
493
|
logger.warning("Run was aborted", err=err_to_str(exc))
|
|
494
494
|
# Run was aborted, the state run state is updated by the abort job, no need to commit again
|
|
495
495
|
context.set_state(
|
|
496
|
-
mlrun.runtimes.constants.RunStates.aborted, commit=False
|
|
496
|
+
mlrun.common.runtimes.constants.RunStates.aborted, commit=False
|
|
497
497
|
)
|
|
498
498
|
commit = False
|
|
499
499
|
except Exception as exc:
|
|
mlrun/runtimes/mpijob/__init__.py
CHANGED
|
@@ -21,28 +21,8 @@ from mlrun.config import config
|
|
|
21
21
|
from .. import MPIJobCRDVersions
|
|
22
22
|
from .abstract import AbstractMPIJobRuntime
|
|
23
23
|
from .v1 import MpiRuntimeV1
|
|
24
|
-
from .v1alpha1 import MpiRuntimeV1Alpha1
|
|
25
24
|
|
|
26
25
|
|
|
27
26
|
def _resolve_mpijob_crd_version():
|
|
28
27
|
# config is expected to get enriched from the API through the client-spec
|
|
29
28
|
return config.mpijob_crd_version or MPIJobCRDVersions.default()
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
class MpiRuntimeContainer(containers.DeclarativeContainer):
|
|
33
|
-
resolver = providers.Callable(
|
|
34
|
-
_resolve_mpijob_crd_version,
|
|
35
|
-
)
|
|
36
|
-
|
|
37
|
-
selector = providers.Selector(
|
|
38
|
-
resolver,
|
|
39
|
-
v1=providers.Object(MpiRuntimeV1),
|
|
40
|
-
v1alpha1=providers.Object(MpiRuntimeV1Alpha1),
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
# An empty selector to be overriden by the API
|
|
44
|
-
handler_selector = providers.Selector(
|
|
45
|
-
resolver,
|
|
46
|
-
v1=providers.Object(None),
|
|
47
|
-
v1alpha1=providers.Object(None),
|
|
48
|
-
)
|
mlrun/runtimes/mpijob/v1.py
CHANGED
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
from mlrun.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
|
|
14
|
+
from mlrun.common.runtimes.constants import MPIJobCRDVersions, MPIJobV1CleanPodPolicies
|
|
15
15
|
from mlrun.runtimes.mpijob.abstract import AbstractMPIJobRuntime, MPIResourceSpec
|
|
16
16
|
|
|
17
17
|
|
|
mlrun/runtimes/nuclio/function.py
CHANGED
|
@@ -778,7 +778,7 @@ class RemoteRuntime(KubeResource):
|
|
|
778
778
|
runtime_env["MLRUN_NAMESPACE"] = mlconf.namespace
|
|
779
779
|
if self.metadata.credentials.access_key:
|
|
780
780
|
runtime_env[
|
|
781
|
-
mlrun.runtimes.constants.FunctionEnvironmentVariables.auth_session
|
|
781
|
+
mlrun.common.runtimes.constants.FunctionEnvironmentVariables.auth_session
|
|
782
782
|
] = self.metadata.credentials.access_key
|
|
783
783
|
return runtime_env
|
|
784
784
|
|
mlrun/runtimes/utils.py
CHANGED
|
@@ -27,10 +27,10 @@ import mlrun.common.constants
|
|
|
27
27
|
import mlrun.common.schemas
|
|
28
28
|
import mlrun.utils.regex
|
|
29
29
|
from mlrun.artifacts import TableArtifact
|
|
30
|
+
from mlrun.common.runtimes.constants import RunLabels
|
|
30
31
|
from mlrun.config import config
|
|
31
32
|
from mlrun.errors import err_to_str
|
|
32
33
|
from mlrun.frameworks.parallel_coordinates import gen_pcp_plot
|
|
33
|
-
from mlrun.runtimes.constants import RunLabels
|
|
34
34
|
from mlrun.runtimes.generators import selector
|
|
35
35
|
from mlrun.utils import get_in, helpers, logger, verify_field_regex
|
|
36
36
|
|
mlrun/utils/helpers.py
CHANGED
|
@@ -39,7 +39,6 @@ import pandas
|
|
|
39
39
|
import semver
|
|
40
40
|
import yaml
|
|
41
41
|
from dateutil import parser
|
|
42
|
-
from deprecated import deprecated
|
|
43
42
|
from pandas._libs.tslibs.timestamps import Timedelta, Timestamp
|
|
44
43
|
from yaml.representer import RepresenterError
|
|
45
44
|
|
|
@@ -76,19 +75,6 @@ class OverwriteBuildParamsWarning(FutureWarning):
|
|
|
76
75
|
pass
|
|
77
76
|
|
|
78
77
|
|
|
79
|
-
# TODO: remove in 1.7.0
|
|
80
|
-
@deprecated(
|
|
81
|
-
version="1.5.0",
|
|
82
|
-
reason="'parse_versioned_object_uri' will be removed from this file in 1.7.0, use "
|
|
83
|
-
"'mlrun.common.helpers.parse_versioned_object_uri' instead",
|
|
84
|
-
category=FutureWarning,
|
|
85
|
-
)
|
|
86
|
-
def parse_versioned_object_uri(uri: str, default_project: str = ""):
|
|
87
|
-
return mlrun.common.helpers.parse_versioned_object_uri(
|
|
88
|
-
uri=uri, default_project=default_project
|
|
89
|
-
)
|
|
90
|
-
|
|
91
|
-
|
|
92
78
|
class StorePrefix:
|
|
93
79
|
"""map mlrun store objects to prefixes"""
|
|
94
80
|
|
|
@@ -119,14 +105,9 @@ class StorePrefix:
|
|
|
119
105
|
|
|
120
106
|
|
|
121
107
|
def get_artifact_target(item: dict, project=None):
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
tree = item.get("tree")
|
|
126
|
-
else:
|
|
127
|
-
db_key = item["spec"].get("db_key")
|
|
128
|
-
project_str = project or item["metadata"].get("project")
|
|
129
|
-
tree = item["metadata"].get("tree")
|
|
108
|
+
db_key = item["spec"].get("db_key")
|
|
109
|
+
project_str = project or item["metadata"].get("project")
|
|
110
|
+
tree = item["metadata"].get("tree")
|
|
130
111
|
|
|
131
112
|
kind = item.get("kind")
|
|
132
113
|
if kind in ["dataset", "model", "artifact"] and db_key:
|
|
@@ -135,11 +116,15 @@ def get_artifact_target(item: dict, project=None):
|
|
|
135
116
|
target = f"{target}@{tree}"
|
|
136
117
|
return target
|
|
137
118
|
|
|
138
|
-
return (
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
119
|
+
return item["spec"].get("target_path")
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
# TODO: left for migrations testing purposes. Remove in 1.8.0.
|
|
123
|
+
def is_legacy_artifact(artifact):
|
|
124
|
+
if isinstance(artifact, dict):
|
|
125
|
+
return "metadata" not in artifact
|
|
126
|
+
else:
|
|
127
|
+
return not hasattr(artifact, "metadata")
|
|
143
128
|
|
|
144
129
|
|
|
145
130
|
logger = create_logger(config.log_level, config.log_formatter, "mlrun", sys.stdout)
|
|
@@ -1018,8 +1003,9 @@ def get_ui_url(project, uid=None):
|
|
|
1018
1003
|
def get_workflow_url(project, id=None):
|
|
1019
1004
|
url = ""
|
|
1020
1005
|
if mlrun.mlconf.resolve_ui_url():
|
|
1021
|
-
url =
|
|
1022
|
-
mlrun.mlconf.resolve_ui_url()
|
|
1006
|
+
url = (
|
|
1007
|
+
f"{mlrun.mlconf.resolve_ui_url()}/{mlrun.mlconf.ui.projects_prefix}"
|
|
1008
|
+
f"/{project}/jobs/monitor-workflows/workflow/{id}"
|
|
1023
1009
|
)
|
|
1024
1010
|
return url
|
|
1025
1011
|
|
|
@@ -1291,13 +1277,6 @@ def str_to_timestamp(time_str: str, now_time: Timestamp = None):
|
|
|
1291
1277
|
return Timestamp(time_str)
|
|
1292
1278
|
|
|
1293
1279
|
|
|
1294
|
-
def is_legacy_artifact(artifact):
|
|
1295
|
-
if isinstance(artifact, dict):
|
|
1296
|
-
return "metadata" not in artifact
|
|
1297
|
-
else:
|
|
1298
|
-
return not hasattr(artifact, "metadata")
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
1280
|
def is_link_artifact(artifact):
|
|
1302
1281
|
if isinstance(artifact, dict):
|
|
1303
1282
|
return (
|
|
@@ -1345,16 +1324,16 @@ def format_run(run: dict, with_project=False) -> dict:
|
|
|
1345
1324
|
# as observed https://jira.iguazeng.com/browse/ML-5195
|
|
1346
1325
|
# set to unknown to ensure a status is returned
|
|
1347
1326
|
if run["status"] is None:
|
|
1348
|
-
run["status"] = inflection.titleize(
|
|
1327
|
+
run["status"] = inflection.titleize(
|
|
1328
|
+
mlrun.common.runtimes.constants.RunStates.unknown
|
|
1329
|
+
)
|
|
1349
1330
|
|
|
1350
1331
|
return run
|
|
1351
1332
|
|
|
1352
1333
|
|
|
1353
1334
|
def get_in_artifact(artifact: dict, key, default=None, raise_on_missing=False):
|
|
1354
1335
|
"""artifact can be dict or Artifact object"""
|
|
1355
|
-
if
|
|
1356
|
-
return artifact.get(key, default)
|
|
1357
|
-
elif key == "kind":
|
|
1336
|
+
if key == "kind":
|
|
1358
1337
|
return artifact.get(key, default)
|
|
1359
1338
|
else:
|
|
1360
1339
|
for block in ["metadata", "spec", "status"]:
|
|
@@ -1596,3 +1575,11 @@ def get_serving_spec():
|
|
|
1596
1575
|
)
|
|
1597
1576
|
spec = json.loads(data)
|
|
1598
1577
|
return spec
|
|
1578
|
+
|
|
1579
|
+
|
|
1580
|
+
def additional_filters_warning(additional_filters, class_name):
|
|
1581
|
+
if additional_filters and any(additional_filters):
|
|
1582
|
+
mlrun.utils.logger.warn(
|
|
1583
|
+
f"additional_filters parameter is not supported in {class_name},"
|
|
1584
|
+
f" parameter has been ignored."
|
|
1585
|
+
)
|
|
mlrun/utils/notifications/notification/slack.py
CHANGED
|
@@ -152,7 +152,9 @@ class SlackNotification(NotificationBase):
|
|
|
152
152
|
def _get_run_line(self, run: dict) -> dict:
|
|
153
153
|
meta = run["metadata"]
|
|
154
154
|
url = mlrun.utils.helpers.get_ui_url(meta.get("project"), meta.get("uid"))
|
|
155
|
-
|
|
155
|
+
|
|
156
|
+
# Only show the URL if the run is not a function (serving or mlrun function)
|
|
157
|
+
if run.get("kind") not in ["serving", None] and url:
|
|
156
158
|
line = f'<{url}|*{meta.get("name")}*>'
|
|
157
159
|
else:
|
|
158
160
|
line = meta.get("name")
|
|
@@ -169,7 +171,7 @@ class SlackNotification(NotificationBase):
|
|
|
169
171
|
result = mlrun.utils.helpers.dict_to_str(
|
|
170
172
|
run["status"].get("results", {}), ", "
|
|
171
173
|
)
|
|
172
|
-
return self._get_slack_row(result or
|
|
174
|
+
return self._get_slack_row(result or state)
|
|
173
175
|
|
|
174
176
|
@staticmethod
|
|
175
177
|
def _get_slack_row(text: str) -> dict:
|
|
mlrun/utils/notifications/notification_pusher.py
CHANGED
|
@@ -14,15 +14,21 @@
|
|
|
14
14
|
|
|
15
15
|
import asyncio
|
|
16
16
|
import datetime
|
|
17
|
+
import json
|
|
17
18
|
import os
|
|
19
|
+
import re
|
|
18
20
|
import traceback
|
|
19
21
|
import typing
|
|
20
22
|
from concurrent.futures import ThreadPoolExecutor
|
|
21
23
|
|
|
24
|
+
import kfp
|
|
25
|
+
|
|
26
|
+
import mlrun.common.runtimes.constants
|
|
22
27
|
import mlrun.common.schemas
|
|
23
28
|
import mlrun.config
|
|
24
29
|
import mlrun.db.base
|
|
25
30
|
import mlrun.errors
|
|
31
|
+
import mlrun.kfpops
|
|
26
32
|
import mlrun.lists
|
|
27
33
|
import mlrun.model
|
|
28
34
|
import mlrun.utils.helpers
|
|
@@ -238,20 +244,7 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
238
244
|
custom_message = (
|
|
239
245
|
f" (workflow: {run.metadata.labels['workflow']}){custom_message}"
|
|
240
246
|
)
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
workflow_id = run.status.results.get("workflow_id", None)
|
|
244
|
-
if workflow_id:
|
|
245
|
-
workflow_runs = db.list_runs(
|
|
246
|
-
project=run.metadata.project,
|
|
247
|
-
labels=f"workflow={workflow_id}",
|
|
248
|
-
)
|
|
249
|
-
logger.debug(
|
|
250
|
-
"Found workflow runs, extending notification runs",
|
|
251
|
-
workflow_id=workflow_id,
|
|
252
|
-
workflow_runs_amount=len(workflow_runs),
|
|
253
|
-
)
|
|
254
|
-
runs.extend(workflow_runs)
|
|
247
|
+
runs.extend(self.get_workflow_steps(run))
|
|
255
248
|
|
|
256
249
|
message = (
|
|
257
250
|
self.messages.get(run.state(), "").format(resource=resource)
|
|
@@ -395,6 +388,132 @@ class NotificationPusher(_NotificationPusherBase):
|
|
|
395
388
|
mask_params=False,
|
|
396
389
|
)
|
|
397
390
|
|
|
391
|
+
def get_workflow_steps(self, run: mlrun.model.RunObject) -> list:
|
|
392
|
+
steps = []
|
|
393
|
+
db = mlrun.get_run_db()
|
|
394
|
+
|
|
395
|
+
def _add_run_step(_node_name, _):
|
|
396
|
+
steps.append(
|
|
397
|
+
db.list_runs(
|
|
398
|
+
project=run.metadata.project,
|
|
399
|
+
labels=f"mlrun/runner-pod={_node_name}",
|
|
400
|
+
)[0]
|
|
401
|
+
)
|
|
402
|
+
|
|
403
|
+
def _add_deploy_function_step(_, _node_template):
|
|
404
|
+
project, name, hash_key = self._extract_function_uri(
|
|
405
|
+
_node_template["metadata"]["annotations"]["mlrun/function-uri"]
|
|
406
|
+
)
|
|
407
|
+
if name:
|
|
408
|
+
try:
|
|
409
|
+
function = db.get_function(
|
|
410
|
+
project=project, name=name, hash_key=hash_key
|
|
411
|
+
)
|
|
412
|
+
except mlrun.errors.MLRunNotFoundError:
|
|
413
|
+
# If the function is not found (if build failed for example), we will create a dummy
|
|
414
|
+
# function object for the notification to display the function name
|
|
415
|
+
function = {
|
|
416
|
+
"metadata": {
|
|
417
|
+
"name": name,
|
|
418
|
+
"project": project,
|
|
419
|
+
"hash_key": hash_key,
|
|
420
|
+
},
|
|
421
|
+
}
|
|
422
|
+
function["status"] = {
|
|
423
|
+
"state": mlrun.common.runtimes.constants.PodPhases.pod_phase_to_run_state(
|
|
424
|
+
node["phase"]
|
|
425
|
+
),
|
|
426
|
+
}
|
|
427
|
+
if isinstance(function["metadata"].get("updated"), datetime.datetime):
|
|
428
|
+
function["metadata"]["updated"] = function["metadata"][
|
|
429
|
+
"updated"
|
|
430
|
+
].isoformat()
|
|
431
|
+
steps.append(function)
|
|
432
|
+
|
|
433
|
+
step_methods = {
|
|
434
|
+
mlrun.kfpops.PipelineRunType.run: _add_run_step,
|
|
435
|
+
mlrun.kfpops.PipelineRunType.build: _add_deploy_function_step,
|
|
436
|
+
mlrun.kfpops.PipelineRunType.deploy: _add_deploy_function_step,
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
workflow_id = run.status.results.get("workflow_id", None)
|
|
440
|
+
if not workflow_id:
|
|
441
|
+
return steps
|
|
442
|
+
|
|
443
|
+
workflow_manifest = self._get_workflow_manifest(workflow_id)
|
|
444
|
+
if not workflow_manifest:
|
|
445
|
+
return steps
|
|
446
|
+
|
|
447
|
+
try:
|
|
448
|
+
workflow_nodes = sorted(
|
|
449
|
+
workflow_manifest["status"]["nodes"].items(),
|
|
450
|
+
key=lambda _node: _node[1]["finishedAt"],
|
|
451
|
+
)
|
|
452
|
+
for node_name, node in workflow_nodes:
|
|
453
|
+
if node["type"] != "Pod":
|
|
454
|
+
# Skip the parent DAG node
|
|
455
|
+
continue
|
|
456
|
+
|
|
457
|
+
node_template = next(
|
|
458
|
+
template
|
|
459
|
+
for template in workflow_manifest["spec"]["templates"]
|
|
460
|
+
if template["name"] == node["templateName"]
|
|
461
|
+
)
|
|
462
|
+
step_type = node_template["metadata"]["annotations"].get(
|
|
463
|
+
"mlrun/pipeline-step-type"
|
|
464
|
+
)
|
|
465
|
+
step_method = step_methods.get(step_type)
|
|
466
|
+
if step_method:
|
|
467
|
+
step_method(node_name, node_template)
|
|
468
|
+
return steps
|
|
469
|
+
except Exception:
|
|
470
|
+
# If we fail to read the pipeline steps, we will return the list of runs that have the same workflow id
|
|
471
|
+
logger.warning(
|
|
472
|
+
"Failed to extract workflow steps from workflow manifest, "
|
|
473
|
+
"returning all runs with the workflow id label",
|
|
474
|
+
workflow_id=workflow_id,
|
|
475
|
+
traceback=traceback.format_exc(),
|
|
476
|
+
)
|
|
477
|
+
return db.list_runs(
|
|
478
|
+
project=run.metadata.project,
|
|
479
|
+
labels=f"workflow={workflow_id}",
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
@staticmethod
|
|
483
|
+
def _get_workflow_manifest(workflow_id: str) -> typing.Optional[dict]:
|
|
484
|
+
kfp_client = kfp.Client(namespace=mlrun.config.config.namespace)
|
|
485
|
+
|
|
486
|
+
# arbitrary timeout of 5 seconds, the workflow should be done by now
|
|
487
|
+
kfp_run = kfp_client.wait_for_run_completion(workflow_id, 5)
|
|
488
|
+
if not kfp_run:
|
|
489
|
+
return None
|
|
490
|
+
|
|
491
|
+
kfp_run = kfp_run.to_dict()
|
|
492
|
+
try:
|
|
493
|
+
return json.loads(kfp_run["pipeline_runtime"]["workflow_manifest"])
|
|
494
|
+
except Exception:
|
|
495
|
+
return None
|
|
496
|
+
|
|
497
|
+
def _extract_function_uri(self, function_uri: str) -> tuple[str, str, str]:
|
|
498
|
+
"""
|
|
499
|
+
Extract the project, name, and hash key from a function uri.
|
|
500
|
+
Examples:
|
|
501
|
+
- "project/name@hash_key" returns project, name, hash_key
|
|
502
|
+
- "project/name returns" project, name, ""
|
|
503
|
+
"""
|
|
504
|
+
project, name, hash_key = None, None, None
|
|
505
|
+
hashed_pattern = r"^(.+)/(.+)@(.+)$"
|
|
506
|
+
pattern = r"^(.+)/(.+)$"
|
|
507
|
+
match = re.match(hashed_pattern, function_uri)
|
|
508
|
+
if match:
|
|
509
|
+
project, name, hash_key = match.groups()
|
|
510
|
+
else:
|
|
511
|
+
match = re.match(pattern, function_uri)
|
|
512
|
+
if match:
|
|
513
|
+
project, name = match.groups()
|
|
514
|
+
hash_key = ""
|
|
515
|
+
return project, name, hash_key
|
|
516
|
+
|
|
398
517
|
|
|
399
518
|
class CustomNotificationPusher(_NotificationPusherBase):
|
|
400
519
|
def __init__(self, notification_types: list[str] = None):
|
mlrun/utils/version/version.json
CHANGED
{mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc15.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: mlrun
|
|
3
|
-
Version: 1.7.0rc14
|
|
3
|
+
Version: 1.7.0rc15
|
|
4
4
|
Summary: Tracking and config of machine learning runs
|
|
5
5
|
Home-page: https://github.com/mlrun/mlrun
|
|
6
6
|
Author: Yaron Haviv
|
|
@@ -44,7 +44,7 @@ Requires-Dist: semver ~=3.0
|
|
|
44
44
|
Requires-Dist: dependency-injector ~=4.41
|
|
45
45
|
Requires-Dist: fsspec <2024.4,>=2023.9.2
|
|
46
46
|
Requires-Dist: v3iofs ~=0.1.17
|
|
47
|
-
Requires-Dist: storey ~=1.7.
|
|
47
|
+
Requires-Dist: storey ~=1.7.11
|
|
48
48
|
Requires-Dist: inflection ~=0.5.0
|
|
49
49
|
Requires-Dist: python-dotenv ~=0.17.0
|
|
50
50
|
Requires-Dist: setuptools ~=69.1
|