mlrun-1.7.0rc15-py3-none-any.whl → mlrun-1.7.0rc16-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun has been flagged as potentially problematic.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -4
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +7 -1
- mlrun/artifacts/base.py +28 -3
- mlrun/artifacts/dataset.py +8 -0
- mlrun/artifacts/manager.py +18 -0
- mlrun/artifacts/model.py +7 -0
- mlrun/artifacts/plots.py +13 -0
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -1
- mlrun/common/schemas/model_monitoring/constants.py +16 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +60 -1
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +4 -1
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +97 -43
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/launcher/base.py +3 -2
- mlrun/lists.py +2 -0
- mlrun/model.py +7 -2
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/applications/_application_steps.py +1 -2
- mlrun/model_monitoring/applications/context.py +1 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +64 -38
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +56 -202
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/stream_processing.py +46 -210
- mlrun/model_monitoring/writer.py +49 -99
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +19 -200
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +55 -14
- mlrun/render.py +9 -3
- mlrun/run.py +5 -38
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +8 -34
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +0 -38
- mlrun/utils/helpers.py +45 -31
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +9 -4
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +15 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +3 -2
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +71 -65
- mlrun/kfpops.py +0 -860
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/platforms/iguazio.py
CHANGED
@@ -15,12 +15,9 @@
 import json
 import os
 import urllib
-from collections import namedtuple
 from urllib.parse import urlparse
 
-import kfp.dsl
 import requests
-import semver
 import v3io
 
 import mlrun.errors
@@ -29,203 +26,6 @@ from mlrun.utils import dict_to_json
 
 _cached_control_session = None
 
-VolumeMount = namedtuple("Mount", ["path", "sub_path"])
-
-
-def mount_v3io(
-    name="v3io",
-    remote="",
-    access_key="",
-    user="",
-    secret=None,
-    volume_mounts=None,
-):
-    """Modifier function to apply to a Container Op to volume mount a v3io path
-
-    :param name: the volume name
-    :param remote: the v3io path to use for the volume. ~/ prefix will be replaced with /users/<username>/
-    :param access_key: the access key used to auth against v3io. if not given V3IO_ACCESS_KEY env var will be used
-    :param user: the username used to auth against v3io. if not given V3IO_USERNAME env var will be used
-    :param secret: k8s secret name which would be used to get the username and access key to auth against v3io.
-    :param volume_mounts: list of VolumeMount. empty volume mounts & remote will default to mount /v3io & /User.
-    """
-    volume_mounts, user = _enrich_and_validate_v3io_mounts(
-        remote=remote,
-        volume_mounts=volume_mounts,
-        user=user,
-    )
-
-    def _attach_volume_mounts_and_creds(container_op: kfp.dsl.ContainerOp):
-        from kubernetes import client as k8s_client
-
-        vol = v3io_to_vol(name, remote, access_key, user, secret=secret)
-        container_op.add_volume(vol)
-        for volume_mount in volume_mounts:
-            container_op.container.add_volume_mount(
-                k8s_client.V1VolumeMount(
-                    mount_path=volume_mount.path,
-                    sub_path=volume_mount.sub_path,
-                    name=name,
-                )
-            )
-
-        if not secret:
-            container_op = v3io_cred(access_key=access_key, user=user)(container_op)
-        return container_op
-
-    return _attach_volume_mounts_and_creds
-
-
-def _enrich_and_validate_v3io_mounts(remote="", volume_mounts=None, user=""):
-    if remote and not volume_mounts:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "volume_mounts must be specified when remote is given"
-        )
-
-    # Empty remote & volume_mounts defaults are volume mounts of /v3io and /User
-    if not remote and not volume_mounts:
-        user = _resolve_mount_user(user)
-        if not user:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "user name/env must be specified when using empty remote and volume_mounts"
-            )
-        volume_mounts = [
-            VolumeMount(path="/v3io", sub_path=""),
-            VolumeMount(path="/User", sub_path="users/" + user),
-        ]
-
-    if not isinstance(volume_mounts, list) and any(
-        [not isinstance(x, VolumeMount) for x in volume_mounts]
-    ):
-        raise TypeError("mounts should be a list of Mount")
-
-    return volume_mounts, user
-
-
-def _resolve_mount_user(user=None):
-    return user or os.environ.get("V3IO_USERNAME")
-
-
-def mount_spark_conf():
-    def _mount_spark(container_op: kfp.dsl.ContainerOp):
-        from kubernetes import client as k8s_client
-
-        container_op.container.add_volume_mount(
-            k8s_client.V1VolumeMount(
-                name="spark-master-config", mount_path="/etc/config/spark"
-            )
-        )
-        return container_op
-
-    return _mount_spark
-
-
-def mount_v3iod(namespace, v3io_config_configmap):
-    def _mount_v3iod(container_op: kfp.dsl.ContainerOp):
-        from kubernetes import client as k8s_client
-
-        def add_vol(name, mount_path, host_path):
-            vol = k8s_client.V1Volume(
-                name=name,
-                host_path=k8s_client.V1HostPathVolumeSource(path=host_path, type=""),
-            )
-            container_op.add_volume(vol)
-            container_op.container.add_volume_mount(
-                k8s_client.V1VolumeMount(mount_path=mount_path, name=name)
-            )
-
-        # this is a legacy path for the daemon shared memory
-        host_path = "/dev/shm/"
-
-        # path to shared memory for daemon was changed in Iguazio 3.2.3-b1
-        igz_version = mlrun.mlconf.get_parsed_igz_version()
-        if igz_version and igz_version >= semver.VersionInfo.parse("3.2.3-b1"):
-            host_path = "/var/run/iguazio/dayman-shm/"
-        add_vol(name="shm", mount_path="/dev/shm", host_path=host_path + namespace)
-
-        add_vol(
-            name="v3iod-comm",
-            mount_path="/var/run/iguazio/dayman",
-            host_path="/var/run/iguazio/dayman/" + namespace,
-        )
-
-        vol = k8s_client.V1Volume(
-            name="daemon-health", empty_dir=k8s_client.V1EmptyDirVolumeSource()
-        )
-        container_op.add_volume(vol)
-        container_op.container.add_volume_mount(
-            k8s_client.V1VolumeMount(
-                mount_path="/var/run/iguazio/daemon_health", name="daemon-health"
-            )
-        )
-
-        vol = k8s_client.V1Volume(
-            name="v3io-config",
-            config_map=k8s_client.V1ConfigMapVolumeSource(
-                name=v3io_config_configmap, default_mode=420
-            ),
-        )
-        container_op.add_volume(vol)
-        container_op.container.add_volume_mount(
-            k8s_client.V1VolumeMount(mount_path="/etc/config/v3io", name="v3io-config")
-        )
-
-        container_op.container.add_env_variable(
-            k8s_client.V1EnvVar(
-                name="CURRENT_NODE_IP",
-                value_from=k8s_client.V1EnvVarSource(
-                    field_ref=k8s_client.V1ObjectFieldSelector(
-                        api_version="v1", field_path="status.hostIP"
-                    )
-                ),
-            )
-        )
-        container_op.container.add_env_variable(
-            k8s_client.V1EnvVar(
-                name="IGZ_DATA_CONFIG_FILE", value="/igz/java/conf/v3io.conf"
-            )
-        )
-
-        return container_op
-
-    return _mount_v3iod
-
-
-def v3io_cred(api="", user="", access_key=""):
-    """
-    Modifier function to copy local v3io env vars to container
-
-    Usage::
-
-        train = train_op(...)
-        train.apply(use_v3io_cred())
-    """
-
-    def _use_v3io_cred(container_op: kfp.dsl.ContainerOp):
-        from os import environ
-
-        from kubernetes import client as k8s_client
-
-        web_api = api or environ.get("V3IO_API") or mlconf.v3io_api
-        _user = user or environ.get("V3IO_USERNAME")
-        _access_key = access_key or environ.get("V3IO_ACCESS_KEY")
-        v3io_framesd = mlconf.v3io_framesd or environ.get("V3IO_FRAMESD")
-
-        return (
-            container_op.container.add_env_variable(
-                k8s_client.V1EnvVar(name="V3IO_API", value=web_api)
-            )
-            .add_env_variable(k8s_client.V1EnvVar(name="V3IO_USERNAME", value=_user))
-            .add_env_variable(
-                k8s_client.V1EnvVar(name="V3IO_ACCESS_KEY", value=_access_key)
-            )
-            .add_env_variable(
-                k8s_client.V1EnvVar(name="V3IO_FRAMESD", value=v3io_framesd)
-            )
-        )
-
-    return _use_v3io_cred
-
 
 def split_path(mntpath=""):
     if mntpath[0] == "/":
@@ -582,3 +382,22 @@ def sanitize_username(username: str):
     So simply replace it with dash
     """
     return username.replace("_", "-")
+
+
+def min_iguazio_versions(*versions):
+    def decorator(function):
+        def wrapper(*args, **kwargs):
+            if mlrun.utils.helpers.validate_component_version_compatibility(
+                "iguazio", *versions
+            ):
+                return function(*args, **kwargs)
+
+            message = (
+                f"{function.__name__} is supported since Iguazio {' or '.join(versions)}, currently using "
+                f"Iguazio {mlconf.igz_version}."
+            )
+            raise mlrun.errors.MLRunIncompatibleVersionError(message)
+
+        return wrapper
+
+    return decorator
mlrun/projects/operations.py
CHANGED
@@ -15,7 +15,7 @@
 import warnings
 from typing import Optional, Union
 
-import kfp
+from mlrun_pipelines.models import PipelineNodeWrapper
 
 import mlrun
 from mlrun.utils import hub_prefix
@@ -76,7 +76,7 @@ def run_function(
     notifications: list[mlrun.model.Notification] = None,
     returns: Optional[list[Union[str, dict[str, str]]]] = None,
     builder_env: Optional[list] = None,
-) -> Union[mlrun.model.RunObject, kfp.dsl.ContainerOp]:
+) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
     """Run a local or remote task as part of a local/kubeflow pipeline
 
     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
@@ -86,7 +86,7 @@ def run_function(
     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
     e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
    project runs provide additional notifications/reporting and exception handling.
-    inside a Kubeflow pipeline (KFP) run_function() generates KFP "ContainerOps" which are used to form a DAG
+    inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
     some behavior may differ between regular runs and deferred KFP runs.
 
     example (use with function object)::
@@ -166,7 +166,7 @@ def run_function(
                         artifact type can be given there. The artifact key must appear in the dictionary as
                         "key": "the_key".
     :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
-    :return: MLRun RunObject or KubeFlow containerOp
+    :return: MLRun RunObject or PipelineNodeWrapper
     """
     engine, function = _get_engine_and_function(function, project_object)
     task = mlrun.new_task(
@@ -254,7 +254,7 @@ def build_function(
     overwrite_build_params: bool = False,
     extra_args: str = None,
     force_build: bool = False,
-) -> Union[BuildStatus, kfp.dsl.ContainerOp]:
+) -> Union[BuildStatus, PipelineNodeWrapper]:
     """deploy ML function, build container with its dependencies
 
     :param function: Name of the function (in the project) or function object
@@ -294,7 +294,11 @@ def build_function(
         if overwrite_build_params:
             function.spec.build.commands = None
         if requirements or requirements_file:
-            function.with_requirements(
+            function.with_requirements(
+                requirements=requirements,
+                requirements_file=requirements_file,
+                overwrite=True,
+            )
         if commands:
             function.with_commands(commands)
         return function.deploy_step(
@@ -358,7 +362,7 @@ def deploy_function(
     builder_env: dict = None,
     project_object=None,
     mock: bool = None,
-) -> Union[DeployStatus, kfp.dsl.ContainerOp]:
+) -> Union[DeployStatus, PipelineNodeWrapper]:
     """deploy real-time (nuclio based) functions
 
     :param function: name of the function (in the project) or function object
mlrun/projects/pipelines.py
CHANGED
@@ -20,9 +20,10 @@ import tempfile
 import typing
 import uuid
 
-import kfp
-
+import mlrun_pipelines.common.models
+import mlrun_pipelines.patcher
 from kfp.compiler import compiler
+from mlrun_pipelines.helpers import new_pipe_metadata
 
 import mlrun
 import mlrun.common.schemas
@@ -31,7 +32,6 @@ from mlrun.errors import err_to_str
 from mlrun.utils import (
     get_ui_url,
     logger,
-    new_pipe_metadata,
     normalize_workflow_name,
     retry_until_successful,
 )
@@ -301,72 +301,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
     }
 
 
-# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
-# converts it to a k8s object. As part of the flow in the Compile.compile() method,
-# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
-# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
-# I ran across the following problem when seeking for a method to set the priority_class_name:
-# https://github.com/kubeflow/pipelines/issues/3594
-# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
-# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
-# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
-def _create_enriched_mlrun_workflow(
-    self,
-    pipeline_func: typing.Callable,
-    pipeline_name: typing.Optional[str] = None,
-    pipeline_description: typing.Optional[str] = None,
-    params_list: typing.Optional[list[dsl.PipelineParam]] = None,
-    pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
-):
-    """Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
-    workflow = self._original_create_workflow(
-        pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
-    )
-    # We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
-    # that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
-    # we know can be raised.
-    try:
-        functions = []
-        if pipeline_context.functions:
-            try:
-                functions = pipeline_context.functions.values()
-            except Exception as err:
-                logger.debug(
-                    "Unable to retrieve project functions, not enriching workflow with mlrun",
-                    error=err_to_str(err),
-                )
-                return workflow
-
-        # enrich each pipeline step with your desire k8s attribute
-        for kfp_step_template in workflow["spec"]["templates"]:
-            if kfp_step_template.get("container"):
-                for function_obj in functions:
-                    # we condition within each function since the comparison between the function and
-                    # the kfp pod may change depending on the attribute type.
-                    _set_function_attribute_on_kfp_pod(
-                        kfp_step_template,
-                        function_obj,
-                        "PriorityClassName",
-                        "priority_class_name",
-                    )
-                    _enrich_kfp_pod_security_context(
-                        kfp_step_template,
-                        function_obj,
-                    )
-    except mlrun.errors.MLRunInvalidArgumentError:
-        raise
-    except Exception as err:
-        logger.debug(
-            "Something in the enrichment of kfp pods failed", error=err_to_str(err)
-        )
-    return workflow
-
-
-# patching function as class method
-kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
-kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
-
-
 def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
     project_instance, name, tag, hash_key = parse_versioned_object_uri(
         key, project.metadata.name
@@ -457,7 +391,10 @@ class _PipelineRunStatus:
 
     @property
     def state(self):
-        if self._state not in mlrun.run.RunStatuses.stable_statuses():
+        if (
+            self._state
+            not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
+        ):
             self._state = self._engine.get_state(self.run_id, self.project)
         return self._state
 
@@ -754,7 +691,7 @@ class _LocalRunner(_PipelineRunner):
         err = None
         try:
             workflow_handler(**workflow_spec.args)
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
         except Exception as exc:
             err = exc
             logger.exception("Workflow run failed")
@@ -762,7 +699,7 @@ class _LocalRunner(_PipelineRunner):
                 f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
         project.notifiers.push_pipeline_run_results(
             pipeline_context.runs_map.values(), state=state
@@ -921,9 +858,9 @@ class _RemoteRunner(_PipelineRunner):
                 f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
             project.notifiers.push_pipeline_start_message(
                 project.metadata.name,
             )
@@ -1116,7 +1053,7 @@ def load_and_run(
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
 
-    if run.state == mlrun.run.RunStatuses.failed:
+    if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
 
     if wait_for_completion:
@@ -1131,7 +1068,7 @@ def load_and_run(
 
         pipeline_state, _, _ = project.get_run_status(run)
         context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-        if pipeline_state != mlrun.run.RunStatuses.succeeded:
+        if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
             raise RuntimeError(
                 f"Workflow {workflow_log_message} failed, state={pipeline_state}"
            )
mlrun/projects/project.py
CHANGED
@@ -31,12 +31,15 @@ from typing import Callable, Optional, Union
 import dotenv
 import git
 import git.exc
-import kfp
+import mlrun_pipelines.common.models
+import mlrun_pipelines.mounts
 import nuclio.utils
 import requests
 import yaml
+from mlrun_pipelines.models import PipelineNodeWrapper
 
 import mlrun.common.helpers
+import mlrun.common.schemas.artifact
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.db
 import mlrun.errors
@@ -48,7 +51,8 @@ import mlrun.runtimes.pod
 import mlrun.runtimes.utils
 import mlrun.serving
 import mlrun.utils.regex
-from mlrun.common.schemas.alert import AlertConfig
+from mlrun.alerts.alert import AlertConfig
+from mlrun.common.schemas.alert import AlertTemplate
 from mlrun.datastore.datastore_profile import DatastoreProfile, DatastoreProfile2Json
 from mlrun.runtimes.nuclio.function import RemoteRuntime
 
@@ -1586,6 +1590,23 @@ class MlrunProject(ModelObj):
         )
         return item
 
+    def delete_artifact(
+        self,
+        item: Artifact,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
+        """Delete an artifact object in the DB and optionally delete the artifact data
+
+        :param item: Artifact object (can be any type, such as dataset, model, feature store).
+        :param deletion_strategy: The artifact deletion strategy types.
+        :param secrets: Credentials needed to access the artifact data.
+        """
+        am = self._get_artifact_manager()
+        am.delete_artifact(item, deletion_strategy, secrets)
+
     def log_dataset(
         self,
         key,
@@ -2868,7 +2889,7 @@ class MlrunProject(ModelObj):
                         (which will be converted to the class using its `from_crontab` constructor),
                         see this link for help:
                         https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
-
+                        For using the pre-defined workflow's schedule, set `schedule=True`
         :param timeout: Timeout in seconds to wait for pipeline completion (watch will be activated)
         :param source: Source to use instead of the actual `project.spec.source` (used when engine is remote).
                        Can be a one of:
@@ -2879,10 +2900,11 @@ class MlrunProject(ModelObj):
                        For other engines the source is used to validate that the code is up-to-date.
         :param cleanup_ttl:
                        Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
-
+                       workflow and all its resources are deleted)
         :param notifications:
                        List of notifications to send for workflow completion
-
+
+        :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
         """
 
         arguments = arguments or {}
@@ -2956,7 +2978,7 @@ class MlrunProject(ModelObj):
             notifications=notifications,
         )
         # run is None when scheduling
-        if run and run.state == mlrun.run.RunStatuses.failed:
+        if run and run.state == mlrun_pipelines.common.models.RunStatuses.failed:
             return run
         if not workflow_spec.schedule:
             # Failure and schedule messages already logged
@@ -3133,7 +3155,7 @@ class MlrunProject(ModelObj):
         notifications: list[mlrun.model.Notification] = None,
         returns: Optional[list[Union[str, dict[str, str]]]] = None,
         builder_env: Optional[dict] = None,
-    ) -> typing.Union[mlrun.model.RunObject, kfp.dsl.ContainerOp]:
+    ) -> typing.Union[mlrun.model.RunObject, PipelineNodeWrapper]:
         """Run a local or remote task as part of a local/kubeflow pipeline
 
         example (use with project)::
@@ -3189,7 +3211,7 @@ class MlrunProject(ModelObj):
                             artifact type can be given there. The artifact key must appear in the dictionary as
                             "key": "the_key".
         :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
-        :return: MLRun RunObject or KubeFlow containerOp
+        :return: MLRun RunObject or PipelineNodeWrapper
         """
         return run_function(
             function,
@@ -3232,7 +3254,7 @@ class MlrunProject(ModelObj):
         requirements_file: str = None,
         extra_args: str = None,
         force_build: bool = False,
-    ) -> typing.Union[BuildStatus, kfp.dsl.ContainerOp]:
+    ) -> typing.Union[BuildStatus, PipelineNodeWrapper]:
         """deploy ML function, build container with its dependencies
 
         :param function: name of the function (in the project) or function object
@@ -3353,7 +3375,7 @@ class MlrunProject(ModelObj):
         requirements_file: str = None,
         extra_args: str = None,
         target_dir: str = None,
-    ) -> typing.Union[BuildStatus, kfp.dsl.ContainerOp]:
+    ) -> typing.Union[BuildStatus, PipelineNodeWrapper]:
         """Builder docker image for the project, based on the project's build config. Parameters allow to override
         the build config.
         If the project has a source configured and pull_at_runtime is not configured, this source will be cloned to the
@@ -3464,7 +3486,7 @@ class MlrunProject(ModelObj):
         verbose: bool = None,
         builder_env: dict = None,
         mock: bool = None,
-    ) -> typing.Union[DeployStatus, kfp.dsl.ContainerOp]:
+    ) -> typing.Union[DeployStatus, PipelineNodeWrapper]:
         """deploy real-time (nuclio based) functions
 
         :param function: name of the function (in the project) or function object
@@ -3876,7 +3898,9 @@ class MlrunProject(ModelObj):
 
         mlrun.db.get_run_db().delete_api_gateway(name=name, project=self.name)
 
-    def store_alert_config(self, alert_data: AlertConfig, alert_name=None):
+    def store_alert_config(
+        self, alert_data: AlertConfig, alert_name=None
+    ) -> AlertConfig:
         """
         Create/modify an alert.
         :param alert_data: The data of the alert.
@@ -3886,7 +3910,7 @@ class MlrunProject(ModelObj):
         db = mlrun.db.get_run_db(secrets=self._secrets)
         if alert_name is None:
             alert_name = alert_data.name
-        return db.store_alert_config(alert_name, alert_data)
+        return db.store_alert_config(alert_name, alert_data, project=self.metadata.name)
 
     def get_alert_config(self, alert_name: str) -> AlertConfig:
         """
@@ -3897,7 +3921,7 @@ class MlrunProject(ModelObj):
         db = mlrun.db.get_run_db(secrets=self._secrets)
         return db.get_alert_config(alert_name, self.metadata.name)
 
-    def list_alerts_configs(self):
+    def list_alerts_configs(self) -> list[AlertConfig]:
         """
         Retrieve list of alerts of a project.
         :return: All the alerts objects of the project.
@@ -3943,6 +3967,23 @@ class MlrunProject(ModelObj):
             alert_name = alert_data.name
         db.reset_alert_config(alert_name, self.metadata.name)
 
+    def get_alert_template(self, template_name: str) -> AlertTemplate:
+        """
+        Retrieve a specific alert template.
+        :param template_name: The name of the template to retrieve.
+        :return: The template object.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.get_alert_template(template_name)
+
+    def list_alert_templates(self) -> list[AlertTemplate]:
+        """
+        Retrieve list of all alert templates.
+        :return: All the alert template objects in the database.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.list_alert_templates()
+
     def _run_authenticated_git_action(
         self,
         action: Callable,
mlrun/render.py
CHANGED
@@ -396,12 +396,18 @@ def runs_to_html(
         df.drop("labels", axis=1, inplace=True)
         df.drop("inputs", axis=1, inplace=True)
         df.drop("artifacts", axis=1, inplace=True)
+        df.drop("artifact_uris", axis=1, inplace=True)
     else:
         df["labels"] = df["labels"].apply(dict_html)
         df["inputs"] = df["inputs"].apply(inputs_html)
-        df["artifacts"] = df["artifacts"].apply(
-            lambda artifacts: artifacts_html(artifacts, "target_path"),
-        )
+        if df["artifact_uris"][0]:
+            df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
+            df.drop("artifacts", axis=1, inplace=True)
+        else:
+            df["artifacts"] = df["artifacts"].apply(
+                lambda artifacts: artifacts_html(artifacts, "target_path"),
+            )
+            df.drop("artifact_uris", axis=1, inplace=True)
 
     def expand_error(x):
         if x["state"] == "error":