mlrun-1.7.0rc15-py3-none-any.whl → mlrun-1.7.0rc17-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries; it is provided for informational purposes only.
Note: this version of mlrun has been flagged as potentially problematic.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -4
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/artifacts/__init__.py +7 -1
- mlrun/artifacts/base.py +28 -3
- mlrun/artifacts/dataset.py +8 -0
- mlrun/artifacts/manager.py +18 -0
- mlrun/artifacts/model.py +8 -1
- mlrun/artifacts/plots.py +13 -0
- mlrun/common/schemas/__init__.py +10 -2
- mlrun/common/schemas/alert.py +64 -5
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -1
- mlrun/common/schemas/model_monitoring/constants.py +17 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +60 -1
- mlrun/common/schemas/project.py +5 -1
- mlrun/config.py +11 -4
- mlrun/datastore/datastore_profile.py +10 -7
- mlrun/db/base.py +24 -4
- mlrun/db/httpdb.py +97 -43
- mlrun/db/nopdb.py +25 -4
- mlrun/errors.py +5 -0
- mlrun/launcher/base.py +3 -2
- mlrun/lists.py +4 -0
- mlrun/model.py +15 -8
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/applications/_application_steps.py +1 -2
- mlrun/model_monitoring/applications/context.py +1 -1
- mlrun/model_monitoring/applications/histogram_data_drift.py +64 -38
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +56 -202
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +442 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/stream_processing.py +46 -210
- mlrun/model_monitoring/writer.py +50 -100
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +19 -200
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +62 -17
- mlrun/render.py +9 -3
- mlrun/run.py +5 -38
- mlrun/runtimes/__init__.py +1 -0
- mlrun/runtimes/base.py +3 -3
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/nuclio/api_gateway.py +163 -77
- mlrun/runtimes/nuclio/application/application.py +160 -7
- mlrun/runtimes/nuclio/function.py +25 -45
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +0 -38
- mlrun/track/tracker.py +2 -1
- mlrun/utils/helpers.py +51 -31
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +9 -4
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +21 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/RECORD +75 -69
- mlrun/kfpops.py +0 -860
- mlrun/platforms/other.py +0 -305
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc15.dist-info → mlrun-1.7.0rc17.dist-info}/top_level.txt +0 -0
mlrun/platforms/iguazio.py
CHANGED
@@ -15,12 +15,9 @@
 import json
 import os
 import urllib
-from collections import namedtuple
 from urllib.parse import urlparse

-import kfp.dsl
 import requests
-import semver
 import v3io

 import mlrun.errors
@@ -29,203 +26,6 @@ from mlrun.utils import dict_to_json

 _cached_control_session = None

-VolumeMount = namedtuple("Mount", ["path", "sub_path"])
-
-
-def mount_v3io(
-    name="v3io",
-    remote="",
-    access_key="",
-    user="",
-    secret=None,
-    volume_mounts=None,
-):
-    """Modifier function to apply to a Container Op to volume mount a v3io path
-
-    :param name: the volume name
-    :param remote: the v3io path to use for the volume. ~/ prefix will be replaced with /users/<username>/
-    :param access_key: the access key used to auth against v3io. if not given V3IO_ACCESS_KEY env var will be used
-    :param user: the username used to auth against v3io. if not given V3IO_USERNAME env var will be used
-    :param secret: k8s secret name which would be used to get the username and access key to auth against v3io.
-    :param volume_mounts: list of VolumeMount. empty volume mounts & remote will default to mount /v3io & /User.
-    """
-    volume_mounts, user = _enrich_and_validate_v3io_mounts(
-        remote=remote,
-        volume_mounts=volume_mounts,
-        user=user,
-    )
-
-    def _attach_volume_mounts_and_creds(container_op: kfp.dsl.ContainerOp):
-        from kubernetes import client as k8s_client
-
-        vol = v3io_to_vol(name, remote, access_key, user, secret=secret)
-        container_op.add_volume(vol)
-        for volume_mount in volume_mounts:
-            container_op.container.add_volume_mount(
-                k8s_client.V1VolumeMount(
-                    mount_path=volume_mount.path,
-                    sub_path=volume_mount.sub_path,
-                    name=name,
-                )
-            )
-
-        if not secret:
-            container_op = v3io_cred(access_key=access_key, user=user)(container_op)
-        return container_op
-
-    return _attach_volume_mounts_and_creds
-
-
-def _enrich_and_validate_v3io_mounts(remote="", volume_mounts=None, user=""):
-    if remote and not volume_mounts:
-        raise mlrun.errors.MLRunInvalidArgumentError(
-            "volume_mounts must be specified when remote is given"
-        )
-
-    # Empty remote & volume_mounts defaults are volume mounts of /v3io and /User
-    if not remote and not volume_mounts:
-        user = _resolve_mount_user(user)
-        if not user:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "user name/env must be specified when using empty remote and volume_mounts"
-            )
-        volume_mounts = [
-            VolumeMount(path="/v3io", sub_path=""),
-            VolumeMount(path="/User", sub_path="users/" + user),
-        ]
-
-    if not isinstance(volume_mounts, list) and any(
-        [not isinstance(x, VolumeMount) for x in volume_mounts]
-    ):
-        raise TypeError("mounts should be a list of Mount")
-
-    return volume_mounts, user
-
-
-def _resolve_mount_user(user=None):
-    return user or os.environ.get("V3IO_USERNAME")
-
-
-def mount_spark_conf():
-    def _mount_spark(container_op: kfp.dsl.ContainerOp):
-        from kubernetes import client as k8s_client
-
-        container_op.container.add_volume_mount(
-            k8s_client.V1VolumeMount(
-                name="spark-master-config", mount_path="/etc/config/spark"
-            )
-        )
-        return container_op
-
-    return _mount_spark
-
-
-def mount_v3iod(namespace, v3io_config_configmap):
-    def _mount_v3iod(container_op: kfp.dsl.ContainerOp):
-        from kubernetes import client as k8s_client
-
-        def add_vol(name, mount_path, host_path):
-            vol = k8s_client.V1Volume(
-                name=name,
-                host_path=k8s_client.V1HostPathVolumeSource(path=host_path, type=""),
-            )
-            container_op.add_volume(vol)
-            container_op.container.add_volume_mount(
-                k8s_client.V1VolumeMount(mount_path=mount_path, name=name)
-            )
-
-        # this is a legacy path for the daemon shared memory
-        host_path = "/dev/shm/"
-
-        # path to shared memory for daemon was changed in Iguazio 3.2.3-b1
-        igz_version = mlrun.mlconf.get_parsed_igz_version()
-        if igz_version and igz_version >= semver.VersionInfo.parse("3.2.3-b1"):
-            host_path = "/var/run/iguazio/dayman-shm/"
-        add_vol(name="shm", mount_path="/dev/shm", host_path=host_path + namespace)
-
-        add_vol(
-            name="v3iod-comm",
-            mount_path="/var/run/iguazio/dayman",
-            host_path="/var/run/iguazio/dayman/" + namespace,
-        )
-
-        vol = k8s_client.V1Volume(
-            name="daemon-health", empty_dir=k8s_client.V1EmptyDirVolumeSource()
-        )
-        container_op.add_volume(vol)
-        container_op.container.add_volume_mount(
-            k8s_client.V1VolumeMount(
-                mount_path="/var/run/iguazio/daemon_health", name="daemon-health"
-            )
-        )
-
-        vol = k8s_client.V1Volume(
-            name="v3io-config",
-            config_map=k8s_client.V1ConfigMapVolumeSource(
-                name=v3io_config_configmap, default_mode=420
-            ),
-        )
-        container_op.add_volume(vol)
-        container_op.container.add_volume_mount(
-            k8s_client.V1VolumeMount(mount_path="/etc/config/v3io", name="v3io-config")
-        )
-
-        container_op.container.add_env_variable(
-            k8s_client.V1EnvVar(
-                name="CURRENT_NODE_IP",
-                value_from=k8s_client.V1EnvVarSource(
-                    field_ref=k8s_client.V1ObjectFieldSelector(
-                        api_version="v1", field_path="status.hostIP"
-                    )
-                ),
-            )
-        )
-        container_op.container.add_env_variable(
-            k8s_client.V1EnvVar(
-                name="IGZ_DATA_CONFIG_FILE", value="/igz/java/conf/v3io.conf"
-            )
-        )
-
-        return container_op
-
-    return _mount_v3iod
-
-
-def v3io_cred(api="", user="", access_key=""):
-    """
-    Modifier function to copy local v3io env vars to container
-
-    Usage::
-
-        train = train_op(...)
-        train.apply(use_v3io_cred())
-    """
-
-    def _use_v3io_cred(container_op: kfp.dsl.ContainerOp):
-        from os import environ
-
-        from kubernetes import client as k8s_client
-
-        web_api = api or environ.get("V3IO_API") or mlconf.v3io_api
-        _user = user or environ.get("V3IO_USERNAME")
-        _access_key = access_key or environ.get("V3IO_ACCESS_KEY")
-        v3io_framesd = mlconf.v3io_framesd or environ.get("V3IO_FRAMESD")
-
-        return (
-            container_op.container.add_env_variable(
-                k8s_client.V1EnvVar(name="V3IO_API", value=web_api)
-            )
-            .add_env_variable(k8s_client.V1EnvVar(name="V3IO_USERNAME", value=_user))
-            .add_env_variable(
-                k8s_client.V1EnvVar(name="V3IO_ACCESS_KEY", value=_access_key)
-            )
-            .add_env_variable(
-                k8s_client.V1EnvVar(name="V3IO_FRAMESD", value=v3io_framesd)
-            )
-        )
-
-    return _use_v3io_cred
-

 def split_path(mntpath=""):
     if mntpath[0] == "/":
@@ -582,3 +382,22 @@ def sanitize_username(username: str):
     So simply replace it with dash
     """
     return username.replace("_", "-")
+
+
+def min_iguazio_versions(*versions):
+    def decorator(function):
+        def wrapper(*args, **kwargs):
+            if mlrun.utils.helpers.validate_component_version_compatibility(
+                "iguazio", *versions
+            ):
+                return function(*args, **kwargs)
+
+            message = (
+                f"{function.__name__} is supported since Iguazio {' or '.join(versions)}, currently using "
+                f"Iguazio {mlconf.igz_version}."
+            )
+            raise mlrun.errors.MLRunIncompatibleVersionError(message)
+
+        return wrapper
+
+    return decorator
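Note: the KFP container-op mount helpers removed above (mount_v3io, v3io_cred, mount_spark_conf, mount_v3iod) leave this module in this release; elsewhere in this diff, mlrun/projects/project.py starts importing mlrun_pipelines.mounts, which appears to be their new home. The added min_iguazio_versions decorator gates a function on the configured Iguazio version. A minimal usage sketch (the version string is illustrative)::

    from mlrun.platforms.iguazio import min_iguazio_versions

    @min_iguazio_versions("3.5.5")
    def use_new_platform_feature():
        # the wrapper raises mlrun.errors.MLRunIncompatibleVersionError when
        # the configured Iguazio version is older than 3.5.5
        ...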
mlrun/projects/operations.py
CHANGED
@@ -15,7 +15,7 @@
 import warnings
 from typing import Optional, Union

-import
+from mlrun_pipelines.models import PipelineNodeWrapper

 import mlrun
 from mlrun.utils import hub_prefix
@@ -76,7 +76,7 @@ def run_function(
     notifications: list[mlrun.model.Notification] = None,
     returns: Optional[list[Union[str, dict[str, str]]]] = None,
     builder_env: Optional[list] = None,
-) -> Union[mlrun.model.RunObject,
+) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
     """Run a local or remote task as part of a local/kubeflow pipeline

     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
@@ -86,7 +86,7 @@ def run_function(
     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
     e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
     project runs provide additional notifications/reporting and exception handling.
-    inside a Kubeflow pipeline (KFP) run_function() generates KFP
+    inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
     some behavior may differ between regular runs and deferred KFP runs.

     example (use with function object)::
@@ -166,7 +166,7 @@ def run_function(
         artifact type can be given there. The artifact key must appear in the dictionary as
         "key": "the_key".
     :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
-    :return: MLRun RunObject or
+    :return: MLRun RunObject or PipelineNodeWrapper
     """
     engine, function = _get_engine_and_function(function, project_object)
     task = mlrun.new_task(
@@ -254,7 +254,7 @@ def build_function(
     overwrite_build_params: bool = False,
     extra_args: str = None,
     force_build: bool = False,
-) -> Union[BuildStatus,
+) -> Union[BuildStatus, PipelineNodeWrapper]:
     """deploy ML function, build container with its dependencies

     :param function: Name of the function (in the project) or function object
@@ -294,7 +294,11 @@ def build_function(
         if overwrite_build_params:
             function.spec.build.commands = None
         if requirements or requirements_file:
-            function.with_requirements(
+            function.with_requirements(
+                requirements=requirements,
+                requirements_file=requirements_file,
+                overwrite=True,
+            )
         if commands:
             function.with_commands(commands)
         return function.deploy_step(
@@ -358,7 +362,7 @@ def deploy_function(
     builder_env: dict = None,
     project_object=None,
     mock: bool = None,
-) -> Union[DeployStatus,
+) -> Union[DeployStatus, PipelineNodeWrapper]:
     """deploy real-time (nuclio based) functions

     :param function: name of the function (in the project) or function object
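The return annotations above replace a removed KFP type (truncated in this view) with PipelineNodeWrapper from the new mlrun_pipelines package, so workflow code now receives a wrapped pipeline node rather than a raw ContainerOp. A hedged sketch of how that reads in a workflow, assuming a project with registered "trainer" and "evaluator" functions (both names are illustrative)::

    from kfp import dsl

    import mlrun

    @dsl.pipeline(name="example-pipeline")
    def pipeline(dataset: str):
        # inside a workflow, run_function returns a pipeline node
        # (PipelineNodeWrapper), not a completed RunObject
        train = mlrun.run_function(
            "trainer",
            inputs={"dataset": dataset},
            outputs=["model"],
        )
        # node outputs feed downstream steps, forming the DAG
        mlrun.run_function("evaluator", inputs={"model": train.outputs["model"]})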
mlrun/projects/pipelines.py
CHANGED
@@ -20,9 +20,10 @@ import tempfile
 import typing
 import uuid

-import
-
+import mlrun_pipelines.common.models
+import mlrun_pipelines.patcher
 from kfp.compiler import compiler
+from mlrun_pipelines.helpers import new_pipe_metadata

 import mlrun
 import mlrun.common.schemas
@@ -31,7 +32,6 @@ from mlrun.errors import err_to_str
 from mlrun.utils import (
     get_ui_url,
     logger,
-    new_pipe_metadata,
     normalize_workflow_name,
     retry_until_successful,
 )
@@ -301,72 +301,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
     }


-# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
-# converts it to a k8s object. As part of the flow in the Compile.compile() method,
-# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
-# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
-# I ran across the following problem when seeking for a method to set the priority_class_name:
-# https://github.com/kubeflow/pipelines/issues/3594
-# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
-# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
-# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
-def _create_enriched_mlrun_workflow(
-    self,
-    pipeline_func: typing.Callable,
-    pipeline_name: typing.Optional[str] = None,
-    pipeline_description: typing.Optional[str] = None,
-    params_list: typing.Optional[list[dsl.PipelineParam]] = None,
-    pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
-):
-    """Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
-    workflow = self._original_create_workflow(
-        pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
-    )
-    # We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
-    # that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
-    # we know can be raised.
-    try:
-        functions = []
-        if pipeline_context.functions:
-            try:
-                functions = pipeline_context.functions.values()
-            except Exception as err:
-                logger.debug(
-                    "Unable to retrieve project functions, not enriching workflow with mlrun",
-                    error=err_to_str(err),
-                )
-                return workflow
-
-        # enrich each pipeline step with your desire k8s attribute
-        for kfp_step_template in workflow["spec"]["templates"]:
-            if kfp_step_template.get("container"):
-                for function_obj in functions:
-                    # we condition within each function since the comparison between the function and
-                    # the kfp pod may change depending on the attribute type.
-                    _set_function_attribute_on_kfp_pod(
-                        kfp_step_template,
-                        function_obj,
-                        "PriorityClassName",
-                        "priority_class_name",
-                    )
-                    _enrich_kfp_pod_security_context(
-                        kfp_step_template,
-                        function_obj,
-                    )
-    except mlrun.errors.MLRunInvalidArgumentError:
-        raise
-    except Exception as err:
-        logger.debug(
-            "Something in the enrichment of kfp pods failed", error=err_to_str(err)
-        )
-    return workflow
-
-
-# patching function as class method
-kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
-kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
-
-
 def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
     project_instance, name, tag, hash_key = parse_versioned_object_uri(
         key, project.metadata.name
@@ -457,7 +391,10 @@ class _PipelineRunStatus:

     @property
     def state(self):
-        if
+        if (
+            self._state
+            not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
+        ):
             self._state = self._engine.get_state(self.run_id, self.project)
         return self._state

@@ -754,7 +691,7 @@ class _LocalRunner(_PipelineRunner):
         err = None
         try:
             workflow_handler(**workflow_spec.args)
-            state =
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
         except Exception as exc:
             err = exc
             logger.exception("Workflow run failed")
@@ -762,7 +699,7 @@ class _LocalRunner(_PipelineRunner):
                 f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state =
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
         project.notifiers.push_pipeline_run_results(
             pipeline_context.runs_map.values(), state=state
@@ -921,9 +858,9 @@ class _RemoteRunner(_PipelineRunner):
                 f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state =
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
-            state =
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
             project.notifiers.push_pipeline_start_message(
                 project.metadata.name,
             )
@@ -1116,7 +1053,7 @@ def load_and_run(
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)

-    if run.state ==
+    if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
        raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc

    if wait_for_completion:
@@ -1131,7 +1068,7 @@ def load_and_run(

     pipeline_state, _, _ = project.get_run_status(run)
     context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-    if pipeline_state !=
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
         raise RuntimeError(
             f"Workflow {workflow_log_message} failed, state={pipeline_state}"
         )
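Workflow states are now read from mlrun_pipelines.common.models.RunStatuses (the string values on the removed lines were truncated in this view), and _PipelineRunStatus.state only re-queries the engine until a stable status is reached. A small polling sketch under those assumptions, where `project` is a loaded MlrunProject and "main" an illustrative workflow name::

    import time

    from mlrun_pipelines.common.models import RunStatuses

    run = project.run("main", engine="remote", watch=False)
    while run.state not in RunStatuses.stable_statuses():
        time.sleep(10)  # state refreshes via the engine on each access
    if run.state != RunStatuses.succeeded:
        raise RuntimeError(f"workflow ended in state {run.state}")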
mlrun/projects/project.py
CHANGED
@@ -31,12 +31,15 @@ from typing import Callable, Optional, Union
 import dotenv
 import git
 import git.exc
-import
+import mlrun_pipelines.common.models
+import mlrun_pipelines.mounts
 import nuclio.utils
 import requests
 import yaml
+from mlrun_pipelines.models import PipelineNodeWrapper

 import mlrun.common.helpers
+import mlrun.common.schemas.artifact
 import mlrun.common.schemas.model_monitoring.constants as mm_constants
 import mlrun.db
 import mlrun.errors
@@ -48,7 +51,8 @@ import mlrun.runtimes.pod
 import mlrun.runtimes.utils
 import mlrun.serving
 import mlrun.utils.regex
-from mlrun.
+from mlrun.alerts.alert import AlertConfig
+from mlrun.common.schemas.alert import AlertTemplate
 from mlrun.datastore.datastore_profile import DatastoreProfile, DatastoreProfile2Json
 from mlrun.runtimes.nuclio.function import RemoteRuntime

@@ -1586,6 +1590,23 @@ class MlrunProject(ModelObj):
         )
         return item

+    def delete_artifact(
+        self,
+        item: Artifact,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
+        """Delete an artifact object in the DB and optionally delete the artifact data
+
+        :param item: Artifact object (can be any type, such as dataset, model, feature store).
+        :param deletion_strategy: The artifact deletion strategy types.
+        :param secrets: Credentials needed to access the artifact data.
+        """
+        am = self._get_artifact_manager()
+        am.delete_artifact(item, deletion_strategy, secrets)
+
     def log_dataset(
         self,
         key,
@@ -2868,7 +2889,7 @@ class MlrunProject(ModelObj):
             (which will be converted to the class using its `from_crontab` constructor),
             see this link for help:
             https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
-
+            For using the pre-defined workflow's schedule, set `schedule=True`
         :param timeout: Timeout in seconds to wait for pipeline completion (watch will be activated)
         :param source: Source to use instead of the actual `project.spec.source` (used when engine is remote).
             Can be a one of:
@@ -2879,10 +2900,11 @@ class MlrunProject(ModelObj):
             For other engines the source is used to validate that the code is up-to-date.
         :param cleanup_ttl:
                       Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
-
+                      workflow and all its resources are deleted)
         :param notifications:
                       List of notifications to send for workflow completion
-
+
+        :returns: ~py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
         """

         arguments = arguments or {}
@@ -2940,8 +2962,12 @@ class MlrunProject(ModelObj):
             engine = "remote"
         # The default engine is kfp if not given:
         workflow_engine = get_workflow_engine(engine or workflow_spec.engine, local)
-        if not inner_engine and engine == "remote":
-
+        if not inner_engine and workflow_engine.engine == "remote":
+            # if inner engine is set to remote, assume kfp as the default inner engine with remote as the runner
+            engine_kind = (
+                workflow_spec.engine if workflow_spec.engine != "remote" else "kfp"
+            )
+            inner_engine = get_workflow_engine(engine_kind, local).engine
         workflow_spec.engine = inner_engine or workflow_engine.engine

         run = workflow_engine.run(
@@ -2956,7 +2982,7 @@ class MlrunProject(ModelObj):
             notifications=notifications,
         )
         # run is None when scheduling
-        if run and run.state ==
+        if run and run.state == mlrun_pipelines.common.models.RunStatuses.failed:
             return run
         if not workflow_spec.schedule:
             # Failure and schedule messages already logged
@@ -2969,7 +2995,7 @@ class MlrunProject(ModelObj):
             # run's engine gets replaced with inner engine if engine is remote,
             # so in that case we need to get the status from the remote engine manually
             # TODO: support watch for remote:local
-            if engine == "remote" and status_engine.engine != "local":
+            if workflow_engine.engine == "remote" and status_engine.engine != "local":
                 status_engine = _RemoteRunner

             status_engine.get_run_status(project=self, run=run, timeout=timeout)
@@ -3133,7 +3159,7 @@ class MlrunProject(ModelObj):
         notifications: list[mlrun.model.Notification] = None,
         returns: Optional[list[Union[str, dict[str, str]]]] = None,
         builder_env: Optional[dict] = None,
-    ) -> typing.Union[mlrun.model.RunObject,
+    ) -> typing.Union[mlrun.model.RunObject, PipelineNodeWrapper]:
         """Run a local or remote task as part of a local/kubeflow pipeline

         example (use with project)::
@@ -3189,7 +3215,7 @@ class MlrunProject(ModelObj):
             artifact type can be given there. The artifact key must appear in the dictionary as
             "key": "the_key".
         :param builder_env: env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
-        :return: MLRun RunObject or
+        :return: MLRun RunObject or PipelineNodeWrapper
         """
         return run_function(
             function,
@@ -3232,7 +3258,7 @@ class MlrunProject(ModelObj):
         requirements_file: str = None,
         extra_args: str = None,
         force_build: bool = False,
-    ) -> typing.Union[BuildStatus,
+    ) -> typing.Union[BuildStatus, PipelineNodeWrapper]:
         """deploy ML function, build container with its dependencies

         :param function: name of the function (in the project) or function object
@@ -3353,7 +3379,7 @@ class MlrunProject(ModelObj):
         requirements_file: str = None,
         extra_args: str = None,
         target_dir: str = None,
-    ) -> typing.Union[BuildStatus,
+    ) -> typing.Union[BuildStatus, PipelineNodeWrapper]:
         """Builder docker image for the project, based on the project's build config. Parameters allow to override
         the build config.
         If the project has a source configured and pull_at_runtime is not configured, this source will be cloned to the
@@ -3464,7 +3490,7 @@ class MlrunProject(ModelObj):
         verbose: bool = None,
         builder_env: dict = None,
         mock: bool = None,
-    ) -> typing.Union[DeployStatus,
+    ) -> typing.Union[DeployStatus, PipelineNodeWrapper]:
         """deploy real-time (nuclio based) functions

         :param function: name of the function (in the project) or function object
@@ -3876,7 +3902,9 @@ class MlrunProject(ModelObj):

         mlrun.db.get_run_db().delete_api_gateway(name=name, project=self.name)

-    def store_alert_config(
+    def store_alert_config(
+        self, alert_data: AlertConfig, alert_name=None
+    ) -> AlertConfig:
         """
         Create/modify an alert.
         :param alert_data: The data of the alert.
@@ -3886,7 +3914,7 @@ class MlrunProject(ModelObj):
         db = mlrun.db.get_run_db(secrets=self._secrets)
         if alert_name is None:
             alert_name = alert_data.name
-        return db.store_alert_config(alert_name, alert_data
+        return db.store_alert_config(alert_name, alert_data, project=self.metadata.name)

     def get_alert_config(self, alert_name: str) -> AlertConfig:
         """
@@ -3897,7 +3925,7 @@ class MlrunProject(ModelObj):
         db = mlrun.db.get_run_db(secrets=self._secrets)
         return db.get_alert_config(alert_name, self.metadata.name)

-    def list_alerts_configs(self):
+    def list_alerts_configs(self) -> list[AlertConfig]:
         """
         Retrieve list of alerts of a project.
         :return: All the alerts objects of the project.
@@ -3943,6 +3971,23 @@ class MlrunProject(ModelObj):
             alert_name = alert_data.name
         db.reset_alert_config(alert_name, self.metadata.name)

+    def get_alert_template(self, template_name: str) -> AlertTemplate:
+        """
+        Retrieve a specific alert template.
+        :param template_name: The name of the template to retrieve.
+        :return: The template object.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.get_alert_template(template_name)
+
+    def list_alert_templates(self) -> list[AlertTemplate]:
+        """
+        Retrieve list of all alert templates.
+        :return: All the alert template objects in the database.
+        """
+        db = mlrun.db.get_run_db(secrets=self._secrets)
+        return db.list_alert_templates()
+
     def _run_authenticated_git_action(
         self,
         action: Callable,
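Together these additions give MlrunProject artifact deletion and alert-template lookups. A sketch of the new surface, assuming a project with a logged artifact under the key "my-model" and a server-side template named "JobFailed" (both names are illustrative)::

    import mlrun
    import mlrun.common.schemas.artifact as artifact_schemas

    project = mlrun.get_or_create_project("demo")

    # drop only the DB record and keep the stored data (the default strategy)
    artifact = project.get_artifact("my-model")
    project.delete_artifact(
        artifact,
        deletion_strategy=artifact_schemas.ArtifactsDeletionStrategies.metadata_only,
    )

    # browse the server-side alert templates and fetch one by name
    for template in project.list_alert_templates():
        print(template)
    job_failed = project.get_alert_template("JobFailed")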
mlrun/render.py
CHANGED
@@ -396,12 +396,18 @@ def runs_to_html(
         df.drop("labels", axis=1, inplace=True)
         df.drop("inputs", axis=1, inplace=True)
         df.drop("artifacts", axis=1, inplace=True)
+        df.drop("artifact_uris", axis=1, inplace=True)
     else:
         df["labels"] = df["labels"].apply(dict_html)
         df["inputs"] = df["inputs"].apply(inputs_html)
-        df["
-
-
+        if df["artifact_uris"][0]:
+            df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
+            df.drop("artifacts", axis=1, inplace=True)
+        else:
+            df["artifacts"] = df["artifacts"].apply(
+                lambda artifacts: artifacts_html(artifacts, "target_path"),
+            )
+            df.drop("artifact_uris", axis=1, inplace=True)

     def expand_error(x):
         if x["state"] == "error":