mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3rc3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +51 -20
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +70 -46
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +41 -36
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/model.py +6 -0
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/features_drift_table.py +6 -0
- mlrun/model_monitoring/helpers.py +4 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +50 -36
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +29 -12
- mlrun/projects/project.py +69 -55
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +20 -0
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +2 -2
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/METADATA +15 -17
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/RECORD +57 -56
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3rc3.dist-info}/top_level.txt +0 -0
mlrun/datastore/v3io.py
CHANGED
|
@@ -15,12 +15,11 @@
|
|
|
15
15
|
import mmap
|
|
16
16
|
import os
|
|
17
17
|
import time
|
|
18
|
-
from copy import deepcopy
|
|
19
18
|
from datetime import datetime
|
|
20
19
|
|
|
21
20
|
import fsspec
|
|
22
|
-
import
|
|
23
|
-
|
|
21
|
+
import v3io
|
|
22
|
+
from v3io.dataplane.response import HttpResponseError
|
|
24
23
|
|
|
25
24
|
import mlrun
|
|
26
25
|
from mlrun.datastore.helpers import ONE_GB, ONE_MB
|
|
@@ -30,11 +29,6 @@ from .base import (
|
|
|
30
29
|
DataStore,
|
|
31
30
|
FileStats,
|
|
32
31
|
basic_auth_header,
|
|
33
|
-
get_range,
|
|
34
|
-
http_get,
|
|
35
|
-
http_head,
|
|
36
|
-
http_put,
|
|
37
|
-
http_upload,
|
|
38
32
|
)
|
|
39
33
|
|
|
40
34
|
V3IO_LOCAL_ROOT = "v3io"
|
|
@@ -47,17 +41,18 @@ class V3ioStore(DataStore):
|
|
|
47
41
|
|
|
48
42
|
self.headers = None
|
|
49
43
|
self.secure = self.kind == "v3ios"
|
|
44
|
+
|
|
45
|
+
token = self._get_secret_or_env("V3IO_ACCESS_KEY")
|
|
46
|
+
username = self._get_secret_or_env("V3IO_USERNAME")
|
|
47
|
+
password = self._get_secret_or_env("V3IO_PASSWORD")
|
|
50
48
|
if self.endpoint.startswith("https://"):
|
|
51
49
|
self.endpoint = self.endpoint[len("https://") :]
|
|
52
50
|
self.secure = True
|
|
53
51
|
elif self.endpoint.startswith("http://"):
|
|
54
52
|
self.endpoint = self.endpoint[len("http://") :]
|
|
55
53
|
self.secure = False
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
username = self._get_secret_or_env("V3IO_USERNAME")
|
|
59
|
-
password = self._get_secret_or_env("V3IO_PASSWORD")
|
|
60
|
-
|
|
54
|
+
self.client = v3io.dataplane.Client(access_key=token, endpoint=self.url)
|
|
55
|
+
self.object = self.client.object
|
|
61
56
|
self.auth = None
|
|
62
57
|
self.token = token
|
|
63
58
|
if token:
|
|
@@ -65,6 +60,16 @@ class V3ioStore(DataStore):
|
|
|
65
60
|
elif username and password:
|
|
66
61
|
self.headers = basic_auth_header(username, password)
|
|
67
62
|
|
|
63
|
+
@staticmethod
|
|
64
|
+
def _do_object_request(function: callable, *args, **kwargs):
|
|
65
|
+
try:
|
|
66
|
+
return function(*args, **kwargs)
|
|
67
|
+
except HttpResponseError as http_response_error:
|
|
68
|
+
raise mlrun.errors.err_for_status_code(
|
|
69
|
+
status_code=http_response_error.status_code,
|
|
70
|
+
message=mlrun.errors.err_to_str(http_response_error),
|
|
71
|
+
)
|
|
72
|
+
|
|
68
73
|
@staticmethod
|
|
69
74
|
def uri_to_ipython(endpoint, subpath):
|
|
70
75
|
return V3IO_LOCAL_ROOT + subpath
|
|
@@ -91,13 +96,19 @@ class V3ioStore(DataStore):
|
|
|
91
96
|
|
|
92
97
|
def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
|
|
93
98
|
"""helper function for upload method, allows for controlling max_chunk_size in testing"""
|
|
99
|
+
container, path = split_path(self._join(key))
|
|
94
100
|
file_size = os.path.getsize(src_path) # in bytes
|
|
95
101
|
if file_size <= ONE_MB:
|
|
96
|
-
|
|
102
|
+
with open(src_path, "rb") as source_file:
|
|
103
|
+
data = source_file.read()
|
|
104
|
+
self._do_object_request(
|
|
105
|
+
self.object.put,
|
|
106
|
+
container=container,
|
|
107
|
+
path=path,
|
|
108
|
+
body=data,
|
|
109
|
+
append=False,
|
|
110
|
+
)
|
|
97
111
|
return
|
|
98
|
-
append_header = deepcopy(self.headers)
|
|
99
|
-
append_header["Range"] = "-1"
|
|
100
|
-
|
|
101
112
|
# chunk must be a multiple of the ALLOCATIONGRANULARITY
|
|
102
113
|
# https://docs.python.org/3/library/mmap.html
|
|
103
114
|
if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
|
|
@@ -114,11 +125,13 @@ class V3ioStore(DataStore):
|
|
|
114
125
|
access=mmap.ACCESS_READ,
|
|
115
126
|
offset=file_offset,
|
|
116
127
|
) as mmap_obj:
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
128
|
+
append = file_offset != 0
|
|
129
|
+
self._do_object_request(
|
|
130
|
+
self.object.put,
|
|
131
|
+
container=container,
|
|
132
|
+
path=path,
|
|
133
|
+
body=mmap_obj,
|
|
134
|
+
append=append,
|
|
122
135
|
)
|
|
123
136
|
file_offset += chunk_size
|
|
124
137
|
|
|
@@ -126,43 +139,55 @@ class V3ioStore(DataStore):
|
|
|
126
139
|
return self._upload(key, src_path)
|
|
127
140
|
|
|
128
141
|
def get(self, key, size=None, offset=0):
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
142
|
+
container, path = split_path(self._join(key))
|
|
143
|
+
return self._do_object_request(
|
|
144
|
+
function=self.object.get,
|
|
145
|
+
container=container,
|
|
146
|
+
path=path,
|
|
147
|
+
offset=offset,
|
|
148
|
+
num_bytes=size,
|
|
149
|
+
).body
|
|
134
150
|
|
|
135
|
-
def _put(self, key, data, max_chunk_size: int = ONE_GB):
|
|
151
|
+
def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
|
|
136
152
|
"""helper function for put method, allows for controlling max_chunk_size in testing"""
|
|
153
|
+
container, path = split_path(self._join(key))
|
|
137
154
|
buffer_size = len(data) # in bytes
|
|
138
155
|
if buffer_size <= ONE_MB:
|
|
139
|
-
|
|
156
|
+
self._do_object_request(
|
|
157
|
+
self.object.put,
|
|
158
|
+
container=container,
|
|
159
|
+
path=path,
|
|
160
|
+
body=data,
|
|
161
|
+
append=append,
|
|
162
|
+
)
|
|
140
163
|
return
|
|
141
|
-
append_header = deepcopy(self.headers)
|
|
142
|
-
append_header["Range"] = "-1"
|
|
143
164
|
buffer_offset = 0
|
|
144
165
|
try:
|
|
145
166
|
data = memoryview(data)
|
|
146
167
|
except TypeError:
|
|
147
168
|
pass
|
|
148
169
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
170
|
+
while buffer_offset < buffer_size:
|
|
171
|
+
chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
|
|
172
|
+
append = True if buffer_offset or append else False
|
|
173
|
+
self._do_object_request(
|
|
174
|
+
self.object.put,
|
|
175
|
+
container=container,
|
|
176
|
+
path=path,
|
|
177
|
+
body=data[buffer_offset : buffer_offset + chunk_size],
|
|
178
|
+
append=append,
|
|
179
|
+
)
|
|
180
|
+
buffer_offset += chunk_size
|
|
160
181
|
|
|
161
182
|
def put(self, key, data, append=False):
|
|
162
|
-
return self._put(key, data)
|
|
183
|
+
return self._put(key, data, append)
|
|
163
184
|
|
|
164
185
|
def stat(self, key):
|
|
165
|
-
|
|
186
|
+
container, path = split_path(self._join(key))
|
|
187
|
+
response = self._do_object_request(
|
|
188
|
+
function=self.object.head, container=container, path=path
|
|
189
|
+
)
|
|
190
|
+
head = dict(response.headers)
|
|
166
191
|
size = int(head.get("Content-Length", "0"))
|
|
167
192
|
datestr = head.get("Last-Modified", "0")
|
|
168
193
|
modified = time.mktime(
|
|
@@ -171,7 +196,6 @@ class V3ioStore(DataStore):
|
|
|
171
196
|
return FileStats(size, modified)
|
|
172
197
|
|
|
173
198
|
def listdir(self, key):
|
|
174
|
-
v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
|
|
175
199
|
container, subpath = split_path(self._join(key))
|
|
176
200
|
if not subpath.endswith("/"):
|
|
177
201
|
subpath += "/"
|
|
@@ -180,7 +204,7 @@ class V3ioStore(DataStore):
|
|
|
180
204
|
subpath_length = len(subpath) - 1
|
|
181
205
|
|
|
182
206
|
try:
|
|
183
|
-
response =
|
|
207
|
+
response = self.client.container.list(
|
|
184
208
|
container=container,
|
|
185
209
|
path=subpath,
|
|
186
210
|
get_all_attributes=False,
|
mlrun/db/base.py
CHANGED
|
@@ -677,3 +677,21 @@ class RunDBInterface(ABC):
|
|
|
677
677
|
self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
|
|
678
678
|
):
|
|
679
679
|
pass
|
|
680
|
+
|
|
681
|
+
def submit_workflow(
|
|
682
|
+
self,
|
|
683
|
+
project: str,
|
|
684
|
+
name: str,
|
|
685
|
+
workflow_spec: Union[
|
|
686
|
+
"mlrun.projects.pipelines.WorkflowSpec",
|
|
687
|
+
"mlrun.common.schemas.WorkflowSpec",
|
|
688
|
+
dict,
|
|
689
|
+
],
|
|
690
|
+
arguments: Optional[dict] = None,
|
|
691
|
+
artifact_path: Optional[str] = None,
|
|
692
|
+
source: Optional[str] = None,
|
|
693
|
+
run_name: Optional[str] = None,
|
|
694
|
+
namespace: Optional[str] = None,
|
|
695
|
+
notifications: list["mlrun.model.Notification"] = None,
|
|
696
|
+
) -> "mlrun.common.schemas.WorkflowResponse":
|
|
697
|
+
pass
|
mlrun/db/httpdb.py
CHANGED
|
@@ -152,7 +152,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
152
152
|
@staticmethod
|
|
153
153
|
def get_api_path_prefix(version: str = None) -> str:
|
|
154
154
|
"""
|
|
155
|
-
:param version: API version to use, None (the default) will mean to use the default value from
|
|
155
|
+
:param version: API version to use, None (the default) will mean to use the default value from mlrun.config,
|
|
156
156
|
for un-versioned api set an empty string.
|
|
157
157
|
"""
|
|
158
158
|
if version is not None:
|
|
@@ -250,7 +250,11 @@ class HTTPRunDB(RunDBInterface):
|
|
|
250
250
|
|
|
251
251
|
try:
|
|
252
252
|
response = self.session.request(
|
|
253
|
-
method,
|
|
253
|
+
method,
|
|
254
|
+
url,
|
|
255
|
+
timeout=timeout,
|
|
256
|
+
verify=config.httpdb.http.verify,
|
|
257
|
+
**kw,
|
|
254
258
|
)
|
|
255
259
|
except requests.RequestException as exc:
|
|
256
260
|
error = f"{err_to_str(exc)}: {error}" if error else err_to_str(exc)
|
|
@@ -302,11 +306,11 @@ class HTTPRunDB(RunDBInterface):
|
|
|
302
306
|
|
|
303
307
|
def connect(self, secrets=None):
|
|
304
308
|
"""Connect to the MLRun API server. Must be called prior to executing any other method.
|
|
305
|
-
The code utilizes the URL for the API server from the configuration - ``
|
|
309
|
+
The code utilizes the URL for the API server from the configuration - ``config.dbpath``.
|
|
306
310
|
|
|
307
311
|
For example::
|
|
308
312
|
|
|
309
|
-
|
|
313
|
+
config.dbpath = config.dbpath or 'http://mlrun-api:8080'
|
|
310
314
|
db = get_run_db().connect()
|
|
311
315
|
"""
|
|
312
316
|
# hack to allow unit tests to instantiate HTTPRunDB without a real server behind
|
|
@@ -500,7 +504,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
500
504
|
if offset < 0:
|
|
501
505
|
raise MLRunInvalidArgumentError("Offset cannot be negative")
|
|
502
506
|
if size is None:
|
|
503
|
-
size = int(
|
|
507
|
+
size = int(config.httpdb.logs.pull_logs_default_size_limit)
|
|
504
508
|
elif size == -1:
|
|
505
509
|
logger.warning(
|
|
506
510
|
"Retrieving all logs. This may be inefficient and can result in a large log."
|
|
@@ -546,25 +550,23 @@ class HTTPRunDB(RunDBInterface):
|
|
|
546
550
|
|
|
547
551
|
state, text = self.get_log(uid, project, offset=offset)
|
|
548
552
|
if text:
|
|
549
|
-
print(text.decode(errors=
|
|
553
|
+
print(text.decode(errors=config.httpdb.logs.decode.errors))
|
|
550
554
|
nil_resp = 0
|
|
551
555
|
while True:
|
|
552
556
|
offset += len(text)
|
|
553
557
|
# if we get 3 nil responses in a row, increase the sleep time to 10 seconds
|
|
554
558
|
# TODO: refactor this to use a conditional backoff mechanism
|
|
555
559
|
if nil_resp < 3:
|
|
556
|
-
time.sleep(int(
|
|
560
|
+
time.sleep(int(config.httpdb.logs.pull_logs_default_interval))
|
|
557
561
|
else:
|
|
558
562
|
time.sleep(
|
|
559
|
-
int(
|
|
560
|
-
mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
|
|
561
|
-
)
|
|
563
|
+
int(config.httpdb.logs.pull_logs_backoff_no_logs_default_interval)
|
|
562
564
|
)
|
|
563
565
|
state, text = self.get_log(uid, project, offset=offset)
|
|
564
566
|
if text:
|
|
565
567
|
nil_resp = 0
|
|
566
568
|
print(
|
|
567
|
-
text.decode(errors=
|
|
569
|
+
text.decode(errors=config.httpdb.logs.decode.errors),
|
|
568
570
|
end="",
|
|
569
571
|
)
|
|
570
572
|
else:
|
|
@@ -1135,17 +1137,17 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1135
1137
|
structured_dict = {}
|
|
1136
1138
|
for project, job_runtime_resources_map in response.json().items():
|
|
1137
1139
|
for job_id, runtime_resources in job_runtime_resources_map.items():
|
|
1138
|
-
structured_dict.setdefault(project, {})[
|
|
1139
|
-
|
|
1140
|
-
|
|
1140
|
+
structured_dict.setdefault(project, {})[job_id] = (
|
|
1141
|
+
mlrun.common.schemas.RuntimeResources(**runtime_resources)
|
|
1142
|
+
)
|
|
1141
1143
|
return structured_dict
|
|
1142
1144
|
elif group_by == mlrun.common.schemas.ListRuntimeResourcesGroupByField.project:
|
|
1143
1145
|
structured_dict = {}
|
|
1144
1146
|
for project, kind_runtime_resources_map in response.json().items():
|
|
1145
1147
|
for kind, runtime_resources in kind_runtime_resources_map.items():
|
|
1146
|
-
structured_dict.setdefault(project, {})[
|
|
1147
|
-
|
|
1148
|
-
|
|
1148
|
+
structured_dict.setdefault(project, {})[kind] = (
|
|
1149
|
+
mlrun.common.schemas.RuntimeResources(**runtime_resources)
|
|
1150
|
+
)
|
|
1149
1151
|
return structured_dict
|
|
1150
1152
|
else:
|
|
1151
1153
|
raise NotImplementedError(
|
|
@@ -1173,7 +1175,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1173
1175
|
:param force: Force deletion - delete the runtime resource even if it's not in terminal state or if the grace
|
|
1174
1176
|
period didn't pass.
|
|
1175
1177
|
:param grace_period: Grace period given to the runtime resource before they are actually removed, counted from
|
|
1176
|
-
the moment they moved to terminal state
|
|
1178
|
+
the moment they moved to terminal state
|
|
1179
|
+
(defaults to mlrun.config.config.runtime_resources_deletion_grace_period).
|
|
1177
1180
|
|
|
1178
1181
|
:returns: :py:class:`~mlrun.common.schemas.GroupedByProjectRuntimeResourcesOutput` listing the runtime resources
|
|
1179
1182
|
that were removed.
|
|
@@ -1203,9 +1206,9 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1203
1206
|
structured_dict = {}
|
|
1204
1207
|
for project, kind_runtime_resources_map in response.json().items():
|
|
1205
1208
|
for kind, runtime_resources in kind_runtime_resources_map.items():
|
|
1206
|
-
structured_dict.setdefault(project, {})[
|
|
1207
|
-
|
|
1208
|
-
|
|
1209
|
+
structured_dict.setdefault(project, {})[kind] = (
|
|
1210
|
+
mlrun.common.schemas.RuntimeResources(**runtime_resources)
|
|
1211
|
+
)
|
|
1209
1212
|
return structured_dict
|
|
1210
1213
|
|
|
1211
1214
|
def create_schedule(
|
|
@@ -1340,7 +1343,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1340
1343
|
logger.warning(
|
|
1341
1344
|
"Building a function image to ECR and loading an S3 source to the image may require conflicting access "
|
|
1342
1345
|
"keys. Only the permissions granted to the platform's configured secret will take affect "
|
|
1343
|
-
"(see mlrun.
|
|
1346
|
+
"(see mlrun.config.config.httpdb.builder.docker_registry_secret). "
|
|
1344
1347
|
"In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
|
|
1345
1348
|
source=func.spec.build.source,
|
|
1346
1349
|
load_source_on_run=func.spec.build.load_source_on_run,
|
|
@@ -1495,7 +1498,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1495
1498
|
Retrieve updated information on project background tasks being executed.
|
|
1496
1499
|
If no filter is provided, will return background tasks from the last week.
|
|
1497
1500
|
|
|
1498
|
-
:param project: Project name (defaults to mlrun.
|
|
1501
|
+
:param project: Project name (defaults to mlrun.config.config.default_project).
|
|
1499
1502
|
:param state: List only background tasks whose state is specified.
|
|
1500
1503
|
:param created_from: Filter by background task created time in ``[created_from, created_to]``.
|
|
1501
1504
|
:param created_to: Filter by background task created time in ``[created_from, created_to]``.
|
|
@@ -1608,19 +1611,21 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1608
1611
|
artifact_path=None,
|
|
1609
1612
|
ops=None,
|
|
1610
1613
|
cleanup_ttl=None,
|
|
1614
|
+
timeout=60,
|
|
1611
1615
|
):
|
|
1612
1616
|
"""Submit a KFP pipeline for execution.
|
|
1613
1617
|
|
|
1614
|
-
:param project:
|
|
1615
|
-
:param pipeline:
|
|
1616
|
-
:param arguments:
|
|
1617
|
-
:param experiment:
|
|
1618
|
-
:param run:
|
|
1619
|
-
:param namespace:
|
|
1620
|
-
:param artifact_path:
|
|
1621
|
-
:param ops:
|
|
1622
|
-
:param cleanup_ttl:
|
|
1623
|
-
|
|
1618
|
+
:param project: The project of the pipeline
|
|
1619
|
+
:param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
|
|
1620
|
+
:param arguments: A dictionary of arguments to pass to the pipeline.
|
|
1621
|
+
:param experiment: A name to assign for the specific experiment.
|
|
1622
|
+
:param run: A name for this specific run.
|
|
1623
|
+
:param namespace: Kubernetes namespace to execute the pipeline in.
|
|
1624
|
+
:param artifact_path: A path to artifacts used by this pipeline.
|
|
1625
|
+
:param ops: Transformers to apply on all ops in the pipeline.
|
|
1626
|
+
:param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
|
|
1627
|
+
workflow and all its resources are deleted)
|
|
1628
|
+
:param timeout: Timeout for the API call.
|
|
1624
1629
|
"""
|
|
1625
1630
|
|
|
1626
1631
|
if isinstance(pipeline, str):
|
|
@@ -1662,7 +1667,7 @@ class HTTPRunDB(RunDBInterface):
|
|
|
1662
1667
|
"POST",
|
|
1663
1668
|
f"projects/{project}/pipelines",
|
|
1664
1669
|
params=params,
|
|
1665
|
-
timeout=
|
|
1670
|
+
timeout=timeout,
|
|
1666
1671
|
body=data,
|
|
1667
1672
|
headers=headers,
|
|
1668
1673
|
)
|
|
@@ -3450,8 +3455,8 @@ class HTTPRunDB(RunDBInterface):
|
|
|
3450
3455
|
source: Optional[str] = None,
|
|
3451
3456
|
run_name: Optional[str] = None,
|
|
3452
3457
|
namespace: Optional[str] = None,
|
|
3453
|
-
notifications:
|
|
3454
|
-
):
|
|
3458
|
+
notifications: list[mlrun.model.Notification] = None,
|
|
3459
|
+
) -> mlrun.common.schemas.WorkflowResponse:
|
|
3455
3460
|
"""
|
|
3456
3461
|
Submitting workflow for a remote execution.
|
|
3457
3462
|
|
mlrun/execution.py
CHANGED
|
@@ -559,9 +559,9 @@ class MLClientCtx(object):
|
|
|
559
559
|
for k, v in get_in(task, ["status", "results"], {}).items():
|
|
560
560
|
self._results[k] = v
|
|
561
561
|
for artifact in get_in(task, ["status", run_keys.artifacts], []):
|
|
562
|
-
self._artifacts_manager.artifacts[
|
|
563
|
-
artifact
|
|
564
|
-
|
|
562
|
+
self._artifacts_manager.artifacts[artifact["metadata"]["key"]] = (
|
|
563
|
+
artifact
|
|
564
|
+
)
|
|
565
565
|
self._artifacts_manager.link_artifact(
|
|
566
566
|
self.project,
|
|
567
567
|
self.name,
|
|
@@ -389,9 +389,9 @@ class LoggingCallback(Callback):
|
|
|
389
389
|
):
|
|
390
390
|
try:
|
|
391
391
|
self._get_hyperparameter(key_chain=learning_rate_key_chain)
|
|
392
|
-
self._dynamic_hyperparameters_keys[
|
|
393
|
-
|
|
394
|
-
|
|
392
|
+
self._dynamic_hyperparameters_keys[learning_rate_key] = (
|
|
393
|
+
learning_rate_key_chain
|
|
394
|
+
)
|
|
395
395
|
except (KeyError, IndexError, ValueError):
|
|
396
396
|
pass
|
|
397
397
|
|
|
@@ -263,13 +263,13 @@ class TFKerasModelHandler(DLModelHandler):
|
|
|
263
263
|
# Update the paths and log artifacts if context is available:
|
|
264
264
|
if self._weights_file is not None:
|
|
265
265
|
if self._context is not None:
|
|
266
|
-
artifacts[
|
|
267
|
-
self.
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
266
|
+
artifacts[self._get_weights_file_artifact_name()] = (
|
|
267
|
+
self._context.log_artifact(
|
|
268
|
+
self._weights_file,
|
|
269
|
+
local_path=self._weights_file,
|
|
270
|
+
artifact_path=output_path,
|
|
271
|
+
db_key=False,
|
|
272
|
+
)
|
|
273
273
|
)
|
|
274
274
|
|
|
275
275
|
return artifacts if self._context is not None else None
|
mlrun/k8s_utils.py
CHANGED
|
@@ -134,13 +134,13 @@ def sanitize_label_value(value: str) -> str:
|
|
|
134
134
|
return re.sub(r"([^a-zA-Z0-9_.-]|^[^a-zA-Z0-9]|[^a-zA-Z0-9]$)", "-", value[:63])
|
|
135
135
|
|
|
136
136
|
|
|
137
|
-
def verify_label_key(key):
|
|
137
|
+
def verify_label_key(key: str):
|
|
138
|
+
"""
|
|
139
|
+
Verify that the label key is valid for Kubernetes.
|
|
140
|
+
Refer to https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#syntax-and-character-set
|
|
141
|
+
"""
|
|
138
142
|
if not key:
|
|
139
143
|
raise mlrun.errors.MLRunInvalidArgumentError("label key cannot be empty")
|
|
140
|
-
if key.startswith("k8s.io") or key.startswith("kubernetes.io"):
|
|
141
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
142
|
-
"Labels cannot start with 'k8s.io' or 'kubernetes.io'"
|
|
143
|
-
)
|
|
144
144
|
|
|
145
145
|
mlrun.utils.helpers.verify_field_regex(
|
|
146
146
|
f"project.metadata.labels.'{key}'",
|
|
@@ -148,6 +148,11 @@ def verify_label_key(key):
|
|
|
148
148
|
mlrun.utils.regex.k8s_character_limit,
|
|
149
149
|
)
|
|
150
150
|
|
|
151
|
+
if key.startswith("k8s.io/") or key.startswith("kubernetes.io/"):
|
|
152
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
153
|
+
"Labels cannot start with 'k8s.io/' or 'kubernetes.io/'"
|
|
154
|
+
)
|
|
155
|
+
|
|
151
156
|
parts = key.split("/")
|
|
152
157
|
if len(parts) == 1:
|
|
153
158
|
name = parts[0]
|
mlrun/kfpops.py
CHANGED
|
@@ -41,8 +41,8 @@ from .utils import (
|
|
|
41
41
|
|
|
42
42
|
# default KFP artifacts and output (ui metadata, metrics etc.)
|
|
43
43
|
# directories to /tmp to allow running with security context
|
|
44
|
-
KFPMETA_DIR =
|
|
45
|
-
KFP_ARTIFACTS_DIR =
|
|
44
|
+
KFPMETA_DIR = "/tmp"
|
|
45
|
+
KFP_ARTIFACTS_DIR = "/tmp"
|
|
46
46
|
|
|
47
47
|
project_annotation = "mlrun/project"
|
|
48
48
|
run_annotation = "mlrun/pipeline-step-type"
|
|
@@ -71,7 +71,7 @@ def write_kfpmeta(struct):
|
|
|
71
71
|
{"name": k, "numberValue": v} for k, v in results.items() if is_num(v)
|
|
72
72
|
],
|
|
73
73
|
}
|
|
74
|
-
with open(KFPMETA_DIR
|
|
74
|
+
with open(os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"), "w") as f:
|
|
75
75
|
json.dump(metrics, f)
|
|
76
76
|
|
|
77
77
|
struct = deepcopy(struct)
|
|
@@ -91,7 +91,14 @@ def write_kfpmeta(struct):
|
|
|
91
91
|
elif key in results:
|
|
92
92
|
val = results[key]
|
|
93
93
|
try:
|
|
94
|
-
|
|
94
|
+
# NOTE: if key has "../x", it would fail on path traversal
|
|
95
|
+
path = os.path.join(KFP_ARTIFACTS_DIR, key)
|
|
96
|
+
if not mlrun.utils.helpers.is_safe_path(KFP_ARTIFACTS_DIR, path):
|
|
97
|
+
logger.warning(
|
|
98
|
+
"Path traversal is not allowed ignoring", path=path, key=key
|
|
99
|
+
)
|
|
100
|
+
continue
|
|
101
|
+
path = os.path.abspath(path)
|
|
95
102
|
logger.info("Writing artifact output", path=path, val=val)
|
|
96
103
|
with open(path, "w") as fp:
|
|
97
104
|
fp.write(str(val))
|
|
@@ -109,7 +116,7 @@ def write_kfpmeta(struct):
|
|
|
109
116
|
"outputs": output_artifacts
|
|
110
117
|
+ [{"type": "markdown", "storage": "inline", "source": text}]
|
|
111
118
|
}
|
|
112
|
-
with open(KFPMETA_DIR
|
|
119
|
+
with open(os.path.join(KFPMETA_DIR, "mlpipeline-ui-metadata.json"), "w") as f:
|
|
113
120
|
json.dump(metadata, f)
|
|
114
121
|
|
|
115
122
|
|
|
@@ -401,9 +408,9 @@ def mlrun_op(
|
|
|
401
408
|
cmd += ["--label", f"{label}={val}"]
|
|
402
409
|
for output in outputs:
|
|
403
410
|
cmd += ["-o", str(output)]
|
|
404
|
-
file_outputs[
|
|
405
|
-
output
|
|
406
|
-
|
|
411
|
+
file_outputs[output.replace(".", "_")] = (
|
|
412
|
+
f"/tmp/{output}" # not using path.join to avoid windows "\"
|
|
413
|
+
)
|
|
407
414
|
if project:
|
|
408
415
|
cmd += ["--project", project]
|
|
409
416
|
if handler:
|
|
@@ -450,8 +457,10 @@ def mlrun_op(
|
|
|
450
457
|
command=cmd + [command],
|
|
451
458
|
file_outputs=file_outputs,
|
|
452
459
|
output_artifact_paths={
|
|
453
|
-
"mlpipeline-ui-metadata":
|
|
454
|
-
|
|
460
|
+
"mlpipeline-ui-metadata": os.path.join(
|
|
461
|
+
KFPMETA_DIR, "mlpipeline-ui-metadata.json"
|
|
462
|
+
),
|
|
463
|
+
"mlpipeline-metrics": os.path.join(KFPMETA_DIR, "mlpipeline-metrics.json"),
|
|
455
464
|
},
|
|
456
465
|
)
|
|
457
466
|
cop = add_default_function_resources(cop)
|
mlrun/model.py
CHANGED
|
@@ -62,6 +62,7 @@ class ModelObj:
|
|
|
62
62
|
return new_type.from_dict(param)
|
|
63
63
|
return param
|
|
64
64
|
|
|
65
|
+
@mlrun.utils.filter_warnings("ignore", FutureWarning)
|
|
65
66
|
def to_dict(self, fields=None, exclude=None):
|
|
66
67
|
"""convert the object to a python dictionary
|
|
67
68
|
|
|
@@ -359,6 +360,7 @@ class ImageBuilder(ModelObj):
|
|
|
359
360
|
requirements: list = None,
|
|
360
361
|
extra_args=None,
|
|
361
362
|
builder_env=None,
|
|
363
|
+
source_code_target_dir=None,
|
|
362
364
|
):
|
|
363
365
|
self.functionSourceCode = functionSourceCode #: functionSourceCode
|
|
364
366
|
self.codeEntryType = "" #: codeEntryType
|
|
@@ -379,6 +381,7 @@ class ImageBuilder(ModelObj):
|
|
|
379
381
|
self.auto_build = auto_build #: auto_build
|
|
380
382
|
self.build_pod = None
|
|
381
383
|
self.requirements = requirements or [] #: pip requirements
|
|
384
|
+
self.source_code_target_dir = source_code_target_dir or None
|
|
382
385
|
|
|
383
386
|
@property
|
|
384
387
|
def source(self):
|
|
@@ -415,6 +418,7 @@ class ImageBuilder(ModelObj):
|
|
|
415
418
|
overwrite=False,
|
|
416
419
|
builder_env=None,
|
|
417
420
|
extra_args=None,
|
|
421
|
+
source_code_target_dir=None,
|
|
418
422
|
):
|
|
419
423
|
if image:
|
|
420
424
|
self.image = image
|
|
@@ -440,6 +444,8 @@ class ImageBuilder(ModelObj):
|
|
|
440
444
|
self.builder_env = builder_env
|
|
441
445
|
if extra_args:
|
|
442
446
|
self.extra_args = extra_args
|
|
447
|
+
if source_code_target_dir:
|
|
448
|
+
self.source_code_target_dir = source_code_target_dir
|
|
443
449
|
|
|
444
450
|
def with_commands(
|
|
445
451
|
self,
|
mlrun/model_monitoring/api.py
CHANGED
|
@@ -436,9 +436,9 @@ def _generate_model_endpoint(
|
|
|
436
436
|
] = possible_drift_threshold
|
|
437
437
|
|
|
438
438
|
model_endpoint.spec.monitoring_mode = monitoring_mode
|
|
439
|
-
model_endpoint.status.first_request = (
|
|
440
|
-
|
|
441
|
-
)
|
|
439
|
+
model_endpoint.status.first_request = model_endpoint.status.last_request = (
|
|
440
|
+
datetime_now().isoformat()
|
|
441
|
+
)
|
|
442
442
|
if sample_set_statistics:
|
|
443
443
|
model_endpoint.status.feature_stats = sample_set_statistics
|
|
444
444
|
|
|
@@ -476,11 +476,11 @@ def trigger_drift_batch_job(
|
|
|
476
476
|
db_session = mlrun.get_run_db()
|
|
477
477
|
|
|
478
478
|
# Register the monitoring batch job (do nothing if already exist) and get the job function as a dictionary
|
|
479
|
-
batch_function_dict: typing.Dict[
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
479
|
+
batch_function_dict: typing.Dict[str, typing.Any] = (
|
|
480
|
+
db_session.deploy_monitoring_batch_job(
|
|
481
|
+
project=project,
|
|
482
|
+
default_batch_image=default_batch_image,
|
|
483
|
+
)
|
|
484
484
|
)
|
|
485
485
|
|
|
486
486
|
# Prepare current run params
|
mlrun/model_monitoring/batch.py
CHANGED
|
@@ -426,13 +426,6 @@ class MonitoringApplicationController:
|
|
|
426
426
|
m_fs = fstore.get_feature_set(
|
|
427
427
|
endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
|
|
428
428
|
)
|
|
429
|
-
labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
|
|
430
|
-
if labels:
|
|
431
|
-
if isinstance(labels, str):
|
|
432
|
-
labels = json.loads(labels)
|
|
433
|
-
for label in labels:
|
|
434
|
-
if label not in list(m_fs.spec.features.keys()):
|
|
435
|
-
m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
|
|
436
429
|
|
|
437
430
|
for application in applications_names:
|
|
438
431
|
batch_window = batch_window_generator.get_batch_window(
|