mlrun: 1.6.2rc6 (py3-none-any.whl) → 1.6.3 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/artifacts/model.py +28 -22
- mlrun/common/db/sql_session.py +3 -0
- mlrun/common/model_monitoring/helpers.py +4 -2
- mlrun/common/schemas/__init__.py +2 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/__init__.py +1 -0
- mlrun/common/schemas/model_monitoring/constants.py +21 -5
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +59 -20
- mlrun/data_types/data_types.py +4 -0
- mlrun/datastore/azure_blob.py +9 -9
- mlrun/datastore/base.py +22 -44
- mlrun/datastore/google_cloud_storage.py +6 -6
- mlrun/datastore/v3io.py +74 -73
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +18 -0
- mlrun/db/httpdb.py +79 -55
- mlrun/execution.py +3 -3
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/lists.py +2 -0
- mlrun/model.py +31 -2
- mlrun/model_monitoring/api.py +8 -8
- mlrun/model_monitoring/batch.py +1 -1
- mlrun/model_monitoring/controller.py +0 -7
- mlrun/model_monitoring/features_drift_table.py +6 -0
- mlrun/model_monitoring/helpers.py +4 -1
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/model_monitoring/stream_processing.py +50 -37
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +6 -65
- mlrun/projects/pipelines.py +51 -17
- mlrun/projects/project.py +77 -61
- mlrun/render.py +13 -4
- mlrun/run.py +2 -0
- mlrun/runtimes/base.py +24 -1
- mlrun/runtimes/function.py +9 -9
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/pod.py +8 -8
- mlrun/runtimes/serving.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/utils/async_http.py +28 -8
- mlrun/utils/helpers.py +20 -0
- mlrun/utils/http.py +3 -3
- mlrun/utils/logger.py +11 -6
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/METADATA +18 -18
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/RECORD +60 -59
- mlrun/datastore/helpers.py +0 -18
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
mlrun/datastore/v3io.py
CHANGED
|
@@ -12,32 +12,24 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import mmap
|
|
16
|
-
import os
|
|
17
15
|
import time
|
|
18
|
-
from copy import deepcopy
|
|
19
16
|
from datetime import datetime
|
|
20
17
|
|
|
21
18
|
import fsspec
|
|
22
|
-
import
|
|
23
|
-
|
|
19
|
+
import v3io
|
|
20
|
+
from v3io.dataplane.response import HttpResponseError
|
|
24
21
|
|
|
25
22
|
import mlrun
|
|
26
|
-
from mlrun.datastore.helpers import ONE_GB, ONE_MB
|
|
27
23
|
|
|
28
24
|
from ..platforms.iguazio import parse_path, split_path
|
|
29
25
|
from .base import (
|
|
30
26
|
DataStore,
|
|
31
27
|
FileStats,
|
|
32
28
|
basic_auth_header,
|
|
33
|
-
get_range,
|
|
34
|
-
http_get,
|
|
35
|
-
http_head,
|
|
36
|
-
http_put,
|
|
37
|
-
http_upload,
|
|
38
29
|
)
|
|
39
30
|
|
|
40
31
|
V3IO_LOCAL_ROOT = "v3io"
|
|
32
|
+
V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100
|
|
41
33
|
|
|
42
34
|
|
|
43
35
|
class V3ioStore(DataStore):
|
|
@@ -47,17 +39,18 @@ class V3ioStore(DataStore):
|
|
|
47
39
|
|
|
48
40
|
self.headers = None
|
|
49
41
|
self.secure = self.kind == "v3ios"
|
|
42
|
+
|
|
43
|
+
token = self._get_secret_or_env("V3IO_ACCESS_KEY")
|
|
44
|
+
username = self._get_secret_or_env("V3IO_USERNAME")
|
|
45
|
+
password = self._get_secret_or_env("V3IO_PASSWORD")
|
|
50
46
|
if self.endpoint.startswith("https://"):
|
|
51
47
|
self.endpoint = self.endpoint[len("https://") :]
|
|
52
48
|
self.secure = True
|
|
53
49
|
elif self.endpoint.startswith("http://"):
|
|
54
50
|
self.endpoint = self.endpoint[len("http://") :]
|
|
55
51
|
self.secure = False
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
username = self._get_secret_or_env("V3IO_USERNAME")
|
|
59
|
-
password = self._get_secret_or_env("V3IO_PASSWORD")
|
|
60
|
-
|
|
52
|
+
self.client = v3io.dataplane.Client(access_key=token, endpoint=self.url)
|
|
53
|
+
self.object = self.client.object
|
|
61
54
|
self.auth = None
|
|
62
55
|
self.token = token
|
|
63
56
|
if token:
|
|
@@ -65,6 +58,16 @@ class V3ioStore(DataStore):
|
|
|
65
58
|
elif username and password:
|
|
66
59
|
self.headers = basic_auth_header(username, password)
|
|
67
60
|
|
|
61
|
+
@staticmethod
|
|
62
|
+
def _do_object_request(function: callable, *args, **kwargs):
|
|
63
|
+
try:
|
|
64
|
+
return function(*args, **kwargs)
|
|
65
|
+
except HttpResponseError as http_response_error:
|
|
66
|
+
raise mlrun.errors.err_for_status_code(
|
|
67
|
+
status_code=http_response_error.status_code,
|
|
68
|
+
message=mlrun.errors.err_to_str(http_response_error),
|
|
69
|
+
)
|
|
70
|
+
|
|
68
71
|
@staticmethod
|
|
69
72
|
def uri_to_ipython(endpoint, subpath):
|
|
70
73
|
return V3IO_LOCAL_ROOT + subpath
|
|
@@ -89,80 +92,79 @@ class V3ioStore(DataStore):
|
|
|
89
92
|
)
|
|
90
93
|
return self._sanitize_storage_options(res)
|
|
91
94
|
|
|
92
|
-
def _upload(
|
|
95
|
+
def _upload(
|
|
96
|
+
self,
|
|
97
|
+
key: str,
|
|
98
|
+
src_path: str,
|
|
99
|
+
max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
|
|
100
|
+
):
|
|
93
101
|
"""helper function for upload method, allows for controlling max_chunk_size in testing"""
|
|
94
|
-
|
|
95
|
-
if file_size <= ONE_MB:
|
|
96
|
-
http_upload(self.url + self._join(key), src_path, self.headers, None)
|
|
97
|
-
return
|
|
98
|
-
append_header = deepcopy(self.headers)
|
|
99
|
-
append_header["Range"] = "-1"
|
|
100
|
-
|
|
101
|
-
# chunk must be a multiple of the ALLOCATIONGRANULARITY
|
|
102
|
-
# https://docs.python.org/3/library/mmap.html
|
|
103
|
-
if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
|
|
104
|
-
# round down to the nearest multiple of ALLOCATIONGRANULARITY
|
|
105
|
-
max_chunk_size -= residue
|
|
106
|
-
|
|
102
|
+
container, path = split_path(self._join(key))
|
|
107
103
|
with open(src_path, "rb") as file_obj:
|
|
108
|
-
|
|
109
|
-
while
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
None,
|
|
122
|
-
)
|
|
123
|
-
file_offset += chunk_size
|
|
104
|
+
append = False
|
|
105
|
+
while True:
|
|
106
|
+
data = memoryview(file_obj.read(max_chunk_size))
|
|
107
|
+
if not data:
|
|
108
|
+
break
|
|
109
|
+
self._do_object_request(
|
|
110
|
+
self.object.put,
|
|
111
|
+
container=container,
|
|
112
|
+
path=path,
|
|
113
|
+
body=data,
|
|
114
|
+
append=append,
|
|
115
|
+
)
|
|
116
|
+
append = True
|
|
124
117
|
|
|
125
118
|
def upload(self, key, src_path):
|
|
126
119
|
return self._upload(key, src_path)
|
|
127
120
|
|
|
128
121
|
def get(self, key, size=None, offset=0):
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
122
|
+
container, path = split_path(self._join(key))
|
|
123
|
+
return self._do_object_request(
|
|
124
|
+
function=self.object.get,
|
|
125
|
+
container=container,
|
|
126
|
+
path=path,
|
|
127
|
+
offset=offset,
|
|
128
|
+
num_bytes=size,
|
|
129
|
+
).body
|
|
130
|
+
|
|
131
|
+
def _put(
|
|
132
|
+
self,
|
|
133
|
+
key,
|
|
134
|
+
data,
|
|
135
|
+
append=False,
|
|
136
|
+
max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
|
|
137
|
+
):
|
|
136
138
|
"""helper function for put method, allows for controlling max_chunk_size in testing"""
|
|
139
|
+
container, path = split_path(self._join(key))
|
|
137
140
|
buffer_size = len(data) # in bytes
|
|
138
|
-
if buffer_size <= ONE_MB:
|
|
139
|
-
http_put(self.url + self._join(key), data, self.headers, None)
|
|
140
|
-
return
|
|
141
|
-
append_header = deepcopy(self.headers)
|
|
142
|
-
append_header["Range"] = "-1"
|
|
143
141
|
buffer_offset = 0
|
|
144
142
|
try:
|
|
145
143
|
data = memoryview(data)
|
|
146
144
|
except TypeError:
|
|
147
145
|
pass
|
|
148
146
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
147
|
+
while buffer_offset < buffer_size:
|
|
148
|
+
chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
|
|
149
|
+
append = True if buffer_offset or append else False
|
|
150
|
+
self._do_object_request(
|
|
151
|
+
self.object.put,
|
|
152
|
+
container=container,
|
|
153
|
+
path=path,
|
|
154
|
+
body=data[buffer_offset : buffer_offset + chunk_size],
|
|
155
|
+
append=append,
|
|
156
|
+
)
|
|
157
|
+
buffer_offset += chunk_size
|
|
160
158
|
|
|
161
159
|
def put(self, key, data, append=False):
|
|
162
|
-
return self._put(key, data)
|
|
160
|
+
return self._put(key, data, append)
|
|
163
161
|
|
|
164
162
|
def stat(self, key):
|
|
165
|
-
|
|
163
|
+
container, path = split_path(self._join(key))
|
|
164
|
+
response = self._do_object_request(
|
|
165
|
+
function=self.object.head, container=container, path=path
|
|
166
|
+
)
|
|
167
|
+
head = dict(response.headers)
|
|
166
168
|
size = int(head.get("Content-Length", "0"))
|
|
167
169
|
datestr = head.get("Last-Modified", "0")
|
|
168
170
|
modified = time.mktime(
|
|
@@ -171,7 +173,6 @@ class V3ioStore(DataStore):
|
|
|
171
173
|
return FileStats(size, modified)
|
|
172
174
|
|
|
173
175
|
def listdir(self, key):
|
|
174
|
-
v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
|
|
175
176
|
container, subpath = split_path(self._join(key))
|
|
176
177
|
if not subpath.endswith("/"):
|
|
177
178
|
subpath += "/"
|
|
@@ -180,7 +181,7 @@ class V3ioStore(DataStore):
|
|
|
180
181
|
subpath_length = len(subpath) - 1
|
|
181
182
|
|
|
182
183
|
try:
|
|
183
|
-
response =
|
|
184
|
+
response = self.client.container.list(
|
|
184
185
|
container=container,
|
|
185
186
|
path=subpath,
|
|
186
187
|
get_all_attributes=False,
|
mlrun/db/auth_utils.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
from abc import ABC, abstractmethod
|
|
16
|
+
from datetime import datetime, timedelta
|
|
17
|
+
|
|
18
|
+
import requests
|
|
19
|
+
|
|
20
|
+
import mlrun.errors
|
|
21
|
+
from mlrun.utils import logger
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class TokenProvider(ABC):
|
|
25
|
+
@abstractmethod
|
|
26
|
+
def get_token(self):
|
|
27
|
+
pass
|
|
28
|
+
|
|
29
|
+
@abstractmethod
|
|
30
|
+
def is_iguazio_session(self):
|
|
31
|
+
pass
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class StaticTokenProvider(TokenProvider):
|
|
35
|
+
def __init__(self, token: str):
|
|
36
|
+
self.token = token
|
|
37
|
+
|
|
38
|
+
def get_token(self):
|
|
39
|
+
return self.token
|
|
40
|
+
|
|
41
|
+
def is_iguazio_session(self):
|
|
42
|
+
return mlrun.platforms.iguazio.is_iguazio_session(self.token)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class OAuthClientIDTokenProvider(TokenProvider):
|
|
46
|
+
def __init__(
|
|
47
|
+
self, token_endpoint: str, client_id: str, client_secret: str, timeout=5
|
|
48
|
+
):
|
|
49
|
+
if not token_endpoint or not client_id or not client_secret:
|
|
50
|
+
raise mlrun.errors.MLRunValueError(
|
|
51
|
+
"Invalid client_id configuration for authentication. Must provide token endpoint, client-id and secret"
|
|
52
|
+
)
|
|
53
|
+
self.token_endpoint = token_endpoint
|
|
54
|
+
self.client_id = client_id
|
|
55
|
+
self.client_secret = client_secret
|
|
56
|
+
self.timeout = timeout
|
|
57
|
+
|
|
58
|
+
# Since we're only issuing POST requests, which are actually a disguised GET, then it's ok to allow retries
|
|
59
|
+
# on them.
|
|
60
|
+
self._session = mlrun.utils.HTTPSessionWithRetry(
|
|
61
|
+
retry_on_post=True,
|
|
62
|
+
verbose=True,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
self._cleanup()
|
|
66
|
+
self._refresh_token_if_needed()
|
|
67
|
+
|
|
68
|
+
def get_token(self):
|
|
69
|
+
self._refresh_token_if_needed()
|
|
70
|
+
return self.token
|
|
71
|
+
|
|
72
|
+
def is_iguazio_session(self):
|
|
73
|
+
return False
|
|
74
|
+
|
|
75
|
+
def _cleanup(self):
|
|
76
|
+
self.token = self.token_expiry_time = self.token_refresh_time = None
|
|
77
|
+
|
|
78
|
+
def _refresh_token_if_needed(self):
|
|
79
|
+
now = datetime.now()
|
|
80
|
+
if self.token:
|
|
81
|
+
if self.token_refresh_time and now <= self.token_refresh_time:
|
|
82
|
+
return self.token
|
|
83
|
+
|
|
84
|
+
# We only cleanup if token was really expired - even if we fail in refreshing the token, we can still
|
|
85
|
+
# use the existing one given that it's not expired.
|
|
86
|
+
if now >= self.token_expiry_time:
|
|
87
|
+
self._cleanup()
|
|
88
|
+
|
|
89
|
+
self._issue_token_request()
|
|
90
|
+
return self.token
|
|
91
|
+
|
|
92
|
+
def _issue_token_request(self, raise_on_error=False):
|
|
93
|
+
try:
|
|
94
|
+
headers = {"Content-Type": "application/x-www-form-urlencoded"}
|
|
95
|
+
request_body = {
|
|
96
|
+
"grant_type": "client_credentials",
|
|
97
|
+
"client_id": self.client_id,
|
|
98
|
+
"client_secret": self.client_secret,
|
|
99
|
+
}
|
|
100
|
+
response = self._session.request(
|
|
101
|
+
"POST",
|
|
102
|
+
self.token_endpoint,
|
|
103
|
+
timeout=self.timeout,
|
|
104
|
+
headers=headers,
|
|
105
|
+
data=request_body,
|
|
106
|
+
)
|
|
107
|
+
except requests.RequestException as exc:
|
|
108
|
+
error = f"Retrieving token failed: {mlrun.errors.err_to_str(exc)}"
|
|
109
|
+
if raise_on_error:
|
|
110
|
+
raise mlrun.errors.MLRunRuntimeError(error) from exc
|
|
111
|
+
else:
|
|
112
|
+
logger.warning(error)
|
|
113
|
+
return
|
|
114
|
+
|
|
115
|
+
if not response.ok:
|
|
116
|
+
error = "No error available"
|
|
117
|
+
if response.content:
|
|
118
|
+
try:
|
|
119
|
+
data = response.json()
|
|
120
|
+
error = data.get("error")
|
|
121
|
+
except Exception:
|
|
122
|
+
pass
|
|
123
|
+
logger.warning(
|
|
124
|
+
"Retrieving token failed", status=response.status_code, error=error
|
|
125
|
+
)
|
|
126
|
+
if raise_on_error:
|
|
127
|
+
mlrun.errors.raise_for_status(response)
|
|
128
|
+
return
|
|
129
|
+
|
|
130
|
+
self._parse_response(response.json())
|
|
131
|
+
|
|
132
|
+
def _parse_response(self, data: dict):
|
|
133
|
+
# Response is described in https://datatracker.ietf.org/doc/html/rfc6749#section-4.4.3
|
|
134
|
+
# According to spec, there isn't a refresh token - just the access token and its expiry time (in seconds).
|
|
135
|
+
self.token = data.get("access_token")
|
|
136
|
+
expires_in = data.get("expires_in")
|
|
137
|
+
if not self.token or not expires_in:
|
|
138
|
+
token_str = "****" if self.token else "missing"
|
|
139
|
+
logger.warning(
|
|
140
|
+
"Failed to parse token response", token=token_str, expires_in=expires_in
|
|
141
|
+
)
|
|
142
|
+
return
|
|
143
|
+
|
|
144
|
+
now = datetime.now()
|
|
145
|
+
self.token_expiry_time = now + timedelta(seconds=expires_in)
|
|
146
|
+
self.token_refresh_time = now + timedelta(seconds=expires_in / 2)
|
|
147
|
+
logger.info(
|
|
148
|
+
"Successfully retrieved client-id token",
|
|
149
|
+
expires_in=expires_in,
|
|
150
|
+
expiry=str(self.token_expiry_time),
|
|
151
|
+
refresh=str(self.token_refresh_time),
|
|
152
|
+
)
|
mlrun/db/base.py
CHANGED
|
@@ -677,3 +677,21 @@ class RunDBInterface(ABC):
|
|
|
677
677
|
self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
|
|
678
678
|
):
|
|
679
679
|
pass
|
|
680
|
+
|
|
681
|
+
def submit_workflow(
|
|
682
|
+
self,
|
|
683
|
+
project: str,
|
|
684
|
+
name: str,
|
|
685
|
+
workflow_spec: Union[
|
|
686
|
+
"mlrun.projects.pipelines.WorkflowSpec",
|
|
687
|
+
"mlrun.common.schemas.WorkflowSpec",
|
|
688
|
+
dict,
|
|
689
|
+
],
|
|
690
|
+
arguments: Optional[dict] = None,
|
|
691
|
+
artifact_path: Optional[str] = None,
|
|
692
|
+
source: Optional[str] = None,
|
|
693
|
+
run_name: Optional[str] = None,
|
|
694
|
+
namespace: Optional[str] = None,
|
|
695
|
+
notifications: list["mlrun.model.Notification"] = None,
|
|
696
|
+
) -> "mlrun.common.schemas.WorkflowResponse":
|
|
697
|
+
pass
|