mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (61)
  1. mlrun/artifacts/model.py +28 -22
  2. mlrun/common/db/sql_session.py +3 -0
  3. mlrun/common/model_monitoring/helpers.py +4 -2
  4. mlrun/common/schemas/__init__.py +2 -0
  5. mlrun/common/schemas/common.py +40 -0
  6. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +21 -5
  8. mlrun/common/schemas/project.py +2 -0
  9. mlrun/config.py +59 -20
  10. mlrun/data_types/data_types.py +4 -0
  11. mlrun/datastore/azure_blob.py +9 -9
  12. mlrun/datastore/base.py +22 -44
  13. mlrun/datastore/google_cloud_storage.py +6 -6
  14. mlrun/datastore/v3io.py +74 -73
  15. mlrun/db/auth_utils.py +152 -0
  16. mlrun/db/base.py +18 -0
  17. mlrun/db/httpdb.py +79 -55
  18. mlrun/execution.py +3 -3
  19. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  20. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  21. mlrun/k8s_utils.py +10 -5
  22. mlrun/kfpops.py +19 -10
  23. mlrun/lists.py +2 -0
  24. mlrun/model.py +31 -2
  25. mlrun/model_monitoring/api.py +8 -8
  26. mlrun/model_monitoring/batch.py +1 -1
  27. mlrun/model_monitoring/controller.py +0 -7
  28. mlrun/model_monitoring/features_drift_table.py +6 -0
  29. mlrun/model_monitoring/helpers.py +4 -1
  30. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
  31. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  32. mlrun/model_monitoring/stream_processing.py +50 -37
  33. mlrun/package/packagers/pandas_packagers.py +3 -3
  34. mlrun/package/utils/_archiver.py +3 -1
  35. mlrun/platforms/iguazio.py +6 -65
  36. mlrun/projects/pipelines.py +51 -17
  37. mlrun/projects/project.py +77 -61
  38. mlrun/render.py +13 -4
  39. mlrun/run.py +2 -0
  40. mlrun/runtimes/base.py +24 -1
  41. mlrun/runtimes/function.py +9 -9
  42. mlrun/runtimes/kubejob.py +5 -3
  43. mlrun/runtimes/local.py +2 -2
  44. mlrun/runtimes/mpijob/abstract.py +6 -6
  45. mlrun/runtimes/pod.py +8 -8
  46. mlrun/runtimes/serving.py +3 -3
  47. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  48. mlrun/serving/remote.py +4 -2
  49. mlrun/utils/async_http.py +28 -8
  50. mlrun/utils/helpers.py +20 -0
  51. mlrun/utils/http.py +3 -3
  52. mlrun/utils/logger.py +11 -6
  53. mlrun/utils/notifications/notification_pusher.py +6 -6
  54. mlrun/utils/version/version.json +2 -2
  55. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/METADATA +18 -18
  56. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/RECORD +60 -59
  57. mlrun/datastore/helpers.py +0 -18
  58. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
  59. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
  60. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
  61. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
mlrun/datastore/v3io.py CHANGED
@@ -12,32 +12,24 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import mmap
16
- import os
17
15
  import time
18
- from copy import deepcopy
19
16
  from datetime import datetime
20
17
 
21
18
  import fsspec
22
- import requests
23
- import v3io.dataplane
19
+ import v3io
20
+ from v3io.dataplane.response import HttpResponseError
24
21
 
25
22
  import mlrun
26
- from mlrun.datastore.helpers import ONE_GB, ONE_MB
27
23
 
28
24
  from ..platforms.iguazio import parse_path, split_path
29
25
  from .base import (
30
26
  DataStore,
31
27
  FileStats,
32
28
  basic_auth_header,
33
- get_range,
34
- http_get,
35
- http_head,
36
- http_put,
37
- http_upload,
38
29
  )
39
30
 
40
31
  V3IO_LOCAL_ROOT = "v3io"
32
+ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 100
41
33
 
42
34
 
43
35
  class V3ioStore(DataStore):
@@ -47,17 +39,18 @@ class V3ioStore(DataStore):
47
39
 
48
40
  self.headers = None
49
41
  self.secure = self.kind == "v3ios"
42
+
43
+ token = self._get_secret_or_env("V3IO_ACCESS_KEY")
44
+ username = self._get_secret_or_env("V3IO_USERNAME")
45
+ password = self._get_secret_or_env("V3IO_PASSWORD")
50
46
  if self.endpoint.startswith("https://"):
51
47
  self.endpoint = self.endpoint[len("https://") :]
52
48
  self.secure = True
53
49
  elif self.endpoint.startswith("http://"):
54
50
  self.endpoint = self.endpoint[len("http://") :]
55
51
  self.secure = False
56
-
57
- token = self._get_secret_or_env("V3IO_ACCESS_KEY")
58
- username = self._get_secret_or_env("V3IO_USERNAME")
59
- password = self._get_secret_or_env("V3IO_PASSWORD")
60
-
52
+ self.client = v3io.dataplane.Client(access_key=token, endpoint=self.url)
53
+ self.object = self.client.object
61
54
  self.auth = None
62
55
  self.token = token
63
56
  if token:
@@ -65,6 +58,16 @@ class V3ioStore(DataStore):
65
58
  elif username and password:
66
59
  self.headers = basic_auth_header(username, password)
67
60
 
61
@staticmethod
def _do_object_request(function: callable, *args, **kwargs):
    """
    Invoke a v3io object-API call and translate its HTTP failures into mlrun errors.

    :param function: the callable to invoke
    :param args:     positional arguments forwarded to ``function`` unchanged
    :param kwargs:   keyword arguments forwarded to ``function`` unchanged

    :return: whatever ``function`` returns
    :raise:  the error built by ``mlrun.errors.err_for_status_code`` from the status code and message of the
             ``HttpResponseError`` raised by ``function``
    """
    try:
        return function(*args, **kwargs)
    except HttpResponseError as http_response_error:
        # Chain explicitly so the original v3io error is recorded as the direct cause of the mlrun error
        raise mlrun.errors.err_for_status_code(
            status_code=http_response_error.status_code,
            message=mlrun.errors.err_to_str(http_response_error),
        ) from http_response_error
70
+
68
71
  @staticmethod
69
72
  def uri_to_ipython(endpoint, subpath):
70
73
  return V3IO_LOCAL_ROOT + subpath
@@ -89,80 +92,79 @@ class V3ioStore(DataStore):
89
92
  )
90
93
  return self._sanitize_storage_options(res)
91
94
 
92
def _upload(
    self,
    key: str,
    src_path: str,
    max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
):
    """helper function for upload method, allows for controlling max_chunk_size in testing

    The file is read in chunks of at most ``max_chunk_size`` bytes. The first chunk is written with
    ``append=False`` and every following chunk with ``append=True``.

    :param key:            target key, joined with the store's sub-path via ``self._join``
    :param src_path:       path of the local file to read (opened in binary mode)
    :param max_chunk_size: maximal number of bytes passed to a single put request
    """
    container, path = split_path(self._join(key))
    with open(src_path, "rb") as file_obj:
        append = False
        while True:
            data = memoryview(file_obj.read(max_chunk_size))
            # An empty read after at least one chunk was written means end-of-file.
            if not data and append:
                break
            # When the very first read is empty the file itself is empty - we still issue a single put,
            # otherwise uploading an empty file would silently not create the target object.
            self._do_object_request(
                self.object.put,
                container=container,
                path=path,
                body=data,
                append=append,
            )
            if not data:
                break
            append = True
124
117
 
125
118
def upload(self, key, src_path):
    """Upload the local file ``src_path`` to ``key`` by delegating to ``_upload`` with its default chunk size."""
    outcome = self._upload(key, src_path)
    return outcome
127
120
 
128
121
def get(self, key, size=None, offset=0):
    """
    Read an object through the v3io object API and return the body of the response.

    :param key:    object key, joined with the store's sub-path via ``self._join``
    :param size:   forwarded to the object API as ``num_bytes``
    :param offset: forwarded to the object API as ``offset``

    :return: the ``body`` attribute of the response
    """
    full_path = self._join(key)
    container, path = split_path(full_path)
    response = self._do_object_request(
        function=self.object.get,
        container=container,
        path=path,
        offset=offset,
        num_bytes=size,
    )
    return response.body
130
+
131
def _put(
    self,
    key,
    data,
    append=False,
    max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
):
    """helper function for put method, allows for controlling max_chunk_size in testing

    ``data`` is written in slices of at most ``max_chunk_size``. The first slice honours the ``append``
    argument and every following slice is appended.

    :param key:            target key, joined with the store's sub-path via ``self._join``
    :param data:           the payload; wrapped in a ``memoryview`` when it supports the buffer protocol
    :param append:         when true, the first slice is appended to the object instead of replacing it
    :param max_chunk_size: maximal size of a single put request, must be positive

    :raise mlrun.errors.MLRunValueError: if ``max_chunk_size`` is not positive
    """
    if max_chunk_size <= 0:
        # A non-positive chunk size would never advance the offset and the loop below would spin forever
        raise mlrun.errors.MLRunValueError(
            f"max_chunk_size must be a positive number, got {max_chunk_size}"
        )

    container, path = split_path(self._join(key))
    buffer_size = len(data)  # in bytes
    try:
        data = memoryview(data)
    except TypeError:
        # data does not expose the buffer protocol (e.g. str) - slice it directly
        pass

    if not buffer_size:
        # Nothing to split, but the object must still be written - otherwise putting empty data would
        # silently neither create nor truncate the target.
        self._do_object_request(
            self.object.put,
            container=container,
            path=path,
            body=data,
            append=bool(append),
        )
        return

    buffer_offset = 0
    while buffer_offset < buffer_size:
        chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
        self._do_object_request(
            self.object.put,
            container=container,
            path=path,
            body=data[buffer_offset : buffer_offset + chunk_size],
            # every slice after the first one extends the object written so far
            append=bool(buffer_offset or append),
        )
        buffer_offset += chunk_size
160
158
 
161
159
def put(self, key, data, append=False):
    """Write ``data`` to ``key`` by delegating to ``_put``, forwarding the ``append`` flag."""
    outcome = self._put(key, data, append)
    return outcome
163
161
 
164
162
  def stat(self, key):
165
- head = http_head(self.url + self._join(key), self.headers)
163
+ container, path = split_path(self._join(key))
164
+ response = self._do_object_request(
165
+ function=self.object.head, container=container, path=path
166
+ )
167
+ head = dict(response.headers)
166
168
  size = int(head.get("Content-Length", "0"))
167
169
  datestr = head.get("Last-Modified", "0")
168
170
  modified = time.mktime(
@@ -171,7 +173,6 @@ class V3ioStore(DataStore):
171
173
  return FileStats(size, modified)
172
174
 
173
175
  def listdir(self, key):
174
- v3io_client = v3io.dataplane.Client(endpoint=self.url, access_key=self.token)
175
176
  container, subpath = split_path(self._join(key))
176
177
  if not subpath.endswith("/"):
177
178
  subpath += "/"
@@ -180,7 +181,7 @@ class V3ioStore(DataStore):
180
181
  subpath_length = len(subpath) - 1
181
182
 
182
183
  try:
183
- response = v3io_client.container.list(
184
+ response = self.client.container.list(
184
185
  container=container,
185
186
  path=subpath,
186
187
  get_all_attributes=False,
mlrun/db/auth_utils.py ADDED
@@ -0,0 +1,152 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+ from datetime import datetime, timedelta
17
+
18
+ import requests
19
+
20
+ import mlrun.errors
21
+ from mlrun.utils import logger
22
+
23
+
24
class TokenProvider(ABC):
    """Abstract interface of an object that supplies a token."""

    @abstractmethod
    def get_token(self):
        """Return the token; concrete providers decide how it is obtained."""
        return None

    @abstractmethod
    def is_iguazio_session(self):
        """Tell whether the provided token is an Iguazio session."""
        return None
+
33
+
34
class StaticTokenProvider(TokenProvider):
    """Token provider that always hands back the token it was constructed with."""

    def __init__(self, token: str):
        self.token = token

    def get_token(self):
        """Return the stored token unchanged."""
        return self.token

    def is_iguazio_session(self):
        """Return the result of ``mlrun.platforms.iguazio.is_iguazio_session`` for the stored token."""
        stored_token = self.token
        return mlrun.platforms.iguazio.is_iguazio_session(stored_token)
43
+
44
+
45
class OAuthClientIDTokenProvider(TokenProvider):
    """
    Token provider that obtains its token with the OAuth2 ``client_credentials`` grant.

    A token is requested on construction and re-requested once half of its lifetime has passed. While a
    refresh attempt fails, the previously retrieved token keeps being returned until it expires.
    """

    def __init__(
        self, token_endpoint: str, client_id: str, client_secret: str, timeout=5
    ):
        """
        :param token_endpoint: URL the token request is POSTed to
        :param client_id:      value sent as ``client_id`` in the token request
        :param client_secret:  value sent as ``client_secret`` in the token request
        :param timeout:        timeout passed to the HTTP request

        :raise mlrun.errors.MLRunValueError: if the endpoint, client id or secret is empty
        """
        if not token_endpoint or not client_id or not client_secret:
            raise mlrun.errors.MLRunValueError(
                "Invalid client_id configuration for authentication. Must provide token endpoint, client-id and secret"
            )
        self.token_endpoint = token_endpoint
        self.client_id = client_id
        self.client_secret = client_secret
        self.timeout = timeout

        # Since we're only issuing POST requests, which are actually a disguised GET, then it's ok to allow retries
        # on them.
        self._session = mlrun.utils.HTTPSessionWithRetry(
            retry_on_post=True,
            verbose=True,
        )

        self._cleanup()
        self._refresh_token_if_needed()

    def get_token(self):
        """Return the current token (possibly ``None``), refreshing it first when needed."""
        self._refresh_token_if_needed()
        return self.token

    def is_iguazio_session(self):
        """Always ``False`` for this provider."""
        return False

    def _cleanup(self):
        """Forget the token together with its expiry and refresh times."""
        self.token = self.token_expiry_time = self.token_refresh_time = None

    def _refresh_token_if_needed(self):
        """Request a new token unless the current one has not reached its refresh time yet."""
        now = datetime.now()
        if self.token:
            if self.token_refresh_time and now <= self.token_refresh_time:
                return self.token

            # We only cleanup if token was really expired - even if we fail in refreshing the token, we can still
            # use the existing one given that it's not expired.
            # The expiry time is unknown (None) when the server did not report a usable `expires_in`; comparing
            # against None would raise TypeError, so such a token is kept while a new one is requested.
            if self.token_expiry_time is not None and now >= self.token_expiry_time:
                self._cleanup()

        self._issue_token_request()
        return self.token

    def _issue_token_request(self, raise_on_error=False):
        """
        POST a ``client_credentials`` token request and store the outcome.

        :param raise_on_error: when true, failures are raised; otherwise they are logged as warnings and the
                               current token state is left as is
        """
        try:
            headers = {"Content-Type": "application/x-www-form-urlencoded"}
            request_body = {
                "grant_type": "client_credentials",
                "client_id": self.client_id,
                "client_secret": self.client_secret,
            }
            response = self._session.request(
                "POST",
                self.token_endpoint,
                timeout=self.timeout,
                headers=headers,
                data=request_body,
            )
        except requests.RequestException as exc:
            error = f"Retrieving token failed: {mlrun.errors.err_to_str(exc)}"
            if raise_on_error:
                raise mlrun.errors.MLRunRuntimeError(error) from exc
            else:
                logger.warning(error)
                return

        if not response.ok:
            error = "No error available"
            if response.content:
                # Best effort only - the error body may be non-JSON or not a JSON object
                try:
                    data = response.json()
                    error = data.get("error")
                except (ValueError, AttributeError):
                    pass
            logger.warning(
                "Retrieving token failed", status=response.status_code, error=error
            )
            if raise_on_error:
                mlrun.errors.raise_for_status(response)
            return

        # A successful status does not guarantee a JSON body - treat an undecodable body like a failed request
        # instead of letting the decoding error escape when raise_on_error is off.
        try:
            payload = response.json()
        except ValueError as exc:
            error = f"Retrieving token failed: {mlrun.errors.err_to_str(exc)}"
            if raise_on_error:
                raise mlrun.errors.MLRunRuntimeError(error) from exc
            logger.warning(error)
            return

        self._parse_response(payload)

    def _parse_response(self, data: dict):
        """
        Store the token and its timing information from a token response.

        A response without an access token leaves the current state untouched. A response with a token but
        without a usable ``expires_in`` stores the token with unknown expiry and refresh times.

        :param data: the decoded token response
        """
        # Response is described in https://datatracker.ietf.org/doc/html/rfc6749#section-4.4.3
        # According to spec, there isn't a refresh token - just the access token and its expiry time (in seconds).
        token = data.get("access_token")
        expires_in = data.get("expires_in")

        now = datetime.now()
        expiry_time = refresh_time = None
        if expires_in:
            # expires_in may arrive as a numeric string, or as something that is not a usable duration
            try:
                lifetime = timedelta(seconds=float(expires_in))
                expiry_time = now + lifetime
                refresh_time = now + lifetime / 2
            except (TypeError, ValueError, OverflowError):
                expiry_time = refresh_time = None

        if not token or expiry_time is None:
            token_str = "****" if token else "missing"
            logger.warning(
                "Failed to parse token response", token=token_str, expires_in=expires_in
            )
            if token:
                # Keep the new token, but never pair it with the timing data of the previous token
                self.token = token
                self.token_expiry_time = self.token_refresh_time = None
            # Without a token the existing (possibly still valid) token is deliberately not overwritten
            return

        self.token = token
        self.token_expiry_time = expiry_time
        self.token_refresh_time = refresh_time
        logger.info(
            "Successfully retrieved client-id token",
            expires_in=expires_in,
            expiry=str(self.token_expiry_time),
            refresh=str(self.token_refresh_time),
        )
mlrun/db/base.py CHANGED
@@ -677,3 +677,21 @@ class RunDBInterface(ABC):
677
677
  self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
678
678
  ):
679
679
  pass
680
+
681
def submit_workflow(
    self,
    project: str,
    name: str,
    workflow_spec: Union[
        "mlrun.projects.pipelines.WorkflowSpec",
        "mlrun.common.schemas.WorkflowSpec",
        dict,
    ],
    arguments: Optional[dict] = None,
    artifact_path: Optional[str] = None,
    source: Optional[str] = None,
    run_name: Optional[str] = None,
    namespace: Optional[str] = None,
    notifications: list["mlrun.model.Notification"] = None,
) -> "mlrun.common.schemas.WorkflowResponse":
    """
    Interface placeholder for submitting a workflow; this implementation does nothing and returns ``None``.

    NOTE(review): the meaning of the parameters is not defined here - presumably concrete DB classes
    override this method; confirm against their implementations.
    """
    return None