mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

Files changed (234):
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/datastore/utils.py CHANGED
@@ -12,9 +12,11 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  #
15
+ import math
15
16
  import tarfile
16
17
  import tempfile
17
18
  import typing
19
+ import warnings
18
20
  from urllib.parse import parse_qs, urlparse
19
21
 
20
22
  import pandas as pd
@@ -23,24 +25,29 @@ import semver
23
25
  import mlrun.datastore
24
26
 
25
27
 
26
- def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list]:
28
+ def parse_kafka_url(
29
+ url: str, brokers: typing.Union[list, str] = None
30
+ ) -> tuple[str, list]:
27
31
  """Generating Kafka topic and adjusting a list of bootstrap servers.
28
32
 
29
33
  :param url: URL path to parse using urllib.parse.urlparse.
30
- :param bootstrap_servers: List of bootstrap servers for the kafka brokers.
34
+ :param brokers: List of kafka brokers.
31
35
 
32
36
  :return: A tuple of:
33
37
  [0] = Kafka topic value
34
38
  [1] = List of bootstrap servers
35
39
  """
36
- bootstrap_servers = bootstrap_servers or []
40
+ brokers = brokers or []
41
+
42
+ if isinstance(brokers, str):
43
+ brokers = brokers.split(",")
37
44
 
38
45
  # Parse the provided URL into six components according to the general structure of a URL
39
46
  url = urlparse(url)
40
47
 
41
48
  # Add the network location to the bootstrap servers list
42
49
  if url.netloc:
43
- bootstrap_servers = [url.netloc] + bootstrap_servers
50
+ brokers = [url.netloc] + brokers
44
51
 
45
52
  # Get the topic value from the parsed url
46
53
  query_dict = parse_qs(url.query)
@@ -49,7 +56,7 @@ def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list
49
56
  else:
50
57
  topic = url.path
51
58
  topic = topic.lstrip("/")
52
- return topic, bootstrap_servers
59
+ return topic, brokers
53
60
 
54
61
 
55
62
  def upload_tarball(source_dir, target, secrets=None):
@@ -159,3 +166,59 @@ def _generate_sql_query_with_time_filter(
159
166
  query = query.filter(getattr(table.c, time_column) <= end_time)
160
167
 
161
168
  return query, parse_dates
169
+
170
+
171
+ def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
172
+ get_or_pop = options.pop if pop else options.get
173
+ kafka_brokers = get_or_pop("kafka_brokers", None)
174
+ if kafka_brokers:
175
+ return kafka_brokers
176
+ kafka_bootstrap_servers = get_or_pop("kafka_bootstrap_servers", None)
177
+ if kafka_bootstrap_servers:
178
+ warnings.warn(
179
+ "The 'kafka_bootstrap_servers' parameter is deprecated and will be removed in "
180
+ "1.9.0. Please pass the 'kafka_brokers' parameter instead.",
181
+ FutureWarning,
182
+ )
183
+ return kafka_bootstrap_servers
184
+
185
+
186
+ def transform_list_filters_to_tuple(additional_filters):
187
+ tuple_filters = []
188
+ if not additional_filters:
189
+ return tuple_filters
190
+ validate_additional_filters(additional_filters)
191
+ for additional_filter in additional_filters:
192
+ tuple_filters.append(tuple(additional_filter))
193
+ return tuple_filters
194
+
195
+
196
+ def validate_additional_filters(additional_filters):
197
+ nan_error_message = "using NaN in additional_filters is not supported"
198
+ if additional_filters in [None, [], ()]:
199
+ return
200
+ for filter_tuple in additional_filters:
201
+ if filter_tuple == () or filter_tuple == []:
202
+ continue
203
+ if not isinstance(filter_tuple, (list, tuple)):
204
+ raise mlrun.errors.MLRunInvalidArgumentError(
205
+ f"mlrun supports additional_filters only as a list of tuples."
206
+ f" Current additional_filters: {additional_filters}"
207
+ )
208
+ if isinstance(filter_tuple[0], (list, tuple)):
209
+ raise mlrun.errors.MLRunInvalidArgumentError(
210
+ f"additional_filters does not support nested list inside filter tuples except in -in- logic."
211
+ f" Current filter_tuple: {filter_tuple}."
212
+ )
213
+ if len(filter_tuple) != 3:
214
+ raise mlrun.errors.MLRunInvalidArgumentError(
215
+ f"illegal filter tuple length, {filter_tuple} in additional filters:"
216
+ f" {additional_filters}"
217
+ )
218
+ col_name, op, value = filter_tuple
219
+ if isinstance(value, float) and math.isnan(value):
220
+ raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
221
+ elif isinstance(value, (list, tuple)):
222
+ for sub_value in value:
223
+ if isinstance(sub_value, float) and math.isnan(sub_value):
224
+ raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
mlrun/datastore/v3io.py CHANGED
@@ -12,8 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import mmap
16
- import os
17
15
  import time
18
16
  from datetime import datetime
19
17
 
@@ -22,7 +20,6 @@ import v3io
22
20
  from v3io.dataplane.response import HttpResponseError
23
21
 
24
22
  import mlrun
25
- from mlrun.datastore.helpers import ONE_GB, ONE_MB
26
23
 
27
24
  from ..platforms.iguazio import parse_path, split_path
28
25
  from .base import (
@@ -32,6 +29,7 @@ from .base import (
32
29
  )
33
30
 
34
31
  V3IO_LOCAL_ROOT = "v3io"
32
+ V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 10
35
33
 
36
34
 
37
35
  class V3ioStore(DataStore):
@@ -98,46 +96,28 @@ class V3ioStore(DataStore):
98
96
  )
99
97
  return self._sanitize_storage_options(res)
100
98
 
101
- def _upload(self, key: str, src_path: str, max_chunk_size: int = ONE_GB):
99
+ def _upload(
100
+ self,
101
+ key: str,
102
+ src_path: str,
103
+ max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
104
+ ):
102
105
  """helper function for upload method, allows for controlling max_chunk_size in testing"""
103
106
  container, path = split_path(self._join(key))
104
- file_size = os.path.getsize(src_path) # in bytes
105
- if file_size <= ONE_MB:
106
- with open(src_path, "rb") as source_file:
107
- data = source_file.read()
108
- self._do_object_request(
109
- self.object.put,
110
- container=container,
111
- path=path,
112
- body=data,
113
- append=False,
114
- )
115
- return
116
- # chunk must be a multiple of the ALLOCATIONGRANULARITY
117
- # https://docs.python.org/3/library/mmap.html
118
- if residue := max_chunk_size % mmap.ALLOCATIONGRANULARITY:
119
- # round down to the nearest multiple of ALLOCATIONGRANULARITY
120
- max_chunk_size -= residue
121
-
122
107
  with open(src_path, "rb") as file_obj:
123
- file_offset = 0
124
- while file_offset < file_size:
125
- chunk_size = min(file_size - file_offset, max_chunk_size)
126
- with mmap.mmap(
127
- file_obj.fileno(),
128
- length=chunk_size,
129
- access=mmap.ACCESS_READ,
130
- offset=file_offset,
131
- ) as mmap_obj:
132
- append = file_offset != 0
133
- self._do_object_request(
134
- self.object.put,
135
- container=container,
136
- path=path,
137
- body=mmap_obj,
138
- append=append,
139
- )
140
- file_offset += chunk_size
108
+ append = False
109
+ while True:
110
+ data = memoryview(file_obj.read(max_chunk_size))
111
+ if not data:
112
+ break
113
+ self._do_object_request(
114
+ self.object.put,
115
+ container=container,
116
+ path=path,
117
+ body=data,
118
+ append=append,
119
+ )
120
+ append = True
141
121
 
142
122
  def upload(self, key, src_path):
143
123
  return self._upload(key, src_path)
@@ -152,19 +132,17 @@ class V3ioStore(DataStore):
152
132
  num_bytes=size,
153
133
  ).body
154
134
 
155
- def _put(self, key, data, append=False, max_chunk_size: int = ONE_GB):
135
+ def _put(
136
+ self,
137
+ key,
138
+ data,
139
+ append=False,
140
+ max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
141
+ ):
156
142
  """helper function for put method, allows for controlling max_chunk_size in testing"""
143
+ data, _ = self._prepare_put_data(data, append)
157
144
  container, path = split_path(self._join(key))
158
145
  buffer_size = len(data) # in bytes
159
- if buffer_size <= ONE_MB:
160
- self._do_object_request(
161
- self.object.put,
162
- container=container,
163
- path=path,
164
- body=data,
165
- append=append,
166
- )
167
- return
168
146
  buffer_offset = 0
169
147
  try:
170
148
  data = memoryview(data)
mlrun/db/auth_utils.py ADDED
@@ -0,0 +1,152 @@
1
+ # Copyright 2024 Iguazio
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from abc import ABC, abstractmethod
16
+ from datetime import datetime, timedelta
17
+
18
+ import requests
19
+
20
+ import mlrun.errors
21
+ from mlrun.utils import logger
22
+
23
+
24
+ class TokenProvider(ABC):
25
+ @abstractmethod
26
+ def get_token(self):
27
+ pass
28
+
29
+ @abstractmethod
30
+ def is_iguazio_session(self):
31
+ pass
32
+
33
+
34
+ class StaticTokenProvider(TokenProvider):
35
+ def __init__(self, token: str):
36
+ self.token = token
37
+
38
+ def get_token(self):
39
+ return self.token
40
+
41
+ def is_iguazio_session(self):
42
+ return mlrun.platforms.iguazio.is_iguazio_session(self.token)
43
+
44
+
45
+ class OAuthClientIDTokenProvider(TokenProvider):
46
+ def __init__(
47
+ self, token_endpoint: str, client_id: str, client_secret: str, timeout=5
48
+ ):
49
+ if not token_endpoint or not client_id or not client_secret:
50
+ raise mlrun.errors.MLRunValueError(
51
+ "Invalid client_id configuration for authentication. Must provide token endpoint, client-id and secret"
52
+ )
53
+ self.token_endpoint = token_endpoint
54
+ self.client_id = client_id
55
+ self.client_secret = client_secret
56
+ self.timeout = timeout
57
+
58
+ # Since we're only issuing POST requests, which are actually a disguised GET, then it's ok to allow retries
59
+ # on them.
60
+ self._session = mlrun.utils.HTTPSessionWithRetry(
61
+ retry_on_post=True,
62
+ verbose=True,
63
+ )
64
+
65
+ self._cleanup()
66
+ self._refresh_token_if_needed()
67
+
68
+ def get_token(self):
69
+ self._refresh_token_if_needed()
70
+ return self.token
71
+
72
+ def is_iguazio_session(self):
73
+ return False
74
+
75
+ def _cleanup(self):
76
+ self.token = self.token_expiry_time = self.token_refresh_time = None
77
+
78
+ def _refresh_token_if_needed(self):
79
+ now = datetime.now()
80
+ if self.token:
81
+ if self.token_refresh_time and now <= self.token_refresh_time:
82
+ return self.token
83
+
84
+ # We only cleanup if token was really expired - even if we fail in refreshing the token, we can still
85
+ # use the existing one given that it's not expired.
86
+ if now >= self.token_expiry_time:
87
+ self._cleanup()
88
+
89
+ self._issue_token_request()
90
+ return self.token
91
+
92
+ def _issue_token_request(self, raise_on_error=False):
93
+ try:
94
+ headers = {"Content-Type": "application/x-www-form-urlencoded"}
95
+ request_body = {
96
+ "grant_type": "client_credentials",
97
+ "client_id": self.client_id,
98
+ "client_secret": self.client_secret,
99
+ }
100
+ response = self._session.request(
101
+ "POST",
102
+ self.token_endpoint,
103
+ timeout=self.timeout,
104
+ headers=headers,
105
+ data=request_body,
106
+ )
107
+ except requests.RequestException as exc:
108
+ error = f"Retrieving token failed: {mlrun.errors.err_to_str(exc)}"
109
+ if raise_on_error:
110
+ raise mlrun.errors.MLRunRuntimeError(error) from exc
111
+ else:
112
+ logger.warning(error)
113
+ return
114
+
115
+ if not response.ok:
116
+ error = "No error available"
117
+ if response.content:
118
+ try:
119
+ data = response.json()
120
+ error = data.get("error")
121
+ except Exception:
122
+ pass
123
+ logger.warning(
124
+ "Retrieving token failed", status=response.status_code, error=error
125
+ )
126
+ if raise_on_error:
127
+ mlrun.errors.raise_for_status(response)
128
+ return
129
+
130
+ self._parse_response(response.json())
131
+
132
+ def _parse_response(self, data: dict):
133
+ # Response is described in https://datatracker.ietf.org/doc/html/rfc6749#section-4.4.3
134
+ # According to spec, there isn't a refresh token - just the access token and its expiry time (in seconds).
135
+ self.token = data.get("access_token")
136
+ expires_in = data.get("expires_in")
137
+ if not self.token or not expires_in:
138
+ token_str = "****" if self.token else "missing"
139
+ logger.warning(
140
+ "Failed to parse token response", token=token_str, expires_in=expires_in
141
+ )
142
+ return
143
+
144
+ now = datetime.now()
145
+ self.token_expiry_time = now + timedelta(seconds=expires_in)
146
+ self.token_refresh_time = now + timedelta(seconds=expires_in / 2)
147
+ logger.info(
148
+ "Successfully retrieved client-id token",
149
+ expires_in=expires_in,
150
+ expiry=str(self.token_expiry_time),
151
+ refresh=str(self.token_refresh_time),
152
+ )