mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py CHANGED
@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated

+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df

-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-

 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -182,11 +179,23 @@ class DataStore:
         return {}

     @staticmethod
-    def _parquet_reader(df_module, url, file_system, time_column, start_time, end_time):
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions

         def set_filters(
-            partitions_time_attributes, start_time_inner, end_time_inner, kwargs
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -196,20 +205,23 @@
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters

         def reader(*args, **kwargs):
-            if start_time or end_time:
-                if time_column is None:
-                    raise mlrun.errors.MLRunInvalidArgumentError(
-                        "When providing start_time or end_time, must provide time_column"
-                    )
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
                 try:
@@ -220,6 +232,7 @@
                 ):
                     raise ex

+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -231,6 +244,7 @@
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
                 return df_module.read_parquet(*args, **kwargs)
@@ -249,6 +263,7 @@
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -313,7 +328,13 @@
                 kwargs["columns"] = columns

             reader = self._parquet_reader(
-                df_module, url, file_system, time_column, start_time, end_time
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )

         elif file_url.endswith(".json") or format == "json":
@@ -392,14 +413,15 @@ class DataItem:


         # reading run results using DataItem (run.artifact())
-        train_run = train_iris_func.run(inputs={'dataset': dataset},
-                                        params={'label_column': 'label'})
+        train_run = train_iris_func.run(
+            inputs={"dataset": dataset}, params={"label_column": "label"}
+        )

-        train_run.artifact('confusion-matrix').show()
-        test_set = train_run.artifact('test_set').as_df()
+        train_run.artifact("confusion-matrix").show()
+        test_set = train_run.artifact("test_set").as_df()

         # create and use DataItem from uri
-        data = mlrun.get_dataitem('http://xyz/data.json').get()
+        data = mlrun.get_dataitem("http://xyz/data.json").get()
     """

     def __init__(
@@ -541,6 +563,7 @@
         time_column=None,
         start_time=None,
         end_time=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return a dataframe object (generated from the dataitem).
@@ -552,6 +575,12 @@
         :param end_time:    filters out data after this time
         :param time_column: Store timestamp_key will be used if None.
                             The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
         df = self._store.as_df(
             self._url,
@@ -562,6 +591,7 @@
             time_column=time_column,
             start_time=start_time,
             end_time=end_time,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return df
@@ -633,17 +663,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}


-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-

 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
@@ -671,7 +690,7 @@ class HttpStore(DataStore):
         raise ValueError("unimplemented")

     def get(self, key, size=None, offset=0):
-        data = http_get(self.url + self._join(key), self._headers, self.auth)
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +710,26 @@
             f"schema as it is not secure and is not recommended."
         )

+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+

 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
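The new `additional_filters` argument threads from `DataItem.as_df()` through `DataStore.as_df()` into the parquet reader above, where the (column, operator, value) tuples are appended to the pyarrow-style filters derived from `start_time`/`end_time`. A minimal client-side sketch of the new parameter; the dataset path and column names below are illustrative, not taken from this diff:

    from datetime import datetime

    import mlrun

    # hypothetical parquet path with "timestamp" and "Product" columns
    item = mlrun.get_dataitem("s3://my-bucket/sales/data.parquet")
    df = item.as_df(
        time_column="timestamp",                   # column used for start/end time filtering
        start_time=datetime(2024, 1, 1),
        end_time=datetime(2024, 6, 30),
        additional_filters=[("Product", "=", "Computer")],  # pyarrow-style filter tuples
    )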
mlrun/datastore/datastore.py CHANGED
@@ -98,6 +98,10 @@ def schema_to_store(schema):
         from .hdfs import HdfsStore

         return HdfsStore
+    elif schema == "oss":
+        from .alibaba_oss import OSSStore
+
+        return OSSStore
     else:
         raise ValueError(f"unsupported store scheme ({schema})")

@@ -219,6 +223,11 @@ class StoreManager:
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url

+        elif schema in get_local_file_schema():
+            # parse_url() will drop the windows drive-letter from the path for url like "c:\a\b".
+            # As a workaround, we set subpath to the url.
+            subpath = url.replace("file://", "", 1)
+
         if not schema and endpoint:
             if endpoint in self._stores.keys():
                 return self._stores[endpoint], subpath, url
@@ -237,8 +246,7 @@ class StoreManager:
             )
             if not secrets and not mlrun.config.is_running_as_api():
                 self._stores[store_key] = store
-            # in file stores in windows path like c:\a\b the drive letter is dropped from the path, so we return the url
-            return store, url if store.kind == "file" else subpath, url
+            return store, subpath, url

     def reset_secrets(self):
         self._secrets = {}
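With `schema_to_store()` now mapping the `oss` scheme to the new Alibaba Cloud OSS store (mlrun/datastore/alibaba_oss.py), OSS objects can be read like any other data item. A rough sketch, assuming OSS credentials are already exposed to the store through environment variables or project secrets; the bucket and key are placeholders:

    import mlrun

    # "oss://<bucket>/<key>" is resolved via schema_to_store() to the OSSStore backend
    item = mlrun.get_dataitem("oss://my-bucket/datasets/iris.csv")
    df = item.as_df()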
mlrun/datastore/datastore_profile.py CHANGED
@@ -16,6 +16,7 @@ import ast
 import base64
 import json
 import typing
+import warnings
 from urllib.parse import ParseResult, urlparse, urlunparse

 import pydantic
@@ -36,6 +37,7 @@ class DatastoreProfile(pydantic.BaseModel):
         extra = pydantic.Extra.forbid

     @pydantic.validator("name")
+    @classmethod
     def lower_case(cls, v):
         return v.lower()

@@ -68,6 +70,9 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
     def get(self, key):
         return self._data.get(key, None)

+    def remove(self, key):
+        self._data.pop(key, None)
+

 class DatastoreProfileBasic(DatastoreProfile):
     type: str = pydantic.Field("basic")
@@ -79,13 +84,37 @@
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.Field("kafka_target")
     _private_attributes = "kwargs_private"
-    bootstrap_servers: str
+    bootstrap_servers: typing.Optional[str] = None
+    brokers: typing.Optional[str] = None
     topic: str
     kwargs_public: typing.Optional[dict]
     kwargs_private: typing.Optional[dict]

+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        if not self.brokers and not self.bootstrap_servers:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "DatastoreProfileKafkaTarget requires the 'brokers' field to be set"
+            )
+
+        if self.bootstrap_servers:
+            if self.brokers:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "DatastoreProfileKafkaTarget cannot be created with both 'brokers' and 'bootstrap_servers'"
+                )
+            else:
+                self.brokers = self.bootstrap_servers
+                self.bootstrap_servers = None
+                warnings.warn(
+                    "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                    "use 'brokers' instead.",
+                    # TODO: Remove this in 1.9.0
+                    FutureWarning,
+                )
+
     def attributes(self):
-        attributes = {"bootstrap_servers": self.bootstrap_servers}
+        attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
             attributes = merge(attributes, self.kwargs_public)
         if self.kwargs_private:
@@ -157,6 +186,18 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     def secrets(self) -> dict:
         res = {}
@@ -175,7 +216,13 @@
         return res

     def url(self, subpath):
-        return f"s3:/{subpath}"
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"


 class DatastoreProfileRedis(DatastoreProfile):
@@ -244,18 +291,36 @@ class DatastoreProfileGCS(DatastoreProfile):
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
             return json.dumps(v)
         return v

     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"gcs://{subpath}"
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"

     def secrets(self) -> dict:
         res = {}
@@ -283,12 +348,27 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    container: typing.Optional[str] = None
+
+    @pydantic.validator("container")
+    @classmethod
+    def check_container(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'container' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v

     def url(self, subpath) -> str:
         if subpath.startswith("/"):
-            # in azure the path after schema is starts with bucket, wherefore it should not start with "/".
+            # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
-        return f"az://{subpath}"
+        if self.container:
+            return f"az://{self.container}/{subpath}"
+        else:
+            return f"az://{subpath}"

     def secrets(self) -> dict:
         res = {}
@@ -460,3 +540,7 @@ def register_temporary_client_datastore_profile(profile: DatastoreProfile):
     It's beneficial for testing purposes.
     """
     TemporaryClientDatastoreProfiles().add(profile)
+
+
+def remove_temporary_client_datastore_profile(profile_name: str):
+    TemporaryClientDatastoreProfiles().remove(profile_name)
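Alongside the Kafka `brokers` rename, the S3/GCS/Azure profiles gain optional `bucket`/`container` fields (with a FutureWarning until they become mandatory in 1.9), and temporary client profiles can now be unregistered. A short sketch of the round trip, with placeholder credentials and bucket names:

    import mlrun
    from mlrun.datastore.datastore_profile import (
        DatastoreProfileS3,
        register_temporary_client_datastore_profile,
        remove_temporary_client_datastore_profile,
    )

    profile = DatastoreProfileS3(
        name="my-s3",                  # profile names are lower-cased by the validator
        access_key_id="<access-key>",
        secret_key="<secret-key>",
        bucket="my-bucket",            # optional today, planned to become mandatory in 1.9
    )
    register_temporary_client_datastore_profile(profile)
    df = mlrun.get_dataitem("ds://my-s3/path/data.parquet").as_df()
    remove_temporary_client_datastore_profile("my-s3")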
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
-        res = None
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/hdfs.py CHANGED
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from urllib.parse import urlparse

 import fsspec

@@ -49,3 +50,7 @@ class HdfsStore(DataStore):
     @property
     def spark_url(self):
         return f"hdfs://{self.host}:{self.port}"
+
+    def rm(self, url, recursive=False, maxdepth=None):
+        path = urlparse(url).path
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
mlrun/datastore/inmem.py CHANGED
@@ -80,8 +80,8 @@ class InMemoryStore(DataStore):
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store do not filter on time
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
             kwargs.pop(field, None)

         return reader(item, **kwargs)
mlrun/datastore/redis.py CHANGED
@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-        REDIS_DEFAULT_PORT = "6379"
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None

@@ -49,7 +49,7 @@
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else REDIS_DEFAULT_PORT
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
mlrun/datastore/s3.py CHANGED
@@ -198,6 +198,11 @@ class S3Store(DataStore):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]

+    def rm(self, path, recursive=False, maxdepth=None):
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+

 def parse_s3_bucket_and_key(s3_path):
     try:
mlrun/datastore/snowflake_utils.py ADDED
@@ -0,0 +1,43 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import mlrun
+
+
+def get_snowflake_password():
+    key = "SNOWFLAKE_PASSWORD"
+    snowflake_password = mlrun.get_secret_or_env(key)
+
+    if not snowflake_password:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"No password provided. Set password using the {key} "
+            "project secret or environment variable."
+        )
+
+    return snowflake_password
+
+
+def get_snowflake_spark_options(attributes):
+    return {
+        "format": "net.snowflake.spark.snowflake",
+        "sfURL": attributes.get("url"),
+        "sfUser": attributes.get("user"),
+        "sfPassword": get_snowflake_password(),
+        "sfDatabase": attributes.get("database"),
+        "sfSchema": attributes.get("schema"),
+        "sfWarehouse": attributes.get("warehouse"),
+        "application": "iguazio_platform",
+        "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
+    }
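This new helper builds the option dict for the Snowflake Spark connector in one place. A hedged usage sketch; the connection attributes are placeholders, and `SNOWFLAKE_PASSWORD` must be available as a project secret or environment variable, otherwise `get_snowflake_password()` raises:

    from mlrun.datastore.snowflake_utils import get_snowflake_spark_options

    attributes = {
        "url": "myaccount.snowflakecomputing.com",  # placeholder account URL
        "user": "analyst",
        "database": "SALES",
        "schema": "PUBLIC",
        "warehouse": "COMPUTE_WH",
    }
    spark_options = get_snowflake_spark_options(attributes)
    # e.g. spark.read.format(spark_options.pop("format")).options(**spark_options).option("dbtable", "ORDERS").load()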