mlrun 1.6.0rc20__py3-none-any.whl → 1.6.0rc22__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (51)
  1. mlrun/artifacts/base.py +6 -6
  2. mlrun/artifacts/dataset.py +15 -8
  3. mlrun/artifacts/manager.py +6 -3
  4. mlrun/artifacts/model.py +2 -2
  5. mlrun/artifacts/plots.py +8 -8
  6. mlrun/config.py +1 -1
  7. mlrun/data_types/to_pandas.py +1 -1
  8. mlrun/datastore/azure_blob.py +12 -16
  9. mlrun/datastore/base.py +32 -10
  10. mlrun/datastore/datastore_profile.py +4 -4
  11. mlrun/datastore/dbfs_store.py +12 -11
  12. mlrun/datastore/filestore.py +2 -1
  13. mlrun/datastore/google_cloud_storage.py +11 -10
  14. mlrun/datastore/redis.py +2 -1
  15. mlrun/datastore/s3.py +12 -15
  16. mlrun/datastore/sources.py +16 -11
  17. mlrun/datastore/targets.py +2 -13
  18. mlrun/datastore/v3io.py +18 -20
  19. mlrun/db/httpdb.py +76 -7
  20. mlrun/errors.py +4 -0
  21. mlrun/execution.py +13 -4
  22. mlrun/feature_store/api.py +3 -4
  23. mlrun/launcher/base.py +4 -4
  24. mlrun/lists.py +0 -6
  25. mlrun/model.py +8 -1
  26. mlrun/model_monitoring/api.py +9 -31
  27. mlrun/model_monitoring/batch.py +14 -13
  28. mlrun/model_monitoring/controller.py +100 -70
  29. mlrun/model_monitoring/controller_handler.py +1 -3
  30. mlrun/model_monitoring/helpers.py +65 -20
  31. mlrun/model_monitoring/stream_processing.py +0 -3
  32. mlrun/projects/operations.py +1 -1
  33. mlrun/projects/project.py +10 -4
  34. mlrun/runtimes/base.py +6 -1
  35. mlrun/runtimes/constants.py +11 -0
  36. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -9
  37. mlrun/runtimes/kubejob.py +1 -1
  38. mlrun/runtimes/local.py +64 -53
  39. mlrun/runtimes/serving.py +8 -1
  40. mlrun/serving/routers.py +7 -20
  41. mlrun/serving/server.py +4 -14
  42. mlrun/serving/utils.py +0 -3
  43. mlrun/utils/helpers.py +10 -2
  44. mlrun/utils/logger.py +5 -5
  45. mlrun/utils/version/version.json +2 -2
  46. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/METADATA +5 -3
  47. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/RECORD +51 -51
  48. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/LICENSE +0 -0
  49. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/WHEEL +0 -0
  50. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/entry_points.txt +0 -0
  51. {mlrun-1.6.0rc20.dist-info → mlrun-1.6.0rc22.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py CHANGED
@@ -714,10 +714,10 @@ class LinkArtifact(Artifact):
         self._spec = self._verify_dict(spec, "spec", LinkArtifactSpec)


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyArtifact' will be removed in 1.6.0, use 'Artifact' instead",
+    reason="'LegacyArtifact' will be removed in 1.7.0, use 'Artifact' instead",
     category=FutureWarning,
 )
 class LegacyArtifact(ModelObj):
@@ -880,10 +880,10 @@ class LegacyArtifact(ModelObj):
         return generate_target_path(self, artifact_path, producer)


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyDirArtifact' will be removed in 1.6.0, use 'DirArtifact' instead",
+    reason="'LegacyDirArtifact' will be removed in 1.7.0, use 'DirArtifact' instead",
     category=FutureWarning,
 )
 class LegacyDirArtifact(LegacyArtifact):
@@ -916,10 +916,10 @@ class LegacyDirArtifact(LegacyArtifact):
         mlrun.datastore.store_manager.object(url=target).upload(file_path)


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyLinkArtifact' will be removed in 1.6.0, use 'LinkArtifact' instead",
+    reason="'LegacyLinkArtifact' will be removed in 1.7.0, use 'LinkArtifact' instead",
     category=FutureWarning,
 )
 class LegacyLinkArtifact(LegacyArtifact):
mlrun/artifacts/dataset.py CHANGED
@@ -283,14 +283,16 @@ class DatasetArtifact(Artifact):
         if artifact.spec.length > preview_rows_length and not ignore_preview_limits:
             preview_df = df.head(preview_rows_length)

-        # reset index while dropping existing index
-        # that way it wont create another index if one already there
-        preview_df = preview_df.reset_index(drop=True)
+        preview_df = preview_df.reset_index()
         artifact.status.header_original_length = len(preview_df.columns)
         if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
             preview_df = preview_df.iloc[:, :max_preview_columns]
         artifact.spec.header = preview_df.columns.values.tolist()
         artifact.status.preview = preview_df.values.tolist()
+        # Table schema parsing doesn't require a column named "index"
+        # to align its output with previously generated header and preview data
+        if "index" in preview_df.columns:
+            preview_df.drop("index", axis=1, inplace=True)
         artifact.spec.schema = build_table_schema(preview_df)

         # set artifact stats if stats is explicitly set to true, or if stats is None and the dataframe is small
@@ -344,10 +346,10 @@ class DatasetArtifact(Artifact):
         self.status.stats = stats


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyTableArtifact' will be removed in 1.6.0, use 'TableArtifact' instead",
+    reason="'LegacyTableArtifact' will be removed in 1.7.0, use 'TableArtifact' instead",
     category=FutureWarning,
 )
 class LegacyTableArtifact(LegacyArtifact):
@@ -400,10 +402,10 @@ class LegacyTableArtifact(LegacyArtifact):
         return csv_buffer.getvalue()


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyDatasetArtifact' will be removed in 1.6.0, use 'DatasetArtifact' instead",
+    reason="'LegacyDatasetArtifact' will be removed in 1.7.0, use 'DatasetArtifact' instead",
     category=FutureWarning,
 )
 class LegacyDatasetArtifact(LegacyArtifact):
@@ -513,11 +515,16 @@ class LegacyDatasetArtifact(LegacyArtifact):

         if artifact.length > preview_rows_length and not ignore_preview_limits:
             preview_df = df.head(preview_rows_length)
-        preview_df = preview_df.reset_index(drop=True)
+
+        preview_df = preview_df.reset_index()
         if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
             preview_df = preview_df.iloc[:, :max_preview_columns]
         artifact.header = preview_df.columns.values.tolist()
         artifact.preview = preview_df.values.tolist()
+        # Table schema parsing doesn't require a column named "index"
+        # to align its output with previously generated header and preview data
+        if "index" in preview_df.columns:
+            preview_df.drop("index", axis=1, inplace=True)
         artifact.schema = build_table_schema(preview_df)
         if (
             stats
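The preview change above switches from `reset_index(drop=True)` to `reset_index()`, so the DataFrame's original index is kept as a visible column in the header and preview, and the auto-generated "index" column is dropped again only before the table schema is built, keeping schema, header, and preview aligned. A minimal stand-alone sketch of what the new logic produces (plain pandas, not the artifact code itself):

```python
import pandas as pd
from pandas.io.json import build_table_schema  # same helper the artifact code uses

df = pd.DataFrame({"a": [1, 2, 3]}, index=[10, 11, 12])

preview_df = df.head(2).reset_index()        # keeps the original index as an "index" column
header = preview_df.columns.values.tolist()  # ['index', 'a']
preview = preview_df.values.tolist()         # [[10, 1], [11, 2]]

# Drop the auto-generated "index" column before building the schema,
# mirroring the change in the diff above.
if "index" in preview_df.columns:
    preview_df = preview_df.drop("index", axis=1)
schema = build_table_schema(preview_df)
print(header, preview, schema["fields"])
```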
mlrun/artifacts/manager.py CHANGED
@@ -66,7 +66,7 @@ artifact_types = {
     "bokeh": BokehArtifact,
 }

-# TODO - Remove this when legacy types are deleted in 1.6.0
+# TODO - Remove this when legacy types are deleted in 1.7.0
 legacy_artifact_types = {
     "": LegacyArtifact,
     "dir": LegacyDirArtifact,
@@ -200,8 +200,11 @@ class ArtifactManager:
             # and receive back all the runs that are associated with his search result.
             db_key = producer.name + "_" + key
         else:
-            db_key = key
-        item.db_key = db_key if db_key else ""
+            # if the db_key is not explicitly set on the item, we want to use the key as the db_key
+            # otherwise, we do not want to override it.
+            # this is mainly relevant for imported artifacts that have an explicit db_key value already set
+            db_key = item.db_key or key
+        item.db_key = db_key or ""
         item.viewer = viewer or item.viewer
         item.tree = producer.tag
         item.tag = tag or item.tag
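The `db_key` change is behavioural: previously the artifact key always overwrote `db_key`, whereas now an explicitly set `db_key` (typical for imported artifacts) is preserved and the key is only used as a fallback. A hedged sketch of the resolution order in plain Python (the function and argument names are illustrative, not mlrun API):

```python
def resolve_db_key(item_db_key, key, unique_per_producer=False, producer_name=""):
    """Illustrative mirror of the fallback logic in the hunk above (names are not mlrun API)."""
    if unique_per_producer:
        # e.g. artifacts logged by a run: prefix with the producer name to keep searches scoped
        return f"{producer_name}_{key}"
    # prefer an explicitly set db_key (imported artifacts), fall back to the artifact key
    return item_db_key or key or ""

print(resolve_db_key(None, "my-dataset"))              # my-dataset
print(resolve_db_key("imported-key", "my-dataset"))    # imported-key
print(resolve_db_key(None, "model", True, "trainer"))  # trainer_model
```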
mlrun/artifacts/model.py CHANGED
@@ -390,10 +390,10 @@ class ModelArtifact(Artifact):
         return mlrun.get_dataitem(target_model_path).get()


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyModelArtifact' will be removed in 1.6.0, use 'ModelArtifact' instead",
+    reason="'LegacyModelArtifact' will be removed in 1.7.0, use 'ModelArtifact' instead",
     category=FutureWarning,
 )
 class LegacyModelArtifact(LegacyArtifact):
mlrun/artifacts/plots.py CHANGED
@@ -256,10 +256,10 @@ class PlotlyArtifact(Artifact):
         return self._figure.to_html()


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyPlotArtifact' will be removed in 1.6.0, use 'PlotArtifact' instead",
+    reason="'LegacyPlotArtifact' will be removed in 1.7.0, use 'PlotArtifact' instead",
     category=FutureWarning,
 )
 class LegacyPlotArtifact(LegacyArtifact):
@@ -303,10 +303,10 @@ class LegacyPlotArtifact(LegacyArtifact):
         return self._TEMPLATE.format(self.description or self.key, self.key, data_uri)


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyChartArtifact' will be removed in 1.6.0, use 'ChartArtifact' instead",
+    reason="'LegacyChartArtifact' will be removed in 1.7.0, use 'ChartArtifact' instead",
     category=FutureWarning,
 )
 class LegacyChartArtifact(LegacyArtifact):
@@ -377,10 +377,10 @@ class LegacyChartArtifact(LegacyArtifact):
         )


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyBokehArtifact' will be removed in 1.6.0, use 'BokehArtifact' instead",
+    reason="'LegacyBokehArtifact' will be removed in 1.7.0, use 'BokehArtifact' instead",
     category=FutureWarning,
 )
 class LegacyBokehArtifact(LegacyArtifact):
@@ -433,10 +433,10 @@ class LegacyBokehArtifact(LegacyArtifact):
         return file_html(self._figure, CDN, self.key)


-# TODO: remove in 1.6.0
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyPlotlyArtifact' will be removed in 1.6.0, use 'PlotlyArtifact' instead",
+    reason="'LegacyPlotlyArtifact' will be removed in 1.7.0, use 'PlotlyArtifact' instead",
     category=FutureWarning,
 )
 class LegacyPlotlyArtifact(LegacyArtifact):
mlrun/config.py CHANGED
@@ -278,7 +278,7 @@ default_config = {
     "real_path": "",
     # comma delimited prefixes of paths allowed through the /files API (v3io & the real_path are always allowed).
     # These paths must be schemas (cannot be used for local files). For example "s3://mybucket,gcs://"
-    "allowed_file_paths": "s3://,gcs://,gs://,az://,dbfs://",
+    "allowed_file_paths": "s3://,gcs://,gs://,az://,dbfs://,ds://",
     "db_type": "sqldb",
     "max_workers": 64,
     # See mlrun.common.schemas.APIStates for options
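The only change here adds `ds://` (the datastore-profile scheme) to the schema prefixes accepted by the /files API. As a rough illustration of how a comma-delimited prefix list like this is typically applied to a URL (a sketch with a hypothetical helper, not mlrun's actual validation code):

```python
ALLOWED_FILE_PATHS = "s3://,gcs://,gs://,az://,dbfs://,ds://"

def is_path_allowed(path: str, allowed: str = ALLOWED_FILE_PATHS) -> bool:
    """Return True if the path starts with one of the allowed schema prefixes."""
    prefixes = [prefix.strip() for prefix in allowed.split(",") if prefix.strip()]
    return any(path.startswith(prefix) for prefix in prefixes)

print(is_path_allowed("ds://my-profile/bucket/object.parquet"))  # True with rc22's default
print(is_path_allowed("file:///tmp/data.csv"))                   # False
```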
mlrun/data_types/to_pandas.py CHANGED
@@ -178,7 +178,7 @@ def toPandas(spark_df):
        if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
            dtype[fieldIdx] = np.float64
        if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-           dtype[fieldIdx] = np.object
+           dtype[fieldIdx] = object

    df = pd.DataFrame()
    for index, t in enumerate(dtype):
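The `np.object` alias was deprecated in NumPy 1.20 and removed in 1.24, so nullable Spark boolean columns are now mapped to the builtin `object` dtype instead. A quick stand-alone check of the equivalent pandas behaviour (no Spark required):

```python
import pandas as pd

# A boolean column with nulls cannot stay bool; casting to the builtin `object`
# (the replacement for the removed np.object alias) keeps the None values intact.
col = pd.Series([True, None, False])
casted = col.astype(object)
print(casted.dtype)   # object
print(list(casted))   # [True, None, False]
```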
mlrun/datastore/azure_blob.py CHANGED
@@ -20,7 +20,6 @@ from azure.storage.blob._shared.base_client import parse_connection_str
 from fsspec.registry import get_filesystem_class

 import mlrun.errors
-from mlrun.errors import err_to_str

 from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer

@@ -33,20 +32,16 @@ class AzureBlobStore(DataStore):

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        self.get_filesystem()

-    def get_filesystem(self, silent=True):
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
         try:
             import adlfs  # noqa
         except ImportError as exc:
-            if not silent:
-                raise ImportError(
-                    f"Azure adlfs not installed, run pip install adlfs, {err_to_str(exc)}"
-                )
-            return None
+            raise ImportError("Azure adlfs not installed") from exc
         # in order to support az and wasbs kinds.
         filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
@@ -57,7 +52,7 @@ class AzureBlobStore(DataStore):
         return self._filesystem

     def get_storage_options(self):
-        return dict(
+        res = dict(
             account_name=self._get_secret_or_env("account_name")
             or self._get_secret_or_env("AZURE_STORAGE_ACCOUNT_NAME"),
             account_key=self._get_secret_or_env("account_key")
@@ -74,6 +69,7 @@ class AzureBlobStore(DataStore):
             or self._get_secret_or_env("AZURE_STORAGE_SAS_TOKEN"),
             credential=self._get_secret_or_env("credential"),
         )
+        return self._sanitize_storage_options(res)

     def _convert_key_to_remote_path(self, key):
         key = key.strip("/")
@@ -86,12 +82,12 @@ class AzureBlobStore(DataStore):

     def upload(self, key, src_path):
         remote_path = self._convert_key_to_remote_path(key)
-        self._filesystem.put_file(src_path, remote_path, overwrite=True)
+        self.filesystem.put_file(src_path, remote_path, overwrite=True)

     def get(self, key, size=None, offset=0):
         remote_path = self._convert_key_to_remote_path(key)
         end = offset + size if size else None
-        blob = self._filesystem.cat_file(remote_path, start=offset, end=end)
+        blob = self.filesystem.cat_file(remote_path, start=offset, end=end)
         return blob

     def put(self, key, data, append=False):
@@ -106,12 +102,12 @@ class AzureBlobStore(DataStore):
             mode = "w"
         else:
             raise TypeError("Data type unknown. Unable to put in Azure!")
-        with self._filesystem.open(remote_path, mode) as f:
+        with self.filesystem.open(remote_path, mode) as f:
             f.write(data)

     def stat(self, key):
         remote_path = self._convert_key_to_remote_path(key)
-        files = self._filesystem.ls(remote_path, detail=True)
+        files = self.filesystem.ls(remote_path, detail=True)
         if len(files) == 1 and files[0]["type"] == "file":
             size = files[0]["size"]
             modified = files[0]["last_modified"]
@@ -123,10 +119,10 @@ class AzureBlobStore(DataStore):

     def listdir(self, key):
         remote_path = self._convert_key_to_remote_path(key)
-        if self._filesystem.isfile(remote_path):
+        if self.filesystem.isfile(remote_path):
             return key
         remote_path = f"{remote_path}/**"
-        files = self._filesystem.glob(remote_path)
+        files = self.filesystem.glob(remote_path)
         key_length = len(key)
         files = [
             f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
@@ -149,7 +145,7 @@ class AzureBlobStore(DataStore):
         for key in ["account_name", "account_key"]:
             parsed_value = parsed_credential.get(key)
             if parsed_value:
-                if st[key] and st[key] != parsed_value:
+                if key in st and st[key] != parsed_value:
                     if key == "account_name":
                         raise mlrun.errors.MLRunInvalidArgumentError(
                             f"Storage option for '{key}' is '{st[key]}',\
mlrun/datastore/base.py CHANGED
@@ -25,6 +25,7 @@ import pyarrow
 import pytz
 import requests
 import urllib3
+from deprecated import deprecated

 import mlrun.errors
 from mlrun.errors import err_to_str
@@ -71,16 +72,24 @@ class DataStore:
     def is_unstructured(self):
         return True

+    @staticmethod
+    def _sanitize_storage_options(options):
+        if not options:
+            return {}
+        options = {k: v for k, v in options.items() if v is not None and v != ""}
+        return options
+
     @staticmethod
     def _sanitize_url(url):
         """
         Extract only the schema, netloc, and path from an input URL if they exist,
         excluding parameters, query, or fragments.
         """
+        if not url:
+            raise mlrun.errors.MLRunInvalidArgumentError("Cannot parse an empty URL")
         parsed_url = urllib.parse.urlparse(url)
-        scheme = f"{parsed_url.scheme}:" if parsed_url.scheme else ""
         netloc = f"//{parsed_url.netloc}" if parsed_url.netloc else "//"
-        return f"{scheme}{netloc}{parsed_url.path}"
+        return f"{parsed_url.scheme}:{netloc}{parsed_url.path}"

     @staticmethod
     def uri_to_kfp(endpoint, subpath):
@@ -90,7 +99,18 @@ class DataStore:
     def uri_to_ipython(endpoint, subpath):
         return ""

-    def get_filesystem(self, silent=True) -> Optional[fsspec.AbstractFileSystem]:
+    # TODO: remove in 1.8.0
+    @deprecated(
+        version="1.8.0",
+        reason="'get_filesystem()' will be removed in 1.8.0, use "
+        "'filesystem' property instead",
+        category=FutureWarning,
+    )
+    def get_filesystem(self):
+        return self.filesystem
+
+    @property
+    def filesystem(self) -> Optional[fsspec.AbstractFileSystem]:
         """return fsspec file system object, if supported"""
         return None

@@ -106,10 +126,10 @@ class DataStore:

     def get_storage_options(self):
         """get fsspec storage options"""
-        return None
+        return self._sanitize_storage_options(None)

     def open(self, filepath, mode):
-        file_system = self.get_filesystem(False)
+        file_system = self.filesystem
         return file_system.open(filepath, mode)

     def _join(self, key):
@@ -230,7 +250,7 @@ class DataStore:
         df_module = df_module or pd
         file_url = self._sanitize_url(url)
         is_csv, is_json, drop_time_column = False, False, False
-        file_system = self.get_filesystem()
+        file_system = self.filesystem
         if file_url.endswith(".csv") or format == "csv":
             is_csv = True
             drop_time_column = False
@@ -355,7 +375,7 @@ class DataStore:
         }

     def rm(self, path, recursive=False, maxdepth=None):
-        self.get_filesystem().rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     @staticmethod
     def _is_dd(df_module):
@@ -645,9 +665,10 @@ def http_head(url, headers=None, auth=None):
     return response.headers


-def http_put(url, data, headers=None, auth=None):
+def http_put(url, data, headers=None, auth=None, session=None):
     try:
-        response = requests.put(
+        put_api = session.put if session else requests.put
+        response = put_api(
             url, data=data, headers=headers, auth=auth, verify=verify_ssl
         )
     except OSError as exc:
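`http_put` (the module-level HTTP helper changed just above) now accepts an optional `requests.Session`, letting callers that issue many PUTs reuse a single connection pool. A hedged stand-alone sketch of the same pattern (not mlrun's function; the URL is a placeholder and `verify_ssl` is a plain parameter here):

```python
import requests

def http_put(url, data, headers=None, auth=None, session=None, verify_ssl=True):
    """Stand-alone version of the pattern above: prefer the session's PUT when one is given."""
    put_api = session.put if session else requests.put
    response = put_api(url, data=data, headers=headers, auth=auth, verify=verify_ssl)
    response.raise_for_status()
    return response

# Reusing one session (and its connection pool) across several PUTs:
session = requests.Session()
for part in range(3):
    http_put(f"https://httpbin.org/put?part={part}", data=b"payload", session=session)
```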
@@ -671,7 +692,8 @@ class HttpStore(DataStore):
         self._enrich_https_token()
         self._validate_https_token()

-    def get_filesystem(self, silent=True):
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if not self._filesystem:
             self._filesystem = fsspec.filesystem("http")
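`_sanitize_storage_options()`, added above to the `DataStore` base class, is what the concrete stores' `get_storage_options()` now route their results through, so unset credentials are dropped instead of being passed to fsspec as None or empty strings. A rough stand-alone illustration of the effect (the Azure-style keys and values are placeholders):

```python
def sanitize_storage_options(options):
    """Same idea as DataStore._sanitize_storage_options in this release: drop empty values."""
    if not options:
        return {}
    return {k: v for k, v in options.items() if v is not None and v != ""}

raw = {
    "account_name": "myaccount",   # placeholder value
    "account_key": None,           # secret not configured
    "sas_token": "",               # empty environment variable
    "credential": None,
}
print(sanitize_storage_options(raw))  # {'account_name': 'myaccount'}
```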
mlrun/datastore/datastore_profile.py CHANGED
@@ -131,18 +131,18 @@ class DatastoreProfileKafkaSource(DatastoreProfile):

 class DatastoreProfileS3(DatastoreProfile):
     type: str = pydantic.Field("s3")
-    _private_attributes = ("access_key", "secret_key")
+    _private_attributes = ("access_key_id", "secret_key")
     endpoint_url: typing.Optional[str] = None
     force_non_anonymous: typing.Optional[str] = None
     profile_name: typing.Optional[str] = None
     assume_role_arn: typing.Optional[str] = None
-    access_key: typing.Optional[str] = None
+    access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None

     def secrets(self) -> dict:
         res = {}
-        if self.access_key:
-            res["AWS_ACCESS_KEY_ID"] = self.access_key
+        if self.access_key_id:
+            res["AWS_ACCESS_KEY_ID"] = self.access_key_id
         if self.secret_key:
             res["AWS_SECRET_ACCESS_KEY"] = self.secret_key
         if self.endpoint_url:
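`DatastoreProfileS3` renames its credential field from `access_key` to `access_key_id`, matching the AWS naming, and `secrets()` now reads the new attribute, so profiles constructed by keyword need the new argument name. A short sketch (profile name, endpoint, and credential values are placeholders):

```python
from mlrun.datastore.datastore_profile import DatastoreProfileS3

profile = DatastoreProfileS3(
    name="my-s3",                           # placeholder profile name
    endpoint_url="https://s3.example.com",  # optional, e.g. for S3-compatible storage
    access_key_id="AKIA...",                # was `access_key` before this change
    secret_key="...",
)
# secrets() maps the fields to the usual AWS environment-style keys
print(profile.secrets()["AWS_ACCESS_KEY_ID"])  # AKIA...
```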
mlrun/datastore/dbfs_store.py CHANGED
@@ -83,9 +83,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 class DBFSStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        self.get_filesystem(silent=False)

-    def get_filesystem(self, silent=True):
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         filesystem_class = get_filesystem_class(protocol=self.kind)
         if not self._filesystem:
@@ -97,13 +97,14 @@ class DBFSStore(DataStore):
         return self._filesystem

     def get_storage_options(self):
-        return dict(
+        res = dict(
             token=self._get_secret_or_env("DATABRICKS_TOKEN"),
             instance=self._get_secret_or_env("DATABRICKS_HOST"),
         )
+        return self._sanitize_storage_options(res)

     def _verify_filesystem_and_key(self, key: str):
-        if not self._filesystem:
+        if not self.filesystem:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Performing actions on data-item without a valid filesystem"
             )
@@ -120,7 +121,7 @@ class DBFSStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError("offset cannot be None")
         start = offset or None
         end = offset + size if size else None
-        return self._filesystem.cat_file(key, start=start, end=end)
+        return self.filesystem.cat_file(key, start=start, end=end)

     def put(self, key, data, append=False):
         self._verify_filesystem_and_key(key)
@@ -134,16 +135,16 @@ class DBFSStore(DataStore):
             mode += "b"
         elif not isinstance(data, str):
             raise TypeError(f"Unknown data type {type(data)}")
-        with self._filesystem.open(key, mode) as f:
+        with self.filesystem.open(key, mode) as f:
             f.write(data)

     def upload(self, key: str, src_path: str):
         self._verify_filesystem_and_key(key)
-        self._filesystem.put_file(src_path, key, overwrite=True)
+        self.filesystem.put_file(src_path, key, overwrite=True)

     def stat(self, key: str):
         self._verify_filesystem_and_key(key)
-        file = self._filesystem.stat(key)
+        file = self.filesystem.stat(key)
         if file["type"] == "file":
             size = file["size"]
         elif file["type"] == "directory":
@@ -155,10 +156,10 @@ class DBFSStore(DataStore):
         Basic ls of file/dir - without recursion.
         """
         self._verify_filesystem_and_key(key)
-        if self._filesystem.isfile(key):
+        if self.filesystem.isfile(key):
             return key
         remote_path = f"{key}/*"
-        files = self._filesystem.glob(remote_path)
+        files = self.filesystem.glob(remote_path)
         # Get only the files and directories under key path, without the key path itself.
         # for example in a filesystem that has this path: /test_mlrun_dbfs_objects/test.txt
         # listdir with the input /test_mlrun_dbfs_objects as a key will return ['test.txt'].
@@ -170,4 +171,4 @@ class DBFSStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "dbfs file system does not support maxdepth option in rm function"
             )
-        self.get_filesystem().rm(path=path, recursive=recursive)
+        self.filesystem.rm(path=path, recursive=recursive)
mlrun/datastore/filestore.py CHANGED
@@ -47,7 +47,8 @@ class FileStore(DataStore):
         key = path.join(self._real_path, suffix)
         return path.join(self.subpath, key)

-    def get_filesystem(self, silent=True):
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if not self._filesystem:
             self._filesystem = fsspec.filesystem("file")
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -30,7 +30,8 @@ class GoogleCloudStorageStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)

-    def get_filesystem(self):
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
@@ -59,12 +60,12 @@ class GoogleCloudStorageStore(DataStore):
             except json.JSONDecodeError:
                 # If it's not json, handle it as a filename
                 token = credentials
-            return dict(token=token)
+            return self._sanitize_storage_options(dict(token=token))
         else:
             logger.info(
                 "No GCS credentials available - auth will rely on auto-discovery of credentials"
             )
-            return None
+            return self._sanitize_storage_options(None)

     def _make_path(self, key):
         key = key.strip("/")
@@ -75,7 +76,7 @@ class GoogleCloudStorageStore(DataStore):
         path = self._make_path(key)

         end = offset + size if size else None
-        blob = self.get_filesystem().cat_file(path, start=offset, end=end)
+        blob = self.filesystem.cat_file(path, start=offset, end=end)
         return blob

     def put(self, key, data, append=False):
@@ -94,17 +95,17 @@ class GoogleCloudStorageStore(DataStore):
             raise TypeError(
                 "Data type unknown. Unable to put in Google cloud storage!"
             )
-        with self.get_filesystem().open(path, mode) as f:
+        with self.filesystem.open(path, mode) as f:
             f.write(data)

     def upload(self, key, src_path):
         path = self._make_path(key)
-        self.get_filesystem().put_file(src_path, path, overwrite=True)
+        self.filesystem.put_file(src_path, path, overwrite=True)

     def stat(self, key):
         path = self._make_path(key)

-        files = self.get_filesystem().ls(path, detail=True)
+        files = self.filesystem.ls(path, detail=True)
         if len(files) == 1 and files[0]["type"] == "file":
             size = files[0]["size"]
             modified = files[0]["updated"]
@@ -116,10 +117,10 @@ class GoogleCloudStorageStore(DataStore):

     def listdir(self, key):
         path = self._make_path(key)
-        if self.get_filesystem().isfile(path):
+        if self.filesystem.isfile(path):
             return key
         remote_path = f"{path}/**"
-        files = self.get_filesystem().glob(remote_path)
+        files = self.filesystem.glob(remote_path)
         key_length = len(key)
         files = [
             f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
@@ -128,7 +129,7 @@ class GoogleCloudStorageStore(DataStore):

     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
-        self.get_filesystem().rm(path=path, recursive=recursive, maxdepth=maxdepth)
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
         res = None
mlrun/datastore/redis.py CHANGED
@@ -73,7 +73,8 @@ class RedisStore(DataStore):

         return self._redis

-    def get_filesystem(self, silent):
+    @property
+    def filesystem(self):
         return None  # no support for fsspec

     def supports_isdir(self):
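The pattern running through all of the datastore files above: `get_filesystem()` becomes a read-only `filesystem` property (returning None where fsspec isn't supported, as in `RedisStore`), while the `DataStore` base class keeps a deprecated `get_filesystem()` shim until 1.8.0. A self-contained sketch of that shim pattern and the call-site migration (a demo class, not mlrun's `DataStore`):

```python
import warnings
from deprecated import deprecated  # same library the diff imports in mlrun/datastore/base.py

class DemoStore:
    """Stand-in for DataStore, only to demonstrate the shim pattern from the diff."""

    @deprecated(
        version="1.8.0",
        reason="'get_filesystem()' will be removed in 1.8.0, use 'filesystem' property instead",
        category=FutureWarning,
    )
    def get_filesystem(self):
        # old entry point just forwards to the new property
        return self.filesystem

    @property
    def filesystem(self):
        return None  # base class / Redis-style stores: no fsspec support

store = DemoStore()
print(store.filesystem)             # new style: None here, an fsspec filesystem in subclasses
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    store.get_filesystem()          # old style still works, but warns
print(caught[0].category.__name__)  # FutureWarning
```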