mlrun-1.6.0rc21-py3-none-any.whl → mlrun-1.6.0rc22-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/artifacts/base.py +6 -6
- mlrun/artifacts/dataset.py +15 -8
- mlrun/artifacts/manager.py +1 -1
- mlrun/artifacts/model.py +2 -2
- mlrun/artifacts/plots.py +8 -8
- mlrun/datastore/azure_blob.py +9 -14
- mlrun/datastore/base.py +21 -7
- mlrun/datastore/dbfs_store.py +10 -10
- mlrun/datastore/filestore.py +2 -1
- mlrun/datastore/google_cloud_storage.py +9 -8
- mlrun/datastore/redis.py +2 -1
- mlrun/datastore/s3.py +3 -6
- mlrun/datastore/sources.py +2 -12
- mlrun/datastore/targets.py +2 -13
- mlrun/datastore/v3io.py +16 -19
- mlrun/db/httpdb.py +8 -1
- mlrun/execution.py +14 -5
- mlrun/feature_store/api.py +3 -4
- mlrun/launcher/base.py +4 -4
- mlrun/lists.py +0 -6
- mlrun/model.py +8 -1
- mlrun/model_monitoring/api.py +9 -31
- mlrun/model_monitoring/batch.py +14 -13
- mlrun/model_monitoring/controller.py +91 -69
- mlrun/model_monitoring/controller_handler.py +1 -3
- mlrun/model_monitoring/helpers.py +19 -8
- mlrun/model_monitoring/stream_processing.py +0 -3
- mlrun/projects/operations.py +1 -1
- mlrun/projects/project.py +5 -4
- mlrun/runtimes/base.py +6 -1
- mlrun/runtimes/constants.py +11 -0
- mlrun/runtimes/kubejob.py +1 -1
- mlrun/runtimes/local.py +64 -53
- mlrun/serving/routers.py +7 -20
- mlrun/serving/server.py +4 -14
- mlrun/serving/utils.py +0 -3
- mlrun/utils/helpers.py +5 -2
- mlrun/utils/logger.py +5 -5
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/METADATA +3 -1
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/RECORD +45 -45
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/LICENSE +0 -0
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/WHEEL +0 -0
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.0rc21.dist-info → mlrun-1.6.0rc22.dist-info}/top_level.txt +0 -0
mlrun/artifacts/base.py
CHANGED
@@ -714,10 +714,10 @@ class LinkArtifact(Artifact):
         self._spec = self._verify_dict(spec, "spec", LinkArtifactSpec)


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyArtifact' will be removed in 1.
+    reason="'LegacyArtifact' will be removed in 1.7.0, use 'Artifact' instead",
     category=FutureWarning,
 )
 class LegacyArtifact(ModelObj):
@@ -880,10 +880,10 @@ class LegacyArtifact(ModelObj):
         return generate_target_path(self, artifact_path, producer)


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyDirArtifact' will be removed in 1.
+    reason="'LegacyDirArtifact' will be removed in 1.7.0, use 'DirArtifact' instead",
     category=FutureWarning,
 )
 class LegacyDirArtifact(LegacyArtifact):
@@ -916,10 +916,10 @@ class LegacyDirArtifact(LegacyArtifact):
         mlrun.datastore.store_manager.object(url=target).upload(file_path)


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyLinkArtifact' will be removed in 1.
+    reason="'LegacyLinkArtifact' will be removed in 1.7.0, use 'LinkArtifact' instead",
     category=FutureWarning,
 )
 class LegacyLinkArtifact(LegacyArtifact):

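All of the hunks in this file only bump the removal target in the TODO comments and deprecation messages. For readers unfamiliar with the pattern, here is a minimal sketch of how the deprecation decorator used above behaves, assuming the `deprecated` package from PyPI; `LegacyThing` is a made-up name, not an mlrun class.

import warnings

from deprecated import deprecated


@deprecated(
    version="1.3.0",
    reason="'LegacyThing' will be removed in 1.7.0, use 'Thing' instead",
    category=FutureWarning,
)
class LegacyThing:
    pass


with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    LegacyThing()  # instantiating the deprecated class triggers the warning
print(caught[0].category, caught[0].message)
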
mlrun/artifacts/dataset.py
CHANGED
@@ -283,14 +283,16 @@ class DatasetArtifact(Artifact):
     if artifact.spec.length > preview_rows_length and not ignore_preview_limits:
         preview_df = df.head(preview_rows_length)

-
-    # that way it wont create another index if one already there
-    preview_df = preview_df.reset_index(drop=True)
+    preview_df = preview_df.reset_index()
     artifact.status.header_original_length = len(preview_df.columns)
     if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
         preview_df = preview_df.iloc[:, :max_preview_columns]
     artifact.spec.header = preview_df.columns.values.tolist()
     artifact.status.preview = preview_df.values.tolist()
+    # Table schema parsing doesn't require a column named "index"
+    # to align its output with previously generated header and preview data
+    if "index" in preview_df.columns:
+        preview_df.drop("index", axis=1, inplace=True)
     artifact.spec.schema = build_table_schema(preview_df)

     # set artifact stats if stats is explicitly set to true, or if stats is None and the dataframe is small
@@ -344,10 +346,10 @@ class DatasetArtifact(Artifact):
         self.status.stats = stats


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyTableArtifact' will be removed in 1.
+    reason="'LegacyTableArtifact' will be removed in 1.7.0, use 'TableArtifact' instead",
     category=FutureWarning,
 )
 class LegacyTableArtifact(LegacyArtifact):
@@ -400,10 +402,10 @@ class LegacyTableArtifact(LegacyArtifact):
         return csv_buffer.getvalue()


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyDatasetArtifact' will be removed in 1.
+    reason="'LegacyDatasetArtifact' will be removed in 1.7.0, use 'DatasetArtifact' instead",
     category=FutureWarning,
 )
 class LegacyDatasetArtifact(LegacyArtifact):
@@ -513,11 +515,16 @@ class LegacyDatasetArtifact(LegacyArtifact):

     if artifact.length > preview_rows_length and not ignore_preview_limits:
         preview_df = df.head(preview_rows_length)
-
+
+    preview_df = preview_df.reset_index()
     if len(preview_df.columns) > max_preview_columns and not ignore_preview_limits:
         preview_df = preview_df.iloc[:, :max_preview_columns]
     artifact.header = preview_df.columns.values.tolist()
     artifact.preview = preview_df.values.tolist()
+    # Table schema parsing doesn't require a column named "index"
+    # to align its output with previously generated header and preview data
+    if "index" in preview_df.columns:
+        preview_df.drop("index", axis=1, inplace=True)
     artifact.schema = build_table_schema(preview_df)
     if (
         stats

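The functional change here is that the dataset preview now keeps the original dataframe index (reset_index() instead of reset_index(drop=True)) and only drops the resulting "index" column right before building the table schema. A small standalone pandas sketch of that behavior, using made-up data rather than mlrun's artifact objects:

import pandas as pd
from pandas.io.json import build_table_schema

df = pd.DataFrame({"a": [1, 2, 3]}, index=[10, 20, 30])

preview_df = df.reset_index()          # keeps the original index as an "index" column
header = preview_df.columns.tolist()   # ['index', 'a'], so the preview rows include the index
preview = preview_df.values.tolist()   # [[10, 1], [20, 2], [30, 3]]

# The schema is built without the explicit "index" column so it lines up with the data columns
if "index" in preview_df.columns:
    preview_df.drop("index", axis=1, inplace=True)
schema = build_table_schema(preview_df)

print(header)
print(preview)
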
mlrun/artifacts/manager.py
CHANGED
@@ -66,7 +66,7 @@ artifact_types = {
     "bokeh": BokehArtifact,
 }

-# TODO - Remove this when legacy types are deleted in 1.
+# TODO - Remove this when legacy types are deleted in 1.7.0
 legacy_artifact_types = {
     "": LegacyArtifact,
     "dir": LegacyDirArtifact,

mlrun/artifacts/model.py
CHANGED
@@ -390,10 +390,10 @@ class ModelArtifact(Artifact):
         return mlrun.get_dataitem(target_model_path).get()


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyModelArtifact' will be removed in 1.
+    reason="'LegacyModelArtifact' will be removed in 1.7.0, use 'ModelArtifact' instead",
     category=FutureWarning,
 )
 class LegacyModelArtifact(LegacyArtifact):

mlrun/artifacts/plots.py
CHANGED
@@ -256,10 +256,10 @@ class PlotlyArtifact(Artifact):
         return self._figure.to_html()


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyPlotArtifact' will be removed in 1.
+    reason="'LegacyPlotArtifact' will be removed in 1.7.0, use 'PlotArtifact' instead",
     category=FutureWarning,
 )
 class LegacyPlotArtifact(LegacyArtifact):
@@ -303,10 +303,10 @@ class LegacyPlotArtifact(LegacyArtifact):
         return self._TEMPLATE.format(self.description or self.key, self.key, data_uri)


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyChartArtifact' will be removed in 1.
+    reason="'LegacyChartArtifact' will be removed in 1.7.0, use 'ChartArtifact' instead",
     category=FutureWarning,
 )
 class LegacyChartArtifact(LegacyArtifact):
@@ -377,10 +377,10 @@ class LegacyChartArtifact(LegacyArtifact):
     )


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyBokehArtifact' will be removed in 1.
+    reason="'LegacyBokehArtifact' will be removed in 1.7.0, use 'BokehArtifact' instead",
     category=FutureWarning,
 )
 class LegacyBokehArtifact(LegacyArtifact):
@@ -433,10 +433,10 @@ class LegacyBokehArtifact(LegacyArtifact):
         return file_html(self._figure, CDN, self.key)


-# TODO: remove in 1.
+# TODO: remove in 1.7.0
 @deprecated(
     version="1.3.0",
-    reason="'LegacyPlotlyArtifact' will be removed in 1.
+    reason="'LegacyPlotlyArtifact' will be removed in 1.7.0, use 'PlotlyArtifact' instead",
     category=FutureWarning,
 )
 class LegacyPlotlyArtifact(LegacyArtifact):

mlrun/datastore/azure_blob.py
CHANGED
@@ -20,7 +20,6 @@ from azure.storage.blob._shared.base_client import parse_connection_str
 from fsspec.registry import get_filesystem_class

 import mlrun.errors
-from mlrun.errors import err_to_str

 from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
@@ -33,20 +32,16 @@ class AzureBlobStore(DataStore):

     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        self.get_filesystem()

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
         try:
             import adlfs  # noqa
         except ImportError as exc:
-
-            raise ImportError(
-                f"Azure adlfs not installed, run pip install adlfs, {err_to_str(exc)}"
-            )
-            return None
+            raise ImportError("Azure adlfs not installed") from exc
         # in order to support az and wasbs kinds.
         filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
@@ -87,12 +82,12 @@ class AzureBlobStore(DataStore):

     def upload(self, key, src_path):
         remote_path = self._convert_key_to_remote_path(key)
-        self.
+        self.filesystem.put_file(src_path, remote_path, overwrite=True)

     def get(self, key, size=None, offset=0):
         remote_path = self._convert_key_to_remote_path(key)
         end = offset + size if size else None
-        blob = self.
+        blob = self.filesystem.cat_file(remote_path, start=offset, end=end)
         return blob

     def put(self, key, data, append=False):
@@ -107,12 +102,12 @@ class AzureBlobStore(DataStore):
             mode = "w"
         else:
             raise TypeError("Data type unknown. Unable to put in Azure!")
-        with self.
+        with self.filesystem.open(remote_path, mode) as f:
             f.write(data)

     def stat(self, key):
         remote_path = self._convert_key_to_remote_path(key)
-        files = self.
+        files = self.filesystem.ls(remote_path, detail=True)
         if len(files) == 1 and files[0]["type"] == "file":
             size = files[0]["size"]
             modified = files[0]["last_modified"]
@@ -124,10 +119,10 @@ class AzureBlobStore(DataStore):

     def listdir(self, key):
         remote_path = self._convert_key_to_remote_path(key)
-        if self.
+        if self.filesystem.isfile(remote_path):
             return key
         remote_path = f"{remote_path}/**"
-        files = self.
+        files = self.filesystem.glob(remote_path)
         key_length = len(key)
         files = [
             f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1

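This release replaces the eager get_filesystem() call in the datastore constructors with a lazily evaluated filesystem property across the datastore classes. A generic, simplified sketch of that pattern (not the actual AzureBlobStore code); the local "file" backend stands in for the optional adlfs dependency:

from typing import Optional

import fsspec


class ExampleStore:
    def __init__(self) -> None:
        # the filesystem is no longer created eagerly in __init__
        self._filesystem: Optional[fsspec.AbstractFileSystem] = None

    @property
    def filesystem(self) -> fsspec.AbstractFileSystem:
        """Create the fsspec filesystem on first access and cache it."""
        if self._filesystem is None:
            try:
                import fsspec.implementations.local  # noqa: F401  stand-in for an optional backend
            except ImportError as exc:
                # surface the missing optional dependency, chaining the original error
                raise ImportError("backend package not installed") from exc
            self._filesystem = fsspec.filesystem("file")
        return self._filesystem


store = ExampleStore()
print(store.filesystem.ls("."))  # first access builds the filesystem, later calls reuse it
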
mlrun/datastore/base.py
CHANGED
@@ -25,6 +25,7 @@ import pyarrow
 import pytz
 import requests
 import urllib3
+from deprecated import deprecated

 import mlrun.errors
 from mlrun.errors import err_to_str
@@ -98,7 +99,18 @@ class DataStore:
     def uri_to_ipython(endpoint, subpath):
         return ""

-
+    # TODO: remove in 1.8.0
+    @deprecated(
+        version="1.8.0",
+        reason="'get_filesystem()' will be removed in 1.8.0, use "
+        "'filesystem' property instead",
+        category=FutureWarning,
+    )
+    def get_filesystem(self):
+        return self.filesystem
+
+    @property
+    def filesystem(self) -> Optional[fsspec.AbstractFileSystem]:
         """return fsspec file system object, if supported"""
         return None

@@ -117,7 +129,7 @@ class DataStore:
         return self._sanitize_storage_options(None)

     def open(self, filepath, mode):
-        file_system = self.
+        file_system = self.filesystem
         return file_system.open(filepath, mode)

     def _join(self, key):
@@ -238,7 +250,7 @@ class DataStore:
         df_module = df_module or pd
         file_url = self._sanitize_url(url)
         is_csv, is_json, drop_time_column = False, False, False
-        file_system = self.
+        file_system = self.filesystem
         if file_url.endswith(".csv") or format == "csv":
             is_csv = True
             drop_time_column = False
@@ -363,7 +375,7 @@ class DataStore:
         }

     def rm(self, path, recursive=False, maxdepth=None):
-        self.
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     @staticmethod
     def _is_dd(df_module):
@@ -653,9 +665,10 @@ def http_head(url, headers=None, auth=None):
     return response.headers


-def http_put(url, data, headers=None, auth=None):
+def http_put(url, data, headers=None, auth=None, session=None):
     try:
-
+        put_api = session.put if session else requests.put
+        response = put_api(
             url, data=data, headers=headers, auth=auth, verify=verify_ssl
         )
     except OSError as exc:
@@ -679,7 +692,8 @@ class HttpStore(DataStore):
         self._enrich_https_token()
         self._validate_https_token()

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if not self._filesystem:
             self._filesystem = fsspec.filesystem("http")

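Two things change in this module: get_filesystem() becomes a deprecated shim over the new filesystem property, and http_put() grows an optional session argument so callers can reuse one HTTP connection pool across requests. A small sketch of the session fallback pattern; demo_put and the URL are illustrative placeholders, not mlrun APIs:

import requests


def demo_put(url, data, headers=None, auth=None, session=None):
    """Send a PUT, reusing the caller's session (and its connection pool) when one is given."""
    put_api = session.put if session else requests.put
    return put_api(url, data=data, headers=headers, auth=auth)


# Without a session every call opens its own connection; with one, the pool is shared.
with requests.Session() as session:
    for chunk in (b"part-1", b"part-2"):
        # hypothetical endpoint; replace with a real URL to actually run the upload
        demo_put("https://example.com/upload", data=chunk, session=session)
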
mlrun/datastore/dbfs_store.py
CHANGED
@@ -83,9 +83,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
 class DBFSStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
-        self.get_filesystem(silent=False)

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         filesystem_class = get_filesystem_class(protocol=self.kind)
         if not self._filesystem:
@@ -104,7 +104,7 @@ class DBFSStore(DataStore):
         return self._sanitize_storage_options(res)

     def _verify_filesystem_and_key(self, key: str):
-        if not self.
+        if not self.filesystem:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Performing actions on data-item without a valid filesystem"
             )
@@ -121,7 +121,7 @@ class DBFSStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError("offset cannot be None")
         start = offset or None
         end = offset + size if size else None
-        return self.
+        return self.filesystem.cat_file(key, start=start, end=end)

     def put(self, key, data, append=False):
         self._verify_filesystem_and_key(key)
@@ -135,16 +135,16 @@ class DBFSStore(DataStore):
             mode += "b"
         elif not isinstance(data, str):
             raise TypeError(f"Unknown data type {type(data)}")
-        with self.
+        with self.filesystem.open(key, mode) as f:
             f.write(data)

     def upload(self, key: str, src_path: str):
         self._verify_filesystem_and_key(key)
-        self.
+        self.filesystem.put_file(src_path, key, overwrite=True)

     def stat(self, key: str):
         self._verify_filesystem_and_key(key)
-        file = self.
+        file = self.filesystem.stat(key)
         if file["type"] == "file":
             size = file["size"]
         elif file["type"] == "directory":
@@ -156,10 +156,10 @@ class DBFSStore(DataStore):
         Basic ls of file/dir - without recursion.
         """
         self._verify_filesystem_and_key(key)
-        if self.
+        if self.filesystem.isfile(key):
             return key
         remote_path = f"{key}/*"
-        files = self.
+        files = self.filesystem.glob(remote_path)
         # Get only the files and directories under key path, without the key path itself.
         # for example in a filesystem that has this path: /test_mlrun_dbfs_objects/test.txt
         # listdir with the input /test_mlrun_dbfs_objects as a key will return ['test.txt'].
@@ -171,4 +171,4 @@ class DBFSStore(DataStore):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "dbfs file system does not support maxdepth option in rm function"
             )
-        self.
+        self.filesystem.rm(path=path, recursive=recursive)

mlrun/datastore/filestore.py
CHANGED
@@ -47,7 +47,8 @@ class FileStore(DataStore):
         key = path.join(self._real_path, suffix)
         return path.join(self.subpath, key)

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if not self._filesystem:
             self._filesystem = fsspec.filesystem("file")
mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -30,7 +30,8 @@ class GoogleCloudStorageStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets=secrets)

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
@@ -75,7 +76,7 @@ class GoogleCloudStorageStore(DataStore):
         path = self._make_path(key)

         end = offset + size if size else None
-        blob = self.
+        blob = self.filesystem.cat_file(path, start=offset, end=end)
         return blob

     def put(self, key, data, append=False):
@@ -94,17 +95,17 @@ class GoogleCloudStorageStore(DataStore):
             raise TypeError(
                 "Data type unknown. Unable to put in Google cloud storage!"
             )
-        with self.
+        with self.filesystem.open(path, mode) as f:
             f.write(data)

     def upload(self, key, src_path):
         path = self._make_path(key)
-        self.
+        self.filesystem.put_file(src_path, path, overwrite=True)

     def stat(self, key):
         path = self._make_path(key)

-        files = self.
+        files = self.filesystem.ls(path, detail=True)
         if len(files) == 1 and files[0]["type"] == "file":
             size = files[0]["size"]
             modified = files[0]["updated"]
@@ -116,10 +117,10 @@ class GoogleCloudStorageStore(DataStore):

     def listdir(self, key):
         path = self._make_path(key)
-        if self.
+        if self.filesystem.isfile(path):
             return key
         remote_path = f"{path}/**"
-        files = self.
+        files = self.filesystem.glob(remote_path)
         key_length = len(key)
         files = [
             f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
@@ -128,7 +129,7 @@ class GoogleCloudStorageStore(DataStore):

     def rm(self, path, recursive=False, maxdepth=None):
         path = self._make_path(path)
-        self.
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

     def get_spark_options(self):
         res = None

mlrun/datastore/redis.py
CHANGED
mlrun/datastore/s3.py
CHANGED
@@ -109,18 +109,15 @@ class S3Store(DataStore):
         res["spark.hadoop.fs.s3a.aws.profile"] = st.get("profile")
         return res

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
         try:
             import s3fs  # noqa
         except ImportError as exc:
-
-            raise ImportError(
-                "AWS s3fs not installed, run pip install s3fs"
-            ) from exc
-            return None
+            raise ImportError("AWS s3fs not installed") from exc
         filesystem_class = get_filesystem_class(protocol=self.kind)
         self._filesystem = makeDatastoreSchemaSanitizer(
             filesystem_class,

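As in the other datastores, the import-error handling is reduced to a single raise ... from exc, which keeps the original ImportError attached as __cause__. A tiny illustration with a hypothetical missing package name:

def load_optional_backend():
    try:
        import some_missing_backend  # noqa: F401  (hypothetical package)
    except ImportError as exc:
        # chain the original ImportError so its message stays visible as __cause__
        raise ImportError("optional backend not installed") from exc


try:
    load_optional_backend()
except ImportError as err:
    print(err)            # optional backend not installed
    print(err.__cause__)  # No module named 'some_missing_backend'
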
mlrun/datastore/sources.py
CHANGED
@@ -1027,7 +1027,7 @@ class KafkaSource(OnlineSource):
         ) < semver.VersionInfo.parse(bug_fix_version):
             warnings.warn(
                 f"Detected nuclio version {config.nuclio_version}, which is older "
-                f"than {bug_fix_version}. Forcing number of replicas of 1 in function '{function.
+                f"than {bug_fix_version}. Forcing number of replicas of 1 in function '{function.metadata.name}'. "
                 f"To resolve this, please upgrade Nuclio."
             )
             function.spec.min_replicas = 1
@@ -1053,7 +1053,6 @@ class SQLSource(BaseSourceDriver):
         db_url: str = None,
         table_name: str = None,
         spark_options: dict = None,
-        time_fields: List[str] = None,
         parse_dates: List[str] = None,
         **kwargs,
     ):
@@ -1078,17 +1077,8 @@ class SQLSource(BaseSourceDriver):
         :param table_name: the name of the collection to access,
                            from the current database
         :param spark_options: additional spark read options
-        :param time_fields : all the field to be parsed as timestamp.
         :param parse_dates : all the field to be parsed as timestamp.
         """
-        if time_fields:
-            warnings.warn(
-                "'time_fields' is deprecated, use 'parse_dates' instead. "
-                "This will be removed in 1.6.0",
-                # TODO: Remove this in 1.6.0
-                FutureWarning,
-            )
-            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1096,7 +1086,7 @@ class SQLSource(BaseSourceDriver):
             )
         if time_field:
             if parse_dates:
-
+                parse_dates.append(time_field)
             else:
                 parse_dates = [time_field]
         attrs = {

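With the deprecated time_fields argument removed, passing it now fails with a TypeError; timestamp columns go through parse_dates instead. A hedged migration sketch based only on the parameters visible in this diff; the connection string, table name, and column name are placeholders:

import mlrun.datastore.sources as sources

source = sources.SQLSource(
    db_url="mysql+pymysql://user:pass@host:3306/db",
    table_name="my_table",
    parse_dates=["created_at"],  # previously passed as time_fields=[...]
)
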
mlrun/datastore/targets.py
CHANGED
@@ -17,7 +17,6 @@ import os
 import random
 import sys
 import time
-import warnings
 from collections import Counter
 from copy import copy
 from typing import Any, Dict, List, Optional, Union
@@ -553,7 +552,7 @@ class BaseStoreTarget(DataTargetBase):
         else:
             store, target_path = self._get_store_and_path()
         target_path = generate_path_with_chunk(self, chunk_id, target_path)
-        file_system = store.
+        file_system = store.filesystem
         if file_system.protocol == "file":
             dir = os.path.dirname(target_path)
             if dir:
@@ -1407,7 +1406,7 @@ class StreamTarget(BaseStoreTarget):
         from storey import V3ioDriver

         key_columns = list(key_columns.keys())
-        endpoint, uri = parse_path(self.
+        endpoint, uri = parse_path(self.path)
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
@@ -1680,7 +1679,6 @@ class SQLTarget(BaseStoreTarget):
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
-        time_fields: List[str] = None,
         varchar_len: int = 50,
         parse_dates: List[str] = None,
     ):
@@ -1718,20 +1716,11 @@ class SQLTarget(BaseStoreTarget):
         :param create_table: pass True if you want to create new table named by
                              table_name with schema on current database.
         :param create_according_to_data: (not valid)
-        :param time_fields : all the field to be parsed as timestamp.
         :param varchar_len : the defalut len of the all the varchar column (using if needed to create the table).
         :param parse_dates : all the field to be parsed as timestamp.
         """

         create_according_to_data = False  # TODO: open for user
-        if time_fields:
-            warnings.warn(
-                "'time_fields' is deprecated, use 'parse_dates' instead. "
-                "This will be removed in 1.6.0",
-                # TODO: Remove this in 1.6.0
-                FutureWarning,
-            )
-            parse_dates = time_fields
         db_url = db_url or mlrun.mlconf.sql.url
         if db_url is None or table_name is None:
             attr = {}

mlrun/datastore/v3io.py
CHANGED
@@ -19,6 +19,7 @@ from copy import deepcopy
 from datetime import datetime

 import fsspec
+import requests
 import v3io.dataplane

 import mlrun
@@ -73,18 +74,11 @@ class V3ioStore(DataStore):
         schema = "https" if self.secure else "http"
         return f"{schema}://{self.endpoint}"

-
+    @property
+    def filesystem(self):
         """return fsspec file system object, if supported"""
         if self._filesystem:
             return self._filesystem
-        try:
-            import v3iofs  # noqa
-        except ImportError as exc:
-            if not silent:
-                raise ImportError(
-                    "v3iofs or storey not installed, run pip install storey"
-                ) from exc
-            return None
         self._filesystem = fsspec.filesystem("v3io", **self.get_storage_options())
         return self._filesystem

@@ -151,15 +145,18 @@ class V3ioStore(DataStore):
             data = memoryview(data)
         except TypeError:
             pass
-
-
-
-
-
-
-
-
-
+
+        with requests.Session() as requests_session:
+            while buffer_offset < buffer_size:
+                chunk_size = min(buffer_size - buffer_offset, max_chunk_size)
+                http_put(
+                    self.url + self._join(key),
+                    data[buffer_offset : buffer_offset + chunk_size],
+                    append_header if buffer_offset else self.headers,
+                    None,
+                    requests_session,
+                )
+                buffer_offset += chunk_size

     def put(self, key, data, append=False):
         return self._put(key, data)
@@ -207,7 +204,7 @@ class V3ioStore(DataStore):
         """Recursive rm file/folder
         Workaround for v3io-fs not supporting recursive directory removal"""

-        file_system = self.
+        file_system = self.filesystem
         if isinstance(path, str):
             path = [path]
         maxdepth = maxdepth if not maxdepth else maxdepth - 1
