mlrun 1.7.2rc4__py3-none-any.whl → 1.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +26 -22
- mlrun/__main__.py +15 -16
- mlrun/alerts/alert.py +150 -15
- mlrun/api/schemas/__init__.py +1 -9
- mlrun/artifacts/__init__.py +2 -3
- mlrun/artifacts/base.py +62 -19
- mlrun/artifacts/dataset.py +17 -17
- mlrun/artifacts/document.py +454 -0
- mlrun/artifacts/manager.py +28 -18
- mlrun/artifacts/model.py +91 -59
- mlrun/artifacts/plots.py +2 -2
- mlrun/common/constants.py +8 -0
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -1
- mlrun/common/formatters/feature_set.py +2 -0
- mlrun/common/formatters/function.py +1 -0
- mlrun/{model_monitoring/db/stores/v3io_kv/__init__.py → common/formatters/model_endpoint.py} +17 -0
- mlrun/common/formatters/pipeline.py +1 -2
- mlrun/common/formatters/project.py +9 -0
- mlrun/common/model_monitoring/__init__.py +0 -5
- mlrun/common/model_monitoring/helpers.py +12 -62
- mlrun/common/runtimes/constants.py +25 -4
- mlrun/common/schemas/__init__.py +9 -5
- mlrun/common/schemas/alert.py +114 -19
- mlrun/common/schemas/api_gateway.py +3 -3
- mlrun/common/schemas/artifact.py +22 -9
- mlrun/common/schemas/auth.py +8 -4
- mlrun/common/schemas/background_task.py +7 -7
- mlrun/common/schemas/client_spec.py +4 -4
- mlrun/common/schemas/clusterization_spec.py +2 -2
- mlrun/common/schemas/common.py +53 -3
- mlrun/common/schemas/constants.py +15 -0
- mlrun/common/schemas/datastore_profile.py +1 -1
- mlrun/common/schemas/feature_store.py +9 -9
- mlrun/common/schemas/frontend_spec.py +4 -4
- mlrun/common/schemas/function.py +10 -10
- mlrun/common/schemas/hub.py +1 -1
- mlrun/common/schemas/k8s.py +3 -3
- mlrun/common/schemas/memory_reports.py +3 -3
- mlrun/common/schemas/model_monitoring/__init__.py +4 -8
- mlrun/common/schemas/model_monitoring/constants.py +127 -46
- mlrun/common/schemas/model_monitoring/grafana.py +18 -12
- mlrun/common/schemas/model_monitoring/model_endpoints.py +154 -160
- mlrun/common/schemas/notification.py +24 -3
- mlrun/common/schemas/object.py +1 -1
- mlrun/common/schemas/pagination.py +4 -4
- mlrun/common/schemas/partition.py +142 -0
- mlrun/common/schemas/pipeline.py +3 -3
- mlrun/common/schemas/project.py +26 -18
- mlrun/common/schemas/runs.py +3 -3
- mlrun/common/schemas/runtime_resource.py +5 -5
- mlrun/common/schemas/schedule.py +1 -1
- mlrun/common/schemas/secret.py +1 -1
- mlrun/{model_monitoring/db/stores/sqldb/__init__.py → common/schemas/serving.py} +10 -1
- mlrun/common/schemas/tag.py +3 -3
- mlrun/common/schemas/workflow.py +6 -5
- mlrun/common/types.py +1 -0
- mlrun/config.py +157 -89
- mlrun/data_types/__init__.py +5 -3
- mlrun/data_types/infer.py +13 -3
- mlrun/data_types/spark.py +2 -1
- mlrun/datastore/__init__.py +59 -18
- mlrun/datastore/alibaba_oss.py +4 -1
- mlrun/datastore/azure_blob.py +4 -1
- mlrun/datastore/base.py +19 -24
- mlrun/datastore/datastore.py +10 -4
- mlrun/datastore/datastore_profile.py +178 -45
- mlrun/datastore/dbfs_store.py +4 -1
- mlrun/datastore/filestore.py +4 -1
- mlrun/datastore/google_cloud_storage.py +4 -1
- mlrun/datastore/hdfs.py +4 -1
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +4 -1
- mlrun/datastore/s3.py +14 -3
- mlrun/datastore/sources.py +89 -92
- mlrun/datastore/store_resources.py +7 -4
- mlrun/datastore/storeytargets.py +51 -16
- mlrun/datastore/targets.py +38 -31
- mlrun/datastore/utils.py +87 -4
- mlrun/datastore/v3io.py +4 -1
- mlrun/datastore/vectorstore.py +291 -0
- mlrun/datastore/wasbfs/fs.py +13 -12
- mlrun/db/base.py +286 -100
- mlrun/db/httpdb.py +1562 -490
- mlrun/db/nopdb.py +250 -83
- mlrun/errors.py +6 -2
- mlrun/execution.py +194 -50
- mlrun/feature_store/__init__.py +2 -10
- mlrun/feature_store/api.py +20 -458
- mlrun/feature_store/common.py +9 -9
- mlrun/feature_store/feature_set.py +20 -18
- mlrun/feature_store/feature_vector.py +105 -479
- mlrun/feature_store/feature_vector_utils.py +466 -0
- mlrun/feature_store/retrieval/base.py +15 -11
- mlrun/feature_store/retrieval/job.py +2 -1
- mlrun/feature_store/retrieval/storey_merger.py +1 -1
- mlrun/feature_store/steps.py +3 -3
- mlrun/features.py +30 -13
- mlrun/frameworks/__init__.py +1 -2
- mlrun/frameworks/_common/__init__.py +1 -2
- mlrun/frameworks/_common/artifacts_library.py +2 -2
- mlrun/frameworks/_common/mlrun_interface.py +10 -6
- mlrun/frameworks/_common/model_handler.py +31 -31
- mlrun/frameworks/_common/producer.py +3 -1
- mlrun/frameworks/_dl_common/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +4 -4
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +3 -3
- mlrun/frameworks/_ml_common/__init__.py +1 -2
- mlrun/frameworks/_ml_common/loggers/__init__.py +1 -2
- mlrun/frameworks/_ml_common/model_handler.py +21 -21
- mlrun/frameworks/_ml_common/plans/__init__.py +1 -2
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +3 -1
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/auto_mlrun/__init__.py +1 -2
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +22 -15
- mlrun/frameworks/huggingface/__init__.py +1 -2
- mlrun/frameworks/huggingface/model_server.py +9 -9
- mlrun/frameworks/lgbm/__init__.py +47 -44
- mlrun/frameworks/lgbm/callbacks/__init__.py +1 -2
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -2
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/__init__.py +1 -2
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +5 -5
- mlrun/frameworks/lgbm/model_handler.py +15 -11
- mlrun/frameworks/lgbm/model_server.py +11 -7
- mlrun/frameworks/lgbm/utils.py +2 -2
- mlrun/frameworks/onnx/__init__.py +1 -2
- mlrun/frameworks/onnx/dataset.py +3 -3
- mlrun/frameworks/onnx/mlrun_interface.py +2 -2
- mlrun/frameworks/onnx/model_handler.py +7 -5
- mlrun/frameworks/onnx/model_server.py +8 -6
- mlrun/frameworks/parallel_coordinates.py +11 -11
- mlrun/frameworks/pytorch/__init__.py +22 -23
- mlrun/frameworks/pytorch/callbacks/__init__.py +1 -2
- mlrun/frameworks/pytorch/callbacks/callback.py +2 -1
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +15 -8
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +19 -12
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +22 -15
- mlrun/frameworks/pytorch/callbacks_handler.py +36 -30
- mlrun/frameworks/pytorch/mlrun_interface.py +17 -17
- mlrun/frameworks/pytorch/model_handler.py +21 -17
- mlrun/frameworks/pytorch/model_server.py +13 -9
- mlrun/frameworks/sklearn/__init__.py +19 -18
- mlrun/frameworks/sklearn/estimator.py +2 -2
- mlrun/frameworks/sklearn/metric.py +3 -3
- mlrun/frameworks/sklearn/metrics_library.py +8 -6
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -2
- mlrun/frameworks/sklearn/model_handler.py +4 -3
- mlrun/frameworks/tf_keras/__init__.py +11 -12
- mlrun/frameworks/tf_keras/callbacks/__init__.py +1 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +17 -14
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +15 -12
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +21 -18
- mlrun/frameworks/tf_keras/model_handler.py +17 -13
- mlrun/frameworks/tf_keras/model_server.py +12 -8
- mlrun/frameworks/xgboost/__init__.py +19 -18
- mlrun/frameworks/xgboost/model_handler.py +13 -9
- mlrun/k8s_utils.py +2 -5
- mlrun/launcher/base.py +3 -4
- mlrun/launcher/client.py +2 -2
- mlrun/launcher/local.py +6 -2
- mlrun/launcher/remote.py +1 -1
- mlrun/lists.py +8 -4
- mlrun/model.py +132 -46
- mlrun/model_monitoring/__init__.py +3 -5
- mlrun/model_monitoring/api.py +113 -98
- mlrun/model_monitoring/applications/__init__.py +0 -5
- mlrun/model_monitoring/applications/_application_steps.py +81 -50
- mlrun/model_monitoring/applications/base.py +467 -14
- mlrun/model_monitoring/applications/context.py +212 -134
- mlrun/model_monitoring/{db/stores/base → applications/evidently}/__init__.py +6 -2
- mlrun/model_monitoring/applications/evidently/base.py +146 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +89 -56
- mlrun/model_monitoring/applications/results.py +67 -15
- mlrun/model_monitoring/controller.py +701 -315
- mlrun/model_monitoring/db/__init__.py +0 -2
- mlrun/model_monitoring/db/_schedules.py +242 -0
- mlrun/model_monitoring/db/_stats.py +189 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +33 -22
- mlrun/model_monitoring/db/tsdb/base.py +243 -49
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +76 -36
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +33 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connection.py +213 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +534 -88
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +1 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +436 -106
- mlrun/model_monitoring/helpers.py +356 -114
- mlrun/model_monitoring/stream_processing.py +190 -345
- mlrun/model_monitoring/tracking_policy.py +11 -4
- mlrun/model_monitoring/writer.py +49 -90
- mlrun/package/__init__.py +3 -6
- mlrun/package/context_handler.py +2 -2
- mlrun/package/packager.py +12 -9
- mlrun/package/packagers/__init__.py +0 -2
- mlrun/package/packagers/default_packager.py +14 -11
- mlrun/package/packagers/numpy_packagers.py +16 -7
- mlrun/package/packagers/pandas_packagers.py +18 -18
- mlrun/package/packagers/python_standard_library_packagers.py +25 -11
- mlrun/package/packagers_manager.py +35 -32
- mlrun/package/utils/__init__.py +0 -3
- mlrun/package/utils/_pickler.py +6 -6
- mlrun/platforms/__init__.py +47 -16
- mlrun/platforms/iguazio.py +4 -1
- mlrun/projects/operations.py +30 -30
- mlrun/projects/pipelines.py +116 -47
- mlrun/projects/project.py +1292 -329
- mlrun/render.py +5 -9
- mlrun/run.py +57 -14
- mlrun/runtimes/__init__.py +1 -3
- mlrun/runtimes/base.py +30 -22
- mlrun/runtimes/daskjob.py +9 -9
- mlrun/runtimes/databricks_job/databricks_runtime.py +6 -5
- mlrun/runtimes/function_reference.py +5 -2
- mlrun/runtimes/generators.py +3 -2
- mlrun/runtimes/kubejob.py +6 -7
- mlrun/runtimes/mounts.py +574 -0
- mlrun/runtimes/mpijob/__init__.py +0 -2
- mlrun/runtimes/mpijob/abstract.py +7 -6
- mlrun/runtimes/nuclio/api_gateway.py +7 -7
- mlrun/runtimes/nuclio/application/application.py +11 -13
- mlrun/runtimes/nuclio/application/reverse_proxy.go +66 -64
- mlrun/runtimes/nuclio/function.py +127 -70
- mlrun/runtimes/nuclio/serving.py +105 -37
- mlrun/runtimes/pod.py +159 -54
- mlrun/runtimes/remotesparkjob.py +3 -2
- mlrun/runtimes/sparkjob/__init__.py +0 -2
- mlrun/runtimes/sparkjob/spark3job.py +22 -12
- mlrun/runtimes/utils.py +7 -6
- mlrun/secrets.py +2 -2
- mlrun/serving/__init__.py +8 -0
- mlrun/serving/merger.py +7 -5
- mlrun/serving/remote.py +35 -22
- mlrun/serving/routers.py +186 -240
- mlrun/serving/server.py +41 -10
- mlrun/serving/states.py +432 -118
- mlrun/serving/utils.py +13 -2
- mlrun/serving/v1_serving.py +3 -2
- mlrun/serving/v2_serving.py +161 -203
- mlrun/track/__init__.py +1 -1
- mlrun/track/tracker.py +2 -2
- mlrun/track/trackers/mlflow_tracker.py +6 -5
- mlrun/utils/async_http.py +35 -22
- mlrun/utils/clones.py +7 -4
- mlrun/utils/helpers.py +511 -58
- mlrun/utils/logger.py +119 -13
- mlrun/utils/notifications/notification/__init__.py +22 -19
- mlrun/utils/notifications/notification/base.py +39 -15
- mlrun/utils/notifications/notification/console.py +6 -6
- mlrun/utils/notifications/notification/git.py +11 -11
- mlrun/utils/notifications/notification/ipython.py +10 -9
- mlrun/utils/notifications/notification/mail.py +176 -0
- mlrun/utils/notifications/notification/slack.py +16 -8
- mlrun/utils/notifications/notification/webhook.py +24 -8
- mlrun/utils/notifications/notification_pusher.py +191 -200
- mlrun/utils/regex.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/METADATA +69 -54
- mlrun-1.8.0.dist-info/RECORD +351 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/WHEEL +1 -1
- mlrun/model_monitoring/applications/evidently_base.py +0 -137
- mlrun/model_monitoring/db/stores/__init__.py +0 -136
- mlrun/model_monitoring/db/stores/base/store.py +0 -213
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +0 -71
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +0 -190
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +0 -103
- mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +0 -40
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +0 -659
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +0 -726
- mlrun/model_monitoring/model_endpoint.py +0 -118
- mlrun-1.7.2rc4.dist-info/RECORD +0 -351
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info/licenses}/LICENSE +0 -0
- {mlrun-1.7.2rc4.dist-info → mlrun-1.8.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/alibaba_oss.py
CHANGED
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
import time
|
|
16
16
|
from datetime import datetime
|
|
17
17
|
from pathlib import Path
|
|
18
|
+
from typing import Optional
|
|
18
19
|
from urllib.parse import urlparse
|
|
19
20
|
|
|
20
21
|
import oss2
|
|
@@ -28,7 +29,9 @@ from .base import DataStore, FileStats, make_datastore_schema_sanitizer
|
|
|
28
29
|
class OSSStore(DataStore):
|
|
29
30
|
using_bucket = True
|
|
30
31
|
|
|
31
|
-
def __init__(
|
|
32
|
+
def __init__(
|
|
33
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
34
|
+
):
|
|
32
35
|
super().__init__(parent, name, schema, endpoint, secrets)
|
|
33
36
|
# will be used in case user asks to assume a role and work through fsspec
|
|
34
37
|
|
mlrun/datastore/azure_blob.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
|
|
15
15
|
import time
|
|
16
16
|
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
17
18
|
from urllib.parse import urlparse
|
|
18
19
|
|
|
19
20
|
from azure.storage.blob import BlobServiceClient
|
|
@@ -36,7 +37,9 @@ class AzureBlobStore(DataStore):
|
|
|
36
37
|
1024 * 1024 * 8
|
|
37
38
|
) # for service_client property only, does not affect filesystem
|
|
38
39
|
|
|
39
|
-
def __init__(
|
|
40
|
+
def __init__(
|
|
41
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
42
|
+
):
|
|
40
43
|
super().__init__(parent, name, schema, endpoint, secrets=secrets)
|
|
41
44
|
self._service_client = None
|
|
42
45
|
self._storage_options = None
|
mlrun/datastore/base.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
import tempfile
|
|
15
15
|
import urllib.parse
|
|
16
16
|
from base64 import b64encode
|
|
17
|
+
from copy import copy
|
|
17
18
|
from os import path, remove
|
|
18
19
|
from typing import Optional, Union
|
|
19
20
|
from urllib.parse import urlparse
|
|
@@ -24,7 +25,6 @@ import pandas as pd
|
|
|
24
25
|
import pyarrow
|
|
25
26
|
import pytz
|
|
26
27
|
import requests
|
|
27
|
-
from deprecated import deprecated
|
|
28
28
|
|
|
29
29
|
import mlrun.config
|
|
30
30
|
import mlrun.errors
|
|
@@ -48,7 +48,7 @@ class FileStats:
|
|
|
48
48
|
class DataStore:
|
|
49
49
|
using_bucket = False
|
|
50
50
|
|
|
51
|
-
def __init__(self, parent, name, kind, endpoint="", secrets: dict = None):
|
|
51
|
+
def __init__(self, parent, name, kind, endpoint="", secrets: Optional[dict] = None):
|
|
52
52
|
self._parent = parent
|
|
53
53
|
self.kind = kind
|
|
54
54
|
self.name = name
|
|
@@ -95,16 +95,6 @@ class DataStore:
|
|
|
95
95
|
def uri_to_ipython(endpoint, subpath):
|
|
96
96
|
return ""
|
|
97
97
|
|
|
98
|
-
# TODO: remove in 1.8.0
|
|
99
|
-
@deprecated(
|
|
100
|
-
version="1.8.0",
|
|
101
|
-
reason="'get_filesystem()' will be removed in 1.8.0, use "
|
|
102
|
-
"'filesystem' property instead",
|
|
103
|
-
category=FutureWarning,
|
|
104
|
-
)
|
|
105
|
-
def get_filesystem(self):
|
|
106
|
-
return self.filesystem
|
|
107
|
-
|
|
108
98
|
@property
|
|
109
99
|
def filesystem(self) -> Optional[fsspec.AbstractFileSystem]:
|
|
110
100
|
"""return fsspec file system object, if supported"""
|
|
@@ -500,12 +490,18 @@ class DataItem:
|
|
|
500
490
|
"""DataItem url e.g. /dir/path, s3://bucket/path"""
|
|
501
491
|
return self._url
|
|
502
492
|
|
|
503
|
-
def get(
|
|
493
|
+
def get(
|
|
494
|
+
self,
|
|
495
|
+
size: Optional[int] = None,
|
|
496
|
+
offset: int = 0,
|
|
497
|
+
encoding: Optional[str] = None,
|
|
498
|
+
) -> Union[bytes, str]:
|
|
504
499
|
"""read all or a byte range and return the content
|
|
505
500
|
|
|
506
501
|
:param size: number of bytes to get
|
|
507
502
|
:param offset: fetch from offset (in bytes)
|
|
508
503
|
:param encoding: encoding (e.g. "utf-8") for converting bytes to str
|
|
504
|
+
:return: the bytes/str content
|
|
509
505
|
"""
|
|
510
506
|
body = self._store.get(self._path, size=size, offset=offset)
|
|
511
507
|
if encoding and isinstance(body, bytes):
|
|
@@ -519,7 +515,7 @@ class DataItem:
|
|
|
519
515
|
"""
|
|
520
516
|
self._store.download(self._path, target_path)
|
|
521
517
|
|
|
522
|
-
def put(self, data, append=False):
|
|
518
|
+
def put(self, data: Union[bytes, str], append: bool = False) -> None:
|
|
523
519
|
"""write/upload the data, append is only supported by some datastores
|
|
524
520
|
|
|
525
521
|
:param data: data (bytes/str) to write
|
|
@@ -671,13 +667,6 @@ class DataItem:
|
|
|
671
667
|
return f"'{self.url}'"
|
|
672
668
|
|
|
673
669
|
|
|
674
|
-
def get_range(size, offset):
|
|
675
|
-
byterange = f"bytes={offset}-"
|
|
676
|
-
if size:
|
|
677
|
-
byterange += str(offset + size)
|
|
678
|
-
return byterange
|
|
679
|
-
|
|
680
|
-
|
|
681
670
|
def basic_auth_header(user, password):
|
|
682
671
|
username = user.encode("latin1")
|
|
683
672
|
password = password.encode("latin1")
|
|
@@ -687,7 +676,9 @@ def basic_auth_header(user, password):
|
|
|
687
676
|
|
|
688
677
|
|
|
689
678
|
class HttpStore(DataStore):
|
|
690
|
-
def __init__(
|
|
679
|
+
def __init__(
|
|
680
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
681
|
+
):
|
|
691
682
|
super().__init__(parent, name, schema, endpoint, secrets)
|
|
692
683
|
self._https_auth_token = None
|
|
693
684
|
self._schema = schema
|
|
@@ -713,7 +704,11 @@ class HttpStore(DataStore):
|
|
|
713
704
|
raise ValueError("unimplemented")
|
|
714
705
|
|
|
715
706
|
def get(self, key, size=None, offset=0):
|
|
716
|
-
|
|
707
|
+
headers = self._headers
|
|
708
|
+
if urlparse(self.url).hostname == "api.github.com":
|
|
709
|
+
headers = copy(self._headers)
|
|
710
|
+
headers["Accept"] = headers.get("Accept", "application/vnd.github.raw")
|
|
711
|
+
data = self._http_get(self.url + self._join(key), headers, self.auth)
|
|
717
712
|
if offset:
|
|
718
713
|
data = data[offset:]
|
|
719
714
|
if size:
|
|
@@ -724,7 +719,7 @@ class HttpStore(DataStore):
|
|
|
724
719
|
token = self._get_secret_or_env("HTTPS_AUTH_TOKEN")
|
|
725
720
|
if token:
|
|
726
721
|
self._https_auth_token = token
|
|
727
|
-
self._headers.setdefault("Authorization", f"
|
|
722
|
+
self._headers.setdefault("Authorization", f"Bearer {token}")
|
|
728
723
|
|
|
729
724
|
def _validate_https_token(self):
|
|
730
725
|
if self._https_auth_token and self._schema in ["http"]:
|
mlrun/datastore/datastore.py
CHANGED
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
+
from typing import Optional
|
|
14
15
|
from urllib.parse import urlparse
|
|
15
16
|
|
|
16
17
|
from mergedeep import merge
|
|
@@ -110,7 +111,7 @@ def schema_to_store(schema):
|
|
|
110
111
|
|
|
111
112
|
def uri_to_ipython(link):
|
|
112
113
|
schema, endpoint, parsed_url = parse_url(link)
|
|
113
|
-
if schema in [DB_SCHEMA, "memory"]:
|
|
114
|
+
if schema in [DB_SCHEMA, "memory", "ds"]:
|
|
114
115
|
return ""
|
|
115
116
|
return schema_to_store(schema).uri_to_ipython(endpoint, parsed_url.path)
|
|
116
117
|
|
|
@@ -178,12 +179,17 @@ class StoreManager:
|
|
|
178
179
|
# which accepts a feature vector uri and generate the offline vector (parquet) for it if it doesnt exist
|
|
179
180
|
if not target and not allow_empty_resources:
|
|
180
181
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
181
|
-
f"
|
|
182
|
+
f"Resource {url} does not have a valid/persistent offline target"
|
|
182
183
|
)
|
|
183
184
|
return resource, target or ""
|
|
184
185
|
|
|
185
186
|
def object(
|
|
186
|
-
self,
|
|
187
|
+
self,
|
|
188
|
+
url,
|
|
189
|
+
key="",
|
|
190
|
+
project="",
|
|
191
|
+
allow_empty_resources=None,
|
|
192
|
+
secrets: Optional[dict] = None,
|
|
187
193
|
) -> DataItem:
|
|
188
194
|
meta = artifact_url = None
|
|
189
195
|
if is_store_uri(url):
|
|
@@ -205,7 +211,7 @@ class StoreManager:
|
|
|
205
211
|
)
|
|
206
212
|
|
|
207
213
|
def get_or_create_store(
|
|
208
|
-
self, url, secrets: dict = None, project_name=""
|
|
214
|
+
self, url, secrets: Optional[dict] = None, project_name=""
|
|
209
215
|
) -> (DataStore, str, str):
|
|
210
216
|
schema, endpoint, parsed_url = parse_url(url)
|
|
211
217
|
subpath = parsed_url.path
|
|
@@ -11,15 +11,15 @@
|
|
|
11
11
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
import ast
|
|
16
16
|
import base64
|
|
17
17
|
import json
|
|
18
18
|
import typing
|
|
19
19
|
import warnings
|
|
20
|
-
from urllib.parse import ParseResult, urlparse
|
|
20
|
+
from urllib.parse import ParseResult, urlparse
|
|
21
21
|
|
|
22
|
-
import pydantic
|
|
22
|
+
import pydantic.v1
|
|
23
23
|
from mergedeep import merge
|
|
24
24
|
|
|
25
25
|
import mlrun
|
|
@@ -28,15 +28,15 @@ import mlrun.errors
|
|
|
28
28
|
from ..secrets import get_secret_or_env
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
class DatastoreProfile(pydantic.BaseModel):
|
|
31
|
+
class DatastoreProfile(pydantic.v1.BaseModel):
|
|
32
32
|
type: str
|
|
33
33
|
name: str
|
|
34
34
|
_private_attributes: list = ()
|
|
35
35
|
|
|
36
36
|
class Config:
|
|
37
|
-
extra = pydantic.Extra.forbid
|
|
37
|
+
extra = pydantic.v1.Extra.forbid
|
|
38
38
|
|
|
39
|
-
@pydantic.validator("name")
|
|
39
|
+
@pydantic.v1.validator("name")
|
|
40
40
|
@classmethod
|
|
41
41
|
def lower_case(cls, v):
|
|
42
42
|
return v.lower()
|
|
@@ -75,14 +75,72 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
|
|
|
75
75
|
|
|
76
76
|
|
|
77
77
|
class DatastoreProfileBasic(DatastoreProfile):
|
|
78
|
-
type: str = pydantic.Field("basic")
|
|
78
|
+
type: str = pydantic.v1.Field("basic")
|
|
79
79
|
_private_attributes = "private"
|
|
80
80
|
public: str
|
|
81
81
|
private: typing.Optional[str] = None
|
|
82
82
|
|
|
83
83
|
|
|
84
|
+
class ConfigProfile(DatastoreProfile):
|
|
85
|
+
"""
|
|
86
|
+
A profile class for managing configuration data with nested public and private attributes.
|
|
87
|
+
This class extends DatastoreProfile to handle configuration settings, separating them into
|
|
88
|
+
public and private dictionaries. Both dictionaries support nested structures, and the class
|
|
89
|
+
provides functionality to merge these attributes when needed.
|
|
90
|
+
|
|
91
|
+
Args:
|
|
92
|
+
public (Optional[dict]): Dictionary containing public configuration settings,
|
|
93
|
+
supporting nested structures
|
|
94
|
+
private (Optional[dict]): Dictionary containing private/sensitive configuration settings,
|
|
95
|
+
supporting nested structures
|
|
96
|
+
|
|
97
|
+
Example:
|
|
98
|
+
>>> public = {
|
|
99
|
+
"database": {
|
|
100
|
+
"host": "localhost",
|
|
101
|
+
"port": 5432
|
|
102
|
+
},
|
|
103
|
+
"api_version": "v1"
|
|
104
|
+
}
|
|
105
|
+
>>> private = {
|
|
106
|
+
"database": {
|
|
107
|
+
"password": "secret123",
|
|
108
|
+
"username": "admin"
|
|
109
|
+
},
|
|
110
|
+
"api_key": "xyz789"
|
|
111
|
+
}
|
|
112
|
+
>>> config = ConfigProfile("myconfig", public=public, private=private)
|
|
113
|
+
|
|
114
|
+
# When attributes() is called, it merges public and private:
|
|
115
|
+
# {
|
|
116
|
+
# "database": {
|
|
117
|
+
# "host": "localhost",
|
|
118
|
+
# "port": 5432,
|
|
119
|
+
# "password": "secret123",
|
|
120
|
+
# "username": "admin"
|
|
121
|
+
# },
|
|
122
|
+
# "api_version": "v1",
|
|
123
|
+
# "api_key": "xyz789"
|
|
124
|
+
# }
|
|
125
|
+
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
type = "config"
|
|
129
|
+
_private_attributes = "private"
|
|
130
|
+
public: typing.Optional[dict] = None
|
|
131
|
+
private: typing.Optional[dict] = None
|
|
132
|
+
|
|
133
|
+
def attributes(self):
|
|
134
|
+
res = {}
|
|
135
|
+
if self.public:
|
|
136
|
+
res = merge(res, self.public)
|
|
137
|
+
if self.private:
|
|
138
|
+
res = merge(res, self.private)
|
|
139
|
+
return res
|
|
140
|
+
|
|
141
|
+
|
|
84
142
|
class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
85
|
-
type: str = pydantic.Field("kafka_target")
|
|
143
|
+
type: str = pydantic.v1.Field("kafka_target")
|
|
86
144
|
_private_attributes = "kwargs_private"
|
|
87
145
|
bootstrap_servers: typing.Optional[str] = None
|
|
88
146
|
brokers: typing.Optional[str] = None
|
|
@@ -107,12 +165,15 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
|
107
165
|
self.brokers = self.bootstrap_servers
|
|
108
166
|
self.bootstrap_servers = None
|
|
109
167
|
warnings.warn(
|
|
110
|
-
"'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.
|
|
168
|
+
"'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.10.0, "
|
|
111
169
|
"use 'brokers' instead.",
|
|
112
|
-
# TODO: Remove this in 1.
|
|
170
|
+
# TODO: Remove this in 1.10.0
|
|
113
171
|
FutureWarning,
|
|
114
172
|
)
|
|
115
173
|
|
|
174
|
+
def get_topic(self) -> typing.Optional[str]:
|
|
175
|
+
return self.topic
|
|
176
|
+
|
|
116
177
|
def attributes(self):
|
|
117
178
|
attributes = {"brokers": self.brokers or self.bootstrap_servers}
|
|
118
179
|
if self.kwargs_public:
|
|
@@ -123,7 +184,7 @@ class DatastoreProfileKafkaTarget(DatastoreProfile):
|
|
|
123
184
|
|
|
124
185
|
|
|
125
186
|
class DatastoreProfileKafkaSource(DatastoreProfile):
|
|
126
|
-
type: str = pydantic.Field("kafka_source")
|
|
187
|
+
type: str = pydantic.v1.Field("kafka_source")
|
|
127
188
|
_private_attributes = ("kwargs_private", "sasl_user", "sasl_pass")
|
|
128
189
|
brokers: typing.Union[str, list[str]]
|
|
129
190
|
topics: typing.Union[str, list[str]]
|
|
@@ -135,7 +196,11 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
|
|
|
135
196
|
kwargs_public: typing.Optional[dict]
|
|
136
197
|
kwargs_private: typing.Optional[dict]
|
|
137
198
|
|
|
138
|
-
def
|
|
199
|
+
def get_topic(self) -> typing.Optional[str]:
|
|
200
|
+
topics = [self.topics] if isinstance(self.topics, str) else self.topics
|
|
201
|
+
return topics[0] if topics else None
|
|
202
|
+
|
|
203
|
+
def attributes(self) -> dict[str, typing.Any]:
|
|
139
204
|
attributes = {}
|
|
140
205
|
if self.kwargs_public:
|
|
141
206
|
attributes = merge(attributes, self.kwargs_public)
|
|
@@ -151,18 +216,15 @@ class DatastoreProfileKafkaSource(DatastoreProfile):
|
|
|
151
216
|
attributes["initial_offset"] = self.initial_offset
|
|
152
217
|
if self.partitions is not None:
|
|
153
218
|
attributes["partitions"] = self.partitions
|
|
154
|
-
sasl
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
sasl["user"] = self.sasl_user
|
|
158
|
-
sasl["password"] = self.sasl_pass
|
|
159
|
-
if sasl:
|
|
219
|
+
if sasl := mlrun.datastore.utils.KafkaParameters(attributes).sasl(
|
|
220
|
+
usr=self.sasl_user, pwd=self.sasl_pass
|
|
221
|
+
):
|
|
160
222
|
attributes["sasl"] = sasl
|
|
161
223
|
return attributes
|
|
162
224
|
|
|
163
225
|
|
|
164
226
|
class DatastoreProfileV3io(DatastoreProfile):
|
|
165
|
-
type: str = pydantic.Field("v3io")
|
|
227
|
+
type: str = pydantic.v1.Field("v3io")
|
|
166
228
|
v3io_access_key: typing.Optional[str] = None
|
|
167
229
|
_private_attributes = "v3io_access_key"
|
|
168
230
|
|
|
@@ -178,7 +240,7 @@ class DatastoreProfileV3io(DatastoreProfile):
|
|
|
178
240
|
|
|
179
241
|
|
|
180
242
|
class DatastoreProfileS3(DatastoreProfile):
|
|
181
|
-
type: str = pydantic.Field("s3")
|
|
243
|
+
type: str = pydantic.v1.Field("s3")
|
|
182
244
|
_private_attributes = ("access_key_id", "secret_key")
|
|
183
245
|
endpoint_url: typing.Optional[str] = None
|
|
184
246
|
force_non_anonymous: typing.Optional[str] = None
|
|
@@ -188,7 +250,7 @@ class DatastoreProfileS3(DatastoreProfile):
|
|
|
188
250
|
secret_key: typing.Optional[str] = None
|
|
189
251
|
bucket: typing.Optional[str] = None
|
|
190
252
|
|
|
191
|
-
@pydantic.validator("bucket")
|
|
253
|
+
@pydantic.v1.validator("bucket")
|
|
192
254
|
@classmethod
|
|
193
255
|
def check_bucket(cls, v):
|
|
194
256
|
if not v:
|
|
@@ -226,7 +288,7 @@ class DatastoreProfileS3(DatastoreProfile):
|
|
|
226
288
|
|
|
227
289
|
|
|
228
290
|
class DatastoreProfileRedis(DatastoreProfile):
|
|
229
|
-
type: str = pydantic.Field("redis")
|
|
291
|
+
type: str = pydantic.v1.Field("redis")
|
|
230
292
|
_private_attributes = ("username", "password")
|
|
231
293
|
endpoint_url: str
|
|
232
294
|
username: typing.Optional[str] = None
|
|
@@ -254,7 +316,7 @@ class DatastoreProfileRedis(DatastoreProfile):
|
|
|
254
316
|
query=parsed_url.query,
|
|
255
317
|
fragment=parsed_url.fragment,
|
|
256
318
|
)
|
|
257
|
-
return
|
|
319
|
+
return new_parsed_url.geturl()
|
|
258
320
|
|
|
259
321
|
def secrets(self) -> dict:
|
|
260
322
|
res = {}
|
|
@@ -269,7 +331,7 @@ class DatastoreProfileRedis(DatastoreProfile):
|
|
|
269
331
|
|
|
270
332
|
|
|
271
333
|
class DatastoreProfileDBFS(DatastoreProfile):
|
|
272
|
-
type: str = pydantic.Field("dbfs")
|
|
334
|
+
type: str = pydantic.v1.Field("dbfs")
|
|
273
335
|
_private_attributes = ("token",)
|
|
274
336
|
endpoint_url: typing.Optional[str] = None # host
|
|
275
337
|
token: typing.Optional[str] = None
|
|
@@ -287,13 +349,13 @@ class DatastoreProfileDBFS(DatastoreProfile):
|
|
|
287
349
|
|
|
288
350
|
|
|
289
351
|
class DatastoreProfileGCS(DatastoreProfile):
|
|
290
|
-
type: str = pydantic.Field("gcs")
|
|
352
|
+
type: str = pydantic.v1.Field("gcs")
|
|
291
353
|
_private_attributes = ("gcp_credentials",)
|
|
292
354
|
credentials_path: typing.Optional[str] = None # path to file.
|
|
293
355
|
gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
|
|
294
356
|
bucket: typing.Optional[str] = None
|
|
295
357
|
|
|
296
|
-
@pydantic.validator("bucket")
|
|
358
|
+
@pydantic.v1.validator("bucket")
|
|
297
359
|
@classmethod
|
|
298
360
|
def check_bucket(cls, v):
|
|
299
361
|
if not v:
|
|
@@ -304,7 +366,7 @@ class DatastoreProfileGCS(DatastoreProfile):
|
|
|
304
366
|
)
|
|
305
367
|
return v
|
|
306
368
|
|
|
307
|
-
@pydantic.validator("gcp_credentials", pre=True, always=True)
|
|
369
|
+
@pydantic.v1.validator("gcp_credentials", pre=True, always=True)
|
|
308
370
|
@classmethod
|
|
309
371
|
def convert_dict_to_json(cls, v):
|
|
310
372
|
if isinstance(v, dict):
|
|
@@ -332,7 +394,7 @@ class DatastoreProfileGCS(DatastoreProfile):
|
|
|
332
394
|
|
|
333
395
|
|
|
334
396
|
class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
335
|
-
type: str = pydantic.Field("az")
|
|
397
|
+
type: str = pydantic.v1.Field("az")
|
|
336
398
|
_private_attributes = (
|
|
337
399
|
"connection_string",
|
|
338
400
|
"account_key",
|
|
@@ -350,7 +412,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
350
412
|
credential: typing.Optional[str] = None
|
|
351
413
|
container: typing.Optional[str] = None
|
|
352
414
|
|
|
353
|
-
@pydantic.validator("container")
|
|
415
|
+
@pydantic.v1.validator("container")
|
|
354
416
|
@classmethod
|
|
355
417
|
def check_container(cls, v):
|
|
356
418
|
if not v:
|
|
@@ -392,7 +454,7 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
|
|
|
392
454
|
|
|
393
455
|
|
|
394
456
|
class DatastoreProfileHdfs(DatastoreProfile):
|
|
395
|
-
type: str = pydantic.Field("hdfs")
|
|
457
|
+
type: str = pydantic.v1.Field("hdfs")
|
|
396
458
|
_private_attributes = "token"
|
|
397
459
|
host: typing.Optional[str] = None
|
|
398
460
|
port: typing.Optional[int] = None
|
|
@@ -415,7 +477,60 @@ class DatastoreProfileHdfs(DatastoreProfile):
|
|
|
415
477
|
return f"webhdfs://{self.host}:{self.http_port}{subpath}"
|
|
416
478
|
|
|
417
479
|
|
|
418
|
-
class DatastoreProfile2Json(pydantic.BaseModel):
|
|
480
|
+
class DatastoreProfileTDEngine(DatastoreProfile):
|
|
481
|
+
"""
|
|
482
|
+
A profile that holds the required parameters for a TDEngine database, with the websocket scheme.
|
|
483
|
+
https://docs.tdengine.com/developer-guide/connecting-to-tdengine/#websocket-connection
|
|
484
|
+
"""
|
|
485
|
+
|
|
486
|
+
type: str = pydantic.v1.Field("taosws")
|
|
487
|
+
_private_attributes = ["password"]
|
|
488
|
+
user: str
|
|
489
|
+
# The password cannot be empty in real world scenarios. It's here just because of the profiles completion design.
|
|
490
|
+
password: typing.Optional[str]
|
|
491
|
+
host: str
|
|
492
|
+
port: int
|
|
493
|
+
|
|
494
|
+
def dsn(self) -> str:
|
|
495
|
+
"""Get the Data Source Name of the configured TDEngine profile."""
|
|
496
|
+
return f"{self.type}://{self.user}:{self.password}@{self.host}:{self.port}"
|
|
497
|
+
|
|
498
|
+
@classmethod
|
|
499
|
+
def from_dsn(cls, dsn: str, profile_name: str) -> "DatastoreProfileTDEngine":
|
|
500
|
+
"""
|
|
501
|
+
Construct a TDEngine profile from DSN (connection string) and a name for the profile.
|
|
502
|
+
|
|
503
|
+
:param dsn: The DSN (Data Source Name) of the TDEngine database, e.g.: ``"taosws://root:taosdata@localhost:6041"``.
|
|
504
|
+
:param profile_name: The new profile's name.
|
|
505
|
+
:return: The TDEngine profile.
|
|
506
|
+
"""
|
|
507
|
+
parsed_url = urlparse(dsn)
|
|
508
|
+
return cls(
|
|
509
|
+
name=profile_name,
|
|
510
|
+
user=parsed_url.username,
|
|
511
|
+
password=parsed_url.password,
|
|
512
|
+
host=parsed_url.hostname,
|
|
513
|
+
port=parsed_url.port,
|
|
514
|
+
)
|
|
515
|
+
|
|
516
|
+
|
|
517
|
+
_DATASTORE_TYPE_TO_PROFILE_CLASS: dict[str, type[DatastoreProfile]] = {
|
|
518
|
+
"v3io": DatastoreProfileV3io,
|
|
519
|
+
"s3": DatastoreProfileS3,
|
|
520
|
+
"redis": DatastoreProfileRedis,
|
|
521
|
+
"basic": DatastoreProfileBasic,
|
|
522
|
+
"kafka_target": DatastoreProfileKafkaTarget,
|
|
523
|
+
"kafka_source": DatastoreProfileKafkaSource,
|
|
524
|
+
"dbfs": DatastoreProfileDBFS,
|
|
525
|
+
"gcs": DatastoreProfileGCS,
|
|
526
|
+
"az": DatastoreProfileAzureBlob,
|
|
527
|
+
"hdfs": DatastoreProfileHdfs,
|
|
528
|
+
"taosws": DatastoreProfileTDEngine,
|
|
529
|
+
"config": ConfigProfile,
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
|
|
533
|
+
class DatastoreProfile2Json(pydantic.v1.BaseModel):
|
|
419
534
|
@staticmethod
|
|
420
535
|
def _to_json(attributes):
|
|
421
536
|
# First, base64 encode the values
|
|
@@ -465,18 +580,7 @@ class DatastoreProfile2Json(pydantic.BaseModel):
|
|
|
465
580
|
|
|
466
581
|
decoded_dict = {k: safe_literal_eval(v) for k, v in decoded_dict.items()}
|
|
467
582
|
datastore_type = decoded_dict.get("type")
|
|
468
|
-
ds_profile_factory = {
|
|
469
|
-
"v3io": DatastoreProfileV3io,
|
|
470
|
-
"s3": DatastoreProfileS3,
|
|
471
|
-
"redis": DatastoreProfileRedis,
|
|
472
|
-
"basic": DatastoreProfileBasic,
|
|
473
|
-
"kafka_target": DatastoreProfileKafkaTarget,
|
|
474
|
-
"kafka_source": DatastoreProfileKafkaSource,
|
|
475
|
-
"dbfs": DatastoreProfileDBFS,
|
|
476
|
-
"gcs": DatastoreProfileGCS,
|
|
477
|
-
"az": DatastoreProfileAzureBlob,
|
|
478
|
-
"hdfs": DatastoreProfileHdfs,
|
|
479
|
-
}
|
|
583
|
+
ds_profile_factory = _DATASTORE_TYPE_TO_PROFILE_CLASS
|
|
480
584
|
if datastore_type in ds_profile_factory:
|
|
481
585
|
return ds_profile_factory[datastore_type].parse_obj(decoded_dict)
|
|
482
586
|
else:
|
|
@@ -489,7 +593,36 @@ class DatastoreProfile2Json(pydantic.BaseModel):
|
|
|
489
593
|
)
|
|
490
594
|
|
|
491
595
|
|
|
492
|
-
def datastore_profile_read(url, project_name="", secrets: dict = None):
|
|
596
|
+
def datastore_profile_read(url, project_name="", secrets: typing.Optional[dict] = None):
|
|
597
|
+
"""
|
|
598
|
+
Read and retrieve a datastore profile from a given URL.
|
|
599
|
+
|
|
600
|
+
This function retrieves a datastore profile either from temporary client storage,
|
|
601
|
+
or from the MLRun database. It handles both client-side and server-side profile formats
|
|
602
|
+
and performs necessary conversions.
|
|
603
|
+
|
|
604
|
+
Args:
|
|
605
|
+
url (str): A URL with 'ds' scheme pointing to the datastore profile
|
|
606
|
+
(e.g., 'ds://profile-name').
|
|
607
|
+
project_name (str, optional): The project name where the profile is stored.
|
|
608
|
+
Defaults to MLRun's default project.
|
|
609
|
+
secrets (dict, optional): Dictionary containing secrets needed for profile retrieval.
|
|
610
|
+
|
|
611
|
+
Returns:
|
|
612
|
+
DatastoreProfile: The retrieved datastore profile object.
|
|
613
|
+
|
|
614
|
+
Raises:
|
|
615
|
+
MLRunInvalidArgumentError: In the following cases:
|
|
616
|
+
- If the URL scheme is not 'ds'
|
|
617
|
+
- If the profile cannot be retrieved from either server or local environment
|
|
618
|
+
|
|
619
|
+
Note:
|
|
620
|
+
When running from a client environment (outside MLRun pods), private profile information
|
|
621
|
+
is not accessible. In this case, use register_temporary_client_datastore_profile() to
|
|
622
|
+
register the profile with credentials for your local session. When running inside MLRun
|
|
623
|
+
pods, the private information is automatically available and no temporary registration is needed.
|
|
624
|
+
"""
|
|
625
|
+
|
|
493
626
|
parsed_url = urlparse(url)
|
|
494
627
|
if parsed_url.scheme.lower() != "ds":
|
|
495
628
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
@@ -521,7 +654,7 @@ def datastore_profile_read(url, project_name="", secrets: dict = None):
|
|
|
521
654
|
)
|
|
522
655
|
private_body = get_secret_or_env(project_ds_name_private, secret_provider=secrets)
|
|
523
656
|
if not public_profile or not private_body:
|
|
524
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
657
|
+
raise mlrun.errors.MLRunNotFoundError(
|
|
525
658
|
f"Unable to retrieve the datastore profile '{url}' from either the server or local environment. "
|
|
526
659
|
"Make sure the profile is registered correctly, or if running in a local environment, "
|
|
527
660
|
"use register_temporary_client_datastore_profile() to provide credentials locally."
|
mlrun/datastore/dbfs_store.py
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
15
|
import pathlib
|
|
16
|
+
from typing import Optional
|
|
16
17
|
|
|
17
18
|
from fsspec.implementations.dbfs import DatabricksFile, DatabricksFileSystem
|
|
18
19
|
from fsspec.registry import get_filesystem_class
|
|
@@ -81,7 +82,9 @@ class DatabricksFileSystemDisableCache(DatabricksFileSystem):
|
|
|
81
82
|
|
|
82
83
|
# dbfs objects will be represented with the following URL: dbfs://<path>
|
|
83
84
|
class DBFSStore(DataStore):
|
|
84
|
-
def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
|
|
85
|
+
def __init__(
|
|
86
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
87
|
+
):
|
|
85
88
|
super().__init__(parent, name, schema, endpoint, secrets=secrets)
|
|
86
89
|
|
|
87
90
|
@property
|
mlrun/datastore/filestore.py
CHANGED
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
import time
|
|
15
15
|
from os import listdir, makedirs, path, stat
|
|
16
16
|
from shutil import copyfile
|
|
17
|
+
from typing import Optional
|
|
17
18
|
|
|
18
19
|
import fsspec
|
|
19
20
|
|
|
@@ -23,7 +24,9 @@ from .base import DataStore, FileStats
|
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
class FileStore(DataStore):
|
|
26
|
-
def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
|
|
27
|
+
def __init__(
|
|
28
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
29
|
+
):
|
|
27
30
|
super().__init__(parent, name, "file", endpoint, secrets=secrets)
|
|
28
31
|
|
|
29
32
|
self._item_path, self._real_path = None, None
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
import json
|
|
15
15
|
import os
|
|
16
16
|
from pathlib import Path
|
|
17
|
+
from typing import Optional
|
|
17
18
|
|
|
18
19
|
from fsspec.registry import get_filesystem_class
|
|
19
20
|
from google.auth.credentials import Credentials
|
|
@@ -33,7 +34,9 @@ class GoogleCloudStorageStore(DataStore):
|
|
|
33
34
|
workers = 8
|
|
34
35
|
chunk_size = 32 * 1024 * 1024
|
|
35
36
|
|
|
36
|
-
def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
|
|
37
|
+
def __init__(
|
|
38
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
39
|
+
):
|
|
37
40
|
super().__init__(parent, name, schema, endpoint, secrets=secrets)
|
|
38
41
|
self._storage_client = None
|
|
39
42
|
self._storage_options = None
|
mlrun/datastore/hdfs.py
CHANGED
|
@@ -12,6 +12,7 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import os
|
|
15
|
+
from typing import Optional
|
|
15
16
|
from urllib.parse import urlparse
|
|
16
17
|
|
|
17
18
|
import fsspec
|
|
@@ -20,7 +21,9 @@ from mlrun.datastore.base import DataStore
|
|
|
20
21
|
|
|
21
22
|
|
|
22
23
|
class HdfsStore(DataStore):
|
|
23
|
-
def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
|
|
24
|
+
def __init__(
|
|
25
|
+
self, parent, schema, name, endpoint="", secrets: Optional[dict] = None
|
|
26
|
+
):
|
|
24
27
|
super().__init__(parent, name, schema, endpoint, secrets)
|
|
25
28
|
|
|
26
29
|
self.host = self._get_secret_or_env("HDFS_HOST")
|