mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +26 -112
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +46 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +47 -48
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +69 -0
- mlrun/common/db/sql_session.py +2 -3
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/common/formatters/artifact.py +21 -0
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/helpers.py +1 -2
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +24 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +18 -8
- mlrun/common/schemas/auth.py +11 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -1
- mlrun/common/schemas/feature_store.py +16 -16
- mlrun/common/schemas/frontend_spec.py +8 -7
- mlrun/common/schemas/function.py +5 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +18 -3
- mlrun/common/schemas/model_monitoring/constants.py +83 -26
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
- mlrun/common/schemas/notification.py +4 -4
- mlrun/common/schemas/object.py +2 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +1 -10
- mlrun/common/schemas/project.py +24 -23
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +3 -3
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +2 -2
- mlrun/common/types.py +7 -1
- mlrun/config.py +54 -17
- mlrun/data_types/to_pandas.py +10 -12
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +17 -5
- mlrun/datastore/base.py +62 -39
- mlrun/datastore/datastore.py +28 -9
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/filestore.py +0 -1
- mlrun/datastore/google_cloud_storage.py +6 -2
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +6 -2
- mlrun/datastore/s3.py +9 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +201 -96
- mlrun/datastore/spark_utils.py +1 -2
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +358 -104
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +5 -1
- mlrun/db/base.py +185 -35
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +614 -179
- mlrun/db/nopdb.py +210 -26
- mlrun/errors.py +12 -1
- mlrun/execution.py +41 -24
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +40 -72
- mlrun/feature_store/common.py +1 -1
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +28 -30
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/conversion.py +11 -13
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +37 -34
- mlrun/features.py +9 -20
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +2 -3
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +4 -3
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +14 -16
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +17 -11
- mlrun/launcher/remote.py +16 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +238 -73
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +138 -315
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +24 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +104 -84
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +127 -28
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/prometheus.py +1 -4
- mlrun/model_monitoring/stream_processing.py +62 -231
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +6 -6
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +35 -21
- mlrun/projects/pipelines.py +68 -99
- mlrun/projects/project.py +830 -266
- mlrun/render.py +3 -11
- mlrun/run.py +162 -166
- mlrun/runtimes/__init__.py +62 -7
- mlrun/runtimes/base.py +39 -32
- mlrun/runtimes/daskjob.py +8 -8
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +6 -3
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
- mlrun/runtimes/pod.py +286 -88
- mlrun/runtimes/remotesparkjob.py +2 -2
- mlrun/runtimes/sparkjob/spark3job.py +51 -34
- mlrun/runtimes/utils.py +7 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +13 -10
- mlrun/serving/server.py +22 -26
- mlrun/serving/states.py +99 -25
- mlrun/serving/utils.py +3 -3
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +59 -20
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +1 -2
- mlrun/utils/async_http.py +5 -7
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +3 -3
- mlrun/utils/helpers.py +183 -197
- mlrun/utils/http.py +2 -5
- mlrun/utils/logger.py +76 -14
- mlrun/utils/notifications/notification/__init__.py +17 -12
- mlrun/utils/notifications/notification/base.py +14 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +155 -30
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +2 -4
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc2.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/common/schemas/tag.py
CHANGED
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-import typing
 
 import pydantic
 
@@ -29,4 +28,4 @@ class TagObjects(pydantic.BaseModel):
 
     kind: str
     # TODO: Add more types to the list for new supported tagged objects
-    identifiers: typing.List[ArtifactIdentifier]
+    identifiers: list[ArtifactIdentifier]
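
Both hunks are one cleanup: with PEP 585 builtin generics (Python 3.9+), `list[ArtifactIdentifier]` replaces `typing.List[...]`, so the `typing` import becomes dead. A minimal sketch of the updated model in use; the `ArtifactIdentifier` stand-in below is hypothetical, the real schema defines its own fields:

    import pydantic


    class ArtifactIdentifier(pydantic.BaseModel):
        # hypothetical stand-in; the real schema carries more identifier fields
        key: str


    class TagObjects(pydantic.BaseModel):
        kind: str
        identifiers: list[ArtifactIdentifier]  # builtin generic, no typing.List


    print(TagObjects(kind="artifact", identifiers=[ArtifactIdentifier(key="model")]))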
mlrun/common/schemas/workflow.py
CHANGED
@@ -36,12 +36,12 @@ class WorkflowSpec(pydantic.BaseModel):
 
 class WorkflowRequest(pydantic.BaseModel):
     spec: typing.Optional[WorkflowSpec] = None
-    arguments: typing.Optional[typing.Dict] = None
+    arguments: typing.Optional[dict] = None
     artifact_path: typing.Optional[str] = None
     source: typing.Optional[str] = None
     run_name: typing.Optional[str] = None
     namespace: typing.Optional[str] = None
-    notifications: typing.Optional[typing.List[Notification]] = None
+    notifications: typing.Optional[list[Notification]] = None
 
 
 class WorkflowResponse(pydantic.BaseModel):
mlrun/common/types.py
CHANGED
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#
 
 import enum
 
@@ -23,3 +22,10 @@ class StrEnum(str, enum.Enum):
 
     def __repr__(self):
         return self.value
+
+
+# Partial backport from Python 3.11
+# https://docs.python.org/3/library/http.html#http.HTTPMethod
+class HTTPMethod(StrEnum):
+    GET = "GET"
+    POST = "POST"
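
Because `StrEnum` mixes in `str`, the new `HTTPMethod` members behave as plain strings wherever an HTTP verb is expected. A small sketch of that behavior (standard `str`-mixin enum semantics):

    from mlrun.common.types import HTTPMethod

    assert HTTPMethod.GET == "GET"  # str subclass: compares equal to the raw string
    assert HTTPMethod.POST.value == "POST"
    # so members can be passed wherever a verb string is expected, e.g. (illustrative):
    # requests.request(HTTPMethod.GET.value, "http://mlrun-api:8080/api/v1/healthz")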
mlrun/config.py
CHANGED
@@ -17,7 +17,7 @@ Configuration system.
 Configuration can be in either a configuration file specified by
 MLRUN_CONFIG_FILE environment variable or by environment variables.
 
-Environment variables are in the format "MLRUN_httpdb__port=8080". This will be
+Environment variables are in the format "MLRUN_HTTPDB__PORT=8080". This will be
 mapped to config.httpdb.port. Values should be in JSON format.
 """
 
@@ -37,6 +37,7 @@ import dotenv
 import semver
 import yaml
 
+import mlrun.common.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -87,7 +88,7 @@ default_config = {
    "mpijob_crd_version": "",  # mpijob crd version (e.g: "v1alpha1". must be in: mlrun.runtime.MPIJobCRDVersions)
    "ipython_widget": True,
    "log_level": "INFO",
-    # log formatter (options: human | json)
+    # log formatter (options: human | human_extended | json)
    "log_formatter": "human",
    "submit_timeout": "180",  # timeout when submitting a new k8s resource
    # runtimes cleanup interval in seconds
@@ -149,7 +150,6 @@
        "url": "",
    },
    "v3io_framesd": "http://framesd:8080",
-    "datastore": {"async_source_mode": "disabled"},
    # default node selector to be applied to all functions - json string base64 encoded format
    "default_function_node_selector": "e30=",
    # default priority class to be applied to functions running on k8s cluster
@@ -189,6 +189,7 @@
    "background_tasks": {
        # enabled / disabled
        "timeout_mode": "enabled",
+        "function_deletion_batch_size": 10,
        # timeout in seconds to wait for background task to be updated / finished by the worker responsible for the task
        "default_timeouts": {
            "operations": {
@@ -197,6 +198,7 @@
                "run_abortion": "600",
                "abort_grace_period": "10",
                "delete_project": "900",
+                "delete_function": "900",
            },
            "runtimes": {"dask": "600"},
        },
@@ -231,6 +233,10 @@
        "databricks": {
            "artifact_directory_path": "/mlrun_databricks_runtime/artifacts_dictionaries"
        },
+        "application": {
+            "default_sidecar_internal_port": 8050,
+            "default_authentication_mode": "accessKey",
+        },
    },
    # TODO: function defaults should be moved to the function spec config above
    "function_defaults": {
@@ -241,6 +247,7 @@
            "remote": "mlrun/mlrun",
            "dask": "mlrun/ml-base",
            "mpijob": "mlrun/mlrun",
+            "application": "python:3.9-slim",
        },
        # see enrich_function_preemption_spec for more info,
        # and mlrun.common.schemas.function.PreemptionModes for available options
@@ -325,7 +332,13 @@
            # optional values (as per https://dev.mysql.com/doc/refman/8.0/en/sql-mode.html#sql-mode-full):
            #
            # if set to "nil" or "none", nothing would be set
-            "modes": "STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION",
+            "modes": (
+                "STRICT_TRANS_TABLES"
+                ",NO_ZERO_IN_DATE"
+                ",NO_ZERO_DATE"
+                ",ERROR_FOR_DIVISION_BY_ZERO"
+                ",NO_ENGINE_SUBSTITUTION"
+            ),
        },
    },
    "jobs": {
@@ -353,10 +366,12 @@
        # is set to ClusterIP
        # ---------------------------------------------------------------------
        # Note: adding a mode requires special handling on
-        # - mlrun.runtimes.constants.NuclioIngressAddTemplatedIngressModes
-        # - mlrun.runtimes.function.enrich_function_with_ingress
+        # - mlrun.common.runtimes.constants.NuclioIngressAddTemplatedIngressModes
+        # - mlrun.runtimes.nuclio.function.enrich_function_with_ingress
        "add_templated_ingress_host_mode": "never",
        "explicit_ack": "enabled",
+        # size of serving spec to move to config maps
+        "serving_spec_env_cutoff": 0,
    },
    "logs": {
        "decode": {
@@ -474,6 +489,14 @@
        # if set to true, will log a warning for trying to use run db functionality while in nop db mode
        "verbose": True,
    },
+    "pagination": {
+        "default_page_size": 20,
+        "pagination_cache": {
+            "interval": 60,
+            "ttl": 3600,
+            "max_size": 10000,
+        },
+    },
 },
 "model_endpoint_monitoring": {
    "serving_stream_args": {"shard_count": 1, "retention_period_hours": 24},
@@ -485,6 +508,7 @@
        "default": "v3io:///users/pipelines/{project}/model-endpoints/{kind}",
        "user_space": "v3io:///projects/{project}/model-endpoints/{kind}",
        "stream": "",
+        "monitoring_application": "v3io:///users/pipelines/{project}/monitoring-apps/",
    },
    # Offline storage path can be either relative or a full path. This path is used for general offline data
    # storage such as the parquet file which is generated from the monitoring stream function for the drift analysis
@@ -493,12 +517,14 @@
    # when the user is working in CE environment and has not provided any stream path.
    "default_http_sink": "http://nuclio-{project}-model-monitoring-stream.{namespace}.svc.cluster.local:8080",
    "default_http_sink_app": "http://nuclio-{project}-{application_name}.{namespace}.svc.cluster.local:8080",
-    "batch_processing_function_branch": "master",
    "parquet_batching_max_events": 10_000,
    "parquet_batching_timeout_secs": timedelta(minutes=1).total_seconds(),
-    # See mlrun.model_monitoring.stores.ObjectStoreFactory for available options
+    # See mlrun.model_monitoring.db.stores.ObjectStoreFactory for available options
    "store_type": "v3io-nosql",
    "endpoint_store_connection": "",
+    # See mlrun.model_monitoring.db.tsdb.ObjectTSDBFactory for available options
+    "tsdb_connector_type": "v3io-tsdb",
+    "tsdb_connection": "",
 },
 "secret_stores": {
    # Use only in testing scenarios (such as integration tests) to avoid using k8s for secrets (will use in-memory
@@ -534,9 +560,10 @@
 "feature_store": {
    "data_prefixes": {
        "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
-        "nosql": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
+        "nosql": "v3io:///projects/{project}/FeatureStore/{name}/nosql",
        # "authority" is optional and generalizes [userinfo "@"] host [":" port]
-        "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}",
+        "redisnosql": "redis://{authority}/projects/{project}/FeatureStore/{name}/nosql",
+        "dsnosql": "ds://{ds_profile_name}/projects/{project}/FeatureStore/{name}/{kind}",
    },
    "default_targets": "parquet,nosql",
    "default_job_image": "mlrun/mlrun",
@@ -672,6 +699,10 @@
        "access_key": "",
    },
    "grafana_url": "",
+    "alerts": {
+        # supported modes: "enabled", "disabled".
+        "mode": "enabled"
+    },
    "auth_with_client_id": {
        "enabled": False,
        "request_timeout": 5,
@@ -937,6 +968,10 @@ class Config:
            self.httpdb.clusterization.chief.url = chief_api_url
        return self.httpdb.clusterization.chief.url
 
+    @staticmethod
+    def internal_labels():
+        return mlrun.common.constants.MLRunInternalLabels.all()
+
    @staticmethod
    def get_storage_auto_mount_params():
        auto_mount_params = {}
@@ -1065,7 +1100,8 @@
        kind: str = "",
        target: str = "online",
        artifact_path: str = None,
-        application_name: str = None,
+        function_name: str = None,
+        **kwargs,
    ) -> typing.Union[str, list[str]]:
        """Get the full path from the configuration based on the provided project and kind.
 
@@ -1080,7 +1116,7 @@
                             artifact path instead.
        :param artifact_path: Optional artifact path that will be used as a relative path. If not provided, the
                             relative artifact path will be taken from the global MLRun artifact path.
-        :param application_name: Application name, None for model_monitoring_stream.
+        :param function_name: Application name, None for model_monitoring_stream.
 
        :return: Full configured path for the provided kind. Can be either a single path
                 or a list of paths in the case of the online model monitoring stream path.
@@ -1092,17 +1128,18 @@
        )
        if store_prefix_dict.get(kind):
            # Target exist in store prefix and has a valid string value
-            return store_prefix_dict[kind].format(project=project)
+            return store_prefix_dict[kind].format(project=project, **kwargs)
 
        if (
-            application_name
+            function_name
+            and function_name
            != mlrun.common.schemas.model_monitoring.constants.MonitoringFunctionNames.STREAM
        ):
            return mlrun.mlconf.model_endpoint_monitoring.store_prefixes.user_space.format(
                project=project,
                kind=kind
-                if application_name is None
-                else f"{kind}-{application_name.lower()}",
+                if function_name is None
+                else f"{kind}-{function_name.lower()}",
            )
        elif kind == "stream":  # return list for mlrun<1.6.3 BC
            return [
@@ -1148,7 +1185,7 @@
            ver in mlrun.mlconf.ce.mode for ver in ["lite", "full"]
        )
 
-    def get_s3_storage_options(self) -> typing.Dict[str, typing.Any]:
+    def get_s3_storage_options(self) -> dict[str, typing.Any]:
        """
        Generate storage options dictionary as required for handling S3 path in fsspec. The model monitoring stream
        graph uses this method for generating the storage options for S3 parquet target path.
mlrun/data_types/to_pandas.py
CHANGED
@@ -65,10 +65,10 @@ def toPandas(spark_df):
                msg = (
                    "toPandas attempted Arrow optimization because "
                    "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n  %s\n"
+                    f"failed by the reason below:\n  {e}\n"
                    "Attempting non-optimization as "
                    "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true." % str(e)
+                    "true."
                )
                warnings.warn(msg)
                use_arrow = False
@@ -78,7 +78,7 @@ def toPandas(spark_df):
                "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                "reached the error below and will not continue because automatic fallback "
                "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                "false.\n  %s" % str(e)
+                f"false.\n  {e}"
            )
            warnings.warn(msg)
            raise
@@ -94,9 +94,7 @@ def toPandas(spark_df):
            )
 
            # Rename columns to avoid duplicated column names.
-            tmp_column_names = [
-                "col_{}".format(i) for i in range(len(spark_df.columns))
-            ]
+            tmp_column_names = [f"col_{i}" for i in range(len(spark_df.columns))]
            self_destruct = spark_df.sql_ctx._conf.arrowPySparkSelfDestructEnabled()
            batches = spark_df.toDF(*tmp_column_names)._collect_as_arrow(
                split_batches=self_destruct
@@ -146,7 +144,7 @@ def toPandas(spark_df):
                "reached the error below and can not continue. Note that "
                "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                "effect on failures in the middle of "
-                "computation.\n  %s" % str(e)
+                f"computation.\n  {e}"
            )
            warnings.warn(msg)
            raise
@@ -156,10 +154,10 @@ def toPandas(spark_df):
        column_counter = Counter(spark_df.columns)
 
        dtype = [None] * len(spark_df.schema)
-        for fieldIdx, field in enumerate(spark_df.schema):
+        for field_idx, field in enumerate(spark_df.schema):
            # For duplicate column name, we use `iloc` to access it.
            if column_counter[field.name] > 1:
-                pandas_col = pdf.iloc[:, fieldIdx]
+                pandas_col = pdf.iloc[:, field_idx]
            else:
                pandas_col = pdf[field.name]
 
@@ -173,12 +171,12 @@ def toPandas(spark_df):
                and field.nullable
                and pandas_col.isnull().any()
            ):
-                dtype[fieldIdx] = pandas_type
+                dtype[field_idx] = pandas_type
            # Ensure we fall back to nullable numpy types, even when whole column is null:
            if isinstance(field.dataType, IntegralType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = np.float64
+                dtype[field_idx] = np.float64
            if isinstance(field.dataType, BooleanType) and pandas_col.isnull().any():
-                dtype[fieldIdx] = object
+                dtype[field_idx] = object
 
        df = pd.DataFrame()
        for index, t in enumerate(dtype):
mlrun/datastore/__init__.py
CHANGED
@@ -64,7 +64,7 @@ from .store_resources import (
    parse_store_uri,
 )
 from .targets import CSVTarget, NoSqlTarget, ParquetTarget, StreamTarget
-from .utils import parse_kafka_url
+from .utils import get_kafka_brokers_from_dict, parse_kafka_url
 
 store_manager = StoreManager()
 
@@ -107,13 +107,10 @@ def get_stream_pusher(stream_path: str, **kwargs):
    :param stream_path: path/url of stream
    """
 
-    if stream_path.startswith("kafka://") or "kafka_bootstrap_servers" in kwargs:
-        topic, bootstrap_servers = parse_kafka_url(
-            stream_path, kwargs.get("kafka_bootstrap_servers")
-        )
-        return KafkaOutputStream(
-            topic, bootstrap_servers, kwargs.get("kafka_producer_options")
-        )
+    kafka_brokers = get_kafka_brokers_from_dict(kwargs)
+    if stream_path.startswith("kafka://") or kafka_brokers:
+        topic, brokers = parse_kafka_url(stream_path, kafka_brokers)
+        return KafkaOutputStream(topic, brokers, kwargs.get("kafka_producer_options"))
    elif stream_path.startswith("http://") or stream_path.startswith("https://"):
        return HTTPOutputStream(stream_path=stream_path)
    elif "://" not in stream_path:
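
The refactor routes broker discovery through the new `get_kafka_brokers_from_dict` helper, so brokers embedded in the URL and brokers passed via kwargs take the same code path. A hedged usage sketch, assuming the `kafka://<broker>/<topic>` URL form accepted by `parse_kafka_url`:

    from mlrun.datastore import get_stream_pusher

    # broker taken from the URL netloc; producer options are passed through as-is
    pusher = get_stream_pusher(
        "kafka://localhost:9092/my-topic",
        kafka_producer_options={"acks": "all"},  # illustrative producer option
    )
    pusher.push({"id": 1, "value": 0.5})  # assumption: push() accepts dict records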
mlrun/datastore/alibaba_oss.py
ADDED
@@ -0,0 +1,130 @@
+# Copyright 2023 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+from datetime import datetime
+from pathlib import Path
+from urllib.parse import urlparse
+
+import oss2
+from fsspec.registry import get_filesystem_class
+
+import mlrun.errors
+
+from .base import DataStore, FileStats, makeDatastoreSchemaSanitizer
+
+
+class OSSStore(DataStore):
+    using_bucket = True
+
+    def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
+        super().__init__(parent, name, schema, endpoint, secrets)
+        # will be used in case user asks to assume a role and work through fsspec
+
+        access_key_id = self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID")
+        secret_key = self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY")
+        endpoint_url = self._get_secret_or_env("ALIBABA_ENDPOINT_URL")
+        if access_key_id and secret_key and endpoint_url:
+            self.auth = oss2.Auth(access_key_id, secret_key)
+            self.endpoint_url = endpoint_url
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "missing ALIBABA_ACCESS_KEY_ID or ALIBABA_SECRET_ACCESS_KEY ALIBABA_ENDPOINT_URL in environment"
+            )
+
+    @property
+    def filesystem(self):
+        """return fsspec file system object, if supported"""
+        if self._filesystem:
+            return self._filesystem
+        try:
+            import ossfs  # noqa
+        except ImportError as exc:
+            raise ImportError("ALIBABA ossfs not installed") from exc
+        filesystem_class = get_filesystem_class(protocol=self.kind)
+        self._filesystem = makeDatastoreSchemaSanitizer(
+            filesystem_class,
+            using_bucket=self.using_bucket,
+            **self.get_storage_options(),
+        )
+        return self._filesystem
+
+    def get_storage_options(self):
+        res = dict(
+            endpoint=self._get_secret_or_env("ALIBABA_ENDPOINT_URL"),
+            key=self._get_secret_or_env("ALIBABA_ACCESS_KEY_ID"),
+            secret=self._get_secret_or_env("ALIBABA_SECRET_ACCESS_KEY"),
+        )
+        return self._sanitize_storage_options(res)
+
+    def get_bucket_and_key(self, key):
+        path = self._join(key)[1:]
+        return self.endpoint, path
+
+    def upload(self, key, src_path):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, open(src_path, "rb"))
+
+    def get(self, key, size=None, offset=0):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        if size or offset:
+            return oss.get_object(key, byte_range=self.get_range(size, offset)).read()
+        return oss.get_object(key).read()
+
+    def put(self, key, data, append=False):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.put_object(key, data)
+
+    def stat(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        obj = oss.get_object_meta(key)
+        size = obj.content_length
+        modified = datetime.fromtimestamp(obj.last_modified)
+        return FileStats(size, time.mktime(modified.timetuple()))
+
+    def listdir(self, key):
+        remote_path = self._convert_key_to_remote_path(key)
+        if self.filesystem.isfile(remote_path):
+            return key
+        remote_path = f"{remote_path}/**"
+        files = self.filesystem.glob(remote_path)
+        key_length = len(key)
+        files = [
+            f.split("/", 1)[1][key_length:] for f in files if len(f.split("/")) > 1
+        ]
+        return files
+
+    def delete(self, key):
+        bucket, key = self.get_bucket_and_key(key)
+        oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
+        oss.delete_object(key)
+
+    def _convert_key_to_remote_path(self, key):
+        key = key.strip("/")
+        schema = urlparse(key).scheme
+        # if called without passing dataitem - like in fset.purge_targets,
+        # key will include schema.
+        if not schema:
+            key = Path(self.endpoint, key).as_posix()
+        return key
+
+    @staticmethod
+    def get_range(size, offset):
+        if size:
+            return [offset, size]
+        return [offset, None]
mlrun/datastore/azure_blob.py
CHANGED
@@ -158,18 +158,17 @@ class AzureBlobStore(DataStore):
            st[key] = parsed_value
 
        account_name = st.get("account_name")
-        if not account_name:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "Property 'account_name' is absent both in storage settings and connection string"
-            )
        if primary_url:
            if primary_url.startswith("http://"):
                primary_url = primary_url[len("http://") :]
            if primary_url.startswith("https://"):
                primary_url = primary_url[len("https://") :]
            host = primary_url
-        else:
+        elif account_name:
            host = f"{account_name}.{service}.core.windows.net"
+        else:
+            return res
+
        if "account_key" in st:
            res[f"spark.hadoop.fs.azure.account.key.{host}"] = st["account_key"]
 
@@ -199,3 +198,16 @@
            )
            res[f"spark.hadoop.fs.azure.sas.fixed.token.{host}"] = st["sas_token"]
        return res
+
+    @property
+    def spark_url(self):
+        spark_options = self.get_spark_options()
+        url = f"wasbs://{self.endpoint}"
+        prefix = "spark.hadoop.fs.azure.account.key."
+        if spark_options:
+            for key in spark_options:
+                if key.startswith(prefix):
+                    account_key = key[len(prefix) :]
+                    url += f"@{account_key}"
+                    break
+        return url