mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/spark_utils.py
CHANGED
|
@@ -12,12 +12,14 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
|
|
15
|
+
|
|
16
|
+
from typing import Union
|
|
16
17
|
|
|
17
18
|
import mlrun
|
|
19
|
+
from mlrun.features import Entity
|
|
18
20
|
|
|
19
21
|
|
|
20
|
-
def spark_session_update_hadoop_options(session, spark_options) -> Dict[str, str]:
|
|
22
|
+
def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
|
|
21
23
|
hadoop_conf = session.sparkContext._jsc.hadoopConfiguration()
|
|
22
24
|
non_hadoop_spark_options = {}
|
|
23
25
|
|
|
@@ -36,3 +38,30 @@ def spark_session_update_hadoop_options(session, spark_options) -> Dict[str, str
|
|
|
36
38
|
else:
|
|
37
39
|
non_hadoop_spark_options[key] = value
|
|
38
40
|
return non_hadoop_spark_options
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def check_special_columns_exists(
    spark_df, entities: list[Union[Entity, str]], timestamp_key: str, label_column: str
):
    """Verify that the entity, timestamp and label columns exist in a dataframe.

    The checks run in a fixed order (entities, then timestamp key, then label
    column) and the first failing one raises. Column names are compared exactly,
    which is why every error message hints at letter-case mismatches.

    :param spark_df:      dataframe-like object exposing a ``columns`` attribute
    :param entities:      entity names or ``Entity`` objects (their ``name`` is used);
                          ``None`` or empty skips the entity check
    :param timestamp_key: name of the timestamp column; a falsy value skips the check
    :param label_column:  name of the label column; a falsy value skips the check
    :raises mlrun.errors.MLRunInvalidArgumentError: when a requested column is absent
    """
    cases_message = "Please check the letter cases (uppercase or lowercase)"
    available_columns = spark_df.columns

    # Normalize the entities to plain column names before looking them up.
    entity_names = []
    for item in entities or []:
        entity_names.append(item.name if isinstance(item, Entity) else item)

    missing_entities = [
        name for name in entity_names if name not in available_columns
    ]
    if missing_entities:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"There are missing entities from dataframe during ingestion. missing_entities: {missing_entities}."
            f" {cases_message}"
        )

    timestamp_missing = bool(timestamp_key) and timestamp_key not in available_columns
    if timestamp_missing:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"timestamp_key is missing from dataframe during ingestion. timestamp_key: {timestamp_key}."
            f" {cases_message}"
        )

    label_missing = bool(label_column) and label_column not in available_columns
    if label_missing:
        raise mlrun.errors.MLRunInvalidArgumentError(
            f"label_column is missing from dataframe during ingestion. label_column: {label_column}. "
            f"{cases_message}"
        )
|
|
mlrun/datastore/store_resources.py
CHANGED
|
@@ -17,7 +17,7 @@
|
|
|
17
17
|
import mlrun
|
|
18
18
|
import mlrun.artifacts
|
|
19
19
|
from mlrun.config import config
|
|
20
|
-
from mlrun.utils.helpers import is_legacy_artifact, parse_artifact_uri
|
|
20
|
+
from mlrun.utils.helpers import parse_artifact_uri
|
|
21
21
|
|
|
22
22
|
from ..common.helpers import parse_versioned_object_uri
|
|
23
23
|
from ..platforms.iguazio import parse_path
|
|
@@ -27,6 +27,8 @@ from .targets import get_online_target
|
|
|
27
27
|
|
|
28
28
|
def is_store_uri(url):
    """Tell whether ``url`` begins with the store schema prefix.

    A falsy ``url`` (``None`` or an empty string) is never a store URI, so the
    prefix comparison is only attempted for non-empty values.
    """
    return bool(url) and url.startswith(DB_SCHEMA + "://")
|
|
31
33
|
|
|
32
34
|
|
|
@@ -146,7 +148,11 @@ def get_store_resource(
|
|
|
146
148
|
|
|
147
149
|
db = db or mlrun.get_run_db(secrets=secrets)
|
|
148
150
|
kind, uri = parse_store_uri(uri)
|
|
149
|
-
if kind == StorePrefix.FeatureSet:
|
|
151
|
+
if not kind:
|
|
152
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
153
|
+
f"Cannot get store resource from invalid URI: {uri}"
|
|
154
|
+
)
|
|
155
|
+
elif kind == StorePrefix.FeatureSet:
|
|
150
156
|
project, name, tag, uid = parse_versioned_object_uri(
|
|
151
157
|
uri, project or config.default_project
|
|
152
158
|
)
|
|
@@ -167,11 +173,7 @@ def get_store_resource(
|
|
|
167
173
|
)
|
|
168
174
|
if resource.get("kind", "") == "link":
|
|
169
175
|
# todo: support other link types (not just iter, move this to the db/api layer
|
|
170
|
-
link_iteration = (
|
|
171
|
-
resource.get("link_iteration", 0)
|
|
172
|
-
if is_legacy_artifact(resource)
|
|
173
|
-
else resource["spec"].get("link_iteration", 0)
|
|
174
|
-
)
|
|
176
|
+
link_iteration = resource["spec"].get("link_iteration", 0)
|
|
175
177
|
|
|
176
178
|
resource = db.read_artifact(
|
|
177
179
|
key,
|
|
mlrun/datastore/storeytargets.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# Copyright 2024 Iguazio
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
import storey
|
|
15
|
+
from mergedeep import merge
|
|
16
|
+
from storey import V3ioDriver
|
|
17
|
+
|
|
18
|
+
import mlrun
|
|
19
|
+
import mlrun.model_monitoring.helpers
|
|
20
|
+
from mlrun.datastore.base import DataStore
|
|
21
|
+
|
|
22
|
+
from ..platforms.iguazio import parse_path
|
|
23
|
+
from .utils import (
|
|
24
|
+
parse_kafka_url,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
"""
|
|
28
|
+
Storey targets expect storage_options, which may contain credentials.
|
|
29
|
+
To avoid passing it openly within the graph, we use wrapper classes.
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_url_and_storage_options(path, external_storage_options=None):
    """Resolve ``path`` to a store URL and the storage options to use with it.

    The store for ``path`` is obtained from ``mlrun.store_manager`` and its own
    storage options are combined with ``external_storage_options``. When both are
    non-empty they are deep-merged and the store's options take precedence;
    otherwise whichever of the two is non-empty is used.

    :param path:                     target path/URL handed to the store manager
    :param external_storage_options: optional caller-supplied storage options; the
                                     mapping itself is never modified
    :return: ``(url, storage_options)``, with the options passed through
             ``DataStore._sanitize_storage_options``
    """
    store, _, url = mlrun.store_manager.get_or_create_store(path)
    storage_options = store.get_storage_options()
    if storage_options and external_storage_options:
        # mergedeep's merge() writes into its first argument. Merging into a fresh
        # dict keeps the store's options (which may contain credentials) out of the
        # caller's dict. Later sources win, so storage_options still takes precedence.
        storage_options = merge({}, external_storage_options, storage_options)
    else:
        storage_options = storage_options or external_storage_options
    return url, DataStore._sanitize_storage_options(storage_options)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TDEngineStoreyTarget(storey.TDEngineTarget):
    """``storey.TDEngineTarget`` whose ``url`` is resolved at construction time.

    Any ``url`` supplied by the caller is overridden with the value returned by
    ``mlrun.model_monitoring.helpers.get_tsdb_connection_string()``.
    """

    def __init__(self, *args, **kwargs):
        connection_string = (
            mlrun.model_monitoring.helpers.get_tsdb_connection_string()
        )
        kwargs.update(url=connection_string)
        super().__init__(*args, **kwargs)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class StoreyTargetUtils:
    """Argument helpers shared by the path-based storey target wrappers."""

    @staticmethod
    def process_args_and_kwargs(args, kwargs):
        """Swap the target path for its resolved URL and attach storage options.

        The path is read from the first positional argument when there is one,
        otherwise from ``kwargs["path"]``. It is resolved together with
        ``kwargs["storage_options"]`` through ``get_url_and_storage_options``.

        :param args:   positional arguments destined for the storey target
        :param kwargs: keyword arguments destined for the storey target; updated in place
        :return: ``(args, kwargs)`` where ``args`` is a new list and ``kwargs`` is the
                 same (updated) dict
        """
        positional = list(args)
        if positional:
            raw_path = positional[0]
        else:
            raw_path = kwargs.get("path")
        caller_options = kwargs.get("storage_options")

        resolved_url, resolved_options = get_url_and_storage_options(
            raw_path, caller_options
        )

        # Only overwrite storage_options when there is something to pass on.
        if resolved_options:
            kwargs["storage_options"] = resolved_options
        # The path may have been given positionally, by keyword, or both.
        if positional:
            positional[0] = resolved_url
        if "path" in kwargs:
            kwargs["path"] = resolved_url
        return positional, kwargs
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class ParquetStoreyTarget(storey.ParquetTarget):
    """``storey.ParquetTarget`` that resolves its path and storage options on init."""

    def __init__(self, *args, **kwargs):
        # Replace the path with the resolved URL and attach the storage options
        # before handing everything to the storey target.
        resolved_args, resolved_kwargs = StoreyTargetUtils.process_args_and_kwargs(
            args, kwargs
        )
        super().__init__(*resolved_args, **resolved_kwargs)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class CSVStoreyTarget(storey.CSVTarget):
    """``storey.CSVTarget`` that resolves its path and storage options on init."""

    def __init__(self, *args, **kwargs):
        # Replace the path with the resolved URL and attach the storage options
        # before handing everything to the storey target.
        resolved_args, resolved_kwargs = StoreyTargetUtils.process_args_and_kwargs(
            args, kwargs
        )
        super().__init__(*resolved_args, **resolved_kwargs)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class StreamStoreyTarget(storey.StreamTarget):
    """``storey.StreamTarget`` that builds its ``V3ioDriver`` storage from the stream URI.

    The stream URI is taken from the first positional argument, or from the
    ``stream_path`` keyword. It is split with ``parse_path`` into a web-API
    endpoint and a stream path; the endpoint (falling back to
    ``mlrun.mlconf.v3io_api``) and the ``v3io_access_key`` storage option are
    used to create the driver.

    :raises mlrun.errors.MLRunInvalidArgumentError: when no stream URI is given
    """

    def __init__(self, *args, **kwargs):
        args = list(args)

        uri = args[0] if args else kwargs.get("stream_path")

        if not uri:
            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")

        _, storage_options = get_url_and_storage_options(uri)
        # Tolerate a missing options mapping instead of failing on `.get` below.
        storage_options = storage_options or {}
        endpoint, path = parse_path(uri)

        access_key = storage_options.get("v3io_access_key")
        storage = V3ioDriver(
            webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
        )

        if storage_options:
            kwargs["storage"] = storage
        if args:
            # Use the parsed stream path, exactly like the keyword branch below;
            # the endpoint is only meaningful for the driver's `webapi`.
            # NOTE(review): assumes the first positional argument is the stream
            # path - confirm against the storey.StreamTarget signature.
            args[0] = path
        if "stream_path" in kwargs:
            kwargs["stream_path"] = path

        super().__init__(*args, **kwargs)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class KafkaStoreyTarget(storey.KafkaTarget):
    """``storey.KafkaTarget`` that derives ``topic`` and ``brokers`` from ``path``.

    ``path`` is required as a keyword. For a ``ds://`` path the datastore profile
    is read; its attributes are merged over the explicit ``attributes`` and its
    ``topic`` is used. For any other path the topic and brokers come from
    ``parse_kafka_url``. Whatever is left in ``attributes`` after the broker keys
    are removed is forwarded to the storey target as keyword arguments.

    :raises KeyError: when ``path`` is not passed as a keyword
    :raises mlrun.errors.MLRunInvalidArgumentError: when no topic can be determined
    """

    def __init__(self, *args, **kwargs):
        path = kwargs.pop("path")
        # `attributes` is optional: fall back to an empty mapping so the merge,
        # the pops and the `**attributes` expansion below all work without it.
        explicit_attributes = kwargs.pop("attributes", None) or {}

        datastore_profile = None
        if path and path.startswith("ds://"):
            datastore_profile = (
                mlrun.datastore.datastore_profile.datastore_profile_read(path)
            )
            # Merge into a fresh dict so the profile's attributes are not written
            # back into the caller's mapping; profile attributes take precedence.
            attributes = merge(
                {}, explicit_attributes, datastore_profile.attributes()
            )
        else:
            # Work on a copy so the pops below do not alter the caller's mapping.
            attributes = dict(explicit_attributes)

        # Both keys are always removed; "brokers" wins when both are present.
        bootstrap_servers = attributes.pop("bootstrap_servers", None)
        brokers = attributes.pop("brokers", bootstrap_servers)

        if datastore_profile is not None:
            topic = datastore_profile.topic
        else:
            topic, brokers = parse_kafka_url(path, brokers)

        if not topic:
            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
        kwargs["brokers"] = brokers
        kwargs["topic"] = topic
        super().__init__(*args, **kwargs, **attributes)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
class NoSqlStoreyTarget(storey.NoSqlTarget):
    """Thin alias of ``storey.NoSqlTarget``; adds no behavior of its own."""
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
    """``storey.NoSqlTarget`` whose ``path`` is rebuilt from the resolved server endpoint.

    ``path`` is required as a keyword; ``credentials_prefix`` is optional and is
    consumed here rather than forwarded to the storey target.

    :raises KeyError: when ``path`` is not passed as a keyword
    """

    def __init__(self, *args, **kwargs):
        raw_path = kwargs.pop("path")
        credentials_prefix = kwargs.pop("credentials_prefix", None)
        server_endpoint, table_uri = (
            mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
                raw_path, credentials_prefix
            )
        )
        # The storey target receives "<endpoint>/<uri>" as its path.
        kwargs["path"] = "/".join((server_endpoint, table_uri))
        super().__init__(*args, **kwargs)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class TSDBStoreyTarget(storey.TSDBTarget):
    """Thin alias of ``storey.TSDBTarget``; adds no behavior of its own."""
|