mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/utils.py
CHANGED
|
@@ -12,10 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
#
|
|
15
|
+
import math
|
|
15
16
|
import tarfile
|
|
16
17
|
import tempfile
|
|
17
18
|
import typing
|
|
18
|
-
|
|
19
|
+
import warnings
|
|
20
|
+
from urllib.parse import parse_qs, urlparse
|
|
19
21
|
|
|
20
22
|
import pandas as pd
|
|
21
23
|
import semver
|
|
@@ -23,73 +25,29 @@ import semver
|
|
|
23
25
|
import mlrun.datastore
|
|
24
26
|
|
|
25
27
|
|
|
26
|
-
def store_path_to_spark(path, spark_options=None):
|
|
27
|
-
schemas = ["redis://", "rediss://", "ds://"]
|
|
28
|
-
if any(path.startswith(schema) for schema in schemas):
|
|
29
|
-
url = urlparse(path)
|
|
30
|
-
if url.path:
|
|
31
|
-
path = url.path
|
|
32
|
-
elif path.startswith("gcs://"):
|
|
33
|
-
path = "gs:" + path[len("gcs:") :]
|
|
34
|
-
elif path.startswith("v3io:///"):
|
|
35
|
-
path = "v3io:" + path[len("v3io:/") :]
|
|
36
|
-
elif path.startswith("az://"):
|
|
37
|
-
account_key = None
|
|
38
|
-
path = "wasbs:" + path[len("az:") :]
|
|
39
|
-
prefix = "spark.hadoop.fs.azure.account.key."
|
|
40
|
-
if spark_options:
|
|
41
|
-
for key in spark_options:
|
|
42
|
-
if key.startswith(prefix):
|
|
43
|
-
account_key = key[len(prefix) :]
|
|
44
|
-
break
|
|
45
|
-
if account_key:
|
|
46
|
-
# transfer "wasb://basket/some/path" to wasb://basket@account_key.blob.core.windows.net/some/path
|
|
47
|
-
parsed_url = urlparse(path)
|
|
48
|
-
new_netloc = f"{parsed_url.hostname}@{account_key}"
|
|
49
|
-
path = urlunparse(
|
|
50
|
-
(
|
|
51
|
-
parsed_url.scheme,
|
|
52
|
-
new_netloc,
|
|
53
|
-
parsed_url.path,
|
|
54
|
-
parsed_url.params,
|
|
55
|
-
parsed_url.query,
|
|
56
|
-
parsed_url.fragment,
|
|
57
|
-
)
|
|
58
|
-
)
|
|
59
|
-
elif path.startswith("s3://"):
|
|
60
|
-
if path.startswith("s3:///"):
|
|
61
|
-
# 's3:///' not supported since mlrun 0.9.0 should use s3:// instead
|
|
62
|
-
from mlrun.errors import MLRunInvalidArgumentError
|
|
63
|
-
|
|
64
|
-
valid_path = "s3:" + path[len("s3:/") :]
|
|
65
|
-
raise MLRunInvalidArgumentError(
|
|
66
|
-
f"'s3:///' is not supported, try using 's3://' instead.\nE.g: '{valid_path}'"
|
|
67
|
-
)
|
|
68
|
-
else:
|
|
69
|
-
path = "s3a:" + path[len("s3:") :]
|
|
70
|
-
return path
|
|
71
|
-
|
|
72
|
-
|
|
73
28
|
def parse_kafka_url(
|
|
74
|
-
url: str,
|
|
75
|
-
) ->
|
|
29
|
+
url: str, brokers: typing.Union[list, str] = None
|
|
30
|
+
) -> tuple[str, list]:
|
|
76
31
|
"""Generating Kafka topic and adjusting a list of bootstrap servers.
|
|
77
32
|
|
|
78
33
|
:param url: URL path to parse using urllib.parse.urlparse.
|
|
79
|
-
:param
|
|
34
|
+
:param brokers: List of kafka brokers.
|
|
80
35
|
|
|
81
36
|
:return: A tuple of:
|
|
82
37
|
[0] = Kafka topic value
|
|
83
38
|
[1] = List of bootstrap servers
|
|
84
39
|
"""
|
|
85
|
-
|
|
40
|
+
brokers = brokers or []
|
|
41
|
+
|
|
42
|
+
if isinstance(brokers, str):
|
|
43
|
+
brokers = brokers.split(",")
|
|
86
44
|
|
|
87
45
|
# Parse the provided URL into six components according to the general structure of a URL
|
|
88
46
|
url = urlparse(url)
|
|
89
47
|
|
|
90
48
|
# Add the network location to the bootstrap servers list
|
|
91
49
|
if url.netloc:
|
|
92
|
-
|
|
50
|
+
brokers = [url.netloc] + brokers
|
|
93
51
|
|
|
94
52
|
# Get the topic value from the parsed url
|
|
95
53
|
query_dict = parse_qs(url.query)
|
|
@@ -98,7 +56,7 @@ def parse_kafka_url(
|
|
|
98
56
|
else:
|
|
99
57
|
topic = url.path
|
|
100
58
|
topic = topic.lstrip("/")
|
|
101
|
-
return topic,
|
|
59
|
+
return topic, brokers
|
|
102
60
|
|
|
103
61
|
|
|
104
62
|
def upload_tarball(source_dir, target, secrets=None):
|
|
@@ -107,7 +65,7 @@ def upload_tarball(source_dir, target, secrets=None):
|
|
|
107
65
|
with tarfile.open(mode="w:gz", fileobj=temp_fh) as tar:
|
|
108
66
|
tar.add(source_dir, arcname="")
|
|
109
67
|
stores = mlrun.datastore.store_manager.set(secrets)
|
|
110
|
-
datastore, subpath = stores.get_or_create_store(target)
|
|
68
|
+
datastore, subpath, url = stores.get_or_create_store(target)
|
|
111
69
|
datastore.upload(subpath, temp_fh.name)
|
|
112
70
|
|
|
113
71
|
|
|
@@ -157,7 +115,7 @@ def _execute_time_filter(
|
|
|
157
115
|
|
|
158
116
|
def select_columns_from_df(
|
|
159
117
|
df: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
|
|
160
|
-
columns:
|
|
118
|
+
columns: list[str],
|
|
161
119
|
) -> typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]]:
|
|
162
120
|
if not columns:
|
|
163
121
|
return df
|
|
@@ -169,7 +127,7 @@ def select_columns_from_df(
|
|
|
169
127
|
|
|
170
128
|
def select_columns_generator(
|
|
171
129
|
dfs: typing.Union[pd.DataFrame, typing.Iterator[pd.DataFrame]],
|
|
172
|
-
columns:
|
|
130
|
+
columns: list[str],
|
|
173
131
|
) -> typing.Iterator[pd.DataFrame]:
|
|
174
132
|
for df in dfs:
|
|
175
133
|
yield df[columns]
|
|
@@ -179,7 +137,7 @@ def _generate_sql_query_with_time_filter(
|
|
|
179
137
|
table_name: str,
|
|
180
138
|
engine: "sqlalchemy.engine.Engine", # noqa: F821,
|
|
181
139
|
time_column: str,
|
|
182
|
-
parse_dates:
|
|
140
|
+
parse_dates: list[str],
|
|
183
141
|
start_time: pd.Timestamp,
|
|
184
142
|
end_time: pd.Timestamp,
|
|
185
143
|
):
|
|
@@ -208,3 +166,59 @@ def _generate_sql_query_with_time_filter(
|
|
|
208
166
|
query = query.filter(getattr(table.c, time_column) <= end_time)
|
|
209
167
|
|
|
210
168
|
return query, parse_dates
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def get_kafka_brokers_from_dict(options: dict, pop=False) -> typing.Optional[str]:
|
|
172
|
+
get_or_pop = options.pop if pop else options.get
|
|
173
|
+
kafka_brokers = get_or_pop("kafka_brokers", None)
|
|
174
|
+
if kafka_brokers:
|
|
175
|
+
return kafka_brokers
|
|
176
|
+
kafka_bootstrap_servers = get_or_pop("kafka_bootstrap_servers", None)
|
|
177
|
+
if kafka_bootstrap_servers:
|
|
178
|
+
warnings.warn(
|
|
179
|
+
"The 'kafka_bootstrap_servers' parameter is deprecated and will be removed in "
|
|
180
|
+
"1.9.0. Please pass the 'kafka_brokers' parameter instead.",
|
|
181
|
+
FutureWarning,
|
|
182
|
+
)
|
|
183
|
+
return kafka_bootstrap_servers
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def transform_list_filters_to_tuple(additional_filters):
|
|
187
|
+
tuple_filters = []
|
|
188
|
+
if not additional_filters:
|
|
189
|
+
return tuple_filters
|
|
190
|
+
validate_additional_filters(additional_filters)
|
|
191
|
+
for additional_filter in additional_filters:
|
|
192
|
+
tuple_filters.append(tuple(additional_filter))
|
|
193
|
+
return tuple_filters
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def validate_additional_filters(additional_filters):
|
|
197
|
+
nan_error_message = "using NaN in additional_filters is not supported"
|
|
198
|
+
if additional_filters in [None, [], ()]:
|
|
199
|
+
return
|
|
200
|
+
for filter_tuple in additional_filters:
|
|
201
|
+
if filter_tuple == () or filter_tuple == []:
|
|
202
|
+
continue
|
|
203
|
+
if not isinstance(filter_tuple, (list, tuple)):
|
|
204
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
205
|
+
f"mlrun supports additional_filters only as a list of tuples."
|
|
206
|
+
f" Current additional_filters: {additional_filters}"
|
|
207
|
+
)
|
|
208
|
+
if isinstance(filter_tuple[0], (list, tuple)):
|
|
209
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
210
|
+
f"additional_filters does not support nested list inside filter tuples except in -in- logic."
|
|
211
|
+
f" Current filter_tuple: {filter_tuple}."
|
|
212
|
+
)
|
|
213
|
+
if len(filter_tuple) != 3:
|
|
214
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
215
|
+
f"illegal filter tuple length, {filter_tuple} in additional filters:"
|
|
216
|
+
f" {additional_filters}"
|
|
217
|
+
)
|
|
218
|
+
col_name, op, value = filter_tuple
|
|
219
|
+
if isinstance(value, float) and math.isnan(value):
|
|
220
|
+
raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
|
|
221
|
+
elif isinstance(value, (list, tuple)):
|
|
222
|
+
for sub_value in value:
|
|
223
|
+
if isinstance(sub_value, float) and math.isnan(sub_value):
|
|
224
|
+
raise mlrun.errors.MLRunInvalidArgumentError(nan_error_message)
|
mlrun/datastore/v3io.py
CHANGED
|
@@ -29,7 +29,7 @@ from .base import (
|
|
|
29
29
|
)
|
|
30
30
|
|
|
31
31
|
V3IO_LOCAL_ROOT = "v3io"
|
|
32
|
-
V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 *
|
|
32
|
+
V3IO_DEFAULT_UPLOAD_CHUNK_SIZE = 1024 * 1024 * 10
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
class V3ioStore(DataStore):
|
|
@@ -77,6 +77,10 @@ class V3ioStore(DataStore):
|
|
|
77
77
|
schema = "https" if self.secure else "http"
|
|
78
78
|
return f"{schema}://{self.endpoint}"
|
|
79
79
|
|
|
80
|
+
@property
|
|
81
|
+
def spark_url(self):
|
|
82
|
+
return "v3io:/"
|
|
83
|
+
|
|
80
84
|
@property
|
|
81
85
|
def filesystem(self):
|
|
82
86
|
"""return fsspec file system object, if supported"""
|
|
@@ -136,6 +140,7 @@ class V3ioStore(DataStore):
|
|
|
136
140
|
max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
|
|
137
141
|
):
|
|
138
142
|
"""helper function for put method, allows for controlling max_chunk_size in testing"""
|
|
143
|
+
data, _ = self._prepare_put_data(data, append)
|
|
139
144
|
container, path = split_path(self._join(key))
|
|
140
145
|
buffer_size = len(data) # in bytes
|
|
141
146
|
buffer_offset = 0
|