mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +25 -111
- mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
- mlrun/alerts/alert.py +144 -0
- mlrun/api/schemas/__init__.py +4 -3
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +38 -254
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +41 -47
- mlrun/artifacts/model.py +30 -158
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +68 -0
- mlrun/common/formatters/__init__.py +19 -0
- mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
- mlrun/common/formatters/base.py +78 -0
- mlrun/common/formatters/function.py +41 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/{runtimes → common/runtimes}/constants.py +32 -4
- mlrun/common/schemas/__init__.py +25 -4
- mlrun/common/schemas/alert.py +203 -0
- mlrun/common/schemas/api_gateway.py +148 -0
- mlrun/common/schemas/artifact.py +15 -5
- mlrun/common/schemas/auth.py +8 -2
- mlrun/common/schemas/client_spec.py +2 -0
- mlrun/common/schemas/frontend_spec.py +1 -0
- mlrun/common/schemas/function.py +4 -0
- mlrun/common/schemas/hub.py +7 -9
- mlrun/common/schemas/model_monitoring/__init__.py +19 -3
- mlrun/common/schemas/model_monitoring/constants.py +96 -26
- mlrun/common/schemas/model_monitoring/grafana.py +9 -5
- mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
- mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
- mlrun/common/schemas/pipeline.py +0 -9
- mlrun/common/schemas/project.py +22 -21
- mlrun/common/types.py +7 -1
- mlrun/config.py +87 -19
- mlrun/data_types/data_types.py +4 -0
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/__init__.py +5 -8
- mlrun/datastore/alibaba_oss.py +130 -0
- mlrun/datastore/azure_blob.py +4 -5
- mlrun/datastore/base.py +69 -30
- mlrun/datastore/datastore.py +10 -2
- mlrun/datastore/datastore_profile.py +90 -6
- mlrun/datastore/google_cloud_storage.py +1 -1
- mlrun/datastore/hdfs.py +5 -0
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/redis.py +2 -2
- mlrun/datastore/s3.py +5 -0
- mlrun/datastore/snowflake_utils.py +43 -0
- mlrun/datastore/sources.py +172 -44
- mlrun/datastore/store_resources.py +7 -7
- mlrun/datastore/targets.py +285 -41
- mlrun/datastore/utils.py +68 -5
- mlrun/datastore/v3io.py +27 -50
- mlrun/db/auth_utils.py +152 -0
- mlrun/db/base.py +149 -14
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +608 -178
- mlrun/db/nopdb.py +191 -7
- mlrun/errors.py +11 -0
- mlrun/execution.py +37 -20
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +21 -52
- mlrun/feature_store/feature_set.py +48 -23
- mlrun/feature_store/feature_vector.py +2 -1
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +9 -9
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +9 -3
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +34 -24
- mlrun/feature_store/steps.py +30 -19
- mlrun/features.py +4 -13
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
- mlrun/frameworks/lgbm/__init__.py +1 -1
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/model_handler.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -1
- mlrun/frameworks/pytorch/__init__.py +2 -2
- mlrun/frameworks/sklearn/__init__.py +1 -1
- mlrun/frameworks/tf_keras/__init__.py +5 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
- mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
- mlrun/frameworks/xgboost/__init__.py +1 -1
- mlrun/k8s_utils.py +10 -11
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +6 -5
- mlrun/launcher/client.py +8 -6
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +9 -3
- mlrun/launcher/remote.py +9 -3
- mlrun/lists.py +6 -2
- mlrun/model.py +58 -19
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +127 -301
- mlrun/model_monitoring/application.py +5 -296
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +30 -36
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
- mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
- mlrun/model_monitoring/db/tsdb/base.py +329 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/features_drift_table.py +34 -22
- mlrun/model_monitoring/helpers.py +100 -7
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +93 -228
- mlrun/model_monitoring/tracking_policy.py +7 -1
- mlrun/model_monitoring/writer.py +152 -124
- mlrun/package/packagers_manager.py +1 -0
- mlrun/package/utils/_formatter.py +2 -2
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +30 -16
- mlrun/projects/pipelines.py +92 -99
- mlrun/projects/project.py +757 -268
- mlrun/render.py +15 -14
- mlrun/run.py +160 -162
- mlrun/runtimes/__init__.py +55 -3
- mlrun/runtimes/base.py +33 -19
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +28 -122
- mlrun/runtimes/local.py +5 -2
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +8 -8
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/__init__.py +1 -0
- mlrun/runtimes/nuclio/api_gateway.py +709 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +523 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/nuclio/function.py +98 -58
- mlrun/runtimes/nuclio/serving.py +36 -42
- mlrun/runtimes/pod.py +196 -45
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +6 -73
- mlrun/secrets.py +6 -2
- mlrun/serving/remote.py +2 -3
- mlrun/serving/routers.py +7 -4
- mlrun/serving/server.py +7 -8
- mlrun/serving/states.py +73 -43
- mlrun/serving/v2_serving.py +8 -7
- mlrun/track/tracker.py +2 -1
- mlrun/utils/async_http.py +25 -5
- mlrun/utils/helpers.py +141 -75
- mlrun/utils/http.py +1 -1
- mlrun/utils/logger.py +39 -7
- mlrun/utils/notifications/notification/__init__.py +14 -9
- mlrun/utils/notifications/notification/base.py +12 -0
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +3 -1
- mlrun/utils/notifications/notification/ipython.py +2 -0
- mlrun/utils/notifications/notification/slack.py +101 -21
- mlrun/utils/notifications/notification/webhook.py +11 -1
- mlrun/utils/notifications/notification_pusher.py +147 -16
- mlrun/utils/retryer.py +3 -2
- mlrun/utils/v3io_clients.py +0 -1
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
- mlrun-1.7.0rc20.dist-info/RECORD +353 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/batch.py +0 -974
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
- mlrun/platforms/other.py +0 -305
- mlrun-1.7.0rc4.dist-info/RECORD +0 -321
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/datastore/base.py
CHANGED

@@ -27,6 +27,7 @@ import requests
 import urllib3
 from deprecated import deprecated
 
+import mlrun.config
 import mlrun.errors
 from mlrun.errors import err_to_str
 from mlrun.utils import StorePrefix, is_ipython, logger
@@ -34,10 +35,6 @@ from mlrun.utils import StorePrefix, is_ipython, logger
 from .store_resources import is_store_uri, parse_store_uri
 from .utils import filter_df_start_end_time, select_columns_from_df
 
-verify_ssl = False
-if not verify_ssl:
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
 
 class FileStats:
     def __init__(self, size, modified, content_type=None):
@@ -182,11 +179,23 @@ class DataStore:
         return {}
 
     @staticmethod
-    def _parquet_reader(
+    def _parquet_reader(
+        df_module,
+        url,
+        file_system,
+        time_column,
+        start_time,
+        end_time,
+        additional_filters,
+    ):
         from storey.utils import find_filters, find_partitions
 
         def set_filters(
-            partitions_time_attributes,
+            partitions_time_attributes,
+            start_time_inner,
+            end_time_inner,
+            filters_inner,
+            kwargs,
         ):
             filters = []
             find_filters(
@@ -196,20 +205,23 @@ class DataStore:
                 filters,
                 time_column,
             )
+            if filters and filters_inner:
+                filters[0] += filters_inner
+
             kwargs["filters"] = filters
 
         def reader(*args, **kwargs):
-            if start_time or end_time:
-
-
-
-
-
+            if time_column is None and (start_time or end_time):
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "When providing start_time or end_time, must provide time_column"
+                )
+            if start_time or end_time or additional_filters:
                 partitions_time_attributes = find_partitions(url, file_system)
                 set_filters(
                     partitions_time_attributes,
                     start_time,
                     end_time,
+                    additional_filters,
                     kwargs,
                 )
             try:
@@ -220,6 +232,7 @@ class DataStore:
                 ):
                     raise ex
 
+                # TODO: fix timezone issue (ML-6308)
                 if start_time.tzinfo:
                     start_time_inner = start_time.replace(tzinfo=None)
                     end_time_inner = end_time.replace(tzinfo=None)
@@ -231,6 +244,7 @@ class DataStore:
                     partitions_time_attributes,
                     start_time_inner,
                     end_time_inner,
+                    additional_filters,
                     kwargs,
                 )
             return df_module.read_parquet(*args, **kwargs)
@@ -249,6 +263,7 @@ class DataStore:
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         df_module = df_module or pd
@@ -313,7 +328,13 @@ class DataStore:
             kwargs["columns"] = columns
 
             reader = self._parquet_reader(
-                df_module,
+                df_module,
+                url,
+                file_system,
+                time_column,
+                start_time,
+                end_time,
+                additional_filters,
             )
 
         elif file_url.endswith(".json") or format == "json":
@@ -392,14 +413,15 @@ class DataItem:
 
 
     # reading run results using DataItem (run.artifact())
-    train_run = train_iris_func.run(
-
+    train_run = train_iris_func.run(
+        inputs={"dataset": dataset}, params={"label_column": "label"}
+    )
 
-    train_run.artifact(
-    test_set = train_run.artifact(
+    train_run.artifact("confusion-matrix").show()
+    test_set = train_run.artifact("test_set").as_df()
 
     # create and use DataItem from uri
-    data = mlrun.get_dataitem(
+    data = mlrun.get_dataitem("http://xyz/data.json").get()
     """
 
     def __init__(
@@ -541,6 +563,7 @@ class DataItem:
         time_column=None,
         start_time=None,
        end_time=None,
+        additional_filters=None,
        **kwargs,
    ):
        """return a dataframe object (generated from the dataitem).
@@ -552,6 +575,12 @@ class DataItem:
        :param end_time:    filters out data after this time
        :param time_column: Store timestamp_key will be used if None.
                            The results will be filtered by this column and start_time & end_time.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
        """
        df = self._store.as_df(
            self._url,
@@ -562,6 +591,7 @@ class DataItem:
            time_column=time_column,
            start_time=start_time,
            end_time=end_time,
+            additional_filters=additional_filters,
            **kwargs,
        )
        return df
@@ -633,17 +663,6 @@ def basic_auth_header(user, password):
     return {"Authorization": authstr}
 
 
-def http_get(url, headers=None, auth=None):
-    try:
-        response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    mlrun.errors.raise_for_status(response)
-
-    return response.content
-
-
 class HttpStore(DataStore):
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
         super().__init__(parent, name, schema, endpoint, secrets)
@@ -671,7 +690,7 @@ class HttpStore(DataStore):
             raise ValueError("unimplemented")
 
     def get(self, key, size=None, offset=0):
-        data =
+        data = self._http_get(self.url + self._join(key), self._headers, self.auth)
         if offset:
             data = data[offset:]
         if size:
@@ -691,6 +710,26 @@ class HttpStore(DataStore):
                 f"schema as it is not secure and is not recommended."
             )
 
+    def _http_get(
+        self,
+        url,
+        headers=None,
+        auth=None,
+    ):
+        # import here to prevent import cycle
+        from mlrun.config import config as mlconf
+
+        verify_ssl = mlconf.httpdb.http.verify
+        try:
+            if not verify_ssl:
+                urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+            response = requests.get(url, headers=headers, auth=auth, verify=verify_ssl)
+        except OSError as exc:
+            raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
+
+        mlrun.errors.raise_for_status(response)
+        return response.content
+
 
 # This wrapper class is designed to extract the 'ds' schema and profile name from URL-formatted paths.
 # Within fsspec, the AbstractFileSystem::_strip_protocol() internal method is used to handle complete URL paths.
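
The net effect of the base.py changes: the module-level verify_ssl toggle and the free-standing http_get() helper move into HttpStore._http_get(), which now honors the httpdb.http.verify config setting, and a new additional_filters argument is threaded from DataItem.as_df() down to the parquet reader. A minimal usage sketch of the new argument, with hypothetical bucket and column names (the tuple format follows the docstring above):

from datetime import datetime

import mlrun

# Hypothetical parquet location and column names, for illustration only.
item = mlrun.get_dataitem("s3://my-bucket/sales.parquet")

# Filters are pushed down to the parquet reader; note that start_time/end_time
# now require time_column, otherwise MLRunInvalidArgumentError is raised.
df = item.as_df(
    time_column="timestamp",
    start_time=datetime(2024, 1, 1),
    end_time=datetime(2024, 6, 30),
    additional_filters=[("Product", "=", "Computer")],
)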
mlrun/datastore/datastore.py
CHANGED

@@ -98,6 +98,10 @@ def schema_to_store(schema):
         from .hdfs import HdfsStore
 
         return HdfsStore
+    elif schema == "oss":
+        from .alibaba_oss import OSSStore
+
+        return OSSStore
     else:
         raise ValueError(f"unsupported store scheme ({schema})")
 
@@ -219,6 +223,11 @@ class StoreManager:
             subpath = url[len("memory://") :]
             return in_memory_store, subpath, url
 
+        elif schema in get_local_file_schema():
+            # parse_url() will drop the windows drive-letter from the path for url like "c:\a\b".
+            # As a workaround, we set subpath to the url.
+            subpath = url.replace("file://", "", 1)
+
        if not schema and endpoint:
            if endpoint in self._stores.keys():
                return self._stores[endpoint], subpath, url
@@ -237,8 +246,7 @@
        )
        if not secrets and not mlrun.config.is_running_as_api():
            self._stores[store_key] = store
-
-        return store, url if store.kind == "file" else subpath, url
+        return store, subpath, url
 
    def reset_secrets(self):
        self._secrets = {}
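
With the oss scheme registered in schema_to_store(), Alibaba Cloud OSS URLs resolve to the new OSSStore (see mlrun/datastore/alibaba_oss.py in the file list above). A sketch of the dispatch, assuming OSS credentials are already configured through the secrets or environment variables that OSSStore expects (their names are not visible in this diff):

import mlrun

# Hypothetical bucket and object names, for illustration only.
item = mlrun.get_dataitem("oss://my-bucket/path/to/data.csv")
df = item.as_df()  # routed via schema_to_store("oss") -> OSSStore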
mlrun/datastore/datastore_profile.py
CHANGED

@@ -16,6 +16,7 @@ import ast
 import base64
 import json
 import typing
+import warnings
 from urllib.parse import ParseResult, urlparse, urlunparse
 
 import pydantic
@@ -36,6 +37,7 @@ class DatastoreProfile(pydantic.BaseModel):
         extra = pydantic.Extra.forbid
 
     @pydantic.validator("name")
+    @classmethod
     def lower_case(cls, v):
         return v.lower()
 
@@ -68,6 +70,9 @@ class TemporaryClientDatastoreProfiles(metaclass=mlrun.utils.singleton.Singleton
     def get(self, key):
         return self._data.get(key, None)
 
+    def remove(self, key):
+        self._data.pop(key, None)
+
 
 class DatastoreProfileBasic(DatastoreProfile):
     type: str = pydantic.Field("basic")
@@ -79,13 +84,37 @@
 class DatastoreProfileKafkaTarget(DatastoreProfile):
     type: str = pydantic.Field("kafka_target")
     _private_attributes = "kwargs_private"
-    bootstrap_servers: str
+    bootstrap_servers: typing.Optional[str] = None
+    brokers: typing.Optional[str] = None
     topic: str
     kwargs_public: typing.Optional[dict]
     kwargs_private: typing.Optional[dict]
 
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+        if not self.brokers and not self.bootstrap_servers:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "DatastoreProfileKafkaTarget requires the 'brokers' field to be set"
+            )
+
+        if self.bootstrap_servers:
+            if self.brokers:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "DatastoreProfileKafkaTarget cannot be created with both 'brokers' and 'bootstrap_servers'"
+                )
+            else:
+                self.brokers = self.bootstrap_servers
+                self.bootstrap_servers = None
+                warnings.warn(
+                    "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                    "use 'brokers' instead.",
+                    # TODO: Remove this in 1.9.0
+                    FutureWarning,
+                )
+
     def attributes(self):
-        attributes = {"
+        attributes = {"brokers": self.brokers or self.bootstrap_servers}
         if self.kwargs_public:
             attributes = merge(attributes, self.kwargs_public)
         if self.kwargs_private:
@@ -157,6 +186,18 @@ class DatastoreProfileS3(DatastoreProfile):
     assume_role_arn: typing.Optional[str] = None
     access_key_id: typing.Optional[str] = None
     secret_key: typing.Optional[str] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def secrets(self) -> dict:
         res = {}
@@ -175,7 +216,13 @@ class DatastoreProfileS3(DatastoreProfile):
         return res
 
     def url(self, subpath):
-
+        # TODO: There is an inconsistency with DatastoreProfileGCS. In DatastoreProfileGCS,
+        # we assume that the subpath can begin without a '/' character,
+        # while here we assume it always starts with one.
+        if self.bucket:
+            return f"s3://{self.bucket}{subpath}"
+        else:
+            return f"s3:/{subpath}"
 
 
 class DatastoreProfileRedis(DatastoreProfile):
@@ -244,18 +291,36 @@ class DatastoreProfileGCS(DatastoreProfile):
     _private_attributes = ("gcp_credentials",)
     credentials_path: typing.Optional[str] = None  # path to file.
     gcp_credentials: typing.Optional[typing.Union[str, dict]] = None
+    bucket: typing.Optional[str] = None
+
+    @pydantic.validator("bucket")
+    @classmethod
+    def check_bucket(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'bucket' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     @pydantic.validator("gcp_credentials", pre=True, always=True)
+    @classmethod
     def convert_dict_to_json(cls, v):
         if isinstance(v, dict):
             return json.dumps(v)
         return v
 
     def url(self, subpath) -> str:
+        # TODO: but there's something wrong with the subpath being assumed to not start with a slash here,
+        # but the opposite assumption is made in S3.
         if subpath.startswith("/"):
             # in gcs the path after schema is starts with bucket, wherefore it should not start with "/".
             subpath = subpath[1:]
-
+        if self.bucket:
+            return f"gcs://{self.bucket}/{subpath}"
+        else:
+            return f"gcs://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
@@ -283,12 +348,27 @@ class DatastoreProfileAzureBlob(DatastoreProfile):
     client_secret: typing.Optional[str] = None
     sas_token: typing.Optional[str] = None
     credential: typing.Optional[str] = None
+    container: typing.Optional[str] = None
+
+    @pydantic.validator("container")
+    @classmethod
+    def check_container(cls, v):
+        if not v:
+            warnings.warn(
+                "The 'container' attribute will be mandatory starting from version 1.9",
+                FutureWarning,
+                stacklevel=2,
+            )
+        return v
 
     def url(self, subpath) -> str:
         if subpath.startswith("/"):
-            # in azure the path after schema is starts with
+            # in azure the path after schema is starts with container, wherefore it should not start with "/".
             subpath = subpath[1:]
-
+        if self.container:
+            return f"az://{self.container}/{subpath}"
+        else:
+            return f"az://{subpath}"
 
     def secrets(self) -> dict:
         res = {}
@@ -460,3 +540,7 @@ def register_temporary_client_datastore_profile(profile: DatastoreProfile):
     It's beneficial for testing purposes.
     """
     TemporaryClientDatastoreProfiles().add(profile)
+
+
+def remove_temporary_client_datastore_profile(profile_name: str):
+    TemporaryClientDatastoreProfiles().remove(profile_name)
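
Taken together, the profile changes rename the Kafka target's bootstrap_servers field to brokers (keeping the old name as a deprecated alias until 1.9.0), add optional bucket/container fields to the S3, GCS, and Azure profiles, and pair register_temporary_client_datastore_profile() with a new remove counterpart. A client-side sketch with hypothetical names:

import mlrun
from mlrun.datastore.datastore_profile import (
    DatastoreProfileKafkaTarget,
    DatastoreProfileS3,
    register_temporary_client_datastore_profile,
    remove_temporary_client_datastore_profile,
)

# 'brokers' replaces the deprecated 'bootstrap_servers' argument.
kafka_profile = DatastoreProfileKafkaTarget(
    name="my-kafka", brokers="broker1:9092", topic="events"
)

# Passing 'bucket' avoids the FutureWarning and lets url() build "s3://bucket/...".
s3_profile = DatastoreProfileS3(name="my-s3", bucket="my-bucket")

register_temporary_client_datastore_profile(s3_profile)
df = mlrun.get_dataitem("ds://my-s3/path/file.parquet").as_df()
remove_temporary_client_datastore_profile("my-s3")  # new in this release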
mlrun/datastore/google_cloud_storage.py
CHANGED

@@ -132,7 +132,7 @@ class GoogleCloudStorageStore(DataStore):
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
 
     def get_spark_options(self):
-        res =
+        res = {}
         st = self.get_storage_options()
         if "token" in st:
             res = {"spark.hadoop.google.cloud.auth.service.account.enable": "true"}
mlrun/datastore/hdfs.py
CHANGED

@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
+from urllib.parse import urlparse
 
 import fsspec
 
@@ -49,3 +50,7 @@ class HdfsStore(DataStore):
     @property
     def spark_url(self):
         return f"hdfs://{self.host}:{self.port}"
+
+    def rm(self, url, recursive=False, maxdepth=None):
+        path = urlparse(url).path
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
mlrun/datastore/inmem.py
CHANGED

@@ -80,8 +80,8 @@ class InMemoryStore(DataStore):
             reader = df_module.read_json
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(f"file type unhandled {url}")
-        # InMemoryStore store
-        for field in ["time_column", "start_time", "end_time"]:
+        # InMemoryStore store – don't pass filters
+        for field in ["time_column", "start_time", "end_time", "additional_filters"]:
             kwargs.pop(field, None)
 
         return reader(item, **kwargs)
mlrun/datastore/redis.py
CHANGED

@@ -31,7 +31,7 @@ class RedisStore(DataStore):
     """
 
     def __init__(self, parent, schema, name, endpoint="", secrets: dict = None):
-
+        redis_default_port = "6379"
         super().__init__(parent, name, schema, endpoint, secrets=secrets)
         self.headers = None
 
@@ -49,7 +49,7 @@ class RedisStore(DataStore):
         user = self._get_secret_or_env("REDIS_USER", "", credentials_prefix)
         password = self._get_secret_or_env("REDIS_PASSWORD", "", credentials_prefix)
         host = parsed_endpoint.hostname
-        port = parsed_endpoint.port if parsed_endpoint.port else
+        port = parsed_endpoint.port if parsed_endpoint.port else redis_default_port
         schema = parsed_endpoint.scheme
         if user or password:
             endpoint = f"{schema}://{user}:{password}@{host}:{port}"
mlrun/datastore/s3.py
CHANGED

@@ -198,6 +198,11 @@ class S3Store(DataStore):
         bucket = self.s3.Bucket(bucket)
         return [obj.key[key_length:] for obj in bucket.objects.filter(Prefix=key)]
 
+    def rm(self, path, recursive=False, maxdepth=None):
+        bucket, key = self.get_bucket_and_key(path)
+        path = f"{bucket}/{key}"
+        self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)
+
 
 def parse_s3_bucket_and_key(s3_path):
     try:
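
The rm() methods added to S3Store above (and to HdfsStore earlier) fill in the delete operation that GoogleCloudStorageStore already implements, each normalizing the path before handing it to the fsspec filesystem. A sketch, assuming DataItem.delete() delegates to the underlying store's rm():

import mlrun

# Hypothetical object; S3Store.rm() reduces the URL to "bucket/key"
# before calling the fsspec filesystem's rm().
item = mlrun.get_dataitem("s3://my-bucket/tmp/old-run.parquet")
item.delete()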
mlrun/datastore/snowflake_utils.py
ADDED

@@ -0,0 +1,43 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import mlrun
+
+
+def get_snowflake_password():
+    key = "SNOWFLAKE_PASSWORD"
+    snowflake_password = mlrun.get_secret_or_env(key)
+
+    if not snowflake_password:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"No password provided. Set password using the {key} "
+            "project secret or environment variable."
+        )
+
+    return snowflake_password
+
+
+def get_snowflake_spark_options(attributes):
+    return {
+        "format": "net.snowflake.spark.snowflake",
+        "sfURL": attributes.get("url"),
+        "sfUser": attributes.get("user"),
+        "sfPassword": get_snowflake_password(),
+        "sfDatabase": attributes.get("database"),
+        "sfSchema": attributes.get("schema"),
+        "sfWarehouse": attributes.get("warehouse"),
+        "application": "iguazio_platform",
+        "TIMESTAMP_TYPE_MAPPING": "TIMESTAMP_LTZ",
+    }