mlrun 1.7.0rc3__py3-none-any.whl → 1.7.0rc4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- mlrun/artifacts/manager.py +6 -1
- mlrun/common/constants.py +1 -0
- mlrun/common/model_monitoring/helpers.py +12 -6
- mlrun/common/schemas/__init__.py +1 -0
- mlrun/common/schemas/client_spec.py +1 -0
- mlrun/common/schemas/common.py +40 -0
- mlrun/common/schemas/model_monitoring/constants.py +4 -1
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +20 -15
- mlrun/datastore/azure_blob.py +22 -9
- mlrun/datastore/base.py +15 -25
- mlrun/datastore/datastore.py +19 -8
- mlrun/datastore/datastore_profile.py +47 -5
- mlrun/datastore/google_cloud_storage.py +10 -6
- mlrun/datastore/hdfs.py +51 -0
- mlrun/datastore/redis.py +4 -0
- mlrun/datastore/s3.py +4 -0
- mlrun/datastore/sources.py +29 -43
- mlrun/datastore/targets.py +58 -48
- mlrun/datastore/utils.py +2 -49
- mlrun/datastore/v3io.py +4 -0
- mlrun/db/base.py +34 -0
- mlrun/db/httpdb.py +71 -42
- mlrun/execution.py +3 -3
- mlrun/feature_store/feature_vector.py +2 -2
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
- mlrun/frameworks/tf_keras/model_handler.py +7 -7
- mlrun/k8s_utils.py +10 -5
- mlrun/kfpops.py +19 -10
- mlrun/model.py +5 -0
- mlrun/model_monitoring/api.py +3 -3
- mlrun/model_monitoring/application.py +1 -1
- mlrun/model_monitoring/applications/__init__.py +13 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +218 -0
- mlrun/model_monitoring/batch.py +9 -111
- mlrun/model_monitoring/controller.py +73 -55
- mlrun/model_monitoring/controller_handler.py +13 -5
- mlrun/model_monitoring/features_drift_table.py +62 -53
- mlrun/model_monitoring/helpers.py +30 -21
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +14 -14
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
- mlrun/package/packagers/pandas_packagers.py +3 -3
- mlrun/package/utils/_archiver.py +3 -1
- mlrun/platforms/iguazio.py +8 -65
- mlrun/projects/pipelines.py +21 -11
- mlrun/projects/project.py +121 -42
- mlrun/runtimes/base.py +21 -2
- mlrun/runtimes/kubejob.py +5 -3
- mlrun/runtimes/local.py +2 -2
- mlrun/runtimes/mpijob/abstract.py +6 -6
- mlrun/runtimes/nuclio/function.py +9 -9
- mlrun/runtimes/nuclio/serving.py +3 -3
- mlrun/runtimes/pod.py +3 -3
- mlrun/runtimes/sparkjob/spark3job.py +3 -3
- mlrun/serving/remote.py +4 -2
- mlrun/serving/server.py +2 -8
- mlrun/utils/async_http.py +3 -3
- mlrun/utils/helpers.py +27 -5
- mlrun/utils/http.py +3 -3
- mlrun/utils/notifications/notification_pusher.py +6 -6
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/METADATA +13 -16
- {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/RECORD +69 -63
- {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc3.dist-info → mlrun-1.7.0rc4.dist-info}/top_level.txt +0 -0
mlrun/datastore/sources.py
CHANGED
@@ -39,7 +39,6 @@ from .utils import (
     _generate_sql_query_with_time_filter,
     filter_df_start_end_time,
     select_columns_from_df,
-    store_path_to_spark,
 )

@@ -193,14 +192,10 @@ class CSVSource(BaseSourceDriver):
             parse_dates.append(time_field)

         data_item = mlrun.store_manager.object(self.path)
-            store, path = mlrun.store_manager.get_or_create_store(self.path)
-            path = store.url + path
-        else:
-            path = data_item.url
+        store, path, url = mlrun.store_manager.get_or_create_store(self.path)

         return storey.CSVSource(
-            paths=
+            paths=url,  # unlike self.path, it already has store:// replaced
             build_dict=True,
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
@@ -209,25 +204,17 @@ class CSVSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-                "path": store_path_to_spark(path, storage_spark_options),
-                "format": "csv",
-                "header": "true",
-                "inferSchema": "true",
-            }
-            return {**result, **storage_spark_options}
-        else:
-            return {
-                "path": store_path_to_spark(self.path),
+        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        spark_options = store.get_spark_options()
+        spark_options.update(
+            {
+                "path": url,
                 "format": "csv",
                 "header": "true",
                 "inferSchema": "true",
             }
+        )
+        return spark_options

     def to_spark_df(self, session, named_view=False, time_field=None, columns=None):
         import pyspark.sql.functions as funcs
@@ -357,14 +344,10 @@ class ParquetSource(BaseSourceDriver):
         attributes["context"] = context

         data_item = mlrun.store_manager.object(self.path)
-            store, path = mlrun.store_manager.get_or_create_store(self.path)
-            path = store.url + path
-        else:
-            path = data_item.url
+        store, path, url = mlrun.store_manager.get_or_create_store(self.path)

         return storey.ParquetSource(
-            paths=
+            paths=url,  # unlike self.path, it already has store:// replaced
             key_field=self.key_field or key_field,
             storage_options=data_item.store.get_storage_options(),
             end_filter=self.end_time,
@@ -374,20 +357,15 @@ class ParquetSource(BaseSourceDriver):
         )

     def get_spark_options(self):
-                "path": store_path_to_spark(path, storage_spark_options),
-                "format": "parquet",
-            }
-            return {**result, **storage_spark_options}
-        else:
-            return {
-                "path": store_path_to_spark(self.path),
+        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        spark_options = store.get_spark_options()
+        spark_options.update(
+            {
+                "path": store.spark_url + path,
                 "format": "parquet",
             }
+        )
+        return spark_options

     def to_dataframe(
         self,
@@ -875,8 +853,16 @@ class StreamSource(OnlineSource):
         super().__init__(name, attributes=attrs, **kwargs)

     def add_nuclio_trigger(self, function):
+        store, path, url = mlrun.store_manager.get_or_create_store(self.path)
+        if store.kind != "v3io":
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Only profiles that reference the v3io datastore can be used with StreamSource"
+            )
+        path = "v3io:/" + path
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key")
+        endpoint, stream_path = parse_path(url)
+        v3io_client = v3io.dataplane.Client(endpoint=endpoint, access_key=access_key)
         container, stream_path = split_path(stream_path)
         res = v3io_client.stream.create(
             container=container,
@@ -896,7 +882,7 @@ class StreamSource(OnlineSource):
             kwargs["worker_allocation_mode"] = "static"

         function.add_v3io_stream_trigger(
+            path,
             self.name,
             self.attributes["group"],
             self.attributes["seek_to"],
mlrun/datastore/targets.py
CHANGED
@@ -29,7 +29,7 @@ import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
-from mlrun.utils import now_date
+from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
 from mlrun.utils.v3io_clients import get_frames_client

@@ -43,7 +43,6 @@ from .utils import (
     filter_df_start_end_time,
     parse_kafka_url,
     select_columns_from_df,
-    store_path_to_spark,
 )

@@ -448,14 +447,11 @@ class BaseStoreTarget(DataTargetBase):
             if self.credentials_prefix
             else None
         )
-        store, resolved_store_path = mlrun.store_manager.get_or_create_store(
+        store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(
             self.get_target_path(),
             credentials_prefix_secrets,
         )
-            return store, store.url + resolved_store_path
-        else:
-            return store, self.get_target_path()
+        return store, url

     def _get_column_list(self, features, timestamp_key, key_columns, with_type=False):
         result = []
@@ -925,27 +921,21 @@ class ParquetTarget(BaseStoreTarget):
             if unit == time_partitioning_granularity:
                 break
-                "path": store_path_to_spark(path, storage_spark_options),
-                "format": "parquet",
-            }
-            result = {**result, **storage_spark_options}
-        else:
-            result = {
-                "path": store_path_to_spark(self.get_target_path()),
+        store, path, url = mlrun.store_manager.get_or_create_store(
+            self.get_target_path()
+        )
+        spark_options = store.get_spark_options()
+        spark_options.update(
+            {
+                "path": store.spark_url + path,
                 "format": "parquet",
             }
+        )
         for partition_col in self.partition_cols or []:
             partition_cols.append(partition_col)
         if partition_cols:
-            return
+            spark_options["partitionBy"] = partition_cols
+        return spark_options

     def get_dask_options(self):
         return {"format": "parquet"}
@@ -1067,24 +1057,18 @@ class CSVTarget(BaseStoreTarget):
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-                "path": store_path_to_spark(path, storage_spark_options),
-                "format": "csv",
-                "header": "true",
-            }
-            return {**result, **storage_spark_options}
-        else:
-            return {
-                "path": store_path_to_spark(self.get_target_path()),
+        store, path, url = mlrun.store_manager.get_or_create_store(
+            self.get_target_path()
+        )
+        spark_options = store.get_spark_options()
+        spark_options.update(
+            {
+                "path": store.spark_url + path,
                 "format": "csv",
                 "header": "true",
             }
+        )
+        return spark_options

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         import pyspark.sql.functions as funcs
@@ -1209,7 +1193,11 @@ class NoSqlBaseTarget(BaseStoreTarget):
         df = df.copy(deep=False)
         access_key = self._get_credential("V3IO_ACCESS_KEY")

+        store, target_path = self._get_store_and_path()
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key", access_key)
+
+        _, path_with_container = parse_path(target_path)
         container, path = split_path(path_with_container)

         frames_client = get_frames_client(
@@ -1227,17 +1215,31 @@ class NoSqlTarget(NoSqlBaseTarget):
     def get_table_object(self):
         from storey import Table, V3ioDriver

-        endpoint, uri = parse_path(
+        store, target_path = self._get_store_and_path()
+        endpoint, uri = parse_path(target_path)
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key")
+
         return Table(
             uri,
-            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
+            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
             flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
+        store, target_path = self._get_store_and_path()
+        storage_options = store.get_storage_options()
+        store_access_key = storage_options.get("v3io_access_key")
+        env_access_key = self._secrets.get(
+            "V3IO_ACCESS_KEY", os.getenv("V3IO_ACCESS_KEY")
+        )
+        if store_access_key and env_access_key and store_access_key != env_access_key:
+            logger.warning(
+                "The Spark v3io connector does not support access_key parameterization."
+                "Spark will disregard the store-provided key."
+            )
         spark_options = {
-            "path":
+            "path": store.spark_url + target_path,
             "format": "io.iguaz.v3io.spark.sql.kv",
         }
         if isinstance(key_column, list) and len(key_column) >= 1:
@@ -1330,10 +1332,10 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         endpoint, uri = self._get_server_endpoint()
         parsed_endpoint = urlparse(endpoint)
+        store, path = self._get_store_and_path()
         return {
             "key.column": "_spark_object_name",
-            "table": "{" +
+            "table": "{" + store.spark_url + path,
             "format": "org.apache.spark.sql.redis",
             "host": parsed_endpoint.hostname,
             "port": parsed_endpoint.port,
@@ -1381,10 +1383,12 @@ class StreamTarget(BaseStoreTarget):
         from storey import V3ioDriver

         key_columns = list(key_columns.keys())
-        path = self.
+        store, path = self._get_store_and_path()
         if not path:
             raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
         endpoint, uri = parse_path(path)
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key")
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
@@ -1395,7 +1399,9 @@ class StreamTarget(BaseStoreTarget):
             graph_shape="cylinder",
             class_name="storey.StreamTarget",
             columns=column_list,
-            storage=V3ioDriver(
+            storage=V3ioDriver(
+                webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
+            ),
             stream_path=uri,
             **self.attributes,
         )
@@ -1531,7 +1537,11 @@ class TSDBTarget(BaseStoreTarget):
             key_column = [key_column]
         new_index.extend(key_column)

+        store, target_path = self._get_store_and_path()
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key", access_key)
+
+        _, path_with_container = parse_path(target_path)
         container, path = split_path(path_with_container)

         frames_client = get_frames_client(
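
The target classes above now resolve their store via `self._get_store_and_path()` and take the v3io access key from the store's storage options when one is configured there, falling back to the environment credential. A small illustrative helper (not part of mlrun; the function name and parameters are hypothetical) that captures this precedence:

    import os

    def resolve_v3io_access_key(store, fallback_env_var="V3IO_ACCESS_KEY"):
        # Prefer the access key carried by the datastore profile;
        # otherwise fall back to the credential from the environment.
        storage_options = store.get_storage_options() or {}
        return storage_options.get("v3io_access_key", os.getenv(fallback_env_var))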
mlrun/datastore/utils.py
CHANGED
@@ -15,7 +15,7 @@
 import tarfile
 import tempfile
 import typing
-from urllib.parse import parse_qs, urlparse
+from urllib.parse import parse_qs, urlparse

 import pandas as pd
 import semver
@@ -23,53 +23,6 @@ import semver
 import mlrun.datastore


-def store_path_to_spark(path, spark_options=None):
-    schemas = ["redis://", "rediss://", "ds://"]
-    if any(path.startswith(schema) for schema in schemas):
-        url = urlparse(path)
-        if url.path:
-            path = url.path
-    elif path.startswith("gcs://"):
-        path = "gs:" + path[len("gcs:") :]
-    elif path.startswith("v3io:///"):
-        path = "v3io:" + path[len("v3io:/") :]
-    elif path.startswith("az://"):
-        account_key = None
-        path = "wasbs:" + path[len("az:") :]
-        prefix = "spark.hadoop.fs.azure.account.key."
-        if spark_options:
-            for key in spark_options:
-                if key.startswith(prefix):
-                    account_key = key[len(prefix) :]
-                    break
-        if account_key:
-            # transfer "wasb://basket/some/path" to wasb://basket@account_key.blob.core.windows.net/some/path
-            parsed_url = urlparse(path)
-            new_netloc = f"{parsed_url.hostname}@{account_key}"
-            path = urlunparse(
-                (
-                    parsed_url.scheme,
-                    new_netloc,
-                    parsed_url.path,
-                    parsed_url.params,
-                    parsed_url.query,
-                    parsed_url.fragment,
-                )
-            )
-    elif path.startswith("s3://"):
-        if path.startswith("s3:///"):
-            # 's3:///' not supported since mlrun 0.9.0 should use s3:// instead
-            from mlrun.errors import MLRunInvalidArgumentError
-
-            valid_path = "s3:" + path[len("s3:/") :]
-            raise MLRunInvalidArgumentError(
-                f"'s3:///' is not supported, try using 's3://' instead.\nE.g: '{valid_path}'"
-            )
-        else:
-            path = "s3a:" + path[len("s3:") :]
-    return path
-
-
 def parse_kafka_url(url: str, bootstrap_servers: list = None) -> tuple[str, list]:
     """Generating Kafka topic and adjusting a list of bootstrap servers.

@@ -105,7 +58,7 @@ def upload_tarball(source_dir, target, secrets=None):
     with tarfile.open(mode="w:gz", fileobj=temp_fh) as tar:
         tar.add(source_dir, arcname="")
     stores = mlrun.datastore.store_manager.set(secrets)
-    datastore, subpath = stores.get_or_create_store(target)
+    datastore, subpath, url = stores.get_or_create_store(target)
     datastore.upload(subpath, temp_fh.name)
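
With `store_path_to_spark` removed, its per-scheme path rewriting (gcs to gs, az to wasbs, s3 to s3a, and so on) appears to move behind each datastore's `spark_url` and `get_spark_options()`. The other change in this file is the widened return value of `get_or_create_store`; a sketch of the updated call shape in a caller such as `upload_tarball` (the target path is illustrative):

    import mlrun.datastore

    stores = mlrun.datastore.store_manager.set(secrets=None)
    # The third element (url) is new; existing two-value unpacking must be updated.
    datastore, subpath, url = stores.get_or_create_store("v3io:///projects/demo/archive.tar.gz")
    datastore.upload(subpath, "/tmp/archive.tar.gz")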
mlrun/datastore/v3io.py
CHANGED
@@ -79,6 +79,10 @@ class V3ioStore(DataStore):
         schema = "https" if self.secure else "http"
         return f"{schema}://{self.endpoint}"

+    @property
+    def spark_url(self):
+        return "v3io:/"
+
     @property
     def filesystem(self):
         """return fsspec file system object, if supported"""
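
The new `spark_url` property returns "v3io:/" with a single trailing slash, presumably because the store-relative path already begins with "/"; concatenation then yields the double-slash form used in Spark paths. A trivial check of that assumption (the path is illustrative):

    # Illustrative only: the relative path starts with "/", so a single-slash
    # prefix produces the usual v3io:// Spark URL.
    spark_url = "v3io:/"
    relative_path = "/projects/demo/features"
    assert spark_url + relative_path == "v3io://projects/demo/features"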
mlrun/db/base.py
CHANGED
@@ -676,3 +676,37 @@ class RunDBInterface(ABC):
         self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
     ):
         pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
+
+    def update_model_monitoring_controller(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass
+
+    def enable_model_monitoring(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass