mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__main__.py +4 -2
- mlrun/alerts/alert.py +75 -8
- mlrun/artifacts/base.py +1 -0
- mlrun/artifacts/manager.py +9 -2
- mlrun/common/constants.py +4 -1
- mlrun/common/db/sql_session.py +3 -2
- mlrun/common/formatters/__init__.py +1 -0
- mlrun/common/formatters/artifact.py +1 -0
- mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
- mlrun/common/formatters/run.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/__init__.py +3 -1
- mlrun/common/schemas/alert.py +15 -12
- mlrun/common/schemas/api_gateway.py +6 -6
- mlrun/common/schemas/auth.py +5 -0
- mlrun/common/schemas/client_spec.py +0 -1
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/frontend_spec.py +7 -0
- mlrun/common/schemas/function.py +7 -0
- mlrun/common/schemas/model_monitoring/__init__.py +4 -3
- mlrun/common/schemas/model_monitoring/constants.py +41 -26
- mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
- mlrun/common/schemas/notification.py +69 -12
- mlrun/common/schemas/project.py +45 -12
- mlrun/common/schemas/workflow.py +10 -2
- mlrun/common/types.py +1 -0
- mlrun/config.py +91 -35
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +57 -25
- mlrun/datastore/__init__.py +1 -0
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +125 -37
- mlrun/datastore/base.py +42 -21
- mlrun/datastore/datastore.py +4 -2
- mlrun/datastore/datastore_profile.py +1 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +85 -29
- mlrun/datastore/inmem.py +4 -1
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +25 -12
- mlrun/datastore/sources.py +76 -4
- mlrun/datastore/spark_utils.py +30 -0
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +102 -131
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/base.py +15 -6
- mlrun/db/httpdb.py +57 -28
- mlrun/db/nopdb.py +29 -5
- mlrun/errors.py +20 -3
- mlrun/execution.py +46 -5
- mlrun/feature_store/api.py +25 -1
- mlrun/feature_store/common.py +6 -11
- mlrun/feature_store/feature_vector.py +3 -1
- mlrun/feature_store/retrieval/job.py +4 -1
- mlrun/feature_store/retrieval/spark_merger.py +10 -39
- mlrun/feature_store/steps.py +8 -0
- mlrun/frameworks/_common/plan.py +3 -3
- mlrun/frameworks/_ml_common/plan.py +1 -1
- mlrun/frameworks/parallel_coordinates.py +2 -3
- mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
- mlrun/k8s_utils.py +48 -2
- mlrun/launcher/client.py +6 -6
- mlrun/launcher/local.py +2 -2
- mlrun/model.py +215 -34
- mlrun/model_monitoring/api.py +38 -24
- mlrun/model_monitoring/applications/__init__.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +60 -29
- mlrun/model_monitoring/applications/base.py +2 -174
- mlrun/model_monitoring/applications/context.py +197 -70
- mlrun/model_monitoring/applications/evidently_base.py +11 -85
- mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
- mlrun/model_monitoring/applications/results.py +4 -4
- mlrun/model_monitoring/controller.py +110 -282
- mlrun/model_monitoring/db/stores/__init__.py +8 -3
- mlrun/model_monitoring/db/stores/base/store.py +3 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
- mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
- mlrun/model_monitoring/db/tsdb/base.py +147 -15
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
- mlrun/model_monitoring/helpers.py +70 -50
- mlrun/model_monitoring/stream_processing.py +96 -195
- mlrun/model_monitoring/writer.py +13 -5
- mlrun/package/packagers/default_packager.py +2 -2
- mlrun/projects/operations.py +16 -8
- mlrun/projects/pipelines.py +126 -115
- mlrun/projects/project.py +286 -129
- mlrun/render.py +3 -3
- mlrun/run.py +38 -19
- mlrun/runtimes/__init__.py +19 -8
- mlrun/runtimes/base.py +4 -1
- mlrun/runtimes/daskjob.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -1
- mlrun/runtimes/kubejob.py +6 -6
- mlrun/runtimes/local.py +12 -5
- mlrun/runtimes/nuclio/api_gateway.py +68 -8
- mlrun/runtimes/nuclio/application/application.py +307 -70
- mlrun/runtimes/nuclio/function.py +63 -14
- mlrun/runtimes/nuclio/serving.py +10 -10
- mlrun/runtimes/pod.py +25 -19
- mlrun/runtimes/remotesparkjob.py +2 -5
- mlrun/runtimes/sparkjob/spark3job.py +16 -17
- mlrun/runtimes/utils.py +34 -0
- mlrun/serving/routers.py +2 -5
- mlrun/serving/server.py +37 -19
- mlrun/serving/states.py +30 -3
- mlrun/serving/v2_serving.py +44 -35
- mlrun/track/trackers/mlflow_tracker.py +5 -0
- mlrun/utils/async_http.py +1 -1
- mlrun/utils/db.py +18 -0
- mlrun/utils/helpers.py +150 -36
- mlrun/utils/http.py +1 -1
- mlrun/utils/notifications/notification/__init__.py +0 -1
- mlrun/utils/notifications/notification/webhook.py +8 -1
- mlrun/utils/notifications/notification_pusher.py +1 -1
- mlrun/utils/v3io_clients.py +2 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -271
- mlrun/model_monitoring/controller_handler.py +0 -37
- mlrun/model_monitoring/evidently_application.py +0 -20
- mlrun/model_monitoring/prometheus.py +0 -216
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
CHANGED
@@ -47,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
 from .utils import (
     _generate_sql_query_with_time_filter,
     filter_df_start_end_time,
-    parse_kafka_url,
     select_columns_from_df,
 )

@@ -390,6 +389,7 @@ class BaseStoreTarget(DataTargetBase):
     is_offline = False
     support_spark = False
     support_storey = False
+    support_pandas = False
     support_append = False

     def __init__(
@@ -439,6 +439,12 @@ class BaseStoreTarget(DataTargetBase):
         self.storage_options = storage_options
         self.schema = schema or {}
         self.credentials_prefix = credentials_prefix
+        if credentials_prefix:
+            warnings.warn(
+                "The 'credentials_prefix' parameter is deprecated and will be removed in "
+                "1.9.0. Please use datastore profiles instead.",
+                FutureWarning,
+            )

         self._target = None
         self._resource = None
@@ -549,9 +555,7 @@ class BaseStoreTarget(DataTargetBase):
             os.makedirs(dir, exist_ok=True)
         target_df = df
         partition_cols = None  # single parquet file
-        if not
-        ".pq"
-        ):  # directory
+        if not mlrun.utils.helpers.is_parquet_file(target_path):  # directory
             partition_cols = []
             if timestamp_key and (
                 self.partitioned or self.time_partitioning_granularity
@@ -728,6 +732,10 @@ class BaseStoreTarget(DataTargetBase):
         timestamp_key=None,
         featureset_status=None,
     ):
+        if not self.support_storey:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support storey engine"
+            )
         raise NotImplementedError()

     def purge(self):
@@ -756,6 +764,8 @@ class BaseStoreTarget(DataTargetBase):
         **kwargs,
     ):
         """return the target data as dataframe"""
+        if not self.support_pandas:
+            raise NotImplementedError()
         mlrun.utils.helpers.additional_filters_warning(
             additional_filters, self.__class__
         )
@@ -770,6 +780,10 @@ class BaseStoreTarget(DataTargetBase):

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         # options used in spark.read.load(**options)
+        if not self.support_spark:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"{type(self).__name__} does not support spark engine"
+            )
         raise NotImplementedError()

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
@@ -813,6 +827,7 @@ class ParquetTarget(BaseStoreTarget):
     support_spark = True
     support_storey = True
     support_dask = True
+    support_pandas = True
     support_append = True

     def __init__(
@@ -918,10 +933,9 @@ class ParquetTarget(BaseStoreTarget):
                 if time_unit == time_partitioning_granularity:
                     break

-
-
-
-            and not self.get_target_path().endswith(".pq")
+        target_path = self.get_target_path()
+        if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
+            target_path
         ):
             partition_cols = []

@@ -929,25 +943,16 @@ class ParquetTarget(BaseStoreTarget):
         for key_column in key_columns:
             tuple_key_columns.append((key_column.name, key_column.value_type))

-        store, path_in_store, target_path = self._get_store_and_path()
-
-        storage_options = store.get_storage_options()
-        if storage_options and self.storage_options:
-            storage_options = merge(storage_options, self.storage_options)
-        else:
-            storage_options = storage_options or self.storage_options
-
         step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
             path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             update_last_written=featureset_status.update_last_written_for_target,
@@ -1040,9 +1045,7 @@ class ParquetTarget(BaseStoreTarget):
         return result

     def is_single_file(self):
-
-        return self.path.endswith(".parquet") or self.path.endswith(".pq")
-        return False
+        return mlrun.utils.helpers.is_parquet_file(self.path)

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         # If partitioning by time, add the necessary columns
@@ -1082,6 +1085,7 @@ class CSVTarget(BaseStoreTarget):
     is_offline = True
     support_spark = True
     support_storey = True
+    support_pandas = True

     @staticmethod
     def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
@@ -1103,17 +1107,16 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
+        target_path = self.get_target_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
             path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=store.get_storage_options(),
             **self.attributes,
         )
@@ -1289,7 +1292,9 @@ class SnowflakeTarget(BaseStoreTarget):
         additional_filters=None,
         **kwargs,
     ):
-        raise
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} does not support pandas engine"
+        )

     @property
     def source_spark_attributes(self) -> dict:
@@ -1325,6 +1330,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
+    def _get_table_and_columns(self, features, key_columns):
         key_columns = list(key_columns.keys())
         table = self._resource.uri
         column_list = self._get_column_list(
@@ -1343,15 +1361,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
                 col for col in column_list if col[0] not in aggregate_features
             ]

-        graph.add_step(
-            name=self.name or self.writer_step_name,
-            after=after,
-            graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
-            columns=column_list,
-            table=table,
-            **self.attributes,
-        )
+        return table, column_list

     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
@@ -1362,19 +1372,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1487,11 +1484,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"

-
-
-
-    def _get_server_endpoint(self):
-        endpoint, uri = parse_path(self.get_target_path())
+    @staticmethod
+    def get_server_endpoint(path, credentials_prefix=None):
+        endpoint, uri = parse_path(path)
         endpoint = endpoint or mlrun.mlconf.redis.url
         if endpoint.startswith("ds://"):
             datastore_profile = datastore_profile_read(endpoint)
@@ -1508,8 +1503,15 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Provide Redis username and password only via secrets"
             )
-
-
+        credentials_prefix = credentials_prefix or mlrun.get_secret_or_env(
+            key="CREDENTIALS_PREFIX"
+        )
+        user = mlrun.get_secret_or_env(
+            "REDIS_USER", default="", prefix=credentials_prefix
+        )
+        password = mlrun.get_secret_or_env(
+            "REDIS_PASSWORD", default="", prefix=credentials_prefix
+        )
         host = parsed_endpoint.hostname
         port = parsed_endpoint.port if parsed_endpoint.port else "6379"
         scheme = parsed_endpoint.scheme
@@ -1523,7 +1525,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         from storey import Table
         from storey.redis_driver import RedisDriver

-        endpoint, uri = self.
+        endpoint, uri = self.get_server_endpoint(
+            self.get_target_path(), self.credentials_prefix
+        )

         return Table(
             uri,
@@ -1532,7 +1536,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         )

     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        endpoint, uri = self.
+        endpoint, uri = self.get_server_endpoint(
+            self.get_target_path(), self.credentials_prefix
+        )
         parsed_endpoint = urlparse(endpoint)
         store, path_in_store, path = self._get_store_and_path()
         return {
@@ -1564,6 +1570,29 @@ class RedisNoSqlTarget(NoSqlBaseTarget):

         return df

+    def add_writer_step(
+        self,
+        graph,
+        after,
+        features,
+        key_columns=None,
+        timestamp_key=None,
+        featureset_status=None,
+    ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            path=self.get_target_path(),
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            credentials_prefix=self.credentials_prefix,
+            **self.attributes,
+        )
+

 class StreamTarget(BaseStoreTarget):
     kind = TargetTypes.stream
@@ -1582,45 +1611,25 @@ class StreamTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
-        from storey import V3ioDriver
-
         key_columns = list(key_columns.keys())
-
-        if not path:
-            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
-        endpoint, uri = parse_path(path)
-        storage_options = store.get_storage_options()
-        access_key = storage_options.get("v3io_access_key")
+
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
+        stream_path = self.get_target_path()
+        if not stream_path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")

         graph.add_step(
             name=self.name or "StreamTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
             columns=column_list,
-
-            webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-            ),
-            stream_path=uri,
+            stream_path=stream_path,
             **self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-

 class KafkaTarget(BaseStoreTarget):
     """
@@ -1693,49 +1702,21 @@ class KafkaTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
-            datastore_profile = datastore_profile_read(self.path)
-            attributes = datastore_profile.attributes()
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic = datastore_profile.topic
-        else:
-            attributes = copy(self.attributes)
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
+        path = self.get_target_path()

-        if not
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "KafkaTarget requires a path (topic)"
-            )
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")

         graph.add_step(
             name=self.name or "KafkaTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
             columns=column_list,
-
-
-            **attributes,
+            path=path,
+            attributes=self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def purge(self):
         pass

@@ -1770,7 +1751,7 @@ class TSDBTarget(BaseStoreTarget):

         graph.add_step(
             name=self.name or "TSDBTarget",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
             after=after,
             graph_shape="cylinder",
             path=uri,
@@ -1780,19 +1761,6 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )

-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1830,6 +1798,7 @@ class CustomTarget(BaseStoreTarget):
     is_online = False
     support_spark = False
     support_storey = True
+    support_pandas = True

     def __init__(
         self,
@@ -1865,6 +1834,7 @@ class CustomTarget(BaseStoreTarget):
 class DFTarget(BaseStoreTarget):
     kind = TargetTypes.dataframe
     support_storey = True
+    support_pandas = True

     def __init__(self, *args, name="dataframe", **kwargs):
         self._df = None
@@ -1927,6 +1897,7 @@ class SQLTarget(BaseStoreTarget):
     is_online = True
     support_spark = False
     support_storey = True
+    support_pandas = True

     def __init__(
         self,
@@ -2069,7 +2040,7 @@ class SQLTarget(BaseStoreTarget):
             name=self.name or "SqlTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
             columns=column_list,
             header=True,
             table=table,
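The BaseStoreTarget.__init__ hunk above deprecates credentials_prefix in favor of datastore profiles, and RedisNoSqlTarget's endpoint resolution becomes a static get_server_endpoint(path, credentials_prefix) helper. A minimal client-side sketch of how that surfaces, assuming a target can be constructed directly with just a path and a prefix; the endpoint and prefix values are illustrative, not taken from the diff:

import warnings

from mlrun.datastore.targets import RedisNoSqlTarget

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Passing credentials_prefix now triggers the FutureWarning added in
    # BaseStoreTarget.__init__ above; "redis_creds" is a hypothetical prefix.
    target = RedisNoSqlTarget(
        path="redis://my-redis:6379/projects/demo/features",
        credentials_prefix="redis_creds",
    )
assert any(issubclass(w.category, FutureWarning) for w in caught)

# get_server_endpoint is now a @staticmethod that takes the path (and optional
# credentials prefix) explicitly, so it can be called without a built target.
endpoint, uri = RedisNoSqlTarget.get_server_endpoint(
    "redis://my-redis:6379/projects/demo/features", credentials_prefix="redis_creds"
)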
mlrun/datastore/v3io.py
CHANGED
@@ -140,6 +140,7 @@ class V3ioStore(DataStore):
         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
     ):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        data, _ = self._prepare_put_data(data, append)
         container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         buffer_offset = 0
mlrun/db/base.py
CHANGED
@@ -154,6 +154,7 @@ class RunDBInterface(ABC):
             mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
         ),
         secrets: dict = None,
+        iter=None,
     ):
         pass

@@ -174,7 +175,9 @@ class RunDBInterface(ABC):
         pass

     @abstractmethod
-    def list_functions(
+    def list_functions(
+        self, name=None, project="", tag="", labels=None, since=None, until=None
+    ):
         pass

     @abstractmethod
@@ -239,9 +242,8 @@ class RunDBInterface(ABC):
                 )
                 artifact_identifiers.append(
                     mlrun.common.schemas.ArtifactIdentifier(
-                        key
-
-                        # pass the tree as uid
+                        # we pass the db_key and not the key so the API will be able to find the artifact in the db
+                        key=mlrun.utils.get_in_artifact(artifact_obj, "db_key"),
                         uid=mlrun.utils.get_in_artifact(artifact_obj, "uid"),
                         producer_id=mlrun.utils.get_in_artifact(artifact_obj, "tree"),
                         kind=mlrun.utils.get_in_artifact(artifact_obj, "kind"),
@@ -393,6 +395,9 @@ class RunDBInterface(ABC):
         partition_order: Union[
             mlrun.common.schemas.OrderType, str
         ] = mlrun.common.schemas.OrderType.desc,
+        format_: Union[
+            str, mlrun.common.formatters.FeatureSetFormat
+        ] = mlrun.common.formatters.FeatureSetFormat.full,
     ) -> list[dict]:
         pass

@@ -687,8 +692,11 @@ class RunDBInterface(ABC):
     @abstractmethod
     def store_api_gateway(
         self,
-        api_gateway:
-
+        api_gateway: Union[
+            mlrun.common.schemas.APIGateway,
+            "mlrun.runtimes.nuclio.api_gateway.APIGateway",
+        ],
+        project: Optional[str] = None,
     ):
         pass

@@ -924,5 +932,6 @@ class RunDBInterface(ABC):
         self,
         project: str,
         credentials: dict[str, str],
+        replace_creds: bool,
     ) -> None:
         pass
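The mlrun/db/base.py hunks above widen several abstract signatures on RunDBInterface. A partial, illustrative stub of what an implementation now has to accept; the class name and method bodies are hypothetical, only the parameters mirror the hunks, and the class is not instantiable as-is since the interface defines many more abstract methods:

from typing import Optional, Union

import mlrun.common.schemas
from mlrun.db.base import RunDBInterface


class StubDB(RunDBInterface):
    def list_functions(
        self, name=None, project="", tag="", labels=None, since=None, until=None
    ):
        # `since` / `until` time filters are now part of the abstract signature
        return []

    def store_api_gateway(
        self,
        api_gateway: Union[
            mlrun.common.schemas.APIGateway,
            "mlrun.runtimes.nuclio.api_gateway.APIGateway",
        ],
        project: Optional[str] = None,
    ):
        # api_gateway may now also be the runtime-level APIGateway wrapper
        pass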