mlrun 1.7.0rc38__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/alerts/alert.py +30 -27
- mlrun/common/constants.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/alert.py +3 -0
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
- mlrun/common/schemas/notification.py +1 -0
- mlrun/config.py +1 -1
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/alibaba_oss.py +3 -2
- mlrun/datastore/azure_blob.py +7 -9
- mlrun/datastore/base.py +13 -1
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +84 -29
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +3 -2
- mlrun/datastore/sources.py +54 -0
- mlrun/datastore/storeytargets.py +147 -0
- mlrun/datastore/targets.py +76 -122
- mlrun/datastore/v3io.py +1 -0
- mlrun/db/httpdb.py +6 -1
- mlrun/errors.py +8 -0
- mlrun/execution.py +7 -0
- mlrun/feature_store/api.py +5 -0
- mlrun/feature_store/retrieval/job.py +1 -0
- mlrun/model.py +24 -3
- mlrun/model_monitoring/api.py +10 -2
- mlrun/model_monitoring/applications/_application_steps.py +52 -34
- mlrun/model_monitoring/applications/context.py +206 -70
- mlrun/model_monitoring/applications/histogram_data_drift.py +15 -13
- mlrun/model_monitoring/controller.py +15 -12
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +19 -9
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +85 -47
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +46 -10
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +38 -24
- mlrun/model_monitoring/helpers.py +54 -18
- mlrun/model_monitoring/stream_processing.py +10 -29
- mlrun/projects/pipelines.py +19 -30
- mlrun/projects/project.py +86 -67
- mlrun/run.py +8 -6
- mlrun/runtimes/__init__.py +4 -0
- mlrun/runtimes/nuclio/api_gateway.py +18 -0
- mlrun/runtimes/nuclio/application/application.py +150 -59
- mlrun/runtimes/nuclio/function.py +5 -11
- mlrun/runtimes/nuclio/serving.py +2 -2
- mlrun/runtimes/utils.py +16 -0
- mlrun/serving/routers.py +1 -1
- mlrun/serving/server.py +19 -5
- mlrun/serving/states.py +8 -0
- mlrun/serving/v2_serving.py +34 -26
- mlrun/utils/helpers.py +33 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +9 -12
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +59 -58
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc38.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
mlrun/datastore/storeytargets.py
ADDED

```diff
@@ -0,0 +1,147 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import storey
+from mergedeep import merge
+from storey import V3ioDriver
+
+import mlrun
+import mlrun.model_monitoring.helpers
+from mlrun.datastore.base import DataStore
+
+from .utils import (
+    parse_kafka_url,
+)
+
+"""
+Storey targets expect storage_options, which may contain credentials.
+To avoid passing it openly within the graph, we use wrapper classes.
+"""
+
+
+def get_url_and_storage_options(path, external_storage_options=None):
+    store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(path)
+    storage_options = store.get_storage_options()
+    if storage_options and external_storage_options:
+        # merge external storage options with the store's storage options.
+        # storage_options takes precedence
+        storage_options = merge(external_storage_options, storage_options)
+    else:
+        storage_options = storage_options or external_storage_options
+    return url, DataStore._sanitize_storage_options(storage_options)
+
+
+class TDEngineStoreyTarget(storey.TDEngineTarget):
+    def __init__(self, *args, **kwargs):
+        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
+        super().__init__(*args, **kwargs)
+
+
+class StoreyTargetUtils:
+    @staticmethod
+    def process_args_and_kwargs(args, kwargs):
+        args = list(args)
+        path = args[0] if args else kwargs.get("path")
+        external_storage_options = kwargs.get("storage_options")
+
+        url, storage_options = get_url_and_storage_options(
+            path, external_storage_options
+        )
+
+        if storage_options:
+            kwargs["storage_options"] = storage_options
+        if args:
+            args[0] = url
+        if "path" in kwargs:
+            kwargs["path"] = url
+        return args, kwargs
+
+
+class ParquetStoreyTarget(storey.ParquetTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class CSVStoreyTarget(storey.CSVTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class StreamStoreyTarget(storey.StreamTarget):
+    def __init__(self, *args, **kwargs):
+        args = list(args)
+
+        path = args[0] if args else kwargs.get("stream_path")
+        endpoint, storage_options = get_url_and_storage_options(path)
+
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
+
+        access_key = storage_options.get("v3io_access_key")
+        storage = (
+            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
+        )
+
+        if storage_options:
+            kwargs["storage"] = storage
+        if args:
+            args[0] = endpoint
+        if "stream_path" in kwargs:
+            kwargs["stream_path"] = endpoint
+
+        super().__init__(*args, **kwargs)
+
+
+class KafkaStoreyTarget(storey.KafkaTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        attributes = kwargs.pop("attributes", None)
+        if path and path.startswith("ds://"):
+            datastore_profile = (
+                mlrun.datastore.datastore_profile.datastore_profile_read(path)
+            )
+            attributes = merge(attributes, datastore_profile.attributes())
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic = datastore_profile.topic
+        else:
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic, brokers = parse_kafka_url(path, brokers)
+
+        if not topic:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
+        kwargs["brokers"] = brokers
+        kwargs["topic"] = topic
+        super().__init__(*args, **kwargs, **attributes)
+
+
+class NoSqlStoreyTarget(storey.NoSqlTarget):
+    pass
+
+
+class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        endpoint, uri = mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
+            path
+        )
+        kwargs["path"] = endpoint + "/" + uri
+        super().__init__(*args, **kwargs)
+
+
+class TSDBStoreyTarget(storey.TSDBTarget):
+    pass
```
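The pattern above keeps credentials out of the serialized graph: each wrapper receives only a path string, and the credential lookup happens inside the step's own `__init__`. A minimal sketch of the resolution flow, using the calls from the new file (the `s3://` path is a made-up example, and the plain-dict merge is a shallow stand-in for `mergedeep.merge`):

```python
import mlrun
from mlrun.datastore.base import DataStore


def resolve(path, external_storage_options=None):
    # same flow as get_url_and_storage_options() above: the store derived
    # from the path supplies credentials, which win over external options
    store, _, url = mlrun.store_manager.get_or_create_store(path)
    storage_options = store.get_storage_options() or {}
    merged = {**(external_storage_options or {}), **storage_options}
    return url, DataStore._sanitize_storage_options(merged)


# a graph step carries only this string; credentials are resolved at
# step-construction time, inside the wrapper class
url, options = resolve("s3://some-bucket/data.parquet")
```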
mlrun/datastore/targets.py
CHANGED
```diff
@@ -47,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
 from .utils import (
     _generate_sql_query_with_time_filter,
     filter_df_start_end_time,
-    parse_kafka_url,
     select_columns_from_df,
 )
 
@@ -390,6 +389,7 @@ class BaseStoreTarget(DataTargetBase):
     is_offline = False
     support_spark = False
     support_storey = False
+    support_pandas = False
     support_append = False
 
     def __init__(
@@ -758,6 +758,8 @@ class BaseStoreTarget(DataTargetBase):
         **kwargs,
     ):
         """return the target data as dataframe"""
+        if not self.support_pandas:
+            raise NotImplementedError()
         mlrun.utils.helpers.additional_filters_warning(
             additional_filters, self.__class__
         )
```
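The net effect of the new `support_pandas` flag is capability gating in one place: `BaseStoreTarget.as_df` now fails fast for targets that cannot be read back as a dataframe, which is what allows the per-class `as_df` overrides that only raised `NotImplementedError` to be deleted later in this diff. A condensed sketch of the pattern (class names from the diff; bodies elided):

```python
class BaseStoreTarget:
    support_pandas = False  # subclasses opt in explicitly

    def as_df(self, **kwargs):
        if not self.support_pandas:
            raise NotImplementedError()
        ...  # read the target back as a dataframe


class ParquetTarget(BaseStoreTarget):
    support_pandas = True  # file-backed, so it can be loaded as a dataframe


class StreamTarget(BaseStoreTarget):
    pass  # keeps the default: as_df() raises NotImplementedError
```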
```diff
@@ -819,6 +821,7 @@ class ParquetTarget(BaseStoreTarget):
     support_spark = True
     support_storey = True
     support_dask = True
+    support_pandas = True
     support_append = True
 
     def __init__(
@@ -924,8 +927,9 @@ class ParquetTarget(BaseStoreTarget):
             if time_unit == time_partitioning_granularity:
                 break
 
+        target_path = self.get_target_path()
         if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
-            self.get_target_path()
+            target_path
         ):
             partition_cols = []
 
@@ -933,25 +937,16 @@ class ParquetTarget(BaseStoreTarget):
         for key_column in key_columns:
             tuple_key_columns.append((key_column.name, key_column.value_type))
 
-        store, path_in_store, target_path = self._get_store_and_path()
-
-        storage_options = store.get_storage_options()
-        if storage_options and self.storage_options:
-            storage_options = merge(storage_options, self.storage_options)
-        else:
-            storage_options = storage_options or self.storage_options
-
         step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.ParquetTarget",
+            class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
             path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             update_last_written=featureset_status.update_last_written_for_target,
```
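`ParquetTarget` no longer merges storage options inline; that logic moved into `get_url_and_storage_options` in the new `storeytargets.py`, which uses `mergedeep.merge` so that nested option blocks combine key by key instead of being replaced wholesale. An illustrative sketch (the option keys are made up):

```python
from mergedeep import merge

external = {"client_kwargs": {"region_name": "us-east-1"}, "anon": False}
from_store = {"client_kwargs": {"endpoint_url": "http://minio:9000"}}

# merge(destination, source): source values win on conflicting keys, so the
# store-derived options take precedence, matching the comment in the new file
merged = merge(external, from_store)
# -> {"client_kwargs": {"region_name": "us-east-1",
#                       "endpoint_url": "http://minio:9000"},
#     "anon": False}
```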
```diff
@@ -1084,6 +1079,7 @@ class CSVTarget(BaseStoreTarget):
     is_offline = True
     support_spark = True
     support_storey = True
+    support_pandas = True
 
     @staticmethod
     def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
@@ -1105,17 +1101,16 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
+        target_path = self.get_target_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.CSVTarget",
+            class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
             path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=store.get_storage_options(),
             **self.attributes,
         )
 
@@ -1292,7 +1287,7 @@ class SnowflakeTarget(BaseStoreTarget):
         **kwargs,
     ):
         raise mlrun.errors.MLRunRuntimeError(
-            f"{type(self).__name__} does not support
+            f"{type(self).__name__} does not support pandas engine"
         )
 
     @property
@@ -1329,6 +1324,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
+    def _get_table_and_columns(self, features, key_columns):
         key_columns = list(key_columns.keys())
         table = self._resource.uri
         column_list = self._get_column_list(
@@ -1347,15 +1355,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
                 col for col in column_list if col[0] not in aggregate_features
             ]
 
-        graph.add_step(
-            name=self.name or self.writer_step_name,
-            after=after,
-            graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
-            columns=column_list,
-            table=table,
-            **self.attributes,
-        )
+        return table, column_list
 
     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
@@ -1366,19 +1366,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}
 
-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1491,11 +1478,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"
 
-
-
-
-    def _get_server_endpoint(self):
-        endpoint, uri = parse_path(self.get_target_path())
+    @staticmethod
+    def get_server_endpoint(path):
+        endpoint, uri = parse_path(path)
         endpoint = endpoint or mlrun.mlconf.redis.url
         if endpoint.startswith("ds://"):
             datastore_profile = datastore_profile_read(endpoint)
@@ -1512,8 +1497,13 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Provide Redis username and password only via secrets"
             )
-
-
+        credentials_prefix = mlrun.get_secret_or_env(key="CREDENTIALS_PREFIX")
+        user = mlrun.get_secret_or_env(
+            "REDIS_USER", default="", prefix=credentials_prefix
+        )
+        password = mlrun.get_secret_or_env(
+            "REDIS_PASSWORD", default="", prefix=credentials_prefix
+        )
         host = parsed_endpoint.hostname
         port = parsed_endpoint.port if parsed_endpoint.port else "6379"
         scheme = parsed_endpoint.scheme
```
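Turning the endpoint resolver into a `@staticmethod` is what lets the new `RedisNoSqlStoreyTarget` wrapper (in `storeytargets.py` above) resolve the server from a bare path without holding a target instance. A sketch of the call it enables (the Redis URL is hypothetical):

```python
from mlrun.datastore.targets import RedisNoSqlTarget

# also accepts ds:// profile paths, and falls back to mlrun.mlconf.redis.url
# when the path itself carries no endpoint
endpoint, uri = RedisNoSqlTarget.get_server_endpoint("redis://my-redis:6379/features")
path = endpoint + "/" + uri  # as recomposed in RedisNoSqlStoreyTarget.__init__
```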
```diff
@@ -1527,7 +1517,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         from storey import Table
         from storey.redis_driver import RedisDriver
 
-        endpoint, uri = self._get_server_endpoint()
+        endpoint, uri = self.get_server_endpoint(self.get_target_path())
 
         return Table(
             uri,
@@ -1536,7 +1526,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         )
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        endpoint, uri = self._get_server_endpoint()
+        endpoint, uri = self.get_server_endpoint(self.get_target_path())
         parsed_endpoint = urlparse(endpoint)
         store, path_in_store, path = self._get_store_and_path()
         return {
@@ -1568,6 +1558,28 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
 
         return df
 
+    def add_writer_step(
+        self,
+        graph,
+        after,
+        features,
+        key_columns=None,
+        timestamp_key=None,
+        featureset_status=None,
+    ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            path=self.get_target_path(),
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
 
 class StreamTarget(BaseStoreTarget):
     kind = TargetTypes.stream
@@ -1586,45 +1598,25 @@ class StreamTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
-        from storey import V3ioDriver
-
         key_columns = list(key_columns.keys())
-
-        if not path:
-            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
-        endpoint, uri = parse_path(path)
-        storage_options = store.get_storage_options()
-        access_key = storage_options.get("v3io_access_key")
+
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
+        stream_path = self.get_target_path()
+        if not stream_path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
 
         graph.add_step(
             name=self.name or "StreamTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.StreamTarget",
+            class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
             columns=column_list,
-            storage=V3ioDriver(
-                webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-            ),
-            stream_path=uri,
+            stream_path=stream_path,
             **self.attributes,
         )
 
-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
 
 class KafkaTarget(BaseStoreTarget):
     """
@@ -1697,49 +1689,21 @@ class KafkaTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
-        if self.path and self.path.startswith("ds://"):
-            datastore_profile = datastore_profile_read(self.path)
-            attributes = datastore_profile.attributes()
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic = datastore_profile.topic
-        else:
-            attributes = copy(self.attributes)
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
+        path = self.get_target_path()
 
-        if not topic:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "KafkaTarget requires a path (topic)"
-            )
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")
 
         graph.add_step(
             name=self.name or "KafkaTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.KafkaTarget",
+            class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
             columns=column_list,
-            brokers=brokers,
-            topic=topic,
-            **attributes,
+            path=path,
+            attributes=self.attributes,
         )
 
-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def purge(self):
         pass
```
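`KafkaTarget.add_writer_step` now forwards the raw path plus attributes, and `KafkaStoreyTarget` resolves brokers and topic at step-construction time, handling both plain Kafka URLs (via `parse_kafka_url`) and `ds://` datastore profiles. A sketch of the two forms (the broker and topic names are made up, and the URL shape is an assumption):

```python
from mlrun.datastore.utils import parse_kafka_url

# plain URL: topic and brokers are encoded in the path itself; explicitly
# passed brokers serve as a fallback, per the call in KafkaStoreyTarget
topic, brokers = parse_kafka_url("kafka://broker-1:9092/my-topic")

# ds:// path: brokers and topic instead come from a stored datastore profile,
# read via mlrun.datastore.datastore_profile.datastore_profile_read(path)
```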
```diff
@@ -1774,7 +1738,7 @@ class TSDBTarget(BaseStoreTarget):
 
         graph.add_step(
             name=self.name or "TSDBTarget",
-            class_name="storey.TSDBTarget",
+            class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
             after=after,
             graph_shape="cylinder",
             path=uri,
@@ -1784,19 +1748,6 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(
-        self,
-        columns=None,
-        df_module=None,
-        entities=None,
-        start_time=None,
-        end_time=None,
-        time_column=None,
-        additional_filters=None,
-        **kwargs,
-    ):
-        raise NotImplementedError()
-
     def write_dataframe(
         self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
     ):
@@ -1834,6 +1785,7 @@ class CustomTarget(BaseStoreTarget):
     is_online = False
     support_spark = False
     support_storey = True
+    support_pandas = True
 
     def __init__(
         self,
@@ -1869,6 +1821,7 @@ class CustomTarget(BaseStoreTarget):
 class DFTarget(BaseStoreTarget):
     kind = TargetTypes.dataframe
     support_storey = True
+    support_pandas = True
 
     def __init__(self, *args, name="dataframe", **kwargs):
         self._df = None
@@ -1931,6 +1884,7 @@ class SQLTarget(BaseStoreTarget):
     is_online = True
     support_spark = False
     support_storey = True
+    support_pandas = True
 
     def __init__(
         self,
@@ -2073,7 +2027,7 @@ class SQLTarget(BaseStoreTarget):
             name=self.name or "SqlTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
             columns=column_list,
             header=True,
             table=table,
```
mlrun/datastore/v3io.py
CHANGED
```diff
@@ -140,6 +140,7 @@ class V3ioStore(DataStore):
         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
     ):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        data, _ = self._prepare_put_data(data, append)
         container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         buffer_offset = 0
```
mlrun/db/httpdb.py
CHANGED
```diff
@@ -3475,7 +3475,7 @@ class HTTPRunDB(RunDBInterface):
         if response.status_code == http.HTTPStatus.ACCEPTED:
             if delete_resources:
                 logger.info(
-                    "Model Monitoring is being
+                    "Model Monitoring is being disabled",
                     project_name=project,
                 )
             if delete_user_applications:
@@ -4216,6 +4216,9 @@ class HTTPRunDB(RunDBInterface):
         :param project: The project that the alert belongs to.
         :returns: The created/modified alert.
         """
+        if not alert_data:
+            raise mlrun.errors.MLRunInvalidArgumentError("Alert data must be provided")
+
         project = project or config.default_project
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"put alert {project}/alerts/{alert_name}"
@@ -4224,6 +4227,8 @@ class HTTPRunDB(RunDBInterface):
             if isinstance(alert_data, AlertConfig)
             else AlertConfig.from_dict(alert_data)
         )
+        # Validation is necessary here because users can directly invoke this function
+        # through `mlrun.get_run_db().store_alert_config()`.
         alert_instance.validate_required_fields()
 
         alert_data = alert_instance.to_dict()
```
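Since `store_alert_config` is reachable directly through the DB client (as the new comment notes), the guard turns a confusing server-side failure into an immediate client-side error. A sketch of the failure mode it catches (the project and alert names are made up):

```python
import mlrun

db = mlrun.get_run_db()
# passing no alert data now raises MLRunInvalidArgumentError
# ("Alert data must be provided") before any request is sent
db.store_alert_config("drift-alert", alert_data=None, project="my-project")
```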
mlrun/errors.py
CHANGED
```diff
@@ -209,6 +209,14 @@ class MLRunInvalidMMStoreType(MLRunHTTPStatusError, ValueError):
     error_status_code = HTTPStatus.BAD_REQUEST.value
 
 
+class MLRunStreamConnectionFailure(MLRunHTTPStatusError, ValueError):
+    error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
+class MLRunTSDBConnectionFailure(MLRunHTTPStatusError, ValueError):
+    error_status_code = HTTPStatus.BAD_REQUEST.value
+
+
 class MLRunRetryExhaustedError(Exception):
     pass
```
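Both new exceptions follow the pattern already used in this module: subclass `MLRunHTTPStatusError` and set a class-level `error_status_code` so the API layer maps them to HTTP 400. A usage sketch:

```python
from http import HTTPStatus

import mlrun.errors

try:
    raise mlrun.errors.MLRunTSDBConnectionFailure("TSDB is unreachable")
except mlrun.errors.MLRunHTTPStatusError as exc:
    assert exc.error_status_code == HTTPStatus.BAD_REQUEST.value
```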
mlrun/execution.py
CHANGED
```diff
@@ -921,6 +921,13 @@ class MLClientCtx:
             updates, self._uid, self.project, iter=self._iteration
         )
 
+    def get_notifications(self):
+        """Get the list of notifications"""
+        return [
+            mlrun.model.Notification.from_dict(notification)
+            for notification in self._notifications
+        ]
+
     def to_dict(self):
         """Convert the run context to a dictionary"""
 
```
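`get_notifications` gives handler code typed access to the run's notifications as `mlrun.model.Notification` objects instead of raw dicts. A sketch of how a handler might use it (the handler body is illustrative):

```python
import mlrun


def handler(context: mlrun.MLClientCtx):
    for notification in context.get_notifications():
        # each entry is an mlrun.model.Notification built via from_dict()
        print(notification.kind, notification.name)
```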
mlrun/feature_store/api.py
CHANGED
```diff
@@ -230,6 +230,11 @@ def _get_offline_features(
             "entity_timestamp_column param "
             "can not be specified without entity_rows param"
         )
+    if isinstance(target, BaseStoreTarget) and not target.support_pandas:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"get_offline_features does not support targets that do not support pandas engine."
+            f" Target kind: {target.kind}"
+        )
 
     if isinstance(feature_vector, FeatureVector):
         update_stats = True
```

mlrun/feature_store/retrieval/job.py
CHANGED

```diff
@@ -181,6 +181,7 @@ class RemoteVectorResponse:
         file_format = kwargs.get("format")
         if not file_format:
             file_format = self.run.status.results["target"]["kind"]
+
         df = mlrun.get_dataitem(self.target_uri).as_df(
             columns=columns, df_module=df_module, format=file_format, **kwargs
         )
```
mlrun/model.py
CHANGED
```diff
@@ -679,7 +679,24 @@ class ImageBuilder(ModelObj):
 
 
 class Notification(ModelObj):
-    """Notification
+    """Notification object
+
+    :param kind: notification implementation kind - slack, webhook, etc.
+    :param name: for logging and identification
+    :param message: message content in the notification
+    :param severity: severity to display in the notification
+    :param when: list of statuses to trigger the notification: 'running', 'completed', 'error'
+    :param condition: optional condition to trigger the notification, a jinja2 expression that can use run data
+                      to evaluate if the notification should be sent in addition to the 'when' statuses.
+                      e.g.: '{{ run["status"]["results"]["accuracy"] < 0.9}}'
+    :param params: Implementation specific parameters for the notification implementation (e.g. slack webhook url,
+                   git repository details, etc.)
+    :param secret_params: secret parameters for the notification implementation, same as params but will be stored
+                          in a k8s secret and passed as a secret reference to the implementation.
+    :param status: notification status - pending, sent, error
+    :param sent_time: time the notification was sent
+    :param reason: failure reason if the notification failed to send
+    """
 
     def __init__(
         self,
@@ -1468,7 +1485,11 @@ class RunObject(RunTemplate):
     @property
     def error(self) -> str:
         """error string if failed"""
-        if
+        if (
+            self.status
+            and self.status.state
+            in mlrun.common.runtimes.constants.RunStates.error_and_abortion_states()
+        ):
             unknown_error = ""
             if (
                 self.status.state
@@ -1484,8 +1505,8 @@ class RunObject(RunTemplate):
 
             return (
                 self.status.error
-                or self.status.reason
                 or self.status.status_text
+                or self.status.reason
                 or unknown_error
             )
         return ""
```