mlrun 1.7.0rc39__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of mlrun might be problematic.
- mlrun/common/constants.py +3 -0
- mlrun/common/helpers.py +0 -1
- mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
- mlrun/config.py +1 -1
- mlrun/data_types/to_pandas.py +9 -9
- mlrun/datastore/alibaba_oss.py +1 -0
- mlrun/datastore/azure_blob.py +1 -6
- mlrun/datastore/base.py +12 -0
- mlrun/datastore/dbfs_store.py +1 -5
- mlrun/datastore/filestore.py +1 -3
- mlrun/datastore/google_cloud_storage.py +1 -9
- mlrun/datastore/redis.py +1 -0
- mlrun/datastore/s3.py +1 -0
- mlrun/datastore/storeytargets.py +147 -0
- mlrun/datastore/targets.py +67 -69
- mlrun/datastore/v3io.py +1 -0
- mlrun/model_monitoring/api.py +1 -2
- mlrun/model_monitoring/applications/_application_steps.py +25 -43
- mlrun/model_monitoring/applications/context.py +206 -70
- mlrun/model_monitoring/controller.py +0 -1
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +14 -4
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +11 -3
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -23
- mlrun/model_monitoring/helpers.py +38 -1
- mlrun/model_monitoring/stream_processing.py +8 -26
- mlrun/projects/project.py +17 -16
- mlrun/runtimes/nuclio/api_gateway.py +9 -0
- mlrun/runtimes/nuclio/application/application.py +131 -55
- mlrun/runtimes/nuclio/function.py +4 -10
- mlrun/runtimes/nuclio/serving.py +2 -2
- mlrun/runtimes/utils.py +16 -0
- mlrun/serving/routers.py +1 -1
- mlrun/serving/server.py +19 -5
- mlrun/serving/states.py +8 -0
- mlrun/serving/v2_serving.py +34 -26
- mlrun/utils/helpers.py +12 -2
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +2 -2
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +44 -43
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py
CHANGED
@@ -65,6 +65,9 @@ class MLRunInternalLabels:
     task_name = f"{MLRUN_LABEL_PREFIX}task-name"
     resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
     created = f"{MLRUN_LABEL_PREFIX}created"
+    producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
+    app_name = f"{MLRUN_LABEL_PREFIX}app-name"
+    endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
     host = "host"
     job_type = "job-type"
     kind = "kind"
mlrun/common/helpers.py
CHANGED
@@ -21,7 +21,6 @@ from typing import Any, NamedTuple, Optional
 from pydantic import BaseModel, Field, validator
 from pydantic.main import Extra
 
-import mlrun.common.model_monitoring
 import mlrun.common.types
 
 from ..object import ObjectKind, ObjectSpec, ObjectStatus
mlrun/config.py
CHANGED
@@ -863,7 +863,7 @@ class Config:
                 f"Unable to decode {attribute_path}"
             )
         parsed_attribute_value = json.loads(decoded_attribute_value)
-        if
+        if not isinstance(parsed_attribute_value, expected_type):
            raise mlrun.errors.MLRunInvalidArgumentTypeError(
                f"Expected type {expected_type}, got {type(parsed_attribute_value)}"
            )
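The change above swaps a collapsed type comparison for an isinstance check when validating a decoded config attribute. A minimal standalone sketch of that validation pattern (illustrative names only, not mlrun's actual Config API):

    import base64
    import json

    def decode_typed_attribute(encoded_value: str, expected_type: type):
        # decode the base64-encoded JSON payload, then verify the parsed value's type
        decoded = base64.b64decode(encoded_value).decode("utf-8")
        parsed = json.loads(decoded)
        # isinstance also accepts subclasses, unlike a strict type(...) == expected_type test
        if not isinstance(parsed, expected_type):
            raise TypeError(f"Expected type {expected_type}, got {type(parsed)}")
        return parsed

    # decode_typed_attribute(base64.b64encode(b'{"level": "debug"}').decode(), dict) -> {'level': 'debug'}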
mlrun/data_types/to_pandas.py
CHANGED
@@ -21,7 +21,7 @@ import semver
 
 def _toPandas(spark_df):
     """
-    Modified version of spark DataFrame.toPandas()
+    Modified version of spark DataFrame.toPandas() -
     https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35
 
     The original code (which is only replaced in pyspark 3.5.0) fails with Pandas 2 installed, with the following error:
@@ -223,21 +223,21 @@ def _to_corrected_pandas_type(dt):
         TimestampType,
     )
 
-    if
+    if isinstance(dt, ByteType):
        return np.int8
-    elif
+    elif isinstance(dt, ShortType):
        return np.int16
-    elif
+    elif isinstance(dt, IntegerType):
        return np.int32
-    elif
+    elif isinstance(dt, LongType):
        return np.int64
-    elif
+    elif isinstance(dt, FloatType):
        return np.float32
-    elif
+    elif isinstance(dt, DoubleType):
        return np.float64
-    elif
+    elif isinstance(dt, BooleanType):
        return bool
-    elif
+    elif isinstance(dt, TimestampType):
        return "datetime64[ns]"
    else:
        return None
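The second hunk rewrites _to_corrected_pandas_type to dispatch with isinstance, mapping Spark SQL types to numpy dtypes. A trimmed-down, illustrative sketch of how such a per-field correction is typically applied when building a pandas DataFrame from a Spark schema (assumes pyspark, pandas and numpy are installed; the local helper below is a simplification, not the mlrun function):

    import numpy as np
    import pandas as pd
    from pyspark.sql.types import DoubleType, IntegerType, StructField, StructType

    def to_corrected_pandas_type(dt):
        # trimmed-down version of the isinstance mapping shown above
        if isinstance(dt, IntegerType):
            return np.int32
        elif isinstance(dt, DoubleType):
            return np.float64
        return None

    schema = StructType([StructField("a", IntegerType()), StructField("b", DoubleType())])
    pdf = pd.DataFrame({"a": [1, 2], "b": [0.5, 1.5]})
    for field in schema.fields:
        dtype = to_corrected_pandas_type(field.dataType)
        if dtype is not None:
            pdf[field.name] = pdf[field.name].astype(dtype, copy=False)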
mlrun/datastore/alibaba_oss.py
CHANGED
@@ -85,6 +85,7 @@ class OSSStore(DataStore):
         return oss.get_object(key).read()
 
     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
         oss.put_object(key, data)
mlrun/datastore/azure_blob.py
CHANGED
@@ -189,12 +189,7 @@ class AzureBlobStore(DataStore):
                 "Append mode not supported for Azure blob datastore"
             )
         remote_path = self._convert_key_to_remote_path(key)
-
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError("Data type unknown. Unable to put in Azure!")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(remote_path, mode) as f:
             f.write(data)
|
mlrun/datastore/base.py
CHANGED
@@ -157,6 +157,18 @@ class DataStore:
     def put(self, key, data, append=False):
         pass
 
+    def _prepare_put_data(self, data, append=False):
+        mode = "a" if append else "w"
+        if isinstance(data, bytearray):
+            data = bytes(data)
+
+        if isinstance(data, bytes):
+            return data, f"{mode}b"
+        elif isinstance(data, str):
+            return data, mode
+        else:
+            raise TypeError(f"Unable to put a value of type {type(self).__name__}")
+
     def stat(self, key):
         pass
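The new DataStore._prepare_put_data helper centralizes the bytes/str/bytearray handling that the individual datastores previously duplicated in their put() methods. A standalone sketch of that normalization (illustrative function name; not the mlrun class itself):

    def prepare_put_data(data, append: bool = False):
        mode = "a" if append else "w"
        if isinstance(data, bytearray):
            data = bytes(data)  # write bytearray payloads as bytes
        if isinstance(data, bytes):
            return data, mode + "b"  # binary payloads need a binary file mode
        if isinstance(data, str):
            return data, mode
        raise TypeError(f"Unable to put a value of type {type(data).__name__}")

    # A file-based put() then reduces to:
    # data, mode = prepare_put_data(data, append)
    # with open(path, mode) as f:
    #     f.write(data)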
|
mlrun/datastore/dbfs_store.py
CHANGED
@@ -130,11 +130,7 @@ class DBFSStore(DataStore):
                 "Append mode not supported for Databricks file system"
             )
         # can not use append mode because it overrides data.
-        mode =
-        if isinstance(data, bytes):
-            mode += "b"
-        elif not isinstance(data, str):
-            raise TypeError(f"Unknown data type {type(data)}")
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(key, mode) as f:
             f.write(data)
|
mlrun/datastore/filestore.py
CHANGED
@@ -66,9 +66,7 @@ class FileStore(DataStore):
         dir_to_create = path.dirname(self._join(key))
         if dir_to_create:
             self._ensure_directory(dir_to_create)
-        mode =
-        if isinstance(data, bytes):
-            mode = mode + "b"
+        data, mode = self._prepare_put_data(data, append)
         with open(self._join(key), mode) as fp:
             fp.write(data)
             fp.close()
mlrun/datastore/google_cloud_storage.py
CHANGED
@@ -131,15 +131,7 @@ class GoogleCloudStorageStore(DataStore):
         raise mlrun.errors.MLRunInvalidArgumentError(
             "Append mode not supported for Google cloud storage datastore"
         )
-
-        if isinstance(data, bytes):
-            mode = "wb"
-        elif isinstance(data, str):
-            mode = "w"
-        else:
-            raise TypeError(
-                "Data type unknown. Unable to put in Google cloud storage!"
-            )
+        data, mode = self._prepare_put_data(data, append)
         with self.filesystem.open(path, mode) as f:
             f.write(data)
|
mlrun/datastore/redis.py
CHANGED
mlrun/datastore/s3.py
CHANGED
@@ -183,6 +183,7 @@ class S3Store(DataStore):
         return obj.get()["Body"].read()
 
     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         self.s3.Object(bucket, key).put(Body=data)
|
mlrun/datastore/storeytargets.py
ADDED
@@ -0,0 +1,147 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import storey
+from mergedeep import merge
+from storey import V3ioDriver
+
+import mlrun
+import mlrun.model_monitoring.helpers
+from mlrun.datastore.base import DataStore
+
+from .utils import (
+    parse_kafka_url,
+)
+
+"""
+Storey targets expect storage_options, which may contain credentials.
+To avoid passing it openly within the graph, we use wrapper classes.
+"""
+
+
+def get_url_and_storage_options(path, external_storage_options=None):
+    store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(path)
+    storage_options = store.get_storage_options()
+    if storage_options and external_storage_options:
+        # merge external storage options with the store's storage options. storage_options takes precedence
+        storage_options = merge(external_storage_options, storage_options)
+    else:
+        storage_options = storage_options or external_storage_options
+    return url, DataStore._sanitize_storage_options(storage_options)
+
+
+class TDEngineStoreyTarget(storey.TDEngineTarget):
+    def __init__(self, *args, **kwargs):
+        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
+        super().__init__(*args, **kwargs)
+
+
+class StoreyTargetUtils:
+    @staticmethod
+    def process_args_and_kwargs(args, kwargs):
+        args = list(args)
+        path = args[0] if args else kwargs.get("path")
+        external_storage_options = kwargs.get("storage_options")
+
+        url, storage_options = get_url_and_storage_options(
+            path, external_storage_options
+        )
+
+        if storage_options:
+            kwargs["storage_options"] = storage_options
+        if args:
+            args[0] = url
+        if "path" in kwargs:
+            kwargs["path"] = url
+        return args, kwargs
+
+
+class ParquetStoreyTarget(storey.ParquetTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class CSVStoreyTarget(storey.CSVTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class StreamStoreyTarget(storey.StreamTarget):
+    def __init__(self, *args, **kwargs):
+        args = list(args)
+
+        path = args[0] if args else kwargs.get("stream_path")
+        endpoint, storage_options = get_url_and_storage_options(path)
+
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
+
+        access_key = storage_options.get("v3io_access_key")
+        storage = (
+            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
+        )
+
+        if storage_options:
+            kwargs["storage"] = storage
+        if args:
+            args[0] = endpoint
+        if "stream_path" in kwargs:
+            kwargs["stream_path"] = endpoint
+
+        super().__init__(*args, **kwargs)
+
+
+class KafkaStoreyTarget(storey.KafkaTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        attributes = kwargs.pop("attributes", None)
+        if path and path.startswith("ds://"):
+            datastore_profile = (
+                mlrun.datastore.datastore_profile.datastore_profile_read(path)
+            )
+            attributes = merge(attributes, datastore_profile.attributes())
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic = datastore_profile.topic
+        else:
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic, brokers = parse_kafka_url(path, brokers)
+
+        if not topic:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
+        kwargs["brokers"] = brokers
+        kwargs["topic"] = topic
+        super().__init__(*args, **kwargs, **attributes)
+
+
+class NoSqlStoreyTarget(storey.NoSqlTarget):
+    pass
+
+
+class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        endpoint, uri = mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
+            path
+        )
+        kwargs["path"] = endpoint + "/" + uri
+        super().__init__(*args, **kwargs)
+
+
+class TSDBStoreyTarget(storey.TSDBTarget):
+    pass
mlrun/datastore/targets.py
CHANGED
@@ -47,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
 from .utils import (
     _generate_sql_query_with_time_filter,
     filter_df_start_end_time,
-    parse_kafka_url,
     select_columns_from_df,
 )
 
@@ -928,8 +927,9 @@ class ParquetTarget(BaseStoreTarget):
             if time_unit == time_partitioning_granularity:
                 break
 
+        target_path = self.get_target_path()
         if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
-
+            target_path
         ):
             partition_cols = []
 
@@ -937,25 +937,16 @@ class ParquetTarget(BaseStoreTarget):
         for key_column in key_columns:
             tuple_key_columns.append((key_column.name, key_column.value_type))
 
-        store, path_in_store, target_path = self._get_store_and_path()
-
-        storage_options = store.get_storage_options()
-        if storage_options and self.storage_options:
-            storage_options = merge(storage_options, self.storage_options)
-        else:
-            storage_options = storage_options or self.storage_options
-
         step = graph.add_step(
             name=self.name or "ParquetTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
             path=target_path,
             columns=column_list,
             index_cols=tuple_key_columns,
             partition_cols=partition_cols,
             time_field=timestamp_key,
-            storage_options=storage_options,
             max_events=self.max_events,
             flush_after_seconds=self.flush_after_seconds,
             update_last_written=featureset_status.update_last_written_for_target,
@@ -1110,17 +1101,16 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
+        target_path = self.get_target_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
             path=target_path,
             columns=column_list,
             header=True,
             index_cols=key_columns,
-            storage_options=store.get_storage_options(),
             **self.attributes,
         )
 
@@ -1334,6 +1324,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
+    def _get_table_and_columns(self, features, key_columns):
         key_columns = list(key_columns.keys())
         table = self._resource.uri
         column_list = self._get_column_list(
@@ -1352,15 +1355,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
             col for col in column_list if col[0] not in aggregate_features
         ]
 
-
-            name=self.name or self.writer_step_name,
-            after=after,
-            graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
-            columns=column_list,
-            table=table,
-            **self.attributes,
-        )
+        return table, column_list
 
     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         raise NotImplementedError()
@@ -1483,11 +1478,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     support_spark = True
     writer_step_name = "RedisNoSqlTarget"
 
-
-
-
-    def _get_server_endpoint(self):
-        endpoint, uri = parse_path(self.get_target_path())
+    @staticmethod
+    def get_server_endpoint(path):
+        endpoint, uri = parse_path(path)
         endpoint = endpoint or mlrun.mlconf.redis.url
         if endpoint.startswith("ds://"):
             datastore_profile = datastore_profile_read(endpoint)
@@ -1504,8 +1497,13 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Provide Redis username and password only via secrets"
             )
-
-
+        credentials_prefix = mlrun.get_secret_or_env(key="CREDENTIALS_PREFIX")
+        user = mlrun.get_secret_or_env(
+            "REDIS_USER", default="", prefix=credentials_prefix
+        )
+        password = mlrun.get_secret_or_env(
+            "REDIS_PASSWORD", default="", prefix=credentials_prefix
+        )
         host = parsed_endpoint.hostname
         port = parsed_endpoint.port if parsed_endpoint.port else "6379"
         scheme = parsed_endpoint.scheme
@@ -1519,7 +1517,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         from storey import Table
         from storey.redis_driver import RedisDriver
 
-        endpoint, uri = self.
+        endpoint, uri = self.get_server_endpoint(self.get_target_path())
 
         return Table(
             uri,
@@ -1528,7 +1526,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
         )
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        endpoint, uri = self.
+        endpoint, uri = self.get_server_endpoint(self.get_target_path())
         parsed_endpoint = urlparse(endpoint)
         store, path_in_store, path = self._get_store_and_path()
         return {
@@ -1560,6 +1558,28 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
 
         return df
 
+    def add_writer_step(
+        self,
+        graph,
+        after,
+        features,
+        key_columns=None,
+        timestamp_key=None,
+        featureset_status=None,
+    ):
+        table, column_list = self._get_table_and_columns(features, key_columns)
+
+        graph.add_step(
+            path=self.get_target_path(),
+            name=self.name or self.writer_step_name,
+            after=after,
+            graph_shape="cylinder",
+            class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+            columns=column_list,
+            table=table,
+            **self.attributes,
+        )
+
 
 class StreamTarget(BaseStoreTarget):
     kind = TargetTypes.stream
@@ -1578,29 +1598,22 @@ class StreamTarget(BaseStoreTarget):
         timestamp_key=None,
         featureset_status=None,
     ):
-        from storey import V3ioDriver
-
         key_columns = list(key_columns.keys())
-
-        if not path:
-            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
-        endpoint, uri = parse_path(path)
-        storage_options = store.get_storage_options()
-        access_key = storage_options.get("v3io_access_key")
+
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
+        stream_path = self.get_target_path()
+        if not stream_path:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
 
         graph.add_step(
             name=self.name or "StreamTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
             columns=column_list,
-
-                webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
-            ),
-            stream_path=uri,
+            stream_path=stream_path,
             **self.attributes,
         )
 
@@ -1676,34 +1689,19 @@ class KafkaTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-
-            datastore_profile = datastore_profile_read(self.path)
-            attributes = datastore_profile.attributes()
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic = datastore_profile.topic
-        else:
-            attributes = copy(self.attributes)
-            brokers = attributes.pop(
-                "brokers", attributes.pop("bootstrap_servers", None)
-            )
-            topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
+        path = self.get_target_path()
 
-        if not
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "KafkaTarget requires a path (topic)"
-            )
+        if not path:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")
 
         graph.add_step(
             name=self.name or "KafkaTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
             columns=column_list,
-
-
-            **attributes,
+            path=path,
+            attributes=self.attributes,
         )
 
     def purge(self):
@@ -1740,7 +1738,7 @@ class TSDBTarget(BaseStoreTarget):
 
         graph.add_step(
             name=self.name or "TSDBTarget",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
             after=after,
             graph_shape="cylinder",
             path=uri,
@@ -2029,7 +2027,7 @@ class SQLTarget(BaseStoreTarget):
             name=self.name or "SqlTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
             columns=column_list,
             header=True,
             table=table,
mlrun/datastore/v3io.py
CHANGED
@@ -140,6 +140,7 @@ class V3ioStore(DataStore):
         max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
     ):
         """helper function for put method, allows for controlling max_chunk_size in testing"""
+        data, _ = self._prepare_put_data(data, append)
         container, path = split_path(self._join(key))
         buffer_size = len(data)  # in bytes
         buffer_offset = 0
mlrun/model_monitoring/api.py
CHANGED
@@ -147,8 +147,7 @@ def record_results(
                                      on the provided `endpoint_id`.
     :param function_name:            If a new model endpoint is created, use this function name for generating the
                                      function URI.
-    :param context:                  MLRun context. Note that the context is required
-                                     following the batch drift job.
+    :param context:                  MLRun context. Note that the context is required generating the model endpoint.
     :param infer_results_df:         DataFrame that will be stored under the model endpoint parquet target. Will be
                                      used for doing the drift analysis. Please make sure that the dataframe includes
                                      both feature names and label columns.