mlrun 1.7.0rc39__py3-none-any.whl → 1.7.0rc41__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44)
  1. mlrun/common/constants.py +3 -0
  2. mlrun/common/helpers.py +0 -1
  3. mlrun/common/schemas/model_monitoring/model_endpoints.py +0 -1
  4. mlrun/config.py +1 -1
  5. mlrun/data_types/to_pandas.py +9 -9
  6. mlrun/datastore/alibaba_oss.py +1 -0
  7. mlrun/datastore/azure_blob.py +1 -6
  8. mlrun/datastore/base.py +12 -0
  9. mlrun/datastore/dbfs_store.py +1 -5
  10. mlrun/datastore/filestore.py +1 -3
  11. mlrun/datastore/google_cloud_storage.py +1 -9
  12. mlrun/datastore/redis.py +1 -0
  13. mlrun/datastore/s3.py +1 -0
  14. mlrun/datastore/storeytargets.py +147 -0
  15. mlrun/datastore/targets.py +67 -69
  16. mlrun/datastore/v3io.py +1 -0
  17. mlrun/model_monitoring/api.py +1 -2
  18. mlrun/model_monitoring/applications/_application_steps.py +25 -43
  19. mlrun/model_monitoring/applications/context.py +206 -70
  20. mlrun/model_monitoring/controller.py +0 -1
  21. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +17 -8
  22. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +14 -4
  23. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +11 -3
  24. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +35 -23
  25. mlrun/model_monitoring/helpers.py +38 -1
  26. mlrun/model_monitoring/stream_processing.py +8 -26
  27. mlrun/projects/project.py +17 -16
  28. mlrun/runtimes/nuclio/api_gateway.py +9 -0
  29. mlrun/runtimes/nuclio/application/application.py +131 -55
  30. mlrun/runtimes/nuclio/function.py +4 -10
  31. mlrun/runtimes/nuclio/serving.py +2 -2
  32. mlrun/runtimes/utils.py +16 -0
  33. mlrun/serving/routers.py +1 -1
  34. mlrun/serving/server.py +19 -5
  35. mlrun/serving/states.py +8 -0
  36. mlrun/serving/v2_serving.py +34 -26
  37. mlrun/utils/helpers.py +12 -2
  38. mlrun/utils/version/version.json +2 -2
  39. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/METADATA +2 -2
  40. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/RECORD +44 -43
  41. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/WHEEL +1 -1
  42. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/LICENSE +0 -0
  43. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/entry_points.txt +0 -0
  44. {mlrun-1.7.0rc39.dist-info → mlrun-1.7.0rc41.dist-info}/top_level.txt +0 -0
mlrun/common/constants.py CHANGED
@@ -65,6 +65,9 @@ class MLRunInternalLabels:
  task_name = f"{MLRUN_LABEL_PREFIX}task-name"
  resource_name = f"{MLRUN_LABEL_PREFIX}resource_name"
  created = f"{MLRUN_LABEL_PREFIX}created"
+ producer_type = f"{MLRUN_LABEL_PREFIX}producer-type"
+ app_name = f"{MLRUN_LABEL_PREFIX}app-name"
+ endpoint_id = f"{MLRUN_LABEL_PREFIX}endpoint-id"
  host = "host"
  job_type = "job-type"
  kind = "kind"
mlrun/common/helpers.py CHANGED
@@ -11,7 +11,6 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
- #


  def parse_versioned_object_uri(
mlrun/common/schemas/model_monitoring/model_endpoints.py CHANGED
@@ -21,7 +21,6 @@ from typing import Any, NamedTuple, Optional
  from pydantic import BaseModel, Field, validator
  from pydantic.main import Extra

- import mlrun.common.model_monitoring
  import mlrun.common.types

  from ..object import ObjectKind, ObjectSpec, ObjectStatus
mlrun/config.py CHANGED
@@ -863,7 +863,7 @@ class Config:
  f"Unable to decode {attribute_path}"
  )
  parsed_attribute_value = json.loads(decoded_attribute_value)
- if type(parsed_attribute_value) != expected_type:
+ if not isinstance(parsed_attribute_value, expected_type):
  raise mlrun.errors.MLRunInvalidArgumentTypeError(
  f"Expected type {expected_type}, got {type(parsed_attribute_value)}"
  )
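The config change above swaps an exact type comparison for isinstance, which also accepts subclasses of the expected type. A minimal illustration of the difference (not taken from the mlrun code):

# bool is a subclass of int, so the two checks disagree on it
value = True
print(type(value) == int)       # False: exact type comparison rejects subclasses
print(isinstance(value, int))   # True: isinstance accepts subclasses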
mlrun/data_types/to_pandas.py CHANGED
@@ -21,7 +21,7 @@ import semver

  def _toPandas(spark_df):
  """
- Modified version of spark DataFrame.toPandas()
+ Modified version of spark DataFrame.toPandas() -
  https://github.com/apache/spark/blob/v3.2.3/python/pyspark/sql/pandas/conversion.py#L35

  The original code (which is only replaced in pyspark 3.5.0) fails with Pandas 2 installed, with the following error:
@@ -223,21 +223,21 @@ def _to_corrected_pandas_type(dt):
  TimestampType,
  )

- if type(dt) == ByteType:
+ if isinstance(dt, ByteType):
  return np.int8
- elif type(dt) == ShortType:
+ elif isinstance(dt, ShortType):
  return np.int16
- elif type(dt) == IntegerType:
+ elif isinstance(dt, IntegerType):
  return np.int32
- elif type(dt) == LongType:
+ elif isinstance(dt, LongType):
  return np.int64
- elif type(dt) == FloatType:
+ elif isinstance(dt, FloatType):
  return np.float32
- elif type(dt) == DoubleType:
+ elif isinstance(dt, DoubleType):
  return np.float64
- elif type(dt) == BooleanType:
+ elif isinstance(dt, BooleanType):
  return bool
- elif type(dt) == TimestampType:
+ elif isinstance(dt, TimestampType):
  return "datetime64[ns]"
  else:
  return None
mlrun/datastore/alibaba_oss.py CHANGED
@@ -85,6 +85,7 @@ class OSSStore(DataStore):
  return oss.get_object(key).read()

  def put(self, key, data, append=False):
+ data, _ = self._prepare_put_data(data, append)
  bucket, key = self.get_bucket_and_key(key)
  oss = oss2.Bucket(self.auth, self.endpoint_url, bucket)
  oss.put_object(key, data)
mlrun/datastore/azure_blob.py CHANGED
@@ -189,12 +189,7 @@ class AzureBlobStore(DataStore):
  "Append mode not supported for Azure blob datastore"
  )
  remote_path = self._convert_key_to_remote_path(key)
- if isinstance(data, bytes):
- mode = "wb"
- elif isinstance(data, str):
- mode = "w"
- else:
- raise TypeError("Data type unknown. Unable to put in Azure!")
+ data, mode = self._prepare_put_data(data, append)
  with self.filesystem.open(remote_path, mode) as f:
  f.write(data)

mlrun/datastore/base.py CHANGED
@@ -157,6 +157,18 @@ class DataStore:
  def put(self, key, data, append=False):
  pass

+ def _prepare_put_data(self, data, append=False):
+ mode = "a" if append else "w"
+ if isinstance(data, bytearray):
+ data = bytes(data)
+
+ if isinstance(data, bytes):
+ return data, f"{mode}b"
+ elif isinstance(data, str):
+ return data, mode
+ else:
+ raise TypeError(f"Unable to put a value of type {type(self).__name__}")
+
  def stat(self, key):
  pass

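The new DataStore._prepare_put_data helper centralizes the payload and mode handling that several datastores previously duplicated: bytearray is converted to bytes, bytes selects a binary mode, str a text mode, and any other type raises TypeError. A rough sketch of how a subclass's put() is expected to call it (ExampleStore and its filesystem attribute are illustrative, not part of the diff):

class ExampleStore(DataStore):
    def put(self, key, data, append=False):
        # normalize the payload and pick "w"/"a" or "wb"/"ab" in one place
        data, mode = self._prepare_put_data(data, append)
        with self.filesystem.open(key, mode) as f:
            f.write(data)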
mlrun/datastore/dbfs_store.py CHANGED
@@ -130,11 +130,7 @@
  "Append mode not supported for Databricks file system"
  )
  # can not use append mode because it overrides data.
- mode = "w"
- if isinstance(data, bytes):
- mode += "b"
- elif not isinstance(data, str):
- raise TypeError(f"Unknown data type {type(data)}")
+ data, mode = self._prepare_put_data(data, append)
  with self.filesystem.open(key, mode) as f:
  f.write(data)

mlrun/datastore/filestore.py CHANGED
@@ -66,9 +66,7 @@
  dir_to_create = path.dirname(self._join(key))
  if dir_to_create:
  self._ensure_directory(dir_to_create)
- mode = "a" if append else "w"
- if isinstance(data, bytes):
- mode = mode + "b"
+ data, mode = self._prepare_put_data(data, append)
  with open(self._join(key), mode) as fp:
  fp.write(data)
  fp.close()
mlrun/datastore/google_cloud_storage.py CHANGED
@@ -131,15 +131,7 @@
  raise mlrun.errors.MLRunInvalidArgumentError(
  "Append mode not supported for Google cloud storage datastore"
  )
-
- if isinstance(data, bytes):
- mode = "wb"
- elif isinstance(data, str):
- mode = "w"
- else:
- raise TypeError(
- "Data type unknown. Unable to put in Google cloud storage!"
- )
+ data, mode = self._prepare_put_data(data, append)
  with self.filesystem.open(path, mode) as f:
  f.write(data)

mlrun/datastore/redis.py CHANGED
@@ -126,6 +126,7 @@ class RedisStore(DataStore):

  def put(self, key, data, append=False):
  key = RedisStore.build_redis_key(key)
+ data, _ = self._prepare_put_data(data, append)
  if append:
  self.redis.append(key, data)
  else:
mlrun/datastore/s3.py CHANGED
@@ -183,6 +183,7 @@ class S3Store(DataStore):
  return obj.get()["Body"].read()

  def put(self, key, data, append=False):
+ data, _ = self._prepare_put_data(data, append)
  bucket, key = self.get_bucket_and_key(key)
  self.s3.Object(bucket, key).put(Body=data)

mlrun/datastore/storeytargets.py ADDED
@@ -0,0 +1,147 @@
+ # Copyright 2024 Iguazio
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ import storey
+ from mergedeep import merge
+ from storey import V3ioDriver
+
+ import mlrun
+ import mlrun.model_monitoring.helpers
+ from mlrun.datastore.base import DataStore
+
+ from .utils import (
+ parse_kafka_url,
+ )
+
+ """
+ Storey targets expect storage_options, which may contain credentials.
+ To avoid passing it openly within the graph, we use wrapper classes.
+ """
+
+
+ def get_url_and_storage_options(path, external_storage_options=None):
+ store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(path)
+ storage_options = store.get_storage_options()
+ if storage_options and external_storage_options:
+ # merge external storage options with the store's storage options. storage_options takes precedence
+ storage_options = merge(external_storage_options, storage_options)
+ else:
+ storage_options = storage_options or external_storage_options
+ return url, DataStore._sanitize_storage_options(storage_options)
+
+
+ class TDEngineStoreyTarget(storey.TDEngineTarget):
+ def __init__(self, *args, **kwargs):
+ kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
+ super().__init__(*args, **kwargs)
+
+
+ class StoreyTargetUtils:
+ @staticmethod
+ def process_args_and_kwargs(args, kwargs):
+ args = list(args)
+ path = args[0] if args else kwargs.get("path")
+ external_storage_options = kwargs.get("storage_options")
+
+ url, storage_options = get_url_and_storage_options(
+ path, external_storage_options
+ )
+
+ if storage_options:
+ kwargs["storage_options"] = storage_options
+ if args:
+ args[0] = url
+ if "path" in kwargs:
+ kwargs["path"] = url
+ return args, kwargs
+
+
+ class ParquetStoreyTarget(storey.ParquetTarget):
+ def __init__(self, *args, **kwargs):
+ args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+ super().__init__(*args, **kwargs)
+
+
+ class CSVStoreyTarget(storey.CSVTarget):
+ def __init__(self, *args, **kwargs):
+ args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+ super().__init__(*args, **kwargs)
+
+
+ class StreamStoreyTarget(storey.StreamTarget):
+ def __init__(self, *args, **kwargs):
+ args = list(args)
+
+ path = args[0] if args else kwargs.get("stream_path")
+ endpoint, storage_options = get_url_and_storage_options(path)
+
+ if not path:
+ raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
+
+ access_key = storage_options.get("v3io_access_key")
+ storage = (
+ V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
+ )
+
+ if storage_options:
+ kwargs["storage"] = storage
+ if args:
+ args[0] = endpoint
+ if "stream_path" in kwargs:
+ kwargs["stream_path"] = endpoint
+
+ super().__init__(*args, **kwargs)
+
+
+ class KafkaStoreyTarget(storey.KafkaTarget):
+ def __init__(self, *args, **kwargs):
+ path = kwargs.pop("path")
+ attributes = kwargs.pop("attributes", None)
+ if path and path.startswith("ds://"):
+ datastore_profile = (
+ mlrun.datastore.datastore_profile.datastore_profile_read(path)
+ )
+ attributes = merge(attributes, datastore_profile.attributes())
+ brokers = attributes.pop(
+ "brokers", attributes.pop("bootstrap_servers", None)
+ )
+ topic = datastore_profile.topic
+ else:
+ brokers = attributes.pop(
+ "brokers", attributes.pop("bootstrap_servers", None)
+ )
+ topic, brokers = parse_kafka_url(path, brokers)
+
+ if not topic:
+ raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
+ kwargs["brokers"] = brokers
+ kwargs["topic"] = topic
+ super().__init__(*args, **kwargs, **attributes)
+
+
+ class NoSqlStoreyTarget(storey.NoSqlTarget):
+ pass
+
+
+ class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
+ def __init__(self, *args, **kwargs):
+ path = kwargs.pop("path")
+ endpoint, uri = mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
+ path
+ )
+ kwargs["path"] = endpoint + "/" + uri
+ super().__init__(*args, **kwargs)
+
+
+ class TSDBStoreyTarget(storey.TSDBTarget):
+ pass
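The new module wraps the storey target classes so that URLs and storage options (which may carry credentials) are resolved inside the step at initialization time rather than being serialized into the graph. A rough sketch of the resolution helper in isolation (the v3io path is an example value, not taken from the diff):

from mlrun.datastore.storeytargets import get_url_and_storage_options

# resolve the physical url and sanitized storage options for a store path
url, storage_options = get_url_and_storage_options(
    "v3io:///projects/my-project/artifacts/my-set.parquet"
)
# the graph step only references the wrapper by name, e.g.
# class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget"; the wrapper calls
# this helper itself, so credentials never appear in the serialized graph config.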
mlrun/datastore/targets.py CHANGED
@@ -47,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
  from .utils import (
  _generate_sql_query_with_time_filter,
  filter_df_start_end_time,
- parse_kafka_url,
  select_columns_from_df,
  )

@@ -928,8 +927,9 @@ class ParquetTarget(BaseStoreTarget):
  if time_unit == time_partitioning_granularity:
  break

+ target_path = self.get_target_path()
  if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
- self.get_target_path()
+ target_path
  ):
  partition_cols = []

@@ -937,25 +937,16 @@
  for key_column in key_columns:
  tuple_key_columns.append((key_column.name, key_column.value_type))

- store, path_in_store, target_path = self._get_store_and_path()
-
- storage_options = store.get_storage_options()
- if storage_options and self.storage_options:
- storage_options = merge(storage_options, self.storage_options)
- else:
- storage_options = storage_options or self.storage_options
-
  step = graph.add_step(
  name=self.name or "ParquetTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.ParquetTarget",
+ class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
  path=target_path,
  columns=column_list,
  index_cols=tuple_key_columns,
  partition_cols=partition_cols,
  time_field=timestamp_key,
- storage_options=storage_options,
  max_events=self.max_events,
  flush_after_seconds=self.flush_after_seconds,
  update_last_written=featureset_status.update_last_written_for_target,
@@ -1110,17 +1101,16 @@ class CSVTarget(BaseStoreTarget):
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
- store, path_in_store, target_path = self._get_store_and_path()
+ target_path = self.get_target_path()
  graph.add_step(
  name=self.name or "CSVTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.CSVTarget",
+ class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
  path=target_path,
  columns=column_list,
  header=True,
  index_cols=key_columns,
- storage_options=store.get_storage_options(),
  **self.attributes,
  )

@@ -1334,6 +1324,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
  timestamp_key=None,
  featureset_status=None,
  ):
+ table, column_list = self._get_table_and_columns(features, key_columns)
+
+ graph.add_step(
+ name=self.name or self.writer_step_name,
+ after=after,
+ graph_shape="cylinder",
+ class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+ columns=column_list,
+ table=table,
+ **self.attributes,
+ )
+
+ def _get_table_and_columns(self, features, key_columns):
  key_columns = list(key_columns.keys())
  table = self._resource.uri
  column_list = self._get_column_list(
@@ -1352,15 +1355,7 @@
  col for col in column_list if col[0] not in aggregate_features
  ]

- graph.add_step(
- name=self.name or self.writer_step_name,
- after=after,
- graph_shape="cylinder",
- class_name="storey.NoSqlTarget",
- columns=column_list,
- table=table,
- **self.attributes,
- )
+ return table, column_list

  def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
  raise NotImplementedError()
@@ -1483,11 +1478,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  support_spark = True
  writer_step_name = "RedisNoSqlTarget"

- # Fetch server url from the RedisNoSqlTarget::__init__() 'path' parameter.
- # If not set fetch it from 'mlrun.mlconf.redis.url' (MLRUN_REDIS__URL environment variable).
- # Then look for username and password at REDIS_xxx secrets
- def _get_server_endpoint(self):
- endpoint, uri = parse_path(self.get_target_path())
+ @staticmethod
+ def get_server_endpoint(path):
+ endpoint, uri = parse_path(path)
  endpoint = endpoint or mlrun.mlconf.redis.url
  if endpoint.startswith("ds://"):
  datastore_profile = datastore_profile_read(endpoint)
@@ -1504,8 +1497,13 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  raise mlrun.errors.MLRunInvalidArgumentError(
  "Provide Redis username and password only via secrets"
  )
- user = self._get_credential("REDIS_USER", "")
- password = self._get_credential("REDIS_PASSWORD", "")
+ credentials_prefix = mlrun.get_secret_or_env(key="CREDENTIALS_PREFIX")
+ user = mlrun.get_secret_or_env(
+ "REDIS_USER", default="", prefix=credentials_prefix
+ )
+ password = mlrun.get_secret_or_env(
+ "REDIS_PASSWORD", default="", prefix=credentials_prefix
+ )
  host = parsed_endpoint.hostname
  port = parsed_endpoint.port if parsed_endpoint.port else "6379"
  scheme = parsed_endpoint.scheme
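With this change the Redis username and password are read through mlrun.get_secret_or_env, optionally under a CREDENTIALS_PREFIX, instead of the target's old _get_credential helper. A minimal sketch of supplying them when no prefix is configured (the values are placeholders, and environment variables are only one of the sources get_secret_or_env consults):

import os
import mlrun

os.environ["REDIS_USER"] = "my-user"          # placeholder value
os.environ["REDIS_PASSWORD"] = "my-password"  # placeholder value

user = mlrun.get_secret_or_env("REDIS_USER", default="")
password = mlrun.get_secret_or_env("REDIS_PASSWORD", default="")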
@@ -1519,7 +1517,7 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  from storey import Table
  from storey.redis_driver import RedisDriver

- endpoint, uri = self._get_server_endpoint()
+ endpoint, uri = self.get_server_endpoint(self.get_target_path())

  return Table(
  uri,
@@ -1528,7 +1526,7 @@
  )

  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
- endpoint, uri = self._get_server_endpoint()
+ endpoint, uri = self.get_server_endpoint(self.get_target_path())
  parsed_endpoint = urlparse(endpoint)
  store, path_in_store, path = self._get_store_and_path()
  return {
@@ -1560,6 +1558,28 @@

  return df

+ def add_writer_step(
+ self,
+ graph,
+ after,
+ features,
+ key_columns=None,
+ timestamp_key=None,
+ featureset_status=None,
+ ):
+ table, column_list = self._get_table_and_columns(features, key_columns)
+
+ graph.add_step(
+ path=self.get_target_path(),
+ name=self.name or self.writer_step_name,
+ after=after,
+ graph_shape="cylinder",
+ class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+ columns=column_list,
+ table=table,
+ **self.attributes,
+ )
+

  class StreamTarget(BaseStoreTarget):
  kind = TargetTypes.stream
@@ -1578,29 +1598,22 @@
  timestamp_key=None,
  featureset_status=None,
  ):
- from storey import V3ioDriver
-
  key_columns = list(key_columns.keys())
- store, path_in_store, path = self._get_store_and_path()
- if not path:
- raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
- endpoint, uri = parse_path(path)
- storage_options = store.get_storage_options()
- access_key = storage_options.get("v3io_access_key")
+
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
+ stream_path = self.get_target_path()
+ if not stream_path:
+ raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")

  graph.add_step(
  name=self.name or "StreamTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.StreamTarget",
+ class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
  columns=column_list,
- storage=V3ioDriver(
- webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
- ),
- stream_path=uri,
+ stream_path=stream_path,
  **self.attributes,
  )

@@ -1676,34 +1689,19 @@ class KafkaTarget(BaseStoreTarget):
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
- if self.path and self.path.startswith("ds://"):
- datastore_profile = datastore_profile_read(self.path)
- attributes = datastore_profile.attributes()
- brokers = attributes.pop(
- "brokers", attributes.pop("bootstrap_servers", None)
- )
- topic = datastore_profile.topic
- else:
- attributes = copy(self.attributes)
- brokers = attributes.pop(
- "brokers", attributes.pop("bootstrap_servers", None)
- )
- topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
+ path = self.get_target_path()

- if not topic:
- raise mlrun.errors.MLRunInvalidArgumentError(
- "KafkaTarget requires a path (topic)"
- )
+ if not path:
+ raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")

  graph.add_step(
  name=self.name or "KafkaTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.KafkaTarget",
+ class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
  columns=column_list,
- topic=topic,
- brokers=brokers,
- **attributes,
+ path=path,
+ attributes=self.attributes,
  )

  def purge(self):
@@ -1740,7 +1738,7 @@ class TSDBTarget(BaseStoreTarget):

  graph.add_step(
  name=self.name or "TSDBTarget",
- class_name="storey.TSDBTarget",
+ class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
  after=after,
  graph_shape="cylinder",
  path=uri,
@@ -2029,7 +2027,7 @@ class SQLTarget(BaseStoreTarget):
  name=self.name or "SqlTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.NoSqlTarget",
+ class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
  columns=column_list,
  header=True,
  table=table,
mlrun/datastore/v3io.py CHANGED
@@ -140,6 +140,7 @@ class V3ioStore(DataStore):
  max_chunk_size: int = V3IO_DEFAULT_UPLOAD_CHUNK_SIZE,
  ):
  """helper function for put method, allows for controlling max_chunk_size in testing"""
+ data, _ = self._prepare_put_data(data, append)
  container, path = split_path(self._join(key))
  buffer_size = len(data) # in bytes
  buffer_offset = 0
mlrun/model_monitoring/api.py CHANGED
@@ -147,8 +147,7 @@ def record_results(
  on the provided `endpoint_id`.
  :param function_name: If a new model endpoint is created, use this function name for generating the
  function URI.
- :param context: MLRun context. Note that the context is required for logging the artifacts
- following the batch drift job.
+ :param context: MLRun context. Note that the context is required generating the model endpoint.
  :param infer_results_df: DataFrame that will be stored under the model endpoint parquet target. Will be
  used for doing the drift analysis. Please make sure that the dataframe includes
  both feature names and label columns.