mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (135)
  1. mlrun/__main__.py +4 -2
  2. mlrun/alerts/alert.py +75 -8
  3. mlrun/artifacts/base.py +1 -0
  4. mlrun/artifacts/manager.py +9 -2
  5. mlrun/common/constants.py +4 -1
  6. mlrun/common/db/sql_session.py +3 -2
  7. mlrun/common/formatters/__init__.py +1 -0
  8. mlrun/common/formatters/artifact.py +1 -0
  9. mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
  10. mlrun/common/formatters/run.py +3 -0
  11. mlrun/common/helpers.py +0 -1
  12. mlrun/common/schemas/__init__.py +3 -1
  13. mlrun/common/schemas/alert.py +15 -12
  14. mlrun/common/schemas/api_gateway.py +6 -6
  15. mlrun/common/schemas/auth.py +5 -0
  16. mlrun/common/schemas/client_spec.py +0 -1
  17. mlrun/common/schemas/common.py +7 -4
  18. mlrun/common/schemas/frontend_spec.py +7 -0
  19. mlrun/common/schemas/function.py +7 -0
  20. mlrun/common/schemas/model_monitoring/__init__.py +4 -3
  21. mlrun/common/schemas/model_monitoring/constants.py +41 -26
  22. mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
  23. mlrun/common/schemas/notification.py +69 -12
  24. mlrun/common/schemas/project.py +45 -12
  25. mlrun/common/schemas/workflow.py +10 -2
  26. mlrun/common/types.py +1 -0
  27. mlrun/config.py +91 -35
  28. mlrun/data_types/data_types.py +6 -1
  29. mlrun/data_types/spark.py +2 -2
  30. mlrun/data_types/to_pandas.py +57 -25
  31. mlrun/datastore/__init__.py +1 -0
  32. mlrun/datastore/alibaba_oss.py +3 -2
  33. mlrun/datastore/azure_blob.py +125 -37
  34. mlrun/datastore/base.py +42 -21
  35. mlrun/datastore/datastore.py +4 -2
  36. mlrun/datastore/datastore_profile.py +1 -1
  37. mlrun/datastore/dbfs_store.py +3 -7
  38. mlrun/datastore/filestore.py +1 -3
  39. mlrun/datastore/google_cloud_storage.py +85 -29
  40. mlrun/datastore/inmem.py +4 -1
  41. mlrun/datastore/redis.py +1 -0
  42. mlrun/datastore/s3.py +25 -12
  43. mlrun/datastore/sources.py +76 -4
  44. mlrun/datastore/spark_utils.py +30 -0
  45. mlrun/datastore/storeytargets.py +151 -0
  46. mlrun/datastore/targets.py +102 -131
  47. mlrun/datastore/v3io.py +1 -0
  48. mlrun/db/base.py +15 -6
  49. mlrun/db/httpdb.py +57 -28
  50. mlrun/db/nopdb.py +29 -5
  51. mlrun/errors.py +20 -3
  52. mlrun/execution.py +46 -5
  53. mlrun/feature_store/api.py +25 -1
  54. mlrun/feature_store/common.py +6 -11
  55. mlrun/feature_store/feature_vector.py +3 -1
  56. mlrun/feature_store/retrieval/job.py +4 -1
  57. mlrun/feature_store/retrieval/spark_merger.py +10 -39
  58. mlrun/feature_store/steps.py +8 -0
  59. mlrun/frameworks/_common/plan.py +3 -3
  60. mlrun/frameworks/_ml_common/plan.py +1 -1
  61. mlrun/frameworks/parallel_coordinates.py +2 -3
  62. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  63. mlrun/k8s_utils.py +48 -2
  64. mlrun/launcher/client.py +6 -6
  65. mlrun/launcher/local.py +2 -2
  66. mlrun/model.py +215 -34
  67. mlrun/model_monitoring/api.py +38 -24
  68. mlrun/model_monitoring/applications/__init__.py +1 -2
  69. mlrun/model_monitoring/applications/_application_steps.py +60 -29
  70. mlrun/model_monitoring/applications/base.py +2 -174
  71. mlrun/model_monitoring/applications/context.py +197 -70
  72. mlrun/model_monitoring/applications/evidently_base.py +11 -85
  73. mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
  74. mlrun/model_monitoring/applications/results.py +4 -4
  75. mlrun/model_monitoring/controller.py +110 -282
  76. mlrun/model_monitoring/db/stores/__init__.py +8 -3
  77. mlrun/model_monitoring/db/stores/base/store.py +3 -0
  78. mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
  79. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
  80. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
  81. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
  82. mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
  83. mlrun/model_monitoring/db/tsdb/base.py +147 -15
  84. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
  85. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
  86. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
  87. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
  88. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
  89. mlrun/model_monitoring/helpers.py +70 -50
  90. mlrun/model_monitoring/stream_processing.py +96 -195
  91. mlrun/model_monitoring/writer.py +13 -5
  92. mlrun/package/packagers/default_packager.py +2 -2
  93. mlrun/projects/operations.py +16 -8
  94. mlrun/projects/pipelines.py +126 -115
  95. mlrun/projects/project.py +286 -129
  96. mlrun/render.py +3 -3
  97. mlrun/run.py +38 -19
  98. mlrun/runtimes/__init__.py +19 -8
  99. mlrun/runtimes/base.py +4 -1
  100. mlrun/runtimes/daskjob.py +1 -1
  101. mlrun/runtimes/funcdoc.py +1 -1
  102. mlrun/runtimes/kubejob.py +6 -6
  103. mlrun/runtimes/local.py +12 -5
  104. mlrun/runtimes/nuclio/api_gateway.py +68 -8
  105. mlrun/runtimes/nuclio/application/application.py +307 -70
  106. mlrun/runtimes/nuclio/function.py +63 -14
  107. mlrun/runtimes/nuclio/serving.py +10 -10
  108. mlrun/runtimes/pod.py +25 -19
  109. mlrun/runtimes/remotesparkjob.py +2 -5
  110. mlrun/runtimes/sparkjob/spark3job.py +16 -17
  111. mlrun/runtimes/utils.py +34 -0
  112. mlrun/serving/routers.py +2 -5
  113. mlrun/serving/server.py +37 -19
  114. mlrun/serving/states.py +30 -3
  115. mlrun/serving/v2_serving.py +44 -35
  116. mlrun/track/trackers/mlflow_tracker.py +5 -0
  117. mlrun/utils/async_http.py +1 -1
  118. mlrun/utils/db.py +18 -0
  119. mlrun/utils/helpers.py +150 -36
  120. mlrun/utils/http.py +1 -1
  121. mlrun/utils/notifications/notification/__init__.py +0 -1
  122. mlrun/utils/notifications/notification/webhook.py +8 -1
  123. mlrun/utils/notifications/notification_pusher.py +1 -1
  124. mlrun/utils/v3io_clients.py +2 -2
  125. mlrun/utils/version/version.json +2 -2
  126. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
  127. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
  128. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
  129. mlrun/feature_store/retrieval/conversion.py +0 -271
  130. mlrun/model_monitoring/controller_handler.py +0 -37
  131. mlrun/model_monitoring/evidently_application.py +0 -20
  132. mlrun/model_monitoring/prometheus.py +0 -216
  133. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
  134. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
  135. {mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0
mlrun/datastore/s3.py CHANGED
@@ -15,11 +15,12 @@
 import time

 import boto3
+from boto3.s3.transfer import TransferConfig
 from fsspec.registry import get_filesystem_class

 import mlrun.errors

-from .base import DataStore, FileStats, get_range, makeDatastoreSchemaSanitizer
+from .base import DataStore, FileStats, get_range, make_datastore_schema_sanitizer


 class S3Store(DataStore):
@@ -35,11 +36,18 @@ class S3Store(DataStore):

         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret_key = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
+        token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")
         endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
         force_non_anonymous = self._get_secret_or_env("S3_NON_ANONYMOUS")
         profile_name = self._get_secret_or_env("AWS_PROFILE")
         assume_role_arn = self._get_secret_or_env("MLRUN_AWS_ROLE_ARN")

+        self.config = TransferConfig(
+            multipart_threshold=1024 * 1024 * 25,
+            max_concurrency=10,
+            multipart_chunksize=1024 * 1024 * 25,
+        )
+
         # If user asks to assume a role, this needs to go through the STS client and retrieve temporary creds
         if assume_role_arn:
             client = boto3.client(
@@ -87,14 +95,15 @@ class S3Store(DataStore):
             self.s3 = boto3.resource(
                 "s3", region_name=region, endpoint_url=endpoint_url
             )
-            # If not using credentials, boto will still attempt to sign the requests, and will fail any operations
-            # due to no credentials found. These commands disable signing and allow anonymous mode (same as
-            # anon in the storage_options when working with fsspec).
-            from botocore.handlers import disable_signing
-
-            self.s3.meta.client.meta.events.register(
-                "choose-signer.s3.*", disable_signing
-            )
+            if not token_file:
+                # If not using credentials, boto will still attempt to sign the requests, and will fail any operations
+                # due to no credentials found. These commands disable signing and allow anonymous mode (same as
+                # anon in the storage_options when working with fsspec).
+                from botocore.handlers import disable_signing
+
+                self.s3.meta.client.meta.events.register(
+                    "choose-signer.s3.*", disable_signing
+                )

     def get_spark_options(self):
         res = {}
@@ -119,7 +128,7 @@ class S3Store(DataStore):
         except ImportError as exc:
             raise ImportError("AWS s3fs not installed") from exc
         filesystem_class = get_filesystem_class(protocol=self.kind)
-        self._filesystem = makeDatastoreSchemaSanitizer(
+        self._filesystem = make_datastore_schema_sanitizer(
            filesystem_class,
            using_bucket=self.using_bucket,
            **self.get_storage_options(),
@@ -132,6 +141,7 @@
         endpoint_url = self._get_secret_or_env("S3_ENDPOINT_URL")
         access_key_id = self._get_secret_or_env("AWS_ACCESS_KEY_ID")
         secret = self._get_secret_or_env("AWS_SECRET_ACCESS_KEY")
+        token_file = self._get_secret_or_env("AWS_CONTAINER_AUTHORIZATION_TOKEN_FILE")

         if self._temp_credentials:
             access_key_id = self._temp_credentials["AccessKeyId"]
@@ -141,7 +151,7 @@
             token = None

         storage_options = dict(
-            anon=not (force_non_anonymous or (access_key_id and secret)),
+            anon=not (force_non_anonymous or (access_key_id and secret) or token_file),
             key=access_key_id,
             secret=secret,
             token=token,
@@ -166,7 +176,7 @@

     def upload(self, key, src_path):
         bucket, key = self.get_bucket_and_key(key)
-        self.s3.Object(bucket, key).put(Body=open(src_path, "rb"))
+        self.s3.Bucket(bucket).upload_file(src_path, key, Config=self.config)

     def get(self, key, size=None, offset=0):
         bucket, key = self.get_bucket_and_key(key)
@@ -176,6 +186,7 @@
         return obj.get()["Body"].read()

     def put(self, key, data, append=False):
+        data, _ = self._prepare_put_data(data, append)
         bucket, key = self.get_bucket_and_key(key)
         self.s3.Object(bucket, key).put(Body=data)

@@ -201,6 +212,8 @@
     def rm(self, path, recursive=False, maxdepth=None):
         bucket, key = self.get_bucket_and_key(path)
         path = f"{bucket}/{key}"
+        # In order to raise an error if there is connection error, ML-7056.
+        self.filesystem.exists(path=path)
         self.filesystem.rm(path=path, recursive=recursive, maxdepth=maxdepth)

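The upload path above switches from a single Object.put call to boto3's managed transfer API, so objects larger than the configured threshold are uploaded in parallel multipart chunks. A minimal sketch of the behavior the new upload() relies on; the bucket and file names are illustrative, not taken from mlrun:

    import boto3
    from boto3.s3.transfer import TransferConfig

    # Same settings as the new S3Store constructor: switch to multipart above 25 MB,
    # upload up to 10 parts concurrently, 25 MB per part.
    config = TransferConfig(
        multipart_threshold=1024 * 1024 * 25,
        max_concurrency=10,
        multipart_chunksize=1024 * 1024 * 25,
    )

    s3 = boto3.resource("s3")
    # upload_file streams the file and splits it into parts past the threshold,
    # unlike Object.put(Body=open(...)), which sends the whole object in one request.
    s3.Bucket("example-bucket").upload_file("/tmp/model.pkl", "models/model.pkl", Config=config)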
mlrun/datastore/sources.py CHANGED
@@ -32,6 +32,7 @@ from mlrun.config import config
 from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
 from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.secrets import SecretsStore
+from mlrun.utils import logger

 from ..model import DataSource
 from ..platforms.iguazio import parse_path
@@ -85,7 +86,8 @@ class BaseSourceDriver(DataSource):
         )

         explicit_ack = (
-            is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
+            is_explicit_ack_supported(context)
+            and mlrun.mlconf.is_explicit_ack_enabled()
         )
         return storey.SyncEmitSource(
             context=context,
@@ -826,6 +828,20 @@ class SnowflakeSource(BaseSourceDriver):
             spark_options["query"] = self.attributes.get("query")
         return spark_options

+    def to_dataframe(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_field=None,
+        additional_filters=None,
+    ):
+        raise mlrun.errors.MLRunRuntimeError(
+            f"{type(self).__name__} supports only spark engine"
+        )
+

 class CustomSource(BaseSourceDriver):
     kind = "custom"
@@ -930,7 +946,8 @@ class OnlineSource(BaseSourceDriver):

         source_args = self.attributes.get("source_args", {})
         explicit_ack = (
-            is_explicit_ack_supported(context) and mlrun.mlconf.is_explicit_ack()
+            is_explicit_ack_supported(context)
+            and mlrun.mlconf.is_explicit_ack_enabled()
         )
         # TODO: Change to AsyncEmitSource once we can drop support for nuclio<1.12.10
         src_class = storey.SyncEmitSource(
@@ -1015,7 +1032,8 @@ class StreamSource(OnlineSource):
         engine = "async"
         if hasattr(function.spec, "graph") and function.spec.graph.engine:
             engine = function.spec.graph.engine
-        if mlrun.mlconf.is_explicit_ack() and engine == "async":
+
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
             kwargs["explicit_ack_mode"] = "explicitOnly"
             kwargs["worker_allocation_mode"] = "static"

@@ -1102,7 +1120,8 @@ class KafkaSource(OnlineSource):
         engine = "async"
         if hasattr(function.spec, "graph") and function.spec.graph.engine:
             engine = function.spec.graph.engine
-        if mlrun.mlconf.is_explicit_ack() and engine == "async":
+
+        if mlrun.mlconf.is_explicit_ack_enabled() and engine == "async":
             explicit_ack_mode = "explicitOnly"
             extra_attributes["workerAllocationMode"] = extra_attributes.get(
                 "worker_allocation_mode", "static"
@@ -1145,6 +1164,59 @@ class KafkaSource(OnlineSource):
             "to a Spark dataframe is not possible, as this operation is not supported by Spark"
         )

+    def create_topics(
+        self,
+        num_partitions: int = 4,
+        replication_factor: int = 1,
+        topics: list[str] = None,
+    ):
+        """
+        Create Kafka topics with the specified number of partitions and replication factor.
+
+        :param num_partitions: number of partitions for the topics
+        :param replication_factor: replication factor for the topics
+        :param topics: list of topic names to create, if None,
+            the topics will be taken from the source attributes
+        """
+        from kafka.admin import KafkaAdminClient, NewTopic
+
+        brokers = self.attributes.get("brokers")
+        if not brokers:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "brokers must be specified in the KafkaSource attributes"
+            )
+        topics = topics or self.attributes.get("topics")
+        if not topics:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "topics must be specified in the KafkaSource attributes"
+            )
+        new_topics = [
+            NewTopic(topic, num_partitions, replication_factor) for topic in topics
+        ]
+        kafka_admin = KafkaAdminClient(
+            bootstrap_servers=brokers,
+            sasl_mechanism=self.attributes.get("sasl", {}).get("sasl_mechanism"),
+            sasl_plain_username=self.attributes.get("sasl", {}).get("username"),
+            sasl_plain_password=self.attributes.get("sasl", {}).get("password"),
+            sasl_kerberos_service_name=self.attributes.get("sasl", {}).get(
+                "sasl_kerberos_service_name", "kafka"
+            ),
+            sasl_kerberos_domain_name=self.attributes.get("sasl", {}).get(
+                "sasl_kerberos_domain_name"
+            ),
+            sasl_oauth_token_provider=self.attributes.get("sasl", {}).get("mechanism"),
+        )
+        try:
+            kafka_admin.create_topics(new_topics)
+        finally:
+            kafka_admin.close()
+        logger.info(
+            "Kafka topics created successfully",
+            topics=topics,
+            num_partitions=num_partitions,
+            replication_factor=replication_factor,
+        )
+

 class SQLSource(BaseSourceDriver):
     kind = "sqldb"
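KafkaSource gains a create_topics helper that provisions the source's topics through kafka-python's admin client before consumption starts. A hedged usage sketch; the broker address and topic name are illustrative, and it assumes the kafka-python package is installed:

    from mlrun.datastore.sources import KafkaSource

    source = KafkaSource(
        brokers="kafka-broker:9092",       # illustrative broker address
        topics=["model-serving-events"],   # illustrative topic name
    )
    # Uses the defaults from the new method: 4 partitions, replication factor 1.
    # Brokers, topics, and SASL settings are read from the source attributes.
    source.create_topics()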
mlrun/datastore/spark_utils.py CHANGED
@@ -13,7 +13,10 @@
 # limitations under the License.


+from typing import Union
+
 import mlrun
+from mlrun.features import Entity


 def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str]:
@@ -35,3 +38,30 @@ def spark_session_update_hadoop_options(session, spark_options) -> dict[str, str
         else:
             non_hadoop_spark_options[key] = value
     return non_hadoop_spark_options
+
+
+def check_special_columns_exists(
+    spark_df, entities: list[Union[Entity, str]], timestamp_key: str, label_column: str
+):
+    columns = spark_df.columns
+    entities = entities or []
+    entities = [
+        entity.name if isinstance(entity, Entity) else entity for entity in entities
+    ]
+    missing_entities = [entity for entity in entities if entity not in columns]
+    cases_message = "Please check the letter cases (uppercase or lowercase)"
+    if missing_entities:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"There are missing entities from dataframe during ingestion. missing_entities: {missing_entities}."
+            f" {cases_message}"
+        )
+    if timestamp_key and timestamp_key not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"timestamp_key is missing from dataframe during ingestion. timestamp_key: {timestamp_key}."
+            f" {cases_message}"
+        )
+    if label_column and label_column not in columns:
+        raise mlrun.errors.MLRunInvalidArgumentError(
+            f"label_column is missing from dataframe during ingestion. label_column: {label_column}. "
+            f"{cases_message}"
+        )
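The new check_special_columns_exists helper fails fast during Spark ingestion when an entity column, the timestamp key, or the label column is absent from the dataframe; matching is exact, so a letter-case mismatch also raises. A small illustrative sketch (the dataframe and column names are assumptions, not taken from the diff):

    from pyspark.sql import SparkSession

    from mlrun.datastore.spark_utils import check_special_columns_exists

    spark = SparkSession.builder.getOrCreate()
    df = spark.createDataFrame(
        [(1, "2024-01-01", 0)], ["customer_id", "event_time", "label"]
    )

    # Passes: every referenced column exists with the exact same letter case.
    check_special_columns_exists(
        df, entities=["customer_id"], timestamp_key="event_time", label_column="label"
    )

    # Raises MLRunInvalidArgumentError: "Event_Time" does not match "event_time".
    check_special_columns_exists(
        df, entities=["customer_id"], timestamp_key="Event_Time", label_column="label"
    )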
mlrun/datastore/storeytargets.py ADDED
@@ -0,0 +1,151 @@
+# Copyright 2024 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import storey
+from mergedeep import merge
+from storey import V3ioDriver
+
+import mlrun
+import mlrun.model_monitoring.helpers
+from mlrun.datastore.base import DataStore
+
+from ..platforms.iguazio import parse_path
+from .utils import (
+    parse_kafka_url,
+)
+
+"""
+Storey targets expect storage_options, which may contain credentials.
+To avoid passing it openly within the graph, we use wrapper classes.
+"""
+
+
+def get_url_and_storage_options(path, external_storage_options=None):
+    store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(path)
+    storage_options = store.get_storage_options()
+    if storage_options and external_storage_options:
+        # merge external storage options with the store's storage options. storage_options takes precedence
+        storage_options = merge(external_storage_options, storage_options)
+    else:
+        storage_options = storage_options or external_storage_options
+    return url, DataStore._sanitize_storage_options(storage_options)
+
+
+class TDEngineStoreyTarget(storey.TDEngineTarget):
+    def __init__(self, *args, **kwargs):
+        kwargs["url"] = mlrun.model_monitoring.helpers.get_tsdb_connection_string()
+        super().__init__(*args, **kwargs)
+
+
+class StoreyTargetUtils:
+    @staticmethod
+    def process_args_and_kwargs(args, kwargs):
+        args = list(args)
+        path = args[0] if args else kwargs.get("path")
+        external_storage_options = kwargs.get("storage_options")
+
+        url, storage_options = get_url_and_storage_options(
+            path, external_storage_options
+        )
+
+        if storage_options:
+            kwargs["storage_options"] = storage_options
+        if args:
+            args[0] = url
+        if "path" in kwargs:
+            kwargs["path"] = url
+        return args, kwargs
+
+
+class ParquetStoreyTarget(storey.ParquetTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class CSVStoreyTarget(storey.CSVTarget):
+    def __init__(self, *args, **kwargs):
+        args, kwargs = StoreyTargetUtils.process_args_and_kwargs(args, kwargs)
+        super().__init__(*args, **kwargs)
+
+
+class StreamStoreyTarget(storey.StreamTarget):
+    def __init__(self, *args, **kwargs):
+        args = list(args)
+
+        uri = args[0] if args else kwargs.get("stream_path")
+
+        if not uri:
+            raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
+
+        _, storage_options = get_url_and_storage_options(uri)
+        endpoint, path = parse_path(uri)
+
+        access_key = storage_options.get("v3io_access_key")
+        storage = V3ioDriver(
+            webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
+        )
+
+        if storage_options:
+            kwargs["storage"] = storage
+        if args:
+            args[0] = endpoint
+        if "stream_path" in kwargs:
+            kwargs["stream_path"] = path
+
+        super().__init__(*args, **kwargs)
+
+
+class KafkaStoreyTarget(storey.KafkaTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        attributes = kwargs.pop("attributes", None)
+        if path and path.startswith("ds://"):
+            datastore_profile = (
+                mlrun.datastore.datastore_profile.datastore_profile_read(path)
+            )
+            attributes = merge(attributes, datastore_profile.attributes())
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic = datastore_profile.topic
+        else:
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
+            topic, brokers = parse_kafka_url(path, brokers)
+
+        if not topic:
+            raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a topic")
+        kwargs["brokers"] = brokers
+        kwargs["topic"] = topic
+        super().__init__(*args, **kwargs, **attributes)
+
+
+class NoSqlStoreyTarget(storey.NoSqlTarget):
+    pass
+
+
+class RedisNoSqlStoreyTarget(storey.NoSqlTarget):
+    def __init__(self, *args, **kwargs):
+        path = kwargs.pop("path")
+        endpoint, uri = mlrun.datastore.targets.RedisNoSqlTarget.get_server_endpoint(
+            path,
+            kwargs.pop("credentials_prefix", None),
+        )
+        kwargs["path"] = endpoint + "/" + uri
+        super().__init__(*args, **kwargs)
+
+
+class TSDBStoreyTarget(storey.TSDBTarget):
+    pass
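All of these wrappers defer credential handling to get_url_and_storage_options, which resolves a store path into a concrete URL plus sanitized storage options only when the target object is constructed, so secrets are not carried through the serialized graph. A hedged sketch of that resolution step; the s3:// path is illustrative and assumes credentials are available in the environment or project secrets:

    from mlrun.datastore.storeytargets import (
        ParquetStoreyTarget,
        get_url_and_storage_options,
    )

    # Resolve the path through mlrun's store manager; storage_options comes back
    # sanitized (credentials pulled from secrets/env, not embedded in the graph).
    url, storage_options = get_url_and_storage_options("s3://example-bucket/features.parquet")

    # The wrapper performs the same resolution internally before handing off to storey.
    target = ParquetStoreyTarget(path="s3://example-bucket/features.parquet")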