mlrun 1.6.4rc2__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.
Files changed (291)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +26 -112
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +5 -4
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +46 -257
  8. mlrun/artifacts/dataset.py +11 -192
  9. mlrun/artifacts/manager.py +47 -48
  10. mlrun/artifacts/model.py +31 -159
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +69 -0
  13. mlrun/common/db/sql_session.py +2 -3
  14. mlrun/common/formatters/__init__.py +19 -0
  15. mlrun/common/formatters/artifact.py +21 -0
  16. mlrun/common/formatters/base.py +78 -0
  17. mlrun/common/formatters/function.py +41 -0
  18. mlrun/common/formatters/pipeline.py +53 -0
  19. mlrun/common/formatters/project.py +51 -0
  20. mlrun/common/helpers.py +1 -2
  21. mlrun/common/model_monitoring/helpers.py +9 -5
  22. mlrun/{runtimes → common/runtimes}/constants.py +37 -9
  23. mlrun/common/schemas/__init__.py +24 -4
  24. mlrun/common/schemas/alert.py +203 -0
  25. mlrun/common/schemas/api_gateway.py +148 -0
  26. mlrun/common/schemas/artifact.py +18 -8
  27. mlrun/common/schemas/auth.py +11 -5
  28. mlrun/common/schemas/background_task.py +1 -1
  29. mlrun/common/schemas/client_spec.py +4 -1
  30. mlrun/common/schemas/feature_store.py +16 -16
  31. mlrun/common/schemas/frontend_spec.py +8 -7
  32. mlrun/common/schemas/function.py +5 -1
  33. mlrun/common/schemas/hub.py +11 -18
  34. mlrun/common/schemas/memory_reports.py +2 -2
  35. mlrun/common/schemas/model_monitoring/__init__.py +18 -3
  36. mlrun/common/schemas/model_monitoring/constants.py +83 -26
  37. mlrun/common/schemas/model_monitoring/grafana.py +13 -9
  38. mlrun/common/schemas/model_monitoring/model_endpoints.py +99 -16
  39. mlrun/common/schemas/notification.py +4 -4
  40. mlrun/common/schemas/object.py +2 -2
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +1 -10
  43. mlrun/common/schemas/project.py +24 -23
  44. mlrun/common/schemas/runtime_resource.py +8 -12
  45. mlrun/common/schemas/schedule.py +3 -3
  46. mlrun/common/schemas/tag.py +1 -2
  47. mlrun/common/schemas/workflow.py +2 -2
  48. mlrun/common/types.py +7 -1
  49. mlrun/config.py +54 -17
  50. mlrun/data_types/to_pandas.py +10 -12
  51. mlrun/datastore/__init__.py +5 -8
  52. mlrun/datastore/alibaba_oss.py +130 -0
  53. mlrun/datastore/azure_blob.py +17 -5
  54. mlrun/datastore/base.py +62 -39
  55. mlrun/datastore/datastore.py +28 -9
  56. mlrun/datastore/datastore_profile.py +146 -20
  57. mlrun/datastore/filestore.py +0 -1
  58. mlrun/datastore/google_cloud_storage.py +6 -2
  59. mlrun/datastore/hdfs.py +56 -0
  60. mlrun/datastore/inmem.py +2 -2
  61. mlrun/datastore/redis.py +6 -2
  62. mlrun/datastore/s3.py +9 -0
  63. mlrun/datastore/snowflake_utils.py +43 -0
  64. mlrun/datastore/sources.py +201 -96
  65. mlrun/datastore/spark_utils.py +1 -2
  66. mlrun/datastore/store_resources.py +7 -7
  67. mlrun/datastore/targets.py +358 -104
  68. mlrun/datastore/utils.py +72 -58
  69. mlrun/datastore/v3io.py +5 -1
  70. mlrun/db/base.py +185 -35
  71. mlrun/db/factory.py +1 -1
  72. mlrun/db/httpdb.py +614 -179
  73. mlrun/db/nopdb.py +210 -26
  74. mlrun/errors.py +12 -1
  75. mlrun/execution.py +41 -24
  76. mlrun/feature_store/__init__.py +0 -2
  77. mlrun/feature_store/api.py +40 -72
  78. mlrun/feature_store/common.py +1 -1
  79. mlrun/feature_store/feature_set.py +76 -55
  80. mlrun/feature_store/feature_vector.py +28 -30
  81. mlrun/feature_store/ingestion.py +7 -6
  82. mlrun/feature_store/retrieval/base.py +16 -11
  83. mlrun/feature_store/retrieval/conversion.py +11 -13
  84. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  85. mlrun/feature_store/retrieval/job.py +9 -3
  86. mlrun/feature_store/retrieval/local_merger.py +2 -0
  87. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  88. mlrun/feature_store/steps.py +37 -34
  89. mlrun/features.py +9 -20
  90. mlrun/frameworks/_common/artifacts_library.py +9 -9
  91. mlrun/frameworks/_common/mlrun_interface.py +5 -5
  92. mlrun/frameworks/_common/model_handler.py +48 -48
  93. mlrun/frameworks/_common/plan.py +2 -3
  94. mlrun/frameworks/_common/producer.py +3 -4
  95. mlrun/frameworks/_common/utils.py +5 -5
  96. mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
  97. mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
  98. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
  99. mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
  100. mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
  101. mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
  102. mlrun/frameworks/_ml_common/model_handler.py +24 -24
  103. mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
  104. mlrun/frameworks/_ml_common/plan.py +1 -1
  105. mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
  106. mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
  107. mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
  108. mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
  109. mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
  110. mlrun/frameworks/_ml_common/utils.py +4 -4
  111. mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
  112. mlrun/frameworks/huggingface/model_server.py +4 -4
  113. mlrun/frameworks/lgbm/__init__.py +33 -33
  114. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  115. mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
  116. mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
  117. mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
  118. mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
  119. mlrun/frameworks/lgbm/model_handler.py +10 -10
  120. mlrun/frameworks/lgbm/model_server.py +6 -6
  121. mlrun/frameworks/lgbm/utils.py +5 -5
  122. mlrun/frameworks/onnx/dataset.py +8 -8
  123. mlrun/frameworks/onnx/mlrun_interface.py +3 -3
  124. mlrun/frameworks/onnx/model_handler.py +6 -6
  125. mlrun/frameworks/onnx/model_server.py +7 -7
  126. mlrun/frameworks/parallel_coordinates.py +4 -3
  127. mlrun/frameworks/pytorch/__init__.py +18 -18
  128. mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
  129. mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
  130. mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
  131. mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
  132. mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
  133. mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
  134. mlrun/frameworks/pytorch/model_handler.py +17 -17
  135. mlrun/frameworks/pytorch/model_server.py +7 -7
  136. mlrun/frameworks/sklearn/__init__.py +13 -13
  137. mlrun/frameworks/sklearn/estimator.py +4 -4
  138. mlrun/frameworks/sklearn/metrics_library.py +14 -14
  139. mlrun/frameworks/sklearn/mlrun_interface.py +3 -6
  140. mlrun/frameworks/sklearn/model_handler.py +2 -2
  141. mlrun/frameworks/tf_keras/__init__.py +10 -7
  142. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
  143. mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
  144. mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
  145. mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
  146. mlrun/frameworks/tf_keras/model_handler.py +14 -14
  147. mlrun/frameworks/tf_keras/model_server.py +6 -6
  148. mlrun/frameworks/xgboost/__init__.py +13 -13
  149. mlrun/frameworks/xgboost/model_handler.py +6 -6
  150. mlrun/k8s_utils.py +14 -16
  151. mlrun/launcher/__init__.py +1 -1
  152. mlrun/launcher/base.py +16 -15
  153. mlrun/launcher/client.py +8 -6
  154. mlrun/launcher/factory.py +1 -1
  155. mlrun/launcher/local.py +17 -11
  156. mlrun/launcher/remote.py +16 -10
  157. mlrun/lists.py +7 -6
  158. mlrun/model.py +238 -73
  159. mlrun/model_monitoring/__init__.py +1 -1
  160. mlrun/model_monitoring/api.py +138 -315
  161. mlrun/model_monitoring/application.py +5 -296
  162. mlrun/model_monitoring/applications/__init__.py +24 -0
  163. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  164. mlrun/model_monitoring/applications/base.py +282 -0
  165. mlrun/model_monitoring/applications/context.py +214 -0
  166. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  167. mlrun/model_monitoring/applications/histogram_data_drift.py +349 -0
  168. mlrun/model_monitoring/applications/results.py +99 -0
  169. mlrun/model_monitoring/controller.py +104 -84
  170. mlrun/model_monitoring/controller_handler.py +13 -5
  171. mlrun/model_monitoring/db/__init__.py +18 -0
  172. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  173. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  174. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +64 -40
  175. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  176. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  177. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  178. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  179. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  180. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  181. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  182. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +310 -165
  183. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  184. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  185. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  186. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  187. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  188. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  189. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  190. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  191. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  192. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  193. mlrun/model_monitoring/evidently_application.py +6 -118
  194. mlrun/model_monitoring/features_drift_table.py +134 -106
  195. mlrun/model_monitoring/helpers.py +127 -28
  196. mlrun/model_monitoring/metrics/__init__.py +13 -0
  197. mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
  198. mlrun/model_monitoring/model_endpoint.py +3 -2
  199. mlrun/model_monitoring/prometheus.py +1 -4
  200. mlrun/model_monitoring/stream_processing.py +62 -231
  201. mlrun/model_monitoring/tracking_policy.py +9 -2
  202. mlrun/model_monitoring/writer.py +152 -124
  203. mlrun/package/__init__.py +6 -6
  204. mlrun/package/context_handler.py +5 -5
  205. mlrun/package/packager.py +7 -7
  206. mlrun/package/packagers/default_packager.py +6 -6
  207. mlrun/package/packagers/numpy_packagers.py +15 -15
  208. mlrun/package/packagers/pandas_packagers.py +5 -5
  209. mlrun/package/packagers/python_standard_library_packagers.py +10 -10
  210. mlrun/package/packagers_manager.py +19 -23
  211. mlrun/package/utils/_formatter.py +6 -6
  212. mlrun/package/utils/_pickler.py +2 -2
  213. mlrun/package/utils/_supported_format.py +4 -4
  214. mlrun/package/utils/log_hint_utils.py +2 -2
  215. mlrun/package/utils/type_hint_utils.py +4 -9
  216. mlrun/platforms/__init__.py +11 -10
  217. mlrun/platforms/iguazio.py +24 -203
  218. mlrun/projects/operations.py +35 -21
  219. mlrun/projects/pipelines.py +68 -99
  220. mlrun/projects/project.py +830 -266
  221. mlrun/render.py +3 -11
  222. mlrun/run.py +162 -166
  223. mlrun/runtimes/__init__.py +62 -7
  224. mlrun/runtimes/base.py +39 -32
  225. mlrun/runtimes/daskjob.py +8 -8
  226. mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
  227. mlrun/runtimes/databricks_job/databricks_runtime.py +7 -7
  228. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  229. mlrun/runtimes/funcdoc.py +0 -28
  230. mlrun/runtimes/function_reference.py +1 -1
  231. mlrun/runtimes/kubejob.py +28 -122
  232. mlrun/runtimes/local.py +6 -3
  233. mlrun/runtimes/mpijob/__init__.py +0 -20
  234. mlrun/runtimes/mpijob/abstract.py +9 -10
  235. mlrun/runtimes/mpijob/v1.py +1 -1
  236. mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
  237. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  238. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  239. mlrun/runtimes/nuclio/application/application.py +523 -0
  240. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  241. mlrun/runtimes/{function.py → nuclio/function.py} +112 -73
  242. mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
  243. mlrun/runtimes/{serving.py → nuclio/serving.py} +45 -51
  244. mlrun/runtimes/pod.py +286 -88
  245. mlrun/runtimes/remotesparkjob.py +2 -2
  246. mlrun/runtimes/sparkjob/spark3job.py +51 -34
  247. mlrun/runtimes/utils.py +7 -75
  248. mlrun/secrets.py +9 -5
  249. mlrun/serving/remote.py +2 -7
  250. mlrun/serving/routers.py +13 -10
  251. mlrun/serving/server.py +22 -26
  252. mlrun/serving/states.py +99 -25
  253. mlrun/serving/utils.py +3 -3
  254. mlrun/serving/v1_serving.py +6 -7
  255. mlrun/serving/v2_serving.py +59 -20
  256. mlrun/track/tracker.py +2 -1
  257. mlrun/track/tracker_manager.py +3 -3
  258. mlrun/track/trackers/mlflow_tracker.py +1 -2
  259. mlrun/utils/async_http.py +5 -7
  260. mlrun/utils/azure_vault.py +1 -1
  261. mlrun/utils/clones.py +1 -2
  262. mlrun/utils/condition_evaluator.py +3 -3
  263. mlrun/utils/db.py +3 -3
  264. mlrun/utils/helpers.py +183 -197
  265. mlrun/utils/http.py +2 -5
  266. mlrun/utils/logger.py +76 -14
  267. mlrun/utils/notifications/notification/__init__.py +17 -12
  268. mlrun/utils/notifications/notification/base.py +14 -2
  269. mlrun/utils/notifications/notification/console.py +2 -0
  270. mlrun/utils/notifications/notification/git.py +3 -1
  271. mlrun/utils/notifications/notification/ipython.py +3 -1
  272. mlrun/utils/notifications/notification/slack.py +101 -21
  273. mlrun/utils/notifications/notification/webhook.py +11 -1
  274. mlrun/utils/notifications/notification_pusher.py +155 -30
  275. mlrun/utils/retryer.py +208 -0
  276. mlrun/utils/singleton.py +1 -1
  277. mlrun/utils/v3io_clients.py +2 -4
  278. mlrun/utils/version/version.json +2 -2
  279. mlrun/utils/version/version.py +2 -6
  280. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +31 -19
  281. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  282. mlrun/kfpops.py +0 -868
  283. mlrun/model_monitoring/batch.py +0 -1095
  284. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  285. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
  286. mlrun/platforms/other.py +0 -306
  287. mlrun-1.6.4rc2.dist-info/RECORD +0 -314
  288. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  289. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +0 -0
  290. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  291. {mlrun-1.6.4rc2.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py

@@ -17,9 +17,10 @@ import os
 import random
 import sys
 import time
+import warnings
 from collections import Counter
 from copy import copy
-from typing import Any, Dict, List, Optional, Union
+from typing import Any, Optional, Union
 from urllib.parse import urlparse
 
 import pandas as pd
@@ -28,8 +29,10 @@ from mergedeep import merge
 import mlrun
 import mlrun.utils.helpers
 from mlrun.config import config
+from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+from mlrun.datastore.utils import transform_list_filters_to_tuple
 from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
-from mlrun.utils import now_date
+from mlrun.utils import logger, now_date
 from mlrun.utils.helpers import to_parquet
 from mlrun.utils.v3io_clients import get_frames_client
 
@@ -43,7 +46,6 @@ from .utils import (
     filter_df_start_end_time,
     parse_kafka_url,
     select_columns_from_df,
-    store_path_to_spark,
 )
 
 
@@ -58,6 +60,7 @@ class TargetTypes:
     dataframe = "dataframe"
     custom = "custom"
     sql = "sql"
+    snowflake = "snowflake"
 
     @staticmethod
     def all():
@@ -72,6 +75,7 @@ class TargetTypes:
             TargetTypes.dataframe,
             TargetTypes.custom,
             TargetTypes.sql,
+            TargetTypes.snowflake,
         ]
 
 
@@ -79,11 +83,14 @@ def generate_target_run_id():
     return f"{round(time.time() * 1000)}_{random.randint(0, 999)}"
 
 
-def write_spark_dataframe_with_options(spark_options, df, mode):
+def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
     non_hadoop_spark_options = spark_session_update_hadoop_options(
         df.sql_ctx.sparkSession, spark_options
     )
-    df.write.mode(mode).save(**non_hadoop_spark_options)
+    if write_format:
+        df.write.format(write_format).mode(mode).save(**non_hadoop_spark_options)
+    else:
+        df.write.mode(mode).save(**non_hadoop_spark_options)
 
 
 def default_target_names():
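A minimal sketch of how a caller is expected to use the new write_format argument (it mirrors the write_dataframe changes later in this file, which pop "format" out of the Spark options before saving); the Spark DataFrame df and the options dict below are illustrative placeholders only:

    # Sketch only: assumes a live SparkSession and a Spark DataFrame `df`.
    options = {"path": "v3io://projects/demo/stocks", "format": "parquet"}
    write_format = options.pop("format", None)  # no longer passed through save()
    write_spark_dataframe_with_options(options, df, "overwrite", write_format=write_format)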
@@ -215,9 +222,8 @@ def validate_target_list(targets):
     ]
     if target_types_requiring_name:
         raise mlrun.errors.MLRunInvalidArgumentError(
-            "Only one default name per target type is allowed (please specify name for {0} target)".format(
-                target_types_requiring_name
-            )
+            "Only one default name per target type is allowed (please "
+            f"specify name for {target_types_requiring_name} target)"
         )
 
     target_names_count = Counter(
@@ -232,9 +238,8 @@ def validate_target_list(targets):
 
     if targets_with_same_name:
         raise mlrun.errors.MLRunInvalidArgumentError(
-            "Each target must have a unique name (more than one target with those names found {0})".format(
-                targets_with_same_name
-            )
+            "Each target must have a unique name (more than one target with "
+            f"those names found {targets_with_same_name})"
         )
 
     no_path_target_types_count = Counter(
@@ -252,9 +257,8 @@ def validate_target_list(targets):
     ]
     if target_types_requiring_path:
         raise mlrun.errors.MLRunInvalidArgumentError(
-            "Only one default path per target type is allowed (please specify path for {0} target)".format(
-                target_types_requiring_path
-            )
+            "Only one default path per target type is allowed (please specify "
+            f"path for {target_types_requiring_path} target)"
         )
 
     target_paths_count = Counter(
@@ -269,9 +273,8 @@ def validate_target_list(targets):
 
     if targets_with_same_path:
         raise mlrun.errors.MLRunInvalidArgumentError(
-            "Each target must have a unique path (more than one target with those names found {0})".format(
-                targets_with_same_path
-            )
+            "Each target must have a unique path (more than one target "
+            f"with those names found {targets_with_same_path})"
         )
 
 
@@ -390,17 +393,17 @@ class BaseStoreTarget(DataTargetBase):
         self,
         name: str = "",
         path=None,
-        attributes: Dict[str, str] = None,
+        attributes: dict[str, str] = None,
         after_step=None,
         columns=None,
         partitioned: bool = False,
         key_bucketing_number: Optional[int] = None,
-        partition_cols: Optional[List[str]] = None,
+        partition_cols: Optional[list[str]] = None,
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: Dict[str, str] = None,
-        schema: Dict[str, Any] = None,
+        storage_options: dict[str, str] = None,
+        schema: dict[str, Any] = None,
         credentials_prefix=None,
     ):
         super().__init__(
@@ -452,14 +455,11 @@ class BaseStoreTarget(DataTargetBase):
             if self.credentials_prefix
             else None
         )
-        store, resolved_store_path = mlrun.store_manager.get_or_create_store(
+        store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(
            self.get_target_path(),
            credentials_prefix_secrets,
        )
-        if self.get_target_path() and self.get_target_path().startswith("ds://"):
-            return store, store.url + resolved_store_path
-        else:
-            return store, self.get_target_path()
+        return store, resolved_store_path, url
 
     def _get_column_list(self, features, timestamp_key, key_columns, with_type=False):
         result = []
@@ -505,10 +505,13 @@ class BaseStoreTarget(DataTargetBase):
             options = self.get_spark_options(key_column, timestamp_key)
             options.update(kwargs)
             df = self.prepare_spark_df(df, key_column, timestamp_key, options)
-            write_spark_dataframe_with_options(options, df, "overwrite")
+            write_format = options.pop("format", None)
+            write_spark_dataframe_with_options(
+                options, df, "overwrite", write_format=write_format
+            )
         elif hasattr(df, "dask"):
             dask_options = self.get_dask_options()
-            store, target_path = self._get_store_and_path()
+            store, path_in_store, target_path = self._get_store_and_path()
             storage_options = store.get_storage_options()
             df = df.repartition(partition_size="100MB")
             try:
@@ -529,10 +532,15 @@ class BaseStoreTarget(DataTargetBase):
            except Exception as exc:
                raise RuntimeError("Failed to write Dask Dataframe") from exc
         else:
-            store, target_path = self._get_store_and_path()
+            store, path_in_store, target_path = self._get_store_and_path()
             target_path = generate_path_with_chunk(self, chunk_id, target_path)
             file_system = store.filesystem
-            if file_system.protocol == "file":
+            if (
+                file_system.protocol == "file"
+                # fsspec 2023.10.0 changed protocol from "file" to ("file", "local")
+                or isinstance(file_system.protocol, (tuple, list))
+                and "file" in file_system.protocol
+            ):
                 dir = os.path.dirname(target_path)
                 if dir:
                     os.makedirs(dir, exist_ok=True)
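Because and binds tighter than or, the reworked protocol check above reads as: protocol equals "file", or protocol is a tuple/list that contains "file". A self-contained illustration of that boolean, with no fsspec dependency:

    def is_local_protocol(protocol):
        # Same shape as the check above: `and` is evaluated before `or`.
        return (
            protocol == "file"
            or isinstance(protocol, (tuple, list))
            and "file" in protocol
        )

    assert is_local_protocol("file")             # older fsspec
    assert is_local_protocol(("file", "local"))  # fsspec >= 2023.10.0
    assert not is_local_protocol("s3")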
@@ -649,6 +657,29 @@ class BaseStoreTarget(DataTargetBase):
     def _target_path_object(self):
         """return the actual/computed target path"""
         is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+        if self._resource and self.path:
+            parsed_url = urlparse(self.path)
+            # When the URL consists only from scheme and endpoint and no path,
+            # make a default path for DS and redis targets.
+            # Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
+            if (
+                not isinstance(self, KafkaTarget)
+                and parsed_url.scheme in ["ds", "redis", "rediss"]
+                and (not parsed_url.path or parsed_url.path == "/")
+            ):
+                return TargetPathObject(
+                    _get_target_path(
+                        self,
+                        self._resource,
+                        self.run_id is not None,
+                        netloc=parsed_url.netloc,
+                        scheme=parsed_url.scheme,
+                    ),
+                    self.run_id,
+                    is_single_file,
+                )
+
         return self.get_path() or (
             TargetPathObject(
                 _get_target_path(self, self._resource, self.run_id is not None),
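The new branch above keys off urlparse: a target URL that carries only a scheme and an authority (empty path or "/") now falls back to a generated default path. A standard-library illustration of the condition; the URLs are made up:

    from urllib.parse import urlparse

    for url in ("ds://my_profile", "redis://cache-host:6379", "redis://cache-host:6379/my-key"):
        parsed = urlparse(url)
        uses_default_path = parsed.scheme in ["ds", "redis", "rediss"] and (
            not parsed.path or parsed.path == "/"
        )
        print(url, parsed.netloc, uses_default_path)
    # ds://my_profile my_profile True
    # redis://cache-host:6379 cache-host:6379 True
    # redis://cache-host:6379/my-key cache-host:6379 False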
@@ -696,7 +727,7 @@ class BaseStoreTarget(DataTargetBase):
         raise NotImplementedError()
 
     def purge(self):
-        store, target_path = self._get_store_and_path()
+        store, path_in_store, target_path = self._get_store_and_path()
         store.rm(target_path, recursive=True)
 
     def as_df(
@@ -707,9 +738,13 @@ class BaseStoreTarget(DataTargetBase):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return mlrun.get_dataitem(self.get_target_path()).as_df(
             columns=columns,
             df_module=df_module,
@@ -723,7 +758,7 @@ class BaseStoreTarget(DataTargetBase):
         # options used in spark.read.load(**options)
         raise NotImplementedError()
 
-    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+    def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         return df
 
     def get_dask_options(self):
@@ -731,7 +766,7 @@
 
 
 class ParquetTarget(BaseStoreTarget):
-    """parquet target storage driver, used to materialize feature set/vector data into parquet files
+    """Parquet target storage driver, used to materialize feature set/vector data into parquet files.
 
     :param name: optional, target name. By default will be called ParquetTarget
     :param path: optional, Output path. Can be either a file or directory.
@@ -766,16 +801,16 @@ class ParquetTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: Dict[str, str] = None,
+        attributes: dict[str, str] = None,
         after_step=None,
         columns=None,
         partitioned: bool = None,
         key_bucketing_number: Optional[int] = None,
-        partition_cols: Optional[List[str]] = None,
+        partition_cols: Optional[list[str]] = None,
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = 10000,
         flush_after_seconds: Optional[int] = 900,
-        storage_options: Dict[str, str] = None,
+        storage_options: dict[str, str] = None,
     ):
         self.path = path
         if partitioned is None:
@@ -876,7 +911,7 @@ class ParquetTarget(BaseStoreTarget):
         for key_column in key_columns:
             tuple_key_columns.append((key_column.name, key_column.value_type))
 
-        store, target_path = self._get_store_and_path()
+        store, path_in_store, target_path = self._get_store_and_path()
 
         storage_options = store.get_storage_options()
         if storage_options and self.storage_options:
@@ -929,27 +964,19 @@ class ParquetTarget(BaseStoreTarget):
                 if unit == time_partitioning_granularity:
                     break
 
-        if self.path and self.path.startswith("ds://"):
-            store, path = mlrun.store_manager.get_or_create_store(
-                self.get_target_path()
-            )
-            storage_spark_options = store.get_spark_options()
-            path = store.url + path
-            result = {
-                "path": store_path_to_spark(path, storage_spark_options),
-                "format": "parquet",
-            }
-            result = {**result, **storage_spark_options}
-        else:
-            result = {
-                "path": store_path_to_spark(self.get_target_path()),
+        store, path, url = self._get_store_and_path()
+        spark_options = store.get_spark_options()
+        spark_options.update(
+            {
+                "path": store.spark_url + path,
                 "format": "parquet",
             }
+        )
         for partition_col in self.partition_cols or []:
             partition_cols.append(partition_col)
         if partition_cols:
-            result["partitionBy"] = partition_cols
-        return result
+            spark_options["partitionBy"] = partition_cols
+        return spark_options
 
     def get_dask_options(self):
         return {"format": "parquet"}
@@ -962,6 +989,7 @@ class ParquetTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return the target data as dataframe"""
@@ -972,6 +1000,7 @@ class ParquetTarget(BaseStoreTarget):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=transform_list_filters_to_tuple(additional_filters),
             **kwargs,
         )
         if not columns:
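additional_filters is handed to the Parquet reader after passing through transform_list_filters_to_tuple. Presumably this normalizes JSON-style list filters into the (column, op, value) tuples that pandas/pyarrow expect; the helper's actual behavior is not shown in this diff, so the sketch below is only an approximation of the idea:

    # Illustrative only; not the real transform_list_filters_to_tuple implementation.
    def to_tuple_filters(filters):
        if not filters:
            return filters
        return [tuple(f) if isinstance(f, list) else f for f in filters]

    print(to_tuple_filters([["age", ">", 30], ("city", "=", "NYC")]))
    # [('age', '>', 30), ('city', '=', 'NYC')]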
@@ -1056,7 +1085,7 @@ class CSVTarget(BaseStoreTarget):
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
-        store, target_path = self._get_store_and_path()
+        store, path_in_store, target_path = self._get_store_and_path()
         graph.add_step(
             name=self.name or "CSVTarget",
             after=after,
@@ -1071,24 +1100,16 @@ class CSVTarget(BaseStoreTarget):
         )
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
-        if self.path and self.path.startswith("ds://"):
-            store, path = mlrun.store_manager.get_or_create_store(
-                self.get_target_path()
-            )
-            storage_spark_options = store.get_spark_options()
-            path = store.url + path
-            result = {
-                "path": store_path_to_spark(path, storage_spark_options),
-                "format": "csv",
-                "header": "true",
-            }
-            return {**result, **storage_spark_options}
-        else:
-            return {
-                "path": store_path_to_spark(self.get_target_path()),
+        store, path, url = self._get_store_and_path()
+        spark_options = store.get_spark_options()
+        spark_options.update(
+            {
+                "path": store.spark_url + path,
                 "format": "csv",
                 "header": "true",
             }
+        )
+        return spark_options
 
     def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
         import pyspark.sql.functions as funcs
@@ -1110,8 +1131,12 @@ class CSVTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         df = super().as_df(
             columns=columns,
             df_module=df_module,
@@ -1132,6 +1157,98 @@ class CSVTarget(BaseStoreTarget):
         return True
 
 
+class SnowflakeTarget(BaseStoreTarget):
+    """
+    :param attributes: A dictionary of attributes for Snowflake connection; will be overridden by database parameters
+        if they exist.
+    :param url: Snowflake hostname, in the format: <account_name>.<region>.snowflakecomputing.com
+    :param user: Snowflake user for login
+    :param db_schema: Database schema
+    :param database: Database name
+    :param warehouse: Snowflake warehouse name
+    :param table_name: Snowflake table name
+    """
+
+    support_spark = True
+    support_append = True
+    is_offline = True
+    kind = TargetTypes.snowflake
+
+    def __init__(
+        self,
+        name: str = "",
+        path=None,
+        attributes: dict[str, str] = None,
+        after_step=None,
+        columns=None,
+        partitioned: bool = False,
+        key_bucketing_number: Optional[int] = None,
+        partition_cols: Optional[list[str]] = None,
+        time_partitioning_granularity: Optional[str] = None,
+        max_events: Optional[int] = None,
+        flush_after_seconds: Optional[int] = None,
+        storage_options: dict[str, str] = None,
+        schema: dict[str, Any] = None,
+        credentials_prefix=None,
+        url: str = None,
+        user: str = None,
+        db_schema: str = None,
+        database: str = None,
+        warehouse: str = None,
+        table_name: str = None,
+    ):
+        attrs = {
+            "url": url,
+            "user": user,
+            "database": database,
+            "schema": db_schema,
+            "warehouse": warehouse,
+            "table": table_name,
+        }
+        extended_attrs = {
+            key: value for key, value in attrs.items() if value is not None
+        }
+        attributes = {} if not attributes else attributes
+        attributes.update(extended_attrs)
+        super().__init__(
+            name,
+            path,
+            attributes,
+            after_step,
+            list(schema.keys()) if schema else columns,
+            partitioned,
+            key_bucketing_number,
+            partition_cols,
+            time_partitioning_granularity,
+            max_events=max_events,
+            flush_after_seconds=flush_after_seconds,
+            storage_options=storage_options,
+            schema=schema,
+            credentials_prefix=credentials_prefix,
+        )
+
+    def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
+        spark_options = get_snowflake_spark_options(self.attributes)
+        spark_options["dbtable"] = self.attributes.get("table")
+        return spark_options
+
+    def purge(self):
+        pass
+
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
+        raise NotImplementedError()
+
+
 class NoSqlBaseTarget(BaseStoreTarget):
     is_table = True
     is_online = True
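A hedged usage sketch for the new SnowflakeTarget, using only the constructor parameters visible above; all connection values are placeholders, and credential handling (e.g. the Snowflake password) is not part of this hunk:

    from mlrun.datastore.targets import SnowflakeTarget

    target = SnowflakeTarget(
        name="snowflake",
        url="my_account.eu-west-1.snowflakecomputing.com",
        user="ml_user",
        database="ANALYTICS",
        db_schema="PUBLIC",
        warehouse="COMPUTE_WH",
        table_name="STOCKS_FEATURES",
    )
    # The keyword arguments are folded into target.attributes, and
    # get_spark_options() later adds {"dbtable": attributes["table"]}.
    print(target.kind, target.attributes["table"])  # snowflake STOCKS_FEATURES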
@@ -1193,7 +1310,17 @@ class NoSqlBaseTarget(BaseStoreTarget):
     def get_dask_options(self):
         return {"format": "csv"}
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1203,7 +1330,10 @@ class NoSqlBaseTarget(BaseStoreTarget):
             options = self.get_spark_options(key_column, timestamp_key)
             options.update(kwargs)
             df = self.prepare_spark_df(df)
-            write_spark_dataframe_with_options(options, df, "overwrite")
+            write_format = options.pop("format", None)
+            write_spark_dataframe_with_options(
+                options, df, "overwrite", write_format=write_format
+            )
         else:
             # To prevent modification of the original dataframe and make sure
             # that the last event of a key is the one being persisted
@@ -1213,7 +1343,11 @@ class NoSqlBaseTarget(BaseStoreTarget):
             df = df.copy(deep=False)
             access_key = self._get_credential("V3IO_ACCESS_KEY")
 
-            _, path_with_container = parse_path(self.get_target_path())
+            store, path_in_store, target_path = self._get_store_and_path()
+            storage_options = store.get_storage_options()
+            access_key = storage_options.get("v3io_access_key", access_key)
+
+            _, path_with_container = parse_path(target_path)
             container, path = split_path(path_with_container)
 
             frames_client = get_frames_client(
@@ -1231,17 +1365,31 @@ class NoSqlTarget(NoSqlBaseTarget):
     def get_table_object(self):
         from storey import Table, V3ioDriver
 
-        # TODO use options/cred
-        endpoint, uri = parse_path(self.get_target_path())
+        store, path_in_store, target_path = self._get_store_and_path()
+        endpoint, uri = parse_path(target_path)
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key")
+
         return Table(
             uri,
-            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
+            V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
             flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
         )
 
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
+        store, path_in_store, target_path = self._get_store_and_path()
+        storage_options = store.get_storage_options()
+        store_access_key = storage_options.get("v3io_access_key")
+        env_access_key = self._secrets.get(
+            "V3IO_ACCESS_KEY", os.getenv("V3IO_ACCESS_KEY")
+        )
+        if store_access_key and env_access_key and store_access_key != env_access_key:
+            logger.warning(
+                "The Spark v3io connector does not support access_key parameterization."
+                "Spark will disregard the store-provided key."
+            )
         spark_options = {
-            "path": store_path_to_spark(self.get_target_path()),
+            "path": store.spark_url + path_in_store,
             "format": "io.iguaz.v3io.spark.sql.kv",
         }
         if isinstance(key_column, list) and len(key_column) >= 1:
@@ -1334,10 +1482,10 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
     def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
         endpoint, uri = self._get_server_endpoint()
         parsed_endpoint = urlparse(endpoint)
-
+        store, path_in_store, path = self._get_store_and_path()
         return {
             "key.column": "_spark_object_name",
-            "table": "{" + store_path_to_spark(self.get_target_path()),
+            "table": "{" + path_in_store,
             "format": "org.apache.spark.sql.redis",
             "host": parsed_endpoint.hostname,
             "port": parsed_endpoint.port,
@@ -1385,10 +1533,12 @@ class StreamTarget(BaseStoreTarget):
         from storey import V3ioDriver
 
         key_columns = list(key_columns.keys())
-        path = self.get_target_path()
+        store, path_in_store, path = self._get_store_and_path()
         if not path:
             raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
         endpoint, uri = parse_path(path)
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key")
         column_list = self._get_column_list(
             features=features, timestamp_key=timestamp_key, key_columns=key_columns
         )
@@ -1399,16 +1549,47 @@ class StreamTarget(BaseStoreTarget):
             graph_shape="cylinder",
             class_name="storey.StreamTarget",
             columns=column_list,
-            storage=V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
+            storage=V3ioDriver(
+                webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
+            ),
             stream_path=uri,
             **self.attributes,
         )
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
 
 class KafkaTarget(BaseStoreTarget):
+    """
+    Kafka target storage driver, used to write data into kafka topics.
+    example::
+        # define target
+        kafka_target = KafkaTarget(
+            name="kafka", path="my_topic", brokers="localhost:9092"
+        )
+        # ingest
+        stocks_set.ingest(stocks, [kafka_target])
+    :param name: target name
+    :param path: topic name e.g. "my_topic"
+    :param after_step: optional, after what step in the graph to add the target
+    :param columns: optional, which columns from data to write
+    :param bootstrap_servers: Deprecated. Use the brokers parameter instead
+    :param producer_options: additional configurations for kafka producer
+    :param brokers: kafka broker as represented by a host:port pair, or a list of kafka brokers, e.g.
+        "localhost:9092", or ["kafka-broker-1:9092", "kafka-broker-2:9092"]
+    """
+
     kind = TargetTypes.kafka
     is_table = False
     is_online = False
@@ -1421,11 +1602,27 @@ class KafkaTarget(BaseStoreTarget):
         *args,
         bootstrap_servers=None,
         producer_options=None,
+        brokers=None,
         **kwargs,
     ):
         attrs = {}
-        if bootstrap_servers is not None:
-            attrs["bootstrap_servers"] = bootstrap_servers
+
+        # TODO: Remove this in 1.9.0
+        if bootstrap_servers:
+            if brokers:
+                raise mlrun.errors.MLRunInvalidArgumentError(
+                    "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
+                    "'bootstrap_servers' parameter. Please use 'brokers' only."
+                )
+            warnings.warn(
+                "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+                "use 'brokers' instead.",
+                FutureWarning,
+            )
+            brokers = bootstrap_servers
+
+        if brokers:
+            attrs["brokers"] = brokers
         if producer_options is not None:
             attrs["producer_options"] = producer_options
 
@@ -1447,14 +1644,16 @@ class KafkaTarget(BaseStoreTarget):
         if self.path and self.path.startswith("ds://"):
             datastore_profile = datastore_profile_read(self.path)
             attributes = datastore_profile.attributes()
-            bootstrap_servers = attributes.pop("bootstrap_servers", None)
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
+            )
             topic = datastore_profile.topic
         else:
             attributes = copy(self.attributes)
-            bootstrap_servers = attributes.pop("bootstrap_servers", None)
-            topic, bootstrap_servers = parse_kafka_url(
-                self.get_target_path(), bootstrap_servers
+            brokers = attributes.pop(
+                "brokers", attributes.pop("bootstrap_servers", None)
             )
+            topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
 
         if not topic:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1468,11 +1667,21 @@ class KafkaTarget(BaseStoreTarget):
             class_name="storey.KafkaTarget",
             columns=column_list,
             topic=topic,
-            bootstrap_servers=bootstrap_servers,
+            brokers=brokers,
             **attributes,
         )
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def purge(self):
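A short sketch of the brokers parameter introduced in the KafkaTarget hunks above; per the diff, passing the deprecated bootstrap_servers alone still works but emits a FutureWarning, while combining it with brokers raises MLRunInvalidArgumentError (broker addresses here are placeholders):

    import warnings

    from mlrun.datastore.targets import KafkaTarget

    target = KafkaTarget(
        name="kafka",
        path="my_topic",
        brokers=["kafka-broker-1:9092", "kafka-broker-2:9092"],
    )
    print(target.attributes["brokers"])

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")
        KafkaTarget(name="kafka", path="my_topic", bootstrap_servers="localhost:9092")
    print(caught[0].category.__name__)  # FutureWarning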
@@ -1519,7 +1728,17 @@ class TSDBTarget(BaseStoreTarget):
             **self.attributes,
         )
 
-    def as_df(self, columns=None, df_module=None, **kwargs):
+    def as_df(
+        self,
+        columns=None,
+        df_module=None,
+        entities=None,
+        start_time=None,
+        end_time=None,
+        time_column=None,
+        additional_filters=None,
+        **kwargs,
+    ):
         raise NotImplementedError()
 
     def write_dataframe(
@@ -1535,7 +1754,11 @@ class TSDBTarget(BaseStoreTarget):
                 key_column = [key_column]
             new_index.extend(key_column)
 
-        _, path_with_container = parse_path(self.get_target_path())
+        store, path_in_store, target_path = self._get_store_and_path()
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key", access_key)
+
+        _, path_with_container = parse_path(target_path)
         container, path = split_path(path_with_container)
 
         frames_client = get_frames_client(
@@ -1626,11 +1849,16 @@ class DFTarget(BaseStoreTarget):
         self,
         columns=None,
         df_module=None,
+        entities=None,
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return select_columns_from_df(
             filter_df_start_end_time(
                 self._df,
@@ -1652,24 +1880,24 @@ class SQLTarget(BaseStoreTarget):
         self,
         name: str = "",
         path=None,
-        attributes: Dict[str, str] = None,
+        attributes: dict[str, str] = None,
         after_step=None,
         partitioned: bool = False,
         key_bucketing_number: Optional[int] = None,
-        partition_cols: Optional[List[str]] = None,
+        partition_cols: Optional[list[str]] = None,
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: Dict[str, str] = None,
+        storage_options: dict[str, str] = None,
         db_url: str = None,
         table_name: str = None,
-        schema: Dict[str, Any] = None,
+        schema: dict[str, Any] = None,
         primary_key_column: str = "",
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
         varchar_len: int = 50,
-        parse_dates: List[str] = None,
+        parse_dates: list[str] = None,
     ):
         """
         Write to SqlDB as output target for a flow.
@@ -1805,6 +2033,7 @@ class SQLTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         try:
@@ -1813,9 +2042,13 @@ class SQLTarget(BaseStoreTarget):
         except (ModuleNotFoundError, ImportError) as exc:
             self._raise_sqlalchemy_import_error(exc)
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         db_path, table_name, _, _, _, _ = self._parse_url()
         engine = sqlalchemy.create_engine(db_path)
-        parse_dates: Optional[List[str]] = self.attributes.get("parse_dates")
+        parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
         with engine.connect() as conn:
             query, parse_dates = _generate_sql_query_with_time_filter(
                 table_name=table_name,
@@ -1902,7 +2135,7 @@ class SQLTarget(BaseStoreTarget):
             raise ValueError(f"Table named {table_name} is not exist")
 
         elif not table_exists and create_table:
-            TYPE_TO_SQL_TYPE = {
+            type_to_sql_type = {
                 int: sqlalchemy.Integer,
                 str: sqlalchemy.String(self.attributes.get("varchar_len")),
                 datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -1915,12 +2148,16 @@ class SQLTarget(BaseStoreTarget):
             # creat new table with the given name
             columns = []
             for col, col_type in self.schema.items():
-                col_type = TYPE_TO_SQL_TYPE.get(col_type)
-                if col_type is None:
-                    raise TypeError(f"{col_type} unsupported type")
+                col_type_sql = type_to_sql_type.get(col_type)
+                if col_type_sql is None:
+                    raise TypeError(
+                        f"'{col_type}' unsupported type for column '{col}'"
+                    )
                 columns.append(
                     sqlalchemy.Column(
-                        col, col_type, primary_key=(col in primary_key_for_check)
+                        col,
+                        col_type_sql,
+                        primary_key=(col in primary_key_for_check),
                     )
                 )
 
@@ -1951,10 +2188,11 @@ kind_to_driver = {
     TargetTypes.tsdb: TSDBTarget,
     TargetTypes.custom: CustomTarget,
     TargetTypes.sql: SQLTarget,
+    TargetTypes.snowflake: SnowflakeTarget,
 }
 
 
-def _get_target_path(driver, resource, run_id_mode=False):
+def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
     """return the default target path given the resource and target kind"""
     kind = driver.kind
     suffix = driver.suffix
@@ -1971,11 +2209,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
         )
     name = resource.metadata.name
     project = resource.metadata.project or mlrun.mlconf.default_project
-    data_prefix = get_default_prefix_for_target(kind).format(
+
+    default_kind_name = kind
+    if scheme == "ds":
+        # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+        # as a placeholder that can be used in any specified target
+        default_kind_name = "dsnosql"
+    if scheme == "redis" or scheme == "rediss":
+        default_kind_name = TargetTypes.redisnosql
+
+    netloc = netloc or ""
+    data_prefix = get_default_prefix_for_target(default_kind_name).format(
+        ds_profile_name=netloc,  # In case of ds profile, set its the name
+        authority=netloc,  # In case of redis, replace {authority} with netloc
         project=project,
         kind=kind,
        name=name,
    )
+
+    if scheme == "rediss":
+        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
     # todo: handle ver tag changes, may need to copy files?
     if not run_id_mode:
         version = resource.metadata.tag
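The tail of the final hunk substitutes the URL authority into the default-prefix template and then flips the scheme for rediss. A standalone sketch of that string handling; the template below is a stand-in, not the actual value from mlrun.mlconf:

    # Hypothetical template in the spirit of the redis default prefix.
    TEMPLATE = "redis://{authority}/projects/{project}/FeatureStore/{name}/{kind}"

    def default_prefix(netloc, project, name, kind, scheme="redis"):
        data_prefix = TEMPLATE.format(
            authority=netloc or "", project=project, name=name, kind=kind
        )
        if scheme == "rediss":
            data_prefix = data_prefix.replace("redis://", "rediss://", 1)
        return data_prefix

    print(default_prefix("cache-host:6379", "demo", "stocks", "redisnosql", scheme="rediss"))
    # rediss://cache-host:6379/projects/demo/FeatureStore/stocks/redisnosql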