mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
@@ -17,6 +17,7 @@ import os
  import random
  import sys
  import time
+ import warnings
  from collections import Counter
  from copy import copy
  from typing import Any, Optional, Union
@@ -28,6 +29,11 @@ from mergedeep import merge
  import mlrun
  import mlrun.utils.helpers
  from mlrun.config import config
+ from mlrun.datastore.snowflake_utils import (
+ get_snowflake_password,
+ get_snowflake_spark_options,
+ )
+ from mlrun.datastore.utils import transform_list_filters_to_tuple
  from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
  from mlrun.utils import logger, now_date
  from mlrun.utils.helpers import to_parquet
@@ -41,7 +47,6 @@ from .spark_utils import spark_session_update_hadoop_options
  from .utils import (
  _generate_sql_query_with_time_filter,
  filter_df_start_end_time,
- parse_kafka_url,
  select_columns_from_df,
  )

@@ -57,6 +62,7 @@ class TargetTypes:
  dataframe = "dataframe"
  custom = "custom"
  sql = "sql"
+ snowflake = "snowflake"

  @staticmethod
  def all():
@@ -71,6 +77,7 @@ class TargetTypes:
  TargetTypes.dataframe,
  TargetTypes.custom,
  TargetTypes.sql,
+ TargetTypes.snowflake,
  ]


@@ -78,11 +85,14 @@ def generate_target_run_id():
  return f"{round(time.time() * 1000)}_{random.randint(0, 999)}"


- def write_spark_dataframe_with_options(spark_options, df, mode):
+ def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
  non_hadoop_spark_options = spark_session_update_hadoop_options(
  df.sql_ctx.sparkSession, spark_options
  )
- df.write.mode(mode).save(**non_hadoop_spark_options)
+ if write_format:
+ df.write.format(write_format).mode(mode).save(**non_hadoop_spark_options)
+ else:
+ df.write.mode(mode).save(**non_hadoop_spark_options)


  def default_target_names():
@@ -379,6 +389,7 @@ class BaseStoreTarget(DataTargetBase):
  is_offline = False
  support_spark = False
  support_storey = False
+ support_pandas = False
  support_append = False

  def __init__(
@@ -428,6 +439,12 @@ class BaseStoreTarget(DataTargetBase):
  self.storage_options = storage_options
  self.schema = schema or {}
  self.credentials_prefix = credentials_prefix
+ if credentials_prefix:
+ warnings.warn(
+ "The 'credentials_prefix' parameter is deprecated and will be removed in "
+ "1.9.0. Please use datastore profiles instead.",
+ FutureWarning,
+ )

  self._target = None
  self._resource = None
@@ -497,7 +514,10 @@ class BaseStoreTarget(DataTargetBase):
  options = self.get_spark_options(key_column, timestamp_key)
  options.update(kwargs)
  df = self.prepare_spark_df(df, key_column, timestamp_key, options)
- write_spark_dataframe_with_options(options, df, "overwrite")
+ write_format = options.pop("format", None)
+ write_spark_dataframe_with_options(
+ options, df, "overwrite", write_format=write_format
+ )
  elif hasattr(df, "dask"):
  dask_options = self.get_dask_options()
  store, path_in_store, target_path = self._get_store_and_path()
@@ -524,15 +544,18 @@ class BaseStoreTarget(DataTargetBase):
  store, path_in_store, target_path = self._get_store_and_path()
  target_path = generate_path_with_chunk(self, chunk_id, target_path)
  file_system = store.filesystem
- if file_system.protocol == "file":
+ if (
+ file_system.protocol == "file"
+ # fsspec 2023.10.0 changed protocol from "file" to ("file", "local")
+ or isinstance(file_system.protocol, (tuple, list))
+ and "file" in file_system.protocol
+ ):
  dir = os.path.dirname(target_path)
  if dir:
  os.makedirs(dir, exist_ok=True)
  target_df = df
  partition_cols = None # single parquet file
- if not target_path.endswith(".parquet") and not target_path.endswith(
- ".pq"
- ): # directory
+ if not mlrun.utils.helpers.is_parquet_file(target_path): # directory
  partition_cols = []
  if timestamp_key and (
  self.partitioned or self.time_partitioning_granularity
@@ -641,6 +664,29 @@ class BaseStoreTarget(DataTargetBase):
  def _target_path_object(self):
  """return the actual/computed target path"""
  is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+ if self._resource and self.path:
+ parsed_url = urlparse(self.path)
+ # When the URL consists only from scheme and endpoint and no path,
+ # make a default path for DS and redis targets.
+ # Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
+ if (
+ not isinstance(self, KafkaTarget)
+ and parsed_url.scheme in ["ds", "redis", "rediss"]
+ and (not parsed_url.path or parsed_url.path == "/")
+ ):
+ return TargetPathObject(
+ _get_target_path(
+ self,
+ self._resource,
+ self.run_id is not None,
+ netloc=parsed_url.netloc,
+ scheme=parsed_url.scheme,
+ ),
+ self.run_id,
+ is_single_file,
+ )
+
  return self.get_path() or (
  TargetPathObject(
  _get_target_path(self, self._resource, self.run_id is not None),
@@ -657,6 +703,7 @@ class BaseStoreTarget(DataTargetBase):
  self.kind, self.name, self.get_target_templated_path()
  )
  target = self._target
+ target.attributes = self.attributes
  target.run_id = self.run_id
  target.status = status or target.status or "created"
  target.updated = now_date().isoformat()
@@ -685,11 +732,25 @@ class BaseStoreTarget(DataTargetBase):
  timestamp_key=None,
  featureset_status=None,
  ):
+ if not self.support_storey:
+ raise mlrun.errors.MLRunRuntimeError(
+ f"{type(self).__name__} does not support storey engine"
+ )
  raise NotImplementedError()

  def purge(self):
+ """
+ Delete the files of the target.
+
+ Do not use this function directly from the sdk. Use FeatureSet.purge_targets.
+ """
  store, path_in_store, target_path = self._get_store_and_path()
- store.rm(target_path, recursive=True)
+ if path_in_store not in ["", "/"]:
+ store.rm(path_in_store, recursive=True)
+ else:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "Unable to delete target. Please Use purge_targets from FeatureSet object."
+ )

  def as_df(
  self,
@@ -699,9 +760,15 @@ class BaseStoreTarget(DataTargetBase):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
  """return the target data as dataframe"""
+ if not self.support_pandas:
+ raise NotImplementedError()
+ mlrun.utils.helpers.additional_filters_warning(
+ additional_filters, self.__class__
+ )
  return mlrun.get_dataitem(self.get_target_path()).as_df(
  columns=columns,
  df_module=df_module,
@@ -713,14 +780,22 @@ class BaseStoreTarget(DataTargetBase):

  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
  # options used in spark.read.load(**options)
+ if not self.support_spark:
+ raise mlrun.errors.MLRunRuntimeError(
+ f"{type(self).__name__} does not support spark engine"
+ )
  raise NotImplementedError()

- def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+ def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
  return df

  def get_dask_options(self):
  raise NotImplementedError()

+ @property
+ def source_spark_attributes(self) -> dict:
+ return {}
+

  class ParquetTarget(BaseStoreTarget):
  """Parquet target storage driver, used to materialize feature set/vector data into parquet files.
@@ -752,6 +827,7 @@ class ParquetTarget(BaseStoreTarget):
  support_spark = True
  support_storey = True
  support_dask = True
+ support_pandas = True
  support_append = True

  def __init__(
@@ -857,10 +933,9 @@ class ParquetTarget(BaseStoreTarget):
  if time_unit == time_partitioning_granularity:
  break

- if (
- not self.partitioned
- and not self.get_target_path().endswith(".parquet")
- and not self.get_target_path().endswith(".pq")
+ target_path = self.get_target_path()
+ if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
+ target_path
  ):
  partition_cols = []

@@ -868,25 +943,16 @@ class ParquetTarget(BaseStoreTarget):
  for key_column in key_columns:
  tuple_key_columns.append((key_column.name, key_column.value_type))

- store, path_in_store, target_path = self._get_store_and_path()
-
- storage_options = store.get_storage_options()
- if storage_options and self.storage_options:
- storage_options = merge(storage_options, self.storage_options)
- else:
- storage_options = storage_options or self.storage_options
-
  step = graph.add_step(
  name=self.name or "ParquetTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.ParquetTarget",
+ class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
  path=target_path,
  columns=column_list,
  index_cols=tuple_key_columns,
  partition_cols=partition_cols,
  time_field=timestamp_key,
- storage_options=storage_options,
  max_events=self.max_events,
  flush_after_seconds=self.flush_after_seconds,
  update_last_written=featureset_status.update_last_written_for_target,
@@ -946,6 +1012,7 @@ class ParquetTarget(BaseStoreTarget):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
  """return the target data as dataframe"""
@@ -956,6 +1023,7 @@ class ParquetTarget(BaseStoreTarget):
  start_time=start_time,
  end_time=end_time,
  time_column=time_column,
+ additional_filters=transform_list_filters_to_tuple(additional_filters),
  **kwargs,
  )
  if not columns:
@@ -977,9 +1045,7 @@ class ParquetTarget(BaseStoreTarget):
  return result

  def is_single_file(self):
- if self.path:
- return self.path.endswith(".parquet") or self.path.endswith(".pq")
- return False
+ return mlrun.utils.helpers.is_parquet_file(self.path)

  def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
  # If partitioning by time, add the necessary columns
@@ -1019,6 +1085,7 @@ class CSVTarget(BaseStoreTarget):
  is_offline = True
  support_spark = True
  support_storey = True
+ support_pandas = True

  @staticmethod
  def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
@@ -1040,17 +1107,16 @@ class CSVTarget(BaseStoreTarget):
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
- store, path_in_store, target_path = self._get_store_and_path()
+ target_path = self.get_target_path()
  graph.add_step(
  name=self.name or "CSVTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.CSVTarget",
+ class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
  path=target_path,
  columns=column_list,
  header=True,
  index_cols=key_columns,
- storage_options=store.get_storage_options(),
  **self.attributes,
  )

@@ -1070,7 +1136,8 @@ class CSVTarget(BaseStoreTarget):
  import pyspark.sql.functions as funcs

  for col_name, col_type in df.dtypes:
- if col_type == "timestamp":
+ # covers TimestampType and TimestampNTZType, which was added in PySpark 3.4.0
+ if col_type.startswith("timestamp"):
  # df.write.csv saves timestamps with millisecond precision, but we want microsecond precision
  # for compatibility with storey.
  df = df.withColumn(
@@ -1086,8 +1153,12 @@ class CSVTarget(BaseStoreTarget):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
+ mlrun.utils.helpers.additional_filters_warning(
+ additional_filters, self.__class__
+ )
  df = super().as_df(
  columns=columns,
  df_module=df_module,
@@ -1108,6 +1179,134 @@ class CSVTarget(BaseStoreTarget):
  return True


+ class SnowflakeTarget(BaseStoreTarget):
+ """
+ :param attributes: A dictionary of attributes for Snowflake connection; will be overridden by database parameters
+ if they exist.
+ :param url: Snowflake hostname, in the format: <account_name>.<region>.snowflakecomputing.com
+ :param user: Snowflake user for login
+ :param db_schema: Database schema
+ :param database: Database name
+ :param warehouse: Snowflake warehouse name
+ :param table_name: Snowflake table name
+ """
+
+ support_spark = True
+ support_append = True
+ is_offline = True
+ kind = TargetTypes.snowflake
+
+ def __init__(
+ self,
+ name: str = "",
+ path=None,
+ attributes: dict[str, str] = None,
+ after_step=None,
+ columns=None,
+ partitioned: bool = False,
+ key_bucketing_number: Optional[int] = None,
+ partition_cols: Optional[list[str]] = None,
+ time_partitioning_granularity: Optional[str] = None,
+ max_events: Optional[int] = None,
+ flush_after_seconds: Optional[int] = None,
+ storage_options: dict[str, str] = None,
+ schema: dict[str, Any] = None,
+ credentials_prefix=None,
+ url: str = None,
+ user: str = None,
+ db_schema: str = None,
+ database: str = None,
+ warehouse: str = None,
+ table_name: str = None,
+ ):
+ attributes = attributes or {}
+ if url:
+ attributes["url"] = url
+ if user:
+ attributes["user"] = user
+ if database:
+ attributes["database"] = database
+ if db_schema:
+ attributes["db_schema"] = db_schema
+ if warehouse:
+ attributes["warehouse"] = warehouse
+ if table_name:
+ attributes["table"] = table_name
+
+ super().__init__(
+ name,
+ path,
+ attributes,
+ after_step,
+ list(schema.keys()) if schema else columns,
+ partitioned,
+ key_bucketing_number,
+ partition_cols,
+ time_partitioning_granularity,
+ max_events=max_events,
+ flush_after_seconds=flush_after_seconds,
+ storage_options=storage_options,
+ schema=schema,
+ credentials_prefix=credentials_prefix,
+ )
+
+ def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
+ spark_options = get_snowflake_spark_options(self.attributes)
+ spark_options["dbtable"] = self.attributes.get("table")
+ return spark_options
+
+ def purge(self):
+ import snowflake.connector
+
+ missing = [
+ key
+ for key in ["database", "db_schema", "table", "url", "user", "warehouse"]
+ if self.attributes.get(key) is None
+ ]
+ if missing:
+ raise mlrun.errors.MLRunRuntimeError(
+ f"Can't purge Snowflake target, "
+ f"some attributes are missing: {', '.join(missing)}"
+ )
+ account = self.attributes["url"].replace(".snowflakecomputing.com", "")
+
+ with snowflake.connector.connect(
+ account=account,
+ user=self.attributes["user"],
+ password=get_snowflake_password(),
+ warehouse=self.attributes["warehouse"],
+ ) as snowflake_connector:
+ drop_statement = (
+ f"DROP TABLE IF EXISTS {self.attributes['database']}.{self.attributes['db_schema']}"
+ f".{self.attributes['table']}"
+ )
+ snowflake_connector.execute_string(drop_statement)
+
+ def as_df(
+ self,
+ columns=None,
+ df_module=None,
+ entities=None,
+ start_time=None,
+ end_time=None,
+ time_column=None,
+ additional_filters=None,
+ **kwargs,
+ ):
+ raise mlrun.errors.MLRunRuntimeError(
+ f"{type(self).__name__} does not support pandas engine"
+ )
+
+ @property
+ def source_spark_attributes(self) -> dict:
+ keys = ["url", "user", "database", "db_schema", "warehouse"]
+ attributes = self.attributes or {}
+ snowflake_dict = {key: attributes.get(key) for key in keys}
+ table = attributes.get("table")
+ snowflake_dict["query"] = f"SELECT * from {table}" if table else None
+ return snowflake_dict
+
+
  class NoSqlBaseTarget(BaseStoreTarget):
  is_table = True
  is_online = True
@@ -1132,6 +1331,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
  timestamp_key=None,
  featureset_status=None,
  ):
+ table, column_list = self._get_table_and_columns(features, key_columns)
+
+ graph.add_step(
+ name=self.name or self.writer_step_name,
+ after=after,
+ graph_shape="cylinder",
+ class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
+ columns=column_list,
+ table=table,
+ **self.attributes,
+ )
+
+ def _get_table_and_columns(self, features, key_columns):
  key_columns = list(key_columns.keys())
  table = self._resource.uri
  column_list = self._get_column_list(
@@ -1150,15 +1362,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
  col for col in column_list if col[0] not in aggregate_features
  ]

- graph.add_step(
- name=self.name or self.writer_step_name,
- after=after,
- graph_shape="cylinder",
- class_name="storey.NoSqlTarget",
- columns=column_list,
- table=table,
- **self.attributes,
- )
+ return table, column_list

  def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
  raise NotImplementedError()
@@ -1169,9 +1373,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
  def get_dask_options(self):
  return {"format": "csv"}

- def as_df(self, columns=None, df_module=None, **kwargs):
- raise NotImplementedError()
-
  def write_dataframe(
  self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
  ):
@@ -1179,7 +1380,10 @@ class NoSqlBaseTarget(BaseStoreTarget):
  options = self.get_spark_options(key_column, timestamp_key)
  options.update(kwargs)
  df = self.prepare_spark_df(df)
- write_spark_dataframe_with_options(options, df, "overwrite")
+ write_format = options.pop("format", None)
+ write_spark_dataframe_with_options(
+ options, df, "overwrite", write_format=write_format
+ )
  else:
  # To prevent modification of the original dataframe and make sure
  # that the last event of a key is the one being persisted
@@ -1281,11 +1485,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  support_spark = True
  writer_step_name = "RedisNoSqlTarget"

- # Fetch server url from the RedisNoSqlTarget::__init__() 'path' parameter.
- # If not set fetch it from 'mlrun.mlconf.redis.url' (MLRUN_REDIS__URL environment variable).
- # Then look for username and password at REDIS_xxx secrets
- def _get_server_endpoint(self):
- endpoint, uri = parse_path(self.get_target_path())
+ @staticmethod
+ def get_server_endpoint(path, credentials_prefix=None):
+ endpoint, uri = parse_path(path)
  endpoint = endpoint or mlrun.mlconf.redis.url
  if endpoint.startswith("ds://"):
  datastore_profile = datastore_profile_read(endpoint)
@@ -1302,8 +1504,15 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  raise mlrun.errors.MLRunInvalidArgumentError(
  "Provide Redis username and password only via secrets"
  )
- user = self._get_credential("REDIS_USER", "")
- password = self._get_credential("REDIS_PASSWORD", "")
+ credentials_prefix = credentials_prefix or mlrun.get_secret_or_env(
+ key="CREDENTIALS_PREFIX"
+ )
+ user = mlrun.get_secret_or_env(
+ "REDIS_USER", default="", prefix=credentials_prefix
+ )
+ password = mlrun.get_secret_or_env(
+ "REDIS_PASSWORD", default="", prefix=credentials_prefix
+ )
  host = parsed_endpoint.hostname
  port = parsed_endpoint.port if parsed_endpoint.port else "6379"
  scheme = parsed_endpoint.scheme
@@ -1317,7 +1526,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  from storey import Table
  from storey.redis_driver import RedisDriver

- endpoint, uri = self._get_server_endpoint()
+ endpoint, uri = self.get_server_endpoint(
+ self.get_target_path(), self.credentials_prefix
+ )

  return Table(
  uri,
@@ -1326,7 +1537,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
  )

  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
- endpoint, uri = self._get_server_endpoint()
+ endpoint, uri = self.get_server_endpoint(
+ self.get_target_path(), self.credentials_prefix
+ )
  parsed_endpoint = urlparse(endpoint)
  store, path_in_store, path = self._get_store_and_path()
  return {
@@ -1358,6 +1571,29 @@ class RedisNoSqlTarget(NoSqlBaseTarget):

  return df

+ def add_writer_step(
+ self,
+ graph,
+ after,
+ features,
+ key_columns=None,
+ timestamp_key=None,
+ featureset_status=None,
+ ):
+ table, column_list = self._get_table_and_columns(features, key_columns)
+
+ graph.add_step(
+ path=self.get_target_path(),
+ name=self.name or self.writer_step_name,
+ after=after,
+ graph_shape="cylinder",
+ class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
+ columns=column_list,
+ table=table,
+ credentials_prefix=self.credentials_prefix,
+ **self.attributes,
+ )
+

  class StreamTarget(BaseStoreTarget):
  kind = TargetTypes.stream
@@ -1376,37 +1612,46 @@ class StreamTarget(BaseStoreTarget):
  timestamp_key=None,
  featureset_status=None,
  ):
- from storey import V3ioDriver
-
  key_columns = list(key_columns.keys())
- store, path_in_store, path = self._get_store_and_path()
- if not path:
- raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
- endpoint, uri = parse_path(path)
- storage_options = store.get_storage_options()
- access_key = storage_options.get("v3io_access_key")
+
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
+ stream_path = self.get_target_path()
+ if not stream_path:
+ raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")

  graph.add_step(
  name=self.name or "StreamTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.StreamTarget",
+ class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
  columns=column_list,
- storage=V3ioDriver(
- webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key
- ),
- stream_path=uri,
+ stream_path=stream_path,
  **self.attributes,
  )

- def as_df(self, columns=None, df_module=None, **kwargs):
- raise NotImplementedError()
-

  class KafkaTarget(BaseStoreTarget):
+ """
+ Kafka target storage driver, used to write data into kafka topics.
+ example::
+ # define target
+ kafka_target = KafkaTarget(
+ name="kafka", path="my_topic", brokers="localhost:9092"
+ )
+ # ingest
+ stocks_set.ingest(stocks, [kafka_target])
+ :param name: target name
+ :param path: topic name e.g. "my_topic"
+ :param after_step: optional, after what step in the graph to add the target
+ :param columns: optional, which columns from data to write
+ :param bootstrap_servers: Deprecated. Use the brokers parameter instead
+ :param producer_options: additional configurations for kafka producer
+ :param brokers: kafka broker as represented by a host:port pair, or a list of kafka brokers, e.g.
+ "localhost:9092", or ["kafka-broker-1:9092", "kafka-broker-2:9092"]
+ """
+
  kind = TargetTypes.kafka
  is_table = False
  is_online = False
@@ -1419,11 +1664,27 @@ class KafkaTarget(BaseStoreTarget):
  *args,
  bootstrap_servers=None,
  producer_options=None,
+ brokers=None,
  **kwargs,
  ):
  attrs = {}
- if bootstrap_servers is not None:
- attrs["bootstrap_servers"] = bootstrap_servers
+
+ # TODO: Remove this in 1.9.0
+ if bootstrap_servers:
+ if brokers:
+ raise mlrun.errors.MLRunInvalidArgumentError(
+ "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
+ "'bootstrap_servers' parameter. Please use 'brokers' only."
+ )
+ warnings.warn(
+ "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
+ "use 'brokers' instead.",
+ FutureWarning,
+ )
+ brokers = bootstrap_servers
+
+ if brokers:
+ attrs["brokers"] = brokers
  if producer_options is not None:
  attrs["producer_options"] = producer_options

@@ -1442,37 +1703,21 @@ class KafkaTarget(BaseStoreTarget):
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
- if self.path and self.path.startswith("ds://"):
- datastore_profile = datastore_profile_read(self.path)
- attributes = datastore_profile.attributes()
- bootstrap_servers = attributes.pop("bootstrap_servers", None)
- topic = datastore_profile.topic
- else:
- attributes = copy(self.attributes)
- bootstrap_servers = attributes.pop("bootstrap_servers", None)
- topic, bootstrap_servers = parse_kafka_url(
- self.get_target_path(), bootstrap_servers
- )
+ path = self.get_target_path()

- if not topic:
- raise mlrun.errors.MLRunInvalidArgumentError(
- "KafkaTarget requires a path (topic)"
- )
+ if not path:
+ raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")

  graph.add_step(
  name=self.name or "KafkaTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.KafkaTarget",
+ class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
  columns=column_list,
- topic=topic,
- bootstrap_servers=bootstrap_servers,
- **attributes,
+ path=path,
+ attributes=self.attributes,
  )

- def as_df(self, columns=None, df_module=None, **kwargs):
- raise NotImplementedError()
-
  def purge(self):
  pass

@@ -1507,7 +1752,7 @@ class TSDBTarget(BaseStoreTarget):

  graph.add_step(
  name=self.name or "TSDBTarget",
- class_name="storey.TSDBTarget",
+ class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
  after=after,
  graph_shape="cylinder",
  path=uri,
@@ -1517,9 +1762,6 @@ class TSDBTarget(BaseStoreTarget):
  **self.attributes,
  )

- def as_df(self, columns=None, df_module=None, **kwargs):
- raise NotImplementedError()
-
  def write_dataframe(
  self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
  ):
@@ -1557,6 +1799,7 @@ class CustomTarget(BaseStoreTarget):
  is_online = False
  support_spark = False
  support_storey = True
+ support_pandas = True

  def __init__(
  self,
@@ -1592,6 +1835,7 @@ class CustomTarget(BaseStoreTarget):
  class DFTarget(BaseStoreTarget):
  kind = TargetTypes.dataframe
  support_storey = True
+ support_pandas = True

  def __init__(self, *args, name="dataframe", **kwargs):
  self._df = None
@@ -1628,11 +1872,16 @@ class DFTarget(BaseStoreTarget):
  self,
  columns=None,
  df_module=None,
+ entities=None,
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
+ mlrun.utils.helpers.additional_filters_warning(
+ additional_filters, self.__class__
+ )
  return select_columns_from_df(
  filter_df_start_end_time(
  self._df,
@@ -1649,6 +1898,7 @@ class SQLTarget(BaseStoreTarget):
  is_online = True
  support_spark = False
  support_storey = True
+ support_pandas = True

  def __init__(
  self,
@@ -1791,7 +2041,7 @@ class SQLTarget(BaseStoreTarget):
  name=self.name or "SqlTarget",
  after=after,
  graph_shape="cylinder",
- class_name="storey.NoSqlTarget",
+ class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
  columns=column_list,
  header=True,
  table=table,
@@ -1807,6 +2057,7 @@ class SQLTarget(BaseStoreTarget):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
  try:
@@ -1815,6 +2066,10 @@ class SQLTarget(BaseStoreTarget):
  except (ModuleNotFoundError, ImportError) as exc:
  self._raise_sqlalchemy_import_error(exc)

+ mlrun.utils.helpers.additional_filters_warning(
+ additional_filters, self.__class__
+ )
+
  db_path, table_name, _, _, _, _ = self._parse_url()
  engine = sqlalchemy.create_engine(db_path)
  parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
@@ -1904,7 +2159,7 @@ class SQLTarget(BaseStoreTarget):
  raise ValueError(f"Table named {table_name} is not exist")

  elif not table_exists and create_table:
- TYPE_TO_SQL_TYPE = {
+ type_to_sql_type = {
  int: sqlalchemy.Integer,
  str: sqlalchemy.String(self.attributes.get("varchar_len")),
  datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -1917,7 +2172,7 @@ class SQLTarget(BaseStoreTarget):
  # creat new table with the given name
  columns = []
  for col, col_type in self.schema.items():
- col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
+ col_type_sql = type_to_sql_type.get(col_type)
  if col_type_sql is None:
  raise TypeError(
  f"'{col_type}' unsupported type for column '{col}'"
@@ -1957,10 +2212,11 @@ kind_to_driver = {
  TargetTypes.tsdb: TSDBTarget,
  TargetTypes.custom: CustomTarget,
  TargetTypes.sql: SQLTarget,
+ TargetTypes.snowflake: SnowflakeTarget,
  }


- def _get_target_path(driver, resource, run_id_mode=False):
+ def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
  """return the default target path given the resource and target kind"""
  kind = driver.kind
  suffix = driver.suffix
@@ -1977,11 +2233,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
  )
  name = resource.metadata.name
  project = resource.metadata.project or mlrun.mlconf.default_project
- data_prefix = get_default_prefix_for_target(kind).format(
+
+ default_kind_name = kind
+ if scheme == "ds":
+ # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+ # as a placeholder that can be used in any specified target
+ default_kind_name = "dsnosql"
+ if scheme == "redis" or scheme == "rediss":
+ default_kind_name = TargetTypes.redisnosql
+
+ netloc = netloc or ""
+ data_prefix = get_default_prefix_for_target(default_kind_name).format(
+ ds_profile_name=netloc, # In case of ds profile, set its the name
+ authority=netloc, # In case of redis, replace {authority} with netloc
  project=project,
  kind=kind,
  name=name,
  )
+
+ if scheme == "rediss":
+ data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
  # todo: handle ver tag changes, may need to copy files?
  if not run_id_mode:
  version = resource.metadata.tag
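
Illustrative usage sketch (not part of the diff). Based on the constructor signatures and the KafkaTarget docstring shown above, the snippet below shows how the new brokers parameter and the new SnowflakeTarget might be used. The feature set, entity, column names, and Snowflake connection values are hypothetical placeholders; actual usage should follow the mlrun 1.7.2 documentation.

    import pandas as pd

    import mlrun.feature_store as fstore
    from mlrun.datastore.targets import KafkaTarget, SnowflakeTarget

    # KafkaTarget now accepts 'brokers' (a host:port string or a list of brokers);
    # 'bootstrap_servers' is deprecated and scheduled for removal in 1.9.0.
    kafka_target = KafkaTarget(
        name="kafka", path="my_topic", brokers="localhost:9092"
    )

    # SnowflakeTarget is new in this release; per the diff it declares spark
    # support only (support_spark=True), so it is intended for Spark-based
    # materialization rather than the default storey engine.
    snowflake_target = SnowflakeTarget(
        name="snowflake",
        url="<account_name>.<region>.snowflakecomputing.com",  # placeholder hostname
        user="my_user",
        database="my_db",
        db_schema="my_schema",
        warehouse="my_warehouse",
        table_name="stocks_offline",
    )

    # Ingest into the Kafka target, mirroring the docstring example in the diff.
    stocks_set = fstore.FeatureSet("stocks", entities=[fstore.Entity("ticker")])
    stocks = pd.DataFrame({"ticker": ["ACME"], "price": [101.3]})
    stocks_set.ingest(stocks, targets=[kafka_target])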