mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic.

Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
@@ -17,6 +17,7 @@ import os
  import random
  import sys
  import time
+ import warnings
  from collections import Counter
  from copy import copy
  from typing import Any, Optional, Union
@@ -28,6 +29,8 @@ from mergedeep import merge
  import mlrun
  import mlrun.utils.helpers
  from mlrun.config import config
+ from mlrun.datastore.snowflake_utils import get_snowflake_spark_options
+ from mlrun.datastore.utils import transform_list_filters_to_tuple
  from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
  from mlrun.utils import logger, now_date
  from mlrun.utils.helpers import to_parquet
@@ -57,6 +60,7 @@ class TargetTypes:
  dataframe = "dataframe"
  custom = "custom"
  sql = "sql"
+ snowflake = "snowflake"

  @staticmethod
  def all():
@@ -71,6 +75,7 @@ class TargetTypes:
  TargetTypes.dataframe,
  TargetTypes.custom,
  TargetTypes.sql,
+ TargetTypes.snowflake,
  ]


@@ -78,11 +83,14 @@ def generate_target_run_id():
  return f"{round(time.time() * 1000)}_{random.randint(0, 999)}"


- def write_spark_dataframe_with_options(spark_options, df, mode):
+ def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
  non_hadoop_spark_options = spark_session_update_hadoop_options(
  df.sql_ctx.sparkSession, spark_options
  )
- df.write.mode(mode).save(**non_hadoop_spark_options)
+ if write_format:
+ df.write.format(write_format).mode(mode).save(**non_hadoop_spark_options)
+ else:
+ df.write.mode(mode).save(**non_hadoop_spark_options)


  def default_target_names():
@@ -451,7 +459,7 @@ class BaseStoreTarget(DataTargetBase):
  self.get_target_path(),
  credentials_prefix_secrets,
  )
- return store, url
+ return store, resolved_store_path, url

  def _get_column_list(self, features, timestamp_key, key_columns, with_type=False):
  result = []
@@ -497,10 +505,13 @@ class BaseStoreTarget(DataTargetBase):
  options = self.get_spark_options(key_column, timestamp_key)
  options.update(kwargs)
  df = self.prepare_spark_df(df, key_column, timestamp_key, options)
- write_spark_dataframe_with_options(options, df, "overwrite")
+ write_format = options.pop("format", None)
+ write_spark_dataframe_with_options(
+ options, df, "overwrite", write_format=write_format
+ )
  elif hasattr(df, "dask"):
  dask_options = self.get_dask_options()
- store, target_path = self._get_store_and_path()
+ store, path_in_store, target_path = self._get_store_and_path()
  storage_options = store.get_storage_options()
  df = df.repartition(partition_size="100MB")
  try:
@@ -521,10 +532,15 @@ class BaseStoreTarget(DataTargetBase):
  except Exception as exc:
  raise RuntimeError("Failed to write Dask Dataframe") from exc
  else:
- store, target_path = self._get_store_and_path()
+ store, path_in_store, target_path = self._get_store_and_path()
  target_path = generate_path_with_chunk(self, chunk_id, target_path)
  file_system = store.filesystem
- if file_system.protocol == "file":
+ if (
+ file_system.protocol == "file"
+ # fsspec 2023.10.0 changed protocol from "file" to ("file", "local")
+ or isinstance(file_system.protocol, (tuple, list))
+ and "file" in file_system.protocol
+ ):
  dir = os.path.dirname(target_path)
  if dir:
  os.makedirs(dir, exist_ok=True)
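The widened check above exists because fsspec 2023.10.0 changed LocalFileSystem.protocol from the string "file" to the tuple ("file", "local"). A minimal standalone sketch of the same test (assumes fsspec is installed; the surrounding target-path handling is omitted):

    import fsspec

    fs = fsspec.filesystem("file")
    # Older fsspec exposes the string "file"; newer releases expose ("file", "local").
    is_local = fs.protocol == "file" or (
        isinstance(fs.protocol, (tuple, list)) and "file" in fs.protocol
    )
    print(is_local)  # True on both old and new fsspec versions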
@@ -641,6 +657,29 @@ class BaseStoreTarget(DataTargetBase):
  def _target_path_object(self):
  """return the actual/computed target path"""
  is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
+
+ if self._resource and self.path:
+ parsed_url = urlparse(self.path)
+ # When the URL consists only from scheme and endpoint and no path,
+ # make a default path for DS and redis targets.
+ # Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
+ if (
+ not isinstance(self, KafkaTarget)
+ and parsed_url.scheme in ["ds", "redis", "rediss"]
+ and (not parsed_url.path or parsed_url.path == "/")
+ ):
+ return TargetPathObject(
+ _get_target_path(
+ self,
+ self._resource,
+ self.run_id is not None,
+ netloc=parsed_url.netloc,
+ scheme=parsed_url.scheme,
+ ),
+ self.run_id,
+ is_single_file,
+ )
+
  return self.get_path() or (
  TargetPathObject(
  _get_target_path(self, self._resource, self.run_id is not None),
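The new branch above derives a default, per-project target path when a ds/redis/rediss URL carries only a scheme and endpoint. A hedged usage sketch (endpoint and feature-set names are illustrative and assume an active mlrun project):

    import mlrun.feature_store as fstore
    from mlrun.datastore.targets import RedisNoSqlTarget

    stocks_set = fstore.FeatureSet("stocks", entities=["ticker"])
    # Endpoint-only URL (no path component): a default path is generated for it,
    # and the secure "rediss://" scheme is preserved in the generated prefix.
    target = RedisNoSqlTarget(name="nosql", path="rediss://my-redis.example.com:6379")
    stocks_set.set_targets(targets=[target], with_defaults=False)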
@@ -688,7 +727,7 @@ class BaseStoreTarget(DataTargetBase):
  raise NotImplementedError()

  def purge(self):
- store, target_path = self._get_store_and_path()
+ store, path_in_store, target_path = self._get_store_and_path()
  store.rm(target_path, recursive=True)

  def as_df(
@@ -699,9 +738,13 @@ class BaseStoreTarget(DataTargetBase):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
  """return the target data as dataframe"""
+ mlrun.utils.helpers.additional_filters_warning(
+ additional_filters, self.__class__
+ )
  return mlrun.get_dataitem(self.get_target_path()).as_df(
  columns=columns,
  df_module=df_module,
@@ -715,7 +758,7 @@ class BaseStoreTarget(DataTargetBase):
  # options used in spark.read.load(**options)
  raise NotImplementedError()

- def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options={}):
+ def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
  return df

  def get_dask_options(self):
@@ -868,7 +911,7 @@ class ParquetTarget(BaseStoreTarget):
  for key_column in key_columns:
  tuple_key_columns.append((key_column.name, key_column.value_type))

- store, target_path = self._get_store_and_path()
+ store, path_in_store, target_path = self._get_store_and_path()

  storage_options = store.get_storage_options()
  if storage_options and self.storage_options:
@@ -921,9 +964,7 @@ class ParquetTarget(BaseStoreTarget):
  if unit == time_partitioning_granularity:
  break

- store, path, url = mlrun.store_manager.get_or_create_store(
- self.get_target_path()
- )
+ store, path, url = self._get_store_and_path()
  spark_options = store.get_spark_options()
  spark_options.update(
  {
@@ -948,6 +989,7 @@ class ParquetTarget(BaseStoreTarget):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
  """return the target data as dataframe"""
@@ -958,6 +1000,7 @@ class ParquetTarget(BaseStoreTarget):
  start_time=start_time,
  end_time=end_time,
  time_column=time_column,
+ additional_filters=transform_list_filters_to_tuple(additional_filters),
  **kwargs,
  )
  if not columns:
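ParquetTarget.as_df now forwards additional_filters (normalizing list filters to tuples via transform_list_filters_to_tuple). A hedged sketch, assuming an already-written parquet target and pandas/pyarrow-style (column, op, value) filter tuples; the path and column names are illustrative:

    from mlrun.datastore.targets import ParquetTarget

    target = ParquetTarget(name="parquet", path="./data/stocks.parquet")
    # Only rows matching the filter are read back from the target
    df = target.as_df(additional_filters=[("exchange", "=", "NASDAQ")])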
@@ -1042,7 +1085,7 @@ class CSVTarget(BaseStoreTarget):
  column_list = self._get_column_list(
  features=features, timestamp_key=timestamp_key, key_columns=key_columns
  )
- store, target_path = self._get_store_and_path()
+ store, path_in_store, target_path = self._get_store_and_path()
  graph.add_step(
  name=self.name or "CSVTarget",
  after=after,
@@ -1057,9 +1100,7 @@ class CSVTarget(BaseStoreTarget):
  )

  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
- store, path, url = mlrun.store_manager.get_or_create_store(
- self.get_target_path()
- )
+ store, path, url = self._get_store_and_path()
  spark_options = store.get_spark_options()
  spark_options.update(
  {
@@ -1090,8 +1131,12 @@ class CSVTarget(BaseStoreTarget):
  start_time=None,
  end_time=None,
  time_column=None,
+ additional_filters=None,
  **kwargs,
  ):
+ mlrun.utils.helpers.additional_filters_warning(
+ additional_filters, self.__class__
+ )
  df = super().as_df(
  columns=columns,
  df_module=df_module,
@@ -1112,6 +1157,98 @@ class CSVTarget(BaseStoreTarget):
  return True


+ class SnowflakeTarget(BaseStoreTarget):
+ """
+ :param attributes: A dictionary of attributes for Snowflake connection; will be overridden by database parameters
+ if they exist.
+ :param url: Snowflake hostname, in the format: <account_name>.<region>.snowflakecomputing.com
+ :param user: Snowflake user for login
+ :param db_schema: Database schema
+ :param database: Database name
+ :param warehouse: Snowflake warehouse name
+ :param table_name: Snowflake table name
+ """
+
+ support_spark = True
+ support_append = True
+ is_offline = True
+ kind = TargetTypes.snowflake
+
+ def __init__(
+ self,
+ name: str = "",
+ path=None,
+ attributes: dict[str, str] = None,
+ after_step=None,
+ columns=None,
+ partitioned: bool = False,
+ key_bucketing_number: Optional[int] = None,
+ partition_cols: Optional[list[str]] = None,
+ time_partitioning_granularity: Optional[str] = None,
+ max_events: Optional[int] = None,
+ flush_after_seconds: Optional[int] = None,
+ storage_options: dict[str, str] = None,
+ schema: dict[str, Any] = None,
+ credentials_prefix=None,
+ url: str = None,
+ user: str = None,
+ db_schema: str = None,
+ database: str = None,
+ warehouse: str = None,
+ table_name: str = None,
+ ):
+ attrs = {
+ "url": url,
+ "user": user,
+ "database": database,
+ "schema": db_schema,
+ "warehouse": warehouse,
+ "table": table_name,
+ }
+ extended_attrs = {
+ key: value for key, value in attrs.items() if value is not None
+ }
+ attributes = {} if not attributes else attributes
+ attributes.update(extended_attrs)
+ super().__init__(
+ name,
+ path,
+ attributes,
+ after_step,
+ list(schema.keys()) if schema else columns,
+ partitioned,
+ key_bucketing_number,
+ partition_cols,
+ time_partitioning_granularity,
+ max_events=max_events,
+ flush_after_seconds=flush_after_seconds,
+ storage_options=storage_options,
+ schema=schema,
+ credentials_prefix=credentials_prefix,
+ )
+
+ def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
+ spark_options = get_snowflake_spark_options(self.attributes)
+ spark_options["dbtable"] = self.attributes.get("table")
+ return spark_options
+
+ def purge(self):
+ pass
+
+ def as_df(
+ self,
+ columns=None,
+ df_module=None,
+ entities=None,
+ start_time=None,
+ end_time=None,
+ time_column=None,
+ additional_filters=None,
+ **kwargs,
+ ):
+ raise NotImplementedError()
+
+
  class NoSqlBaseTarget(BaseStoreTarget):
  is_table = True
  is_online = True
1173
1310
  def get_dask_options(self):
1174
1311
  return {"format": "csv"}
1175
1312
 
1176
- def as_df(self, columns=None, df_module=None, **kwargs):
1313
+ def as_df(
1314
+ self,
1315
+ columns=None,
1316
+ df_module=None,
1317
+ entities=None,
1318
+ start_time=None,
1319
+ end_time=None,
1320
+ time_column=None,
1321
+ additional_filters=None,
1322
+ **kwargs,
1323
+ ):
1177
1324
  raise NotImplementedError()
1178
1325
 
1179
1326
  def write_dataframe(
@@ -1183,7 +1330,10 @@ class NoSqlBaseTarget(BaseStoreTarget):
1183
1330
  options = self.get_spark_options(key_column, timestamp_key)
1184
1331
  options.update(kwargs)
1185
1332
  df = self.prepare_spark_df(df)
1186
- write_spark_dataframe_with_options(options, df, "overwrite")
1333
+ write_format = options.pop("format", None)
1334
+ write_spark_dataframe_with_options(
1335
+ options, df, "overwrite", write_format=write_format
1336
+ )
1187
1337
  else:
1188
1338
  # To prevent modification of the original dataframe and make sure
1189
1339
  # that the last event of a key is the one being persisted
@@ -1193,7 +1343,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
1193
1343
  df = df.copy(deep=False)
1194
1344
  access_key = self._get_credential("V3IO_ACCESS_KEY")
1195
1345
 
1196
- store, target_path = self._get_store_and_path()
1346
+ store, path_in_store, target_path = self._get_store_and_path()
1197
1347
  storage_options = store.get_storage_options()
1198
1348
  access_key = storage_options.get("v3io_access_key", access_key)
1199
1349
 
@@ -1215,7 +1365,7 @@ class NoSqlTarget(NoSqlBaseTarget):
1215
1365
  def get_table_object(self):
1216
1366
  from storey import Table, V3ioDriver
1217
1367
 
1218
- store, target_path = self._get_store_and_path()
1368
+ store, path_in_store, target_path = self._get_store_and_path()
1219
1369
  endpoint, uri = parse_path(target_path)
1220
1370
  storage_options = store.get_storage_options()
1221
1371
  access_key = storage_options.get("v3io_access_key")
@@ -1227,7 +1377,7 @@ class NoSqlTarget(NoSqlBaseTarget):
1227
1377
  )
1228
1378
 
1229
1379
  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
1230
- store, target_path = self._get_store_and_path()
1380
+ store, path_in_store, target_path = self._get_store_and_path()
1231
1381
  storage_options = store.get_storage_options()
1232
1382
  store_access_key = storage_options.get("v3io_access_key")
1233
1383
  env_access_key = self._secrets.get(
@@ -1239,7 +1389,7 @@ class NoSqlTarget(NoSqlBaseTarget):
1239
1389
  "Spark will disregard the store-provided key."
1240
1390
  )
1241
1391
  spark_options = {
1242
- "path": store.spark_url + target_path,
1392
+ "path": store.spark_url + path_in_store,
1243
1393
  "format": "io.iguaz.v3io.spark.sql.kv",
1244
1394
  }
1245
1395
  if isinstance(key_column, list) and len(key_column) >= 1:
@@ -1332,10 +1482,10 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
1332
1482
  def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
1333
1483
  endpoint, uri = self._get_server_endpoint()
1334
1484
  parsed_endpoint = urlparse(endpoint)
1335
- store, path = self._get_store_and_path()
1485
+ store, path_in_store, path = self._get_store_and_path()
1336
1486
  return {
1337
1487
  "key.column": "_spark_object_name",
1338
- "table": "{" + store.spark_url + path,
1488
+ "table": "{" + path_in_store,
1339
1489
  "format": "org.apache.spark.sql.redis",
1340
1490
  "host": parsed_endpoint.hostname,
1341
1491
  "port": parsed_endpoint.port,
@@ -1383,7 +1533,7 @@ class StreamTarget(BaseStoreTarget):
1383
1533
  from storey import V3ioDriver
1384
1534
 
1385
1535
  key_columns = list(key_columns.keys())
1386
- store, path = self._get_store_and_path()
1536
+ store, path_in_store, path = self._get_store_and_path()
1387
1537
  if not path:
1388
1538
  raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
1389
1539
  endpoint, uri = parse_path(path)
@@ -1406,11 +1556,40 @@ class StreamTarget(BaseStoreTarget):
1406
1556
  **self.attributes,
1407
1557
  )
1408
1558
 
1409
- def as_df(self, columns=None, df_module=None, **kwargs):
1559
+ def as_df(
1560
+ self,
1561
+ columns=None,
1562
+ df_module=None,
1563
+ entities=None,
1564
+ start_time=None,
1565
+ end_time=None,
1566
+ time_column=None,
1567
+ additional_filters=None,
1568
+ **kwargs,
1569
+ ):
1410
1570
  raise NotImplementedError()
1411
1571
 
1412
1572
 
1413
1573
  class KafkaTarget(BaseStoreTarget):
1574
+ """
1575
+ Kafka target storage driver, used to write data into kafka topics.
1576
+ example::
1577
+ # define target
1578
+ kafka_target = KafkaTarget(
1579
+ name="kafka", path="my_topic", brokers="localhost:9092"
1580
+ )
1581
+ # ingest
1582
+ stocks_set.ingest(stocks, [kafka_target])
1583
+ :param name: target name
1584
+ :param path: topic name e.g. "my_topic"
1585
+ :param after_step: optional, after what step in the graph to add the target
1586
+ :param columns: optional, which columns from data to write
1587
+ :param bootstrap_servers: Deprecated. Use the brokers parameter instead
1588
+ :param producer_options: additional configurations for kafka producer
1589
+ :param brokers: kafka broker as represented by a host:port pair, or a list of kafka brokers, e.g.
1590
+ "localhost:9092", or ["kafka-broker-1:9092", "kafka-broker-2:9092"]
1591
+ """
1592
+
1414
1593
  kind = TargetTypes.kafka
1415
1594
  is_table = False
1416
1595
  is_online = False
@@ -1423,11 +1602,27 @@ class KafkaTarget(BaseStoreTarget):
1423
1602
  *args,
1424
1603
  bootstrap_servers=None,
1425
1604
  producer_options=None,
1605
+ brokers=None,
1426
1606
  **kwargs,
1427
1607
  ):
1428
1608
  attrs = {}
1429
- if bootstrap_servers is not None:
1430
- attrs["bootstrap_servers"] = bootstrap_servers
1609
+
1610
+ # TODO: Remove this in 1.9.0
1611
+ if bootstrap_servers:
1612
+ if brokers:
1613
+ raise mlrun.errors.MLRunInvalidArgumentError(
1614
+ "KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
1615
+ "'bootstrap_servers' parameter. Please use 'brokers' only."
1616
+ )
1617
+ warnings.warn(
1618
+ "'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
1619
+ "use 'brokers' instead.",
1620
+ FutureWarning,
1621
+ )
1622
+ brokers = bootstrap_servers
1623
+
1624
+ if brokers:
1625
+ attrs["brokers"] = brokers
1431
1626
  if producer_options is not None:
1432
1627
  attrs["producer_options"] = producer_options
1433
1628
 
@@ -1449,14 +1644,16 @@ class KafkaTarget(BaseStoreTarget):
1449
1644
  if self.path and self.path.startswith("ds://"):
1450
1645
  datastore_profile = datastore_profile_read(self.path)
1451
1646
  attributes = datastore_profile.attributes()
1452
- bootstrap_servers = attributes.pop("bootstrap_servers", None)
1647
+ brokers = attributes.pop(
1648
+ "brokers", attributes.pop("bootstrap_servers", None)
1649
+ )
1453
1650
  topic = datastore_profile.topic
1454
1651
  else:
1455
1652
  attributes = copy(self.attributes)
1456
- bootstrap_servers = attributes.pop("bootstrap_servers", None)
1457
- topic, bootstrap_servers = parse_kafka_url(
1458
- self.get_target_path(), bootstrap_servers
1653
+ brokers = attributes.pop(
1654
+ "brokers", attributes.pop("bootstrap_servers", None)
1459
1655
  )
1656
+ topic, brokers = parse_kafka_url(self.get_target_path(), brokers)
1460
1657
 
1461
1658
  if not topic:
1462
1659
  raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1470,11 +1667,21 @@ class KafkaTarget(BaseStoreTarget):
1470
1667
  class_name="storey.KafkaTarget",
1471
1668
  columns=column_list,
1472
1669
  topic=topic,
1473
- bootstrap_servers=bootstrap_servers,
1670
+ brokers=brokers,
1474
1671
  **attributes,
1475
1672
  )
1476
1673
 
1477
- def as_df(self, columns=None, df_module=None, **kwargs):
1674
+ def as_df(
1675
+ self,
1676
+ columns=None,
1677
+ df_module=None,
1678
+ entities=None,
1679
+ start_time=None,
1680
+ end_time=None,
1681
+ time_column=None,
1682
+ additional_filters=None,
1683
+ **kwargs,
1684
+ ):
1478
1685
  raise NotImplementedError()
1479
1686
 
1480
1687
  def purge(self):
@@ -1521,7 +1728,17 @@ class TSDBTarget(BaseStoreTarget):
1521
1728
  **self.attributes,
1522
1729
  )
1523
1730
 
1524
- def as_df(self, columns=None, df_module=None, **kwargs):
1731
+ def as_df(
1732
+ self,
1733
+ columns=None,
1734
+ df_module=None,
1735
+ entities=None,
1736
+ start_time=None,
1737
+ end_time=None,
1738
+ time_column=None,
1739
+ additional_filters=None,
1740
+ **kwargs,
1741
+ ):
1525
1742
  raise NotImplementedError()
1526
1743
 
1527
1744
  def write_dataframe(
@@ -1537,7 +1754,7 @@ class TSDBTarget(BaseStoreTarget):
1537
1754
  key_column = [key_column]
1538
1755
  new_index.extend(key_column)
1539
1756
 
1540
- store, target_path = self._get_store_and_path()
1757
+ store, path_in_store, target_path = self._get_store_and_path()
1541
1758
  storage_options = store.get_storage_options()
1542
1759
  access_key = storage_options.get("v3io_access_key", access_key)
1543
1760
 
@@ -1632,11 +1849,16 @@ class DFTarget(BaseStoreTarget):
1632
1849
  self,
1633
1850
  columns=None,
1634
1851
  df_module=None,
1852
+ entities=None,
1635
1853
  start_time=None,
1636
1854
  end_time=None,
1637
1855
  time_column=None,
1856
+ additional_filters=None,
1638
1857
  **kwargs,
1639
1858
  ):
1859
+ mlrun.utils.helpers.additional_filters_warning(
1860
+ additional_filters, self.__class__
1861
+ )
1640
1862
  return select_columns_from_df(
1641
1863
  filter_df_start_end_time(
1642
1864
  self._df,
@@ -1811,6 +2033,7 @@ class SQLTarget(BaseStoreTarget):
1811
2033
  start_time=None,
1812
2034
  end_time=None,
1813
2035
  time_column=None,
2036
+ additional_filters=None,
1814
2037
  **kwargs,
1815
2038
  ):
1816
2039
  try:
@@ -1819,6 +2042,10 @@ class SQLTarget(BaseStoreTarget):
1819
2042
  except (ModuleNotFoundError, ImportError) as exc:
1820
2043
  self._raise_sqlalchemy_import_error(exc)
1821
2044
 
2045
+ mlrun.utils.helpers.additional_filters_warning(
2046
+ additional_filters, self.__class__
2047
+ )
2048
+
1822
2049
  db_path, table_name, _, _, _, _ = self._parse_url()
1823
2050
  engine = sqlalchemy.create_engine(db_path)
1824
2051
  parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
@@ -1908,7 +2135,7 @@ class SQLTarget(BaseStoreTarget):
1908
2135
  raise ValueError(f"Table named {table_name} is not exist")
1909
2136
 
1910
2137
  elif not table_exists and create_table:
1911
- TYPE_TO_SQL_TYPE = {
2138
+ type_to_sql_type = {
1912
2139
  int: sqlalchemy.Integer,
1913
2140
  str: sqlalchemy.String(self.attributes.get("varchar_len")),
1914
2141
  datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -1921,7 +2148,7 @@ class SQLTarget(BaseStoreTarget):
1921
2148
  # creat new table with the given name
1922
2149
  columns = []
1923
2150
  for col, col_type in self.schema.items():
1924
- col_type_sql = TYPE_TO_SQL_TYPE.get(col_type)
2151
+ col_type_sql = type_to_sql_type.get(col_type)
1925
2152
  if col_type_sql is None:
1926
2153
  raise TypeError(
1927
2154
  f"'{col_type}' unsupported type for column '{col}'"
@@ -1961,10 +2188,11 @@ kind_to_driver = {
1961
2188
  TargetTypes.tsdb: TSDBTarget,
1962
2189
  TargetTypes.custom: CustomTarget,
1963
2190
  TargetTypes.sql: SQLTarget,
2191
+ TargetTypes.snowflake: SnowflakeTarget,
1964
2192
  }
1965
2193
 
1966
2194
 
1967
- def _get_target_path(driver, resource, run_id_mode=False):
2195
+ def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
1968
2196
  """return the default target path given the resource and target kind"""
1969
2197
  kind = driver.kind
1970
2198
  suffix = driver.suffix
@@ -1981,11 +2209,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
1981
2209
  )
1982
2210
  name = resource.metadata.name
1983
2211
  project = resource.metadata.project or mlrun.mlconf.default_project
1984
- data_prefix = get_default_prefix_for_target(kind).format(
2212
+
2213
+ default_kind_name = kind
2214
+ if scheme == "ds":
2215
+ # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
2216
+ # as a placeholder that can be used in any specified target
2217
+ default_kind_name = "dsnosql"
2218
+ if scheme == "redis" or scheme == "rediss":
2219
+ default_kind_name = TargetTypes.redisnosql
2220
+
2221
+ netloc = netloc or ""
2222
+ data_prefix = get_default_prefix_for_target(default_kind_name).format(
2223
+ ds_profile_name=netloc, # In case of ds profile, set its the name
2224
+ authority=netloc, # In case of redis, replace {authority} with netloc
1985
2225
  project=project,
1986
2226
  kind=kind,
1987
2227
  name=name,
1988
2228
  )
2229
+
2230
+ if scheme == "rediss":
2231
+ data_prefix = data_prefix.replace("redis://", "rediss://", 1)
2232
+
1989
2233
  # todo: handle ver tag changes, may need to copy files?
1990
2234
  if not run_id_mode:
1991
2235
  version = resource.metadata.tag