mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/datastore/targets.py
CHANGED
@@ -17,9 +17,10 @@ import os
|
|
|
17
17
|
import random
|
|
18
18
|
import sys
|
|
19
19
|
import time
|
|
20
|
+
import warnings
|
|
20
21
|
from collections import Counter
|
|
21
22
|
from copy import copy
|
|
22
|
-
from typing import Any,
|
|
23
|
+
from typing import Any, Optional, Union
|
|
23
24
|
from urllib.parse import urlparse
|
|
24
25
|
|
|
25
26
|
import pandas as pd
|
|
@@ -28,8 +29,13 @@ from mergedeep import merge
|
|
|
28
29
|
import mlrun
|
|
29
30
|
import mlrun.utils.helpers
|
|
30
31
|
from mlrun.config import config
|
|
32
|
+
from mlrun.datastore.snowflake_utils import (
|
|
33
|
+
get_snowflake_password,
|
|
34
|
+
get_snowflake_spark_options,
|
|
35
|
+
)
|
|
36
|
+
from mlrun.datastore.utils import transform_list_filters_to_tuple
|
|
31
37
|
from mlrun.model import DataSource, DataTarget, DataTargetBase, TargetPathObject
|
|
32
|
-
from mlrun.utils import now_date
|
|
38
|
+
from mlrun.utils import logger, now_date
|
|
33
39
|
from mlrun.utils.helpers import to_parquet
|
|
34
40
|
from mlrun.utils.v3io_clients import get_frames_client
|
|
35
41
|
|
|
@@ -41,9 +47,7 @@ from .spark_utils import spark_session_update_hadoop_options
|
|
|
41
47
|
from .utils import (
|
|
42
48
|
_generate_sql_query_with_time_filter,
|
|
43
49
|
filter_df_start_end_time,
|
|
44
|
-
parse_kafka_url,
|
|
45
50
|
select_columns_from_df,
|
|
46
|
-
store_path_to_spark,
|
|
47
51
|
)
|
|
48
52
|
|
|
49
53
|
|
|
@@ -58,6 +62,7 @@ class TargetTypes:
|
|
|
58
62
|
dataframe = "dataframe"
|
|
59
63
|
custom = "custom"
|
|
60
64
|
sql = "sql"
|
|
65
|
+
snowflake = "snowflake"
|
|
61
66
|
|
|
62
67
|
@staticmethod
|
|
63
68
|
def all():
|
|
@@ -72,6 +77,7 @@ class TargetTypes:
|
|
|
72
77
|
TargetTypes.dataframe,
|
|
73
78
|
TargetTypes.custom,
|
|
74
79
|
TargetTypes.sql,
|
|
80
|
+
TargetTypes.snowflake,
|
|
75
81
|
]
|
|
76
82
|
|
|
77
83
|
|
|
@@ -79,11 +85,14 @@ def generate_target_run_id():
|
|
|
79
85
|
return f"{round(time.time() * 1000)}_{random.randint(0, 999)}"
|
|
80
86
|
|
|
81
87
|
|
|
82
|
-
def write_spark_dataframe_with_options(spark_options, df, mode):
|
|
88
|
+
def write_spark_dataframe_with_options(spark_options, df, mode, write_format=None):
|
|
83
89
|
non_hadoop_spark_options = spark_session_update_hadoop_options(
|
|
84
90
|
df.sql_ctx.sparkSession, spark_options
|
|
85
91
|
)
|
|
86
|
-
|
|
92
|
+
if write_format:
|
|
93
|
+
df.write.format(write_format).mode(mode).save(**non_hadoop_spark_options)
|
|
94
|
+
else:
|
|
95
|
+
df.write.mode(mode).save(**non_hadoop_spark_options)
|
|
87
96
|
|
|
88
97
|
|
|
89
98
|
def default_target_names():
|
|
@@ -215,9 +224,8 @@ def validate_target_list(targets):
|
|
|
215
224
|
]
|
|
216
225
|
if target_types_requiring_name:
|
|
217
226
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
218
|
-
"Only one default name per target type is allowed (please
|
|
219
|
-
|
|
220
|
-
)
|
|
227
|
+
"Only one default name per target type is allowed (please "
|
|
228
|
+
f"specify name for {target_types_requiring_name} target)"
|
|
221
229
|
)
|
|
222
230
|
|
|
223
231
|
target_names_count = Counter(
|
|
@@ -232,9 +240,8 @@ def validate_target_list(targets):
|
|
|
232
240
|
|
|
233
241
|
if targets_with_same_name:
|
|
234
242
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
235
|
-
"Each target must have a unique name (more than one target with
|
|
236
|
-
|
|
237
|
-
)
|
|
243
|
+
"Each target must have a unique name (more than one target with "
|
|
244
|
+
f"those names found {targets_with_same_name})"
|
|
238
245
|
)
|
|
239
246
|
|
|
240
247
|
no_path_target_types_count = Counter(
|
|
@@ -252,9 +259,8 @@ def validate_target_list(targets):
|
|
|
252
259
|
]
|
|
253
260
|
if target_types_requiring_path:
|
|
254
261
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
255
|
-
"Only one default path per target type is allowed (please specify
|
|
256
|
-
|
|
257
|
-
)
|
|
262
|
+
"Only one default path per target type is allowed (please specify "
|
|
263
|
+
f"path for {target_types_requiring_path} target)"
|
|
258
264
|
)
|
|
259
265
|
|
|
260
266
|
target_paths_count = Counter(
|
|
@@ -269,9 +275,8 @@ def validate_target_list(targets):
|
|
|
269
275
|
|
|
270
276
|
if targets_with_same_path:
|
|
271
277
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
272
|
-
"Each target must have a unique path (more than one target
|
|
273
|
-
|
|
274
|
-
)
|
|
278
|
+
"Each target must have a unique path (more than one target "
|
|
279
|
+
f"with those names found {targets_with_same_path})"
|
|
275
280
|
)
|
|
276
281
|
|
|
277
282
|
|
|
@@ -384,23 +389,24 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
384
389
|
is_offline = False
|
|
385
390
|
support_spark = False
|
|
386
391
|
support_storey = False
|
|
392
|
+
support_pandas = False
|
|
387
393
|
support_append = False
|
|
388
394
|
|
|
389
395
|
def __init__(
|
|
390
396
|
self,
|
|
391
397
|
name: str = "",
|
|
392
398
|
path=None,
|
|
393
|
-
attributes:
|
|
399
|
+
attributes: dict[str, str] = None,
|
|
394
400
|
after_step=None,
|
|
395
401
|
columns=None,
|
|
396
402
|
partitioned: bool = False,
|
|
397
403
|
key_bucketing_number: Optional[int] = None,
|
|
398
|
-
partition_cols: Optional[
|
|
404
|
+
partition_cols: Optional[list[str]] = None,
|
|
399
405
|
time_partitioning_granularity: Optional[str] = None,
|
|
400
406
|
max_events: Optional[int] = None,
|
|
401
407
|
flush_after_seconds: Optional[int] = None,
|
|
402
|
-
storage_options:
|
|
403
|
-
schema:
|
|
408
|
+
storage_options: dict[str, str] = None,
|
|
409
|
+
schema: dict[str, Any] = None,
|
|
404
410
|
credentials_prefix=None,
|
|
405
411
|
):
|
|
406
412
|
super().__init__(
|
|
@@ -433,6 +439,12 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
433
439
|
self.storage_options = storage_options
|
|
434
440
|
self.schema = schema or {}
|
|
435
441
|
self.credentials_prefix = credentials_prefix
|
|
442
|
+
if credentials_prefix:
|
|
443
|
+
warnings.warn(
|
|
444
|
+
"The 'credentials_prefix' parameter is deprecated and will be removed in "
|
|
445
|
+
"1.9.0. Please use datastore profiles instead.",
|
|
446
|
+
FutureWarning,
|
|
447
|
+
)
|
|
436
448
|
|
|
437
449
|
self._target = None
|
|
438
450
|
self._resource = None
|
|
@@ -452,14 +464,11 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
452
464
|
if self.credentials_prefix
|
|
453
465
|
else None
|
|
454
466
|
)
|
|
455
|
-
store, resolved_store_path = mlrun.store_manager.get_or_create_store(
|
|
467
|
+
store, resolved_store_path, url = mlrun.store_manager.get_or_create_store(
|
|
456
468
|
self.get_target_path(),
|
|
457
469
|
credentials_prefix_secrets,
|
|
458
470
|
)
|
|
459
|
-
|
|
460
|
-
return store, store.url + resolved_store_path
|
|
461
|
-
else:
|
|
462
|
-
return store, self.get_target_path()
|
|
471
|
+
return store, resolved_store_path, url
|
|
463
472
|
|
|
464
473
|
def _get_column_list(self, features, timestamp_key, key_columns, with_type=False):
|
|
465
474
|
result = []
|
|
@@ -505,10 +514,13 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
505
514
|
options = self.get_spark_options(key_column, timestamp_key)
|
|
506
515
|
options.update(kwargs)
|
|
507
516
|
df = self.prepare_spark_df(df, key_column, timestamp_key, options)
|
|
508
|
-
|
|
517
|
+
write_format = options.pop("format", None)
|
|
518
|
+
write_spark_dataframe_with_options(
|
|
519
|
+
options, df, "overwrite", write_format=write_format
|
|
520
|
+
)
|
|
509
521
|
elif hasattr(df, "dask"):
|
|
510
522
|
dask_options = self.get_dask_options()
|
|
511
|
-
store, target_path = self._get_store_and_path()
|
|
523
|
+
store, path_in_store, target_path = self._get_store_and_path()
|
|
512
524
|
storage_options = store.get_storage_options()
|
|
513
525
|
df = df.repartition(partition_size="100MB")
|
|
514
526
|
try:
|
|
@@ -529,18 +541,21 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
529
541
|
except Exception as exc:
|
|
530
542
|
raise RuntimeError("Failed to write Dask Dataframe") from exc
|
|
531
543
|
else:
|
|
532
|
-
store, target_path = self._get_store_and_path()
|
|
544
|
+
store, path_in_store, target_path = self._get_store_and_path()
|
|
533
545
|
target_path = generate_path_with_chunk(self, chunk_id, target_path)
|
|
534
546
|
file_system = store.filesystem
|
|
535
|
-
if
|
|
547
|
+
if (
|
|
548
|
+
file_system.protocol == "file"
|
|
549
|
+
# fsspec 2023.10.0 changed protocol from "file" to ("file", "local")
|
|
550
|
+
or isinstance(file_system.protocol, (tuple, list))
|
|
551
|
+
and "file" in file_system.protocol
|
|
552
|
+
):
|
|
536
553
|
dir = os.path.dirname(target_path)
|
|
537
554
|
if dir:
|
|
538
555
|
os.makedirs(dir, exist_ok=True)
|
|
539
556
|
target_df = df
|
|
540
557
|
partition_cols = None # single parquet file
|
|
541
|
-
if not
|
|
542
|
-
".pq"
|
|
543
|
-
): # directory
|
|
558
|
+
if not mlrun.utils.helpers.is_parquet_file(target_path): # directory
|
|
544
559
|
partition_cols = []
|
|
545
560
|
if timestamp_key and (
|
|
546
561
|
self.partitioned or self.time_partitioning_granularity
|
|
@@ -649,6 +664,29 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
649
664
|
def _target_path_object(self):
|
|
650
665
|
"""return the actual/computed target path"""
|
|
651
666
|
is_single_file = hasattr(self, "is_single_file") and self.is_single_file()
|
|
667
|
+
|
|
668
|
+
if self._resource and self.path:
|
|
669
|
+
parsed_url = urlparse(self.path)
|
|
670
|
+
# When the URL consists only from scheme and endpoint and no path,
|
|
671
|
+
# make a default path for DS and redis targets.
|
|
672
|
+
# Also ignore KafkaTarget when it uses the ds scheme (no default path for KafkaTarget)
|
|
673
|
+
if (
|
|
674
|
+
not isinstance(self, KafkaTarget)
|
|
675
|
+
and parsed_url.scheme in ["ds", "redis", "rediss"]
|
|
676
|
+
and (not parsed_url.path or parsed_url.path == "/")
|
|
677
|
+
):
|
|
678
|
+
return TargetPathObject(
|
|
679
|
+
_get_target_path(
|
|
680
|
+
self,
|
|
681
|
+
self._resource,
|
|
682
|
+
self.run_id is not None,
|
|
683
|
+
netloc=parsed_url.netloc,
|
|
684
|
+
scheme=parsed_url.scheme,
|
|
685
|
+
),
|
|
686
|
+
self.run_id,
|
|
687
|
+
is_single_file,
|
|
688
|
+
)
|
|
689
|
+
|
|
652
690
|
return self.get_path() or (
|
|
653
691
|
TargetPathObject(
|
|
654
692
|
_get_target_path(self, self._resource, self.run_id is not None),
|
|
@@ -665,6 +703,7 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
665
703
|
self.kind, self.name, self.get_target_templated_path()
|
|
666
704
|
)
|
|
667
705
|
target = self._target
|
|
706
|
+
target.attributes = self.attributes
|
|
668
707
|
target.run_id = self.run_id
|
|
669
708
|
target.status = status or target.status or "created"
|
|
670
709
|
target.updated = now_date().isoformat()
|
|
@@ -693,11 +732,25 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
693
732
|
timestamp_key=None,
|
|
694
733
|
featureset_status=None,
|
|
695
734
|
):
|
|
735
|
+
if not self.support_storey:
|
|
736
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
737
|
+
f"{type(self).__name__} does not support storey engine"
|
|
738
|
+
)
|
|
696
739
|
raise NotImplementedError()
|
|
697
740
|
|
|
698
741
|
def purge(self):
|
|
699
|
-
|
|
700
|
-
|
|
742
|
+
"""
|
|
743
|
+
Delete the files of the target.
|
|
744
|
+
|
|
745
|
+
Do not use this function directly from the sdk. Use FeatureSet.purge_targets.
|
|
746
|
+
"""
|
|
747
|
+
store, path_in_store, target_path = self._get_store_and_path()
|
|
748
|
+
if path_in_store not in ["", "/"]:
|
|
749
|
+
store.rm(path_in_store, recursive=True)
|
|
750
|
+
else:
|
|
751
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
752
|
+
"Unable to delete target. Please Use purge_targets from FeatureSet object."
|
|
753
|
+
)
|
|
701
754
|
|
|
702
755
|
def as_df(
|
|
703
756
|
self,
|
|
@@ -707,9 +760,15 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
707
760
|
start_time=None,
|
|
708
761
|
end_time=None,
|
|
709
762
|
time_column=None,
|
|
763
|
+
additional_filters=None,
|
|
710
764
|
**kwargs,
|
|
711
765
|
):
|
|
712
766
|
"""return the target data as dataframe"""
|
|
767
|
+
if not self.support_pandas:
|
|
768
|
+
raise NotImplementedError()
|
|
769
|
+
mlrun.utils.helpers.additional_filters_warning(
|
|
770
|
+
additional_filters, self.__class__
|
|
771
|
+
)
|
|
713
772
|
return mlrun.get_dataitem(self.get_target_path()).as_df(
|
|
714
773
|
columns=columns,
|
|
715
774
|
df_module=df_module,
|
|
@@ -721,17 +780,25 @@ class BaseStoreTarget(DataTargetBase):
|
|
|
721
780
|
|
|
722
781
|
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
723
782
|
# options used in spark.read.load(**options)
|
|
783
|
+
if not self.support_spark:
|
|
784
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
785
|
+
f"{type(self).__name__} does not support spark engine"
|
|
786
|
+
)
|
|
724
787
|
raise NotImplementedError()
|
|
725
788
|
|
|
726
|
-
def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=
|
|
789
|
+
def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
|
|
727
790
|
return df
|
|
728
791
|
|
|
729
792
|
def get_dask_options(self):
|
|
730
793
|
raise NotImplementedError()
|
|
731
794
|
|
|
795
|
+
@property
|
|
796
|
+
def source_spark_attributes(self) -> dict:
|
|
797
|
+
return {}
|
|
798
|
+
|
|
732
799
|
|
|
733
800
|
class ParquetTarget(BaseStoreTarget):
|
|
734
|
-
"""
|
|
801
|
+
"""Parquet target storage driver, used to materialize feature set/vector data into parquet files.
|
|
735
802
|
|
|
736
803
|
:param name: optional, target name. By default will be called ParquetTarget
|
|
737
804
|
:param path: optional, Output path. Can be either a file or directory.
|
|
@@ -760,22 +827,23 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
760
827
|
support_spark = True
|
|
761
828
|
support_storey = True
|
|
762
829
|
support_dask = True
|
|
830
|
+
support_pandas = True
|
|
763
831
|
support_append = True
|
|
764
832
|
|
|
765
833
|
def __init__(
|
|
766
834
|
self,
|
|
767
835
|
name: str = "",
|
|
768
836
|
path=None,
|
|
769
|
-
attributes:
|
|
837
|
+
attributes: dict[str, str] = None,
|
|
770
838
|
after_step=None,
|
|
771
839
|
columns=None,
|
|
772
840
|
partitioned: bool = None,
|
|
773
841
|
key_bucketing_number: Optional[int] = None,
|
|
774
|
-
partition_cols: Optional[
|
|
842
|
+
partition_cols: Optional[list[str]] = None,
|
|
775
843
|
time_partitioning_granularity: Optional[str] = None,
|
|
776
844
|
max_events: Optional[int] = 10000,
|
|
777
845
|
flush_after_seconds: Optional[int] = 900,
|
|
778
|
-
storage_options:
|
|
846
|
+
storage_options: dict[str, str] = None,
|
|
779
847
|
):
|
|
780
848
|
self.path = path
|
|
781
849
|
if partitioned is None:
|
|
@@ -865,10 +933,9 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
865
933
|
if time_unit == time_partitioning_granularity:
|
|
866
934
|
break
|
|
867
935
|
|
|
868
|
-
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
and not self.get_target_path().endswith(".pq")
|
|
936
|
+
target_path = self.get_target_path()
|
|
937
|
+
if not self.partitioned and not mlrun.utils.helpers.is_parquet_file(
|
|
938
|
+
target_path
|
|
872
939
|
):
|
|
873
940
|
partition_cols = []
|
|
874
941
|
|
|
@@ -876,25 +943,16 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
876
943
|
for key_column in key_columns:
|
|
877
944
|
tuple_key_columns.append((key_column.name, key_column.value_type))
|
|
878
945
|
|
|
879
|
-
store, target_path = self._get_store_and_path()
|
|
880
|
-
|
|
881
|
-
storage_options = store.get_storage_options()
|
|
882
|
-
if storage_options and self.storage_options:
|
|
883
|
-
storage_options = merge(storage_options, self.storage_options)
|
|
884
|
-
else:
|
|
885
|
-
storage_options = storage_options or self.storage_options
|
|
886
|
-
|
|
887
946
|
step = graph.add_step(
|
|
888
947
|
name=self.name or "ParquetTarget",
|
|
889
948
|
after=after,
|
|
890
949
|
graph_shape="cylinder",
|
|
891
|
-
class_name="
|
|
950
|
+
class_name="mlrun.datastore.storeytargets.ParquetStoreyTarget",
|
|
892
951
|
path=target_path,
|
|
893
952
|
columns=column_list,
|
|
894
953
|
index_cols=tuple_key_columns,
|
|
895
954
|
partition_cols=partition_cols,
|
|
896
955
|
time_field=timestamp_key,
|
|
897
|
-
storage_options=storage_options,
|
|
898
956
|
max_events=self.max_events,
|
|
899
957
|
flush_after_seconds=self.flush_after_seconds,
|
|
900
958
|
update_last_written=featureset_status.update_last_written_for_target,
|
|
@@ -929,27 +987,19 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
929
987
|
if unit == time_partitioning_granularity:
|
|
930
988
|
break
|
|
931
989
|
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
path = store.url + path
|
|
938
|
-
result = {
|
|
939
|
-
"path": store_path_to_spark(path, storage_spark_options),
|
|
940
|
-
"format": "parquet",
|
|
941
|
-
}
|
|
942
|
-
result = {**result, **storage_spark_options}
|
|
943
|
-
else:
|
|
944
|
-
result = {
|
|
945
|
-
"path": store_path_to_spark(self.get_target_path()),
|
|
990
|
+
store, path, url = self._get_store_and_path()
|
|
991
|
+
spark_options = store.get_spark_options()
|
|
992
|
+
spark_options.update(
|
|
993
|
+
{
|
|
994
|
+
"path": store.spark_url + path,
|
|
946
995
|
"format": "parquet",
|
|
947
996
|
}
|
|
997
|
+
)
|
|
948
998
|
for partition_col in self.partition_cols or []:
|
|
949
999
|
partition_cols.append(partition_col)
|
|
950
1000
|
if partition_cols:
|
|
951
|
-
|
|
952
|
-
return
|
|
1001
|
+
spark_options["partitionBy"] = partition_cols
|
|
1002
|
+
return spark_options
|
|
953
1003
|
|
|
954
1004
|
def get_dask_options(self):
|
|
955
1005
|
return {"format": "parquet"}
|
|
@@ -962,6 +1012,7 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
962
1012
|
start_time=None,
|
|
963
1013
|
end_time=None,
|
|
964
1014
|
time_column=None,
|
|
1015
|
+
additional_filters=None,
|
|
965
1016
|
**kwargs,
|
|
966
1017
|
):
|
|
967
1018
|
"""return the target data as dataframe"""
|
|
@@ -972,6 +1023,7 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
972
1023
|
start_time=start_time,
|
|
973
1024
|
end_time=end_time,
|
|
974
1025
|
time_column=time_column,
|
|
1026
|
+
additional_filters=transform_list_filters_to_tuple(additional_filters),
|
|
975
1027
|
**kwargs,
|
|
976
1028
|
)
|
|
977
1029
|
if not columns:
|
|
@@ -993,9 +1045,7 @@ class ParquetTarget(BaseStoreTarget):
|
|
|
993
1045
|
return result
|
|
994
1046
|
|
|
995
1047
|
def is_single_file(self):
|
|
996
|
-
|
|
997
|
-
return self.path.endswith(".parquet") or self.path.endswith(".pq")
|
|
998
|
-
return False
|
|
1048
|
+
return mlrun.utils.helpers.is_parquet_file(self.path)
|
|
999
1049
|
|
|
1000
1050
|
def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
|
|
1001
1051
|
# If partitioning by time, add the necessary columns
|
|
@@ -1035,6 +1085,7 @@ class CSVTarget(BaseStoreTarget):
|
|
|
1035
1085
|
is_offline = True
|
|
1036
1086
|
support_spark = True
|
|
1037
1087
|
support_storey = True
|
|
1088
|
+
support_pandas = True
|
|
1038
1089
|
|
|
1039
1090
|
@staticmethod
|
|
1040
1091
|
def _write_dataframe(df, storage_options, target_path, partition_cols, **kwargs):
|
|
@@ -1056,39 +1107,30 @@ class CSVTarget(BaseStoreTarget):
|
|
|
1056
1107
|
column_list = self._get_column_list(
|
|
1057
1108
|
features=features, timestamp_key=timestamp_key, key_columns=key_columns
|
|
1058
1109
|
)
|
|
1059
|
-
|
|
1110
|
+
target_path = self.get_target_path()
|
|
1060
1111
|
graph.add_step(
|
|
1061
1112
|
name=self.name or "CSVTarget",
|
|
1062
1113
|
after=after,
|
|
1063
1114
|
graph_shape="cylinder",
|
|
1064
|
-
class_name="
|
|
1115
|
+
class_name="mlrun.datastore.storeytargets.CSVStoreyTarget",
|
|
1065
1116
|
path=target_path,
|
|
1066
1117
|
columns=column_list,
|
|
1067
1118
|
header=True,
|
|
1068
1119
|
index_cols=key_columns,
|
|
1069
|
-
storage_options=store.get_storage_options(),
|
|
1070
1120
|
**self.attributes,
|
|
1071
1121
|
)
|
|
1072
1122
|
|
|
1073
1123
|
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
1074
|
-
|
|
1075
|
-
|
|
1076
|
-
|
|
1077
|
-
|
|
1078
|
-
|
|
1079
|
-
path = store.url + path
|
|
1080
|
-
result = {
|
|
1081
|
-
"path": store_path_to_spark(path, storage_spark_options),
|
|
1082
|
-
"format": "csv",
|
|
1083
|
-
"header": "true",
|
|
1084
|
-
}
|
|
1085
|
-
return {**result, **storage_spark_options}
|
|
1086
|
-
else:
|
|
1087
|
-
return {
|
|
1088
|
-
"path": store_path_to_spark(self.get_target_path()),
|
|
1124
|
+
store, path, url = self._get_store_and_path()
|
|
1125
|
+
spark_options = store.get_spark_options()
|
|
1126
|
+
spark_options.update(
|
|
1127
|
+
{
|
|
1128
|
+
"path": store.spark_url + path,
|
|
1089
1129
|
"format": "csv",
|
|
1090
1130
|
"header": "true",
|
|
1091
1131
|
}
|
|
1132
|
+
)
|
|
1133
|
+
return spark_options
|
|
1092
1134
|
|
|
1093
1135
|
def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
|
|
1094
1136
|
import pyspark.sql.functions as funcs
|
|
@@ -1110,8 +1152,12 @@ class CSVTarget(BaseStoreTarget):
|
|
|
1110
1152
|
start_time=None,
|
|
1111
1153
|
end_time=None,
|
|
1112
1154
|
time_column=None,
|
|
1155
|
+
additional_filters=None,
|
|
1113
1156
|
**kwargs,
|
|
1114
1157
|
):
|
|
1158
|
+
mlrun.utils.helpers.additional_filters_warning(
|
|
1159
|
+
additional_filters, self.__class__
|
|
1160
|
+
)
|
|
1115
1161
|
df = super().as_df(
|
|
1116
1162
|
columns=columns,
|
|
1117
1163
|
df_module=df_module,
|
|
@@ -1132,6 +1178,134 @@ class CSVTarget(BaseStoreTarget):
|
|
|
1132
1178
|
return True
|
|
1133
1179
|
|
|
1134
1180
|
|
|
1181
|
+
class SnowflakeTarget(BaseStoreTarget):
|
|
1182
|
+
"""
|
|
1183
|
+
:param attributes: A dictionary of attributes for Snowflake connection; will be overridden by database parameters
|
|
1184
|
+
if they exist.
|
|
1185
|
+
:param url: Snowflake hostname, in the format: <account_name>.<region>.snowflakecomputing.com
|
|
1186
|
+
:param user: Snowflake user for login
|
|
1187
|
+
:param db_schema: Database schema
|
|
1188
|
+
:param database: Database name
|
|
1189
|
+
:param warehouse: Snowflake warehouse name
|
|
1190
|
+
:param table_name: Snowflake table name
|
|
1191
|
+
"""
|
|
1192
|
+
|
|
1193
|
+
support_spark = True
|
|
1194
|
+
support_append = True
|
|
1195
|
+
is_offline = True
|
|
1196
|
+
kind = TargetTypes.snowflake
|
|
1197
|
+
|
|
1198
|
+
def __init__(
|
|
1199
|
+
self,
|
|
1200
|
+
name: str = "",
|
|
1201
|
+
path=None,
|
|
1202
|
+
attributes: dict[str, str] = None,
|
|
1203
|
+
after_step=None,
|
|
1204
|
+
columns=None,
|
|
1205
|
+
partitioned: bool = False,
|
|
1206
|
+
key_bucketing_number: Optional[int] = None,
|
|
1207
|
+
partition_cols: Optional[list[str]] = None,
|
|
1208
|
+
time_partitioning_granularity: Optional[str] = None,
|
|
1209
|
+
max_events: Optional[int] = None,
|
|
1210
|
+
flush_after_seconds: Optional[int] = None,
|
|
1211
|
+
storage_options: dict[str, str] = None,
|
|
1212
|
+
schema: dict[str, Any] = None,
|
|
1213
|
+
credentials_prefix=None,
|
|
1214
|
+
url: str = None,
|
|
1215
|
+
user: str = None,
|
|
1216
|
+
db_schema: str = None,
|
|
1217
|
+
database: str = None,
|
|
1218
|
+
warehouse: str = None,
|
|
1219
|
+
table_name: str = None,
|
|
1220
|
+
):
|
|
1221
|
+
attributes = attributes or {}
|
|
1222
|
+
if url:
|
|
1223
|
+
attributes["url"] = url
|
|
1224
|
+
if user:
|
|
1225
|
+
attributes["user"] = user
|
|
1226
|
+
if database:
|
|
1227
|
+
attributes["database"] = database
|
|
1228
|
+
if db_schema:
|
|
1229
|
+
attributes["db_schema"] = db_schema
|
|
1230
|
+
if warehouse:
|
|
1231
|
+
attributes["warehouse"] = warehouse
|
|
1232
|
+
if table_name:
|
|
1233
|
+
attributes["table"] = table_name
|
|
1234
|
+
|
|
1235
|
+
super().__init__(
|
|
1236
|
+
name,
|
|
1237
|
+
path,
|
|
1238
|
+
attributes,
|
|
1239
|
+
after_step,
|
|
1240
|
+
list(schema.keys()) if schema else columns,
|
|
1241
|
+
partitioned,
|
|
1242
|
+
key_bucketing_number,
|
|
1243
|
+
partition_cols,
|
|
1244
|
+
time_partitioning_granularity,
|
|
1245
|
+
max_events=max_events,
|
|
1246
|
+
flush_after_seconds=flush_after_seconds,
|
|
1247
|
+
storage_options=storage_options,
|
|
1248
|
+
schema=schema,
|
|
1249
|
+
credentials_prefix=credentials_prefix,
|
|
1250
|
+
)
|
|
1251
|
+
|
|
1252
|
+
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
1253
|
+
spark_options = get_snowflake_spark_options(self.attributes)
|
|
1254
|
+
spark_options["dbtable"] = self.attributes.get("table")
|
|
1255
|
+
return spark_options
|
|
1256
|
+
|
|
1257
|
+
def purge(self):
|
|
1258
|
+
import snowflake.connector
|
|
1259
|
+
|
|
1260
|
+
missing = [
|
|
1261
|
+
key
|
|
1262
|
+
for key in ["database", "db_schema", "table", "url", "user", "warehouse"]
|
|
1263
|
+
if self.attributes.get(key) is None
|
|
1264
|
+
]
|
|
1265
|
+
if missing:
|
|
1266
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
1267
|
+
f"Can't purge Snowflake target, "
|
|
1268
|
+
f"some attributes are missing: {', '.join(missing)}"
|
|
1269
|
+
)
|
|
1270
|
+
account = self.attributes["url"].replace(".snowflakecomputing.com", "")
|
|
1271
|
+
|
|
1272
|
+
with snowflake.connector.connect(
|
|
1273
|
+
account=account,
|
|
1274
|
+
user=self.attributes["user"],
|
|
1275
|
+
password=get_snowflake_password(),
|
|
1276
|
+
warehouse=self.attributes["warehouse"],
|
|
1277
|
+
) as snowflake_connector:
|
|
1278
|
+
drop_statement = (
|
|
1279
|
+
f"DROP TABLE IF EXISTS {self.attributes['database']}.{self.attributes['db_schema']}"
|
|
1280
|
+
f".{self.attributes['table']}"
|
|
1281
|
+
)
|
|
1282
|
+
snowflake_connector.execute_string(drop_statement)
|
|
1283
|
+
|
|
1284
|
+
def as_df(
|
|
1285
|
+
self,
|
|
1286
|
+
columns=None,
|
|
1287
|
+
df_module=None,
|
|
1288
|
+
entities=None,
|
|
1289
|
+
start_time=None,
|
|
1290
|
+
end_time=None,
|
|
1291
|
+
time_column=None,
|
|
1292
|
+
additional_filters=None,
|
|
1293
|
+
**kwargs,
|
|
1294
|
+
):
|
|
1295
|
+
raise mlrun.errors.MLRunRuntimeError(
|
|
1296
|
+
f"{type(self).__name__} does not support pandas engine"
|
|
1297
|
+
)
|
|
1298
|
+
|
|
1299
|
+
@property
|
|
1300
|
+
def source_spark_attributes(self) -> dict:
|
|
1301
|
+
keys = ["url", "user", "database", "db_schema", "warehouse"]
|
|
1302
|
+
attributes = self.attributes or {}
|
|
1303
|
+
snowflake_dict = {key: attributes.get(key) for key in keys}
|
|
1304
|
+
table = attributes.get("table")
|
|
1305
|
+
snowflake_dict["query"] = f"SELECT * from {table}" if table else None
|
|
1306
|
+
return snowflake_dict
|
|
1307
|
+
|
|
1308
|
+
|
|
1135
1309
|
class NoSqlBaseTarget(BaseStoreTarget):
|
|
1136
1310
|
is_table = True
|
|
1137
1311
|
is_online = True
|
|
@@ -1156,6 +1330,19 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1156
1330
|
timestamp_key=None,
|
|
1157
1331
|
featureset_status=None,
|
|
1158
1332
|
):
|
|
1333
|
+
table, column_list = self._get_table_and_columns(features, key_columns)
|
|
1334
|
+
|
|
1335
|
+
graph.add_step(
|
|
1336
|
+
name=self.name or self.writer_step_name,
|
|
1337
|
+
after=after,
|
|
1338
|
+
graph_shape="cylinder",
|
|
1339
|
+
class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
|
|
1340
|
+
columns=column_list,
|
|
1341
|
+
table=table,
|
|
1342
|
+
**self.attributes,
|
|
1343
|
+
)
|
|
1344
|
+
|
|
1345
|
+
def _get_table_and_columns(self, features, key_columns):
|
|
1159
1346
|
key_columns = list(key_columns.keys())
|
|
1160
1347
|
table = self._resource.uri
|
|
1161
1348
|
column_list = self._get_column_list(
|
|
@@ -1174,15 +1361,7 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1174
1361
|
col for col in column_list if col[0] not in aggregate_features
|
|
1175
1362
|
]
|
|
1176
1363
|
|
|
1177
|
-
|
|
1178
|
-
name=self.name or self.writer_step_name,
|
|
1179
|
-
after=after,
|
|
1180
|
-
graph_shape="cylinder",
|
|
1181
|
-
class_name="storey.NoSqlTarget",
|
|
1182
|
-
columns=column_list,
|
|
1183
|
-
table=table,
|
|
1184
|
-
**self.attributes,
|
|
1185
|
-
)
|
|
1364
|
+
return table, column_list
|
|
1186
1365
|
|
|
1187
1366
|
def prepare_spark_df(self, df, key_columns, timestamp_key=None, spark_options=None):
|
|
1188
1367
|
raise NotImplementedError()
|
|
@@ -1193,9 +1372,6 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1193
1372
|
def get_dask_options(self):
|
|
1194
1373
|
return {"format": "csv"}
|
|
1195
1374
|
|
|
1196
|
-
def as_df(self, columns=None, df_module=None, **kwargs):
|
|
1197
|
-
raise NotImplementedError()
|
|
1198
|
-
|
|
1199
1375
|
def write_dataframe(
|
|
1200
1376
|
self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
|
|
1201
1377
|
):
|
|
@@ -1203,7 +1379,10 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1203
1379
|
options = self.get_spark_options(key_column, timestamp_key)
|
|
1204
1380
|
options.update(kwargs)
|
|
1205
1381
|
df = self.prepare_spark_df(df)
|
|
1206
|
-
|
|
1382
|
+
write_format = options.pop("format", None)
|
|
1383
|
+
write_spark_dataframe_with_options(
|
|
1384
|
+
options, df, "overwrite", write_format=write_format
|
|
1385
|
+
)
|
|
1207
1386
|
else:
|
|
1208
1387
|
# To prevent modification of the original dataframe and make sure
|
|
1209
1388
|
# that the last event of a key is the one being persisted
|
|
@@ -1213,7 +1392,11 @@ class NoSqlBaseTarget(BaseStoreTarget):
|
|
|
1213
1392
|
df = df.copy(deep=False)
|
|
1214
1393
|
access_key = self._get_credential("V3IO_ACCESS_KEY")
|
|
1215
1394
|
|
|
1216
|
-
|
|
1395
|
+
store, path_in_store, target_path = self._get_store_and_path()
|
|
1396
|
+
storage_options = store.get_storage_options()
|
|
1397
|
+
access_key = storage_options.get("v3io_access_key", access_key)
|
|
1398
|
+
|
|
1399
|
+
_, path_with_container = parse_path(target_path)
|
|
1217
1400
|
container, path = split_path(path_with_container)
|
|
1218
1401
|
|
|
1219
1402
|
frames_client = get_frames_client(
|
|
@@ -1231,17 +1414,31 @@ class NoSqlTarget(NoSqlBaseTarget):
|
|
|
1231
1414
|
def get_table_object(self):
|
|
1232
1415
|
from storey import Table, V3ioDriver
|
|
1233
1416
|
|
|
1234
|
-
|
|
1235
|
-
endpoint, uri = parse_path(
|
|
1417
|
+
store, path_in_store, target_path = self._get_store_and_path()
|
|
1418
|
+
endpoint, uri = parse_path(target_path)
|
|
1419
|
+
storage_options = store.get_storage_options()
|
|
1420
|
+
access_key = storage_options.get("v3io_access_key")
|
|
1421
|
+
|
|
1236
1422
|
return Table(
|
|
1237
1423
|
uri,
|
|
1238
|
-
V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api),
|
|
1424
|
+
V3ioDriver(webapi=endpoint or mlrun.mlconf.v3io_api, access_key=access_key),
|
|
1239
1425
|
flush_interval_secs=mlrun.mlconf.feature_store.flush_interval,
|
|
1240
1426
|
)
|
|
1241
1427
|
|
|
1242
1428
|
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
1429
|
+
store, path_in_store, target_path = self._get_store_and_path()
|
|
1430
|
+
storage_options = store.get_storage_options()
|
|
1431
|
+
store_access_key = storage_options.get("v3io_access_key")
|
|
1432
|
+
env_access_key = self._secrets.get(
|
|
1433
|
+
"V3IO_ACCESS_KEY", os.getenv("V3IO_ACCESS_KEY")
|
|
1434
|
+
)
|
|
1435
|
+
if store_access_key and env_access_key and store_access_key != env_access_key:
|
|
1436
|
+
logger.warning(
|
|
1437
|
+
"The Spark v3io connector does not support access_key parameterization."
|
|
1438
|
+
"Spark will disregard the store-provided key."
|
|
1439
|
+
)
|
|
1243
1440
|
spark_options = {
|
|
1244
|
-
"path":
|
|
1441
|
+
"path": store.spark_url + path_in_store,
|
|
1245
1442
|
"format": "io.iguaz.v3io.spark.sql.kv",
|
|
1246
1443
|
}
|
|
1247
1444
|
if isinstance(key_column, list) and len(key_column) >= 1:
|
|
@@ -1287,11 +1484,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
|
1287
1484
|
support_spark = True
|
|
1288
1485
|
writer_step_name = "RedisNoSqlTarget"
|
|
1289
1486
|
|
|
1290
|
-
|
|
1291
|
-
|
|
1292
|
-
|
|
1293
|
-
def _get_server_endpoint(self):
|
|
1294
|
-
endpoint, uri = parse_path(self.get_target_path())
|
|
1487
|
+
@staticmethod
|
|
1488
|
+
def get_server_endpoint(path, credentials_prefix=None):
|
|
1489
|
+
endpoint, uri = parse_path(path)
|
|
1295
1490
|
endpoint = endpoint or mlrun.mlconf.redis.url
|
|
1296
1491
|
if endpoint.startswith("ds://"):
|
|
1297
1492
|
datastore_profile = datastore_profile_read(endpoint)
|
|
@@ -1308,8 +1503,15 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
|
1308
1503
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1309
1504
|
"Provide Redis username and password only via secrets"
|
|
1310
1505
|
)
|
|
1311
|
-
|
|
1312
|
-
|
|
1506
|
+
credentials_prefix = credentials_prefix or mlrun.get_secret_or_env(
|
|
1507
|
+
key="CREDENTIALS_PREFIX"
|
|
1508
|
+
)
|
|
1509
|
+
user = mlrun.get_secret_or_env(
|
|
1510
|
+
"REDIS_USER", default="", prefix=credentials_prefix
|
|
1511
|
+
)
|
|
1512
|
+
password = mlrun.get_secret_or_env(
|
|
1513
|
+
"REDIS_PASSWORD", default="", prefix=credentials_prefix
|
|
1514
|
+
)
|
|
1313
1515
|
host = parsed_endpoint.hostname
|
|
1314
1516
|
port = parsed_endpoint.port if parsed_endpoint.port else "6379"
|
|
1315
1517
|
scheme = parsed_endpoint.scheme
|
|
@@ -1323,7 +1525,9 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
|
1323
1525
|
from storey import Table
|
|
1324
1526
|
from storey.redis_driver import RedisDriver
|
|
1325
1527
|
|
|
1326
|
-
endpoint, uri = self.
|
|
1528
|
+
endpoint, uri = self.get_server_endpoint(
|
|
1529
|
+
self.get_target_path(), self.credentials_prefix
|
|
1530
|
+
)
|
|
1327
1531
|
|
|
1328
1532
|
return Table(
|
|
1329
1533
|
uri,
|
|
@@ -1332,12 +1536,14 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
|
1332
1536
|
)
|
|
1333
1537
|
|
|
1334
1538
|
def get_spark_options(self, key_column=None, timestamp_key=None, overwrite=True):
|
|
1335
|
-
endpoint, uri = self.
|
|
1539
|
+
endpoint, uri = self.get_server_endpoint(
|
|
1540
|
+
self.get_target_path(), self.credentials_prefix
|
|
1541
|
+
)
|
|
1336
1542
|
parsed_endpoint = urlparse(endpoint)
|
|
1337
|
-
|
|
1543
|
+
store, path_in_store, path = self._get_store_and_path()
|
|
1338
1544
|
return {
|
|
1339
1545
|
"key.column": "_spark_object_name",
|
|
1340
|
-
"table": "{" +
|
|
1546
|
+
"table": "{" + path_in_store,
|
|
1341
1547
|
"format": "org.apache.spark.sql.redis",
|
|
1342
1548
|
"host": parsed_endpoint.hostname,
|
|
1343
1549
|
"port": parsed_endpoint.port,
|
|
@@ -1364,6 +1570,29 @@ class RedisNoSqlTarget(NoSqlBaseTarget):
|
|
|
1364
1570
|
|
|
1365
1571
|
return df
|
|
1366
1572
|
|
|
1573
|
+
def add_writer_step(
|
|
1574
|
+
self,
|
|
1575
|
+
graph,
|
|
1576
|
+
after,
|
|
1577
|
+
features,
|
|
1578
|
+
key_columns=None,
|
|
1579
|
+
timestamp_key=None,
|
|
1580
|
+
featureset_status=None,
|
|
1581
|
+
):
|
|
1582
|
+
table, column_list = self._get_table_and_columns(features, key_columns)
|
|
1583
|
+
|
|
1584
|
+
graph.add_step(
|
|
1585
|
+
path=self.get_target_path(),
|
|
1586
|
+
name=self.name or self.writer_step_name,
|
|
1587
|
+
after=after,
|
|
1588
|
+
graph_shape="cylinder",
|
|
1589
|
+
class_name="mlrun.datastore.storeytargets.RedisNoSqlStoreyTarget",
|
|
1590
|
+
columns=column_list,
|
|
1591
|
+
table=table,
|
|
1592
|
+
credentials_prefix=self.credentials_prefix,
|
|
1593
|
+
**self.attributes,
|
|
1594
|
+
)
|
|
1595
|
+
|
|
1367
1596
|
|
|
1368
1597
|
class StreamTarget(BaseStoreTarget):
|
|
1369
1598
|
kind = TargetTypes.stream
|
|
@@ -1382,33 +1611,46 @@ class StreamTarget(BaseStoreTarget):
|
|
|
1382
1611
|
timestamp_key=None,
|
|
1383
1612
|
featureset_status=None,
|
|
1384
1613
|
):
|
|
1385
|
-
from storey import V3ioDriver
|
|
1386
|
-
|
|
1387
1614
|
key_columns = list(key_columns.keys())
|
|
1388
|
-
|
|
1389
|
-
if not path:
|
|
1390
|
-
raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
|
|
1391
|
-
endpoint, uri = parse_path(path)
|
|
1615
|
+
|
|
1392
1616
|
column_list = self._get_column_list(
|
|
1393
1617
|
features=features, timestamp_key=timestamp_key, key_columns=key_columns
|
|
1394
1618
|
)
|
|
1619
|
+
stream_path = self.get_target_path()
|
|
1620
|
+
if not stream_path:
|
|
1621
|
+
raise mlrun.errors.MLRunInvalidArgumentError("StreamTarget requires a path")
|
|
1395
1622
|
|
|
1396
1623
|
graph.add_step(
|
|
1397
1624
|
name=self.name or "StreamTarget",
|
|
1398
1625
|
after=after,
|
|
1399
1626
|
graph_shape="cylinder",
|
|
1400
|
-
class_name="
|
|
1627
|
+
class_name="mlrun.datastore.storeytargets.StreamStoreyTarget",
|
|
1401
1628
|
columns=column_list,
|
|
1402
|
-
|
|
1403
|
-
stream_path=uri,
|
|
1629
|
+
stream_path=stream_path,
|
|
1404
1630
|
**self.attributes,
|
|
1405
1631
|
)
|
|
1406
1632
|
|
|
1407
|
-
def as_df(self, columns=None, df_module=None, **kwargs):
|
|
1408
|
-
raise NotImplementedError()
|
|
1409
|
-
|
|
1410
1633
|
|
|
1411
1634
|
class KafkaTarget(BaseStoreTarget):
|
|
1635
|
+
"""
|
|
1636
|
+
Kafka target storage driver, used to write data into kafka topics.
|
|
1637
|
+
example::
|
|
1638
|
+
# define target
|
|
1639
|
+
kafka_target = KafkaTarget(
|
|
1640
|
+
name="kafka", path="my_topic", brokers="localhost:9092"
|
|
1641
|
+
)
|
|
1642
|
+
# ingest
|
|
1643
|
+
stocks_set.ingest(stocks, [kafka_target])
|
|
1644
|
+
:param name: target name
|
|
1645
|
+
:param path: topic name e.g. "my_topic"
|
|
1646
|
+
:param after_step: optional, after what step in the graph to add the target
|
|
1647
|
+
:param columns: optional, which columns from data to write
|
|
1648
|
+
:param bootstrap_servers: Deprecated. Use the brokers parameter instead
|
|
1649
|
+
:param producer_options: additional configurations for kafka producer
|
|
1650
|
+
:param brokers: kafka broker as represented by a host:port pair, or a list of kafka brokers, e.g.
|
|
1651
|
+
"localhost:9092", or ["kafka-broker-1:9092", "kafka-broker-2:9092"]
|
|
1652
|
+
"""
|
|
1653
|
+
|
|
1412
1654
|
kind = TargetTypes.kafka
|
|
1413
1655
|
is_table = False
|
|
1414
1656
|
is_online = False
|
|
@@ -1421,11 +1663,27 @@ class KafkaTarget(BaseStoreTarget):
|
|
|
1421
1663
|
*args,
|
|
1422
1664
|
bootstrap_servers=None,
|
|
1423
1665
|
producer_options=None,
|
|
1666
|
+
brokers=None,
|
|
1424
1667
|
**kwargs,
|
|
1425
1668
|
):
|
|
1426
1669
|
attrs = {}
|
|
1427
|
-
|
|
1428
|
-
|
|
1670
|
+
|
|
1671
|
+
# TODO: Remove this in 1.9.0
|
|
1672
|
+
if bootstrap_servers:
|
|
1673
|
+
if brokers:
|
|
1674
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1675
|
+
"KafkaTarget cannot be created with both the 'brokers' parameter and the deprecated "
|
|
1676
|
+
"'bootstrap_servers' parameter. Please use 'brokers' only."
|
|
1677
|
+
)
|
|
1678
|
+
warnings.warn(
|
|
1679
|
+
"'bootstrap_servers' parameter is deprecated in 1.7.0 and will be removed in 1.9.0, "
|
|
1680
|
+
"use 'brokers' instead.",
|
|
1681
|
+
FutureWarning,
|
|
1682
|
+
)
|
|
1683
|
+
brokers = bootstrap_servers
|
|
1684
|
+
|
|
1685
|
+
if brokers:
|
|
1686
|
+
attrs["brokers"] = brokers
|
|
1429
1687
|
if producer_options is not None:
|
|
1430
1688
|
attrs["producer_options"] = producer_options
|
|
1431
1689
|
|
|
@@ -1444,37 +1702,21 @@ class KafkaTarget(BaseStoreTarget):
|
|
|
1444
1702
|
column_list = self._get_column_list(
|
|
1445
1703
|
features=features, timestamp_key=timestamp_key, key_columns=key_columns
|
|
1446
1704
|
)
|
|
1447
|
-
|
|
1448
|
-
datastore_profile = datastore_profile_read(self.path)
|
|
1449
|
-
attributes = datastore_profile.attributes()
|
|
1450
|
-
bootstrap_servers = attributes.pop("bootstrap_servers", None)
|
|
1451
|
-
topic = datastore_profile.topic
|
|
1452
|
-
else:
|
|
1453
|
-
attributes = copy(self.attributes)
|
|
1454
|
-
bootstrap_servers = attributes.pop("bootstrap_servers", None)
|
|
1455
|
-
topic, bootstrap_servers = parse_kafka_url(
|
|
1456
|
-
self.get_target_path(), bootstrap_servers
|
|
1457
|
-
)
|
|
1705
|
+
path = self.get_target_path()
|
|
1458
1706
|
|
|
1459
|
-
if not
|
|
1460
|
-
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
1461
|
-
"KafkaTarget requires a path (topic)"
|
|
1462
|
-
)
|
|
1707
|
+
if not path:
|
|
1708
|
+
raise mlrun.errors.MLRunInvalidArgumentError("KafkaTarget requires a path")
|
|
1463
1709
|
|
|
1464
1710
|
graph.add_step(
|
|
1465
1711
|
name=self.name or "KafkaTarget",
|
|
1466
1712
|
after=after,
|
|
1467
1713
|
graph_shape="cylinder",
|
|
1468
|
-
class_name="
|
|
1714
|
+
class_name="mlrun.datastore.storeytargets.KafkaStoreyTarget",
|
|
1469
1715
|
columns=column_list,
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
**attributes,
|
|
1716
|
+
path=path,
|
|
1717
|
+
attributes=self.attributes,
|
|
1473
1718
|
)
|
|
1474
1719
|
|
|
1475
|
-
def as_df(self, columns=None, df_module=None, **kwargs):
|
|
1476
|
-
raise NotImplementedError()
|
|
1477
|
-
|
|
1478
1720
|
def purge(self):
|
|
1479
1721
|
pass
|
|
1480
1722
|
|
|
@@ -1509,7 +1751,7 @@ class TSDBTarget(BaseStoreTarget):
|
|
|
1509
1751
|
|
|
1510
1752
|
graph.add_step(
|
|
1511
1753
|
name=self.name or "TSDBTarget",
|
|
1512
|
-
class_name="
|
|
1754
|
+
class_name="mlrun.datastore.storeytargets.TSDBStoreyTarget",
|
|
1513
1755
|
after=after,
|
|
1514
1756
|
graph_shape="cylinder",
|
|
1515
1757
|
path=uri,
|
|
@@ -1519,9 +1761,6 @@ class TSDBTarget(BaseStoreTarget):
|
|
|
1519
1761
|
**self.attributes,
|
|
1520
1762
|
)
|
|
1521
1763
|
|
|
1522
|
-
def as_df(self, columns=None, df_module=None, **kwargs):
|
|
1523
|
-
raise NotImplementedError()
|
|
1524
|
-
|
|
1525
1764
|
def write_dataframe(
|
|
1526
1765
|
self, df, key_column=None, timestamp_key=None, chunk_id=0, **kwargs
|
|
1527
1766
|
):
|
|
@@ -1535,7 +1774,11 @@ class TSDBTarget(BaseStoreTarget):
                 key_column = [key_column]
             new_index.extend(key_column)
 
-        _, path_with_container = parse_path(self.get_target_path())
+        store, path_in_store, target_path = self._get_store_and_path()
+        storage_options = store.get_storage_options()
+        access_key = storage_options.get("v3io_access_key", access_key)
+
+        _, path_with_container = parse_path(target_path)
         container, path = split_path(path_with_container)
 
         frames_client = get_frames_client(
@@ -1555,6 +1798,7 @@ class CustomTarget(BaseStoreTarget):
     is_online = False
     support_spark = False
     support_storey = True
+    support_pandas = True
 
     def __init__(
         self,
@@ -1590,6 +1834,7 @@ class CustomTarget(BaseStoreTarget):
 class DFTarget(BaseStoreTarget):
     kind = TargetTypes.dataframe
     support_storey = True
+    support_pandas = True
 
     def __init__(self, *args, name="dataframe", **kwargs):
         self._df = None
@@ -1626,11 +1871,16 @@ class DFTarget(BaseStoreTarget):
         self,
         columns=None,
         df_module=None,
+        entities=None,
        start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
         return select_columns_from_df(
             filter_df_start_end_time(
                 self._df,
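Both DFTarget.as_df (above) and SQLTarget.as_df (further below) now accept an `additional_filters` argument and route it through `mlrun.utils.helpers.additional_filters_warning`. A rough stand-in for what such a helper could do, assuming it simply warns and drops the filters for targets that cannot apply them; the real helper's behavior is only inferred from this diff:

```python
import warnings


def additional_filters_warning(additional_filters, cls):
    # Illustrative stand-in, not the mlrun implementation: targets that cannot
    # push the filters down just warn that they will be ignored.
    if additional_filters:
        warnings.warn(
            f"additional_filters is not supported by {cls.__name__} and will be ignored",
            UserWarning,
        )
```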
@@ -1647,29 +1897,30 @@ class SQLTarget(BaseStoreTarget):
     is_online = True
     support_spark = False
     support_storey = True
+    support_pandas = True
 
     def __init__(
         self,
         name: str = "",
         path=None,
-        attributes: Dict[str, str] = None,
+        attributes: dict[str, str] = None,
         after_step=None,
         partitioned: bool = False,
         key_bucketing_number: Optional[int] = None,
-        partition_cols: Optional[List[str]] = None,
+        partition_cols: Optional[list[str]] = None,
         time_partitioning_granularity: Optional[str] = None,
         max_events: Optional[int] = None,
         flush_after_seconds: Optional[int] = None,
-        storage_options: Dict[str, str] = None,
+        storage_options: dict[str, str] = None,
         db_url: str = None,
         table_name: str = None,
-        schema: Dict[str, Any] = None,
+        schema: dict[str, Any] = None,
         primary_key_column: str = "",
         if_exists: str = "append",
         create_table: bool = False,
         # create_according_to_data: bool = False,
         varchar_len: int = 50,
-        parse_dates: List[str] = None,
+        parse_dates: list[str] = None,
     ):
         """
         Write to SqlDB as output target for a flow.
@@ -1789,7 +2040,7 @@ class SQLTarget(BaseStoreTarget):
             name=self.name or "SqlTarget",
             after=after,
             graph_shape="cylinder",
-            class_name="storey.NoSqlTarget",
+            class_name="mlrun.datastore.storeytargets.NoSqlStoreyTarget",
             columns=column_list,
             header=True,
             table=table,
@@ -1805,6 +2056,7 @@ class SQLTarget(BaseStoreTarget):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         try:
@@ -1813,9 +2065,13 @@ class SQLTarget(BaseStoreTarget):
         except (ModuleNotFoundError, ImportError) as exc:
             self._raise_sqlalchemy_import_error(exc)
 
+        mlrun.utils.helpers.additional_filters_warning(
+            additional_filters, self.__class__
+        )
+
         db_path, table_name, _, _, _, _ = self._parse_url()
         engine = sqlalchemy.create_engine(db_path)
-        parse_dates: Optional[List[str]] = self.attributes.get("parse_dates")
+        parse_dates: Optional[list[str]] = self.attributes.get("parse_dates")
         with engine.connect() as conn:
             query, parse_dates = _generate_sql_query_with_time_filter(
                 table_name=table_name,
@@ -1902,7 +2158,7 @@ class SQLTarget(BaseStoreTarget):
                 raise ValueError(f"Table named {table_name} is not exist")
 
             elif not table_exists and create_table:
-                TYPE_TO_SQL_TYPE = {
+                type_to_sql_type = {
                     int: sqlalchemy.Integer,
                     str: sqlalchemy.String(self.attributes.get("varchar_len")),
                     datetime.datetime: sqlalchemy.dialects.mysql.DATETIME(fsp=6),
@@ -1915,12 +2171,16 @@ class SQLTarget(BaseStoreTarget):
                 # creat new table with the given name
                 columns = []
                 for col, col_type in self.schema.items():
-                    col_type = TYPE_TO_SQL_TYPE.get(col_type)
-                    if col_type is None:
-                        raise TypeError(f"{col_type} unsupported type for column {col}")
+                    col_type_sql = type_to_sql_type.get(col_type)
+                    if col_type_sql is None:
+                        raise TypeError(
+                            f"'{col_type}' unsupported type for column '{col}'"
+                        )
                     columns.append(
                         sqlalchemy.Column(
-                            col, col_type, primary_key=(col in primary_key_for_check)
+                            col,
+                            col_type_sql,
+                            primary_key=(col in primary_key_for_check),
                         )
                     )
 
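The two hunks above replace an unguarded column-type lookup with an explicit mapping and a clear TypeError for unsupported Python types. A self-contained sketch of the same pattern outside MLRun; the varchar length, example schema, and primary-key set are made-up values standing in for `self.attributes` and `self.schema`:

```python
import datetime

import sqlalchemy
from sqlalchemy.dialects.mysql import DATETIME

# Map Python types from the declared schema to SQLAlchemy column types.
type_to_sql_type = {
    int: sqlalchemy.Integer,
    str: sqlalchemy.String(50),  # stand-in for self.attributes.get("varchar_len")
    datetime.datetime: DATETIME(fsp=6),
}

schema = {"id": int, "name": str, "ts": datetime.datetime}
primary_key_for_check = {"id"}

columns = []
for col, col_type in schema.items():
    col_type_sql = type_to_sql_type.get(col_type)
    if col_type_sql is None:
        # Fail fast instead of letting SQLAlchemy error out later.
        raise TypeError(f"'{col_type}' unsupported type for column '{col}'")
    columns.append(
        sqlalchemy.Column(col, col_type_sql, primary_key=(col in primary_key_for_check))
    )
```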
@@ -1951,10 +2211,11 @@ kind_to_driver = {
     TargetTypes.tsdb: TSDBTarget,
     TargetTypes.custom: CustomTarget,
     TargetTypes.sql: SQLTarget,
+    TargetTypes.snowflake: SnowflakeTarget,
 }
 
 
-def _get_target_path(driver, resource, run_id_mode=False):
+def _get_target_path(driver, resource, run_id_mode=False, netloc=None, scheme=""):
     """return the default target path given the resource and target kind"""
     kind = driver.kind
     suffix = driver.suffix
@@ -1971,11 +2232,27 @@ def _get_target_path(driver, resource, run_id_mode=False):
         )
     name = resource.metadata.name
     project = resource.metadata.project or mlrun.mlconf.default_project
-    data_prefix = get_default_prefix_for_target(kind).format(
+
+    default_kind_name = kind
+    if scheme == "ds":
+        # "dsnosql" is not an actual target like Parquet or Redis; rather, it serves
+        # as a placeholder that can be used in any specified target
+        default_kind_name = "dsnosql"
+    if scheme == "redis" or scheme == "rediss":
+        default_kind_name = TargetTypes.redisnosql
+
+    netloc = netloc or ""
+    data_prefix = get_default_prefix_for_target(default_kind_name).format(
+        ds_profile_name=netloc,  # In case of ds profile, set its the name
+        authority=netloc,  # In case of redis, replace {authority} with netloc
         project=project,
         kind=kind,
         name=name,
     )
+
+    if scheme == "rediss":
+        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
+
     # todo: handle ver tag changes, may need to copy files?
     if not run_id_mode:
         version = resource.metadata.tag
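The extended `_get_target_path` above derives the default data prefix from the target URL's scheme and netloc. A simplified sketch of that selection logic with made-up prefix templates; the real templates live in mlrun.mlconf (via get_default_prefix_for_target) and are not shown in this diff:

```python
# Hypothetical prefix templates standing in for the mlrun.mlconf defaults.
TEMPLATES = {
    "dsnosql": "ds://{ds_profile_name}/projects/{project}/{kind}/{name}",
    "redisnosql": "redis://{authority}/projects/{project}/{kind}/{name}",
    "default": "v3io:///projects/{project}/FeatureStore/{name}/{kind}",
}


def resolve_data_prefix(kind, project, name, netloc=None, scheme=""):
    # Mirrors the scheme handling added in the hunk above.
    default_kind_name = kind
    if scheme == "ds":
        default_kind_name = "dsnosql"
    if scheme in ("redis", "rediss"):
        default_kind_name = "redisnosql"

    netloc = netloc or ""
    template = TEMPLATES.get(default_kind_name, TEMPLATES["default"])
    data_prefix = template.format(
        ds_profile_name=netloc,  # ds profile name for ds:// targets
        authority=netloc,  # redis authority (host:port)
        project=project,
        kind=kind,
        name=name,
    )
    if scheme == "rediss":
        # Only the scheme differs for TLS-enabled Redis.
        data_prefix = data_prefix.replace("redis://", "rediss://", 1)
    return data_prefix
```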