PyPI - mlrun - Versions diffs - 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl - Mend

mlrun-1.6.4rc8-py3-none-any.whl → mlrun-1.7.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (305)

mlrun/__init__.py +11 -1
mlrun/__main__.py +40 -122
mlrun/alerts/__init__.py +15 -0
mlrun/alerts/alert.py +248 -0
mlrun/api/schemas/__init__.py +5 -4
mlrun/artifacts/__init__.py +8 -3
mlrun/artifacts/base.py +47 -257
mlrun/artifacts/dataset.py +11 -192
mlrun/artifacts/manager.py +79 -47
mlrun/artifacts/model.py +31 -159
mlrun/artifacts/plots.py +23 -380
mlrun/common/constants.py +74 -1
mlrun/common/db/sql_session.py +5 -5
mlrun/common/formatters/__init__.py +21 -0
mlrun/common/formatters/artifact.py +45 -0
mlrun/common/formatters/base.py +113 -0
mlrun/common/formatters/feature_set.py +33 -0
mlrun/common/formatters/function.py +46 -0
mlrun/common/formatters/pipeline.py +53 -0
mlrun/common/formatters/project.py +51 -0
mlrun/common/formatters/run.py +29 -0
mlrun/common/helpers.py +12 -3
mlrun/common/model_monitoring/helpers.py +9 -5
mlrun/{runtimes → common/runtimes}/constants.py +37 -9
mlrun/common/schemas/__init__.py +31 -5
mlrun/common/schemas/alert.py +202 -0
mlrun/common/schemas/api_gateway.py +196 -0
mlrun/common/schemas/artifact.py +25 -4
mlrun/common/schemas/auth.py +16 -5
mlrun/common/schemas/background_task.py +1 -1
mlrun/common/schemas/client_spec.py +4 -2
mlrun/common/schemas/common.py +7 -4
mlrun/common/schemas/constants.py +3 -0
mlrun/common/schemas/feature_store.py +74 -44
mlrun/common/schemas/frontend_spec.py +15 -7
mlrun/common/schemas/function.py +12 -1
mlrun/common/schemas/hub.py +11 -18
mlrun/common/schemas/memory_reports.py +2 -2
mlrun/common/schemas/model_monitoring/__init__.py +20 -4
mlrun/common/schemas/model_monitoring/constants.py +123 -42
mlrun/common/schemas/model_monitoring/grafana.py +13 -9
mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
mlrun/common/schemas/notification.py +71 -14
mlrun/common/schemas/object.py +2 -2
mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
mlrun/common/schemas/pipeline.py +8 -1
mlrun/common/schemas/project.py +69 -18
mlrun/common/schemas/runs.py +7 -1
mlrun/common/schemas/runtime_resource.py +8 -12
mlrun/common/schemas/schedule.py +4 -4
mlrun/common/schemas/tag.py +1 -2
mlrun/common/schemas/workflow.py +12 -4
mlrun/common/types.py +14 -1
mlrun/config.py +154 -69
mlrun/data_types/data_types.py +6 -1
mlrun/data_types/spark.py +2 -2
mlrun/data_types/to_pandas.py +67 -37
mlrun/datastore/__init__.py +6 -8
mlrun/datastore/alibaba_oss.py +131 -0
mlrun/datastore/azure_blob.py +143 -42
mlrun/datastore/base.py +102 -58
mlrun/datastore/datastore.py +34 -13
mlrun/datastore/datastore_profile.py +146 -20
mlrun/datastore/dbfs_store.py +3 -7
mlrun/datastore/filestore.py +1 -4
mlrun/datastore/google_cloud_storage.py +97 -33
mlrun/datastore/hdfs.py +56 -0
mlrun/datastore/inmem.py +6 -3
mlrun/datastore/redis.py +7 -2
mlrun/datastore/s3.py +34 -12
mlrun/datastore/snowflake_utils.py +45 -0
mlrun/datastore/sources.py +303 -111
mlrun/datastore/spark_utils.py +31 -2
mlrun/datastore/store_resources.py +9 -7
mlrun/datastore/storeytargets.py +151 -0
mlrun/datastore/targets.py +453 -176
mlrun/datastore/utils.py +72 -58
mlrun/datastore/v3io.py +6 -1
mlrun/db/base.py +274 -41
mlrun/db/factory.py +1 -1
mlrun/db/httpdb.py +893 -225
mlrun/db/nopdb.py +291 -33
mlrun/errors.py +36 -6
mlrun/execution.py +115 -42
mlrun/feature_store/__init__.py +0 -2
mlrun/feature_store/api.py +65 -73
mlrun/feature_store/common.py +7 -12
mlrun/feature_store/feature_set.py +76 -55
mlrun/feature_store/feature_vector.py +39 -31
mlrun/feature_store/ingestion.py +7 -6
mlrun/feature_store/retrieval/base.py +16 -11
mlrun/feature_store/retrieval/dask_merger.py +2 -0
mlrun/feature_store/retrieval/job.py +13 -4
mlrun/feature_store/retrieval/local_merger.py +2 -0
mlrun/feature_store/retrieval/spark_merger.py +24 -32
mlrun/feature_store/steps.py +45 -34
mlrun/features.py +11 -21
mlrun/frameworks/_common/artifacts_library.py +9 -9
mlrun/frameworks/_common/mlrun_interface.py +5 -5
mlrun/frameworks/_common/model_handler.py +48 -48
mlrun/frameworks/_common/plan.py +5 -6
mlrun/frameworks/_common/producer.py +3 -4
mlrun/frameworks/_common/utils.py +5 -5
mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
mlrun/frameworks/_ml_common/model_handler.py +24 -24
mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
mlrun/frameworks/_ml_common/plan.py +2 -2
mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
mlrun/frameworks/_ml_common/utils.py +4 -4
mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
mlrun/frameworks/huggingface/model_server.py +4 -4
mlrun/frameworks/lgbm/__init__.py +33 -33
mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
mlrun/frameworks/lgbm/model_handler.py +10 -10
mlrun/frameworks/lgbm/model_server.py +6 -6
mlrun/frameworks/lgbm/utils.py +5 -5
mlrun/frameworks/onnx/dataset.py +8 -8
mlrun/frameworks/onnx/mlrun_interface.py +3 -3
mlrun/frameworks/onnx/model_handler.py +6 -6
mlrun/frameworks/onnx/model_server.py +7 -7
mlrun/frameworks/parallel_coordinates.py +6 -6
mlrun/frameworks/pytorch/__init__.py +18 -18
mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
mlrun/frameworks/pytorch/model_handler.py +17 -17
mlrun/frameworks/pytorch/model_server.py +7 -7
mlrun/frameworks/sklearn/__init__.py +13 -13
mlrun/frameworks/sklearn/estimator.py +4 -4
mlrun/frameworks/sklearn/metrics_library.py +14 -14
mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
mlrun/frameworks/sklearn/model_handler.py +2 -2
mlrun/frameworks/tf_keras/__init__.py +10 -7
mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
mlrun/frameworks/tf_keras/model_handler.py +14 -14
mlrun/frameworks/tf_keras/model_server.py +6 -6
mlrun/frameworks/xgboost/__init__.py +13 -13
mlrun/frameworks/xgboost/model_handler.py +6 -6
mlrun/k8s_utils.py +61 -17
mlrun/launcher/__init__.py +1 -1
mlrun/launcher/base.py +16 -15
mlrun/launcher/client.py +13 -11
mlrun/launcher/factory.py +1 -1
mlrun/launcher/local.py +23 -13
mlrun/launcher/remote.py +17 -10
mlrun/lists.py +7 -6
mlrun/model.py +478 -103
mlrun/model_monitoring/__init__.py +1 -1
mlrun/model_monitoring/api.py +163 -371
mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
mlrun/model_monitoring/applications/_application_steps.py +188 -0
mlrun/model_monitoring/applications/base.py +108 -0
mlrun/model_monitoring/applications/context.py +341 -0
mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
mlrun/model_monitoring/applications/results.py +99 -0
mlrun/model_monitoring/controller.py +131 -278
mlrun/model_monitoring/db/__init__.py +18 -0
mlrun/model_monitoring/db/stores/__init__.py +136 -0
mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
mlrun/model_monitoring/db/stores/base/store.py +213 -0
mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
mlrun/model_monitoring/db/tsdb/base.py +448 -0
mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
mlrun/model_monitoring/features_drift_table.py +134 -106
mlrun/model_monitoring/helpers.py +199 -55
mlrun/model_monitoring/metrics/__init__.py +13 -0
mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
mlrun/model_monitoring/model_endpoint.py +3 -2
mlrun/model_monitoring/stream_processing.py +134 -398
mlrun/model_monitoring/tracking_policy.py +9 -2
mlrun/model_monitoring/writer.py +161 -125
mlrun/package/__init__.py +6 -6
mlrun/package/context_handler.py +5 -5
mlrun/package/packager.py +7 -7
mlrun/package/packagers/default_packager.py +8 -8
mlrun/package/packagers/numpy_packagers.py +15 -15
mlrun/package/packagers/pandas_packagers.py +5 -5
mlrun/package/packagers/python_standard_library_packagers.py +10 -10
mlrun/package/packagers_manager.py +19 -23
mlrun/package/utils/_formatter.py +6 -6
mlrun/package/utils/_pickler.py +2 -2
mlrun/package/utils/_supported_format.py +4 -4
mlrun/package/utils/log_hint_utils.py +2 -2
mlrun/package/utils/type_hint_utils.py +4 -9
mlrun/platforms/__init__.py +11 -10
mlrun/platforms/iguazio.py +24 -203
mlrun/projects/operations.py +52 -25
mlrun/projects/pipelines.py +191 -197
mlrun/projects/project.py +1227 -400
mlrun/render.py +16 -19
mlrun/run.py +209 -184
mlrun/runtimes/__init__.py +83 -15
mlrun/runtimes/base.py +51 -35
mlrun/runtimes/daskjob.py +17 -10
mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
mlrun/runtimes/funcdoc.py +1 -29
mlrun/runtimes/function_reference.py +1 -1
mlrun/runtimes/kubejob.py +34 -128
mlrun/runtimes/local.py +40 -11
mlrun/runtimes/mpijob/__init__.py +0 -20
mlrun/runtimes/mpijob/abstract.py +9 -10
mlrun/runtimes/mpijob/v1.py +1 -1
mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
mlrun/runtimes/nuclio/api_gateway.py +769 -0
mlrun/runtimes/nuclio/application/__init__.py +15 -0
mlrun/runtimes/nuclio/application/application.py +758 -0
mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
mlrun/runtimes/pod.py +281 -101
mlrun/runtimes/remotesparkjob.py +12 -9
mlrun/runtimes/sparkjob/spark3job.py +67 -51
mlrun/runtimes/utils.py +41 -75
mlrun/secrets.py +9 -5
mlrun/serving/__init__.py +8 -1
mlrun/serving/remote.py +2 -7
mlrun/serving/routers.py +85 -69
mlrun/serving/server.py +69 -44
mlrun/serving/states.py +209 -36
mlrun/serving/utils.py +22 -14
mlrun/serving/v1_serving.py +6 -7
mlrun/serving/v2_serving.py +133 -54
mlrun/track/tracker.py +2 -1
mlrun/track/tracker_manager.py +3 -3
mlrun/track/trackers/mlflow_tracker.py +6 -2
mlrun/utils/async_http.py +6 -8
mlrun/utils/azure_vault.py +1 -1
mlrun/utils/clones.py +1 -2
mlrun/utils/condition_evaluator.py +3 -3
mlrun/utils/db.py +21 -3
mlrun/utils/helpers.py +405 -225
mlrun/utils/http.py +3 -6
mlrun/utils/logger.py +112 -16
mlrun/utils/notifications/notification/__init__.py +17 -13
mlrun/utils/notifications/notification/base.py +50 -2
mlrun/utils/notifications/notification/console.py +2 -0
mlrun/utils/notifications/notification/git.py +24 -1
mlrun/utils/notifications/notification/ipython.py +3 -1
mlrun/utils/notifications/notification/slack.py +96 -21
mlrun/utils/notifications/notification/webhook.py +59 -2
mlrun/utils/notifications/notification_pusher.py +149 -30
mlrun/utils/regex.py +9 -0
mlrun/utils/retryer.py +208 -0
mlrun/utils/singleton.py +1 -1
mlrun/utils/v3io_clients.py +4 -6
mlrun/utils/version/version.json +2 -2
mlrun/utils/version/version.py +2 -6
mlrun-1.7.0.dist-info/METADATA +378 -0
mlrun-1.7.0.dist-info/RECORD +351 -0
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
mlrun/feature_store/retrieval/conversion.py +0 -273
mlrun/kfpops.py +0 -868
mlrun/model_monitoring/application.py +0 -310
mlrun/model_monitoring/batch.py +0 -1095
mlrun/model_monitoring/prometheus.py +0 -219
mlrun/model_monitoring/stores/__init__.py +0 -111
mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
mlrun/model_monitoring/stores/models/__init__.py +0 -27
mlrun/model_monitoring/stores/models/base.py +0 -84
mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
mlrun/platforms/other.py +0 -306
mlrun-1.6.4rc8.dist-info/METADATA +0 -272
mlrun-1.6.4rc8.dist-info/RECORD +0 -314
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
{mlrun-1.6.4rc8.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0

mlrun/feature_store/feature_set.py CHANGED Viewed

@@ -13,7 +13,7 @@
 # limitations under the License.
 import warnings
 from datetime import datetime
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Optional, Union
 import pandas as pd
 from storey import EmitEveryEvent, EmitPolicy
@@ -119,9 +119,9 @@ class FeatureSetSpec(ModelObj):
         self.owner = owner
         self.description = description
-        self.entities: List[Union[Entity, str]] = entities or []
-        self.relations: Dict[str, Union[Entity, str]] = relations or {}
-        self.features: List[Feature] = features or []
+        self.entities: list[Union[Entity, str]] = entities or []
+        self.relations: dict[str, Union[Entity, str]] = relations or {}
+        self.features: list[Feature] = features or []
         self.partition_keys = partition_keys or []
         self.timestamp_key = timestamp_key
         self.source = source
@@ -136,12 +136,12 @@ class FeatureSetSpec(ModelObj):
         self.with_default_targets = True
     @property
-    def entities(self) -> List[Entity]:
+    def entities(self) -> list[Entity]:
         """feature set entities (indexes)"""
         return self._entities
     @entities.setter
-    def entities(self, entities: List[Union[Entity, str]]):
+    def entities(self, entities: list[Union[Entity, str]]):
         if entities:
             # if the entity is a string, convert it to Entity class
             for i, entity in enumerate(entities):
@@ -163,21 +163,21 @@ class FeatureSetSpec(ModelObj):
         self._entities = ObjectList.from_list(Entity, entities)
     @property
-    def features(self) -> List[Feature]:
+    def features(self) -> list[Feature]:
         """feature set features list"""
         return self._features
     @features.setter
-    def features(self, features: List[Feature]):
+    def features(self, features: list[Feature]):
         self._features = ObjectList.from_list(Feature, features)
     @property
-    def targets(self) -> List[DataTargetBase]:
+    def targets(self) -> list[DataTargetBase]:
         """list of desired targets (material storage)"""
         return self._targets
     @targets.setter
-    def targets(self, targets: List[DataTargetBase]):
+    def targets(self, targets: list[DataTargetBase]):
         self._targets = ObjectList.from_list(DataTargetBase, targets)
     @property
@@ -230,12 +230,12 @@ class FeatureSetSpec(ModelObj):
         self._source = source
     @property
-    def relations(self) -> Dict[str, Entity]:
+    def relations(self) -> dict[str, Entity]:
         """feature set relations dict"""
         return self._relations
     @relations.setter
-    def relations(self, relations: Dict[str, Entity]):
+    def relations(self, relations: dict[str, Entity]):
         for col, ent in relations.items():
             if isinstance(ent, str):
                 relations[col] = Entity(ent)
@@ -284,12 +284,12 @@ class FeatureSetStatus(ModelObj):
         self.run_uri = run_uri
     @property
-    def targets(self) -> List[DataTarget]:
+    def targets(self) -> list[DataTarget]:
         """list of material storage targets + their status/path"""
         return self._targets
     @targets.setter
-    def targets(self, targets: List[DataTarget]):
+    def targets(self, targets: list[DataTarget]):
         self._targets = ObjectList.from_list(DataTarget, targets)
     def update_target(self, target: DataTarget):
@@ -318,8 +318,6 @@ def emit_policy_to_dict(policy: EmitPolicy):
 class FeatureSet(ModelObj):
-    """Feature set object, defines a set of features and their data pipeline"""
     kind = mlrun.common.schemas.ObjectKind.feature_set.value
     _dict_fields = ["kind", "metadata", "spec", "status"]
@@ -327,11 +325,11 @@ class FeatureSet(ModelObj):
         self,
         name: str = None,
         description: str = None,
-        entities: List[Union[Entity, str]] = None,
+        entities: list[Union[Entity, str]] = None,
         timestamp_key: str = None,
         engine: str = None,
         label_column: str = None,
-        relations: Dict[str, Union[Entity, str]] = None,
+        relations: dict[str, Union[Entity, str]] = None,
         passthrough: bool = None,
     ):
         """Feature set object, defines a set of features and their data pipeline
@@ -339,7 +337,10 @@ class FeatureSet(ModelObj):
         example::
             import mlrun.feature_store as fstore
-            ticks = fstore.FeatureSet("ticks", entities=["stock"], timestamp_key="timestamp")
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
             ticks.ingest(df)
         :param name:          name of the feature set
@@ -532,7 +533,7 @@ class FeatureSet(ModelObj):
                     self, **(class_args if class_args is not None else {})
                 )
-    def purge_targets(self, target_names: List[str] = None, silent: bool = False):
+    def purge_targets(self, target_names: list[str] = None, silent: bool = False):
         """Delete data of specific targets
         :param target_names: List of names of targets to delete (default: delete all ingested targets)
         :param silent: Fail silently if target doesn't exist in featureset status"""
@@ -560,7 +561,7 @@ class FeatureSet(ModelObj):
     def update_targets_for_ingest(
         self,
-        targets: List[DataTargetBase],
+        targets: list[DataTargetBase],
         overwrite: bool = None,
     ):
         if not targets:
@@ -581,7 +582,7 @@ class FeatureSet(ModelObj):
         update_targets_run_id_for_ingest(overwrite, targets, status_targets)
     def _reload_and_get_status_targets(
-        self, target_names: List[str] = None, silent: bool = False
+        self, target_names: list[str] = None, silent: bool = False
     ):
         try:
             self.reload(update_spec=False)
@@ -602,9 +603,7 @@ class FeatureSet(ModelObj):
                         pass
                     else:
                         raise mlrun.errors.MLRunNotFoundError(
-                            "Target not found in status (fset={0}, target={1})".format(
-                                self.metadata.name, target_name
-                            )
+                            f"Target not found in status (fset={self.metadata.name}, target={target_name})"
                         )
         else:
             targets = self.status.targets
@@ -621,7 +620,7 @@ class FeatureSet(ModelObj):
         name: str,
         value_type: mlrun.data_types.ValueType = None,
         description: str = None,
-        labels: Optional[Dict[str, str]] = None,
+        labels: Optional[dict[str, str]] = None,
     ):
         """add/set an entity (dataset index)
@@ -629,12 +628,12 @@ class FeatureSet(ModelObj):
             import mlrun.feature_store as fstore
-            ticks = fstore.FeatureSet("ticks",
-                            entities=["stock"],
-                            timestamp_key="timestamp")
-            ticks.add_entity("country",
-                            mlrun.data_types.ValueType.STRING,
-                            description="stock country")
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
+            ticks.add_entity(
+                "country", mlrun.data_types.ValueType.STRING, description="stock country"
+            )
             ticks.add_entity("year", mlrun.data_types.ValueType.INT16)
             ticks.save()
@@ -654,13 +653,23 @@ class FeatureSet(ModelObj):
             import mlrun.feature_store as fstore
             from mlrun.features import Feature
-            ticks = fstore.FeatureSet("ticks",
-                            entities=["stock"],
-                            timestamp_key="timestamp")
-            ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.STRING,
-                            description="client consistency"),"ABC01")
-            ticks.add_feature(Feature(value_type=mlrun.data_types.ValueType.FLOAT,
-                            description="client volatility"),"SAB")
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
+            ticks.add_feature(
+                Feature(
+                    value_type=mlrun.data_types.ValueType.STRING,
+                    description="client consistency",
+                ),
+                "ABC01",
+            )
+            ticks.add_feature(
+                Feature(
+                    value_type=mlrun.data_types.ValueType.FLOAT,
+                    description="client volatility",
+                ),
+                "SAB",
+            )
             ticks.save()
         :param feature:         setting of Feature
@@ -864,15 +873,18 @@ class FeatureSet(ModelObj):
         example::
             import mlrun.feature_store as fstore
             ...
-            ticks = fstore.FeatureSet("ticks",
-                            entities=["stock"],
-                            timestamp_key="timestamp")
-            ticks.add_aggregation(name='priceN',
-                                column='price',
-                                operations=['avg'],
-                                windows=['1d'],
-                                period='1h')
+            ticks = fstore.FeatureSet(
+                "ticks", entities=["stock"], timestamp_key="timestamp"
+            )
+            ticks.add_aggregation(
+                name="priceN",
+                column="price",
+                operations=["avg"],
+                windows=["1d"],
+                period="1h",
+            )
             ticks.plot(rankdir="LR", with_targets=True)
         :param filename:     target filepath for the graph image (None for the notebook)
@@ -905,6 +917,7 @@ class FeatureSet(ModelObj):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -916,6 +929,12 @@ class FeatureSet(ModelObj):
         :param end_time:     filter by end time
         :param time_column:  specify the time column name in the file
         :param kwargs:       additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+                                    Each tuple should be in the format (column_name, operator, value).
+                                    Supported operators: "=", ">=", "<=", ">", "<".
+                                    Example: [("Product", "=", "Computer")]
+                                    For all supported filters, please see:
+                                    https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
@@ -934,6 +953,7 @@ class FeatureSet(ModelObj):
                 start_time=start_time,
                 end_time=end_time,
                 time_field=time_column,
+                additional_filters=additional_filters,
                 **kwargs,
             )
             # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -953,6 +973,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
@@ -983,7 +1004,7 @@ class FeatureSet(ModelObj):
     def ingest(
         self,
         source=None,
-        targets: List[DataTargetBase] = None,
+        targets: list[DataTargetBase] = None,
         namespace=None,
         return_df: bool = True,
         infer_options: InferOptions = InferOptions.default(),
@@ -1009,7 +1030,7 @@ class FeatureSet(ModelObj):
             df = stocks_set.ingest(stocks, infer_options=fstore.InferOptions.default())
             # for running as remote job
-            config = RunConfig(image='mlrun/mlrun')
+            config = RunConfig(image="mlrun/mlrun")
             df = ingest(stocks_set, stocks, run_config=config)
             # specify source and targets
@@ -1085,11 +1106,11 @@ class FeatureSet(ModelObj):
     def deploy_ingestion_service(
         self,
         source: DataSource = None,
-        targets: List[DataTargetBase] = None,
+        targets: list[DataTargetBase] = None,
         name: str = None,
         run_config: RunConfig = None,
         verbose=False,
-    ) -> Tuple[str, BaseRuntime]:
+    ) -> tuple[str, BaseRuntime]:
         """Start real-time ingestion service using nuclio function
         Deploy a real-time function implementing feature ingestion pipeline
@@ -1122,7 +1143,7 @@ class FeatureSet(ModelObj):
     def extract_relation_keys(
         self,
         other_feature_set,
-        relations: Dict[str, Union[str, Entity]] = None,
+        relations: dict[str, Union[str, Entity]] = None,
     ) -> list[str]:
         """
         Checks whether a feature set can be merged to the right of this feature set.
@@ -1189,10 +1210,10 @@ class SparkAggregateByKey(StepToDict):
     def __init__(
         self,
-        key_columns: List[str],
+        key_columns: list[str],
         time_column: str,
-        aggregates: List[Dict],
-        emit_policy: Union[EmitPolicy, Dict] = None,
+        aggregates: list[dict],
+        emit_policy: Union[EmitPolicy, dict] = None,
     ):
         self.key_columns = key_columns
         self.time_column = time_column

mlrun/feature_store/feature_vector.py CHANGED Viewed

@@ -17,7 +17,7 @@ import typing
 from copy import copy
 from datetime import datetime
 from enum import Enum
-from typing import Dict, List, Union
+from typing import Union
 import numpy as np
 import pandas as pd
@@ -69,18 +69,16 @@ class FeatureVectorSpec(ModelObj):
         self._entity_fields: ObjectList = None
         self._entity_source: DataSource = None
         self._function: FunctionReference = None
-        self._relations: typing.Dict[str, ObjectDict] = None
+        self._relations: dict[str, ObjectDict] = None
         self._join_graph: JoinGraph = None
         self.description = description
-        self.features: List[str] = features or []
+        self.features: list[str] = features or []
         self.entity_source = entity_source
         self.entity_fields = entity_fields or []
         self.graph = graph
         self.join_graph = join_graph
-        self.relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]] = (
-            relations or {}
-        )
+        self.relations: dict[str, dict[str, Union[Entity, str]]] = relations or {}
         self.timestamp_field = timestamp_field
         self.label_feature = label_feature
         self.with_indexes = with_indexes
@@ -97,12 +95,12 @@ class FeatureVectorSpec(ModelObj):
         self._entity_source = self._verify_dict(source, "entity_source", DataSource)
     @property
-    def entity_fields(self) -> List[Feature]:
+    def entity_fields(self) -> list[Feature]:
         """the schema/metadata for the entity source fields"""
         return self._entity_fields
     @entity_fields.setter
-    def entity_fields(self, entity_fields: List[Feature]):
+    def entity_fields(self, entity_fields: list[Feature]):
         self._entity_fields = ObjectList.from_list(Feature, entity_fields)
     @property
@@ -125,14 +123,12 @@ class FeatureVectorSpec(ModelObj):
         self._function = self._verify_dict(function, "function", FunctionReference)
     @property
-    def relations(self) -> typing.Dict[str, ObjectDict]:
+    def relations(self) -> dict[str, ObjectDict]:
         """feature set relations dict"""
         return self._relations
     @relations.setter
-    def relations(
-        self, relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]]
-    ):
+    def relations(self, relations: dict[str, dict[str, Union[Entity, str]]]):
         temp_relations = {}
         for fs_name, relation in relations.items():
             for col, ent in relation.items():
@@ -179,29 +175,29 @@ class FeatureVectorStatus(ModelObj):
         self.stats = stats or {}
         self.index_keys = index_keys
         self.preview = preview or []
-        self.features: List[Feature] = features or []
+        self.features: list[Feature] = features or []
         self.run_uri = run_uri
         self.timestamp_key = timestamp_key
     @property
-    def targets(self) -> List[DataTarget]:
+    def targets(self) -> list[DataTarget]:
         """list of material storage targets + their status/path"""
         return self._targets
     @targets.setter
-    def targets(self, targets: List[DataTarget]):
+    def targets(self, targets: list[DataTarget]):
         self._targets = ObjectList.from_list(DataTarget, targets)
     def update_target(self, target: DataTarget):
         self._targets.update(target)
     @property
-    def features(self) -> List[Feature]:
+    def features(self) -> list[Feature]:
         """list of features (result of joining features from the source feature sets)"""
         return self._features
     @features.setter
-    def features(self, features: List[Feature]):
+    def features(self, features: list[Feature]):
         self._features = ObjectList.from_list(Feature, features)
@@ -378,7 +374,7 @@ class _JoinStep(ModelObj):
         name: str = None,
         left_step_name: str = None,
         right_step_name: str = None,
-        left_feature_set_names: Union[str, List[str]] = None,
+        left_feature_set_names: Union[str, list[str]] = None,
         right_feature_set_name: str = None,
         join_type: str = "inner",
         asof_join: bool = False,
@@ -388,7 +384,8 @@ class _JoinStep(ModelObj):
         self.right_step_name = right_step_name
         self.left_feature_set_names = (
             left_feature_set_names
-            if isinstance(left_feature_set_names, list)
+            if left_feature_set_names is None
+            or isinstance(left_feature_set_names, list)
             else [left_feature_set_names]
         )
         self.right_feature_set_name = right_feature_set_name
@@ -402,7 +399,7 @@ class _JoinStep(ModelObj):
         self,
         feature_set_objects: ObjectList,
         vector,
-        entity_rows_keys: List[str] = None,
+        entity_rows_keys: list[str] = None,
     ):
         if feature_set_objects[self.right_feature_set_name].is_connectable_to_df(
             entity_rows_keys
@@ -482,21 +479,22 @@ class FeatureVector(ModelObj):
         description=None,
         with_indexes=None,
         join_graph: JoinGraph = None,
-        relations: typing.Dict[str, typing.Dict[str, Union[Entity, str]]] = None,
+        relations: dict[str, dict[str, Union[Entity, str]]] = None,
     ):
         """Feature vector, specify selected features, their metadata and material views
         example::
             import mlrun.feature_store as fstore
             features = ["quotes.bid", "quotes.asks_sum_5h as asks_5h", "stocks.*"]
             vector = fstore.FeatureVector("my-vec", features)
             # get the vector as a dataframe
-            df = fstore.get_offline_features(vector).to_dataframe()
+            df = vector.get_offline_features().to_dataframe()
             # return an online/real-time feature service
-            svc = fstore.get_online_feature_service(vector, impute_policy={"*": "$mean"})
+            svc = vector.get_online_feature_service(impute_policy={"*": "$mean"})
             resp = svc.get([{"stock": "GOOG"}])
         :param name:           name of the feature vector
@@ -732,7 +730,7 @@ class FeatureVector(ModelObj):
         entity_timestamp_column: str = None,
         target: DataTargetBase = None,
         run_config: RunConfig = None,
-        drop_columns: List[str] = None,
+        drop_columns: list[str] = None,
         start_time: Union[str, datetime] = None,
         end_time: Union[str, datetime] = None,
         with_indexes: bool = False,
@@ -740,9 +738,10 @@ class FeatureVector(ModelObj):
         engine: str = None,
         engine_args: dict = None,
         query: str = None,
-        order_by: Union[str, List[str]] = None,
+        order_by: Union[str, list[str]] = None,
         spark_service: str = None,
-        timestamp_for_filtering: Union[str, Dict[str, str]] = None,
+        timestamp_for_filtering: Union[str, dict[str, str]] = None,
+        additional_filters: list = None,
     ):
         """retrieve offline feature vector results
@@ -799,6 +798,12 @@ class FeatureVector(ModelObj):
                                         By default, the filter executes on the timestamp_key of each feature set.
                                         Note: the time filtering is performed on each feature set before the
                                         merge process using start_time and end_time params.
+        :param additional_filters: List of additional_filter conditions as tuples.
+                            Each tuple should be in the format (column_name, operator, value).
+                            Supported operators: "=", ">=", "<=", ">", "<".
+                            Example: [("Product", "=", "Computer")]
+                            For all supported filters, please see:
+                            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         """
@@ -819,6 +824,7 @@ class FeatureVector(ModelObj):
             order_by,
             spark_service,
             timestamp_for_filtering,
+            additional_filters,
         )
     def get_online_feature_service(
@@ -827,7 +833,7 @@ class FeatureVector(ModelObj):
         fixed_window_type: FixedWindowType = FixedWindowType.LastClosedWindow,
         impute_policy: dict = None,
         update_stats: bool = False,
-        entity_keys: List[str] = None,
+        entity_keys: list[str] = None,
     ):
         """initialize and return online feature vector service api,
         returns :py:class:`~mlrun.feature_store.OnlineVectorService`
@@ -855,7 +861,7 @@ class FeatureVector(ModelObj):
                 Example::
-                    svc = vector_uri.get_online_feature_service(entity_keys=['ticker'])
+                    svc = vector_uri.get_online_feature_service(entity_keys=["ticker"])
                     try:
                         resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
                         print(resp)
@@ -910,7 +916,7 @@ class OnlineVectorService:
         graph,
         index_columns,
         impute_policy: dict = None,
-        requested_columns: List[str] = None,
+        requested_columns: list[str] = None,
     ):
         self.vector = vector
         self.impute_policy = impute_policy or {}
@@ -966,7 +972,7 @@ class OnlineVectorService:
         """vector merger function status (ready, running, error)"""
         return "ready"
-    def get(self, entity_rows: List[Union[dict, list]], as_list=False):
+    def get(self, entity_rows: list[Union[dict, list]], as_list=False):
         """get feature vector given the provided entity inputs
         take a list of input vectors/rows and return a list of enriched feature vectors
@@ -1080,7 +1086,9 @@ class OfflineVectorResponse:
     def to_dataframe(self, to_pandas=True):
         """return result as dataframe"""
         if self.status != "completed":
-            raise mlrun.errors.MLRunTaskNotReady("feature vector dataset is not ready")
+            raise mlrun.errors.MLRunTaskNotReadyError(
+                "feature vector dataset is not ready"
+            )
         return self._merger.get_df(to_pandas=to_pandas)
     def to_parquet(self, target_path, **kw):

mlrun/feature_store/ingestion.py CHANGED Viewed

@@ -17,6 +17,7 @@ import uuid
 import pandas as pd
 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.datastore.sources import get_source_from_dict, get_source_step
 from mlrun.datastore.targets import (
     add_target_steps,
@@ -263,13 +264,13 @@ def run_ingestion_job(name, featureset, run_config, schedule=None, spark_service
         out_path=featureset.spec.output_path,
     )
     task.spec.secret_sources = run_config.secret_sources
-    task.set_label("job-type", "feature-ingest").set_label(
-        "feature-set", featureset.uri
-    )
+    task.set_label(
+        mlrun_constants.MLRunInternalLabels.job_type, "feature-ingest"
+    ).set_label("feature-set", featureset.uri)
     if run_config.owner:
-        task.set_label("owner", run_config.owner).set_label(
-            "v3io_user", run_config.owner
-        )
+        task.set_label(
+            mlrun_constants.MLRunInternalLabels.owner, run_config.owner
+        ).set_label(mlrun_constants.MLRunInternalLabels.v3io_user, run_config.owner)
     # set run UID and save in the feature set status (linking the feature set to the job)
     task.metadata.uid = uuid.uuid4().hex

mlrun 1.6.4rc8__py3-none-any.whl → 1.7.0__py3-none-any.whl

Potentially problematic release.

mlrun 1.6.4rc8py3-none-any.whl → 1.7.0py3-none-any.whl