mlrun 1.7.0rc14__py3-none-any.whl → 1.7.0rc16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic.
- mlrun/__init__.py +10 -1
- mlrun/__main__.py +18 -109
- mlrun/{runtimes/mpijob/v1alpha1.py → alerts/__init__.py} +2 -16
- mlrun/alerts/alert.py +141 -0
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +36 -253
- mlrun/artifacts/dataset.py +9 -190
- mlrun/artifacts/manager.py +20 -41
- mlrun/artifacts/model.py +8 -140
- mlrun/artifacts/plots.py +14 -375
- mlrun/common/schemas/__init__.py +4 -2
- mlrun/common/schemas/alert.py +46 -4
- mlrun/common/schemas/api_gateway.py +4 -0
- mlrun/common/schemas/artifact.py +15 -0
- mlrun/common/schemas/auth.py +2 -0
- mlrun/common/schemas/model_monitoring/__init__.py +8 -1
- mlrun/common/schemas/model_monitoring/constants.py +40 -4
- mlrun/common/schemas/model_monitoring/model_endpoints.py +73 -2
- mlrun/common/schemas/project.py +2 -0
- mlrun/config.py +7 -4
- mlrun/data_types/to_pandas.py +4 -4
- mlrun/datastore/base.py +41 -9
- mlrun/datastore/datastore_profile.py +54 -4
- mlrun/datastore/inmem.py +2 -2
- mlrun/datastore/sources.py +43 -2
- mlrun/datastore/store_resources.py +2 -6
- mlrun/datastore/targets.py +106 -39
- mlrun/db/base.py +23 -3
- mlrun/db/httpdb.py +101 -47
- mlrun/db/nopdb.py +20 -2
- mlrun/errors.py +5 -0
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +12 -47
- mlrun/feature_store/feature_set.py +9 -0
- mlrun/feature_store/retrieval/base.py +9 -4
- mlrun/feature_store/retrieval/conversion.py +4 -4
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +2 -0
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +5 -0
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +5 -10
- mlrun/launcher/base.py +4 -3
- mlrun/launcher/client.py +1 -1
- mlrun/lists.py +4 -2
- mlrun/model.py +25 -11
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +41 -18
- mlrun/model_monitoring/application.py +5 -305
- mlrun/model_monitoring/applications/__init__.py +11 -0
- mlrun/model_monitoring/applications/_application_steps.py +157 -0
- mlrun/model_monitoring/applications/base.py +282 -0
- mlrun/model_monitoring/applications/context.py +214 -0
- mlrun/model_monitoring/applications/evidently_base.py +211 -0
- mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +3 -1
- mlrun/model_monitoring/db/__init__.py +2 -0
- mlrun/model_monitoring/db/stores/base/store.py +9 -36
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +7 -6
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +63 -110
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +104 -187
- mlrun/model_monitoring/db/tsdb/__init__.py +71 -0
- mlrun/model_monitoring/db/tsdb/base.py +135 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +404 -0
- mlrun/model_monitoring/db/v3io_tsdb_reader.py +134 -0
- mlrun/model_monitoring/evidently_application.py +6 -118
- mlrun/model_monitoring/helpers.py +1 -1
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +48 -213
- mlrun/model_monitoring/writer.py +101 -121
- mlrun/platforms/__init__.py +10 -9
- mlrun/platforms/iguazio.py +21 -202
- mlrun/projects/operations.py +11 -7
- mlrun/projects/pipelines.py +13 -76
- mlrun/projects/project.py +73 -45
- mlrun/render.py +11 -13
- mlrun/run.py +6 -41
- mlrun/runtimes/__init__.py +3 -3
- mlrun/runtimes/base.py +6 -6
- mlrun/runtimes/funcdoc.py +0 -28
- mlrun/runtimes/kubejob.py +2 -1
- mlrun/runtimes/local.py +1 -1
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/runtimes/nuclio/api_gateway.py +75 -9
- mlrun/runtimes/nuclio/function.py +9 -35
- mlrun/runtimes/pod.py +16 -36
- mlrun/runtimes/remotesparkjob.py +1 -1
- mlrun/runtimes/sparkjob/spark3job.py +1 -1
- mlrun/runtimes/utils.py +1 -39
- mlrun/utils/helpers.py +72 -71
- mlrun/utils/notifications/notification/base.py +1 -1
- mlrun/utils/notifications/notification/slack.py +12 -5
- mlrun/utils/notifications/notification/webhook.py +1 -1
- mlrun/utils/notifications/notification_pusher.py +134 -14
- mlrun/utils/version/version.json +2 -2
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/METADATA +4 -3
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/RECORD +105 -95
- mlrun/kfpops.py +0 -865
- mlrun/platforms/other.py +0 -305
- /mlrun/{runtimes → common/runtimes}/constants.py +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/LICENSE +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/WHEEL +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/entry_points.txt +0 -0
- {mlrun-1.7.0rc14.dist-info → mlrun-1.7.0rc16.dist-info}/top_level.txt +0 -0
mlrun/db/httpdb.py
CHANGED
@@ -15,7 +15,6 @@
 import enum
 import http
 import re
-import tempfile
 import time
 import traceback
 import typing
@@ -26,9 +25,9 @@ from os import path, remove
 from typing import Optional, Union
 from urllib.parse import urlparse
 
-import kfp
 import requests
 import semver
+from mlrun_pipelines.utils import compile_pipeline
 
 import mlrun
 import mlrun.common.schemas
@@ -38,6 +37,7 @@ import mlrun.platforms
 import mlrun.projects
 import mlrun.runtimes.nuclio.api_gateway
 import mlrun.utils
+from mlrun.alerts.alert import AlertConfig
 from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
 from mlrun.errors import MLRunInvalidArgumentError, err_to_str
 
@@ -51,7 +51,6 @@ from ..utils import (
     datetime_to_iso,
     dict_to_json,
     logger,
-    new_pipe_metadata,
     normalize_name,
     version,
 )
@@ -590,7 +589,7 @@ class HTTPRunDB(RunDBInterface):
         if offset < 0:
             raise MLRunInvalidArgumentError("Offset cannot be negative")
         if size is None:
-            size = int(
+            size = int(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
         elif size == -1:
             logger.warning(
                 "Retrieving all logs. This may be inefficient and can result in a large log."
@@ -636,33 +635,35 @@ class HTTPRunDB(RunDBInterface):
 
         state, text = self.get_log(uid, project, offset=offset)
         if text:
-            print(text.decode(errors=
+            print(text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors))
         nil_resp = 0
         while True:
             offset += len(text)
             # if we get 3 nil responses in a row, increase the sleep time to 10 seconds
             # TODO: refactor this to use a conditional backoff mechanism
             if nil_resp < 3:
-                time.sleep(int(
+                time.sleep(int(mlrun.mlconf.httpdb.logs.pull_logs_default_interval))
             else:
                 time.sleep(
-                    int(
+                    int(
+                        mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval
+                    )
                 )
             state, text = self.get_log(uid, project, offset=offset)
             if text:
                 nil_resp = 0
                 print(
-                    text.decode(errors=
+                    text.decode(errors=mlrun.mlconf.httpdb.logs.decode.errors),
                     end="",
                 )
             else:
                 nil_resp += 1
 
             if watch and state in [
-                mlrun.runtimes.constants.RunStates.pending,
-                mlrun.runtimes.constants.RunStates.running,
-                mlrun.runtimes.constants.RunStates.created,
-                mlrun.runtimes.constants.RunStates.aborting,
+                mlrun.common.runtimes.constants.RunStates.pending,
+                mlrun.common.runtimes.constants.RunStates.running,
+                mlrun.common.runtimes.constants.RunStates.created,
+                mlrun.common.runtimes.constants.RunStates.aborting,
             ]:
                 continue
             else:
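The watch-log loop above now reads its decode mode and polling cadence from the client configuration instead of hard-coded literals. As a minimal illustration (the default values are not shown in this diff), these are the configuration keys the new code references:

    import mlrun

    # configuration keys referenced by the updated log-pulling code
    print(mlrun.mlconf.httpdb.logs.decode.errors)
    print(mlrun.mlconf.httpdb.logs.pull_logs_default_size_limit)
    print(mlrun.mlconf.httpdb.logs.pull_logs_default_interval)
    print(mlrun.mlconf.httpdb.logs.pull_logs_backoff_no_logs_default_interval)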
@@ -985,7 +986,18 @@ class HTTPRunDB(RunDBInterface):
         resp = self.api_call("GET", endpoint_path, error, params=params, version="v2")
         return resp.json()
 
-    def del_artifact(
+    def del_artifact(
+        self,
+        key,
+        tag=None,
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         """Delete an artifact.
 
         :param key: Identifying key of the artifact.
@@ -993,6 +1005,8 @@ class HTTPRunDB(RunDBInterface):
         :param project: Project that the artifact belongs to.
         :param tree: The tree which generated this artifact.
         :param uid: A unique ID for this specific version of the artifact (the uid that was generated in the backend)
+        :param deletion_strategy: The artifact deletion strategy types.
+        :param secrets: Credentials needed to access the artifact data.
         """
 
         endpoint_path = f"projects/{project}/artifacts/{key}"
@@ -1001,9 +1015,17 @@ class HTTPRunDB(RunDBInterface):
             "tag": tag,
             "tree": tree,
             "uid": uid,
+            "deletion_strategy": deletion_strategy,
         }
         error = f"del artifact {project}/{key}"
-        self.api_call(
+        self.api_call(
+            "DELETE",
+            endpoint_path,
+            error,
+            params=params,
+            version="v2",
+            body=dict_to_json(secrets),
+        )
 
     def list_artifacts(
         self,
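For illustration, a hedged sketch of deleting an artifact with the new arguments (the key and project names are placeholders, and metadata_only is simply the default strategy shown above):

    import mlrun
    from mlrun.common.schemas.artifact import ArtifactsDeletionStrategies

    db = mlrun.get_run_db()
    db.del_artifact(
        key="my-artifact",        # placeholder artifact key
        project="my-project",     # placeholder project name
        deletion_strategy=ArtifactsDeletionStrategies.metadata_only,
    )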
@@ -1018,6 +1040,7 @@ class HTTPRunDB(RunDBInterface):
         kind: str = None,
         category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
         tree: str = None,
+        producer_uri: str = None,
     ) -> ArtifactList:
         """List artifacts filtered by various parameters.
 
@@ -1046,9 +1069,12 @@ class HTTPRunDB(RunDBInterface):
         :param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
             artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
             from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
-        :param kind:
-        :param category:
-        :param tree:
+        :param kind: Return artifacts of the requested kind.
+        :param category: Return artifacts of the requested category.
+        :param tree: Return artifacts of the requested tree.
+        :param producer_uri: Return artifacts produced by the requested producer URI. Producer URI usually
+            points to a run and is used to filter artifacts by the run that produced them when the artifact producer id
+            is a workflow id (artifact was created as part of a workflow).
         """
 
         project = project or config.default_project
@@ -1067,6 +1093,7 @@ class HTTPRunDB(RunDBInterface):
             "category": category,
             "tree": tree,
             "format": mlrun.common.schemas.ArtifactsFormat.full.value,
+            "producer_uri": producer_uri,
         }
         error = "list artifacts"
         endpoint_path = f"projects/{project}/artifacts"
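A short usage sketch of the new filter (the project name and producer URI below are placeholders; the producer URI normally points to the run or workflow that created the artifacts):

    import mlrun

    db = mlrun.get_run_db()
    artifacts = db.list_artifacts(
        project="my-project",                  # placeholder project name
        producer_uri="my-project/my-run-uid",  # placeholder producer URI
    )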
@@ -1828,14 +1855,11 @@ class HTTPRunDB(RunDBInterface):
         if isinstance(pipeline, str):
             pipe_file = pipeline
         else:
-            pipe_file =
-            conf = new_pipe_metadata(
+            pipe_file = compile_pipeline(
                 artifact_path=artifact_path,
                 cleanup_ttl=cleanup_ttl,
-
-
-            kfp.compiler.Compiler().compile(
-                pipeline, pipe_file, type_check=False, pipeline_conf=conf
+                ops=ops,
+                pipeline=pipeline,
             )
 
         if pipe_file.endswith(".yaml"):
@@ -3112,14 +3136,12 @@ class HTTPRunDB(RunDBInterface):
         :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of a
             label (i.e. list("key=value")) or by looking for the existence of a given key (i.e. "key")
         :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
-        :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339
-
-
-
-
-
-            `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours, and `'d'` =
-            days), or 0 for the earliest time.
+        :param start: The start time of the metrics. Can be represented by a string containing an RFC 3339 time, a
+            Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
+            `m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
+        :param end: The end time of the metrics. Can be represented by a string containing an RFC 3339 time, a
+            Unix timestamp in milliseconds, a relative time (`'now'` or `'now-[0-9]+[mhd]'`, where
+            `m` = minutes, `h` = hours, `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
         :param top_level: if true will return only routers and endpoint that are NOT children of any router
         :param uids: if passed will return a list `ModelEndpoint` object with uid in uids
         """
@@ -3168,13 +3190,13 @@ class HTTPRunDB(RunDBInterface):
         :param project: The name of the project
         :param endpoint_id: The unique id of the model endpoint.
         :param start: The start time of the metrics. Can be represented by a string containing an
-            RFC 3339 time, a
-            `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
-            0 for the earliest time.
+            RFC 3339 time, a Unix timestamp in milliseconds, a relative time
+            (`'now'` or `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
+            `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
         :param end: The end time of the metrics. Can be represented by a string containing an
-            RFC 3339 time, a
-            `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
-            0 for the earliest time.
+            RFC 3339 time, a Unix timestamp in milliseconds, a relative time
+            (`'now'` or `'now-[0-9]+[mhd]'`, where `m` = minutes, `h` = hours,
+            `'d'` = days, and `'s'` = seconds), or 0 for the earliest time.
         :param metrics: A list of metrics to return for the model endpoint. There are pre-defined
             metrics for model endpoints such as predictions_per_second and
             latency_avg_5m but also custom metrics defined by the user. Please note that
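The two hunks above only show the reworked docstrings, not the enclosing method signatures. Assuming the surrounding methods are the model-endpoint listing/getting calls the docstrings describe, the accepted time values look like this (a hedged sketch, not taken from the diff; project name is a placeholder):

    import mlrun

    db = mlrun.get_run_db()
    # relative-time window: from three days ago until now
    endpoints = db.list_model_endpoints(
        project="my-project",  # placeholder project name
        start="now-3d",
        end="now",
    )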
@@ -3915,7 +3937,7 @@ class HTTPRunDB(RunDBInterface):
             logger.warning(
                 "Building a function image to ECR and loading an S3 source to the image may require conflicting access "
                 "keys. Only the permissions granted to the platform's configured secret will take affect "
-                "(see mlrun.
+                "(see mlrun.mlconf.httpdb.builder.docker_registry_secret). "
                 "In case the permissions are limited to ECR scope, you may use pull_at_runtime=True instead",
                 source=func.spec.build.source,
                 load_source_on_run=func.spec.build.load_source_on_run,
@@ -3943,9 +3965,9 @@ class HTTPRunDB(RunDBInterface):
     def store_alert_config(
         self,
         alert_name: str,
-        alert_data: Union[dict,
+        alert_data: Union[dict, AlertConfig],
         project="",
-    ):
+    ) -> AlertConfig:
         """
         Create/modify an alert.
         :param alert_name: The name of the alert.
@@ -3956,13 +3978,19 @@ class HTTPRunDB(RunDBInterface):
         project = project or config.default_project
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"put alert {project}/alerts/{alert_name}"
-
-        alert_data
+        alert_instance = (
+            alert_data
+            if isinstance(alert_data, AlertConfig)
+            else AlertConfig.from_dict(alert_data)
+        )
+        alert_instance.validate_required_fields()
+
+        alert_data = alert_instance.to_dict()
         body = _as_json(alert_data)
         response = self.api_call("PUT", endpoint_path, error_message, body=body)
-        return
+        return AlertConfig.from_dict(response.json())
 
-    def get_alert_config(self, alert_name: str, project=""):
+    def get_alert_config(self, alert_name: str, project="") -> AlertConfig:
         """
         Retrieve an alert.
         :param alert_name: The name of the alert to retrieve.
@@ -3973,9 +4001,9 @@ class HTTPRunDB(RunDBInterface):
         endpoint_path = f"projects/{project}/alerts/{alert_name}"
         error_message = f"get alert {project}/alerts/{alert_name}"
         response = self.api_call("GET", endpoint_path, error_message)
-        return
+        return AlertConfig.from_dict(response.json())
 
-    def list_alerts_configs(self, project=""):
+    def list_alerts_configs(self, project="") -> list[AlertConfig]:
         """
         Retrieve list of alerts of a project.
         :param project: The project name.
@@ -3987,7 +4015,7 @@ class HTTPRunDB(RunDBInterface):
         response = self.api_call("GET", endpoint_path, error_message).json()
         results = []
         for item in response:
-            results.append(
+            results.append(AlertConfig(**item))
         return results
 
     def delete_alert_config(self, alert_name: str, project=""):
@@ -4012,6 +4040,32 @@ class HTTPRunDB(RunDBInterface):
         error_message = f"post alert {project}/alerts/{alert_name}/reset"
         self.api_call("POST", endpoint_path, error_message)
 
+    def get_alert_template(
+        self, template_name: str
+    ) -> mlrun.common.schemas.AlertTemplate:
+        """
+        Retrieve a specific alert template.
+        :param template_name: The name of the template to retrieve.
+        :return: The template object.
+        """
+        endpoint_path = f"alert-templates/{template_name}"
+        error_message = f"get template alert-templates/{template_name}"
+        response = self.api_call("GET", endpoint_path, error_message)
+        return mlrun.common.schemas.AlertTemplate(**response.json())
+
+    def list_alert_templates(self) -> list[mlrun.common.schemas.AlertTemplate]:
+        """
+        Retrieve list of all alert templates.
+        :return: All the alert template objects in the database.
+        """
+        endpoint_path = "alert-templates"
+        error_message = "get templates /alert-templates"
+        response = self.api_call("GET", endpoint_path, error_message).json()
+        results = []
+        for item in response:
+            results.append(mlrun.common.schemas.AlertTemplate(**item))
+        return results
+
 
 def _as_json(obj):
     fn = getattr(obj, "to_json", None)
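Taken together, the alert methods above now accept and return AlertConfig objects, and alert templates can be listed and fetched. A hedged usage sketch (the project, alert, and template names are placeholders):

    import mlrun
    from mlrun.alerts.alert import AlertConfig

    db = mlrun.get_run_db()

    # alert templates
    templates = db.list_alert_templates()
    template = db.get_alert_template("my-template")  # placeholder template name

    # alert configs now come back as AlertConfig instances
    configs: list[AlertConfig] = db.list_alerts_configs(project="my-project")
    config = db.get_alert_config("my-alert", project="my-project")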
mlrun/db/nopdb.py
CHANGED
@@ -16,6 +16,7 @@
 import datetime
 from typing import Optional, Union
 
+import mlrun.alerts
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -128,7 +129,18 @@ class NopDB(RunDBInterface):
     ):
         pass
 
-    def del_artifact(
+    def del_artifact(
+        self,
+        key,
+        tag="",
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         pass
 
     def del_artifacts(self, name="", project="", tag="", labels=None):
@@ -671,7 +683,7 @@ class NopDB(RunDBInterface):
     def store_alert_config(
         self,
         alert_name: str,
-        alert_data: Union[dict, mlrun.
+        alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
         project="",
     ):
         pass
@@ -687,3 +699,9 @@ class NopDB(RunDBInterface):
 
     def reset_alert_config(self, alert_name: str, project=""):
         pass
+
+    def get_alert_template(self, template_name: str):
+        pass
+
+    def list_alert_templates(self):
+        pass
mlrun/errors.py
CHANGED
@@ -183,6 +183,10 @@ class MLRunInternalServerError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.INTERNAL_SERVER_ERROR.value
 
 
+class MLRunNotImplementedServerError(MLRunHTTPStatusError):
+    error_status_code = HTTPStatus.NOT_IMPLEMENTED.value
+
+
 class MLRunServiceUnavailableError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.SERVICE_UNAVAILABLE.value
 
@@ -234,4 +238,5 @@ STATUS_ERRORS = {
     HTTPStatus.PRECONDITION_FAILED.value: MLRunPreconditionFailedError,
     HTTPStatus.INTERNAL_SERVER_ERROR.value: MLRunInternalServerError,
     HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
+    HTTPStatus.NOT_IMPLEMENTED.value: MLRunNotImplementedServerError,
 }
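With the new mapping, a 501 response from the API server can be translated to the dedicated exception class. A minimal illustration of the lookup added above:

    from http import HTTPStatus

    import mlrun.errors

    err_cls = mlrun.errors.STATUS_ERRORS[HTTPStatus.NOT_IMPLEMENTED.value]
    assert err_cls is mlrun.errors.MLRunNotImplementedServerError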
mlrun/feature_store/__init__.py
CHANGED
@@ -19,7 +19,6 @@ __all__ = [
     "get_online_feature_service",
     "ingest",
     "preview",
-    "deploy_ingestion_service",
     "deploy_ingestion_service_v2",
     "delete_feature_set",
     "delete_feature_vector",
@@ -41,7 +40,6 @@ from ..features import Entity, Feature
 from .api import (
     delete_feature_set,
     delete_feature_vector,
-    deploy_ingestion_service,
     deploy_ingestion_service_v2,
     get_feature_set,
     get_feature_vector,
mlrun/feature_store/api.py
CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results
 
@@ -175,6 +176,13 @@ def get_offline_features(
                                      By default, the filter executes on the timestamp_key of each feature set.
                                      Note: the time filtering is performed on each feature set before the
                                      merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+
 
     """
     return _get_offline_features(
@@ -194,6 +202,7 @@ def get_offline_features(
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )
 
 
@@ -214,6 +223,7 @@ def _get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -252,6 +262,7 @@ def _get_offline_features(
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )
 
     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -267,6 +278,7 @@ def _get_offline_features(
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )
 
 
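The new argument threads through to the offline merger. A usage sketch based on the example given in the docstring above (the feature-vector URI is a placeholder):

    import mlrun.feature_store as fstore

    resp = fstore.get_offline_features(
        "store://feature-vectors/my-project/my-vector",  # placeholder vector URI
        additional_filters=[("Product", "=", "Computer")],
    )
    df = resp.to_dataframe()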
@@ -1005,53 +1017,6 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function
 
 
-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
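Callers of the removed wrapper are expected to move to deploy_ingestion_service_v2. A hedged migration sketch, assuming the v2 entry point keeps the old wrapper's arguments and returns both the endpoint and the deployed function (mirroring the internal helper shown above; names and URIs are placeholders):

    import mlrun.feature_store as fstore
    from mlrun.datastore.sources import HTTPSource

    my_set = fstore.get_feature_set("store://feature-sets/my-project/my-set")  # placeholder URI
    # previously: endpoint = fstore.deploy_ingestion_service(my_set, source=HTTPSource())
    endpoint, function = fstore.deploy_ingestion_service_v2(
        my_set,
        source=HTTPSource(),
    )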
mlrun/feature_store/feature_set.py
CHANGED
@@ -917,6 +917,7 @@ class FeatureSet(ModelObj):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
         **kwargs,
     ):
         """return featureset (offline) data as dataframe
@@ -928,6 +929,12 @@ class FeatureSet(ModelObj):
         :param end_time: filter by end time
         :param time_column: specify the time column name in the file
         :param kwargs: additional reader (csv, parquet, ..) args
+        :param additional_filters: List of additional_filter conditions as tuples.
+            Each tuple should be in the format (column_name, operator, value).
+            Supported operators: "=", ">=", "<=", ">", "<".
+            Example: [("Product", "=", "Computer")]
+            For all supported filters, please see:
+            https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
         :return: DataFrame
         """
         entities = list(self.spec.entities.keys())
@@ -946,6 +953,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_field=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         # to_dataframe() can sometimes return an iterator of dataframes instead of one dataframe
@@ -965,6 +973,7 @@ class FeatureSet(ModelObj):
             start_time=start_time,
             end_time=end_time,
             time_column=time_column,
+            additional_filters=additional_filters,
             **kwargs,
         )
         return result
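A short sketch of the new FeatureSet.to_dataframe filter (the feature-set URI is a placeholder):

    import mlrun.feature_store as fstore

    fset = fstore.get_feature_set("store://feature-sets/my-project/my-set")  # placeholder URI
    df = fset.to_dataframe(additional_filters=[("Product", "=", "Computer")])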
mlrun/feature_store/retrieval/base.py
CHANGED
@@ -88,6 +88,7 @@ class BaseMerger(abc.ABC):
         update_stats=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._target = target
 
@@ -134,6 +135,7 @@ class BaseMerger(abc.ABC):
             timestamp_for_filtering=timestamp_for_filtering,
             query=query,
             order_by=order_by,
+            additional_filters=additional_filters,
         )
 
     def _write_to_offline_target(self, timestamp_key=None):
@@ -186,6 +188,7 @@ class BaseMerger(abc.ABC):
         timestamp_for_filtering=None,
         query=None,
         order_by=None,
+        additional_filters=None,
     ):
         self._create_engine_env()
 
@@ -212,7 +215,7 @@ class BaseMerger(abc.ABC):
             feature_sets.append(None)
             join_types.append(None)
 
-
+        timestamp_filtered = False
         for step in join_graph.steps:
             name = step.right_feature_set_name
             feature_set = feature_set_objects[name]
@@ -250,7 +253,7 @@ class BaseMerger(abc.ABC):
             if self._drop_indexes:
                 self._append_drop_column(time_column)
             if (start_time or end_time) and time_column:
-
+                timestamp_filtered = True
 
             df = self._get_engine_df(
                 feature_set,
@@ -259,6 +262,7 @@ class BaseMerger(abc.ABC):
                 start_time if time_column else None,
                 end_time if time_column else None,
                 time_column,
+                additional_filters,
             )
 
             fs_entities_and_timestamp = list(feature_set.spec.entities.keys())
@@ -302,8 +306,8 @@ class BaseMerger(abc.ABC):
                 new_columns.append((column, alias))
             self._update_alias(dictionary={name: alias for name, alias in new_columns})
 
-        # None of the feature sets was filtered as required
-        if not
+        # None of the feature sets was timestamp filtered as required
+        if not timestamp_filtered and (start_time or end_time):
             raise mlrun.errors.MLRunRuntimeError(
                 "start_time and end_time can only be provided in conjunction with "
                 "a timestamp column, or when the at least one feature_set has a timestamp key"
@@ -755,6 +759,7 @@ class BaseMerger(abc.ABC):
         start_time: typing.Union[str, datetime] = None,
         end_time: typing.Union[str, datetime] = None,
         time_column: typing.Optional[str] = None,
+        additional_filters=None,
     ):
         """
         Return the feature_set data frame according to the args
@@ -79,10 +79,10 @@ class PandasConversionMixin:
                 msg = (
                     "toPandas attempted Arrow optimization because "
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true; however, "
-                    "failed by the reason below:\n
+                    f"failed by the reason below:\n {e}\n"
                     "Attempting non-optimization as "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' is set to "
-                    "true."
+                    "true."
                 )
                 warnings.warn(msg)
                 use_arrow = False
@@ -92,7 +92,7 @@ class PandasConversionMixin:
                     "'spark.sql.execution.arrow.pyspark.enabled' is set to true, but has "
                     "reached the error below and will not continue because automatic fallback "
                     "with 'spark.sql.execution.arrow.pyspark.fallback.enabled' has been set to "
-                    "false.\n
+                    f"false.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
@@ -158,7 +158,7 @@ class PandasConversionMixin:
                     "reached the error below and can not continue. Note that "
                     "'spark.sql.execution.arrow.pyspark.fallback.enabled' does not have an "
                     "effect on failures in the middle of "
-                    "computation.\n
+                    f"computation.\n {e}"
                 )
                 warnings.warn(msg)
                 raise
mlrun/feature_store/retrieval/dask_merger.py
CHANGED
@@ -145,6 +145,7 @@ class DaskFeatureMerger(BaseMerger):
         start_time=None,
         end_time=None,
         time_column=None,
+        additional_filters=None,
     ):
         import dask.dataframe as dd
 
@@ -155,6 +156,7 @@ class DaskFeatureMerger(BaseMerger):
             end_time=end_time,
             time_column=time_column,
             index=False,
+            additional_filters=additional_filters,
         )
 
         return self._reset_index(df).persist()