mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.

Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0
mlrun/db/nopdb.py CHANGED
@@ -16,6 +16,9 @@
 import datetime
 from typing import Optional, Union
 
+import mlrun.alerts
+import mlrun.common.formatters
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.errors
 
@@ -79,7 +82,10 @@ class NopDB(RunDBInterface):
         uid: Optional[Union[str, list[str]]] = None,
         project: Optional[str] = None,
         labels: Optional[Union[str, list[str]]] = None,
-        state: Optional[str] = None,
+        state: Optional[
+            mlrun.common.runtimes.constants.RunStates
+        ] = None,  # Backward compatibility
+        states: Optional[list[mlrun.common.runtimes.constants.RunStates]] = None,
         sort: bool = True,
         last: int = 0,
         iter: bool = False,
@@ -128,7 +134,18 @@ class NopDB(RunDBInterface):
     ):
         pass
 
-    def del_artifact(self, key, tag="", project="", tree=None, uid=None):
+    def del_artifact(
+        self,
+        key,
+        tag="",
+        project="",
+        tree=None,
+        uid=None,
+        deletion_strategy: mlrun.common.schemas.artifact.ArtifactsDeletionStrategies = (
+            mlrun.common.schemas.artifact.ArtifactsDeletionStrategies.metadata_only
+        ),
+        secrets: dict = None,
+    ):
         pass
 
     def del_artifacts(self, name="", project="", tag="", labels=None):
@@ -196,7 +213,7 @@ class NopDB(RunDBInterface):
     def list_projects(
         self,
         owner: str = None,
-        format_: mlrun.common.schemas.ProjectsFormat = mlrun.common.schemas.ProjectsFormat.name_only,
+        format_: mlrun.common.formatters.ProjectFormat = mlrun.common.formatters.ProjectFormat.name_only,
         labels: list[str] = None,
         state: mlrun.common.schemas.ProjectState = None,
     ) -> mlrun.common.schemas.ProjectsOutput:
@@ -351,8 +368,8 @@ class NopDB(RunDBInterface):
         namespace: str = None,
         timeout: int = 30,
         format_: Union[
-            str, mlrun.common.schemas.PipelinesFormat
-        ] = mlrun.common.schemas.PipelinesFormat.summary,
+            str, mlrun.common.formatters.PipelineFormat
+        ] = mlrun.common.formatters.PipelineFormat.summary,
         project: str = None,
     ):
         pass
@@ -365,8 +382,8 @@ class NopDB(RunDBInterface):
         page_token: str = "",
         filter_: str = "",
         format_: Union[
-            str, mlrun.common.schemas.PipelinesFormat
-        ] = mlrun.common.schemas.PipelinesFormat.metadata_only,
+            str, mlrun.common.formatters.PipelineFormat
+        ] = mlrun.common.formatters.PipelineFormat.metadata_only,
         page_size: int = None,
     ) -> mlrun.common.schemas.PipelinesOutput:
         pass
@@ -506,12 +523,100 @@ class NopDB(RunDBInterface):
     ):
         pass
 
+    def store_api_gateway(
+        self,
+        api_gateway: Union[
+            mlrun.common.schemas.APIGateway,
+            mlrun.runtimes.nuclio.api_gateway.APIGateway,
+        ],
+        project: str = None,
+    ) -> mlrun.common.schemas.APIGateway:
+        pass
+
+    def list_api_gateways(self, project=None):
+        pass
+
+    def get_api_gateway(self, name, project=None):
+        pass
+
+    def delete_api_gateway(self, name, project=None):
+        pass
+
     def verify_authorization(
         self,
         authorization_verification_input: mlrun.common.schemas.AuthorizationVerificationInput,
     ):
         pass
 
+    def remote_builder(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        with_mlrun: bool,
+        mlrun_version_specifier: Optional[str] = None,
+        skip_deployed: bool = False,
+        builder_env: Optional[dict] = None,
+        force_build: bool = False,
+    ):
+        pass
+
+    def deploy_nuclio_function(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        builder_env: Optional[dict] = None,
+    ):
+        pass
+
+    def get_builder_status(
+        self,
+        func: "mlrun.runtimes.BaseRuntime",
+        offset: int = 0,
+        logs: bool = True,
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def get_nuclio_deploy_status(
+        self,
+        func: "mlrun.runtimes.RemoteRuntime",
+        last_log_timestamp: float = 0.0,
+        verbose: bool = False,
+    ):
+        pass
+
+    def set_run_notifications(
+        self,
+        project: str,
+        runs: list[mlrun.model.RunObject],
+        notifications: list[mlrun.model.Notification],
+    ):
+        pass
+
+    def store_run_notifications(
+        self,
+        notification_objects: list[mlrun.model.Notification],
+        run_uid: str,
+        project: str = None,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def store_alert_notifications(
+        self,
+        session,
+        notification_objects: list[mlrun.model.Notification],
+        alert_id: str,
+        project: str,
+        mask_params: bool = True,
+    ):
+        pass
+
+    def get_log_size(self, uid, project=""):
+        pass
+
+    def watch_log(self, uid, project="", watch=True, offset=0):
+        pass
+
     def get_datastore_profile(
         self, name: str, project: str
     ) -> Optional[mlrun.common.schemas.DatastoreProfile]:
@@ -529,3 +634,82 @@ class NopDB(RunDBInterface):
         self, profile: mlrun.common.schemas.DatastoreProfile, project: str
     ):
         pass
+
+    def function_status(self, project, name, kind, selector):
+        pass
+
+    def start_function(
+        self, func_url: str = None, function: "mlrun.runtimes.BaseRuntime" = None
+    ):
+        pass
+
+    def submit_workflow(
+        self,
+        project: str,
+        name: str,
+        workflow_spec: Union[
+            "mlrun.projects.pipelines.WorkflowSpec",
+            "mlrun.common.schemas.WorkflowSpec",
+            dict,
+        ],
+        arguments: Optional[dict] = None,
+        artifact_path: Optional[str] = None,
+        source: Optional[str] = None,
+        run_name: Optional[str] = None,
+        namespace: Optional[str] = None,
+        notifications: list["mlrun.model.Notification"] = None,
+    ) -> "mlrun.common.schemas.WorkflowResponse":
+        pass
+
+    def update_model_monitoring_controller(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+    ):
+        pass
+
+    def enable_model_monitoring(
+        self,
+        project: str,
+        base_period: int = 10,
+        image: str = "mlrun/mlrun",
+        deploy_histogram_data_drift_app: bool = True,
+    ) -> None:
+        pass
+
+    def deploy_histogram_data_drift_app(
+        self, project: str, image: str = "mlrun/mlrun"
+    ) -> None:
+        raise NotImplementedError
+
+    def generate_event(
+        self, name: str, event_data: Union[dict, mlrun.common.schemas.Event], project=""
+    ):
+        pass
+
+    def store_alert_config(
+        self,
+        alert_name: str,
+        alert_data: Union[dict, mlrun.alerts.alert.AlertConfig],
+        project="",
+    ):
+        pass
+
+    def get_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def list_alerts_configs(self, project=""):
+        pass
+
+    def delete_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def reset_alert_config(self, alert_name: str, project=""):
+        pass
+
+    def get_alert_template(self, template_name: str):
+        pass
+
+    def list_alert_templates(self):
+        pass
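
Note: the list_runs change above keeps the singular state argument for backward compatibility and adds a plural states filter. A minimal usage sketch against a real RunDBInterface implementation (the state constant names are assumptions based on mlrun's RunStates class in common/runtimes/constants.py, which this release relocates):

    import mlrun
    from mlrun.common.runtimes.constants import RunStates

    # Connect to the configured run DB and filter runs by several states at once.
    db = mlrun.get_run_db()
    runs = db.list_runs(
        project="default",
        states=[RunStates.completed, RunStates.error],  # plural filter added in this diff
    )
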
mlrun/errors.py CHANGED
@@ -155,6 +155,10 @@ class MLRunNotFoundError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.NOT_FOUND.value
 
 
+class MLRunPaginationEndOfResultsError(MLRunNotFoundError):
+    pass
+
+
 class MLRunBadRequestError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.BAD_REQUEST.value
 
@@ -183,6 +187,10 @@ class MLRunInternalServerError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.INTERNAL_SERVER_ERROR.value
 
 
+class MLRunNotImplementedServerError(MLRunHTTPStatusError):
+    error_status_code = HTTPStatus.NOT_IMPLEMENTED.value
+
+
 class MLRunServiceUnavailableError(MLRunHTTPStatusError):
     error_status_code = HTTPStatus.SERVICE_UNAVAILABLE.value
 
@@ -234,4 +242,7 @@ STATUS_ERRORS = {
     HTTPStatus.PRECONDITION_FAILED.value: MLRunPreconditionFailedError,
     HTTPStatus.INTERNAL_SERVER_ERROR.value: MLRunInternalServerError,
     HTTPStatus.SERVICE_UNAVAILABLE.value: MLRunServiceUnavailableError,
+    HTTPStatus.NOT_IMPLEMENTED.value: MLRunNotImplementedServerError,
 }
+
+EXPECTED_ERRORS = (MLRunPaginationEndOfResultsError,)
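
Note: with the new entry above, STATUS_ERRORS now maps HTTP 501 to MLRunNotImplementedServerError. A minimal sketch of how such a status-to-exception table can be used (the helper name is hypothetical, not part of mlrun's API):

    from http import HTTPStatus

    import mlrun.errors

    def error_for_status(status_code: int, message: str) -> Exception:
        # Look up the mapped MLRun error class; fall back to a plain Exception
        # for codes that have no dedicated class in STATUS_ERRORS.
        error_class = mlrun.errors.STATUS_ERRORS.get(status_code)
        if error_class is None:
            return Exception(f"HTTP {status_code}: {message}")
        return error_class(message)

    err = error_for_status(HTTPStatus.NOT_IMPLEMENTED.value, "not supported")
    assert isinstance(err, mlrun.errors.MLRunNotImplementedServerError)
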
mlrun/execution.py CHANGED
@@ -22,6 +22,7 @@ import yaml
 from dateutil import parser
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.artifacts import ModelArtifact
 from mlrun.datastore.store_resources import get_store_resource
 from mlrun.errors import MLRunInvalidArgumentError
@@ -129,7 +130,9 @@ class MLClientCtx:
     @property
     def tag(self):
         """Run tag (uid or workflow id if exists)"""
-        return self._labels.get("workflow") or self._uid
+        return (
+            self._labels.get(mlrun_constants.MLRunInternalLabels.workflow) or self._uid
+        )
 
     @property
     def state(self):
@@ -224,12 +227,12 @@ class MLClientCtx:
             with context.get_child_context(myparam=param) as child:
                 accuracy = child_handler(child, df, **child.parameters)
                 accuracy_sum += accuracy
-                child.log_result('accuracy', accuracy)
+                child.log_result("accuracy", accuracy)
                 if accuracy > best_accuracy:
                     child.mark_as_best()
                     best_accuracy = accuracy
 
-            context.log_result('avg_accuracy', accuracy_sum / len(param_list))
+            context.log_result("avg_accuracy", accuracy_sum / len(param_list))
 
         :param params: Extra (or override) params to parent context
         :param with_parent_params: Child will copy the parent parameters and add to them
@@ -289,7 +292,9 @@ class MLClientCtx:
 
         Example::
 
-            feature_vector = context.get_store_resource("store://feature-vectors/default/myvec")
+            feature_vector = context.get_store_resource(
+                "store://feature-vectors/default/myvec"
+            )
             dataset = context.get_store_resource("store://artifacts/default/mydata")
 
         :param url: Store resource uri/path, store://<type>/<project>/<name>:<version>
@@ -327,8 +332,10 @@ class MLClientCtx:
             "uri": uri,
             "owner": get_in(self._labels, "owner"),
         }
-        if "workflow" in self._labels:
-            resp["workflow"] = self._labels["workflow"]
+        if mlrun_constants.MLRunInternalLabels.workflow in self._labels:
+            resp[mlrun_constants.MLRunInternalLabels.workflow] = self._labels[
+                mlrun_constants.MLRunInternalLabels.workflow
+            ]
         return resp
 
     @classmethod
@@ -394,7 +401,7 @@ class MLClientCtx:
             self._set_input(k, v)
 
         if host and not is_api:
-            self.set_label("host", host)
+            self.set_label(mlrun_constants.MLRunInternalLabels.host, host)
 
         start = get_in(attrs, "status.start_time")
         if start:
@@ -421,7 +428,7 @@ class MLClientCtx:
 
         Example::
 
-            data_path=context.artifact_subpath('data')
+            data_path = context.artifact_subpath("data")
 
         """
         return os.path.join(self.artifact_path, *subpaths)
@@ -525,7 +532,7 @@ class MLClientCtx:
 
         Example::
 
-            context.log_result('accuracy', 0.85)
+            context.log_result("accuracy", 0.85)
 
         :param key: Result key
         :param value: Result value
@@ -539,7 +546,7 @@ class MLClientCtx:
 
         Example::
 
-            context.log_results({'accuracy': 0.85, 'loss': 0.2})
+            context.log_results({"accuracy": 0.85, "loss": 0.2})
 
         :param results: Key/value dict or results
         :param commit: Commit (write to DB now vs wait for the end of the run)
@@ -674,7 +681,9 @@ class MLClientCtx:
                 "age": [42, 52, 36, 24, 73],
                 "testScore": [25, 94, 57, 62, 70],
             }
-            df = pd.DataFrame(raw_data, columns=["first_name", "last_name", "age", "testScore"])
+            df = pd.DataFrame(
+                raw_data, columns=["first_name", "last_name", "age", "testScore"]
+            )
             context.log_dataset("mydf", df=df, stats=True)
 
         :param key: Artifact key
@@ -752,13 +761,16 @@ class MLClientCtx:
 
         Example::
 
-            context.log_model("model", body=dumps(model),
-                              model_file="model.pkl",
-                              metrics=context.results,
-                              training_set=training_df,
-                              label_column='label',
-                              feature_vector=feature_vector_uri,
-                              labels={"app": "fraud"})
+            context.log_model(
+                "model",
+                body=dumps(model),
+                model_file="model.pkl",
+                metrics=context.results,
+                training_set=training_df,
+                label_column="label",
+                feature_vector=feature_vector_uri,
+                labels={"app": "fraud"},
+            )
 
         :param key: Artifact key or artifact class ()
         :param body: Will use the body as the artifact content
@@ -983,10 +995,15 @@ class MLClientCtx:
983
995
  # If it's a OpenMPI job, get the global rank and compare to the logging rank (worker) set in MLRun's
984
996
  # configuration:
985
997
  labels = self.labels
986
- if "host" in labels and labels.get("kind", "job") == "mpijob":
998
+ if (
999
+ mlrun_constants.MLRunInternalLabels.host in labels
1000
+ and labels.get(mlrun_constants.MLRunInternalLabels.kind, "job") == "mpijob"
1001
+ ):
987
1002
  # The host (pod name) of each worker is created by k8s, and by default it uses the rank number as the id in
988
1003
  # the following template: ...-worker-<rank>
989
- rank = int(labels["host"].rsplit("-", 1)[1])
1004
+ rank = int(
1005
+ labels[mlrun_constants.MLRunInternalLabels.host].rsplit("-", 1)[1]
1006
+ )
990
1007
  return rank == mlrun.mlconf.packagers.logging_worker
991
1008
 
992
1009
  # Single worker is always the logging worker:
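
Note: the last hunk above only swaps the hard-coded "host"/"kind" label keys for MLRunInternalLabels constants; the rank extraction itself is unchanged. A small self-contained sketch of that logic (the function name is illustrative only):

    def rank_from_worker_pod_name(pod_name: str) -> int:
        # OpenMPI worker pods are named "...-worker-<rank>" by k8s, so the
        # global rank can be recovered from the suffix after the last dash.
        return int(pod_name.rsplit("-", 1)[1])

    assert rank_from_worker_pod_name("train-job-worker-3") == 3
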
mlrun/feature_store/__init__.py CHANGED
@@ -19,7 +19,6 @@ __all__ = [
     "get_online_feature_service",
     "ingest",
     "preview",
-    "deploy_ingestion_service",
     "deploy_ingestion_service_v2",
     "delete_feature_set",
     "delete_feature_vector",
@@ -41,7 +40,6 @@ from ..features import Entity, Feature
 from .api import (
     delete_feature_set,
     delete_feature_vector,
-    deploy_ingestion_service,
     deploy_ingestion_service_v2,
     get_feature_set,
     get_feature_vector,
mlrun/feature_store/api.py CHANGED
@@ -113,6 +113,7 @@ def get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters: list = None,
 ):
     """retrieve offline feature vector results
 
@@ -136,7 +137,10 @@ def get_offline_features(
         ]
         vector = FeatureVector(features=features)
         resp = get_offline_features(
-            vector, entity_rows=trades, entity_timestamp_column="time", query="ticker in ['GOOG'] and bid>100"
+            vector,
+            entity_rows=trades,
+            entity_timestamp_column="time",
+            query="ticker in ['GOOG'] and bid>100",
         )
         print(resp.to_dataframe())
         print(vector.get_stats_table())
@@ -172,6 +176,13 @@ def get_offline_features(
                                      By default, the filter executes on the timestamp_key of each feature set.
                                      Note: the time filtering is performed on each feature set before the
                                      merge process using start_time and end_time params.
+    :param additional_filters: List of additional_filter conditions as tuples.
+                               Each tuple should be in the format (column_name, operator, value).
+                               Supported operators: "=", ">=", "<=", ">", "<".
+                               Example: [("Product", "=", "Computer")]
+                               For all supported filters, please see:
+                               https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetDataset.html
+
 
     """
     return _get_offline_features(
@@ -191,6 +202,7 @@ def get_offline_features(
         order_by,
         spark_service,
         timestamp_for_filtering,
+        additional_filters,
     )
 
 
@@ -211,6 +223,7 @@ def _get_offline_features(
     order_by: Union[str, list[str]] = None,
     spark_service: str = None,
     timestamp_for_filtering: Union[str, dict[str, str]] = None,
+    additional_filters=None,
 ) -> Union[OfflineVectorResponse, RemoteVectorResponse]:
     if entity_rows is None and entity_timestamp_column is not None:
         raise mlrun.errors.MLRunInvalidArgumentError(
@@ -249,6 +262,7 @@ def _get_offline_features(
         start_time=start_time,
         end_time=end_time,
         timestamp_for_filtering=timestamp_for_filtering,
+        additional_filters=additional_filters,
     )
 
     merger = merger_engine(feature_vector, **(engine_args or {}))
@@ -264,6 +278,7 @@ def _get_offline_features(
         update_stats=update_stats,
         query=query,
         order_by=order_by,
+        additional_filters=additional_filters,
     )
 
 
@@ -307,7 +322,7 @@ def get_online_feature_service(
 
     Example::
 
-        svc = get_online_feature_service(vector_uri, entity_keys=['ticker'])
+        svc = get_online_feature_service(vector_uri, entity_keys=["ticker"])
         try:
             resp = svc.get([{"ticker": "GOOG"}, {"ticker": "MSFT"}])
             print(resp)
@@ -456,7 +471,7 @@ def ingest(
         df = ingest(stocks_set, stocks, infer_options=fstore.InferOptions.default())
 
         # for running as remote job
-        config = RunConfig(image='mlrun/mlrun')
+        config = RunConfig(image="mlrun/mlrun")
         df = ingest(stocks_set, stocks, run_config=config)
 
         # specify source and targets
@@ -1002,53 +1017,6 @@ def _deploy_ingestion_service_v2(
     return function.deploy(), function
 
 
-@deprecated(
-    version="1.5.0",
-    reason="'deploy_ingestion_service' will be removed in 1.7.0, use 'deploy_ingestion_service_v2' instead",
-    category=FutureWarning,
-)
-def deploy_ingestion_service(
-    featureset: Union[FeatureSet, str],
-    source: DataSource = None,
-    targets: list[DataTargetBase] = None,
-    name: str = None,
-    run_config: RunConfig = None,
-    verbose=False,
-) -> str:
-    """Start real-time ingestion service using nuclio function
-
-    Deploy a real-time function implementing feature ingestion pipeline
-    the source maps to Nuclio event triggers (http, kafka, v3io stream, etc.)
-
-    the `run_config` parameter allow specifying the function and job configuration,
-    see: :py:class:`~mlrun.feature_store.RunConfig`
-
-    example::
-
-        source = HTTPSource()
-        func = mlrun.code_to_function("ingest", kind="serving").apply(mount_v3io())
-        config = RunConfig(function=func)
-        my_set.deploy_ingestion_service(source, run_config=config)
-
-    :param featureset: feature set object or uri
-    :param source: data source object describing the online or offline source
-    :param targets: list of data target objects
-    :param name: name for the job/function
-    :param run_config: service runtime configuration (function object/uri, resources, etc..)
-    :param verbose: verbose log
-
-    :return: URL to access the deployed ingestion service
-    """
-    endpoint, _ = featureset.deploy_ingestion_service(
-        source=source,
-        targets=targets,
-        name=name,
-        run_config=run_config,
-        verbose=verbose,
-    )
-    return endpoint
-
-
 def _ingest_with_spark(
     spark=None,
     featureset: Union[FeatureSet, str] = None,
@@ -1121,9 +1089,10 @@ def _ingest_with_spark(
             df_to_write = target.prepare_spark_df(
                 df_to_write, key_columns, timestamp_key, spark_options
             )
+            write_format = spark_options.pop("format", None)
             if overwrite:
                 write_spark_dataframe_with_options(
-                    spark_options, df_to_write, "overwrite"
+                    spark_options, df_to_write, "overwrite", write_format=write_format
                 )
             else:
                 # appending an empty dataframe may cause an empty file to be created (e.g. when writing to parquet)
@@ -1131,7 +1100,7 @@ def _ingest_with_spark(
                 df_to_write.persist()
                 if df_to_write.count() > 0:
                     write_spark_dataframe_with_options(
-                        spark_options, df_to_write, "append"
+                        spark_options, df_to_write, "append", write_format=write_format
                     )
             target.update_resource_status("ready")
 
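
Note: the additional_filters parameter threaded through get_offline_features and _get_offline_features above takes (column, operator, value) tuples, per the new docstring. A minimal usage sketch (the vector definition is shortened and the feature/column names are placeholders):

    import mlrun.feature_store as fstore

    vector = fstore.FeatureVector(name="my-vector", features=["transactions.*"])
    resp = fstore.get_offline_features(
        vector,
        additional_filters=[("Product", "=", "Computer")],  # (column, operator, value)
    )
    df = resp.to_dataframe()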