mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
@@ -13,24 +13,27 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
 import typing
 import uuid
 
-import kfp.compiler
-from kfp import dsl
-from kfp.compiler import compiler
+import mlrun_pipelines.common.models
+import mlrun_pipelines.patcher
+import mlrun_pipelines.utils
 
 import mlrun
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
+import mlrun.common.schemas.function
+import mlrun.common.schemas.workflow
 import mlrun.utils.notifications
 from mlrun.errors import err_to_str
 from mlrun.utils import (
     get_ui_url,
     logger,
-    new_pipe_metadata,
     normalize_workflow_name,
     retry_until_successful,
 )
@@ -43,21 +46,21 @@ from ..runtimes.pod import AutoMountType
 
 def get_workflow_engine(engine_kind, local=False):
     if pipeline_context.is_run_local(local):
-        if engine_kind == "kfp":
+        if engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
             logger.warning(
                 "Running kubeflow pipeline locally, note some ops may not run locally!"
             )
-        elif engine_kind == "remote":
+        elif engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot run a remote pipeline locally using `kind='remote'` and `local=True`. "
                 "in order to run a local pipeline remotely, please use `engine='remote:local'` instead"
             )
         return _LocalRunner
-    if not engine_kind or engine_kind == "kfp":
+    if not engine_kind or engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
        return _KFPRunner
-    if engine_kind == "local":
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.LOCAL:
        return _LocalRunner
-    if engine_kind == "remote":
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
        return _RemoteRunner
     raise mlrun.errors.MLRunInvalidArgumentError(
         f"Provided workflow engine is not supported. engine_kind={engine_kind}"
@@ -79,6 +82,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
         cleanup_ttl: typing.Optional[int] = None,
         image: typing.Optional[str] = None,
+        workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
     ):
         self.engine = engine
         self.code = code
@@ -92,6 +96,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
         self._tmp_path = None
         self.schedule = schedule
         self.image = image
+        self.workflow_runner_node_selector = workflow_runner_node_selector
 
     def get_source_file(self, context=""):
         if not self.code and not self.path:
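The new `workflow_runner_node_selector` field lets the pod that drives the workflow be pinned to specific nodes. A hypothetical construction (the label keys and values are examples, and the other constructor arguments are assumed to keep their defaults):

spec = WorkflowSpec(
    engine="kfp",
    path="./workflow.py",  # example workflow file
    workflow_runner_node_selector={
        "kubernetes.io/arch": "amd64",  # example node label
        "node-role/workflows": "true",  # example node label
    },
)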
@@ -218,9 +223,10 @@ class _PipelineContext:
         force_run_local = mlrun.mlconf.force_run_local
         if force_run_local is None or force_run_local == "auto":
             force_run_local = not mlrun.mlconf.is_api_running_on_k8s()
-            kfp_url = mlrun.mlconf.resolve_kfp_url()
-            if not kfp_url:
+            if not mlrun.mlconf.kfp_url:
+                logger.debug("Kubeflow pipeline URL is not set, running locally")
                 force_run_local = True
+
         if self.workflow:
             force_run_local = force_run_local or self.workflow.run_local
 
@@ -300,72 +306,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
     }
 
 
-# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
-# converts it to a k8s object. As part of the flow in the Compile.compile() method,
-# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
-# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
-# I ran across the following problem when seeking for a method to set the priority_class_name:
-# https://github.com/kubeflow/pipelines/issues/3594
-# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
-# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
-# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
-def _create_enriched_mlrun_workflow(
-    self,
-    pipeline_func: typing.Callable,
-    pipeline_name: typing.Optional[str] = None,
-    pipeline_description: typing.Optional[str] = None,
-    params_list: typing.Optional[list[dsl.PipelineParam]] = None,
-    pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
-):
-    """Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
-    workflow = self._original_create_workflow(
-        pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
-    )
-    # We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
-    # that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
-    # we know can be raised.
-    try:
-        functions = []
-        if pipeline_context.functions:
-            try:
-                functions = pipeline_context.functions.values()
-            except Exception as err:
-                logger.debug(
-                    "Unable to retrieve project functions, not enriching workflow with mlrun",
-                    error=err_to_str(err),
-                )
-                return workflow
-
-        # enrich each pipeline step with your desire k8s attribute
-        for kfp_step_template in workflow["spec"]["templates"]:
-            if kfp_step_template.get("container"):
-                for function_obj in functions:
-                    # we condition within each function since the comparison between the function and
-                    # the kfp pod may change depending on the attribute type.
-                    _set_function_attribute_on_kfp_pod(
-                        kfp_step_template,
-                        function_obj,
-                        "PriorityClassName",
-                        "priority_class_name",
-                    )
-                    _enrich_kfp_pod_security_context(
-                        kfp_step_template,
-                        function_obj,
-                    )
-    except mlrun.errors.MLRunInvalidArgumentError:
-        raise
-    except Exception as err:
-        logger.debug(
-            "Something in the enrichment of kfp pods failed", error=err_to_str(err)
-        )
-    return workflow
-
-
-# patching function as class method
-kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
-kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
-
-
 def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
     project_instance, name, tag, hash_key = parse_versioned_object_uri(
         key, project.metadata.name
@@ -375,7 +315,11 @@ def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
 
 
 def enrich_function_object(
-    project, function, decorator=None, copy_function=True, try_auto_mount=True
+    project: mlrun.common.schemas.Project,
+    function: mlrun.runtimes.BaseRuntime,
+    decorator: typing.Callable = None,
+    copy_function: bool = True,
+    try_auto_mount: bool = True,
 ) -> mlrun.runtimes.BaseRuntime:
     if hasattr(function, "_enriched"):
         return function
@@ -412,6 +356,10 @@
     if decorator:
         decorator(f)
 
+    if project.spec.default_function_node_selector:
+        f.enrich_runtime_spec(
+            project.spec.default_function_node_selector,
+        )
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
@@ -431,7 +379,7 @@ class _PipelineRunStatus:
         engine: type["_PipelineRunner"],
         project: "mlrun.projects.MlrunProject",
         workflow: WorkflowSpec = None,
-        state: str = "",
+        state: mlrun_pipelines.common.models.RunStatuses = "",
         exc: Exception = None,
     ):
         """
@@ -451,7 +399,10 @@
 
     @property
     def state(self):
-        if self._state not in mlrun.run.RunStatuses.stable_statuses():
+        if (
+            self._state
+            not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
+        ):
             self._state = self._engine.get_state(self.run_id, self.project)
         return self._state
 
@@ -460,12 +411,15 @@
         return self._exc
 
     def wait_for_completion(self, timeout=None, expected_statuses=None):
-        self._state = self._engine.wait_for_completion(
-            self.run_id,
+        returned_state = self._engine.wait_for_completion(
+            self,
             project=self.project,
             timeout=timeout,
             expected_statuses=expected_statuses,
         )
+        # TODO: returning a state is optional until all runners implement wait_for_completion
+        if returned_state:
+            self._state = returned_state
         return self._state
 
     def __str__(self):
@@ -505,7 +459,12 @@ class _PipelineRunner(abc.ABC):
 
     @staticmethod
     @abc.abstractmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
+    def wait_for_completion(
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+        timeout: typing.Optional[int] = None,
+        expected_statuses: list[str] = None,
+    ):
         pass
 
     @staticmethod
@@ -513,10 +472,52 @@
     @abc.abstractmethod
     def get_state(run_id, project=None):
         pass
 
+    @staticmethod
+    def get_run_status(
+        project,
+        run: _PipelineRunStatus,
+        timeout=None,
+        expected_statuses=None,
+        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
+    ):
+        timeout = timeout or 60 * 60
+        raise_error = None
+        state = ""
+        try:
+            if timeout:
+                state = run.wait_for_completion(
+                    timeout=timeout, expected_statuses=expected_statuses
+                )
+        except RuntimeError as exc:
+            # push runs table also when we have errors
+            raise_error = exc
+
+        mldb = mlrun.db.get_run_db(secrets=project._secrets)
+        runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
+
+        # TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
+        errors_counter = 0
+        for r in runs:
+            if r["status"].get("state", "") == "error":
+                errors_counter += 1
+
+        text = _PipelineRunner._generate_workflow_finished_message(
+            run.run_id, errors_counter, run._state
+        )
+
+        notifiers = notifiers or project.notifiers
+        if notifiers:
+            notifiers.push(text, "info", runs)
+
+        if raise_error:
+            raise raise_error
+        return state or run._state, errors_counter, text
+
     @staticmethod
     def _get_handler(workflow_handler, workflow_spec, project, secrets):
         if not (workflow_handler and callable(workflow_handler)):
-            workflow_file = workflow_spec.get_source_file(project.spec.context)
+            workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
             workflow_handler = create_pipeline(
                 project,
                 workflow_file,
@@ -529,15 +530,13 @@ class _PipelineRunner(abc.ABC):
         return workflow_handler
 
     @staticmethod
-    @abc.abstractmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-    ):
-        pass
+    def _generate_workflow_finished_message(run_id, errors_counter, state):
+        text = f"Workflow {run_id} finished"
+        if errors_counter:
+            text += f" with {errors_counter} errors"
+        if state:
+            text += f", state={state}"
+        return text
 
 
 class _KFPRunner(_PipelineRunner):
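The per-runner notification text is now built by the shared `_generate_workflow_finished_message` helper added above. A quick illustration of its output (helper body copied from this diff):

def _generate_workflow_finished_message(run_id, errors_counter, state):
    text = f"Workflow {run_id} finished"
    if errors_counter:
        text += f" with {errors_counter} errors"
    if state:
        text += f", state={state}"
    return text

print(_generate_workflow_finished_message("abc123", 0, "Succeeded"))
# Workflow abc123 finished, state=Succeeded
print(_generate_workflow_finished_message("abc123", 2, "Failed"))
# Workflow abc123 finished with 2 errors, state=Failed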
@@ -548,7 +547,7 @@ class _KFPRunner(_PipelineRunner):
     @classmethod
     def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
         pipeline_context.set(project, workflow_spec)
-        workflow_file = workflow_spec.get_source_file(project.spec.context)
+        workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
         functions = FunctionsDict(project)
         pipeline = create_pipeline(
             project,
@@ -556,13 +555,14 @@
             functions,
             secrets=project._secrets,
         )
-        artifact_path = artifact_path or project.spec.artifact_path
-
-        conf = new_pipe_metadata(
-            artifact_path=artifact_path,
+        mlrun_pipelines.utils.compile_pipeline(
+            artifact_path=artifact_path or project.spec.artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            ops=None,
+            pipeline=pipeline,
+            pipe_file=target,
+            type_check=True,
         )
-        compiler.Compiler().compile(pipeline, target, pipeline_conf=conf)
         workflow_spec.clear_tmp()
         pipeline_context.clear()
 
@@ -593,12 +593,13 @@
             logger.warning(
                 "Setting notifications on kfp pipeline runner uses old notification behavior. "
                 "Notifications will only be sent if you wait for pipeline completion. "
-                "To use the new notification behavior, use the remote pipeline runner."
+                "Some of the features (like setting message or severity level) are not supported."
             )
-            for notification in notifications:
-                project.notifiers.add_notification(
-                    notification.kind, notification.params
-                )
+            # for start message, fallback to old notification behavior
+            for notification in notifications or []:
+                params = notification.params
+                params.update(notification.secret_params)
+                project.notifiers.add_notification(notification.kind, params)
 
         run_id = _run_pipeline(
             workflow_handler,
@@ -608,6 +609,7 @@
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup
@@ -625,7 +627,6 @@
                     func_name=func.metadata.name,
                     exc_info=err_to_str(exc),
                 )
-
         project.notifiers.push_pipeline_start_message(
             project.metadata.name,
             project.get_param("commit_id", None),
@@ -636,12 +637,21 @@
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
 
     @staticmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
-        if timeout is None:
-            timeout = 60 * 60
+    def wait_for_completion(
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+        timeout: typing.Optional[int] = None,
+        expected_statuses: list[str] = None,
+    ):
         project_name = project.metadata.name if project else ""
+        logger.info(
+            "Waiting for pipeline run completion",
+            run_id=run.run_id,
+            project=project_name,
+        )
+        timeout = timeout or 60 * 60
         run_info = wait_for_pipeline_completion(
-            run_id,
+            run.run_id,
             timeout=timeout,
             expected_statuses=expected_statuses,
             project=project_name,
@@ -659,50 +669,6 @@
             return resp["run"].get("status", "")
         return ""
 
-    @staticmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-    ):
-        if timeout is None:
-            timeout = 60 * 60
-        state = ""
-        raise_error = None
-        try:
-            if timeout:
-                logger.info("Waiting for pipeline run completion")
-                state = run.wait_for_completion(
-                    timeout=timeout, expected_statuses=expected_statuses
-                )
-        except RuntimeError as exc:
-            # push runs table also when we have errors
-            raise_error = exc
-
-        mldb = mlrun.db.get_run_db(secrets=project._secrets)
-        runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
-
-        # TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
-        had_errors = 0
-        for r in runs:
-            if r["status"].get("state", "") == "error":
-                had_errors += 1
-
-        text = f"Workflow {run.run_id} finished"
-        if had_errors:
-            text += f" with {had_errors} errors"
-        if state:
-            text += f", state={state}"
-
-        notifiers = notifiers or project.notifiers
-        notifiers.push(text, "info", runs)
-
-        if raise_error:
-            raise raise_error
-        return state, had_errors, text
-
 
 class _LocalRunner(_PipelineRunner):
     """local pipelines runner"""
@@ -741,13 +707,14 @@ class _LocalRunner(_PipelineRunner):
         original_source = project.spec.source
         project.set_source(source=source)
         pipeline_context.workflow_artifact_path = artifact_path
+
         project.notifiers.push_pipeline_start_message(
             project.metadata.name, pipeline_id=workflow_id
         )
         err = None
         try:
             workflow_handler(**workflow_spec.args)
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
         except Exception as exc:
             err = exc
             logger.exception("Workflow run failed")
@@ -755,7 +722,7 @@
                 f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
         project.notifiers.push_pipeline_run_results(
             pipeline_context.runs_map.values(), state=state
@@ -779,17 +746,10 @@
         return ""
 
     @staticmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
-        pass
-
-    @staticmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-    ):
+    def wait_for_completion(run, project=None, timeout=None, expected_statuses=None):
+        # TODO: local runner blocks for the duration of the pipeline.
+        # Therefore usually there will be nothing to wait for.
+        # However, users may run functions with watch=False and then it can be useful to wait for the runs here.
         pass
 
 
@@ -814,10 +774,6 @@
         workflow_name = normalize_workflow_name(name=name, project_name=project.name)
         workflow_id = None
 
-        # for start message, fallback to old notification behavior
-        for notification in notifications or []:
-            project.notifiers.add_notification(notification.kind, notification.params)
-
         # The returned engine for this runner is the engine of the workflow.
         # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
         inner_engine = get_workflow_engine(workflow_spec.engine)
@@ -865,22 +821,44 @@
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
         logger.debug(
             "Workflow submitted, waiting for pipeline run to start",
             workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
         )
 
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-            getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
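The `_get_workflow_id_or_bail` closure introduces a fail-fast path: an HTTP 412 is wrapped in `MLRunFatalFailureError` so the retry loop aborts immediately instead of retrying until the deadline. A minimal sketch of that pattern with illustrative names (the real `retry_until_successful` signature differs):

import time

class FatalFailure(Exception):
    """Raised to abort retrying immediately."""

def retry_until_successful(interval, timeout, func):
    deadline = time.monotonic() + timeout
    while True:
        try:
            return func()
        except FatalFailure:
            raise  # fatal: surface immediately, no more retries
        except Exception:
            if time.monotonic() >= deadline:
                raise  # out of budget: surface the last error
            time.sleep(interval)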
@@ -892,12 +870,9 @@
                 f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
-            state = mlrun.run.RunStatuses.succeeded
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-            )
+            state = mlrun_pipelines.common.models.RunStatuses.running
         pipeline_context.clear()
         return _PipelineRunStatus(
             run_id=workflow_id,
@@ -911,24 +886,59 @@
     @staticmethod
     def get_run_status(
         project,
-        run,
+        run: _PipelineRunStatus,
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        inner_engine: type[_PipelineRunner] = None,
     ):
-        # ignore notifiers, as they are handled by the remote pipeline notifications,
-        # so overriding with CustomNotificationPusher with empty list of notifiers
-        state, had_errors, text = _KFPRunner.get_run_status(
-            project,
-            run,
-            timeout,
-            expected_statuses,
-            notifiers=mlrun.utils.notifications.CustomNotificationPusher([]),
-        )
+        inner_engine = inner_engine or _KFPRunner
+        if inner_engine.engine == _KFPRunner.engine:
+            # ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
+            # so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
+            local_project_notifiers = list(
+                set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
+                    set(project.notifiers.notifications.keys())
+                )
+            )
+            notifiers = mlrun.utils.notifications.CustomNotificationPusher(
+                local_project_notifiers
+            )
+            return _KFPRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )
+
+        elif inner_engine.engine == _LocalRunner.engine:
+            mldb = mlrun.db.get_run_db(secrets=project._secrets)
+            pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
+
+            pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
+
+            # here we are waiting for the pipeline run to complete and refreshing after that the pipeline run from the
+            # db
+            # TODO: do it with timeout
+            pipeline_runner_run.logs(db=mldb)
+            pipeline_runner_run.refresh()
+            run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
+                pipeline_runner_run.status.state
+            )
+            run._exc = pipeline_runner_run.status.error
+            return _LocalRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )
 
-        # indicate the pipeline status since we don't push the notifications in the remote runner
-        logger.info(text)
-        return state, had_errors, text
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Unsupported inner runner engine: {inner_engine.engine}"
+            )
 
 
 def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
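In the local-engine branch above, `RunStates.run_state_to_pipeline_run_status` translates the runner run's state into a pipeline status. That mapping is not shown in this diff; a hypothetical version, just to convey the shape (the keys and values here are guesses, not the mlrun constants):

_RUN_STATE_TO_PIPELINE_STATUS = {
    "completed": "Succeeded",  # assumed pairing
    "error": "Failed",         # assumed pairing
    "aborted": "Failed",       # assumed pairing
    "running": "Running",      # assumed pairing
}

def run_state_to_pipeline_run_status(run_state: str) -> str:
    return _RUN_STATE_TO_PIPELINE_STATUS.get(run_state, "Running")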
@@ -974,14 +984,25 @@ def github_webhook(request):
     return {"msg": "pushed"}
 
 
-def load_and_run(
+def load_and_run(context, *args, **kwargs):
+    """
+    This function serves as an alias to `load_and_run_workflow`,
+    allowing to continue using `load_and_run` without modifying existing workflows or exported runs.
+    This approach ensures backward compatibility,
+    while directing all new calls to the updated `load_and_run_workflow` function.
+    """
+    kwargs.pop("load_only", None)
+    kwargs.pop("save", None)
+    load_and_run_workflow(context, *args, **kwargs)
+
+
+def load_and_run_workflow(
     context: mlrun.execution.MLClientCtx,
     url: str = None,
     project_name: str = "",
     init_git: bool = None,
     subpath: str = None,
     clone: bool = False,
-    save: bool = True,
     workflow_name: str = None,
     workflow_path: str = None,
     workflow_arguments: dict[str, typing.Any] = None,
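The alias above keeps old call sites working: exported runs that still pass the removed `save`/`load_only` arguments have them stripped before delegation. A hypothetical pair of call sites (`ctx` stands in for an `MLClientCtx`):

# Old exported run: the removed kwargs are silently dropped by the alias.
load_and_run(ctx, url="git://example/repo.git#main", project_name="demo",
             save=True, load_only=False)

# Equivalent call for new code:
load_and_run_workflow(ctx, url="git://example/repo.git#main", project_name="demo")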
@@ -994,14 +1015,12 @@
     local: bool = None,
     schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
     cleanup_ttl: int = None,
-    load_only: bool = False,
     wait_for_completion: bool = False,
     project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
     This function loads a project from a given remote source and then runs the workflow.
-
     :param context: mlrun context.
     :param url: remote url that represents the project's source.
         See 'mlrun.load_project()' for details
@@ -1009,7 +1028,6 @@
     :param init_git: if True, will git init the context dir
     :param subpath: project subpath (within the archive)
     :param clone: if True, always clone (delete any existing content)
-    :param save: whether to save the created project and artifact in the DB
     :param workflow_name: name of the workflow
     :param workflow_path: url to a workflow file, if not a project workflow
     :param workflow_arguments: kubeflow pipelines arguments (parameters)
@@ -1025,48 +1043,38 @@
     :param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
     :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
         workflow and all its resources are deleted)
-    :param load_only: for just loading the project, inner use.
     :param wait_for_completion: wait for workflow completion before returning
     :param project_context: project context path (used for loading the project)
     """
-    try:
-        project = mlrun.load_project(
-            context=project_context or f"./{project_name}",
-            url=url,
-            name=project_name,
-            init_git=init_git,
-            subpath=subpath,
-            clone=clone,
-            save=save,
-            sync_functions=True,
-        )
-    except Exception as error:
-        if schedule:
-            notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
-                ["slack"]
-            )
-            url = get_ui_url(project_name, context.uid)
-            link = f"<{url}|*view workflow job details*>"
-            message = (
-                f":x: Failed to run scheduled workflow {workflow_name} in Project {project_name} !\n"
-                f"error: ```{error}```\n{link}"
-            )
-            # Sending Slack Notification without losing the original error:
-            try:
-                notification_pusher.push(
-                    message=message,
-                    severity=mlrun.common.schemas.NotificationSeverity.ERROR,
-                )
-
-            except Exception as exc:
-                logger.error("Failed to send slack notification", exc=exc)
-
-        raise error
+    project_context = project_context or f"./{project_name}"
+
+    # Load the project to fetch files which the runner needs, such as remote source files
+    pull_remote_project_files(
+        context=context,
+        project_context=project_context,
+        url=url,
+        project_name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        schedule=schedule,
+        workflow_name=workflow_name,
+    )
 
-    context.logger.info(f"Loaded project {project.name} successfully")
+    # Retrieve the project object:
+    # - If the project exists in the MLRun database, it will be loaded from there.
+    # - If it doesn't exist in the database, it will be created from the previously loaded local directory.
+    project = mlrun.get_or_create_project(
+        context=project_context or f"./{project_name}",
+        name=project_name,
+    )
 
-    if load_only:
-        return
+    # extract "start" notification if exists
+    start_notifications = [
+        notification
+        for notification in context.get_notifications(unmask_secret_params=True)
+        if "running" in notification.when
+    ]
 
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(f"Running workflow {workflow_log_message} from remote")
@@ -1083,26 +1091,165 @@
         cleanup_ttl=cleanup_ttl,
         engine=engine,
         local=local,
+        notifications=start_notifications,
     )
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
 
-    if run.state == mlrun.run.RunStatuses.failed:
+    if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
 
     if wait_for_completion:
+        handle_workflow_completion(
+            run=run,
+            project=project,
+            context=context,
+            workflow_log_message=workflow_log_message,
+        )
+
+
+def pull_remote_project_files(
+    context: mlrun.execution.MLClientCtx,
+    project_context: str,
+    url: str,
+    project_name: str,
+    init_git: typing.Optional[bool],
+    subpath: typing.Optional[str],
+    clone: bool,
+    schedule: typing.Optional[
+        typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger]
+    ],
+    workflow_name: typing.Optional[str],
+) -> None:
+    """
+    Load the project to clone remote files if they exist.
+    If an exception occurs during project loading, send a notification if the workflow is scheduled.
+
+    :param context: MLRun execution context.
+    :param project_context: Path to the project context.
+    :param url: URL of the project repository.
+    :param project_name: Name of the project.
+    :param init_git: Initialize a git repository.
+    :param subpath: Project subpath within the repository.
+    :param clone: Whether to clone the repository.
+    :param schedule: Schedule for running the workflow.
+    :param workflow_name: Name of the workflow to run.
+    """
+    try:
+        # Load the project to clone remote files if they exist.
+        # Using save=False to avoid overriding changes from the database if it already exists.
+        mlrun.load_project(
+            context=project_context,
+            url=url,
+            name=project_name,
+            init_git=init_git,
+            subpath=subpath,
+            clone=clone,
+            save=False,
+        )
+    except Exception as error:
+        notify_scheduled_workflow_failure(
+            schedule=schedule,
+            project_name=project_name,
+            workflow_name=workflow_name,
+            error=error,
+            context_uid=context.uid,
+        )
+        raise error
+
+
+def notify_scheduled_workflow_failure(
+    schedule,
+    project_name: str,
+    workflow_name: str,
+    error: Exception,
+    context_uid: str,
+) -> None:
+    if schedule:
+        notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
+            ["slack"]
+        )
+        url = get_ui_url(project_name, context_uid)
+        link = f"<{url}|*view workflow job details*>"
+        message = (
+            f":x: Failed to run scheduled workflow {workflow_name} "
+            f"in Project {project_name}!\n"
+            f"Error: ```{err_to_str(error)}```\n{link}"
+        )
+        # Sending Slack Notification without losing the original error:
         try:
-            run.wait_for_completion()
-        except Exception as exc:
-            logger.error(
-                "Failed waiting for workflow completion",
-                workflow=workflow_log_message,
-                exc=err_to_str(exc),
+            notification_pusher.push(
+                message=message,
+                severity=mlrun.common.schemas.NotificationSeverity.ERROR,
             )
 
-        pipeline_state, _, _ = project.get_run_status(run)
-        context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-        if pipeline_state != mlrun.run.RunStatuses.succeeded:
-            raise RuntimeError(
-                f"Workflow {workflow_log_message} failed, state={pipeline_state}"
-            )
+        except Exception as exc:
+            logger.error("Failed to send slack notification", exc=err_to_str(exc))
+
+
+def handle_workflow_completion(
+    run: _PipelineRunStatus,
+    project,
+    context: mlrun.execution.MLClientCtx,
+    workflow_log_message: str,
+) -> None:
+    """
+    Handle workflow completion by waiting for it to finish and logging the final state.
+
+    :param run: Run object containing workflow execution details.
+    :param project: MLRun project object.
+    :param context: MLRun execution context.
+    :param workflow_log_message: Message used for logging.
+    """
+    try:
+        run.wait_for_completion()
+    except Exception as exc:
+        mlrun.utils.logger.error(
+            "Failed waiting for workflow completion",
+            workflow=workflow_log_message,
+            exc=err_to_str(exc),
+        )
+
+    pipeline_state, _, _ = project.get_run_status(run)
+    context.log_result(key="workflow_state", value=pipeline_state, commit=True)
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+        raise RuntimeError(
+            f"Workflow {workflow_log_message} failed, state={pipeline_state}"
+        )
+
+
+def import_remote_project(
+    context: mlrun.execution.MLClientCtx,
+    url: str = None,
+    project_name: str = "",
+    init_git: bool = None,
+    subpath: str = None,
+    clone: bool = False,
+    save: bool = True,
+    project_context: str = None,
+):
+    """
+    This function loads a project from a given remote source.
+
+    :param context: mlrun context.
+    :param url: remote url that represents the project's source.
+        See 'mlrun.load_project()' for details
+    :param project_name: project name
+    :param init_git: if True, will git init the context dir
+    :param subpath: project subpath (within the archive)
+    :param clone: if True, always clone (delete any existing content)
+    :param save: whether to save the created project and artifact in the DB
+    :param project_context: project context path (used for loading the project)
+    """
+    project = mlrun.load_project(
+        context=project_context or f"./{project_name}",
+        url=url,
+        name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        save=save,
+        sync_functions=True,
+    )
+
+    context.logger.info(f"Loaded project {project.name} successfully")