PyPI - mlrun - Versions diffs - 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl - Mend

mlrun 1.7.0rc28py3-none-any.whl → 1.7.0rc55py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic. Click here for more details.

Files changed (135) hide show

mlrun/__main__.py +4 -2
mlrun/alerts/alert.py +75 -8
mlrun/artifacts/base.py +1 -0
mlrun/artifacts/manager.py +9 -2
mlrun/common/constants.py +4 -1
mlrun/common/db/sql_session.py +3 -2
mlrun/common/formatters/__init__.py +1 -0
mlrun/common/formatters/artifact.py +1 -0
mlrun/{model_monitoring/application.py → common/formatters/feature_set.py} +20 -6
mlrun/common/formatters/run.py +3 -0
mlrun/common/helpers.py +0 -1
mlrun/common/schemas/__init__.py +3 -1
mlrun/common/schemas/alert.py +15 -12
mlrun/common/schemas/api_gateway.py +6 -6
mlrun/common/schemas/auth.py +5 -0
mlrun/common/schemas/client_spec.py +0 -1
mlrun/common/schemas/common.py +7 -4
mlrun/common/schemas/frontend_spec.py +7 -0
mlrun/common/schemas/function.py +7 -0
mlrun/common/schemas/model_monitoring/__init__.py +4 -3
mlrun/common/schemas/model_monitoring/constants.py +41 -26
mlrun/common/schemas/model_monitoring/model_endpoints.py +23 -47
mlrun/common/schemas/notification.py +69 -12
mlrun/common/schemas/project.py +45 -12
mlrun/common/schemas/workflow.py +10 -2
mlrun/common/types.py +1 -0
mlrun/config.py +91 -35
mlrun/data_types/data_types.py +6 -1
mlrun/data_types/spark.py +2 -2
mlrun/data_types/to_pandas.py +57 -25
mlrun/datastore/__init__.py +1 -0
mlrun/datastore/alibaba_oss.py +3 -2
mlrun/datastore/azure_blob.py +125 -37
mlrun/datastore/base.py +42 -21
mlrun/datastore/datastore.py +4 -2
mlrun/datastore/datastore_profile.py +1 -1
mlrun/datastore/dbfs_store.py +3 -7
mlrun/datastore/filestore.py +1 -3
mlrun/datastore/google_cloud_storage.py +85 -29
mlrun/datastore/inmem.py +4 -1
mlrun/datastore/redis.py +1 -0
mlrun/datastore/s3.py +25 -12
mlrun/datastore/sources.py +76 -4
mlrun/datastore/spark_utils.py +30 -0
mlrun/datastore/storeytargets.py +151 -0
mlrun/datastore/targets.py +102 -131
mlrun/datastore/v3io.py +1 -0
mlrun/db/base.py +15 -6
mlrun/db/httpdb.py +57 -28
mlrun/db/nopdb.py +29 -5
mlrun/errors.py +20 -3
mlrun/execution.py +46 -5
mlrun/feature_store/api.py +25 -1
mlrun/feature_store/common.py +6 -11
mlrun/feature_store/feature_vector.py +3 -1
mlrun/feature_store/retrieval/job.py +4 -1
mlrun/feature_store/retrieval/spark_merger.py +10 -39
mlrun/feature_store/steps.py +8 -0
mlrun/frameworks/_common/plan.py +3 -3
mlrun/frameworks/_ml_common/plan.py +1 -1
mlrun/frameworks/parallel_coordinates.py +2 -3
mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
mlrun/k8s_utils.py +48 -2
mlrun/launcher/client.py +6 -6
mlrun/launcher/local.py +2 -2
mlrun/model.py +215 -34
mlrun/model_monitoring/api.py +38 -24
mlrun/model_monitoring/applications/__init__.py +1 -2
mlrun/model_monitoring/applications/_application_steps.py +60 -29
mlrun/model_monitoring/applications/base.py +2 -174
mlrun/model_monitoring/applications/context.py +197 -70
mlrun/model_monitoring/applications/evidently_base.py +11 -85
mlrun/model_monitoring/applications/histogram_data_drift.py +21 -16
mlrun/model_monitoring/applications/results.py +4 -4
mlrun/model_monitoring/controller.py +110 -282
mlrun/model_monitoring/db/stores/__init__.py +8 -3
mlrun/model_monitoring/db/stores/base/store.py +3 -0
mlrun/model_monitoring/db/stores/sqldb/models/base.py +9 -7
mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +18 -3
mlrun/model_monitoring/db/stores/sqldb/sql_store.py +43 -23
mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +48 -35
mlrun/model_monitoring/db/tsdb/__init__.py +7 -2
mlrun/model_monitoring/db/tsdb/base.py +147 -15
mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +94 -55
mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +0 -3
mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +144 -38
mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +44 -3
mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +246 -57
mlrun/model_monitoring/helpers.py +70 -50
mlrun/model_monitoring/stream_processing.py +96 -195
mlrun/model_monitoring/writer.py +13 -5
mlrun/package/packagers/default_packager.py +2 -2
mlrun/projects/operations.py +16 -8
mlrun/projects/pipelines.py +126 -115
mlrun/projects/project.py +286 -129
mlrun/render.py +3 -3
mlrun/run.py +38 -19
mlrun/runtimes/__init__.py +19 -8
mlrun/runtimes/base.py +4 -1
mlrun/runtimes/daskjob.py +1 -1
mlrun/runtimes/funcdoc.py +1 -1
mlrun/runtimes/kubejob.py +6 -6
mlrun/runtimes/local.py +12 -5
mlrun/runtimes/nuclio/api_gateway.py +68 -8
mlrun/runtimes/nuclio/application/application.py +307 -70
mlrun/runtimes/nuclio/function.py +63 -14
mlrun/runtimes/nuclio/serving.py +10 -10
mlrun/runtimes/pod.py +25 -19
mlrun/runtimes/remotesparkjob.py +2 -5
mlrun/runtimes/sparkjob/spark3job.py +16 -17
mlrun/runtimes/utils.py +34 -0
mlrun/serving/routers.py +2 -5
mlrun/serving/server.py +37 -19
mlrun/serving/states.py +30 -3
mlrun/serving/v2_serving.py +44 -35
mlrun/track/trackers/mlflow_tracker.py +5 -0
mlrun/utils/async_http.py +1 -1
mlrun/utils/db.py +18 -0
mlrun/utils/helpers.py +150 -36
mlrun/utils/http.py +1 -1
mlrun/utils/notifications/notification/__init__.py +0 -1
mlrun/utils/notifications/notification/webhook.py +8 -1
mlrun/utils/notifications/notification_pusher.py +1 -1
mlrun/utils/v3io_clients.py +2 -2
mlrun/utils/version/version.json +2 -2
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/METADATA +153 -66
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/RECORD +131 -134
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/WHEEL +1 -1
mlrun/feature_store/retrieval/conversion.py +0 -271
mlrun/model_monitoring/controller_handler.py +0 -37
mlrun/model_monitoring/evidently_application.py +0 -20
mlrun/model_monitoring/prometheus.py +0 -216
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/LICENSE +0 -0
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/entry_points.txt +0 -0
{mlrun-1.7.0rc28.dist-info → mlrun-1.7.0rc55.dist-info}/top_level.txt +0 -0

mlrun/projects/pipelines.py CHANGED Viewed

@@ -27,6 +27,8 @@ import mlrun_pipelines.utils
 import mlrun
 import mlrun.common.runtimes.constants
 import mlrun.common.schemas
+import mlrun.common.schemas.function
+import mlrun.common.schemas.workflow
 import mlrun.utils.notifications
 from mlrun.errors import err_to_str
 from mlrun.utils import (
@@ -44,21 +46,21 @@ from ..runtimes.pod import AutoMountType
 def get_workflow_engine(engine_kind, local=False):
     if pipeline_context.is_run_local(local):
-        if engine_kind == "kfp":
+        if engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
             logger.warning(
                 "Running kubeflow pipeline locally, note some ops may not run locally!"
             )
-        elif engine_kind == "remote":
+        elif engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot run a remote pipeline locally using `kind='remote'` and `local=True`. "
                 "in order to run a local pipeline remotely, please use `engine='remote:local'` instead"
             )
         return _LocalRunner
-    if not engine_kind or engine_kind == "kfp":
+    if not engine_kind or engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
         return _KFPRunner
-    if engine_kind == "local":
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.LOCAL:
         return _LocalRunner
-    if engine_kind == "remote":
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
         return _RemoteRunner
     raise mlrun.errors.MLRunInvalidArgumentError(
         f"Provided workflow engine is not supported. engine_kind={engine_kind}"
@@ -80,6 +82,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
         cleanup_ttl: typing.Optional[int] = None,
         image: typing.Optional[str] = None,
+        workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
     ):
         self.engine = engine
         self.code = code
@@ -93,6 +96,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
         self._tmp_path = None
         self.schedule = schedule
         self.image = image
+        self.workflow_runner_node_selector = workflow_runner_node_selector
     def get_source_file(self, context=""):
         if not self.code and not self.path:
@@ -311,7 +315,11 @@ def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
 def enrich_function_object(
-    project, function, decorator=None, copy_function=True, try_auto_mount=True
+    project: mlrun.common.schemas.Project,
+    function: mlrun.runtimes.BaseRuntime,
+    decorator: typing.Callable = None,
+    copy_function: bool = True,
+    try_auto_mount: bool = True,
 ) -> mlrun.runtimes.BaseRuntime:
     if hasattr(function, "_enriched"):
         return function
@@ -352,7 +360,6 @@ def enrich_function_object(
         f.enrich_runtime_spec(
             project.spec.default_function_node_selector,
         )
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
@@ -404,12 +411,15 @@ class _PipelineRunStatus:
         return self._exc
     def wait_for_completion(self, timeout=None, expected_statuses=None):
-        self._state = self._engine.wait_for_completion(
-            self.run_id,
+        returned_state = self._engine.wait_for_completion(
+            self,
             project=self.project,
             timeout=timeout,
             expected_statuses=expected_statuses,
         )
+        # TODO: returning a state is optional until all runners implement wait_for_completion
+        if returned_state:
+            self._state = returned_state
         return self._state
     def __str__(self):
@@ -444,13 +454,17 @@ class _PipelineRunner(abc.ABC):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pass
     @staticmethod
     @abc.abstractmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
+    def wait_for_completion(
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+        timeout: typing.Optional[int] = None,
+        expected_statuses: list[str] = None,
+    ):
         pass
     @staticmethod
@@ -458,6 +472,48 @@ class _PipelineRunner(abc.ABC):
     def get_state(run_id, project=None):
         pass
+    @staticmethod
+    def get_run_status(
+        project,
+        run: _PipelineRunStatus,
+        timeout=None,
+        expected_statuses=None,
+        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
+    ):
+        timeout = timeout or 60 * 60
+        raise_error = None
+        state = ""
+        try:
+            if timeout:
+                state = run.wait_for_completion(
+                    timeout=timeout, expected_statuses=expected_statuses
+                )
+        except RuntimeError as exc:
+            # push runs table also when we have errors
+            raise_error = exc
+        mldb = mlrun.db.get_run_db(secrets=project._secrets)
+        runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
+        # TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
+        errors_counter = 0
+        for r in runs:
+            if r["status"].get("state", "") == "error":
+                errors_counter += 1
+        text = _PipelineRunner._generate_workflow_finished_message(
+            run.run_id, errors_counter, run._state
+        )
+        notifiers = notifiers or project.notifiers
+        if notifiers:
+            notifiers.push(text, "info", runs)
+        if raise_error:
+            raise raise_error
+        return state or run._state, errors_counter, text
     @staticmethod
     def _get_handler(workflow_handler, workflow_spec, project, secrets):
         if not (workflow_handler and callable(workflow_handler)):
@@ -474,16 +530,13 @@ class _PipelineRunner(abc.ABC):
         return workflow_handler
     @staticmethod
-    @abc.abstractmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-        **kwargs,
-    ):
-        pass
+    def _generate_workflow_finished_message(run_id, errors_counter, state):
+        text = f"Workflow {run_id} finished"
+        if errors_counter:
+            text += f" with {errors_counter} errors"
+        if state:
+            text += f", state={state}"
+        return text
 class _KFPRunner(_PipelineRunner):
@@ -525,7 +578,6 @@ class _KFPRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(
@@ -541,12 +593,13 @@ class _KFPRunner(_PipelineRunner):
             logger.warning(
                 "Setting notifications on kfp pipeline runner uses old notification behavior. "
                 "Notifications will only be sent if you wait for pipeline completion. "
-                "To use the new notification behavior, use the remote pipeline runner."
+                "Some of the features (like setting message or severity level) are not supported."
             )
-            for notification in notifications:
-                project.notifiers.add_notification(
-                    notification.kind, notification.params
-                )
+            # for start message, fallback to old notification behavior
+            for notification in notifications or []:
+                params = notification.params
+                params.update(notification.secret_params)
+                project.notifiers.add_notification(notification.kind, params)
         run_id = _run_pipeline(
             workflow_handler,
@@ -574,23 +627,31 @@ class _KFPRunner(_PipelineRunner):
                     func_name=func.metadata.name,
                     exc_info=err_to_str(exc),
                 )
-        if send_start_notification:
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-                project.get_param("commit_id", None),
-                run_id,
-                True,
-            )
+        project.notifiers.push_pipeline_start_message(
+            project.metadata.name,
+            project.get_param("commit_id", None),
+            run_id,
+            True,
+        )
         pipeline_context.clear()
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
     @staticmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
-        if timeout is None:
-            timeout = 60 * 60
+    def wait_for_completion(
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+        timeout: typing.Optional[int] = None,
+        expected_statuses: list[str] = None,
+    ):
         project_name = project.metadata.name if project else ""
+        logger.info(
+            "Waiting for pipeline run completion",
+            run_id=run.run_id,
+            project=project_name,
+        )
+        timeout = timeout or 60 * 60
         run_info = wait_for_pipeline_completion(
-            run_id,
+            run.run_id,
             timeout=timeout,
             expected_statuses=expected_statuses,
             project=project_name,
@@ -608,51 +669,6 @@ class _KFPRunner(_PipelineRunner):
             return resp["run"].get("status", "")
         return ""
-    @staticmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-        **kwargs,
-    ):
-        if timeout is None:
-            timeout = 60 * 60
-        state = ""
-        raise_error = None
-        try:
-            if timeout:
-                logger.info("Waiting for pipeline run completion")
-                state = run.wait_for_completion(
-                    timeout=timeout, expected_statuses=expected_statuses
-                )
-        except RuntimeError as exc:
-            # push runs table also when we have errors
-            raise_error = exc
-        mldb = mlrun.db.get_run_db(secrets=project._secrets)
-        runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
-        # TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
-        had_errors = 0
-        for r in runs:
-            if r["status"].get("state", "") == "error":
-                had_errors += 1
-        text = f"Workflow {run.run_id} finished"
-        if had_errors:
-            text += f" with {had_errors} errors"
-        if state:
-            text += f", state={state}"
-        notifiers = notifiers or project.notifiers
-        notifiers.push(text, "info", runs)
-        if raise_error:
-            raise raise_error
-        return state, had_errors, text
 class _LocalRunner(_PipelineRunner):
     """local pipelines runner"""
@@ -671,7 +687,6 @@ class _LocalRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(
@@ -693,10 +708,9 @@ class _LocalRunner(_PipelineRunner):
             project.set_source(source=source)
         pipeline_context.workflow_artifact_path = artifact_path
-        if send_start_notification:
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name, pipeline_id=workflow_id
-            )
+        project.notifiers.push_pipeline_start_message(
+            project.metadata.name, pipeline_id=workflow_id
+        )
         err = None
         try:
             workflow_handler(**workflow_spec.args)
@@ -732,18 +746,10 @@ class _LocalRunner(_PipelineRunner):
         return ""
     @staticmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
-        pass
-    @staticmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-        **kwargs,
-    ):
+    def wait_for_completion(run, project=None, timeout=None, expected_statuses=None):
+        # TODO: local runner blocks for the duration of the pipeline.
+        # Therefore usually there will be nothing to wait for.
+        # However, users may run functions with watch=False and then it can be useful to wait for the runs here.
         pass
@@ -764,22 +770,10 @@ class _RemoteRunner(_PipelineRunner):
         namespace: str = None,
         source: str = None,
         notifications: list[mlrun.model.Notification] = None,
-        send_start_notification: bool = True,
     ) -> typing.Optional[_PipelineRunStatus]:
         workflow_name = normalize_workflow_name(name=name, project_name=project.name)
         workflow_id = None
-        # for start message, fallback to old notification behavior
-        if send_start_notification:
-            for notification in notifications or []:
-                project.notifiers.add_notification(
-                    notification.kind, notification.params
-                )
-                # if a notification with `when=running` is provided, it will be used explicitly and others
-                # will be ignored
-                if "running" in notification.when:
-                    break
         # The returned engine for this runner is the engine of the workflow.
         # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
         inner_engine = get_workflow_engine(workflow_spec.engine)
@@ -879,9 +873,6 @@ class _RemoteRunner(_PipelineRunner):
             state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
             state = mlrun_pipelines.common.models.RunStatuses.running
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-            )
             pipeline_context.clear()
         return _PipelineRunStatus(
             run_id=workflow_id,
@@ -924,13 +915,25 @@ class _RemoteRunner(_PipelineRunner):
         elif inner_engine.engine == _LocalRunner.engine:
             mldb = mlrun.db.get_run_db(secrets=project._secrets)
             pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
             pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
+            # here we are waiting for the pipeline run to complete and refreshing after that the pipeline run from the
+            # db
+            # TODO: do it with timeout
             pipeline_runner_run.logs(db=mldb)
             pipeline_runner_run.refresh()
             run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
                 pipeline_runner_run.status.state
             )
             run._exc = pipeline_runner_run.status.error
+            return _LocalRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )
         else:
             raise mlrun.errors.MLRunInvalidArgumentError(
@@ -1075,6 +1078,13 @@ def load_and_run(
     if load_only:
         return
+    # extract "start" notification if exists
+    start_notifications = [
+        notification
+        for notification in context.get_notifications(unmask_secret_params=True)
+        if "running" in notification.when
+    ]
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(f"Running workflow {workflow_log_message} from remote")
     run = project.run(
@@ -1090,6 +1100,7 @@ def load_and_run(
         cleanup_ttl=cleanup_ttl,
         engine=engine,
         local=local,
+        notifications=start_notifications,
     )
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)

mlrun 1.7.0rc28__py3-none-any.whl → 1.7.0rc55__py3-none-any.whl

Potentially problematic release.

mlrun 1.7.0rc28py3-none-any.whl → 1.7.0rc55py3-none-any.whl