mlrun 1.6.4rc7__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of mlrun might be problematic. Click here for more details.
- mlrun/__init__.py +11 -1
- mlrun/__main__.py +40 -122
- mlrun/alerts/__init__.py +15 -0
- mlrun/alerts/alert.py +248 -0
- mlrun/api/schemas/__init__.py +5 -4
- mlrun/artifacts/__init__.py +8 -3
- mlrun/artifacts/base.py +47 -257
- mlrun/artifacts/dataset.py +11 -192
- mlrun/artifacts/manager.py +79 -47
- mlrun/artifacts/model.py +31 -159
- mlrun/artifacts/plots.py +23 -380
- mlrun/common/constants.py +74 -1
- mlrun/common/db/sql_session.py +5 -5
- mlrun/common/formatters/__init__.py +21 -0
- mlrun/common/formatters/artifact.py +45 -0
- mlrun/common/formatters/base.py +113 -0
- mlrun/common/formatters/feature_set.py +33 -0
- mlrun/common/formatters/function.py +46 -0
- mlrun/common/formatters/pipeline.py +53 -0
- mlrun/common/formatters/project.py +51 -0
- mlrun/common/formatters/run.py +29 -0
- mlrun/common/helpers.py +12 -3
- mlrun/common/model_monitoring/helpers.py +9 -5
- mlrun/{runtimes → common/runtimes}/constants.py +37 -9
- mlrun/common/schemas/__init__.py +31 -5
- mlrun/common/schemas/alert.py +202 -0
- mlrun/common/schemas/api_gateway.py +196 -0
- mlrun/common/schemas/artifact.py +25 -4
- mlrun/common/schemas/auth.py +16 -5
- mlrun/common/schemas/background_task.py +1 -1
- mlrun/common/schemas/client_spec.py +4 -2
- mlrun/common/schemas/common.py +7 -4
- mlrun/common/schemas/constants.py +3 -0
- mlrun/common/schemas/feature_store.py +74 -44
- mlrun/common/schemas/frontend_spec.py +15 -7
- mlrun/common/schemas/function.py +12 -1
- mlrun/common/schemas/hub.py +11 -18
- mlrun/common/schemas/memory_reports.py +2 -2
- mlrun/common/schemas/model_monitoring/__init__.py +20 -4
- mlrun/common/schemas/model_monitoring/constants.py +123 -42
- mlrun/common/schemas/model_monitoring/grafana.py +13 -9
- mlrun/common/schemas/model_monitoring/model_endpoints.py +101 -54
- mlrun/common/schemas/notification.py +71 -14
- mlrun/common/schemas/object.py +2 -2
- mlrun/{model_monitoring/controller_handler.py → common/schemas/pagination.py} +9 -12
- mlrun/common/schemas/pipeline.py +8 -1
- mlrun/common/schemas/project.py +69 -18
- mlrun/common/schemas/runs.py +7 -1
- mlrun/common/schemas/runtime_resource.py +8 -12
- mlrun/common/schemas/schedule.py +4 -4
- mlrun/common/schemas/tag.py +1 -2
- mlrun/common/schemas/workflow.py +12 -4
- mlrun/common/types.py +14 -1
- mlrun/config.py +154 -69
- mlrun/data_types/data_types.py +6 -1
- mlrun/data_types/spark.py +2 -2
- mlrun/data_types/to_pandas.py +67 -37
- mlrun/datastore/__init__.py +6 -8
- mlrun/datastore/alibaba_oss.py +131 -0
- mlrun/datastore/azure_blob.py +143 -42
- mlrun/datastore/base.py +102 -58
- mlrun/datastore/datastore.py +34 -13
- mlrun/datastore/datastore_profile.py +146 -20
- mlrun/datastore/dbfs_store.py +3 -7
- mlrun/datastore/filestore.py +1 -4
- mlrun/datastore/google_cloud_storage.py +97 -33
- mlrun/datastore/hdfs.py +56 -0
- mlrun/datastore/inmem.py +6 -3
- mlrun/datastore/redis.py +7 -2
- mlrun/datastore/s3.py +34 -12
- mlrun/datastore/snowflake_utils.py +45 -0
- mlrun/datastore/sources.py +303 -111
- mlrun/datastore/spark_utils.py +31 -2
- mlrun/datastore/store_resources.py +9 -7
- mlrun/datastore/storeytargets.py +151 -0
- mlrun/datastore/targets.py +453 -176
- mlrun/datastore/utils.py +72 -58
- mlrun/datastore/v3io.py +6 -1
- mlrun/db/base.py +274 -41
- mlrun/db/factory.py +1 -1
- mlrun/db/httpdb.py +893 -225
- mlrun/db/nopdb.py +291 -33
- mlrun/errors.py +36 -6
- mlrun/execution.py +115 -42
- mlrun/feature_store/__init__.py +0 -2
- mlrun/feature_store/api.py +65 -73
- mlrun/feature_store/common.py +7 -12
- mlrun/feature_store/feature_set.py +76 -55
- mlrun/feature_store/feature_vector.py +39 -31
- mlrun/feature_store/ingestion.py +7 -6
- mlrun/feature_store/retrieval/base.py +16 -11
- mlrun/feature_store/retrieval/dask_merger.py +2 -0
- mlrun/feature_store/retrieval/job.py +13 -4
- mlrun/feature_store/retrieval/local_merger.py +2 -0
- mlrun/feature_store/retrieval/spark_merger.py +24 -32
- mlrun/feature_store/steps.py +45 -34
- mlrun/features.py +11 -21
- mlrun/frameworks/_common/artifacts_library.py +9 -9
- mlrun/frameworks/_common/mlrun_interface.py +5 -5
- mlrun/frameworks/_common/model_handler.py +48 -48
- mlrun/frameworks/_common/plan.py +5 -6
- mlrun/frameworks/_common/producer.py +3 -4
- mlrun/frameworks/_common/utils.py +5 -5
- mlrun/frameworks/_dl_common/loggers/logger.py +6 -7
- mlrun/frameworks/_dl_common/loggers/mlrun_logger.py +9 -9
- mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +23 -47
- mlrun/frameworks/_ml_common/artifacts_library.py +1 -2
- mlrun/frameworks/_ml_common/loggers/logger.py +3 -4
- mlrun/frameworks/_ml_common/loggers/mlrun_logger.py +4 -5
- mlrun/frameworks/_ml_common/model_handler.py +24 -24
- mlrun/frameworks/_ml_common/pkl_model_server.py +2 -2
- mlrun/frameworks/_ml_common/plan.py +2 -2
- mlrun/frameworks/_ml_common/plans/calibration_curve_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/confusion_matrix_plan.py +2 -3
- mlrun/frameworks/_ml_common/plans/dataset_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/feature_importance_plan.py +3 -3
- mlrun/frameworks/_ml_common/plans/roc_curve_plan.py +4 -4
- mlrun/frameworks/_ml_common/utils.py +4 -4
- mlrun/frameworks/auto_mlrun/auto_mlrun.py +9 -9
- mlrun/frameworks/huggingface/model_server.py +4 -4
- mlrun/frameworks/lgbm/__init__.py +33 -33
- mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
- mlrun/frameworks/lgbm/callbacks/logging_callback.py +4 -5
- mlrun/frameworks/lgbm/callbacks/mlrun_logging_callback.py +4 -5
- mlrun/frameworks/lgbm/mlrun_interfaces/booster_mlrun_interface.py +1 -3
- mlrun/frameworks/lgbm/mlrun_interfaces/mlrun_interface.py +6 -6
- mlrun/frameworks/lgbm/model_handler.py +10 -10
- mlrun/frameworks/lgbm/model_server.py +6 -6
- mlrun/frameworks/lgbm/utils.py +5 -5
- mlrun/frameworks/onnx/dataset.py +8 -8
- mlrun/frameworks/onnx/mlrun_interface.py +3 -3
- mlrun/frameworks/onnx/model_handler.py +6 -6
- mlrun/frameworks/onnx/model_server.py +7 -7
- mlrun/frameworks/parallel_coordinates.py +6 -6
- mlrun/frameworks/pytorch/__init__.py +18 -18
- mlrun/frameworks/pytorch/callbacks/callback.py +4 -5
- mlrun/frameworks/pytorch/callbacks/logging_callback.py +17 -17
- mlrun/frameworks/pytorch/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/pytorch/callbacks/tensorboard_logging_callback.py +23 -29
- mlrun/frameworks/pytorch/callbacks_handler.py +38 -38
- mlrun/frameworks/pytorch/mlrun_interface.py +20 -20
- mlrun/frameworks/pytorch/model_handler.py +17 -17
- mlrun/frameworks/pytorch/model_server.py +7 -7
- mlrun/frameworks/sklearn/__init__.py +13 -13
- mlrun/frameworks/sklearn/estimator.py +4 -4
- mlrun/frameworks/sklearn/metrics_library.py +14 -14
- mlrun/frameworks/sklearn/mlrun_interface.py +16 -9
- mlrun/frameworks/sklearn/model_handler.py +2 -2
- mlrun/frameworks/tf_keras/__init__.py +10 -7
- mlrun/frameworks/tf_keras/callbacks/logging_callback.py +15 -15
- mlrun/frameworks/tf_keras/callbacks/mlrun_logging_callback.py +11 -11
- mlrun/frameworks/tf_keras/callbacks/tensorboard_logging_callback.py +19 -23
- mlrun/frameworks/tf_keras/mlrun_interface.py +9 -11
- mlrun/frameworks/tf_keras/model_handler.py +14 -14
- mlrun/frameworks/tf_keras/model_server.py +6 -6
- mlrun/frameworks/xgboost/__init__.py +13 -13
- mlrun/frameworks/xgboost/model_handler.py +6 -6
- mlrun/k8s_utils.py +61 -17
- mlrun/launcher/__init__.py +1 -1
- mlrun/launcher/base.py +16 -15
- mlrun/launcher/client.py +13 -11
- mlrun/launcher/factory.py +1 -1
- mlrun/launcher/local.py +23 -13
- mlrun/launcher/remote.py +17 -10
- mlrun/lists.py +7 -6
- mlrun/model.py +478 -103
- mlrun/model_monitoring/__init__.py +1 -1
- mlrun/model_monitoring/api.py +163 -371
- mlrun/{runtimes/mpijob/v1alpha1.py → model_monitoring/applications/__init__.py} +9 -15
- mlrun/model_monitoring/applications/_application_steps.py +188 -0
- mlrun/model_monitoring/applications/base.py +108 -0
- mlrun/model_monitoring/applications/context.py +341 -0
- mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
- mlrun/model_monitoring/applications/histogram_data_drift.py +354 -0
- mlrun/model_monitoring/applications/results.py +99 -0
- mlrun/model_monitoring/controller.py +131 -278
- mlrun/model_monitoring/db/__init__.py +18 -0
- mlrun/model_monitoring/db/stores/__init__.py +136 -0
- mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
- mlrun/model_monitoring/db/stores/base/store.py +213 -0
- mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
- mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
- mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
- mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
- mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
- mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
- mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
- mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
- mlrun/model_monitoring/db/tsdb/base.py +448 -0
- mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
- mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +279 -0
- mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
- mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +507 -0
- mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
- mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
- mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
- mlrun/model_monitoring/features_drift_table.py +134 -106
- mlrun/model_monitoring/helpers.py +199 -55
- mlrun/model_monitoring/metrics/__init__.py +13 -0
- mlrun/model_monitoring/metrics/histogram_distance.py +127 -0
- mlrun/model_monitoring/model_endpoint.py +3 -2
- mlrun/model_monitoring/stream_processing.py +131 -398
- mlrun/model_monitoring/tracking_policy.py +9 -2
- mlrun/model_monitoring/writer.py +161 -125
- mlrun/package/__init__.py +6 -6
- mlrun/package/context_handler.py +5 -5
- mlrun/package/packager.py +7 -7
- mlrun/package/packagers/default_packager.py +8 -8
- mlrun/package/packagers/numpy_packagers.py +15 -15
- mlrun/package/packagers/pandas_packagers.py +5 -5
- mlrun/package/packagers/python_standard_library_packagers.py +10 -10
- mlrun/package/packagers_manager.py +19 -23
- mlrun/package/utils/_formatter.py +6 -6
- mlrun/package/utils/_pickler.py +2 -2
- mlrun/package/utils/_supported_format.py +4 -4
- mlrun/package/utils/log_hint_utils.py +2 -2
- mlrun/package/utils/type_hint_utils.py +4 -9
- mlrun/platforms/__init__.py +11 -10
- mlrun/platforms/iguazio.py +24 -203
- mlrun/projects/operations.py +52 -25
- mlrun/projects/pipelines.py +191 -197
- mlrun/projects/project.py +1227 -400
- mlrun/render.py +16 -19
- mlrun/run.py +209 -184
- mlrun/runtimes/__init__.py +83 -15
- mlrun/runtimes/base.py +51 -35
- mlrun/runtimes/daskjob.py +17 -10
- mlrun/runtimes/databricks_job/databricks_cancel_task.py +1 -1
- mlrun/runtimes/databricks_job/databricks_runtime.py +8 -7
- mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
- mlrun/runtimes/funcdoc.py +1 -29
- mlrun/runtimes/function_reference.py +1 -1
- mlrun/runtimes/kubejob.py +34 -128
- mlrun/runtimes/local.py +40 -11
- mlrun/runtimes/mpijob/__init__.py +0 -20
- mlrun/runtimes/mpijob/abstract.py +9 -10
- mlrun/runtimes/mpijob/v1.py +1 -1
- mlrun/{model_monitoring/stores/models/sqlite.py → runtimes/nuclio/__init__.py} +7 -9
- mlrun/runtimes/nuclio/api_gateway.py +769 -0
- mlrun/runtimes/nuclio/application/__init__.py +15 -0
- mlrun/runtimes/nuclio/application/application.py +758 -0
- mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
- mlrun/runtimes/{function.py → nuclio/function.py} +200 -83
- mlrun/runtimes/{nuclio.py → nuclio/nuclio.py} +6 -6
- mlrun/runtimes/{serving.py → nuclio/serving.py} +65 -68
- mlrun/runtimes/pod.py +281 -101
- mlrun/runtimes/remotesparkjob.py +12 -9
- mlrun/runtimes/sparkjob/spark3job.py +67 -51
- mlrun/runtimes/utils.py +41 -75
- mlrun/secrets.py +9 -5
- mlrun/serving/__init__.py +8 -1
- mlrun/serving/remote.py +2 -7
- mlrun/serving/routers.py +85 -69
- mlrun/serving/server.py +69 -44
- mlrun/serving/states.py +209 -36
- mlrun/serving/utils.py +22 -14
- mlrun/serving/v1_serving.py +6 -7
- mlrun/serving/v2_serving.py +129 -54
- mlrun/track/tracker.py +2 -1
- mlrun/track/tracker_manager.py +3 -3
- mlrun/track/trackers/mlflow_tracker.py +6 -2
- mlrun/utils/async_http.py +6 -8
- mlrun/utils/azure_vault.py +1 -1
- mlrun/utils/clones.py +1 -2
- mlrun/utils/condition_evaluator.py +3 -3
- mlrun/utils/db.py +21 -3
- mlrun/utils/helpers.py +405 -225
- mlrun/utils/http.py +3 -6
- mlrun/utils/logger.py +112 -16
- mlrun/utils/notifications/notification/__init__.py +17 -13
- mlrun/utils/notifications/notification/base.py +50 -2
- mlrun/utils/notifications/notification/console.py +2 -0
- mlrun/utils/notifications/notification/git.py +24 -1
- mlrun/utils/notifications/notification/ipython.py +3 -1
- mlrun/utils/notifications/notification/slack.py +96 -21
- mlrun/utils/notifications/notification/webhook.py +59 -2
- mlrun/utils/notifications/notification_pusher.py +149 -30
- mlrun/utils/regex.py +9 -0
- mlrun/utils/retryer.py +208 -0
- mlrun/utils/singleton.py +1 -1
- mlrun/utils/v3io_clients.py +4 -6
- mlrun/utils/version/version.json +2 -2
- mlrun/utils/version/version.py +2 -6
- mlrun-1.7.0.dist-info/METADATA +378 -0
- mlrun-1.7.0.dist-info/RECORD +351 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/WHEEL +1 -1
- mlrun/feature_store/retrieval/conversion.py +0 -273
- mlrun/kfpops.py +0 -868
- mlrun/model_monitoring/application.py +0 -310
- mlrun/model_monitoring/batch.py +0 -1095
- mlrun/model_monitoring/prometheus.py +0 -219
- mlrun/model_monitoring/stores/__init__.py +0 -111
- mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -576
- mlrun/model_monitoring/stores/model_endpoint_store.py +0 -147
- mlrun/model_monitoring/stores/models/__init__.py +0 -27
- mlrun/model_monitoring/stores/models/base.py +0 -84
- mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -384
- mlrun/platforms/other.py +0 -306
- mlrun-1.6.4rc7.dist-info/METADATA +0 -272
- mlrun-1.6.4rc7.dist-info/RECORD +0 -314
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/LICENSE +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/entry_points.txt +0 -0
- {mlrun-1.6.4rc7.dist-info → mlrun-1.7.0.dist-info}/top_level.txt +0 -0
mlrun/projects/pipelines.py
CHANGED
|
@@ -20,18 +20,20 @@ import tempfile
|
|
|
20
20
|
import typing
|
|
21
21
|
import uuid
|
|
22
22
|
|
|
23
|
-
import
|
|
24
|
-
|
|
25
|
-
|
|
23
|
+
import mlrun_pipelines.common.models
|
|
24
|
+
import mlrun_pipelines.patcher
|
|
25
|
+
import mlrun_pipelines.utils
|
|
26
26
|
|
|
27
27
|
import mlrun
|
|
28
|
+
import mlrun.common.runtimes.constants
|
|
28
29
|
import mlrun.common.schemas
|
|
30
|
+
import mlrun.common.schemas.function
|
|
31
|
+
import mlrun.common.schemas.workflow
|
|
29
32
|
import mlrun.utils.notifications
|
|
30
33
|
from mlrun.errors import err_to_str
|
|
31
34
|
from mlrun.utils import (
|
|
32
35
|
get_ui_url,
|
|
33
36
|
logger,
|
|
34
|
-
new_pipe_metadata,
|
|
35
37
|
normalize_workflow_name,
|
|
36
38
|
retry_until_successful,
|
|
37
39
|
)
|
|
@@ -44,21 +46,21 @@ from ..runtimes.pod import AutoMountType
|
|
|
44
46
|
|
|
45
47
|
def get_workflow_engine(engine_kind, local=False):
|
|
46
48
|
if pipeline_context.is_run_local(local):
|
|
47
|
-
if engine_kind ==
|
|
49
|
+
if engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
48
50
|
logger.warning(
|
|
49
51
|
"Running kubeflow pipeline locally, note some ops may not run locally!"
|
|
50
52
|
)
|
|
51
|
-
elif engine_kind ==
|
|
53
|
+
elif engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
|
|
52
54
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
53
55
|
"Cannot run a remote pipeline locally using `kind='remote'` and `local=True`. "
|
|
54
56
|
"in order to run a local pipeline remotely, please use `engine='remote:local'` instead"
|
|
55
57
|
)
|
|
56
58
|
return _LocalRunner
|
|
57
|
-
if not engine_kind or engine_kind ==
|
|
59
|
+
if not engine_kind or engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
|
|
58
60
|
return _KFPRunner
|
|
59
|
-
if engine_kind ==
|
|
61
|
+
if engine_kind == mlrun.common.schemas.workflow.EngineType.LOCAL:
|
|
60
62
|
return _LocalRunner
|
|
61
|
-
if engine_kind ==
|
|
63
|
+
if engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
|
|
62
64
|
return _RemoteRunner
|
|
63
65
|
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
64
66
|
f"Provided workflow engine is not supported. engine_kind={engine_kind}"
|
|
@@ -80,6 +82,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
|
|
|
80
82
|
schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
|
|
81
83
|
cleanup_ttl: typing.Optional[int] = None,
|
|
82
84
|
image: typing.Optional[str] = None,
|
|
85
|
+
workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
|
|
83
86
|
):
|
|
84
87
|
self.engine = engine
|
|
85
88
|
self.code = code
|
|
@@ -93,6 +96,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
|
|
|
93
96
|
self._tmp_path = None
|
|
94
97
|
self.schedule = schedule
|
|
95
98
|
self.image = image
|
|
99
|
+
self.workflow_runner_node_selector = workflow_runner_node_selector
|
|
96
100
|
|
|
97
101
|
def get_source_file(self, context=""):
|
|
98
102
|
if not self.code and not self.path:
|
|
@@ -219,9 +223,10 @@ class _PipelineContext:
|
|
|
219
223
|
force_run_local = mlrun.mlconf.force_run_local
|
|
220
224
|
if force_run_local is None or force_run_local == "auto":
|
|
221
225
|
force_run_local = not mlrun.mlconf.is_api_running_on_k8s()
|
|
222
|
-
|
|
223
|
-
|
|
226
|
+
if not mlrun.mlconf.kfp_url:
|
|
227
|
+
logger.debug("Kubeflow pipeline URL is not set, running locally")
|
|
224
228
|
force_run_local = True
|
|
229
|
+
|
|
225
230
|
if self.workflow:
|
|
226
231
|
force_run_local = force_run_local or self.workflow.run_local
|
|
227
232
|
|
|
@@ -301,72 +306,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
|
|
|
301
306
|
}
|
|
302
307
|
|
|
303
308
|
|
|
304
|
-
# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
|
|
305
|
-
# converts it to a k8s object. As part of the flow in the Compile.compile() method,
|
|
306
|
-
# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
|
|
307
|
-
# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
|
|
308
|
-
# I ran across the following problem when seeking for a method to set the priority_class_name:
|
|
309
|
-
# https://github.com/kubeflow/pipelines/issues/3594
|
|
310
|
-
# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
|
|
311
|
-
# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
|
|
312
|
-
# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
|
|
313
|
-
def _create_enriched_mlrun_workflow(
|
|
314
|
-
self,
|
|
315
|
-
pipeline_func: typing.Callable,
|
|
316
|
-
pipeline_name: typing.Optional[typing.Text] = None,
|
|
317
|
-
pipeline_description: typing.Optional[typing.Text] = None,
|
|
318
|
-
params_list: typing.Optional[typing.List[dsl.PipelineParam]] = None,
|
|
319
|
-
pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
|
|
320
|
-
):
|
|
321
|
-
"""Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
|
|
322
|
-
workflow = self._original_create_workflow(
|
|
323
|
-
pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
|
|
324
|
-
)
|
|
325
|
-
# We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
|
|
326
|
-
# that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
|
|
327
|
-
# we know can be raised.
|
|
328
|
-
try:
|
|
329
|
-
functions = []
|
|
330
|
-
if pipeline_context.functions:
|
|
331
|
-
try:
|
|
332
|
-
functions = pipeline_context.functions.values()
|
|
333
|
-
except Exception as err:
|
|
334
|
-
logger.debug(
|
|
335
|
-
"Unable to retrieve project functions, not enriching workflow with mlrun",
|
|
336
|
-
error=err_to_str(err),
|
|
337
|
-
)
|
|
338
|
-
return workflow
|
|
339
|
-
|
|
340
|
-
# enrich each pipeline step with your desire k8s attribute
|
|
341
|
-
for kfp_step_template in workflow["spec"]["templates"]:
|
|
342
|
-
if kfp_step_template.get("container"):
|
|
343
|
-
for function_obj in functions:
|
|
344
|
-
# we condition within each function since the comparison between the function and
|
|
345
|
-
# the kfp pod may change depending on the attribute type.
|
|
346
|
-
_set_function_attribute_on_kfp_pod(
|
|
347
|
-
kfp_step_template,
|
|
348
|
-
function_obj,
|
|
349
|
-
"PriorityClassName",
|
|
350
|
-
"priority_class_name",
|
|
351
|
-
)
|
|
352
|
-
_enrich_kfp_pod_security_context(
|
|
353
|
-
kfp_step_template,
|
|
354
|
-
function_obj,
|
|
355
|
-
)
|
|
356
|
-
except mlrun.errors.MLRunInvalidArgumentError:
|
|
357
|
-
raise
|
|
358
|
-
except Exception as err:
|
|
359
|
-
logger.debug(
|
|
360
|
-
"Something in the enrichment of kfp pods failed", error=err_to_str(err)
|
|
361
|
-
)
|
|
362
|
-
return workflow
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
# patching function as class method
|
|
366
|
-
kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
|
|
367
|
-
kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
|
|
368
|
-
|
|
369
|
-
|
|
370
309
|
def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
|
|
371
310
|
project_instance, name, tag, hash_key = parse_versioned_object_uri(
|
|
372
311
|
key, project.metadata.name
|
|
@@ -376,7 +315,11 @@ def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
|
|
|
376
315
|
|
|
377
316
|
|
|
378
317
|
def enrich_function_object(
|
|
379
|
-
project
|
|
318
|
+
project: mlrun.common.schemas.Project,
|
|
319
|
+
function: mlrun.runtimes.BaseRuntime,
|
|
320
|
+
decorator: typing.Callable = None,
|
|
321
|
+
copy_function: bool = True,
|
|
322
|
+
try_auto_mount: bool = True,
|
|
380
323
|
) -> mlrun.runtimes.BaseRuntime:
|
|
381
324
|
if hasattr(function, "_enriched"):
|
|
382
325
|
return function
|
|
@@ -413,6 +356,10 @@ def enrich_function_object(
|
|
|
413
356
|
if decorator:
|
|
414
357
|
decorator(f)
|
|
415
358
|
|
|
359
|
+
if project.spec.default_function_node_selector:
|
|
360
|
+
f.enrich_runtime_spec(
|
|
361
|
+
project.spec.default_function_node_selector,
|
|
362
|
+
)
|
|
416
363
|
if try_auto_mount:
|
|
417
364
|
if (
|
|
418
365
|
decorator and AutoMountType.is_auto_modifier(decorator)
|
|
@@ -429,10 +376,10 @@ class _PipelineRunStatus:
|
|
|
429
376
|
def __init__(
|
|
430
377
|
self,
|
|
431
378
|
run_id: str,
|
|
432
|
-
engine:
|
|
379
|
+
engine: type["_PipelineRunner"],
|
|
433
380
|
project: "mlrun.projects.MlrunProject",
|
|
434
381
|
workflow: WorkflowSpec = None,
|
|
435
|
-
state:
|
|
382
|
+
state: mlrun_pipelines.common.models.RunStatuses = "",
|
|
436
383
|
exc: Exception = None,
|
|
437
384
|
):
|
|
438
385
|
"""
|
|
@@ -452,7 +399,10 @@ class _PipelineRunStatus:
|
|
|
452
399
|
|
|
453
400
|
@property
|
|
454
401
|
def state(self):
|
|
455
|
-
if
|
|
402
|
+
if (
|
|
403
|
+
self._state
|
|
404
|
+
not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
|
|
405
|
+
):
|
|
456
406
|
self._state = self._engine.get_state(self.run_id, self.project)
|
|
457
407
|
return self._state
|
|
458
408
|
|
|
@@ -461,12 +411,15 @@ class _PipelineRunStatus:
|
|
|
461
411
|
return self._exc
|
|
462
412
|
|
|
463
413
|
def wait_for_completion(self, timeout=None, expected_statuses=None):
|
|
464
|
-
|
|
465
|
-
self
|
|
414
|
+
returned_state = self._engine.wait_for_completion(
|
|
415
|
+
self,
|
|
466
416
|
project=self.project,
|
|
467
417
|
timeout=timeout,
|
|
468
418
|
expected_statuses=expected_statuses,
|
|
469
419
|
)
|
|
420
|
+
# TODO: returning a state is optional until all runners implement wait_for_completion
|
|
421
|
+
if returned_state:
|
|
422
|
+
self._state = returned_state
|
|
470
423
|
return self._state
|
|
471
424
|
|
|
472
425
|
def __str__(self):
|
|
@@ -500,13 +453,18 @@ class _PipelineRunner(abc.ABC):
|
|
|
500
453
|
artifact_path=None,
|
|
501
454
|
namespace=None,
|
|
502
455
|
source=None,
|
|
503
|
-
notifications:
|
|
456
|
+
notifications: list[mlrun.model.Notification] = None,
|
|
504
457
|
) -> _PipelineRunStatus:
|
|
505
458
|
pass
|
|
506
459
|
|
|
507
460
|
@staticmethod
|
|
508
461
|
@abc.abstractmethod
|
|
509
|
-
def wait_for_completion(
|
|
462
|
+
def wait_for_completion(
|
|
463
|
+
run: "_PipelineRunStatus",
|
|
464
|
+
project: typing.Optional["mlrun.projects.MlrunProject"] = None,
|
|
465
|
+
timeout: typing.Optional[int] = None,
|
|
466
|
+
expected_statuses: list[str] = None,
|
|
467
|
+
):
|
|
510
468
|
pass
|
|
511
469
|
|
|
512
470
|
@staticmethod
|
|
@@ -514,10 +472,52 @@ class _PipelineRunner(abc.ABC):
|
|
|
514
472
|
def get_state(run_id, project=None):
|
|
515
473
|
pass
|
|
516
474
|
|
|
475
|
+
@staticmethod
|
|
476
|
+
def get_run_status(
|
|
477
|
+
project,
|
|
478
|
+
run: _PipelineRunStatus,
|
|
479
|
+
timeout=None,
|
|
480
|
+
expected_statuses=None,
|
|
481
|
+
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
482
|
+
**kwargs,
|
|
483
|
+
):
|
|
484
|
+
timeout = timeout or 60 * 60
|
|
485
|
+
raise_error = None
|
|
486
|
+
state = ""
|
|
487
|
+
try:
|
|
488
|
+
if timeout:
|
|
489
|
+
state = run.wait_for_completion(
|
|
490
|
+
timeout=timeout, expected_statuses=expected_statuses
|
|
491
|
+
)
|
|
492
|
+
except RuntimeError as exc:
|
|
493
|
+
# push runs table also when we have errors
|
|
494
|
+
raise_error = exc
|
|
495
|
+
|
|
496
|
+
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
497
|
+
runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
|
|
498
|
+
|
|
499
|
+
# TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
|
|
500
|
+
errors_counter = 0
|
|
501
|
+
for r in runs:
|
|
502
|
+
if r["status"].get("state", "") == "error":
|
|
503
|
+
errors_counter += 1
|
|
504
|
+
|
|
505
|
+
text = _PipelineRunner._generate_workflow_finished_message(
|
|
506
|
+
run.run_id, errors_counter, run._state
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
notifiers = notifiers or project.notifiers
|
|
510
|
+
if notifiers:
|
|
511
|
+
notifiers.push(text, "info", runs)
|
|
512
|
+
|
|
513
|
+
if raise_error:
|
|
514
|
+
raise raise_error
|
|
515
|
+
return state or run._state, errors_counter, text
|
|
516
|
+
|
|
517
517
|
@staticmethod
|
|
518
518
|
def _get_handler(workflow_handler, workflow_spec, project, secrets):
|
|
519
519
|
if not (workflow_handler and callable(workflow_handler)):
|
|
520
|
-
workflow_file = workflow_spec.get_source_file(project.spec.
|
|
520
|
+
workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
|
|
521
521
|
workflow_handler = create_pipeline(
|
|
522
522
|
project,
|
|
523
523
|
workflow_file,
|
|
@@ -530,15 +530,13 @@ class _PipelineRunner(abc.ABC):
|
|
|
530
530
|
return workflow_handler
|
|
531
531
|
|
|
532
532
|
@staticmethod
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
):
|
|
541
|
-
pass
|
|
533
|
+
def _generate_workflow_finished_message(run_id, errors_counter, state):
|
|
534
|
+
text = f"Workflow {run_id} finished"
|
|
535
|
+
if errors_counter:
|
|
536
|
+
text += f" with {errors_counter} errors"
|
|
537
|
+
if state:
|
|
538
|
+
text += f", state={state}"
|
|
539
|
+
return text
|
|
542
540
|
|
|
543
541
|
|
|
544
542
|
class _KFPRunner(_PipelineRunner):
|
|
@@ -549,7 +547,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
549
547
|
@classmethod
|
|
550
548
|
def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
|
|
551
549
|
pipeline_context.set(project, workflow_spec)
|
|
552
|
-
workflow_file = workflow_spec.get_source_file(project.spec.
|
|
550
|
+
workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
|
|
553
551
|
functions = FunctionsDict(project)
|
|
554
552
|
pipeline = create_pipeline(
|
|
555
553
|
project,
|
|
@@ -557,13 +555,14 @@ class _KFPRunner(_PipelineRunner):
|
|
|
557
555
|
functions,
|
|
558
556
|
secrets=project._secrets,
|
|
559
557
|
)
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
conf = new_pipe_metadata(
|
|
563
|
-
artifact_path=artifact_path,
|
|
558
|
+
mlrun_pipelines.utils.compile_pipeline(
|
|
559
|
+
artifact_path=artifact_path or project.spec.artifact_path,
|
|
564
560
|
cleanup_ttl=workflow_spec.cleanup_ttl,
|
|
561
|
+
ops=None,
|
|
562
|
+
pipeline=pipeline,
|
|
563
|
+
pipe_file=target,
|
|
564
|
+
type_check=True,
|
|
565
565
|
)
|
|
566
|
-
compiler.Compiler().compile(pipeline, target, pipeline_conf=conf)
|
|
567
566
|
workflow_spec.clear_tmp()
|
|
568
567
|
pipeline_context.clear()
|
|
569
568
|
|
|
@@ -578,7 +577,7 @@ class _KFPRunner(_PipelineRunner):
|
|
|
578
577
|
artifact_path=None,
|
|
579
578
|
namespace=None,
|
|
580
579
|
source=None,
|
|
581
|
-
notifications:
|
|
580
|
+
notifications: list[mlrun.model.Notification] = None,
|
|
582
581
|
) -> _PipelineRunStatus:
|
|
583
582
|
pipeline_context.set(project, workflow_spec)
|
|
584
583
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -594,12 +593,13 @@ class _KFPRunner(_PipelineRunner):
|
|
|
594
593
|
logger.warning(
|
|
595
594
|
"Setting notifications on kfp pipeline runner uses old notification behavior. "
|
|
596
595
|
"Notifications will only be sent if you wait for pipeline completion. "
|
|
597
|
-
"
|
|
596
|
+
"Some of the features (like setting message or severity level) are not supported."
|
|
598
597
|
)
|
|
599
|
-
for notification
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
)
|
|
598
|
+
# for start message, fallback to old notification behavior
|
|
599
|
+
for notification in notifications or []:
|
|
600
|
+
params = notification.params
|
|
601
|
+
params.update(notification.secret_params)
|
|
602
|
+
project.notifiers.add_notification(notification.kind, params)
|
|
603
603
|
|
|
604
604
|
run_id = _run_pipeline(
|
|
605
605
|
workflow_handler,
|
|
@@ -627,7 +627,6 @@ class _KFPRunner(_PipelineRunner):
|
|
|
627
627
|
func_name=func.metadata.name,
|
|
628
628
|
exc_info=err_to_str(exc),
|
|
629
629
|
)
|
|
630
|
-
|
|
631
630
|
project.notifiers.push_pipeline_start_message(
|
|
632
631
|
project.metadata.name,
|
|
633
632
|
project.get_param("commit_id", None),
|
|
@@ -638,12 +637,21 @@ class _KFPRunner(_PipelineRunner):
|
|
|
638
637
|
return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
|
|
639
638
|
|
|
640
639
|
@staticmethod
|
|
641
|
-
def wait_for_completion(
|
|
642
|
-
|
|
643
|
-
|
|
640
|
+
def wait_for_completion(
|
|
641
|
+
run: "_PipelineRunStatus",
|
|
642
|
+
project: typing.Optional["mlrun.projects.MlrunProject"] = None,
|
|
643
|
+
timeout: typing.Optional[int] = None,
|
|
644
|
+
expected_statuses: list[str] = None,
|
|
645
|
+
):
|
|
644
646
|
project_name = project.metadata.name if project else ""
|
|
647
|
+
logger.info(
|
|
648
|
+
"Waiting for pipeline run completion",
|
|
649
|
+
run_id=run.run_id,
|
|
650
|
+
project=project_name,
|
|
651
|
+
)
|
|
652
|
+
timeout = timeout or 60 * 60
|
|
645
653
|
run_info = wait_for_pipeline_completion(
|
|
646
|
-
run_id,
|
|
654
|
+
run.run_id,
|
|
647
655
|
timeout=timeout,
|
|
648
656
|
expected_statuses=expected_statuses,
|
|
649
657
|
project=project_name,
|
|
@@ -661,50 +669,6 @@ class _KFPRunner(_PipelineRunner):
|
|
|
661
669
|
return resp["run"].get("status", "")
|
|
662
670
|
return ""
|
|
663
671
|
|
|
664
|
-
@staticmethod
|
|
665
|
-
def get_run_status(
|
|
666
|
-
project,
|
|
667
|
-
run,
|
|
668
|
-
timeout=None,
|
|
669
|
-
expected_statuses=None,
|
|
670
|
-
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
671
|
-
):
|
|
672
|
-
if timeout is None:
|
|
673
|
-
timeout = 60 * 60
|
|
674
|
-
state = ""
|
|
675
|
-
raise_error = None
|
|
676
|
-
try:
|
|
677
|
-
if timeout:
|
|
678
|
-
logger.info("Waiting for pipeline run completion")
|
|
679
|
-
state = run.wait_for_completion(
|
|
680
|
-
timeout=timeout, expected_statuses=expected_statuses
|
|
681
|
-
)
|
|
682
|
-
except RuntimeError as exc:
|
|
683
|
-
# push runs table also when we have errors
|
|
684
|
-
raise_error = exc
|
|
685
|
-
|
|
686
|
-
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
687
|
-
runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
|
|
688
|
-
|
|
689
|
-
# TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
|
|
690
|
-
had_errors = 0
|
|
691
|
-
for r in runs:
|
|
692
|
-
if r["status"].get("state", "") == "error":
|
|
693
|
-
had_errors += 1
|
|
694
|
-
|
|
695
|
-
text = f"Workflow {run.run_id} finished"
|
|
696
|
-
if had_errors:
|
|
697
|
-
text += f" with {had_errors} errors"
|
|
698
|
-
if state:
|
|
699
|
-
text += f", state={state}"
|
|
700
|
-
|
|
701
|
-
notifiers = notifiers or project.notifiers
|
|
702
|
-
notifiers.push(text, "info", runs)
|
|
703
|
-
|
|
704
|
-
if raise_error:
|
|
705
|
-
raise raise_error
|
|
706
|
-
return state, had_errors, text
|
|
707
|
-
|
|
708
672
|
|
|
709
673
|
class _LocalRunner(_PipelineRunner):
|
|
710
674
|
"""local pipelines runner"""
|
|
@@ -722,7 +686,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
722
686
|
artifact_path=None,
|
|
723
687
|
namespace=None,
|
|
724
688
|
source=None,
|
|
725
|
-
notifications:
|
|
689
|
+
notifications: list[mlrun.model.Notification] = None,
|
|
726
690
|
) -> _PipelineRunStatus:
|
|
727
691
|
pipeline_context.set(project, workflow_spec)
|
|
728
692
|
workflow_handler = _PipelineRunner._get_handler(
|
|
@@ -743,13 +707,14 @@ class _LocalRunner(_PipelineRunner):
|
|
|
743
707
|
original_source = project.spec.source
|
|
744
708
|
project.set_source(source=source)
|
|
745
709
|
pipeline_context.workflow_artifact_path = artifact_path
|
|
710
|
+
|
|
746
711
|
project.notifiers.push_pipeline_start_message(
|
|
747
712
|
project.metadata.name, pipeline_id=workflow_id
|
|
748
713
|
)
|
|
749
714
|
err = None
|
|
750
715
|
try:
|
|
751
716
|
workflow_handler(**workflow_spec.args)
|
|
752
|
-
state =
|
|
717
|
+
state = mlrun_pipelines.common.models.RunStatuses.succeeded
|
|
753
718
|
except Exception as exc:
|
|
754
719
|
err = exc
|
|
755
720
|
logger.exception("Workflow run failed")
|
|
@@ -757,7 +722,7 @@ class _LocalRunner(_PipelineRunner):
|
|
|
757
722
|
f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
|
|
758
723
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
759
724
|
)
|
|
760
|
-
state =
|
|
725
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
761
726
|
mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
|
|
762
727
|
project.notifiers.push_pipeline_run_results(
|
|
763
728
|
pipeline_context.runs_map.values(), state=state
|
|
@@ -781,17 +746,10 @@ class _LocalRunner(_PipelineRunner):
|
|
|
781
746
|
return ""
|
|
782
747
|
|
|
783
748
|
@staticmethod
|
|
784
|
-
def wait_for_completion(
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
def get_run_status(
|
|
789
|
-
project,
|
|
790
|
-
run,
|
|
791
|
-
timeout=None,
|
|
792
|
-
expected_statuses=None,
|
|
793
|
-
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
794
|
-
):
|
|
749
|
+
def wait_for_completion(run, project=None, timeout=None, expected_statuses=None):
|
|
750
|
+
# TODO: local runner blocks for the duration of the pipeline.
|
|
751
|
+
# Therefore usually there will be nothing to wait for.
|
|
752
|
+
# However, users may run functions with watch=False and then it can be useful to wait for the runs here.
|
|
795
753
|
pass
|
|
796
754
|
|
|
797
755
|
|
|
@@ -811,15 +769,11 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
811
769
|
artifact_path: str = None,
|
|
812
770
|
namespace: str = None,
|
|
813
771
|
source: str = None,
|
|
814
|
-
notifications:
|
|
772
|
+
notifications: list[mlrun.model.Notification] = None,
|
|
815
773
|
) -> typing.Optional[_PipelineRunStatus]:
|
|
816
774
|
workflow_name = normalize_workflow_name(name=name, project_name=project.name)
|
|
817
775
|
workflow_id = None
|
|
818
776
|
|
|
819
|
-
# for start message, fallback to old notification behavior
|
|
820
|
-
for notification in notifications or []:
|
|
821
|
-
project.notifiers.add_notification(notification.kind, notification.params)
|
|
822
|
-
|
|
823
777
|
# The returned engine for this runner is the engine of the workflow.
|
|
824
778
|
# In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
|
|
825
779
|
inner_engine = get_workflow_engine(workflow_spec.engine)
|
|
@@ -916,12 +870,9 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
916
870
|
f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
|
|
917
871
|
mlrun.common.schemas.NotificationSeverity.ERROR,
|
|
918
872
|
)
|
|
919
|
-
state =
|
|
873
|
+
state = mlrun_pipelines.common.models.RunStatuses.failed
|
|
920
874
|
else:
|
|
921
|
-
state =
|
|
922
|
-
project.notifiers.push_pipeline_start_message(
|
|
923
|
-
project.metadata.name,
|
|
924
|
-
)
|
|
875
|
+
state = mlrun_pipelines.common.models.RunStatuses.running
|
|
925
876
|
pipeline_context.clear()
|
|
926
877
|
return _PipelineRunStatus(
|
|
927
878
|
run_id=workflow_id,
|
|
@@ -935,24 +886,59 @@ class _RemoteRunner(_PipelineRunner):
|
|
|
935
886
|
@staticmethod
|
|
936
887
|
def get_run_status(
|
|
937
888
|
project,
|
|
938
|
-
run,
|
|
889
|
+
run: _PipelineRunStatus,
|
|
939
890
|
timeout=None,
|
|
940
891
|
expected_statuses=None,
|
|
941
892
|
notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
|
|
893
|
+
inner_engine: type[_PipelineRunner] = None,
|
|
942
894
|
):
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
895
|
+
inner_engine = inner_engine or _KFPRunner
|
|
896
|
+
if inner_engine.engine == _KFPRunner.engine:
|
|
897
|
+
# ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
|
|
898
|
+
# so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
|
|
899
|
+
local_project_notifiers = list(
|
|
900
|
+
set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
|
|
901
|
+
set(project.notifiers.notifications.keys())
|
|
902
|
+
)
|
|
903
|
+
)
|
|
904
|
+
notifiers = mlrun.utils.notifications.CustomNotificationPusher(
|
|
905
|
+
local_project_notifiers
|
|
906
|
+
)
|
|
907
|
+
return _KFPRunner.get_run_status(
|
|
908
|
+
project,
|
|
909
|
+
run,
|
|
910
|
+
timeout,
|
|
911
|
+
expected_statuses,
|
|
912
|
+
notifiers=notifiers,
|
|
913
|
+
)
|
|
914
|
+
|
|
915
|
+
elif inner_engine.engine == _LocalRunner.engine:
|
|
916
|
+
mldb = mlrun.db.get_run_db(secrets=project._secrets)
|
|
917
|
+
pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
|
|
918
|
+
|
|
919
|
+
pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
|
|
952
920
|
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
921
|
+
# here we are waiting for the pipeline run to complete and refreshing after that the pipeline run from the
|
|
922
|
+
# db
|
|
923
|
+
# TODO: do it with timeout
|
|
924
|
+
pipeline_runner_run.logs(db=mldb)
|
|
925
|
+
pipeline_runner_run.refresh()
|
|
926
|
+
run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
|
|
927
|
+
pipeline_runner_run.status.state
|
|
928
|
+
)
|
|
929
|
+
run._exc = pipeline_runner_run.status.error
|
|
930
|
+
return _LocalRunner.get_run_status(
|
|
931
|
+
project,
|
|
932
|
+
run,
|
|
933
|
+
timeout,
|
|
934
|
+
expected_statuses,
|
|
935
|
+
notifiers=notifiers,
|
|
936
|
+
)
|
|
937
|
+
|
|
938
|
+
else:
|
|
939
|
+
raise mlrun.errors.MLRunInvalidArgumentError(
|
|
940
|
+
f"Unsupported inner runner engine: {inner_engine.engine}"
|
|
941
|
+
)
|
|
956
942
|
|
|
957
943
|
|
|
958
944
|
def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
|
|
@@ -1008,7 +994,7 @@ def load_and_run(
|
|
|
1008
994
|
save: bool = True,
|
|
1009
995
|
workflow_name: str = None,
|
|
1010
996
|
workflow_path: str = None,
|
|
1011
|
-
workflow_arguments:
|
|
997
|
+
workflow_arguments: dict[str, typing.Any] = None,
|
|
1012
998
|
artifact_path: str = None,
|
|
1013
999
|
workflow_handler: typing.Union[str, typing.Callable] = None,
|
|
1014
1000
|
namespace: str = None,
|
|
@@ -1083,7 +1069,7 @@ def load_and_run(
|
|
|
1083
1069
|
)
|
|
1084
1070
|
|
|
1085
1071
|
except Exception as exc:
|
|
1086
|
-
logger.error("Failed to send slack notification", exc=exc)
|
|
1072
|
+
logger.error("Failed to send slack notification", exc=err_to_str(exc))
|
|
1087
1073
|
|
|
1088
1074
|
raise error
|
|
1089
1075
|
|
|
@@ -1092,6 +1078,13 @@ def load_and_run(
|
|
|
1092
1078
|
if load_only:
|
|
1093
1079
|
return
|
|
1094
1080
|
|
|
1081
|
+
# extract "start" notification if exists
|
|
1082
|
+
start_notifications = [
|
|
1083
|
+
notification
|
|
1084
|
+
for notification in context.get_notifications(unmask_secret_params=True)
|
|
1085
|
+
if "running" in notification.when
|
|
1086
|
+
]
|
|
1087
|
+
|
|
1095
1088
|
workflow_log_message = workflow_name or workflow_path
|
|
1096
1089
|
context.logger.info(f"Running workflow {workflow_log_message} from remote")
|
|
1097
1090
|
run = project.run(
|
|
@@ -1107,11 +1100,12 @@ def load_and_run(
|
|
|
1107
1100
|
cleanup_ttl=cleanup_ttl,
|
|
1108
1101
|
engine=engine,
|
|
1109
1102
|
local=local,
|
|
1103
|
+
notifications=start_notifications,
|
|
1110
1104
|
)
|
|
1111
1105
|
context.log_result(key="workflow_id", value=run.run_id)
|
|
1112
1106
|
context.log_result(key="engine", value=run._engine.engine, commit=True)
|
|
1113
1107
|
|
|
1114
|
-
if run.state ==
|
|
1108
|
+
if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
|
|
1115
1109
|
raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
|
|
1116
1110
|
|
|
1117
1111
|
if wait_for_completion:
|
|
@@ -1126,7 +1120,7 @@ def load_and_run(
|
|
|
1126
1120
|
|
|
1127
1121
|
pipeline_state, _, _ = project.get_run_status(run)
|
|
1128
1122
|
context.log_result(key="workflow_state", value=pipeline_state, commit=True)
|
|
1129
|
-
if pipeline_state !=
|
|
1123
|
+
if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
|
|
1130
1124
|
raise RuntimeError(
|
|
1131
1125
|
f"Workflow {workflow_log_message} failed, state={pipeline_state}"
|
|
1132
1126
|
)
|