mlrun 1.7.0rc5__py3-none-any.whl → 1.7.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (234)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +39 -121
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +248 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +39 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +73 -46
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +73 -2
  13. mlrun/common/db/sql_session.py +3 -2
  14. mlrun/common/formatters/__init__.py +21 -0
  15. mlrun/common/formatters/artifact.py +46 -0
  16. mlrun/common/formatters/base.py +113 -0
  17. mlrun/common/formatters/feature_set.py +44 -0
  18. mlrun/common/formatters/function.py +46 -0
  19. mlrun/common/formatters/pipeline.py +53 -0
  20. mlrun/common/formatters/project.py +51 -0
  21. mlrun/common/formatters/run.py +29 -0
  22. mlrun/common/helpers.py +11 -1
  23. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  24. mlrun/common/schemas/__init__.py +21 -4
  25. mlrun/common/schemas/alert.py +202 -0
  26. mlrun/common/schemas/api_gateway.py +113 -2
  27. mlrun/common/schemas/artifact.py +28 -1
  28. mlrun/common/schemas/auth.py +11 -0
  29. mlrun/common/schemas/client_spec.py +2 -1
  30. mlrun/common/schemas/common.py +7 -4
  31. mlrun/common/schemas/constants.py +3 -0
  32. mlrun/common/schemas/feature_store.py +58 -28
  33. mlrun/common/schemas/frontend_spec.py +8 -0
  34. mlrun/common/schemas/function.py +11 -0
  35. mlrun/common/schemas/hub.py +7 -9
  36. mlrun/common/schemas/model_monitoring/__init__.py +21 -4
  37. mlrun/common/schemas/model_monitoring/constants.py +136 -42
  38. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  39. mlrun/common/schemas/model_monitoring/model_endpoints.py +89 -41
  40. mlrun/common/schemas/notification.py +69 -12
  41. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  42. mlrun/common/schemas/pipeline.py +7 -0
  43. mlrun/common/schemas/project.py +67 -16
  44. mlrun/common/schemas/runs.py +17 -0
  45. mlrun/common/schemas/schedule.py +1 -1
  46. mlrun/common/schemas/workflow.py +10 -2
  47. mlrun/common/types.py +14 -1
  48. mlrun/config.py +224 -58
  49. mlrun/data_types/data_types.py +11 -1
  50. mlrun/data_types/spark.py +5 -4
  51. mlrun/data_types/to_pandas.py +75 -34
  52. mlrun/datastore/__init__.py +8 -10
  53. mlrun/datastore/alibaba_oss.py +131 -0
  54. mlrun/datastore/azure_blob.py +131 -43
  55. mlrun/datastore/base.py +107 -47
  56. mlrun/datastore/datastore.py +17 -7
  57. mlrun/datastore/datastore_profile.py +91 -7
  58. mlrun/datastore/dbfs_store.py +3 -7
  59. mlrun/datastore/filestore.py +1 -3
  60. mlrun/datastore/google_cloud_storage.py +92 -32
  61. mlrun/datastore/hdfs.py +5 -0
  62. mlrun/datastore/inmem.py +6 -3
  63. mlrun/datastore/redis.py +3 -2
  64. mlrun/datastore/s3.py +30 -12
  65. mlrun/datastore/snowflake_utils.py +45 -0
  66. mlrun/datastore/sources.py +274 -59
  67. mlrun/datastore/spark_utils.py +30 -0
  68. mlrun/datastore/store_resources.py +9 -7
  69. mlrun/datastore/storeytargets.py +151 -0
  70. mlrun/datastore/targets.py +374 -102
  71. mlrun/datastore/utils.py +68 -5
  72. mlrun/datastore/v3io.py +28 -50
  73. mlrun/db/auth_utils.py +152 -0
  74. mlrun/db/base.py +231 -22
  75. mlrun/db/factory.py +1 -4
  76. mlrun/db/httpdb.py +864 -228
  77. mlrun/db/nopdb.py +268 -16
  78. mlrun/errors.py +35 -5
  79. mlrun/execution.py +111 -38
  80. mlrun/feature_store/__init__.py +0 -2
  81. mlrun/feature_store/api.py +46 -53
  82. mlrun/feature_store/common.py +6 -11
  83. mlrun/feature_store/feature_set.py +48 -23
  84. mlrun/feature_store/feature_vector.py +13 -2
  85. mlrun/feature_store/ingestion.py +7 -6
  86. mlrun/feature_store/retrieval/base.py +9 -4
  87. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  88. mlrun/feature_store/retrieval/job.py +13 -4
  89. mlrun/feature_store/retrieval/local_merger.py +2 -0
  90. mlrun/feature_store/retrieval/spark_merger.py +24 -32
  91. mlrun/feature_store/steps.py +38 -19
  92. mlrun/features.py +6 -14
  93. mlrun/frameworks/_common/plan.py +3 -3
  94. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  95. mlrun/frameworks/_ml_common/plan.py +1 -1
  96. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  97. mlrun/frameworks/lgbm/__init__.py +1 -1
  98. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  99. mlrun/frameworks/lgbm/model_handler.py +1 -1
  100. mlrun/frameworks/parallel_coordinates.py +4 -4
  101. mlrun/frameworks/pytorch/__init__.py +2 -2
  102. mlrun/frameworks/sklearn/__init__.py +1 -1
  103. mlrun/frameworks/sklearn/mlrun_interface.py +13 -3
  104. mlrun/frameworks/tf_keras/__init__.py +5 -2
  105. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  106. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  107. mlrun/frameworks/xgboost/__init__.py +1 -1
  108. mlrun/k8s_utils.py +57 -12
  109. mlrun/launcher/__init__.py +1 -1
  110. mlrun/launcher/base.py +6 -5
  111. mlrun/launcher/client.py +13 -11
  112. mlrun/launcher/factory.py +1 -1
  113. mlrun/launcher/local.py +15 -5
  114. mlrun/launcher/remote.py +10 -3
  115. mlrun/lists.py +6 -2
  116. mlrun/model.py +297 -48
  117. mlrun/model_monitoring/__init__.py +1 -1
  118. mlrun/model_monitoring/api.py +152 -357
  119. mlrun/model_monitoring/applications/__init__.py +10 -0
  120. mlrun/model_monitoring/applications/_application_steps.py +190 -0
  121. mlrun/model_monitoring/applications/base.py +108 -0
  122. mlrun/model_monitoring/applications/context.py +341 -0
  123. mlrun/model_monitoring/{evidently_application.py → applications/evidently_base.py} +27 -22
  124. mlrun/model_monitoring/applications/histogram_data_drift.py +227 -91
  125. mlrun/model_monitoring/applications/results.py +99 -0
  126. mlrun/model_monitoring/controller.py +130 -303
  127. mlrun/model_monitoring/{stores/models/sqlite.py → db/__init__.py} +5 -10
  128. mlrun/model_monitoring/db/stores/__init__.py +136 -0
  129. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  130. mlrun/model_monitoring/db/stores/base/store.py +213 -0
  131. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  132. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  133. mlrun/model_monitoring/db/stores/sqldb/models/base.py +190 -0
  134. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +103 -0
  135. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  136. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +659 -0
  137. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  138. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +726 -0
  139. mlrun/model_monitoring/db/tsdb/__init__.py +105 -0
  140. mlrun/model_monitoring/db/tsdb/base.py +448 -0
  141. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  142. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  143. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +298 -0
  144. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +42 -0
  145. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +522 -0
  146. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  147. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +158 -0
  148. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +849 -0
  149. mlrun/model_monitoring/features_drift_table.py +34 -22
  150. mlrun/model_monitoring/helpers.py +177 -39
  151. mlrun/model_monitoring/model_endpoint.py +3 -2
  152. mlrun/model_monitoring/stream_processing.py +165 -398
  153. mlrun/model_monitoring/tracking_policy.py +7 -1
  154. mlrun/model_monitoring/writer.py +161 -125
  155. mlrun/package/packagers/default_packager.py +2 -2
  156. mlrun/package/packagers_manager.py +1 -0
  157. mlrun/package/utils/_formatter.py +2 -2
  158. mlrun/platforms/__init__.py +11 -10
  159. mlrun/platforms/iguazio.py +67 -228
  160. mlrun/projects/__init__.py +6 -1
  161. mlrun/projects/operations.py +47 -20
  162. mlrun/projects/pipelines.py +396 -249
  163. mlrun/projects/project.py +1125 -414
  164. mlrun/render.py +28 -22
  165. mlrun/run.py +207 -180
  166. mlrun/runtimes/__init__.py +76 -11
  167. mlrun/runtimes/base.py +40 -14
  168. mlrun/runtimes/daskjob.py +9 -2
  169. mlrun/runtimes/databricks_job/databricks_runtime.py +1 -0
  170. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  171. mlrun/runtimes/funcdoc.py +1 -29
  172. mlrun/runtimes/kubejob.py +34 -128
  173. mlrun/runtimes/local.py +39 -10
  174. mlrun/runtimes/mpijob/__init__.py +0 -20
  175. mlrun/runtimes/mpijob/abstract.py +8 -8
  176. mlrun/runtimes/mpijob/v1.py +1 -1
  177. mlrun/runtimes/nuclio/api_gateway.py +646 -177
  178. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  179. mlrun/runtimes/nuclio/application/application.py +758 -0
  180. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  181. mlrun/runtimes/nuclio/function.py +188 -68
  182. mlrun/runtimes/nuclio/serving.py +57 -60
  183. mlrun/runtimes/pod.py +191 -58
  184. mlrun/runtimes/remotesparkjob.py +11 -8
  185. mlrun/runtimes/sparkjob/spark3job.py +17 -18
  186. mlrun/runtimes/utils.py +40 -73
  187. mlrun/secrets.py +6 -2
  188. mlrun/serving/__init__.py +8 -1
  189. mlrun/serving/remote.py +2 -3
  190. mlrun/serving/routers.py +89 -64
  191. mlrun/serving/server.py +54 -26
  192. mlrun/serving/states.py +187 -56
  193. mlrun/serving/utils.py +19 -11
  194. mlrun/serving/v2_serving.py +136 -63
  195. mlrun/track/tracker.py +2 -1
  196. mlrun/track/trackers/mlflow_tracker.py +5 -0
  197. mlrun/utils/async_http.py +26 -6
  198. mlrun/utils/db.py +18 -0
  199. mlrun/utils/helpers.py +375 -105
  200. mlrun/utils/http.py +2 -2
  201. mlrun/utils/logger.py +75 -9
  202. mlrun/utils/notifications/notification/__init__.py +14 -10
  203. mlrun/utils/notifications/notification/base.py +48 -0
  204. mlrun/utils/notifications/notification/console.py +2 -0
  205. mlrun/utils/notifications/notification/git.py +24 -1
  206. mlrun/utils/notifications/notification/ipython.py +2 -0
  207. mlrun/utils/notifications/notification/slack.py +96 -21
  208. mlrun/utils/notifications/notification/webhook.py +63 -2
  209. mlrun/utils/notifications/notification_pusher.py +146 -16
  210. mlrun/utils/regex.py +9 -0
  211. mlrun/utils/retryer.py +3 -2
  212. mlrun/utils/v3io_clients.py +2 -3
  213. mlrun/utils/version/version.json +2 -2
  214. mlrun-1.7.2.dist-info/METADATA +390 -0
  215. mlrun-1.7.2.dist-info/RECORD +351 -0
  216. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/WHEEL +1 -1
  217. mlrun/feature_store/retrieval/conversion.py +0 -271
  218. mlrun/kfpops.py +0 -868
  219. mlrun/model_monitoring/application.py +0 -310
  220. mlrun/model_monitoring/batch.py +0 -974
  221. mlrun/model_monitoring/controller_handler.py +0 -37
  222. mlrun/model_monitoring/prometheus.py +0 -216
  223. mlrun/model_monitoring/stores/__init__.py +0 -111
  224. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +0 -574
  225. mlrun/model_monitoring/stores/model_endpoint_store.py +0 -145
  226. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  227. mlrun/model_monitoring/stores/models/base.py +0 -84
  228. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  229. mlrun/platforms/other.py +0 -305
  230. mlrun-1.7.0rc5.dist-info/METADATA +0 -269
  231. mlrun-1.7.0rc5.dist-info/RECORD +0 -323
  232. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/LICENSE +0 -0
  233. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/entry_points.txt +0 -0
  234. {mlrun-1.7.0rc5.dist-info → mlrun-1.7.2.dist-info}/top_level.txt +0 -0
@@ -13,24 +13,27 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
 import typing
 import uuid
 
-import kfp.compiler
-from kfp import dsl
-from kfp.compiler import compiler
+import mlrun_pipelines.common.models
+import mlrun_pipelines.patcher
+import mlrun_pipelines.utils
 
 import mlrun
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
+import mlrun.common.schemas.function
+import mlrun.common.schemas.workflow
 import mlrun.utils.notifications
 from mlrun.errors import err_to_str
 from mlrun.utils import (
     get_ui_url,
     logger,
-    new_pipe_metadata,
     normalize_workflow_name,
     retry_until_successful,
 )
@@ -43,21 +46,21 @@ from ..runtimes.pod import AutoMountType
 
 def get_workflow_engine(engine_kind, local=False):
     if pipeline_context.is_run_local(local):
-        if engine_kind == "kfp":
+        if engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
             logger.warning(
                 "Running kubeflow pipeline locally, note some ops may not run locally!"
             )
-        elif engine_kind == "remote":
+        elif engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
             raise mlrun.errors.MLRunInvalidArgumentError(
                 "Cannot run a remote pipeline locally using `kind='remote'` and `local=True`. "
                 "in order to run a local pipeline remotely, please use `engine='remote:local'` instead"
             )
         return _LocalRunner
-    if not engine_kind or engine_kind == "kfp":
+    if not engine_kind or engine_kind == mlrun.common.schemas.workflow.EngineType.KFP:
        return _KFPRunner
-    if engine_kind == "local":
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.LOCAL:
        return _LocalRunner
-    if engine_kind == "remote":
+    if engine_kind == mlrun.common.schemas.workflow.EngineType.REMOTE:
        return _RemoteRunner
     raise mlrun.errors.MLRunInvalidArgumentError(
         f"Provided workflow engine is not supported. engine_kind={engine_kind}"
@@ -79,6 +82,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
         cleanup_ttl: typing.Optional[int] = None,
         image: typing.Optional[str] = None,
+        workflow_runner_node_selector: typing.Optional[dict[str, str]] = None,
     ):
         self.engine = engine
         self.code = code
@@ -92,6 +96,7 @@ class WorkflowSpec(mlrun.model.ModelObj):
         self._tmp_path = None
         self.schedule = schedule
         self.image = image
+        self.workflow_runner_node_selector = workflow_runner_node_selector
 
     def get_source_file(self, context=""):
         if not self.code and not self.path:
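The new `workflow_runner_node_selector` field lets the pod that drives the workflow be pinned to specific nodes. A hypothetical construction (the label keys and values are examples, and the other constructor arguments are assumed to keep their defaults):

spec = WorkflowSpec(
    engine="kfp",
    path="./workflow.py",  # example workflow file
    workflow_runner_node_selector={
        "kubernetes.io/arch": "amd64",  # example node label
        "node-role/workflows": "true",  # example node label
    },
)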
@@ -218,9 +223,10 @@ class _PipelineContext:
         force_run_local = mlrun.mlconf.force_run_local
         if force_run_local is None or force_run_local == "auto":
             force_run_local = not mlrun.mlconf.is_api_running_on_k8s()
-            kfp_url = mlrun.mlconf.resolve_kfp_url()
-            if not kfp_url:
+            if not mlrun.mlconf.kfp_url:
+                logger.debug("Kubeflow pipeline URL is not set, running locally")
                 force_run_local = True
+
         if self.workflow:
             force_run_local = force_run_local or self.workflow.run_local
 
@@ -300,72 +306,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
     }
 
 
-# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
-# converts it to a k8s object. As part of the flow in the Compile.compile() method,
-# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
-# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
-# I ran across the following problem when seeking for a method to set the priority_class_name:
-# https://github.com/kubeflow/pipelines/issues/3594
-# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
-# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
-# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
-def _create_enriched_mlrun_workflow(
-    self,
-    pipeline_func: typing.Callable,
-    pipeline_name: typing.Optional[str] = None,
-    pipeline_description: typing.Optional[str] = None,
-    params_list: typing.Optional[list[dsl.PipelineParam]] = None,
-    pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
-):
-    """Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
-    workflow = self._original_create_workflow(
-        pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
-    )
-    # We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
-    # that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
-    # we know can be raised.
-    try:
-        functions = []
-        if pipeline_context.functions:
-            try:
-                functions = pipeline_context.functions.values()
-            except Exception as err:
-                logger.debug(
-                    "Unable to retrieve project functions, not enriching workflow with mlrun",
-                    error=err_to_str(err),
-                )
-                return workflow
-
-        # enrich each pipeline step with your desire k8s attribute
-        for kfp_step_template in workflow["spec"]["templates"]:
-            if kfp_step_template.get("container"):
-                for function_obj in functions:
-                    # we condition within each function since the comparison between the function and
-                    # the kfp pod may change depending on the attribute type.
-                    _set_function_attribute_on_kfp_pod(
-                        kfp_step_template,
-                        function_obj,
-                        "PriorityClassName",
-                        "priority_class_name",
-                    )
-                    _enrich_kfp_pod_security_context(
-                        kfp_step_template,
-                        function_obj,
-                    )
-    except mlrun.errors.MLRunInvalidArgumentError:
-        raise
-    except Exception as err:
-        logger.debug(
-            "Something in the enrichment of kfp pods failed", error=err_to_str(err)
-        )
-    return workflow
-
-
-# patching function as class method
-kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
-kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
-
-
 def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
     project_instance, name, tag, hash_key = parse_versioned_object_uri(
         key, project.metadata.name
@@ -375,7 +315,11 @@ def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
 
 
 def enrich_function_object(
-    project, function, decorator=None, copy_function=True, try_auto_mount=True
+    project: mlrun.common.schemas.Project,
+    function: mlrun.runtimes.BaseRuntime,
+    decorator: typing.Callable = None,
+    copy_function: bool = True,
+    try_auto_mount: bool = True,
 ) -> mlrun.runtimes.BaseRuntime:
     if hasattr(function, "_enriched"):
         return function
@@ -412,6 +356,10 @@
     if decorator:
         decorator(f)
 
+    if project.spec.default_function_node_selector:
+        f.enrich_runtime_spec(
+            project.spec.default_function_node_selector,
+        )
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
@@ -431,7 +379,7 @@ class _PipelineRunStatus:
         engine: type["_PipelineRunner"],
         project: "mlrun.projects.MlrunProject",
         workflow: WorkflowSpec = None,
-        state: str = "",
+        state: mlrun_pipelines.common.models.RunStatuses = "",
         exc: Exception = None,
     ):
         """
@@ -451,7 +399,10 @@
 
     @property
     def state(self):
-        if self._state not in mlrun.run.RunStatuses.stable_statuses():
+        if (
+            self._state
+            not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
+        ):
             self._state = self._engine.get_state(self.run_id, self.project)
         return self._state
 
@@ -460,12 +411,15 @@
         return self._exc
 
     def wait_for_completion(self, timeout=None, expected_statuses=None):
-        self._state = self._engine.wait_for_completion(
-            self.run_id,
+        returned_state = self._engine.wait_for_completion(
+            self,
             project=self.project,
             timeout=timeout,
             expected_statuses=expected_statuses,
         )
+        # TODO: returning a state is optional until all runners implement wait_for_completion
+        if returned_state:
+            self._state = returned_state
         return self._state
 
     def __str__(self):
@@ -505,7 +459,12 @@ class _PipelineRunner(abc.ABC):
 
     @staticmethod
     @abc.abstractmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
+    def wait_for_completion(
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+        timeout: typing.Optional[int] = None,
+        expected_statuses: list[str] = None,
+    ):
         pass
 
     @staticmethod
@@ -513,10 +472,52 @@
     @abc.abstractmethod
     def get_state(run_id, project=None):
         pass
 
+    @staticmethod
+    def get_run_status(
+        project,
+        run: _PipelineRunStatus,
+        timeout=None,
+        expected_statuses=None,
+        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
+    ):
+        timeout = timeout or 60 * 60
+        raise_error = None
+        state = ""
+        try:
+            if timeout:
+                state = run.wait_for_completion(
+                    timeout=timeout, expected_statuses=expected_statuses
+                )
+        except RuntimeError as exc:
+            # push runs table also when we have errors
+            raise_error = exc
+
+        mldb = mlrun.db.get_run_db(secrets=project._secrets)
+        runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
+
+        # TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
+        errors_counter = 0
+        for r in runs:
+            if r["status"].get("state", "") == "error":
+                errors_counter += 1
+
+        text = _PipelineRunner._generate_workflow_finished_message(
+            run.run_id, errors_counter, run._state
+        )
+
+        notifiers = notifiers or project.notifiers
+        if notifiers:
+            notifiers.push(text, "info", runs)
+
+        if raise_error:
+            raise raise_error
+        return state or run._state, errors_counter, text
+
     @staticmethod
     def _get_handler(workflow_handler, workflow_spec, project, secrets):
         if not (workflow_handler and callable(workflow_handler)):
-            workflow_file = workflow_spec.get_source_file(project.spec.context)
+            workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
             workflow_handler = create_pipeline(
                 project,
                 workflow_file,
@@ -529,15 +530,13 @@ class _PipelineRunner(abc.ABC):
         return workflow_handler
 
     @staticmethod
-    @abc.abstractmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-    ):
-        pass
+    def _generate_workflow_finished_message(run_id, errors_counter, state):
+        text = f"Workflow {run_id} finished"
+        if errors_counter:
+            text += f" with {errors_counter} errors"
+        if state:
+            text += f", state={state}"
+        return text
 
 
 class _KFPRunner(_PipelineRunner):
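The per-runner notification text is now built by the shared `_generate_workflow_finished_message` helper added above. A quick illustration of its output (helper body copied from this diff):

def _generate_workflow_finished_message(run_id, errors_counter, state):
    text = f"Workflow {run_id} finished"
    if errors_counter:
        text += f" with {errors_counter} errors"
    if state:
        text += f", state={state}"
    return text

print(_generate_workflow_finished_message("abc123", 0, "Succeeded"))
# Workflow abc123 finished, state=Succeeded
print(_generate_workflow_finished_message("abc123", 2, "Failed"))
# Workflow abc123 finished with 2 errors, state=Failed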
@@ -548,7 +547,7 @@ class _KFPRunner(_PipelineRunner):
     @classmethod
     def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
         pipeline_context.set(project, workflow_spec)
-        workflow_file = workflow_spec.get_source_file(project.spec.context)
+        workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
         functions = FunctionsDict(project)
         pipeline = create_pipeline(
             project,
@@ -556,13 +555,14 @@
             functions,
             secrets=project._secrets,
         )
-        artifact_path = artifact_path or project.spec.artifact_path
-
-        conf = new_pipe_metadata(
-            artifact_path=artifact_path,
+        mlrun_pipelines.utils.compile_pipeline(
+            artifact_path=artifact_path or project.spec.artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            ops=None,
+            pipeline=pipeline,
+            pipe_file=target,
+            type_check=True,
         )
-        compiler.Compiler().compile(pipeline, target, pipeline_conf=conf)
         workflow_spec.clear_tmp()
         pipeline_context.clear()
 
@@ -593,12 +593,13 @@
             logger.warning(
                 "Setting notifications on kfp pipeline runner uses old notification behavior. "
                 "Notifications will only be sent if you wait for pipeline completion. "
-                "To use the new notification behavior, use the remote pipeline runner."
+                "Some of the features (like setting message or severity level) are not supported."
             )
-            for notification in notifications:
-                project.notifiers.add_notification(
-                    notification.kind, notification.params
-                )
+            # for start message, fallback to old notification behavior
+            for notification in notifications or []:
+                params = notification.params
+                params.update(notification.secret_params)
+                project.notifiers.add_notification(notification.kind, params)
 
         run_id = _run_pipeline(
             workflow_handler,
@@ -608,6 +609,7 @@
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup
@@ -625,7 +627,6 @@
                     func_name=func.metadata.name,
                     exc_info=err_to_str(exc),
                 )
-
         project.notifiers.push_pipeline_start_message(
             project.metadata.name,
             project.get_param("commit_id", None),
@@ -636,12 +637,21 @@
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)
 
     @staticmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
-        if timeout is None:
-            timeout = 60 * 60
+    def wait_for_completion(
+        run: "_PipelineRunStatus",
+        project: typing.Optional["mlrun.projects.MlrunProject"] = None,
+        timeout: typing.Optional[int] = None,
+        expected_statuses: list[str] = None,
+    ):
         project_name = project.metadata.name if project else ""
+        logger.info(
+            "Waiting for pipeline run completion",
+            run_id=run.run_id,
+            project=project_name,
+        )
+        timeout = timeout or 60 * 60
         run_info = wait_for_pipeline_completion(
-            run_id,
+            run.run_id,
             timeout=timeout,
             expected_statuses=expected_statuses,
             project=project_name,
@@ -659,50 +669,6 @@
             return resp["run"].get("status", "")
         return ""
 
-    @staticmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-    ):
-        if timeout is None:
-            timeout = 60 * 60
-        state = ""
-        raise_error = None
-        try:
-            if timeout:
-                logger.info("Waiting for pipeline run completion")
-                state = run.wait_for_completion(
-                    timeout=timeout, expected_statuses=expected_statuses
-                )
-        except RuntimeError as exc:
-            # push runs table also when we have errors
-            raise_error = exc
-
-        mldb = mlrun.db.get_run_db(secrets=project._secrets)
-        runs = mldb.list_runs(project=project.name, labels=f"workflow={run.run_id}")
-
-        # TODO: The below section duplicates notifiers.push_pipeline_run_results() logic. We should use it instead.
-        had_errors = 0
-        for r in runs:
-            if r["status"].get("state", "") == "error":
-                had_errors += 1
-
-        text = f"Workflow {run.run_id} finished"
-        if had_errors:
-            text += f" with {had_errors} errors"
-        if state:
-            text += f", state={state}"
-
-        notifiers = notifiers or project.notifiers
-        notifiers.push(text, "info", runs)
-
-        if raise_error:
-            raise raise_error
-        return state, had_errors, text
-
 
 class _LocalRunner(_PipelineRunner):
     """local pipelines runner"""
@@ -741,13 +707,14 @@ class _LocalRunner(_PipelineRunner):
         original_source = project.spec.source
         project.set_source(source=source)
         pipeline_context.workflow_artifact_path = artifact_path
+
         project.notifiers.push_pipeline_start_message(
             project.metadata.name, pipeline_id=workflow_id
         )
         err = None
         try:
             workflow_handler(**workflow_spec.args)
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
         except Exception as exc:
             err = exc
             logger.exception("Workflow run failed")
@@ -755,7 +722,7 @@
                 f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
         project.notifiers.push_pipeline_run_results(
             pipeline_context.runs_map.values(), state=state
@@ -779,17 +746,10 @@
         return ""
 
     @staticmethod
-    def wait_for_completion(run_id, project=None, timeout=None, expected_statuses=None):
-        pass
-
-    @staticmethod
-    def get_run_status(
-        project,
-        run,
-        timeout=None,
-        expected_statuses=None,
-        notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
-    ):
+    def wait_for_completion(run, project=None, timeout=None, expected_statuses=None):
+        # TODO: local runner blocks for the duration of the pipeline.
+        # Therefore usually there will be nothing to wait for.
+        # However, users may run functions with watch=False and then it can be useful to wait for the runs here.
         pass
 
 
@@ -814,10 +774,6 @@
         workflow_name = normalize_workflow_name(name=name, project_name=project.name)
         workflow_id = None
 
-        # for start message, fallback to old notification behavior
-        for notification in notifications or []:
-            project.notifiers.add_notification(notification.kind, notification.params)
-
         # The returned engine for this runner is the engine of the workflow.
         # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.
         inner_engine = get_workflow_engine(workflow_spec.engine)
@@ -865,22 +821,44 @@
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
         logger.debug(
             "Workflow submitted, waiting for pipeline run to start",
             workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
         )
 
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-            getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
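The `_get_workflow_id_or_bail` closure introduces a fail-fast path: an HTTP 412 is wrapped in `MLRunFatalFailureError` so the retry loop aborts immediately instead of retrying until the deadline. A minimal sketch of that pattern with illustrative names (the real `retry_until_successful` signature differs):

import time

class FatalFailure(Exception):
    """Raised to abort retrying immediately."""

def retry_until_successful(interval, timeout, func):
    deadline = time.monotonic() + timeout
    while True:
        try:
            return func()
        except FatalFailure:
            raise  # fatal: surface immediately, no more retries
        except Exception:
            if time.monotonic() >= deadline:
                raise  # out of budget: surface the last error
            time.sleep(interval)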
@@ -892,12 +870,9 @@
                 f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
-            state = mlrun.run.RunStatuses.succeeded
-            project.notifiers.push_pipeline_start_message(
-                project.metadata.name,
-            )
+            state = mlrun_pipelines.common.models.RunStatuses.running
         pipeline_context.clear()
         return _PipelineRunStatus(
             run_id=workflow_id,
@@ -911,24 +886,59 @@
     @staticmethod
     def get_run_status(
         project,
-        run,
+        run: _PipelineRunStatus,
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        inner_engine: type[_PipelineRunner] = None,
     ):
-        # ignore notifiers, as they are handled by the remote pipeline notifications,
-        # so overriding with CustomNotificationPusher with empty list of notifiers
-        state, had_errors, text = _KFPRunner.get_run_status(
-            project,
-            run,
-            timeout,
-            expected_statuses,
-            notifiers=mlrun.utils.notifications.CustomNotificationPusher([]),
-        )
+        inner_engine = inner_engine or _KFPRunner
+        if inner_engine.engine == _KFPRunner.engine:
+            # ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
+            # so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
+            local_project_notifiers = list(
+                set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
+                    set(project.notifiers.notifications.keys())
+                )
+            )
+            notifiers = mlrun.utils.notifications.CustomNotificationPusher(
+                local_project_notifiers
+            )
+            return _KFPRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )
+
+        elif inner_engine.engine == _LocalRunner.engine:
+            mldb = mlrun.db.get_run_db(secrets=project._secrets)
+            pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
+
+            pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
+
+            # here we are waiting for the pipeline run to complete and refreshing after that the pipeline run from the
+            # db
+            # TODO: do it with timeout
+            pipeline_runner_run.logs(db=mldb)
+            pipeline_runner_run.refresh()
+            run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
+                pipeline_runner_run.status.state
+            )
+            run._exc = pipeline_runner_run.status.error
+            return _LocalRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )
 
-        # indicate the pipeline status since we don't push the notifications in the remote runner
-        logger.info(text)
-        return state, had_errors, text
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Unsupported inner runner engine: {inner_engine.engine}"
+            )
 
 
 def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
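In the local-engine branch above, `RunStates.run_state_to_pipeline_run_status` translates the runner run's state into a pipeline status. That mapping is not shown in this diff; a hypothetical version, just to convey the shape (the keys and values here are guesses, not the mlrun constants):

_RUN_STATE_TO_PIPELINE_STATUS = {
    "completed": "Succeeded",  # assumed pairing
    "error": "Failed",         # assumed pairing
    "aborted": "Failed",       # assumed pairing
    "running": "Running",      # assumed pairing
}

def run_state_to_pipeline_run_status(run_state: str) -> str:
    return _RUN_STATE_TO_PIPELINE_STATUS.get(run_state, "Running")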
@@ -974,14 +984,25 @@ def github_webhook(request):
     return {"msg": "pushed"}
 
 
-def load_and_run(
+def load_and_run(context, *args, **kwargs):
+    """
+    This function serves as an alias to `load_and_run_workflow`,
+    allowing to continue using `load_and_run` without modifying existing workflows or exported runs.
+    This approach ensures backward compatibility,
+    while directing all new calls to the updated `load_and_run_workflow` function.
+    """
+    kwargs.pop("load_only", None)
+    kwargs.pop("save", None)
+    load_and_run_workflow(context, *args, **kwargs)
+
+
+def load_and_run_workflow(
     context: mlrun.execution.MLClientCtx,
     url: str = None,
     project_name: str = "",
     init_git: bool = None,
     subpath: str = None,
     clone: bool = False,
-    save: bool = True,
     workflow_name: str = None,
     workflow_path: str = None,
     workflow_arguments: dict[str, typing.Any] = None,
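The alias above keeps old call sites working: exported runs that still pass the removed `save`/`load_only` arguments have them stripped before delegation. A hypothetical pair of call sites (`ctx` stands in for an `MLClientCtx`):

# Old exported run: the removed kwargs are silently dropped by the alias.
load_and_run(ctx, url="git://example/repo.git#main", project_name="demo",
             save=True, load_only=False)

# Equivalent call for new code:
load_and_run_workflow(ctx, url="git://example/repo.git#main", project_name="demo")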
@@ -994,14 +1015,12 @@
     local: bool = None,
     schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
     cleanup_ttl: int = None,
-    load_only: bool = False,
     wait_for_completion: bool = False,
     project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
     This function loads a project from a given remote source and then runs the workflow.
-
     :param context: mlrun context.
     :param url: remote url that represents the project's source.
         See 'mlrun.load_project()' for details
@@ -1009,7 +1028,6 @@
     :param init_git: if True, will git init the context dir
     :param subpath: project subpath (within the archive)
     :param clone: if True, always clone (delete any existing content)
-    :param save: whether to save the created project and artifact in the DB
     :param workflow_name: name of the workflow
     :param workflow_path: url to a workflow file, if not a project workflow
     :param workflow_arguments: kubeflow pipelines arguments (parameters)
@@ -1025,48 +1043,38 @@
     :param schedule: ScheduleCronTrigger class instance or a standard crontab expression string
     :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
         workflow and all its resources are deleted)
-    :param load_only: for just loading the project, inner use.
     :param wait_for_completion: wait for workflow completion before returning
     :param project_context: project context path (used for loading the project)
     """
-    try:
-        project = mlrun.load_project(
-            context=project_context or f"./{project_name}",
-            url=url,
-            name=project_name,
-            init_git=init_git,
-            subpath=subpath,
-            clone=clone,
-            save=save,
-            sync_functions=True,
-        )
-    except Exception as error:
-        if schedule:
-            notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
-                ["slack"]
-            )
-            url = get_ui_url(project_name, context.uid)
-            link = f"<{url}|*view workflow job details*>"
-            message = (
-                f":x: Failed to run scheduled workflow {workflow_name} in Project {project_name} !\n"
-                f"error: ```{error}```\n{link}"
-            )
-            # Sending Slack Notification without losing the original error:
-            try:
-                notification_pusher.push(
-                    message=message,
-                    severity=mlrun.common.schemas.NotificationSeverity.ERROR,
-                )
-
-            except Exception as exc:
-                logger.error("Failed to send slack notification", exc=exc)
-
-        raise error
+    project_context = project_context or f"./{project_name}"
+
+    # Load the project to fetch files which the runner needs, such as remote source files
+    pull_remote_project_files(
+        context=context,
+        project_context=project_context,
+        url=url,
+        project_name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        schedule=schedule,
+        workflow_name=workflow_name,
+    )
 
-    context.logger.info(f"Loaded project {project.name} successfully")
+    # Retrieve the project object:
+    # - If the project exists in the MLRun database, it will be loaded from there.
+    # - If it doesn't exist in the database, it will be created from the previously loaded local directory.
+    project = mlrun.get_or_create_project(
+        context=project_context or f"./{project_name}",
+        name=project_name,
+    )
 
-    if load_only:
-        return
+    # extract "start" notification if exists
+    start_notifications = [
+        notification
+        for notification in context.get_notifications(unmask_secret_params=True)
+        if "running" in notification.when
+    ]
 
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(f"Running workflow {workflow_log_message} from remote")
@@ -1083,26 +1091,165 @@
         cleanup_ttl=cleanup_ttl,
         engine=engine,
         local=local,
+        notifications=start_notifications,
     )
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
 
-    if run.state == mlrun.run.RunStatuses.failed:
+    if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
 
     if wait_for_completion:
+        handle_workflow_completion(
+            run=run,
+            project=project,
+            context=context,
+            workflow_log_message=workflow_log_message,
+        )
+
+
+def pull_remote_project_files(
+    context: mlrun.execution.MLClientCtx,
+    project_context: str,
+    url: str,
+    project_name: str,
+    init_git: typing.Optional[bool],
+    subpath: typing.Optional[str],
+    clone: bool,
+    schedule: typing.Optional[
+        typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger]
+    ],
+    workflow_name: typing.Optional[str],
+) -> None:
+    """
+    Load the project to clone remote files if they exist.
+    If an exception occurs during project loading, send a notification if the workflow is scheduled.
+
+    :param context: MLRun execution context.
+    :param project_context: Path to the project context.
+    :param url: URL of the project repository.
+    :param project_name: Name of the project.
+    :param init_git: Initialize a git repository.
+    :param subpath: Project subpath within the repository.
+    :param clone: Whether to clone the repository.
+    :param schedule: Schedule for running the workflow.
+    :param workflow_name: Name of the workflow to run.
+    """
+    try:
+        # Load the project to clone remote files if they exist.
+        # Using save=False to avoid overriding changes from the database if it already exists.
+        mlrun.load_project(
+            context=project_context,
+            url=url,
+            name=project_name,
+            init_git=init_git,
+            subpath=subpath,
+            clone=clone,
+            save=False,
+        )
+    except Exception as error:
+        notify_scheduled_workflow_failure(
+            schedule=schedule,
+            project_name=project_name,
+            workflow_name=workflow_name,
+            error=error,
+            context_uid=context.uid,
+        )
+        raise error
+
+
+def notify_scheduled_workflow_failure(
+    schedule,
+    project_name: str,
+    workflow_name: str,
+    error: Exception,
+    context_uid: str,
+) -> None:
+    if schedule:
+        notification_pusher = mlrun.utils.notifications.CustomNotificationPusher(
+            ["slack"]
+        )
+        url = get_ui_url(project_name, context_uid)
+        link = f"<{url}|*view workflow job details*>"
+        message = (
+            f":x: Failed to run scheduled workflow {workflow_name} "
+            f"in Project {project_name}!\n"
+            f"Error: ```{err_to_str(error)}```\n{link}"
+        )
+        # Sending Slack Notification without losing the original error:
         try:
-            run.wait_for_completion()
-        except Exception as exc:
-            logger.error(
-                "Failed waiting for workflow completion",
-                workflow=workflow_log_message,
-                exc=err_to_str(exc),
+            notification_pusher.push(
+                message=message,
+                severity=mlrun.common.schemas.NotificationSeverity.ERROR,
             )
 
-        pipeline_state, _, _ = project.get_run_status(run)
-        context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-        if pipeline_state != mlrun.run.RunStatuses.succeeded:
-            raise RuntimeError(
-                f"Workflow {workflow_log_message} failed, state={pipeline_state}"
-            )
+        except Exception as exc:
+            logger.error("Failed to send slack notification", exc=err_to_str(exc))
+
+
+def handle_workflow_completion(
+    run: _PipelineRunStatus,
+    project,
+    context: mlrun.execution.MLClientCtx,
+    workflow_log_message: str,
+) -> None:
+    """
+    Handle workflow completion by waiting for it to finish and logging the final state.
+
+    :param run: Run object containing workflow execution details.
+    :param project: MLRun project object.
+    :param context: MLRun execution context.
+    :param workflow_log_message: Message used for logging.
+    """
+    try:
+        run.wait_for_completion()
+    except Exception as exc:
+        mlrun.utils.logger.error(
+            "Failed waiting for workflow completion",
+            workflow=workflow_log_message,
+            exc=err_to_str(exc),
+        )
+
+    pipeline_state, _, _ = project.get_run_status(run)
+    context.log_result(key="workflow_state", value=pipeline_state, commit=True)
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+        raise RuntimeError(
+            f"Workflow {workflow_log_message} failed, state={pipeline_state}"
+        )
+
+
+def import_remote_project(
+    context: mlrun.execution.MLClientCtx,
+    url: str = None,
+    project_name: str = "",
+    init_git: bool = None,
+    subpath: str = None,
+    clone: bool = False,
+    save: bool = True,
+    project_context: str = None,
+):
+    """
+    This function loads a project from a given remote source.
+
+    :param context: mlrun context.
+    :param url: remote url that represents the project's source.
+        See 'mlrun.load_project()' for details
+    :param project_name: project name
+    :param init_git: if True, will git init the context dir
+    :param subpath: project subpath (within the archive)
+    :param clone: if True, always clone (delete any existing content)
+    :param save: whether to save the created project and artifact in the DB
+    :param project_context: project context path (used for loading the project)
+    """
+    project = mlrun.load_project(
+        context=project_context or f"./{project_name}",
+        url=url,
+        name=project_name,
+        init_git=init_git,
+        subpath=subpath,
+        clone=clone,
+        save=save,
+        sync_functions=True,
+    )
+
+    context.logger.info(f"Loaded project {project.name} successfully")