mlrun 1.7.0rc13__py3-none-any.whl → 1.7.0rc21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged for review.

Files changed (156)
  1. mlrun/__init__.py +10 -1
  2. mlrun/__main__.py +23 -111
  3. mlrun/alerts/__init__.py +15 -0
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +36 -253
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +46 -42
  10. mlrun/artifacts/model.py +9 -141
  11. mlrun/artifacts/plots.py +14 -375
  12. mlrun/common/constants.py +65 -3
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{runtimes/mpijob/v1alpha1.py → common/formatters/artifact.py} +6 -14
  15. mlrun/common/formatters/base.py +113 -0
  16. mlrun/common/formatters/function.py +46 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +10 -5
  21. mlrun/common/schemas/alert.py +92 -11
  22. mlrun/common/schemas/api_gateway.py +56 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +2 -0
  25. mlrun/common/schemas/client_spec.py +1 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/model_monitoring/__init__.py +15 -3
  29. mlrun/common/schemas/model_monitoring/constants.py +58 -7
  30. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  31. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  32. mlrun/common/schemas/pipeline.py +0 -9
  33. mlrun/common/schemas/project.py +6 -11
  34. mlrun/common/types.py +1 -0
  35. mlrun/config.py +36 -8
  36. mlrun/data_types/to_pandas.py +9 -9
  37. mlrun/datastore/base.py +41 -9
  38. mlrun/datastore/datastore.py +6 -2
  39. mlrun/datastore/datastore_profile.py +56 -4
  40. mlrun/datastore/hdfs.py +5 -0
  41. mlrun/datastore/inmem.py +2 -2
  42. mlrun/datastore/redis.py +2 -2
  43. mlrun/datastore/s3.py +5 -0
  44. mlrun/datastore/sources.py +147 -7
  45. mlrun/datastore/store_resources.py +7 -7
  46. mlrun/datastore/targets.py +129 -9
  47. mlrun/datastore/utils.py +42 -0
  48. mlrun/datastore/v3io.py +1 -1
  49. mlrun/db/auth_utils.py +152 -0
  50. mlrun/db/base.py +55 -11
  51. mlrun/db/httpdb.py +346 -107
  52. mlrun/db/nopdb.py +52 -10
  53. mlrun/errors.py +11 -0
  54. mlrun/execution.py +24 -9
  55. mlrun/feature_store/__init__.py +0 -2
  56. mlrun/feature_store/api.py +12 -47
  57. mlrun/feature_store/feature_set.py +9 -0
  58. mlrun/feature_store/feature_vector.py +8 -0
  59. mlrun/feature_store/ingestion.py +7 -6
  60. mlrun/feature_store/retrieval/base.py +9 -4
  61. mlrun/feature_store/retrieval/conversion.py +9 -9
  62. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  63. mlrun/feature_store/retrieval/job.py +9 -3
  64. mlrun/feature_store/retrieval/local_merger.py +2 -0
  65. mlrun/feature_store/retrieval/spark_merger.py +16 -0
  66. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  67. mlrun/frameworks/parallel_coordinates.py +2 -1
  68. mlrun/frameworks/tf_keras/__init__.py +4 -1
  69. mlrun/k8s_utils.py +10 -11
  70. mlrun/launcher/base.py +4 -3
  71. mlrun/launcher/client.py +5 -3
  72. mlrun/launcher/local.py +8 -2
  73. mlrun/launcher/remote.py +8 -2
  74. mlrun/lists.py +6 -2
  75. mlrun/model.py +62 -20
  76. mlrun/model_monitoring/__init__.py +1 -1
  77. mlrun/model_monitoring/api.py +41 -18
  78. mlrun/model_monitoring/application.py +5 -305
  79. mlrun/model_monitoring/applications/__init__.py +11 -0
  80. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  81. mlrun/model_monitoring/applications/base.py +280 -0
  82. mlrun/model_monitoring/applications/context.py +214 -0
  83. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  84. mlrun/model_monitoring/applications/histogram_data_drift.py +132 -91
  85. mlrun/model_monitoring/applications/results.py +99 -0
  86. mlrun/model_monitoring/controller.py +3 -1
  87. mlrun/model_monitoring/db/__init__.py +2 -0
  88. mlrun/model_monitoring/db/stores/__init__.py +0 -2
  89. mlrun/model_monitoring/db/stores/base/store.py +22 -37
  90. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +43 -21
  91. mlrun/model_monitoring/db/stores/sqldb/models/base.py +39 -8
  92. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +27 -7
  93. mlrun/model_monitoring/db/stores/sqldb/models/sqlite.py +5 -0
  94. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +246 -224
  95. mlrun/model_monitoring/db/stores/v3io_kv/kv_store.py +232 -216
  96. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  97. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  98. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  99. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  100. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  101. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  102. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  103. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  104. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  105. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +636 -0
  106. mlrun/model_monitoring/evidently_application.py +6 -118
  107. mlrun/model_monitoring/helpers.py +46 -1
  108. mlrun/model_monitoring/model_endpoint.py +3 -2
  109. mlrun/model_monitoring/stream_processing.py +57 -216
  110. mlrun/model_monitoring/writer.py +134 -124
  111. mlrun/package/utils/_formatter.py +2 -2
  112. mlrun/platforms/__init__.py +10 -9
  113. mlrun/platforms/iguazio.py +21 -202
  114. mlrun/projects/operations.py +19 -12
  115. mlrun/projects/pipelines.py +103 -109
  116. mlrun/projects/project.py +377 -137
  117. mlrun/render.py +15 -14
  118. mlrun/run.py +16 -47
  119. mlrun/runtimes/__init__.py +6 -3
  120. mlrun/runtimes/base.py +8 -7
  121. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  122. mlrun/runtimes/funcdoc.py +0 -28
  123. mlrun/runtimes/kubejob.py +2 -1
  124. mlrun/runtimes/local.py +5 -2
  125. mlrun/runtimes/mpijob/__init__.py +0 -20
  126. mlrun/runtimes/mpijob/v1.py +1 -1
  127. mlrun/runtimes/nuclio/api_gateway.py +440 -208
  128. mlrun/runtimes/nuclio/application/application.py +170 -8
  129. mlrun/runtimes/nuclio/function.py +39 -49
  130. mlrun/runtimes/pod.py +21 -41
  131. mlrun/runtimes/remotesparkjob.py +9 -3
  132. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  133. mlrun/runtimes/utils.py +6 -45
  134. mlrun/serving/server.py +2 -1
  135. mlrun/serving/states.py +53 -2
  136. mlrun/serving/v2_serving.py +5 -1
  137. mlrun/track/tracker.py +2 -1
  138. mlrun/utils/async_http.py +25 -5
  139. mlrun/utils/helpers.py +107 -75
  140. mlrun/utils/logger.py +39 -7
  141. mlrun/utils/notifications/notification/__init__.py +14 -9
  142. mlrun/utils/notifications/notification/base.py +1 -1
  143. mlrun/utils/notifications/notification/slack.py +61 -13
  144. mlrun/utils/notifications/notification/webhook.py +1 -1
  145. mlrun/utils/notifications/notification_pusher.py +147 -16
  146. mlrun/utils/regex.py +9 -0
  147. mlrun/utils/v3io_clients.py +0 -1
  148. mlrun/utils/version/version.json +2 -2
  149. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/METADATA +14 -6
  150. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/RECORD +154 -133
  151. mlrun/kfpops.py +0 -865
  152. mlrun/platforms/other.py +0 -305
  153. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/LICENSE +0 -0
  154. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/WHEEL +0 -0
  155. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/entry_points.txt +0 -0
  156. {mlrun-1.7.0rc13.dist-info → mlrun-1.7.0rc21.dist-info}/top_level.txt +0 -0
mlrun/projects/operations.py

@@ -15,9 +15,10 @@
 import warnings
 from typing import Optional, Union

-import kfp
+from mlrun_pipelines.models import PipelineNodeWrapper

 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.utils import hub_prefix

 from .pipelines import enrich_function_object, pipeline_context

@@ -76,7 +77,7 @@ def run_function(
     notifications: list[mlrun.model.Notification] = None,
     returns: Optional[list[Union[str, dict[str, str]]]] = None,
     builder_env: Optional[list] = None,
-) -> Union[mlrun.model.RunObject, kfp.dsl.ContainerOp]:
+) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
     """Run a local or remote task as part of a local/kubeflow pipeline

     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow

@@ -86,7 +87,7 @@ def run_function(
     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
     e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
     project runs provide additional notifications/reporting and exception handling.
-    inside a Kubeflow pipeline (KFP) run_function() generates KFP "ContainerOps" which are used to form a DAG
+    inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
     some behavior may differ between regular runs and deferred KFP runs.

     example (use with function object)::

@@ -166,7 +167,7 @@ def run_function(
                          artifact type can be given there. The artifact key must appear in the dictionary as
                          "key": "the_key".
    :param builder_env:  env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
-   :return: MLRun RunObject or KubeFlow containerOp
+   :return: MLRun RunObject or PipelineNodeWrapper
    """
    engine, function = _get_engine_and_function(function, project_object)
    task = mlrun.new_task(

@@ -190,7 +191,9 @@
     local = pipeline_context.is_run_local(local)
     task.metadata.labels = task.metadata.labels or labels or {}
     if pipeline_context.workflow_id:
-        task.metadata.labels["workflow"] = pipeline_context.workflow_id
+        task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
+            pipeline_context.workflow_id
+        )
     if function.kind == "local":
         command, function = mlrun.run.load_func_code(function)
         function.spec.command = command

@@ -225,9 +228,9 @@
 class BuildStatus:
     """returned status from build operation"""

-    def __init__(self, ready, outputs={}, function=None):
+    def __init__(self, ready, outputs=None, function=None):
         self.ready = ready
-        self.outputs = outputs
+        self.outputs = outputs or {}
         self.function = function

     def after(self, step):
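
The `outputs={}` → `outputs=None` change above fixes Python's shared mutable default pitfall: a default argument is evaluated once, at function definition time, so every call that omits `outputs` receives the same dict object. A minimal sketch of the failure mode and the fix (illustrative names, not mlrun code):

class Status:
    def __init__(self, outputs={}):  # one dict, created at def time, shared by all calls
        self.outputs = outputs

a, b = Status(), Status()
a.outputs["x"] = 1
print(b.outputs)  # {'x': 1}: b observes a's mutation

class FixedStatus:
    def __init__(self, outputs=None):  # None sentinel instead of a mutable default
        self.outputs = outputs or {}   # fresh dict per instance

c, d = FixedStatus(), FixedStatus()
c.outputs["x"] = 1
print(d.outputs)  # {}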
@@ -254,7 +257,7 @@ def build_function(
     overwrite_build_params: bool = False,
     extra_args: str = None,
     force_build: bool = False,
-) -> Union[BuildStatus, kfp.dsl.ContainerOp]:
+) -> Union[BuildStatus, PipelineNodeWrapper]:
     """deploy ML function, build container with its dependencies

     :param function: Name of the function (in the project) or function object

@@ -294,7 +297,11 @@
         if overwrite_build_params:
             function.spec.build.commands = None
         if requirements or requirements_file:
-            function.with_requirements(requirements, requirements_file, overwrite=True)
+            function.with_requirements(
+                requirements=requirements,
+                requirements_file=requirements_file,
+                overwrite=True,
+            )
         if commands:
             function.with_commands(commands)
         return function.deploy_step(

@@ -336,9 +343,9 @@
 class DeployStatus:
     """returned status from deploy operation"""

-    def __init__(self, state, outputs={}, function=None):
+    def __init__(self, state, outputs=None, function=None):
         self.state = state
-        self.outputs = outputs
+        self.outputs = outputs or {}
         self.function = function

     def after(self, step):

@@ -358,7 +365,7 @@ def deploy_function(
     builder_env: dict = None,
     project_object=None,
     mock: bool = None,
-) -> Union[DeployStatus, kfp.dsl.ContainerOp]:
+) -> Union[DeployStatus, PipelineNodeWrapper]:
     """deploy real-time (nuclio based) functions

     :param function: name of the function (in the project) or function object

mlrun/projects/pipelines.py

@@ -13,24 +13,26 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
 import typing
 import uuid

-import kfp.compiler
-from kfp import dsl
+import mlrun_pipelines.common.models
+import mlrun_pipelines.patcher
 from kfp.compiler import compiler
+from mlrun_pipelines.helpers import new_pipe_metadata

 import mlrun
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.utils.notifications
 from mlrun.errors import err_to_str
 from mlrun.utils import (
     get_ui_url,
     logger,
-    new_pipe_metadata,
     normalize_workflow_name,
     retry_until_successful,
 )

@@ -300,72 +302,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
     }


-# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
-# converts it to a k8s object. As part of the flow in the Compile.compile() method,
-# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
-# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
-# I ran across the following problem when seeking for a method to set the priority_class_name:
-# https://github.com/kubeflow/pipelines/issues/3594
-# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
-# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
-# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
-def _create_enriched_mlrun_workflow(
-    self,
-    pipeline_func: typing.Callable,
-    pipeline_name: typing.Optional[str] = None,
-    pipeline_description: typing.Optional[str] = None,
-    params_list: typing.Optional[list[dsl.PipelineParam]] = None,
-    pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
-):
-    """Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
-    workflow = self._original_create_workflow(
-        pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
-    )
-    # We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
-    # that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
-    # we know can be raised.
-    try:
-        functions = []
-        if pipeline_context.functions:
-            try:
-                functions = pipeline_context.functions.values()
-            except Exception as err:
-                logger.debug(
-                    "Unable to retrieve project functions, not enriching workflow with mlrun",
-                    error=err_to_str(err),
-                )
-                return workflow
-
-        # enrich each pipeline step with your desire k8s attribute
-        for kfp_step_template in workflow["spec"]["templates"]:
-            if kfp_step_template.get("container"):
-                for function_obj in functions:
-                    # we condition within each function since the comparison between the function and
-                    # the kfp pod may change depending on the attribute type.
-                    _set_function_attribute_on_kfp_pod(
-                        kfp_step_template,
-                        function_obj,
-                        "PriorityClassName",
-                        "priority_class_name",
-                    )
-                    _enrich_kfp_pod_security_context(
-                        kfp_step_template,
-                        function_obj,
-                    )
-    except mlrun.errors.MLRunInvalidArgumentError:
-        raise
-    except Exception as err:
-        logger.debug(
-            "Something in the enrichment of kfp pods failed", error=err_to_str(err)
-        )
-    return workflow
-
-
-# patching function as class method
-kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
-kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
-
-
 def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
     project_instance, name, tag, hash_key = parse_versioned_object_uri(
         key, project.metadata.name
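
The deleted block relied on the save-and-wrap monkey-patching idiom: keep a reference to the original method on the class, then swap in a wrapper that delegates to the saved original and post-processes its result. Judging by the new `import mlrun_pipelines.patcher` above, this compiler patching presumably now lives in the mlrun_pipelines package. A generic sketch of the idiom, with hypothetical names:

class Compiler:
    def create_workflow(self, spec):
        return {"spec": spec}

def _create_enriched_workflow(self, spec):
    # delegate to the saved original, then enrich the resulting dict
    workflow = self._original_create_workflow(spec)
    workflow.setdefault("metadata", {})["enriched"] = True
    return workflow

# keep the original so the wrapper can call it, then install the wrapper
Compiler._original_create_workflow = Compiler.create_workflow
Compiler.create_workflow = _create_enriched_workflow

print(Compiler().create_workflow("train"))  # {'spec': 'train', 'metadata': {'enriched': True}}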
@@ -436,7 +372,7 @@ class _PipelineRunStatus:
         engine: type["_PipelineRunner"],
         project: "mlrun.projects.MlrunProject",
         workflow: WorkflowSpec = None,
-        state: str = "",
+        state: mlrun_pipelines.common.models.RunStatuses = "",
         exc: Exception = None,
     ):
         """

@@ -456,7 +392,10 @@

     @property
     def state(self):
-        if self._state not in mlrun.run.RunStatuses.stable_statuses():
+        if (
+            self._state
+            not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
+        ):
             self._state = self._engine.get_state(self.run_id, self.project)
         return self._state

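The reworked state property is a poll-until-terminal cache: while `self._state` is not a stable (terminal) status, each read asks the engine for a fresh value; once a terminal status lands, it is cached and no further round trips happen. A minimal sketch of the pattern, assuming a fixed terminal set in place of RunStatuses.stable_statuses():

TERMINAL = {"succeeded", "failed", "skipped"}  # stands in for RunStatuses.stable_statuses()

class RunStatus:
    def __init__(self, fetch):
        self._fetch = fetch  # callable returning the current engine-side status
        self._state = ""

    @property
    def state(self):
        if self._state not in TERMINAL:
            self._state = self._fetch()  # refresh until terminal, then serve the cache
        return self._state

states = iter(["running", "running", "succeeded", "unreachable"])
rs = RunStatus(lambda: next(states))
print(rs.state, rs.state, rs.state, rs.state)  # running running succeeded succeeded
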
@@ -505,6 +444,7 @@ class _PipelineRunner(abc.ABC):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
+        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pass

@@ -521,7 +461,7 @@
     @staticmethod
     def _get_handler(workflow_handler, workflow_spec, project, secrets):
         if not (workflow_handler and callable(workflow_handler)):
-            workflow_file = workflow_spec.get_source_file(project.spec.context)
+            workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
             workflow_handler = create_pipeline(
                 project,
                 workflow_file,

@@ -541,6 +481,7 @@
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
     ):
         pass

@@ -553,7 +494,7 @@ class _KFPRunner(_PipelineRunner):
     @classmethod
     def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
         pipeline_context.set(project, workflow_spec)
-        workflow_file = workflow_spec.get_source_file(project.spec.context)
+        workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
         functions = FunctionsDict(project)
         pipeline = create_pipeline(
             project,

@@ -583,6 +524,7 @@
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
+        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(

@@ -631,13 +573,13 @@
                     func_name=func.metadata.name,
                     exc_info=err_to_str(exc),
                 )
-
-        project.notifiers.push_pipeline_start_message(
-            project.metadata.name,
-            project.get_param("commit_id", None),
-            run_id,
-            True,
-        )
+        if send_start_notification:
+            project.notifiers.push_pipeline_start_message(
+                project.metadata.name,
+                project.get_param("commit_id", None),
+                run_id,
+                True,
+            )
         pipeline_context.clear()
         return _PipelineRunStatus(run_id, cls, project=project, workflow=workflow_spec)

@@ -672,6 +614,7 @@
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
     ):
         if timeout is None:
             timeout = 60 * 60

@@ -727,6 +670,7 @@ class _LocalRunner(_PipelineRunner):
         namespace=None,
         source=None,
         notifications: list[mlrun.model.Notification] = None,
+        send_start_notification: bool = True,
     ) -> _PipelineRunStatus:
         pipeline_context.set(project, workflow_spec)
         workflow_handler = _PipelineRunner._get_handler(

@@ -747,13 +691,15 @@
         original_source = project.spec.source
         project.set_source(source=source)
         pipeline_context.workflow_artifact_path = artifact_path
-        project.notifiers.push_pipeline_start_message(
-            project.metadata.name, pipeline_id=workflow_id
-        )
+
+        if send_start_notification:
+            project.notifiers.push_pipeline_start_message(
+                project.metadata.name, pipeline_id=workflow_id
+            )
         err = None
         try:
             workflow_handler(**workflow_spec.args)
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
         except Exception as exc:
             err = exc
             logger.exception("Workflow run failed")

@@ -761,7 +707,7 @@
                 f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
         project.notifiers.push_pipeline_run_results(
             pipeline_context.runs_map.values(), state=state

@@ -795,6 +741,7 @@
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
     ):
         pass

@@ -816,13 +763,21 @@ class _RemoteRunner(_PipelineRunner):
         namespace: str = None,
         source: str = None,
         notifications: list[mlrun.model.Notification] = None,
+        send_start_notification: bool = True,
     ) -> typing.Optional[_PipelineRunStatus]:
         workflow_name = normalize_workflow_name(name=name, project_name=project.name)
         workflow_id = None

         # for start message, fallback to old notification behavior
-        for notification in notifications or []:
-            project.notifiers.add_notification(notification.kind, notification.params)
+        if send_start_notification:
+            for notification in notifications or []:
+                project.notifiers.add_notification(
+                    notification.kind, notification.params
+                )
+                # if a notification with `when=running` is provided, it will be used explicitly and others
+                # will be ignored
+                if "running" in notification.when:
+                    break

         # The returned engine for this runner is the engine of the workflow.
         # In this way wait_for_completion/get_run_status would be executed by the correct pipeline runner.

@@ -882,17 +837,33 @@
                 get_workflow_id_timeout=get_workflow_id_timeout,
             )

+            def _get_workflow_id_or_bail():
+                try:
+                    return run_db.get_workflow_id(
+                        project=project.name,
+                        name=workflow_response.name,
+                        run_id=workflow_response.run_id,
+                        engine=workflow_spec.engine,
+                    )
+                except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                    # fail fast on specific errors
+                    if get_wf_exc.error_status_code in [
+                        http.HTTPStatus.PRECONDITION_FAILED
+                    ]:
+                        raise mlrun.errors.MLRunFatalFailureError(
+                            original_exception=get_wf_exc
+                        )
+
+                    # raise for a retry (on other errors)
+                    raise
+
             # Getting workflow id from run:
             response = retry_until_successful(
                 1,
                 get_workflow_id_timeout,
                 logger,
                 False,
-                run_db.get_workflow_id,
-                project=project.name,
-                name=workflow_response.name,
-                run_id=workflow_response.run_id,
-                engine=workflow_spec.engine,
+                _get_workflow_id_or_bail,
             )
             workflow_id = response.workflow_id
             # After fetching the workflow_id the workflow executed successfully
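
`_get_workflow_id_or_bail` adapts the polled call for `retry_until_successful`: most exceptions propagate and get retried, while a 412 (Precondition Failed) response is wrapped in `MLRunFatalFailureError` so the surrounding retry helper can stop immediately instead of retrying a request that can never succeed. A generic sketch of the fail-fast-inside-retry pattern (hypothetical helper names, not mlrun's implementation):

import time

class FatalFailure(Exception):
    """Wraps an unrecoverable error so the retry loop stops immediately."""

    def __init__(self, original):
        super().__init__(str(original))
        self.original = original

def retry_with_fatal_escape(interval, timeout, fn):
    deadline = time.monotonic() + timeout
    while True:
        try:
            return fn()
        except FatalFailure as exc:
            raise exc.original  # unrecoverable: surface the real error, no retry
        except Exception:
            if time.monotonic() >= deadline:
                raise  # retry budget exhausted: surface the last error
            time.sleep(interval)  # transient: wait and try again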
@@ -904,9 +875,9 @@
                 f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.running
         project.notifiers.push_pipeline_start_message(
             project.metadata.name,
         )

@@ -923,24 +894,47 @@
     @staticmethod
     def get_run_status(
         project,
-        run,
+        run: _PipelineRunStatus,
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        inner_engine: type[_PipelineRunner] = None,
     ):
-        # ignore notifiers, as they are handled by the remote pipeline notifications,
-        # so overriding with CustomNotificationPusher with empty list of notifiers
-        state, had_errors, text = _KFPRunner.get_run_status(
-            project,
-            run,
-            timeout,
-            expected_statuses,
-            notifiers=mlrun.utils.notifications.CustomNotificationPusher([]),
-        )
+        inner_engine = inner_engine or _KFPRunner
+        if inner_engine.engine == _KFPRunner.engine:
+            # ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
+            # so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
+            local_project_notifiers = list(
+                set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
+                    set(project.notifiers.notifications.keys())
+                )
+            )
+            notifiers = mlrun.utils.notifications.CustomNotificationPusher(
+                local_project_notifiers
+            )
+            return _KFPRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )

-        # indicate the pipeline status since we don't push the notifications in the remote runner
-        logger.info(text)
-        return state, had_errors, text
+        elif inner_engine.engine == _LocalRunner.engine:
+            mldb = mlrun.db.get_run_db(secrets=project._secrets)
+            pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
+            pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
+            pipeline_runner_run.logs(db=mldb)
+            pipeline_runner_run.refresh()
+            run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
+                pipeline_runner_run.status.state
+            )
+            run._exc = pipeline_runner_run.status.error
+
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Unsupported inner runner engine: {inner_engine.engine}"
+            )


 def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
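
In the KFP branch above, only the project's local notifier types survive the handoff (remote-side notifications are pushed by the pipeline itself); the selection is a plain set intersection between the local notification types and the notifiers configured on the project. A toy equivalent with made-up values:

local_types = {"console", "ipython"}  # stands in for NotificationTypes.local()
configured = {"slack": object(), "console": object()}  # project.notifiers.notifications
kept = list(local_types & set(configured))  # only the local ones survive
print(kept)  # ['console']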
@@ -1099,7 +1093,7 @@ def load_and_run(
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)

-    if run.state == mlrun.run.RunStatuses.failed:
+    if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc

     if wait_for_completion:

@@ -1114,7 +1108,7 @@

     pipeline_state, _, _ = project.get_run_status(run)
     context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-    if pipeline_state != mlrun.run.RunStatuses.succeeded:
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
         raise RuntimeError(
             f"Workflow {workflow_log_message} failed, state={pipeline_state}"
         )