mlrun 1.7.0rc4__py3-none-any.whl → 1.7.0rc20__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of mlrun might be problematic.
Files changed (200)
  1. mlrun/__init__.py +11 -1
  2. mlrun/__main__.py +25 -111
  3. mlrun/{datastore/helpers.py → alerts/__init__.py} +2 -5
  4. mlrun/alerts/alert.py +144 -0
  5. mlrun/api/schemas/__init__.py +4 -3
  6. mlrun/artifacts/__init__.py +8 -3
  7. mlrun/artifacts/base.py +38 -254
  8. mlrun/artifacts/dataset.py +9 -190
  9. mlrun/artifacts/manager.py +41 -47
  10. mlrun/artifacts/model.py +30 -158
  11. mlrun/artifacts/plots.py +23 -380
  12. mlrun/common/constants.py +68 -0
  13. mlrun/common/formatters/__init__.py +19 -0
  14. mlrun/{model_monitoring/stores/models/sqlite.py → common/formatters/artifact.py} +6 -8
  15. mlrun/common/formatters/base.py +78 -0
  16. mlrun/common/formatters/function.py +41 -0
  17. mlrun/common/formatters/pipeline.py +53 -0
  18. mlrun/common/formatters/project.py +51 -0
  19. mlrun/{runtimes → common/runtimes}/constants.py +32 -4
  20. mlrun/common/schemas/__init__.py +25 -4
  21. mlrun/common/schemas/alert.py +203 -0
  22. mlrun/common/schemas/api_gateway.py +148 -0
  23. mlrun/common/schemas/artifact.py +15 -5
  24. mlrun/common/schemas/auth.py +8 -2
  25. mlrun/common/schemas/client_spec.py +2 -0
  26. mlrun/common/schemas/frontend_spec.py +1 -0
  27. mlrun/common/schemas/function.py +4 -0
  28. mlrun/common/schemas/hub.py +7 -9
  29. mlrun/common/schemas/model_monitoring/__init__.py +19 -3
  30. mlrun/common/schemas/model_monitoring/constants.py +96 -26
  31. mlrun/common/schemas/model_monitoring/grafana.py +9 -5
  32. mlrun/common/schemas/model_monitoring/model_endpoints.py +86 -2
  33. mlrun/{runtimes/mpijob/v1alpha1.py → common/schemas/pagination.py} +10 -13
  34. mlrun/common/schemas/pipeline.py +0 -9
  35. mlrun/common/schemas/project.py +22 -21
  36. mlrun/common/types.py +7 -1
  37. mlrun/config.py +87 -19
  38. mlrun/data_types/data_types.py +4 -0
  39. mlrun/data_types/to_pandas.py +9 -9
  40. mlrun/datastore/__init__.py +5 -8
  41. mlrun/datastore/alibaba_oss.py +130 -0
  42. mlrun/datastore/azure_blob.py +4 -5
  43. mlrun/datastore/base.py +69 -30
  44. mlrun/datastore/datastore.py +10 -2
  45. mlrun/datastore/datastore_profile.py +90 -6
  46. mlrun/datastore/google_cloud_storage.py +1 -1
  47. mlrun/datastore/hdfs.py +5 -0
  48. mlrun/datastore/inmem.py +2 -2
  49. mlrun/datastore/redis.py +2 -2
  50. mlrun/datastore/s3.py +5 -0
  51. mlrun/datastore/snowflake_utils.py +43 -0
  52. mlrun/datastore/sources.py +172 -44
  53. mlrun/datastore/store_resources.py +7 -7
  54. mlrun/datastore/targets.py +285 -41
  55. mlrun/datastore/utils.py +68 -5
  56. mlrun/datastore/v3io.py +27 -50
  57. mlrun/db/auth_utils.py +152 -0
  58. mlrun/db/base.py +149 -14
  59. mlrun/db/factory.py +1 -1
  60. mlrun/db/httpdb.py +608 -178
  61. mlrun/db/nopdb.py +191 -7
  62. mlrun/errors.py +11 -0
  63. mlrun/execution.py +37 -20
  64. mlrun/feature_store/__init__.py +0 -2
  65. mlrun/feature_store/api.py +21 -52
  66. mlrun/feature_store/feature_set.py +48 -23
  67. mlrun/feature_store/feature_vector.py +2 -1
  68. mlrun/feature_store/ingestion.py +7 -6
  69. mlrun/feature_store/retrieval/base.py +9 -4
  70. mlrun/feature_store/retrieval/conversion.py +9 -9
  71. mlrun/feature_store/retrieval/dask_merger.py +2 -0
  72. mlrun/feature_store/retrieval/job.py +9 -3
  73. mlrun/feature_store/retrieval/local_merger.py +2 -0
  74. mlrun/feature_store/retrieval/spark_merger.py +34 -24
  75. mlrun/feature_store/steps.py +30 -19
  76. mlrun/features.py +4 -13
  77. mlrun/frameworks/_dl_common/loggers/tensorboard_logger.py +7 -12
  78. mlrun/frameworks/auto_mlrun/auto_mlrun.py +2 -2
  79. mlrun/frameworks/lgbm/__init__.py +1 -1
  80. mlrun/frameworks/lgbm/callbacks/callback.py +2 -4
  81. mlrun/frameworks/lgbm/model_handler.py +1 -1
  82. mlrun/frameworks/parallel_coordinates.py +2 -1
  83. mlrun/frameworks/pytorch/__init__.py +2 -2
  84. mlrun/frameworks/sklearn/__init__.py +1 -1
  85. mlrun/frameworks/tf_keras/__init__.py +5 -2
  86. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +1 -1
  87. mlrun/frameworks/tf_keras/mlrun_interface.py +2 -2
  88. mlrun/frameworks/xgboost/__init__.py +1 -1
  89. mlrun/k8s_utils.py +10 -11
  90. mlrun/launcher/__init__.py +1 -1
  91. mlrun/launcher/base.py +6 -5
  92. mlrun/launcher/client.py +8 -6
  93. mlrun/launcher/factory.py +1 -1
  94. mlrun/launcher/local.py +9 -3
  95. mlrun/launcher/remote.py +9 -3
  96. mlrun/lists.py +6 -2
  97. mlrun/model.py +58 -19
  98. mlrun/model_monitoring/__init__.py +1 -1
  99. mlrun/model_monitoring/api.py +127 -301
  100. mlrun/model_monitoring/application.py +5 -296
  101. mlrun/model_monitoring/applications/__init__.py +11 -0
  102. mlrun/model_monitoring/applications/_application_steps.py +157 -0
  103. mlrun/model_monitoring/applications/base.py +282 -0
  104. mlrun/model_monitoring/applications/context.py +214 -0
  105. mlrun/model_monitoring/applications/evidently_base.py +211 -0
  106. mlrun/model_monitoring/applications/histogram_data_drift.py +224 -93
  107. mlrun/model_monitoring/applications/results.py +99 -0
  108. mlrun/model_monitoring/controller.py +30 -36
  109. mlrun/model_monitoring/db/__init__.py +18 -0
  110. mlrun/model_monitoring/{stores → db/stores}/__init__.py +43 -36
  111. mlrun/model_monitoring/db/stores/base/__init__.py +15 -0
  112. mlrun/model_monitoring/{stores/model_endpoint_store.py → db/stores/base/store.py} +58 -32
  113. mlrun/model_monitoring/db/stores/sqldb/__init__.py +13 -0
  114. mlrun/model_monitoring/db/stores/sqldb/models/__init__.py +71 -0
  115. mlrun/model_monitoring/{stores → db/stores/sqldb}/models/base.py +109 -5
  116. mlrun/model_monitoring/db/stores/sqldb/models/mysql.py +88 -0
  117. mlrun/model_monitoring/{stores/models/mysql.py → db/stores/sqldb/models/sqlite.py} +19 -13
  118. mlrun/model_monitoring/db/stores/sqldb/sql_store.py +684 -0
  119. mlrun/model_monitoring/db/stores/v3io_kv/__init__.py +13 -0
  120. mlrun/model_monitoring/{stores/kv_model_endpoint_store.py → db/stores/v3io_kv/kv_store.py} +302 -155
  121. mlrun/model_monitoring/db/tsdb/__init__.py +100 -0
  122. mlrun/model_monitoring/db/tsdb/base.py +329 -0
  123. mlrun/model_monitoring/db/tsdb/helpers.py +30 -0
  124. mlrun/model_monitoring/db/tsdb/tdengine/__init__.py +15 -0
  125. mlrun/model_monitoring/db/tsdb/tdengine/schemas.py +240 -0
  126. mlrun/model_monitoring/db/tsdb/tdengine/stream_graph_steps.py +45 -0
  127. mlrun/model_monitoring/db/tsdb/tdengine/tdengine_connector.py +397 -0
  128. mlrun/model_monitoring/db/tsdb/v3io/__init__.py +15 -0
  129. mlrun/model_monitoring/db/tsdb/v3io/stream_graph_steps.py +117 -0
  130. mlrun/model_monitoring/db/tsdb/v3io/v3io_connector.py +630 -0
  131. mlrun/model_monitoring/evidently_application.py +6 -118
  132. mlrun/model_monitoring/features_drift_table.py +34 -22
  133. mlrun/model_monitoring/helpers.py +100 -7
  134. mlrun/model_monitoring/model_endpoint.py +3 -2
  135. mlrun/model_monitoring/stream_processing.py +93 -228
  136. mlrun/model_monitoring/tracking_policy.py +7 -1
  137. mlrun/model_monitoring/writer.py +152 -124
  138. mlrun/package/packagers_manager.py +1 -0
  139. mlrun/package/utils/_formatter.py +2 -2
  140. mlrun/platforms/__init__.py +11 -10
  141. mlrun/platforms/iguazio.py +21 -202
  142. mlrun/projects/operations.py +30 -16
  143. mlrun/projects/pipelines.py +92 -99
  144. mlrun/projects/project.py +757 -268
  145. mlrun/render.py +15 -14
  146. mlrun/run.py +160 -162
  147. mlrun/runtimes/__init__.py +55 -3
  148. mlrun/runtimes/base.py +33 -19
  149. mlrun/runtimes/databricks_job/databricks_wrapper.py +1 -1
  150. mlrun/runtimes/funcdoc.py +0 -28
  151. mlrun/runtimes/kubejob.py +28 -122
  152. mlrun/runtimes/local.py +5 -2
  153. mlrun/runtimes/mpijob/__init__.py +0 -20
  154. mlrun/runtimes/mpijob/abstract.py +8 -8
  155. mlrun/runtimes/mpijob/v1.py +1 -1
  156. mlrun/runtimes/nuclio/__init__.py +1 -0
  157. mlrun/runtimes/nuclio/api_gateway.py +709 -0
  158. mlrun/runtimes/nuclio/application/__init__.py +15 -0
  159. mlrun/runtimes/nuclio/application/application.py +523 -0
  160. mlrun/runtimes/nuclio/application/reverse_proxy.go +95 -0
  161. mlrun/runtimes/nuclio/function.py +98 -58
  162. mlrun/runtimes/nuclio/serving.py +36 -42
  163. mlrun/runtimes/pod.py +196 -45
  164. mlrun/runtimes/remotesparkjob.py +1 -1
  165. mlrun/runtimes/sparkjob/spark3job.py +1 -1
  166. mlrun/runtimes/utils.py +6 -73
  167. mlrun/secrets.py +6 -2
  168. mlrun/serving/remote.py +2 -3
  169. mlrun/serving/routers.py +7 -4
  170. mlrun/serving/server.py +7 -8
  171. mlrun/serving/states.py +73 -43
  172. mlrun/serving/v2_serving.py +8 -7
  173. mlrun/track/tracker.py +2 -1
  174. mlrun/utils/async_http.py +25 -5
  175. mlrun/utils/helpers.py +141 -75
  176. mlrun/utils/http.py +1 -1
  177. mlrun/utils/logger.py +39 -7
  178. mlrun/utils/notifications/notification/__init__.py +14 -9
  179. mlrun/utils/notifications/notification/base.py +12 -0
  180. mlrun/utils/notifications/notification/console.py +2 -0
  181. mlrun/utils/notifications/notification/git.py +3 -1
  182. mlrun/utils/notifications/notification/ipython.py +2 -0
  183. mlrun/utils/notifications/notification/slack.py +101 -21
  184. mlrun/utils/notifications/notification/webhook.py +11 -1
  185. mlrun/utils/notifications/notification_pusher.py +147 -16
  186. mlrun/utils/retryer.py +3 -2
  187. mlrun/utils/v3io_clients.py +0 -1
  188. mlrun/utils/version/version.json +2 -2
  189. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/METADATA +33 -18
  190. mlrun-1.7.0rc20.dist-info/RECORD +353 -0
  191. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/WHEEL +1 -1
  192. mlrun/kfpops.py +0 -868
  193. mlrun/model_monitoring/batch.py +0 -974
  194. mlrun/model_monitoring/stores/models/__init__.py +0 -27
  195. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -382
  196. mlrun/platforms/other.py +0 -305
  197. mlrun-1.7.0rc4.dist-info/RECORD +0 -321
  198. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/LICENSE +0 -0
  199. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/entry_points.txt +0 -0
  200. {mlrun-1.7.0rc4.dist-info → mlrun-1.7.0rc20.dist-info}/top_level.txt +0 -0

mlrun/projects/operations.py

@@ -15,9 +15,10 @@
 import warnings
 from typing import Optional, Union
 
-import kfp
+from mlrun_pipelines.models import PipelineNodeWrapper
 
 import mlrun
+import mlrun.common.constants as mlrun_constants
 from mlrun.utils import hub_prefix
 
 from .pipelines import enrich_function_object, pipeline_context
@@ -76,7 +77,7 @@ def run_function(
     notifications: list[mlrun.model.Notification] = None,
     returns: Optional[list[Union[str, dict[str, str]]]] = None,
     builder_env: Optional[list] = None,
-) -> Union[mlrun.model.RunObject, kfp.dsl.ContainerOp]:
+) -> Union[mlrun.model.RunObject, PipelineNodeWrapper]:
     """Run a local or remote task as part of a local/kubeflow pipeline
 
     run_function() allow you to execute a function locally, on a remote cluster, or as part of an automated workflow
@@ -86,7 +87,7 @@ def run_function(
     when functions run as part of a workflow/pipeline (project.run()) some attributes can be set at the run level,
     e.g. local=True will run all the functions locally, setting artifact_path will direct all outputs to the same path.
     project runs provide additional notifications/reporting and exception handling.
-    inside a Kubeflow pipeline (KFP) run_function() generates KFP "ContainerOps" which are used to form a DAG
+    inside a Kubeflow pipeline (KFP) run_function() generates KFP node (see PipelineNodeWrapper) which forms a DAG
     some behavior may differ between regular runs and deferred KFP runs.
 
     example (use with function object)::
@@ -95,8 +96,11 @@ def run_function(
         MODEL_CLASS = "sklearn.ensemble.RandomForestClassifier"
         DATA_PATH = "s3://bigdata/data.parquet"
         function = mlrun.import_function("hub://auto-trainer")
-        run1 = run_function(function, params={"label_columns": LABELS, "model_class": MODEL_CLASS},
-                            inputs={"dataset": DATA_PATH})
+        run1 = run_function(
+            function,
+            params={"label_columns": LABELS, "model_class": MODEL_CLASS},
+            inputs={"dataset": DATA_PATH},
+        )
 
     example (use with project)::
 
@@ -115,8 +119,12 @@ def run_function(
         @dsl.pipeline(name="test pipeline", description="test")
         def my_pipe(url=""):
             run1 = run_function("loaddata", params={"url": url}, outputs=["data"])
-            run2 = run_function("train", params={"label_columns": LABELS, "model_class": MODEL_CLASS},
-                                inputs={"dataset": run1.outputs["data"]})
+            run2 = run_function(
+                "train",
+                params={"label_columns": LABELS, "model_class": MODEL_CLASS},
+                inputs={"dataset": run1.outputs["data"]},
+            )
+
 
         project.run(workflow_handler=my_pipe, arguments={"param1": 7})
 
@@ -159,7 +167,7 @@ def run_function(
                             artifact type can be given there. The artifact key must appear in the dictionary as
                             "key": "the_key".
    :param builder_env:      env vars dict for source archive config/credentials e.g. builder_env={"GIT_TOKEN": token}
-   :return: MLRun RunObject or KubeFlow containerOp
+   :return: MLRun RunObject or PipelineNodeWrapper
    """
    engine, function = _get_engine_and_function(function, project_object)
    task = mlrun.new_task(
@@ -183,7 +191,9 @@ def run_function(
     local = pipeline_context.is_run_local(local)
     task.metadata.labels = task.metadata.labels or labels or {}
     if pipeline_context.workflow_id:
-        task.metadata.labels["workflow"] = pipeline_context.workflow_id
+        task.metadata.labels[mlrun_constants.MLRunInternalLabels.workflow] = (
+            pipeline_context.workflow_id
+        )
     if function.kind == "local":
         command, function = mlrun.run.load_func_code(function)
         function.spec.command = command
@@ -218,9 +228,9 @@
 class BuildStatus:
     """returned status from build operation"""
 
-    def __init__(self, ready, outputs={}, function=None):
+    def __init__(self, ready, outputs=None, function=None):
         self.ready = ready
-        self.outputs = outputs
+        self.outputs = outputs or {}
         self.function = function
 
     def after(self, step):
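
The `outputs={}` → `outputs=None` change above (mirrored in DeployStatus further down) fixes Python's classic mutable-default-argument pitfall: a default value is evaluated once, when the function is defined, so every call that omits the argument shares the same dict object. A minimal standalone illustration of the bug pattern and the fix:

class Status:
    # BUG: this default dict is created once and shared by every instance
    def __init__(self, outputs={}):
        self.outputs = outputs


class FixedStatus:
    # FIX: use None as the sentinel and allocate a fresh dict per instance
    def __init__(self, outputs=None):
        self.outputs = outputs or {}


a, b = Status(), Status()
a.outputs["key"] = "value"
print(b.outputs)  # {'key': 'value'} - b silently sees a's data

c, d = FixedStatus(), FixedStatus()
c.outputs["key"] = "value"
print(d.outputs)  # {} - each instance owns its own dict
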
@@ -247,7 +257,7 @@ def build_function(
     overwrite_build_params: bool = False,
     extra_args: str = None,
     force_build: bool = False,
-) -> Union[BuildStatus, kfp.dsl.ContainerOp]:
+) -> Union[BuildStatus, PipelineNodeWrapper]:
     """deploy ML function, build container with its dependencies
 
     :param function: Name of the function (in the project) or function object
@@ -287,7 +297,11 @@ def build_function(
     if overwrite_build_params:
         function.spec.build.commands = None
     if requirements or requirements_file:
-        function.with_requirements(requirements, requirements_file, overwrite=True)
+        function.with_requirements(
+            requirements=requirements,
+            requirements_file=requirements_file,
+            overwrite=True,
+        )
     if commands:
         function.with_commands(commands)
     return function.deploy_step(
@@ -329,9 +343,9 @@ def build_function(
 class DeployStatus:
     """returned status from deploy operation"""
 
-    def __init__(self, state, outputs={}, function=None):
+    def __init__(self, state, outputs=None, function=None):
         self.state = state
-        self.outputs = outputs
+        self.outputs = outputs or {}
         self.function = function
 
     def after(self, step):
@@ -351,7 +365,7 @@ def deploy_function(
     builder_env: dict = None,
     project_object=None,
     mock: bool = None,
-) -> Union[DeployStatus, kfp.dsl.ContainerOp]:
+) -> Union[DeployStatus, PipelineNodeWrapper]:
     """deploy real-time (nuclio based) functions
 
     :param function: name of the function (in the project) or function object

mlrun/projects/pipelines.py

@@ -13,24 +13,26 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
 import typing
 import uuid
 
-import kfp.compiler
-from kfp import dsl
+import mlrun_pipelines.common.models
+import mlrun_pipelines.patcher
 from kfp.compiler import compiler
+from mlrun_pipelines.helpers import new_pipe_metadata
 
 import mlrun
+import mlrun.common.runtimes.constants
 import mlrun.common.schemas
 import mlrun.utils.notifications
 from mlrun.errors import err_to_str
 from mlrun.utils import (
     get_ui_url,
     logger,
-    new_pipe_metadata,
     normalize_workflow_name,
     retry_until_successful,
 )
@@ -300,72 +302,6 @@ def _enrich_kfp_pod_security_context(kfp_pod_template, function):
     }
 
 
-# When we run pipelines, the kfp.compile.Compile.compile() method takes the decorated function with @dsl.pipeline and
-# converts it to a k8s object. As part of the flow in the Compile.compile() method,
-# we call _create_and_write_workflow, which builds a dictionary from the workflow and then writes it to a file.
-# Unfortunately, the kfp sdk does not provide an API for configuring priority_class_name and other attributes.
-# I ran across the following problem when seeking for a method to set the priority_class_name:
-# https://github.com/kubeflow/pipelines/issues/3594
-# When we patch the _create_and_write_workflow, we can eventually obtain the dictionary right before we write it
-# to a file and enrich it with argo compatible fields, make sure you looking for the same argo version we use
-# https://github.com/argoproj/argo-workflows/blob/release-2.7/pkg/apis/workflow/v1alpha1/workflow_types.go
-def _create_enriched_mlrun_workflow(
-    self,
-    pipeline_func: typing.Callable,
-    pipeline_name: typing.Optional[str] = None,
-    pipeline_description: typing.Optional[str] = None,
-    params_list: typing.Optional[list[dsl.PipelineParam]] = None,
-    pipeline_conf: typing.Optional[dsl.PipelineConf] = None,
-):
-    """Call internal implementation of create_workflow and enrich with mlrun functions attributes"""
-    workflow = self._original_create_workflow(
-        pipeline_func, pipeline_name, pipeline_description, params_list, pipeline_conf
-    )
-    # We don't want to interrupt the original flow and don't know all the scenarios the function could be called.
-    # that's why we have try/except on all the code of the enrichment and also specific try/except for errors that
-    # we know can be raised.
-    try:
-        functions = []
-        if pipeline_context.functions:
-            try:
-                functions = pipeline_context.functions.values()
-            except Exception as err:
-                logger.debug(
-                    "Unable to retrieve project functions, not enriching workflow with mlrun",
-                    error=err_to_str(err),
-                )
-                return workflow
-
-        # enrich each pipeline step with your desire k8s attribute
-        for kfp_step_template in workflow["spec"]["templates"]:
-            if kfp_step_template.get("container"):
-                for function_obj in functions:
-                    # we condition within each function since the comparison between the function and
-                    # the kfp pod may change depending on the attribute type.
-                    _set_function_attribute_on_kfp_pod(
-                        kfp_step_template,
-                        function_obj,
-                        "PriorityClassName",
-                        "priority_class_name",
-                    )
-                    _enrich_kfp_pod_security_context(
-                        kfp_step_template,
-                        function_obj,
-                    )
-    except mlrun.errors.MLRunInvalidArgumentError:
-        raise
-    except Exception as err:
-        logger.debug(
-            "Something in the enrichment of kfp pods failed", error=err_to_str(err)
-        )
-    return workflow
-
-
-# patching function as class method
-kfp.compiler.Compiler._original_create_workflow = kfp.compiler.Compiler._create_workflow
-kfp.compiler.Compiler._create_workflow = _create_enriched_mlrun_workflow
-
-
 def get_db_function(project, key) -> mlrun.runtimes.BaseRuntime:
     project_instance, name, tag, hash_key = parse_versioned_object_uri(
         key, project.metadata.name
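
The block removed above monkey-patched kfp's compiler so every generated pod template could be enriched with attributes the kfp SDK exposes no API for (priority class name, security context). Given the new `import mlrun_pipelines.patcher` at the top of this file, that patching has presumably moved into the mlrun_pipelines package. For reference, the wrap-and-postprocess pattern it used, in a generic self-contained sketch (the class and field names here are illustrative, not mlrun's):

class Compiler:
    def _create_workflow(self):
        # stand-in for the real compiler output
        return {"spec": {"templates": [{"container": {}}]}}


def _patched_create_workflow(self, *args, **kwargs):
    # call the saved original, then enrich its result before anyone sees it
    workflow = self._original_create_workflow(*args, **kwargs)
    for template in workflow["spec"]["templates"]:
        if template.get("container"):
            template.setdefault("priorityClassName", "workload-high")
    return workflow


# keep a handle to the original, then swap in the wrapper as a class attribute
Compiler._original_create_workflow = Compiler._create_workflow
Compiler._create_workflow = _patched_create_workflow
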
@@ -412,6 +348,11 @@ def enrich_function_object(
     if decorator:
         decorator(f)
 
+    if project.spec.default_function_node_selector:
+        f.enrich_runtime_spec(
+            project.spec.default_function_node_selector,
+        )
+
     if try_auto_mount:
         if (
             decorator and AutoMountType.is_auto_modifier(decorator)
@@ -431,7 +372,7 @@ class _PipelineRunStatus:
         engine: type["_PipelineRunner"],
         project: "mlrun.projects.MlrunProject",
         workflow: WorkflowSpec = None,
-        state: str = "",
+        state: mlrun_pipelines.common.models.RunStatuses = "",
         exc: Exception = None,
     ):
         """
@@ -451,7 +392,10 @@ class _PipelineRunStatus:
 
     @property
     def state(self):
-        if self._state not in mlrun.run.RunStatuses.stable_statuses():
+        if (
+            self._state
+            not in mlrun_pipelines.common.models.RunStatuses.stable_statuses()
+        ):
             self._state = self._engine.get_state(self.run_id, self.project)
         return self._state
 
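
The reworked `state` property above follows a poll-until-terminal pattern: once the stored state is one of the engine's stable (terminal) statuses it is served from the cache; otherwise the engine is queried again. The same pattern in a minimal generic sketch (the status values here are illustrative):

STABLE_STATUSES = {"succeeded", "failed", "skipped"}


class CachedRunStatus:
    def __init__(self, engine, run_id):
        self._engine = engine
        self._run_id = run_id
        self._state = ""

    @property
    def state(self):
        # re-poll only while the run can still change; terminal states stay cached
        if self._state not in STABLE_STATUSES:
            self._state = self._engine.get_state(self._run_id)
        return self._state
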
@@ -516,7 +460,7 @@ class _PipelineRunner(abc.ABC):
     @staticmethod
     def _get_handler(workflow_handler, workflow_spec, project, secrets):
         if not (workflow_handler and callable(workflow_handler)):
-            workflow_file = workflow_spec.get_source_file(project.spec.context)
+            workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
             workflow_handler = create_pipeline(
                 project,
                 workflow_file,
@@ -536,6 +480,7 @@ class _PipelineRunner(abc.ABC):
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
     ):
         pass
 
@@ -548,7 +493,7 @@ class _KFPRunner(_PipelineRunner):
     @classmethod
     def save(cls, project, workflow_spec: WorkflowSpec, target, artifact_path=None):
         pipeline_context.set(project, workflow_spec)
-        workflow_file = workflow_spec.get_source_file(project.spec.context)
+        workflow_file = workflow_spec.get_source_file(project.spec.get_code_path())
         functions = FunctionsDict(project)
         pipeline = create_pipeline(
             project,
@@ -608,6 +553,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup
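
The submission timeout above is read from MLRun's config tree rather than hardcoded. Assuming the `workflows.timeouts.kfp` key added in this release behaves like other `mlrun.mlconf` values, it should be overridable before submitting a workflow, e.g.:

import mlrun

# assumption: raise the KFP submission timeout (seconds) via the new config key
mlrun.mlconf.workflows.timeouts.kfp = 120
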
@@ -666,6 +612,7 @@ class _KFPRunner(_PipelineRunner):
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
     ):
         if timeout is None:
             timeout = 60 * 60
@@ -747,7 +694,7 @@ class _LocalRunner(_PipelineRunner):
         err = None
         try:
             workflow_handler(**workflow_spec.args)
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.succeeded
         except Exception as exc:
             err = exc
             logger.exception("Workflow run failed")
@@ -755,7 +702,7 @@
                 f":x: Workflow {workflow_id} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         mlrun.run.wait_for_runs_completion(pipeline_context.runs_map.values())
         project.notifiers.push_pipeline_run_results(
             pipeline_context.runs_map.values(), state=state
@@ -789,6 +736,7 @@ class _LocalRunner(_PipelineRunner):
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        **kwargs,
     ):
         pass
 
@@ -865,22 +813,44 @@ class _RemoteRunner(_PipelineRunner):
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
         logger.debug(
             "Workflow submitted, waiting for pipeline run to start",
             workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
         )
 
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-            getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
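
The `_get_workflow_id_or_bail` closure above gives the retry loop fail-fast semantics: ordinary errors propagate so `retry_until_successful` keeps trying until its timeout, while a designated status (HTTP 412 Precondition Failed) is wrapped in `MLRunFatalFailureError` to abort immediately. A generic self-contained sketch of the same pattern (the helper names here are illustrative, not mlrun's API):

import time


class FatalFailure(Exception):
    """Wraps an error on which retrying should stop immediately."""

    def __init__(self, original_exception):
        self.original_exception = original_exception


def retry_until_successful(interval, timeout, func):
    deadline = time.monotonic() + timeout
    while True:
        try:
            return func()
        except FatalFailure as fatal:
            raise fatal.original_exception  # fail fast, no more attempts
        except Exception:
            if time.monotonic() >= deadline:
                raise  # retries exhausted
            time.sleep(interval)  # transient error: wait and retry
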
@@ -892,9 +862,9 @@ class _RemoteRunner(_PipelineRunner):
                 f":x: Workflow {workflow_name} run failed!, error: {err_to_str(exc)}",
                 mlrun.common.schemas.NotificationSeverity.ERROR,
             )
-            state = mlrun.run.RunStatuses.failed
+            state = mlrun_pipelines.common.models.RunStatuses.failed
         else:
-            state = mlrun.run.RunStatuses.succeeded
+            state = mlrun_pipelines.common.models.RunStatuses.running
         project.notifiers.push_pipeline_start_message(
             project.metadata.name,
         )
@@ -911,24 +881,47 @@ class _RemoteRunner(_PipelineRunner):
     @staticmethod
     def get_run_status(
         project,
-        run,
+        run: _PipelineRunStatus,
         timeout=None,
         expected_statuses=None,
         notifiers: mlrun.utils.notifications.CustomNotificationPusher = None,
+        inner_engine: type[_PipelineRunner] = None,
     ):
-        # ignore notifiers, as they are handled by the remote pipeline notifications,
-        # so overriding with CustomNotificationPusher with empty list of notifiers
-        state, had_errors, text = _KFPRunner.get_run_status(
-            project,
-            run,
-            timeout,
-            expected_statuses,
-            notifiers=mlrun.utils.notifications.CustomNotificationPusher([]),
-        )
+        inner_engine = inner_engine or _KFPRunner
+        if inner_engine.engine == _KFPRunner.engine:
+            # ignore notifiers for remote notifications, as they are handled by the remote pipeline notifications,
+            # so overriding with CustomNotificationPusher with empty list of notifiers or only local notifiers
+            local_project_notifiers = list(
+                set(mlrun.utils.notifications.NotificationTypes.local()).intersection(
+                    set(project.notifiers.notifications.keys())
+                )
+            )
+            notifiers = mlrun.utils.notifications.CustomNotificationPusher(
+                local_project_notifiers
+            )
+            return _KFPRunner.get_run_status(
+                project,
+                run,
+                timeout,
+                expected_statuses,
+                notifiers=notifiers,
+            )
 
-        # indicate the pipeline status since we don't push the notifications in the remote runner
-        logger.info(text)
-        return state, had_errors, text
+        elif inner_engine.engine == _LocalRunner.engine:
+            mldb = mlrun.db.get_run_db(secrets=project._secrets)
+            pipeline_runner_run = mldb.read_run(run.run_id, project=project.name)
+            pipeline_runner_run = mlrun.run.RunObject.from_dict(pipeline_runner_run)
+            pipeline_runner_run.logs(db=mldb)
+            pipeline_runner_run.refresh()
+            run._state = mlrun.common.runtimes.constants.RunStates.run_state_to_pipeline_run_status(
+                pipeline_runner_run.status.state
+            )
+            run._exc = pipeline_runner_run.status.error
+
+        else:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                f"Unsupported inner runner engine: {inner_engine.engine}"
+            )
 
 
 def create_pipeline(project, pipeline, functions, secrets=None, handler=None):
@@ -1059,7 +1052,7 @@ def load_and_run(
             )
 
         except Exception as exc:
-            logger.error("Failed to send slack notification", exc=exc)
+            logger.error("Failed to send slack notification", exc=err_to_str(exc))
 
         raise error
 
@@ -1087,7 +1080,7 @@ def load_and_run(
     context.log_result(key="workflow_id", value=run.run_id)
     context.log_result(key="engine", value=run._engine.engine, commit=True)
 
-    if run.state == mlrun.run.RunStatuses.failed:
+    if run.state == mlrun_pipelines.common.models.RunStatuses.failed:
         raise RuntimeError(f"Workflow {workflow_log_message} failed") from run.exc
 
     if wait_for_completion:
@@ -1102,7 +1095,7 @@ def load_and_run(
 
     pipeline_state, _, _ = project.get_run_status(run)
     context.log_result(key="workflow_state", value=pipeline_state, commit=True)
-    if pipeline_state != mlrun.run.RunStatuses.succeeded:
+    if pipeline_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
         raise RuntimeError(
             f"Workflow {workflow_log_message} failed, state={pipeline_state}"
         )