mlrun 1.6.2rc6__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun might be problematic.

Files changed (61)
  1. mlrun/artifacts/model.py +28 -22
  2. mlrun/common/db/sql_session.py +3 -0
  3. mlrun/common/model_monitoring/helpers.py +4 -2
  4. mlrun/common/schemas/__init__.py +2 -0
  5. mlrun/common/schemas/common.py +40 -0
  6. mlrun/common/schemas/model_monitoring/__init__.py +1 -0
  7. mlrun/common/schemas/model_monitoring/constants.py +21 -5
  8. mlrun/common/schemas/project.py +2 -0
  9. mlrun/config.py +59 -20
  10. mlrun/data_types/data_types.py +4 -0
  11. mlrun/datastore/azure_blob.py +9 -9
  12. mlrun/datastore/base.py +22 -44
  13. mlrun/datastore/google_cloud_storage.py +6 -6
  14. mlrun/datastore/v3io.py +74 -73
  15. mlrun/db/auth_utils.py +152 -0
  16. mlrun/db/base.py +18 -0
  17. mlrun/db/httpdb.py +79 -55
  18. mlrun/execution.py +3 -3
  19. mlrun/frameworks/tf_keras/callbacks/logging_callback.py +3 -3
  20. mlrun/frameworks/tf_keras/model_handler.py +7 -7
  21. mlrun/k8s_utils.py +10 -5
  22. mlrun/kfpops.py +19 -10
  23. mlrun/lists.py +2 -0
  24. mlrun/model.py +31 -2
  25. mlrun/model_monitoring/api.py +8 -8
  26. mlrun/model_monitoring/batch.py +1 -1
  27. mlrun/model_monitoring/controller.py +0 -7
  28. mlrun/model_monitoring/features_drift_table.py +6 -0
  29. mlrun/model_monitoring/helpers.py +4 -1
  30. mlrun/model_monitoring/stores/kv_model_endpoint_store.py +13 -13
  31. mlrun/model_monitoring/stores/sql_model_endpoint_store.py +0 -1
  32. mlrun/model_monitoring/stream_processing.py +50 -37
  33. mlrun/package/packagers/pandas_packagers.py +3 -3
  34. mlrun/package/utils/_archiver.py +3 -1
  35. mlrun/platforms/iguazio.py +6 -65
  36. mlrun/projects/pipelines.py +51 -17
  37. mlrun/projects/project.py +77 -61
  38. mlrun/render.py +13 -4
  39. mlrun/run.py +2 -0
  40. mlrun/runtimes/base.py +24 -1
  41. mlrun/runtimes/function.py +9 -9
  42. mlrun/runtimes/kubejob.py +5 -3
  43. mlrun/runtimes/local.py +2 -2
  44. mlrun/runtimes/mpijob/abstract.py +6 -6
  45. mlrun/runtimes/pod.py +8 -8
  46. mlrun/runtimes/serving.py +3 -3
  47. mlrun/runtimes/sparkjob/spark3job.py +3 -3
  48. mlrun/serving/remote.py +4 -2
  49. mlrun/utils/async_http.py +28 -8
  50. mlrun/utils/helpers.py +20 -0
  51. mlrun/utils/http.py +3 -3
  52. mlrun/utils/logger.py +11 -6
  53. mlrun/utils/notifications/notification_pusher.py +6 -6
  54. mlrun/utils/version/version.json +2 -2
  55. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/METADATA +18 -18
  56. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/RECORD +60 -59
  57. mlrun/datastore/helpers.py +0 -18
  58. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/LICENSE +0 -0
  59. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/WHEEL +0 -0
  60. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/entry_points.txt +0 -0
  61. {mlrun-1.6.2rc6.dist-info → mlrun-1.6.3.dist-info}/top_level.txt +0 -0
@@ -540,24 +540,24 @@ class KVModelEndpointStore(ModelEndpointStore):
             and endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS]
             == "null"
         ):
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.METRICS
-            ] = json.dumps(
-                {
-                    mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
-                        mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.METRICS] = (
+                json.dumps(
+                    {
+                        mlrun.common.schemas.model_monitoring.EventKeyMetrics.GENERIC: {
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.LATENCY_AVG_1H: 0,
+                            mlrun.common.schemas.model_monitoring.EventLiveStats.PREDICTIONS_PER_SECOND: 0,
+                        }
                     }
-                }
+                )
             )
 
         # Validate key `uid` instead of `endpoint_id`
         # For backwards compatibility reasons, we replace the `endpoint_id` with `uid` which is the updated key name
         if mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID in endpoint:
-            endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.UID
-            ] = endpoint[
-                mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
-            ]
+            endpoint[mlrun.common.schemas.model_monitoring.EventFieldType.UID] = (
+                endpoint[
+                    mlrun.common.schemas.model_monitoring.EventFieldType.ENDPOINT_ID
+                ]
+            )
 
     @staticmethod
     def _encode_field(field: typing.Union[str, bytes]) -> bytes:
@@ -31,7 +31,6 @@ from .models import get_model_endpoints_table
 
 
 class SQLModelEndpointStore(ModelEndpointStore):
-
     """
     Handles the DB operations when the DB target is from type SQL. For the SQL operations, we use SQLAlchemy, a Python
     SQL toolkit that handles the communication with the database. When using SQL for storing the model endpoints
@@ -24,6 +24,7 @@ import mlrun
 import mlrun.common.model_monitoring.helpers
 import mlrun.config
 import mlrun.datastore.targets
+import mlrun.feature_store as fstore
 import mlrun.feature_store.steps
 import mlrun.model_monitoring.prometheus
 import mlrun.serving.states
@@ -49,7 +50,7 @@ class EventStreamProcessor:
         parquet_batching_timeout_secs: int,
         parquet_target: str,
         sample_window: int = 10,
-        aggregate_windows: typing.Optional[typing.List[str]] = None,
+        aggregate_windows: typing.Optional[list[str]] = None,
         aggregate_period: str = "30s",
         model_monitoring_access_key: str = None,
     ):
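This is the first of several hunks in this release that swap `typing.List`, `typing.Dict`, and `typing.Set` annotations for the built-in generics standardized by PEP 585. A minimal sketch of the two spellings (both run on Python 3.9+; the function below is illustrative only, not mlrun code):

import typing

# Pre-PEP 585 spelling, as removed throughout this release:
def old_style(windows: typing.Optional[typing.List[str]] = None) -> typing.Dict[str, int]:
    return {w: len(w) for w in (windows or [])}

# Built-in generic spelling, as added (identical runtime behavior on Python >= 3.9):
def new_style(windows: typing.Optional[list[str]] = None) -> dict[str, int]:
    return {w: len(w) for w in (windows or [])}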
@@ -349,7 +350,6 @@ class EventStreamProcessor:
             rate="10/m",
             time_col=EventFieldType.TIMESTAMP,
             container=self.tsdb_container,
-            access_key=self.v3io_access_key,
             v3io_frames=self.v3io_framesd,
             infer_columns_from_data=True,
             index_cols=[
@@ -587,6 +587,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
         for key in [
             EventFieldType.FEATURES,
             EventFieldType.NAMED_FEATURES,
+            EventFieldType.PREDICTION,
+            EventFieldType.NAMED_PREDICTIONS,
         ]:
             event.pop(key, None)
 
@@ -629,14 +631,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         self.project: str = project
 
         # First and last requests timestamps (value) of each endpoint (key)
-        self.first_request: typing.Dict[str, str] = dict()
-        self.last_request: typing.Dict[str, str] = dict()
+        self.first_request: dict[str, str] = dict()
+        self.last_request: dict[str, str] = dict()
 
         # Number of errors (value) per endpoint (key)
-        self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
+        self.error_count: dict[str, int] = collections.defaultdict(int)
 
         # Set of endpoints in the current events
-        self.endpoints: typing.Set[str] = set()
+        self.endpoints: set[str] = set()
 
     def do(self, full_event):
         event = full_event.body
@@ -745,18 +747,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         # in list of events. This list will be used as the body for the storey event.
         events = []
         for i, (feature, prediction) in enumerate(zip(features, predictions)):
-            # Validate that inputs are based on numeric values
-            if not self.is_valid(
-                endpoint_id,
-                self.is_list_of_numerics,
-                feature,
-                ["request", "inputs", f"[{i}]"],
-            ):
-                return None
-
             if not isinstance(prediction, list):
                 prediction = [prediction]
 
+            if not isinstance(feature, list):
+                feature = [feature]
+
             events.append(
                 {
                     EventFieldType.FUNCTION_URI: function_uri,
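The removed `is_valid`/`is_list_of_numerics` check rejected non-numeric inputs outright; the new code instead normalizes scalar features into single-element lists, mirroring how predictions were already handled. A self-contained sketch of the normalization with made-up event values:

# Made-up sample payloads: scalars and lists mixed, including non-numeric
# values that the old validation would have rejected.
features = [3.1, [0.2, 0.7], "blue"]
predictions = [0, [1, 0.9], "cat"]

events = []
for feature, prediction in zip(features, predictions):
    # Wrap scalars so downstream steps can always iterate uniformly.
    if not isinstance(prediction, list):
        prediction = [prediction]
    if not isinstance(feature, list):
        feature = [feature]
    events.append({"feature": feature, "prediction": prediction})

assert events[0] == {"feature": [3.1], "prediction": [0]}
assert events[2] == {"feature": ["blue"], "prediction": ["cat"]}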
@@ -803,18 +799,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
                 f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
             )
 
-    @staticmethod
-    def is_list_of_numerics(
-        field: typing.List[typing.Union[int, float, dict, list]],
-        dict_path: typing.List[str],
-    ):
-        if all(isinstance(x, int) or isinstance(x, float) for x in field):
-            return True
-        logger.error(
-            f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
-        )
-        return False
-
     def resume_state(self, endpoint_id):
         # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
         # left them
@@ -849,7 +833,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         endpoint_id: str,
         validation_function,
         field: typing.Any,
-        dict_path: typing.List[str],
+        dict_path: list[str],
     ):
         if validation_function(field, dict_path):
             return True
@@ -857,7 +841,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
         return False
 
 
-def is_not_none(field: typing.Any, dict_path: typing.List[str]):
+def is_not_none(field: typing.Any, dict_path: list[str]):
     if field is not None:
         return True
     logger.error(
@@ -946,9 +930,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
             return self.label_columns[endpoint_id]
         return None
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         endpoint_id = event[EventFieldType.ENDPOINT_ID]
 
+        feature_values = event[EventFieldType.FEATURES]
+        label_values = event[EventFieldType.PREDICTION]
         # Get feature names and label columns
         if endpoint_id not in self.feature_names:
             endpoint_record = get_endpoint_record(
@@ -984,6 +970,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                 },
             )
 
+            update_monitoring_feature_set(
+                endpoint_record=endpoint_record,
+                feature_names=feature_names,
+                feature_values=feature_values,
+            )
+
             # Similar process with label columns
             if not label_columns and self._infer_columns_from_data:
                 label_columns = self._infer_label_columns_from_data(event)
@@ -1002,6 +994,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
                     endpoint_id=endpoint_id,
                     attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
                 )
+                update_monitoring_feature_set(
+                    endpoint_record=endpoint_record,
+                    feature_names=label_columns,
+                    feature_values=label_values,
+                )
 
             self.label_columns[endpoint_id] = label_columns
             self.feature_names[endpoint_id] = feature_names
@@ -1019,7 +1016,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
         feature_names = self.feature_names[endpoint_id]
-        feature_values = event[EventFieldType.FEATURES]
         self._map_dictionary_values(
             event=event,
             named_iters=feature_names,
@@ -1029,7 +1025,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
         # Add label_name:value pairs along with a mapping dictionary of all of these pairs
         label_names = self.label_columns[endpoint_id]
-        label_values = event[EventFieldType.PREDICTION]
         self._map_dictionary_values(
             event=event,
             named_iters=label_names,
@@ -1045,9 +1040,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
 
     @staticmethod
     def _map_dictionary_values(
-        event: typing.Dict,
-        named_iters: typing.List,
-        values_iters: typing.List,
+        event: dict,
+        named_iters: list,
+        values_iters: list,
         mapping_dictionary: str,
     ):
         """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs
@@ -1082,7 +1077,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
         self.project = project
         self.model_endpoint_store_target = model_endpoint_store_target
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         update_endpoint_record(
             project=self.project,
             endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1117,7 +1112,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
         self.table = table
         self.keys = set()
 
-    def do(self, event: typing.Dict):
+    def do(self, event: dict):
         key_set = set(event.keys())
         if not key_set.issubset(self.keys):
             self.keys.update(key_set)
@@ -1241,3 +1236,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
         project=project,
     )
     return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
+
+
+def update_monitoring_feature_set(
+    endpoint_record: dict[str, typing.Any],
+    feature_names: list[str],
+    feature_values: list[typing.Any],
+):
+    monitoring_feature_set = fstore.get_feature_set(
+        endpoint_record[
+            mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
+        ]
+    )
+    for name, val in zip(feature_names, feature_values):
+        monitoring_feature_set.add_feature(
+            fstore.Feature(name=name, value_type=type(val))
+        )
+
+    monitoring_feature_set.save()
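The new module-level `update_monitoring_feature_set` helper (called from the two `MapFeatureNames.do` sites above) registers newly observed feature and label columns on the endpoint's monitoring feature set, inferring each `value_type` from the Python type of the sample value, then persists the set. A hedged usage sketch; the endpoint record and column values here are invented, and in the pipeline the record comes from `get_endpoint_record`:

import mlrun.common.schemas.model_monitoring

# Hypothetical endpoint record; only the feature-set URI field is needed here.
endpoint_record = {
    mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI: (
        "store://feature-sets/my-project/monitoring-fs"
    )
}

# Registers sepal_length/sepal_width, inferring value types from the sample
# values 5.1 and 3.5, and saves the updated feature set.
update_monitoring_feature_set(
    endpoint_record=endpoint_record,
    feature_names=["sepal_length", "sepal_width"],
    feature_values=[5.1, 3.5],
)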
@@ -838,9 +838,9 @@ class PandasDataFramePackager(DefaultPackager):
         """
         if isinstance(obj, dict):
             for key, value in obj.items():
-                obj[
-                    PandasDataFramePackager._prepare_result(obj=key)
-                ] = PandasDataFramePackager._prepare_result(obj=value)
+                obj[PandasDataFramePackager._prepare_result(obj=key)] = (
+                    PandasDataFramePackager._prepare_result(obj=value)
+                )
         elif isinstance(obj, list):
             for i, value in enumerate(obj):
                 obj[i] = PandasDataFramePackager._prepare_result(obj=value)
@@ -179,7 +179,9 @@ class _TarArchiver(_Archiver):
 
         # Extract:
         with tarfile.open(archive_path, f"r:{cls._MODE_STRING}") as tar_file:
-            tar_file.extractall(directory_path)
+            # use 'data' to ensure no security risks are imposed by the archive files
+            # see: https://docs.python.org/3/library/tarfile.html#tarfile.TarFile.extractall
+            tar_file.extractall(directory_path, filter="data")
 
         return str(directory_path)
 
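`filter="data"` makes `extractall` reject archive members that could escape the destination: absolute paths, `..` traversal, device nodes, and links pointing outside the tree. It is available on Python 3.12+ and was backported to maintenance releases of 3.8-3.11. A standalone sketch of the traversal case, under those version assumptions:

import io
import tarfile
import tempfile

# Build an in-memory archive containing a path-traversal member.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
    info = tarfile.TarInfo(name="../escape.txt")
    payload = b"oops"
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))
buf.seek(0)

with tarfile.open(fileobj=buf, mode="r:gz") as tar:
    try:
        # The 'data' filter raises before anything is written outside the target.
        tar.extractall(tempfile.mkdtemp(), filter="data")
    except tarfile.OutsideDestinationError as exc:
        print(f"blocked unsafe member: {exc}")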
@@ -16,19 +16,15 @@ import json
 import os
 import urllib
 from collections import namedtuple
-from datetime import datetime
-from http import HTTPStatus
 from urllib.parse import urlparse
 
 import kfp.dsl
 import requests
 import semver
-import urllib3
 import v3io
 
 import mlrun.errors
 from mlrun.config import config as mlconf
-from mlrun.errors import err_to_str
 from mlrun.utils import dict_to_json
 
 _cached_control_session = None
@@ -488,25 +484,6 @@ class V3ioStreamClient:
         return response.output.records
 
 
-def create_control_session(url, username, password):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    if not username or not password:
-        raise ValueError("cannot create session key, missing username or password")
-
-    session = requests.Session()
-    session.auth = (username, password)
-    try:
-        auth = session.post(f"{url}/api/sessions", verify=False)
-    except OSError as exc:
-        raise OSError(f"error: cannot connect to {url}: {err_to_str(exc)}")
-
-    if not auth.ok:
-        raise OSError(f"failed to create session: {url}, {auth.text}")
-
-    return auth.json()["data"]["id"]
-
-
 def is_iguazio_endpoint(endpoint_url: str) -> bool:
     # TODO: find a better heuristic
     return ".default-tenant." in endpoint_url
@@ -533,21 +510,6 @@ def is_iguazio_session_cookie(session_cookie: str) -> bool:
     return False
 
 
-def is_iguazio_system_2_10_or_above(dashboard_url):
-    # for systems without production cert - silence no cert verification WARN
-    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-    response = requests.get(f"{dashboard_url}/api/external_versions", verify=False)
-
-    if not response.ok:
-        if response.status_code == HTTPStatus.NOT_FOUND.value:
-            # in iguazio systems prior to 2.10 this endpoint didn't exist, so the api returns 404 cause endpoint not
-            # found
-            return False
-        response.raise_for_status()
-
-    return True
-
-
 # we assign the control session or access key to the password since this is iguazio auth scheme
 # (requests should be sent with username:control_session/access_key as auth header)
 def add_or_refresh_credentials(
  def add_or_refresh_credentials(
@@ -577,33 +539,12 @@ def add_or_refresh_credentials(
577
539
  # (ideally if we could identify we're in enterprise we would have verify here that token and username have value)
578
540
  if not is_iguazio_endpoint(api_url):
579
541
  return "", "", token
580
- iguazio_dashboard_url = "https://dashboard" + api_url[api_url.find(".") :]
581
-
582
- # in 2.8 mlrun api is protected with control session, from 2.10 it's protected with access key
583
- is_access_key_auth = is_iguazio_system_2_10_or_above(iguazio_dashboard_url)
584
- if is_access_key_auth:
585
- if not username or not token:
586
- raise ValueError(
587
- "username and access key required to authenticate against iguazio system"
588
- )
589
- return username, token, ""
590
-
591
- if not username or not password:
592
- raise ValueError("username and password needed to create session")
593
-
594
- global _cached_control_session
595
- now = datetime.now()
596
- if _cached_control_session:
597
- if (
598
- _cached_control_session[2] == username
599
- and _cached_control_session[3] == password
600
- and (now - _cached_control_session[1]).seconds < 20 * 60 * 60
601
- ):
602
- return _cached_control_session[2], _cached_control_session[0], ""
603
-
604
- control_session = create_control_session(iguazio_dashboard_url, username, password)
605
- _cached_control_session = (control_session, now, username, password)
606
- return username, control_session, ""
542
+
543
+ if not username or not token:
544
+ raise ValueError(
545
+ "username and access key required to authenticate against iguazio system"
546
+ )
547
+ return username, token, ""
607
548
 
608
549
 
609
550
  def parse_path(url, suffix="/"):
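With the 2.8-era control-session path deleted (along with `create_control_session`, `is_iguazio_system_2_10_or_above`, and the module-level cache), authenticating against an Iguazio-hosted API now requires a username plus access key. A sketch of the simplified contract, assuming the existing `add_or_refresh_credentials(api_url, username, password, token)` signature and using made-up values:

# Non-Iguazio endpoint: credentials pass through untouched.
assert add_or_refresh_credentials(
    "https://mlrun.example.com", token="some-token"
) == ("", "", "some-token")

# Iguazio endpoint: username + access key are returned as-is; no control
# session is minted anymore, and missing either value raises ValueError.
username, token, _ = add_or_refresh_credentials(
    "https://mlrun-api.default-tenant.app.example.com",
    username="admin",
    token="my-access-key",
)
assert (username, token) == ("admin", "my-access-key")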
@@ -13,6 +13,7 @@
 # limitations under the License.
 import abc
 import builtins
+import http
 import importlib.util as imputil
 import os
 import tempfile
@@ -69,16 +70,16 @@ class WorkflowSpec(mlrun.model.ModelObj):
 
     def __init__(
         self,
-        engine=None,
-        code=None,
-        path=None,
-        args=None,
-        name=None,
-        handler=None,
-        args_schema: dict = None,
+        engine: typing.Optional[str] = None,
+        code: typing.Optional[str] = None,
+        path: typing.Optional[str] = None,
+        args: typing.Optional[dict] = None,
+        name: typing.Optional[str] = None,
+        handler: typing.Optional[str] = None,
+        args_schema: typing.Optional[dict] = None,
         schedule: typing.Union[str, mlrun.common.schemas.ScheduleCronTrigger] = None,
-        cleanup_ttl: int = None,
-        image: str = None,
+        cleanup_ttl: typing.Optional[int] = None,
+        image: typing.Optional[str] = None,
     ):
         self.engine = engine
         self.code = code
@@ -401,6 +402,9 @@ def enrich_function_object(
         else:
             f.spec.build.source = project.spec.source
             f.spec.build.load_source_on_run = project.spec.load_source_on_run
+            f.spec.build.source_code_target_dir = (
+                project.spec.build.source_code_target_dir
+            )
         f.spec.workdir = project.spec.workdir or project.spec.subpath
         f.prepare_image_for_deploy()
 
@@ -605,6 +609,7 @@ class _KFPRunner(_PipelineRunner):
             namespace=namespace,
             artifact_path=artifact_path,
             cleanup_ttl=workflow_spec.cleanup_ttl,
+            timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
         )
 
         # The user provided workflow code might have made changes to function specs that require cleanup
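`_KFPRunner` now caps the KFP client wait with a configurable timeout drawn from the `workflows.timeouts.kfp` entry of mlrun's config object, so it can be tuned per deployment. A brief hedged sketch (the override value is arbitrary):

import mlrun

# Inspect the engine-specific timeout the runner will pass to the KFP client.
print(mlrun.mlconf.workflows.timeouts.kfp)

# Raise it for long-queuing pipelines before calling project.run(...).
# The runner casts with int(), so numeric strings from env overrides work too.
mlrun.mlconf.workflows.timeouts.kfp = 120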
@@ -862,17 +867,44 @@ class _RemoteRunner(_PipelineRunner):
             )
             return
 
+        get_workflow_id_timeout = max(
+            int(mlrun.mlconf.workflows.timeouts.remote),
+            int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
+        )
+
+        logger.debug(
+            "Workflow submitted, waiting for pipeline run to start",
+            workflow_name=workflow_response.name,
+            get_workflow_id_timeout=get_workflow_id_timeout,
+        )
+
+        def _get_workflow_id_or_bail():
+            try:
+                return run_db.get_workflow_id(
+                    project=project.name,
+                    name=workflow_response.name,
+                    run_id=workflow_response.run_id,
+                    engine=workflow_spec.engine,
+                )
+            except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
+                # fail fast on specific errors
+                if get_wf_exc.error_status_code in [
+                    http.HTTPStatus.PRECONDITION_FAILED
+                ]:
+                    raise mlrun.errors.MLRunFatalFailureError(
+                        original_exception=get_wf_exc
+                    )
+
+                # raise for a retry (on other errors)
+                raise
+
         # Getting workflow id from run:
         response = retry_until_successful(
             1,
-            getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
+            get_workflow_id_timeout,
             logger,
             False,
-            run_db.get_workflow_id,
-            project=project.name,
-            name=workflow_response.name,
-            run_id=workflow_response.run_id,
-            engine=workflow_spec.engine,
+            _get_workflow_id_or_bail,
         )
         workflow_id = response.workflow_id
         # After fetching the workflow_id the workflow executed successfully
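`retry_until_successful` keeps retrying the wrapped callable until its timeout expires; the new `_get_workflow_id_or_bail` closure converts a 412 Precondition Failed into `MLRunFatalFailureError` so the loop aborts immediately instead of burning the whole timeout. A self-contained stand-in for that pattern, assuming only the fail-fast contract (the real helpers live in `mlrun.utils` and `mlrun.errors`):

import time


class FatalFailure(Exception):
    """Stand-in for MLRunFatalFailureError: aborts the retry loop at once."""

    def __init__(self, original_exception):
        self.original_exception = original_exception


def retry_until_successful(backoff, timeout, function):
    # Minimal sketch of the retry loop: retry on any error until the deadline,
    # except FatalFailure, whose wrapped exception is re-raised immediately.
    deadline = time.monotonic() + timeout
    while True:
        try:
            return function()
        except FatalFailure as fatal:
            raise fatal.original_exception
        except Exception:
            if time.monotonic() >= deadline:
                raise
            time.sleep(backoff)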
@@ -988,6 +1020,7 @@ def load_and_run(
     cleanup_ttl: int = None,
     load_only: bool = False,
     wait_for_completion: bool = False,
+    project_context: str = None,
 ):
     """
     Auxiliary function that the RemoteRunner run once or run every schedule.
@@ -1018,10 +1051,11 @@ def load_and_run(
                                 workflow and all its resources are deleted)
    :param load_only:            for just loading the project, inner use.
    :param wait_for_completion:  wait for workflow completion before returning
+   :param project_context:      project context path (used for loading the project)
    """
    try:
        project = mlrun.load_project(
-            context=f"./{project_name}",
+            context=project_context or f"./{project_name}",
            url=url,
            name=project_name,
            init_git=init_git,
@@ -1053,7 +1087,7 @@ def load_and_run(
 
         raise error
 
-    context.logger.info(f"Loaded project {project.name} from remote successfully")
+    context.logger.info(f"Loaded project {project.name} successfully")
 
     if load_only:
         return