mlrun 1.6.2rc5__py3-none-any.whl → 1.6.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mlrun might be problematic. Click here for more details.

mlrun/db/httpdb.py CHANGED
@@ -33,6 +33,7 @@ import mlrun.common.schemas
33
33
  import mlrun.model_monitoring.model_endpoint
34
34
  import mlrun.platforms
35
35
  import mlrun.projects
36
+ from mlrun.db.auth_utils import OAuthClientIDTokenProvider, StaticTokenProvider
36
37
  from mlrun.errors import MLRunInvalidArgumentError, err_to_str
37
38
 
38
39
  from ..artifacts import Artifact
@@ -133,17 +134,28 @@ class HTTPRunDB(RunDBInterface):
133
134
  endpoint += f":{parsed_url.port}"
134
135
  base_url = f"{parsed_url.scheme}://{endpoint}{parsed_url.path}"
135
136
 
137
+ self.base_url = base_url
136
138
  username = parsed_url.username or config.httpdb.user
137
139
  password = parsed_url.password or config.httpdb.password
140
+ self.token_provider = None
138
141
 
139
- username, password, token = mlrun.platforms.add_or_refresh_credentials(
140
- parsed_url.hostname, username, password, config.httpdb.token
141
- )
142
+ if config.auth_with_client_id.enabled:
143
+ self.token_provider = OAuthClientIDTokenProvider(
144
+ token_endpoint=mlrun.get_secret_or_env("MLRUN_AUTH_TOKEN_ENDPOINT"),
145
+ client_id=mlrun.get_secret_or_env("MLRUN_AUTH_CLIENT_ID"),
146
+ client_secret=mlrun.get_secret_or_env("MLRUN_AUTH_CLIENT_SECRET"),
147
+ timeout=config.auth_with_client_id.request_timeout,
148
+ )
149
+ else:
150
+ username, password, token = mlrun.platforms.add_or_refresh_credentials(
151
+ parsed_url.hostname, username, password, config.httpdb.token
152
+ )
153
+
154
+ if token:
155
+ self.token_provider = StaticTokenProvider(token)
142
156
 
143
- self.base_url = base_url
144
157
  self.user = username
145
158
  self.password = password
146
- self.token = token
147
159
 
148
160
  def __repr__(self):
149
161
  cls = self.__class__.__name__
@@ -213,17 +225,19 @@ class HTTPRunDB(RunDBInterface):
213
225
 
214
226
  if self.user:
215
227
  kw["auth"] = (self.user, self.password)
216
- elif self.token:
217
- # Iguazio auth doesn't support passing token through bearer, so use cookie instead
218
- if mlrun.platforms.iguazio.is_iguazio_session(self.token):
219
- session_cookie = f'j:{{"sid": "{self.token}"}}'
220
- cookies = {
221
- "session": session_cookie,
222
- }
223
- kw["cookies"] = cookies
224
- else:
225
- if "Authorization" not in kw.setdefault("headers", {}):
226
- kw["headers"].update({"Authorization": "Bearer " + self.token})
228
+ elif self.token_provider:
229
+ token = self.token_provider.get_token()
230
+ if token:
231
+ # Iguazio auth doesn't support passing token through bearer, so use cookie instead
232
+ if self.token_provider.is_iguazio_session():
233
+ session_cookie = f'j:{{"sid": "{token}"}}'
234
+ cookies = {
235
+ "session": session_cookie,
236
+ }
237
+ kw["cookies"] = cookies
238
+ else:
239
+ if "Authorization" not in kw.setdefault("headers", {}):
240
+ kw["headers"].update({"Authorization": "Bearer " + token})
227
241
 
228
242
  if mlrun.common.schemas.HeaderNames.client_version not in kw.setdefault(
229
243
  "headers", {}
@@ -930,6 +944,7 @@ class HTTPRunDB(RunDBInterface):
930
944
  kind: str = None,
931
945
  category: Union[str, mlrun.common.schemas.ArtifactCategories] = None,
932
946
  tree: str = None,
947
+ producer_uri: str = None,
933
948
  ) -> ArtifactList:
934
949
  """List artifacts filtered by various parameters.
935
950
 
@@ -956,9 +971,12 @@ class HTTPRunDB(RunDBInterface):
956
971
  :param best_iteration: Returns the artifact which belongs to the best iteration of a given run, in the case of
957
972
  artifacts generated from a hyper-param run. If only a single iteration exists, will return the artifact
958
973
  from that iteration. If using ``best_iter``, the ``iter`` parameter must not be used.
959
- :param kind: Return artifacts of the requested kind.
960
- :param category: Return artifacts of the requested category.
961
- :param tree: Return artifacts of the requested tree.
974
+ :param kind: Return artifacts of the requested kind.
975
+ :param category: Return artifacts of the requested category.
976
+ :param tree: Return artifacts of the requested tree.
977
+ :param producer_uri: Return artifacts produced by the requested producer URI. Producer URI usually
978
+ points to a run and is used to filter artifacts by the run that produced them when the artifact producer id
979
+ is a workflow id (artifact was created as part of a workflow).
962
980
  """
963
981
 
964
982
  project = project or config.default_project
@@ -977,6 +995,7 @@ class HTTPRunDB(RunDBInterface):
977
995
  "category": category,
978
996
  "tree": tree,
979
997
  "format": mlrun.common.schemas.ArtifactsFormat.full.value,
998
+ "producer_uri": producer_uri,
980
999
  }
981
1000
  error = "list artifacts"
982
1001
  endpoint_path = f"projects/{project}/artifacts"
@@ -1611,19 +1630,21 @@ class HTTPRunDB(RunDBInterface):
1611
1630
  artifact_path=None,
1612
1631
  ops=None,
1613
1632
  cleanup_ttl=None,
1633
+ timeout=60,
1614
1634
  ):
1615
1635
  """Submit a KFP pipeline for execution.
1616
1636
 
1617
- :param project: The project of the pipeline
1618
- :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
1619
- :param arguments: A dictionary of arguments to pass to the pipeline.
1620
- :param experiment: A name to assign for the specific experiment.
1621
- :param run: A name for this specific run.
1622
- :param namespace: Kubernetes namespace to execute the pipeline in.
1623
- :param artifact_path: A path to artifacts used by this pipeline.
1624
- :param ops: Transformers to apply on all ops in the pipeline.
1625
- :param cleanup_ttl: pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
1626
- workflow and all its resources are deleted)
1637
+ :param project: The project of the pipeline
1638
+ :param pipeline: Pipeline function or path to .yaml/.zip pipeline file.
1639
+ :param arguments: A dictionary of arguments to pass to the pipeline.
1640
+ :param experiment: A name to assign for the specific experiment.
1641
+ :param run: A name for this specific run.
1642
+ :param namespace: Kubernetes namespace to execute the pipeline in.
1643
+ :param artifact_path: A path to artifacts used by this pipeline.
1644
+ :param ops: Transformers to apply on all ops in the pipeline.
1645
+ :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
1646
+ workflow and all its resources are deleted)
1647
+ :param timeout: Timeout for the API call.
1627
1648
  """
1628
1649
 
1629
1650
  if isinstance(pipeline, str):
@@ -1665,7 +1686,7 @@ class HTTPRunDB(RunDBInterface):
1665
1686
  "POST",
1666
1687
  f"projects/{project}/pipelines",
1667
1688
  params=params,
1668
- timeout=20,
1689
+ timeout=timeout,
1669
1690
  body=data,
1670
1691
  headers=headers,
1671
1692
  )
mlrun/lists.py CHANGED
@@ -36,6 +36,7 @@ list_header = [
36
36
  "parameters",
37
37
  "results",
38
38
  "artifacts",
39
+ "artifact_uris",
39
40
  "error",
40
41
  ]
41
42
 
@@ -63,6 +64,7 @@ class RunList(list):
63
64
  get_in(run, "spec.parameters", ""),
64
65
  get_in(run, "status.results", ""),
65
66
  get_in(run, "status.artifacts", []),
67
+ get_in(run, "status.artifact_uris", {}),
66
68
  get_in(run, "status.error", ""),
67
69
  ]
68
70
  if extend_iterations and iterations:
mlrun/model.py CHANGED
@@ -62,6 +62,7 @@ class ModelObj:
62
62
  return new_type.from_dict(param)
63
63
  return param
64
64
 
65
+ @mlrun.utils.filter_warnings("ignore", FutureWarning)
65
66
  def to_dict(self, fields=None, exclude=None):
66
67
  """convert the object to a python dictionary
67
68
 
@@ -623,6 +624,11 @@ class RunMetadata(ModelObj):
623
624
  def iteration(self, iteration):
624
625
  self._iteration = iteration
625
626
 
627
+ def is_workflow_runner(self):
628
+ if not self.labels:
629
+ return False
630
+ return self.labels.get("job-type", "") == "workflow-runner"
631
+
626
632
 
627
633
  class HyperParamStrategies:
628
634
  grid = "grid"
@@ -1052,6 +1058,7 @@ class RunStatus(ModelObj):
1052
1058
  ui_url=None,
1053
1059
  reason: str = None,
1054
1060
  notifications: Dict[str, Notification] = None,
1061
+ artifact_uris: dict[str, str] = None,
1055
1062
  ):
1056
1063
  self.state = state or "created"
1057
1064
  self.status_text = status_text
@@ -1066,6 +1073,21 @@ class RunStatus(ModelObj):
1066
1073
  self.ui_url = ui_url
1067
1074
  self.reason = reason
1068
1075
  self.notifications = notifications or {}
1076
+ # Artifact key -> URI mapping, since the full artifacts are not stored in the runs DB table
1077
+ self.artifact_uris = artifact_uris or {}
1078
+
1079
+ def is_failed(self) -> Optional[bool]:
1080
+ """
1081
+ This method returns whether a run has failed.
1082
+ Returns None if the state has yet to be defined. The caller is responsible for handling None.
1083
+ (e.g. wait for the state to be defined)
1084
+ """
1085
+ if not self.state:
1086
+ return None
1087
+ return self.state.casefold() in [
1088
+ mlrun.run.RunStatuses.failed.casefold(),
1089
+ mlrun.run.RunStatuses.error.casefold(),
1090
+ ]
1069
1091
 
1070
1092
 
1071
1093
  class RunTemplate(ModelObj):
@@ -1365,8 +1387,10 @@ class RunObject(RunTemplate):
1365
1387
  iter=self.metadata.iteration,
1366
1388
  )
1367
1389
  if run:
1368
- self.status = RunStatus.from_dict(run.get("status", {}))
1369
- self.status.from_dict(run.get("status", {}))
1390
+ run_status = run.get("status", {})
1391
+ # Artifacts are not stored in the DB, so we need to preserve them here
1392
+ run_status["artifacts"] = self.status.artifacts
1393
+ self.status = RunStatus.from_dict(run_status)
1370
1394
  return self
1371
1395
 
1372
1396
  def show(self):
@@ -426,13 +426,6 @@ class MonitoringApplicationController:
426
426
  m_fs = fstore.get_feature_set(
427
427
  endpoint[mm_constants.EventFieldType.FEATURE_SET_URI]
428
428
  )
429
- labels = endpoint[mm_constants.EventFieldType.LABEL_NAMES]
430
- if labels:
431
- if isinstance(labels, str):
432
- labels = json.loads(labels)
433
- for label in labels:
434
- if label not in list(m_fs.spec.features.keys()):
435
- m_fs.add_feature(fstore.Feature(name=label, value_type="float"))
436
429
 
437
430
  for application in applications_names:
438
431
  batch_window = batch_window_generator.get_batch_window(
@@ -19,6 +19,7 @@ import plotly.graph_objects as go
19
19
  from plotly.subplots import make_subplots
20
20
 
21
21
  import mlrun.common.schemas.model_monitoring
22
+ import mlrun.common.schemas.model_monitoring.constants as mm_constants
22
23
 
23
24
  # A type for representing a drift result, a tuple of the status and the drift mean:
24
25
  DriftResultType = Tuple[mlrun.common.schemas.model_monitoring.DriftStatus, float]
@@ -112,6 +113,11 @@ class FeaturesDriftTablePlot:
112
113
  :return: The full path to the html file of the plot.
113
114
  """
114
115
  # Plot the drift table:
116
+ features = [
117
+ feature
118
+ for feature in features
119
+ if feature not in mm_constants.FeatureSetFeatures.list()
120
+ ]
115
121
  figure = self._plot(
116
122
  features=features,
117
123
  sample_set_statistics=sample_set_statistics,
@@ -41,7 +41,7 @@ class _MLRunNoRunsFoundError(Exception):
41
41
  pass
42
42
 
43
43
 
44
- def get_stream_path(project: str = None, application_name: str = None):
44
+ def get_stream_path(project: str = None, application_name: str = None) -> str:
45
45
  """
46
46
  Get stream path from the project secret. If it wasn't set, take it from the system configurations
47
47
 
@@ -62,6 +62,9 @@ def get_stream_path(project: str = None, application_name: str = None):
62
62
  application_name=application_name,
63
63
  )
64
64
 
65
+ if isinstance(stream_uri, list): # ML-6043 - user side gets only the new stream uri
66
+ stream_uri = stream_uri[1]
67
+
65
68
  return mlrun.common.model_monitoring.helpers.parse_monitoring_stream_path(
66
69
  stream_uri=stream_uri, project=project, application_name=application_name
67
70
  )
@@ -24,6 +24,7 @@ import mlrun
24
24
  import mlrun.common.model_monitoring.helpers
25
25
  import mlrun.config
26
26
  import mlrun.datastore.targets
27
+ import mlrun.feature_store as fstore
27
28
  import mlrun.feature_store.steps
28
29
  import mlrun.model_monitoring.prometheus
29
30
  import mlrun.serving.states
@@ -49,7 +50,7 @@ class EventStreamProcessor:
49
50
  parquet_batching_timeout_secs: int,
50
51
  parquet_target: str,
51
52
  sample_window: int = 10,
52
- aggregate_windows: typing.Optional[typing.List[str]] = None,
53
+ aggregate_windows: typing.Optional[list[str]] = None,
53
54
  aggregate_period: str = "30s",
54
55
  model_monitoring_access_key: str = None,
55
56
  ):
@@ -349,7 +350,6 @@ class EventStreamProcessor:
349
350
  rate="10/m",
350
351
  time_col=EventFieldType.TIMESTAMP,
351
352
  container=self.tsdb_container,
352
- access_key=self.v3io_access_key,
353
353
  v3io_frames=self.v3io_framesd,
354
354
  infer_columns_from_data=True,
355
355
  index_cols=[
@@ -587,6 +587,8 @@ class ProcessBeforeParquet(mlrun.feature_store.steps.MapClass):
587
587
  for key in [
588
588
  EventFieldType.FEATURES,
589
589
  EventFieldType.NAMED_FEATURES,
590
+ EventFieldType.PREDICTION,
591
+ EventFieldType.NAMED_PREDICTIONS,
590
592
  ]:
591
593
  event.pop(key, None)
592
594
 
@@ -629,14 +631,14 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
629
631
  self.project: str = project
630
632
 
631
633
  # First and last requests timestamps (value) of each endpoint (key)
632
- self.first_request: typing.Dict[str, str] = dict()
633
- self.last_request: typing.Dict[str, str] = dict()
634
+ self.first_request: dict[str, str] = dict()
635
+ self.last_request: dict[str, str] = dict()
634
636
 
635
637
  # Number of errors (value) per endpoint (key)
636
- self.error_count: typing.Dict[str, int] = collections.defaultdict(int)
638
+ self.error_count: dict[str, int] = collections.defaultdict(int)
637
639
 
638
640
  # Set of endpoints in the current events
639
- self.endpoints: typing.Set[str] = set()
641
+ self.endpoints: set[str] = set()
640
642
 
641
643
  def do(self, full_event):
642
644
  event = full_event.body
@@ -745,18 +747,12 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
745
747
  # in list of events. This list will be used as the body for the storey event.
746
748
  events = []
747
749
  for i, (feature, prediction) in enumerate(zip(features, predictions)):
748
- # Validate that inputs are based on numeric values
749
- if not self.is_valid(
750
- endpoint_id,
751
- self.is_list_of_numerics,
752
- feature,
753
- ["request", "inputs", f"[{i}]"],
754
- ):
755
- return None
756
-
757
750
  if not isinstance(prediction, list):
758
751
  prediction = [prediction]
759
752
 
753
+ if not isinstance(feature, list):
754
+ feature = [feature]
755
+
760
756
  events.append(
761
757
  {
762
758
  EventFieldType.FUNCTION_URI: function_uri,
@@ -803,18 +799,6 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
803
799
  f"{self.last_request[endpoint_id]} - write to TSDB will be rejected"
804
800
  )
805
801
 
806
- @staticmethod
807
- def is_list_of_numerics(
808
- field: typing.List[typing.Union[int, float, dict, list]],
809
- dict_path: typing.List[str],
810
- ):
811
- if all(isinstance(x, int) or isinstance(x, float) for x in field):
812
- return True
813
- logger.error(
814
- f"List does not consist of only numeric values: {field} [Event -> {','.join(dict_path)}]"
815
- )
816
- return False
817
-
818
802
  def resume_state(self, endpoint_id):
819
803
  # Make sure process is resumable, if process fails for any reason, be able to pick things up close to where we
820
804
  # left them
@@ -849,7 +833,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
849
833
  endpoint_id: str,
850
834
  validation_function,
851
835
  field: typing.Any,
852
- dict_path: typing.List[str],
836
+ dict_path: list[str],
853
837
  ):
854
838
  if validation_function(field, dict_path):
855
839
  return True
@@ -857,7 +841,7 @@ class ProcessEndpointEvent(mlrun.feature_store.steps.MapClass):
857
841
  return False
858
842
 
859
843
 
860
- def is_not_none(field: typing.Any, dict_path: typing.List[str]):
844
+ def is_not_none(field: typing.Any, dict_path: list[str]):
861
845
  if field is not None:
862
846
  return True
863
847
  logger.error(
@@ -946,9 +930,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
946
930
  return self.label_columns[endpoint_id]
947
931
  return None
948
932
 
949
- def do(self, event: typing.Dict):
933
+ def do(self, event: dict):
950
934
  endpoint_id = event[EventFieldType.ENDPOINT_ID]
951
935
 
936
+ feature_values = event[EventFieldType.FEATURES]
937
+ label_values = event[EventFieldType.PREDICTION]
952
938
  # Get feature names and label columns
953
939
  if endpoint_id not in self.feature_names:
954
940
  endpoint_record = get_endpoint_record(
@@ -984,6 +970,12 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
984
970
  },
985
971
  )
986
972
 
973
+ update_monitoring_feature_set(
974
+ endpoint_record=endpoint_record,
975
+ feature_names=feature_names,
976
+ feature_values=feature_values,
977
+ )
978
+
987
979
  # Similar process with label columns
988
980
  if not label_columns and self._infer_columns_from_data:
989
981
  label_columns = self._infer_label_columns_from_data(event)
@@ -1002,6 +994,11 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
1002
994
  endpoint_id=endpoint_id,
1003
995
  attributes={EventFieldType.LABEL_NAMES: json.dumps(label_columns)},
1004
996
  )
997
+ update_monitoring_feature_set(
998
+ endpoint_record=endpoint_record,
999
+ feature_names=label_columns,
1000
+ feature_values=label_values,
1001
+ )
1005
1002
 
1006
1003
  self.label_columns[endpoint_id] = label_columns
1007
1004
  self.feature_names[endpoint_id] = feature_names
@@ -1019,7 +1016,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
1019
1016
 
1020
1017
  # Add feature_name:value pairs along with a mapping dictionary of all of these pairs
1021
1018
  feature_names = self.feature_names[endpoint_id]
1022
- feature_values = event[EventFieldType.FEATURES]
1023
1019
  self._map_dictionary_values(
1024
1020
  event=event,
1025
1021
  named_iters=feature_names,
@@ -1029,7 +1025,6 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
1029
1025
 
1030
1026
  # Add label_name:value pairs along with a mapping dictionary of all of these pairs
1031
1027
  label_names = self.label_columns[endpoint_id]
1032
- label_values = event[EventFieldType.PREDICTION]
1033
1028
  self._map_dictionary_values(
1034
1029
  event=event,
1035
1030
  named_iters=label_names,
@@ -1045,9 +1040,9 @@ class MapFeatureNames(mlrun.feature_store.steps.MapClass):
1045
1040
 
1046
1041
  @staticmethod
1047
1042
  def _map_dictionary_values(
1048
- event: typing.Dict,
1049
- named_iters: typing.List,
1050
- values_iters: typing.List,
1043
+ event: dict,
1044
+ named_iters: list,
1045
+ values_iters: list,
1051
1046
  mapping_dictionary: str,
1052
1047
  ):
1053
1048
  """Adding name-value pairs to event dictionary based on two provided lists of names and values. These pairs
@@ -1082,7 +1077,7 @@ class UpdateEndpoint(mlrun.feature_store.steps.MapClass):
1082
1077
  self.project = project
1083
1078
  self.model_endpoint_store_target = model_endpoint_store_target
1084
1079
 
1085
- def do(self, event: typing.Dict):
1080
+ def do(self, event: dict):
1086
1081
  update_endpoint_record(
1087
1082
  project=self.project,
1088
1083
  endpoint_id=event.pop(EventFieldType.ENDPOINT_ID),
@@ -1117,7 +1112,7 @@ class InferSchema(mlrun.feature_store.steps.MapClass):
1117
1112
  self.table = table
1118
1113
  self.keys = set()
1119
1114
 
1120
- def do(self, event: typing.Dict):
1115
+ def do(self, event: dict):
1121
1116
  key_set = set(event.keys())
1122
1117
  if not key_set.issubset(self.keys):
1123
1118
  self.keys.update(key_set)
@@ -1241,3 +1236,21 @@ def get_endpoint_record(project: str, endpoint_id: str):
1241
1236
  project=project,
1242
1237
  )
1243
1238
  return model_endpoint_store.get_model_endpoint(endpoint_id=endpoint_id)
1239
+
1240
+
1241
+ def update_monitoring_feature_set(
1242
+ endpoint_record: dict[str, typing.Any],
1243
+ feature_names: list[str],
1244
+ feature_values: list[typing.Any],
1245
+ ):
1246
+ monitoring_feature_set = fstore.get_feature_set(
1247
+ endpoint_record[
1248
+ mlrun.common.schemas.model_monitoring.EventFieldType.FEATURE_SET_URI
1249
+ ]
1250
+ )
1251
+ for name, val in zip(feature_names, feature_values):
1252
+ monitoring_feature_set.add_feature(
1253
+ fstore.Feature(name=name, value_type=type(val))
1254
+ )
1255
+
1256
+ monitoring_feature_set.save()
@@ -13,6 +13,7 @@
13
13
  # limitations under the License.
14
14
  import abc
15
15
  import builtins
16
+ import http
16
17
  import importlib.util as imputil
17
18
  import os
18
19
  import tempfile
@@ -608,6 +609,7 @@ class _KFPRunner(_PipelineRunner):
608
609
  namespace=namespace,
609
610
  artifact_path=artifact_path,
610
611
  cleanup_ttl=workflow_spec.cleanup_ttl,
612
+ timeout=int(mlrun.mlconf.workflows.timeouts.kfp),
611
613
  )
612
614
 
613
615
  # The user provided workflow code might have made changes to function specs that require cleanup
@@ -865,22 +867,44 @@ class _RemoteRunner(_PipelineRunner):
865
867
  )
866
868
  return
867
869
 
870
+ get_workflow_id_timeout = max(
871
+ int(mlrun.mlconf.workflows.timeouts.remote),
872
+ int(getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine)),
873
+ )
874
+
868
875
  logger.debug(
869
876
  "Workflow submitted, waiting for pipeline run to start",
870
877
  workflow_name=workflow_response.name,
878
+ get_workflow_id_timeout=get_workflow_id_timeout,
871
879
  )
872
880
 
881
+ def _get_workflow_id_or_bail():
882
+ try:
883
+ return run_db.get_workflow_id(
884
+ project=project.name,
885
+ name=workflow_response.name,
886
+ run_id=workflow_response.run_id,
887
+ engine=workflow_spec.engine,
888
+ )
889
+ except mlrun.errors.MLRunHTTPStatusError as get_wf_exc:
890
+ # fail fast on specific errors
891
+ if get_wf_exc.error_status_code in [
892
+ http.HTTPStatus.PRECONDITION_FAILED
893
+ ]:
894
+ raise mlrun.errors.MLRunFatalFailureError(
895
+ original_exception=get_wf_exc
896
+ )
897
+
898
+ # raise for a retry (on other errors)
899
+ raise
900
+
873
901
  # Getting workflow id from run:
874
902
  response = retry_until_successful(
875
903
  1,
876
- getattr(mlrun.mlconf.workflows.timeouts, inner_engine.engine),
904
+ get_workflow_id_timeout,
877
905
  logger,
878
906
  False,
879
- run_db.get_workflow_id,
880
- project=project.name,
881
- name=workflow_response.name,
882
- run_id=workflow_response.run_id,
883
- engine=workflow_spec.engine,
907
+ _get_workflow_id_or_bail,
884
908
  )
885
909
  workflow_id = response.workflow_id
886
910
  # After fetching the workflow_id the workflow executed successfully
mlrun/projects/project.py CHANGED
@@ -2602,16 +2602,12 @@ class MlrunProject(ModelObj):
2602
2602
  ) -> _PipelineRunStatus:
2603
2603
  """Run a workflow using kubeflow pipelines
2604
2604
 
2605
- :param name: Name of the workflow
2606
- :param workflow_path:
2607
- URL to a workflow file, if not a project workflow
2608
- :param arguments:
2609
- Kubeflow pipelines arguments (parameters)
2610
- :param artifact_path:
2611
- Target path/url for workflow artifacts, the string
2612
- '{{workflow.uid}}' will be replaced by workflow id
2613
- :param workflow_handler:
2614
- Workflow function handler (for running workflow function directly)
2605
+ :param name: Name of the workflow
2606
+ :param workflow_path: URL to a workflow file, if not a project workflow
2607
+ :param arguments: Kubeflow pipelines arguments (parameters)
2608
+ :param artifact_path: Target path/URL for workflow artifacts, the string '{{workflow.uid}}' will be
2609
+ replaced by workflow id.
2610
+ :param workflow_handler: Workflow function handler (for running workflow function directly)
2615
2611
  :param namespace: Kubernetes namespace if other than default
2616
2612
  :param sync: Force functions sync before run
2617
2613
  :param watch: Wait for pipeline completion
@@ -2624,7 +2620,7 @@ class MlrunProject(ModelObj):
2624
2620
  (which will be converted to the class using its `from_crontab` constructor),
2625
2621
  see this link for help:
2626
2622
  https://apscheduler.readthedocs.io/en/3.x/modules/triggers/cron.html#module-apscheduler.triggers.cron
2627
- for using the pre-defined workflow's schedule, set `schedule=True`
2623
+ For using the pre-defined workflow's schedule, set `schedule=True`
2628
2624
  :param timeout: Timeout in seconds to wait for pipeline completion (watch will be activated)
2629
2625
  :param source: Source to use instead of the actual `project.spec.source` (used when engine is remote).
2630
2626
  Can be one of:
@@ -2633,12 +2629,11 @@ class MlrunProject(ModelObj):
2633
2629
  Path can be absolute or relative to `project.spec.build.source_code_target_dir` if defined
2634
2630
  (enriched when building a project image with source, see `MlrunProject.build_image`).
2635
2631
  For other engines the source is used to validate that the code is up-to-date.
2636
- :param cleanup_ttl:
2637
- Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
2638
- Workflow and all its resources are deleted)
2639
- :param notifications:
2640
- List of notifications to send for workflow completion
2641
- :returns: Run id
2632
+ :param cleanup_ttl: Pipeline cleanup ttl in secs (time to wait after workflow completion, at which point the
2633
+ workflow and all its resources are deleted)
2634
+ :param notifications: List of notifications to send for workflow completion
2635
+
2636
+ :returns: :py:class:`~mlrun.projects.pipelines._PipelineRunStatus` instance
2642
2637
  """
2643
2638
 
2644
2639
  arguments = arguments or {}
@@ -2655,12 +2650,14 @@ class MlrunProject(ModelObj):
2655
2650
  "Remote repo is not defined, use .create_remote() + push()"
2656
2651
  )
2657
2652
 
2658
- self.sync_functions(always=sync)
2659
- if not self.spec._function_objects:
2660
- raise ValueError(
2661
- "There are no functions in the project."
2662
- " Make sure you've set your functions with project.set_function()."
2663
- )
2653
+ if engine not in ["remote"]:
2654
+ # for remote runs we don't require the functions to be synced as they can be loaded dynamically during run
2655
+ self.sync_functions(always=sync)
2656
+ if not self.spec._function_objects:
2657
+ raise ValueError(
2658
+ "There are no functions in the project."
2659
+ " Make sure you've set your functions with project.set_function()."
2660
+ )
2664
2661
 
2665
2662
  if not name and not workflow_path and not workflow_handler:
2666
2663
  raise ValueError("Workflow name, path, or handler must be specified")
mlrun/render.py CHANGED
@@ -134,7 +134,7 @@ def artifacts_html(
134
134
 
135
135
  if not attribute_value:
136
136
  mlrun.utils.logger.warning(
137
- "Artifact is incomplete, omitting from output (most likely due to a failed artifact logging)",
137
+ f"Artifact required attribute {attribute_name} is missing, omitting from output",
138
138
  artifact_key=key,
139
139
  )
140
140
  continue
@@ -404,12 +404,21 @@ def runs_to_html(
404
404
  df.drop("labels", axis=1, inplace=True)
405
405
  df.drop("inputs", axis=1, inplace=True)
406
406
  df.drop("artifacts", axis=1, inplace=True)
407
+ df.drop("artifact_uris", axis=1, inplace=True)
407
408
  else:
408
409
  df["labels"] = df["labels"].apply(dict_html)
409
410
  df["inputs"] = df["inputs"].apply(inputs_html)
410
- df["artifacts"] = df["artifacts"].apply(
411
- lambda artifacts: artifacts_html(artifacts, "target_path"),
412
- )
411
+ if df["artifacts"][0]:
412
+ df["artifacts"] = df["artifacts"].apply(
413
+ lambda artifacts: artifacts_html(artifacts, "target_path"),
414
+ )
415
+ df.drop("artifact_uris", axis=1, inplace=True)
416
+ elif df["artifact_uris"][0]:
417
+ df["artifact_uris"] = df["artifact_uris"].apply(dict_html)
418
+ df.drop("artifacts", axis=1, inplace=True)
419
+ else:
420
+ df.drop("artifacts", axis=1, inplace=True)
421
+ df.drop("artifact_uris", axis=1, inplace=True)
413
422
 
414
423
  def expand_error(x):
415
424
  if x["state"] == "error":