mlrun-1.10.0rc12-py3-none-any.whl → mlrun-1.10.0rc13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of mlrun has been flagged as possibly problematic.

@@ -61,7 +61,11 @@ class LLMPromptArtifactSpec(ArtifactSpec):
         self.prompt_legend = prompt_legend
         self.model_configuration = model_configuration
         self.description = description
-        self._model_artifact = None
+        self._model_artifact = (
+            model_artifact
+            if isinstance(model_artifact, model_art.ModelArtifact)
+            else None
+        )
 
     @property
     def model_uri(self):
@@ -214,7 +214,7 @@ from .secret import (
     SecretsData,
     UserSecretCreationRequest,
 )
-from .serving import ModelRunnerStepData, MonitoringData
+from .serving import ModelRunnerStepData, ModelsData, MonitoringData
 from .tag import Tag, TagObjects
 from .workflow import (
     GetWorkflowResponse,
@@ -336,8 +336,8 @@ class ModelEndpointMonitoringMetricNoData(_ModelEndpointMonitoringMetricValuesBa
 
 class ApplicationBaseRecord(BaseModel):
     type: Literal["metric", "result"]
-    time: datetime
     value: float
+    time: Optional[datetime] = None
 
 
 class ApplicationResultRecord(ApplicationBaseRecord):
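
The `time` field on `ApplicationBaseRecord` is now optional. A minimal sketch of the effect, using the same `pydantic.v1` base shown in this diff (field names taken verbatim from the hunk above):

```python
from datetime import datetime
from typing import Literal, Optional

from pydantic.v1 import BaseModel


class ApplicationBaseRecord(BaseModel):
    type: Literal["metric", "result"]
    value: float
    time: Optional[datetime] = None  # previously a required field


# A record without a timestamp now validates; before this change it raised a validation error.
no_time = ApplicationBaseRecord(type="metric", value=0.5)
with_time = ApplicationBaseRecord(type="result", value=1.0, time=datetime.now())
```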
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import enum
+
 from pydantic.v1 import BaseModel
 
 from mlrun.common.types import StrEnum
@@ -40,3 +42,8 @@ class MonitoringData(StrEnum):
     MODEL_PATH = "model_path"
     MODEL_ENDPOINT_UID = "model_endpoint_uid"
     MODEL_CLASS = "model_class"
+
+
+class ModelsData(enum.Enum):
+    MODEL_CLASS = 0
+    MODEL_PARAMETERS = 1
mlrun/config.py CHANGED
@@ -125,6 +125,8 @@ default_config = {
             "interval": "30",
             # runs limit to fetch for retrying
             "fetch_runs_limit": 1000,
+            # minutes until a run is considered stale and will be aborted
+            "staleness_threshold": 60 * 24 * 3,
         },
     },
     "projects": {
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from collections.abc import Awaitable
-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union
 
 import mlrun.errors
 from mlrun.datastore.remote_client import (
@@ -23,6 +23,23 @@ T = TypeVar("T")
 
 
 class ModelProvider(BaseRemoteClient):
+    """
+    The ModelProvider class is an abstract base for integrating with external
+    model providers, primarily generative AI (GenAI) services.
+
+    Designed to be subclassed, it defines a consistent interface and shared
+    functionality for tasks such as text generation, embeddings, and invoking
+    fine-tuned models. Subclasses should implement provider-specific logic,
+    including SDK client initialization, model invocation, and custom operations.
+
+    Key Features:
+    - Establishes a consistent, reusable client management for model provider integrations.
+    - Simplifies GenAI service integration by abstracting common operations.
+    - Reduces duplication through shared components for common tasks.
+    - Holds default invocation parameters (e.g., temperature, max_tokens) to avoid boilerplate
+      code and promote consistency.
+    """
+
     support_async = False
 
     def __init__(
@@ -44,9 +61,65 @@ class ModelProvider(BaseRemoteClient):
         self._default_async_operation = None
 
     def load_client(self) -> None:
+        """
+        Initializes the SDK client for the model provider with the given keyword arguments
+        and assigns it to an instance attribute (e.g., self._client).
+
+        Subclasses should override this method to:
+        - Create and configure the provider-specific client instance.
+        - Assign the client instance to self._client.
+        - Define a default operation callable (e.g., a method to invoke model completions)
+          and assign it to self._default_operation.
+        """
+
         raise NotImplementedError("load_client method is not implemented")
 
-    def invoke(self, prompt: Optional[str] = None, **invoke_kwargs) -> str:
+    def invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Optional[Union[str, T]]:
+        """
+        Invokes a generative AI model with the provided messages and additional parameters.
+        This method is designed to be a flexible interface for interacting with various
+        generative AI backends (e.g., OpenAI, Hugging Face, etc.). It allows users to send
+        a list of messages (following a standardized format) and receive a response. The
+        response can be returned as plain text or in its full structured format, depending
+        on the `as_str` parameter.
+
+        :param messages: A list of dictionaries representing the conversation history or input messages.
+            Each dictionary should follow the format::
+                {"role": "system" | "user" | "assistant" ..., "content": "Message content as a string"}
+            Example:
+
+            .. code-block:: json
+
+                [
+                    {"role": "system", "content": "You are a helpful assistant."},
+                    {"role": "user", "content": "What is the capital of France?"}
+                ]
+
+            This format is consistent across all backends. Defaults to None if no messages
+            are provided.
+
+        :param as_str: A boolean flag indicating whether to return the response as a plain string.
+            - If True, the function extracts and returns the main content of the first
+              response.
+            - If False, the function returns the full response object,
+              which may include additional metadata or multiple response options.
+            Defaults to False.
+
+        :param invoke_kwargs:
+            Additional keyword arguments to be passed to the underlying model API call.
+            These can include parameters such as temperature, max tokens, etc.,
+            depending on the capabilities of the specific backend being used.
+
+        :return:
+            - If `as_str` is True: Returns the main content of the first response as a string.
+            - If `as_str` is False: Returns the full response object.
+
+        """
         raise NotImplementedError("invoke method is not implemented")
 
     def customized_invoke(
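
A self-contained sketch of the `invoke()` contract documented above. The stand-in classes below are hypothetical (they do not inherit from the real `ModelProvider`, whose constructor is outside this hunk); they only illustrate the `messages` format and the `as_str` switch:

```python
from typing import Optional, Union


class _FakeResponse:
    """Stand-in for a provider response object."""

    def __init__(self, text: str):
        self.text = text


class _EchoProvider:
    """Hypothetical provider that echoes the last user message back."""

    def invoke(
        self,
        messages: Optional[list[dict]] = None,
        as_str: bool = False,
        **invoke_kwargs,
    ) -> Optional[Union[str, _FakeResponse]]:
        last = (messages or [{}])[-1].get("content", "")
        response = _FakeResponse(f"echo: {last}")
        # as_str=True -> plain string; as_str=False -> full response object
        return response.text if as_str else response


provider = _EchoProvider()
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]
print(provider.invoke(messages=messages, as_str=True))  # "echo: What is the capital of France?"
print(provider.invoke(messages=messages))               # full (fake) response object
```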
@@ -78,5 +151,10 @@ class ModelProvider(BaseRemoteClient):
     async def async_customized_invoke(self, **kwargs):
         raise NotImplementedError("async_customized_invoke is not implemented")
 
-    async def async_invoke(self, prompt: str, **invoke_kwargs) -> Awaitable[str]:
+    async def async_invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Awaitable[str]:
         raise NotImplementedError("async_invoke is not implemented")
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from typing import Callable, Optional, TypeVar
+from typing import Callable, Optional, TypeVar, Union
 
 import mlrun
 from mlrun.datastore.model_provider.model_provider import ModelProvider
@@ -21,6 +21,18 @@ T = TypeVar("T")
 
 
 class OpenAIProvider(ModelProvider):
+    """
+    OpenAIProvider is a wrapper around the OpenAI SDK that provides an interface
+    for interacting with OpenAI's generative AI services.
+
+    It supports both synchronous and asynchronous operations, allowing flexible
+    integration into various workflows.
+
+    This class extends the ModelProvider base class and implements OpenAI-specific
+    functionality, including client initialization, model invocation, and custom
+    operations tailored to the OpenAI API.
+    """
+
     def __init__(
         self,
         parent,
@@ -59,6 +71,19 @@ class OpenAIProvider(ModelProvider):
         return self.endpoint
 
     def load_client(self) -> None:
+        """
+        Initializes the OpenAI SDK client using the provided options.
+
+        This method imports the `OpenAI` class from the `openai` package, instantiates
+        a client with the given keyword arguments (`self.options`), and assigns it to
+        `self._client`.
+
+        It also sets the default operation to `self.client.chat.completions.create`, which is
+        typically used for invoking chat-based model completions.
+
+        Raises:
+            ImportError: If the `openai` package is not installed.
+        """
         try:
             from openai import OpenAI  # noqa
 
@@ -87,34 +112,33 @@ class OpenAIProvider(ModelProvider):
         else:
             return self._default_operation(**invoke_kwargs, model=self.model)
 
-    def _get_messages_parameter(
-        self, prompt: Optional[str] = None, **invoke_kwargs
-    ) -> (str, dict):
-        invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
-        messages = invoke_kwargs.get("messages")
-        if messages:
-            if prompt:
-                raise mlrun.errors.MLRunInvalidArgumentError(
-                    "can not provide 'messages' and 'prompt' to invoke"
-                )
-        elif prompt:
-            messages = [
-                {
-                    "role": "user",
-                    "content": prompt,
-                },
-            ]
-        else:
-            raise mlrun.errors.MLRunInvalidArgumentError(
-                "must provide 'messages' or 'prompt' to invoke"
-            )
-        return messages, invoke_kwargs
+    def invoke(
+        self,
+        messages: Optional[list[dict]] = None,
+        as_str: bool = False,
+        **invoke_kwargs,
+    ) -> Optional[Union[str, T]]:
+        """
+        OpenAI-specific implementation of `ModelProvider.invoke`.
+        Invokes an OpenAI model operation using the sync client.
+        For full details, see `ModelProvider.invoke`.
 
-    def invoke(self, prompt: Optional[str] = None, **invoke_kwargs) -> str:
-        messages, invoke_kwargs = self._get_messages_parameter(
-            prompt=prompt, **invoke_kwargs
-        )
+        :param messages: Same as ModelProvider.invoke.
+
+        :param as_str: bool
+            If `True`, returns only the main content of the first response
+            (`response.choices[0].message.content`).
+            If `False`, returns the full response object, whose type depends on
+            the specific OpenAI SDK operation used (e.g., chat completion, completion, etc.).
+
+        :param invoke_kwargs:
+            Same as ModelProvider.invoke.
+
+        """
+        invoke_kwargs = self.get_invoke_kwargs(invoke_kwargs)
         response = self._default_operation(
             model=self.endpoint, messages=messages, **invoke_kwargs
         )
-        return response.choices[0].message.content
+        if as_str:
+            return response.choices[0].message.content
+        return response
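
For context, a minimal sketch of what this `invoke()` path does under the hood with the OpenAI SDK (assumes the `openai` package is installed and `OPENAI_API_KEY` is set; the model name below is illustrative, whereas in the provider it comes from `self.endpoint`):

```python
from openai import OpenAI

client = OpenAI()  # what load_client() assigns to self._client

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the capital of France?"},
]

# self._default_operation is client.chat.completions.create
response = client.chat.completions.create(model="gpt-4o-mini", messages=messages)

# as_str=True returns only this field; as_str=False returns the full response object
print(response.choices[0].message.content)
```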
@@ -18,6 +18,17 @@ import mlrun
 
 
 class BaseRemoteClient:
+    """
+    The BaseRemoteClient class serves as a foundational component for managing
+    secrets and configurations.
+    It is designed to be extended by subclasses that interact with external services,
+    such as file systems (e.g., Datastore) or model providers (e.g., ModelProvider).
+
+    This class is intended to provide shared functionality and should not be
+    used directly. Instead, create a subclass to implement logic specific to
+    your use case, such as interactions with S3 storage or invoking model providers like OpenAI.
+    """
+
     def __init__(self, parent, kind, name, endpoint="", secrets: Optional[dict] = None):
         self._parent = parent
         self.kind = kind
mlrun/execution.py CHANGED
@@ -961,6 +961,11 @@ class MLClientCtx:
         :returns: The logged `LLMPromptArtifact` object.
         """
 
+        if not prompt_string and not prompt_path:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Either 'prompt_string' or 'prompt_path' must be provided"
+            )
+
         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.project or "",
@@ -804,25 +804,45 @@ class V3IOTSDBConnector(TSDBConnector):
     @staticmethod
     def _get_sql_query(
         *,
-        endpoint_id: str,
         table_path: str,
+        endpoint_id: Optional[str] = None,
+        application_names: Optional[list[str]] = None,
         name: str = mm_schemas.ResultData.RESULT_NAME,
         metric_and_app_names: Optional[list[tuple[str, str]]] = None,
         columns: Optional[list[str]] = None,
+        group_by_columns: Optional[list[str]] = None,
     ) -> str:
         """Get the SQL query for the results/metrics table"""
+
+        if metric_and_app_names and not endpoint_id:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "If metric_and_app_names is provided, endpoint_id must also be provided"
+            )
+
+        if metric_and_app_names and application_names:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Cannot provide both metric_and_app_names and application_names"
+            )
+
         if columns:
             selection = ",".join(columns)
         else:
             selection = "*"
 
         with StringIO() as query:
-            query.write(
-                f"SELECT {selection} FROM '{table_path}' "
-                f"WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
-            )
+            where_added = False
+            query.write(f"SELECT {selection} FROM '{table_path}'")
+            if endpoint_id:
+                query.write(
+                    f" WHERE {mm_schemas.WriterEvent.ENDPOINT_ID}='{endpoint_id}'"
+                )
+                where_added = True
             if metric_and_app_names:
-                query.write(" AND (")
+                if where_added:
+                    query.write(" AND (")
+                else:
+                    query.write(" WHERE (")
+                    where_added = True
 
                 for i, (app_name, result_name) in enumerate(metric_and_app_names):
                     sub_cond = (
@@ -835,6 +855,22 @@ class V3IOTSDBConnector(TSDBConnector):
 
                 query.write(")")
 
+            if application_names:
+                if where_added:
+                    query.write(" AND (")
+                else:
+                    query.write(" WHERE (")
+                for i, app_name in enumerate(application_names):
+                    sub_cond = f"{mm_schemas.WriterEvent.APPLICATION_NAME}='{app_name}'"
+                    if i != 0:  # not first sub condition
+                        query.write(" OR ")
+                    query.write(sub_cond)
+                query.write(")")
+
+            if group_by_columns:
+                query.write(" GROUP BY ")
+                query.write(",".join(group_by_columns))
+
             query.write(";")
             return query.getvalue()
 
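A worked example of the query shapes the extended builder can now produce. The table path is made up, and the schema constants are assumed (for illustration only) to resolve to the literal column names `endpoint_id` and `application_name`:

```python
# endpoint-scoped query (endpoint_id given, nothing else):
q1 = "SELECT * FROM 'mm_apps/app-results' WHERE endpoint_id='ep-1';"

# application-scoped, grouped query (application_names + group_by_columns, no endpoint_id):
q2 = (
    "SELECT start_infer_time FROM 'mm_apps/app-results'"
    " WHERE (application_name='app-a' OR application_name='app-b')"
    " GROUP BY application_name,endpoint_id;"
)
```
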
@@ -1272,7 +1308,49 @@ class V3IOTSDBConnector(TSDBConnector):
         end: Optional[Union[datetime, str]] = None,
         application_names: Optional[Union[str, list[str]]] = None,
     ) -> dict[str, int]:
-        raise NotImplementedError
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+        group_by_columns = [
+            mm_schemas.ApplicationEvent.APPLICATION_NAME,
+            mm_schemas.ApplicationEvent.ENDPOINT_ID,
+        ]
+
+        def get_application_endpoints_records(
+            record_type: Literal["metrics", "results"],
+        ):
+            if record_type == "results":
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+            else:
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+            sql_query = self._get_sql_query(
+                table_path=table_path,
+                columns=[mm_schemas.WriterEvent.START_INFER_TIME],
+                group_by_columns=group_by_columns,
+                application_names=application_names,
+            )
+            return self.frames_client.read(
+                backend=_TSDB_BE,
+                start=start,
+                end=end,
+                query=sql_query,
+            )
+
+        df_results = get_application_endpoints_records("results")
+        df_metrics = get_application_endpoints_records("metrics")
+
+        if df_results.empty and df_metrics.empty:
+            return {}
+
+        # Combine the two dataframes and count unique endpoints per application
+        combined_df = pd.concat([df_results, df_metrics], ignore_index=True)
+        if combined_df.empty:
+            return {}
+        combined_df.drop_duplicates(subset=group_by_columns, inplace=True)
+
+        grouped_df = combined_df.groupby(
+            mm_schemas.WriterEvent.APPLICATION_NAME
+        ).count()
+
+        return grouped_df[mm_schemas.WriterEvent.ENDPOINT_ID].to_dict()
 
     def calculate_latest_metrics(
         self,
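
The endpoint-counting logic above boils down to a concat / drop-duplicates / group-by over the two reads. A self-contained toy reproduction (column names assumed to be the literal values of the schema constants used above):

```python
import pandas as pd

df_results = pd.DataFrame(
    {"application_name": ["app-a", "app-a", "app-b"], "endpoint_id": ["ep-1", "ep-2", "ep-1"]}
)
df_metrics = pd.DataFrame(
    {"application_name": ["app-a", "app-b"], "endpoint_id": ["ep-1", "ep-3"]}
)

combined = pd.concat([df_results, df_metrics], ignore_index=True)
# (app-a, ep-1) appears in both frames but is counted only once
combined.drop_duplicates(subset=["application_name", "endpoint_id"], inplace=True)
counts = combined.groupby("application_name").count()["endpoint_id"].to_dict()
print(counts)  # {'app-a': 2, 'app-b': 2}
```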
@@ -1282,4 +1360,93 @@ class V3IOTSDBConnector(TSDBConnector):
     ) -> list[
         Union[mm_schemas.ApplicationResultRecord, mm_schemas.ApplicationMetricRecord]
     ]:
-        raise NotImplementedError
+        metric_list = []
+        start, end = get_start_end(start=start, end=end, delta=timedelta(hours=24))
+
+        # Get the latest results
+        def get_latest_metrics_records(
+            record_type: Literal["metrics", "results"],
+        ) -> pd.DataFrame:
+            group_by_columns = [mm_schemas.ApplicationEvent.APPLICATION_NAME]
+            if record_type == "results":
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.APP_RESULTS]
+                columns = [
+                    f"last({mm_schemas.ResultData.RESULT_STATUS})",
+                    f"last({mm_schemas.ResultData.RESULT_VALUE})",
+                    f"last({mm_schemas.ResultData.RESULT_KIND})",
+                ]
+                group_by_columns += [
+                    mm_schemas.ResultData.RESULT_NAME,
+                ]
+            else:
+                table_path = self.tables[mm_schemas.V3IOTSDBTables.METRICS]
+                columns = [f"last({mm_schemas.MetricData.METRIC_VALUE})"]
+                group_by_columns += [
+                    mm_schemas.MetricData.METRIC_NAME,
+                ]
+            sql_query = self._get_sql_query(
+                table_path=table_path,
+                columns=columns,
+                group_by_columns=group_by_columns,
+                application_names=application_names,
+            )
+
+            return self.frames_client.read(
+                backend=_TSDB_BE,
+                start=start,
+                end=end,
+                query=sql_query,
+            )
+
+        df_results = get_latest_metrics_records("results")
+        df_metrics = get_latest_metrics_records("metrics")
+
+        if df_results.empty and df_metrics.empty:
+            return metric_list
+
+        # Convert the results DataFrame to a list of ApplicationResultRecord
+        def build_metric_objects() -> (
+            list[
+                Union[
+                    mm_schemas.ApplicationResultRecord,
+                    mm_schemas.ApplicationMetricRecord,
+                ]
+            ]
+        ):
+            metric_objects = []
+            if not df_results.empty:
+                df_results.rename(
+                    columns={
+                        f"last({mm_schemas.ResultData.RESULT_VALUE})": mm_schemas.ResultData.RESULT_VALUE,
+                        f"last({mm_schemas.ResultData.RESULT_STATUS})": mm_schemas.ResultData.RESULT_STATUS,
+                        f"last({mm_schemas.ResultData.RESULT_KIND})": mm_schemas.ResultData.RESULT_KIND,
+                    },
+                    inplace=True,
+                )
+                for _, row in df_results.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationResultRecord(
+                            result_name=row[mm_schemas.ResultData.RESULT_NAME],
+                            kind=row[mm_schemas.ResultData.RESULT_KIND],
+                            status=row[mm_schemas.ResultData.RESULT_STATUS],
+                            value=row[mm_schemas.ResultData.RESULT_VALUE],
+                        )
+                    )
+            if not df_metrics.empty:
+                df_metrics.rename(
+                    columns={
+                        f"last({mm_schemas.MetricData.METRIC_VALUE})": mm_schemas.MetricData.METRIC_VALUE,
+                    },
+                    inplace=True,
+                )
+
+                for _, row in df_metrics.iterrows():
+                    metric_objects.append(
+                        mm_schemas.ApplicationMetricRecord(
+                            metric_name=row[mm_schemas.MetricData.METRIC_NAME],
+                            value=row[mm_schemas.MetricData.METRIC_VALUE],
+                        )
+                    )
+            return metric_objects
+
+        return build_metric_objects()
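
The rename step above exists because the TSDB aggregation returns columns named `last(<field>)`; a small sketch with assumed literal column names:

```python
import pandas as pd

df = pd.DataFrame({"result_name": ["drift"], "last(result_value)": [0.12]})
df.rename(columns={"last(result_value)": "result_value"}, inplace=True)
print(df.columns.tolist())  # ['result_name', 'result_value']
```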
@@ -1081,34 +1081,56 @@ def rerun_workflow(
     :param run_uid: The run UID of the original workflow to retry.
     :param project_name: The project name.
     """
+    db = mlrun.get_run_db()
 
     try:
-        # TODO in followups: handle start and running notifications
-
-        # Retry the pipeline - TODO: add submit-direct flag when created
-        db = mlrun.get_run_db()
+        # Invoke the KFP retry endpoint (direct-submit mode)
         new_pipeline_id = db.retry_pipeline(
-            run_uid, project_name, submit_mode=mlrun_constants.WorkflowSubmitMode.direct
+            run_id=run_uid,
+            project=project_name,
+            submit_mode=mlrun_constants.WorkflowSubmitMode.direct,
+        )
+        logger.info(
+            "KFP retry submitted",
+            new_pipeline_id=new_pipeline_id,
+            rerun_of_workflow=run_uid,
         )
 
-        # Store result for observability
-        context.set_label(
-            mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id
+    except mlrun.errors.MLRunHTTPError as http_exc:
+        logger.error(
+            "Failed calling KFP retry API",
+            run_id=run_uid,
+            error=err_to_str(http_exc),
         )
-        context.update_run()
+        raise
 
-        context.log_result("workflow_id", new_pipeline_id)
+    # Enqueue "running" notifications server-side for this RerunRunner run
+    db.push_run_notifications(context.uid, project_name)
 
-        # wait for pipeline completion so monitor will push terminal notifications
-        wait_for_pipeline_completion(
+    context.set_label(mlrun_constants.MLRunInternalLabels.workflow_id, new_pipeline_id)
+    context.update_run()
+
+    context.log_result("workflow_id", new_pipeline_id)
+
+    try:
+        pipeline = wait_for_pipeline_completion(
             new_pipeline_id,
             project=project_name,
         )
-
-        # Temporary exception
     except Exception as exc:
-        context.logger.error("Failed to rerun workflow", exc=err_to_str(exc))
-        raise
+        mlrun.utils.logger.error(
+            "Failed waiting for workflow completion",
+            rerun_pipeline_id=new_pipeline_id,
+            exc=err_to_str(exc),
+        )
+    else:
+        final_state = pipeline["run"]["status"]
+        context.log_result("workflow_state", final_state, commit=True)
+
+        if final_state != mlrun_pipelines.common.models.RunStatuses.succeeded:
+            raise mlrun.errors.MLRunRuntimeError(
+                f"Pipeline retry of {run_uid} finished in state={final_state}"
+            )
 
 
 def load_and_run(context, *args, **kwargs):
@@ -1201,13 +1223,13 @@ def load_and_run_workflow(
     start_notifications = [
         notification
         for notification in context.get_notifications(unmask_secret_params=True)
-        if "running" in notification.when
+        if mlrun.common.runtimes.constants.RunStates.running in notification.when
     ]
 
     # Prevent redundant notifications for run completion by ensuring that notifications are only triggered when the run
     # reaches the "running" state, as the server already handles the completion notifications.
     for notification in start_notifications:
-        notification.when = ["running"]
+        notification.when = [mlrun.common.runtimes.constants.RunStates.running]
 
     workflow_log_message = workflow_name or workflow_path
     context.logger.info(
mlrun/projects/project.py CHANGED
@@ -1942,6 +1942,11 @@ class MlrunProject(ModelObj):
         :returns: The logged `LLMPromptArtifact` object.
         """
 
+        if not prompt_string and not prompt_path:
+            raise mlrun.errors.MLRunInvalidArgumentError(
+                "Either 'prompt_string' or 'prompt_path' must be provided"
+            )
+
         llm_prompt = LLMPromptArtifact(
             key=key,
             project=self.name,